当前位置: 首页 > news >正文

Atlas 800I A2 离线部署 DeepSeek-R1-Distill-Llama-70B

一、环境信息

1.1、硬件信息

Atlas 800I A2

1.2、环境信息

注意:这里驱动固件最好用商业版,我这里用的社区版有点小问题

操作系统:openEuler 22.03 LTS
NPU驱动:Ascend-hdk-910b-npu-driver_24.1.rc3_linux-aarch64.run
NPU固件:Ascend-hdk-910b-npu-firmware_7.5.0.1.129.run
MindIE镜像:2.0.T3-800I-A2-py311-openeuler24.03-lts

二、安装驱动固件

2.1、创建运行用户和所属组

groupadd HwHiAiUser
useradd -g HwHiAiUser -d /home/HwHiAiUser -m HwHiAiUser -s /bin/bash

2.2、安装驱动(根据提示选择是否重启)

./Ascend-hdk-910b-npu-driver_24.1.rc3_linux-aarch64.run --full --install-for-all

2.3、安装固件(根据提示选择是否重启)

./Ascend-hdk-910b-npu-firmware_7.5.0.1.129.run --full

三、创建容器

docker run -itd --privileged --name=deepseek-70b --net=host \
  --shm-size 500g \
  --device=/dev/davinci0 \
  --device=/dev/davinci1 \
  --device=/dev/davinci2 \
  --device=/dev/davinci3 \
  --device=/dev/davinci4 \
  --device=/dev/davinci5 \
  --device=/dev/davinci6 \
  --device=/dev/davinci7 \
  --device=/dev/davinci_manager \
  --device=/dev/hisi_hdc \
  --device=/dev/devmm_svm \
  -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
  -v /usr/local/Ascend/firmware:/usr/local/Ascend/firmware \
  -v /usr/local/sbin/npu-smi:/usr/local/sbin/npu-smi \
  -v /usr/local/sbin:/usr/local/sbin \
  -v /etc/hccn.conf:/etc/hccn.conf \
  -v /data:/data \
  swr.cn-south-1.myhuaweicloud.com/ascendhub/mindie:2.0.T3-800I-A2-py311-openeuler24.03-lts \
  bash

四、配置、启动MindIE服务

4.1、配置环境变量

vim ~/.bashrc

source /usr/local/Ascend/ascend-toolkit/set_env.sh
source /usr/local/Ascend/nnal/atb/set_env.sh
source /usr/local/Ascend/atb-models/set_env.sh
source /usr/local/Ascend/mindie/set_env.sh
# 日志打印(默认Error级别)
export MINDIE_LOG_TO_STDOUT=1
# 虚拟内存
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
# ATB WorkSpace 使能
export ATB_WORKSPACE_MEM_ALLOC_ALG_TYPE=3
export ATB_WORKSPACE_MEM_ALLOC_GLOBAL=1
# 单线程加速权重加载
export OMP_NUM_THREADS=1
# 设置精度饱和模式,防止fp16引起的上下溢出
export INF_NAN_MODE_ENABLE=0
# 关闭确定性计算
export HCCL_DETERMINISTIC=false
# 使能AIV,暂时还有问题,不建议线上业务使用,性能复现必须开启
export HCCL_OP_EXPANSION_MODE="AIV"
# 使能内存复用
export ATB_LAYER_INTERNAL_TENSOR_REUSE=1
export ATB_OPERATION_EXECUTE_ASYNC=1
export ATB_CONVERT_NCHW_TO_ND=1
export ATB_WORKSPACE_MEM_ALLOC_GLOBAL=1
export ATB_WORKSPACE_MEM_ALLOC_ALG_TYPE=3
export ATB_CONTEXT_WORKSPACE_SIZE=0
export ATB_LAUNCH_KERNEL_WITH_TILING=1
export ATB_LLM_ENABLE_AUTO_TRANSPOSE=0
# CPU亲和性调度
export CPU_AFFINITY_CONF=1
export TASK_QUEUE_ENABLE=1
unset ASCEND_LAUNCH_BLOCKING

4.2、配置mindie配置文件

# 修改MindIE配置文件
cd /usr/local/Ascend/mindie/latest/mindie-service/
vim conf/config.json

 修改点标红项:

{
    "Version" : "1.0.0",
    "LogConfig" :
    {
        "logLevel" : "Info",
        "logFileSize" : 20,
        "logFileNum" : 20,
        "logPath" : "logs/mindie-server.log"
    },

    "ServerConfig" :
    {
        "ipAddress" : "0.0.0.0",
        "managementIpAddress" : "127.0.0.2",
        "port" : 1025,
        "managementPort" : 1026,
        "metricsPort" : 1027,
        "allowAllZeroIpListening" : true,
        "maxLinkNum" : 1000,
        "httpsEnabled" : false,
        "fullTextEnabled" : false,
        "tlsCaPath" : "security/ca/",
        "tlsCaFile" : ["ca.pem"],
        "tlsCert" : "security/certs/server.pem",
        "tlsPk" : "security/keys/server.key.pem",
        "tlsPkPwd" : "security/pass/key_pwd.txt",
        "tlsCrlPath" : "security/certs/",
        "tlsCrlFiles" : ["server_crl.pem"],
        "managementTlsCaFile" : ["management_ca.pem"],
        "managementTlsCert" : "security/certs/management/server.pem",
        "managementTlsPk" : "security/keys/management/server.key.pem",
        "managementTlsPkPwd" : "security/pass/management/key_pwd.txt",
        "managementTlsCrlPath" : "security/management/certs/",
        "managementTlsCrlFiles" : ["server_crl.pem"],
        "kmcKsfMaster" : "tools/pmt/master/ksfa",
        "kmcKsfStandby" : "tools/pmt/standby/ksfb",
        "inferMode" : "standard",
        "interCommTLSEnabled" : false,
        "interCommPort" : 1121,
        "interCommTlsCaPath" : "security/grpc/ca/",
        "interCommTlsCaFiles" : ["ca.pem"],
        "interCommTlsCert" : "security/grpc/certs/server.pem",
        "interCommPk" : "security/grpc/keys/server.key.pem",
        "interCommPkPwd" : "security/grpc/pass/key_pwd.txt",
        "interCommTlsCrlPath" : "security/grpc/certs/",
        "interCommTlsCrlFiles" : ["server_crl.pem"],
        "openAiSupport" : "vllm"
    },

    "BackendConfig" : {
        "backendName" : "mindieservice_llm_engine",
        "modelInstanceNumber" : 1,
        "npuDeviceIds" : [[0,1,2,3,4,5,6,7]],
        "tokenizerProcessNumber" : 8,
        "multiNodesInferEnabled" : false,
        "multiNodesInferPort" : 1120,
        "interNodeTLSEnabled" : false,
        "interNodeTlsCaPath" : "security/grpc/ca/",
        "interNodeTlsCaFiles" : ["ca.pem"],
        "interNodeTlsCert" : "security/grpc/certs/server.pem",
        "interNodeTlsPk" : "security/grpc/keys/server.key.pem",
        "interNodeTlsPkPwd" : "security/grpc/pass/mindie_server_key_pwd.txt",
        "interNodeTlsCrlPath" : "security/grpc/certs/",
        "interNodeTlsCrlFiles" : ["server_crl.pem"],
        "interNodeKmcKsfMaster" : "tools/pmt/master/ksfa",
        "interNodeKmcKsfStandby" : "tools/pmt/standby/ksfb",
        "ModelDeployConfig" :
        {
            "maxSeqLen" : 2560,
            "maxInputTokenLen" : 2048,
            "truncation" : false,
            "ModelConfig" : [
                {
                    "modelInstanceType" : "Standard",
                    "modelName" : "deepseek-70b",
                    "modelWeightPath" : "/data/70b",
                    "worldSize" : 8,
                    "cpuMemSize" : 5,
                    "npuMemSize" : -1,
                    "backendType" : "atb",
                    "trustRemoteCode" : false
                }
            ]
        },

        "ScheduleConfig" :
        {
            "templateType" : "Standard",
            "templateName" : "Standard_LLM",
            "cacheBlockSize" : 128,

            "maxPrefillBatchSize" : 50,
            "maxPrefillTokens" : 8192,
            "prefillTimeMsPerReq" : 150,
            "prefillPolicyType" : 0,

            "decodeTimeMsPerReq" : 50,
            "decodePolicyType" : 0,

            "maxBatchSize" : 200,
            "maxIterTimes" : 512,
            "maxPreemptCount" : 0,
            "supportSelectBatch" : false,
            "maxQueueDelayMicroseconds" : 5000
        }
    }
}

# 设置权重的配置文件的权限
chmod 750 {/path-to-weights/config.json}
# 拉起服务
nohup ./bin/mindieservice_daemon > output.log 2>&1 &
# 停止服务
ps -ef | grep mindieservice
pkill -9 mindieservice

五、测试

curl http://IP:1025/v1/chat/completions -d '{"model": "改为mindie配置文件中设置的名称","messages": [{"role": "user", "content": "请告诉我关于人工智能的一些信息。"}],"stream": true}'

六、部署open-webui

# 修改点:
# OPENAI_API_BASE_URLS:改为自己的推理服务接口IP
# /data/open-webui:open-webui的数据存储目录
docker run -itd --name open-webui \
--net host \
-e PORT=3006 \
-e OPENAI_API_BASE_URLS=http://192.168.50.1:1025/v1 \
-v /data/open-webui:/app/backend/data \
022ecf5a33b8 bash

相关文章:

  • 使用钉钉机器人推送系统内部的ERP停机维护公告
  • Mysql的深度分页查询优化
  • 鲲鹏麒麟搭建Docker仓库
  • DeepSeek 部署中的常见问题及解决方案全解析
  • DrissionPage 请求一次换一个代理(不重启chrome)
  • 快速上手GO的net/http包,个人学习笔记
  • CentOS 7 磁盘阵列搭建与管理全攻略
  • 【计算机视觉】CV实战项目- 深度解析FaceAI:一款全能的人脸检测与图像处理工具库
  • 基于霍尔效应传感器的 BLDC 电机梯形控制方案详解
  • 从零开始学习SLAM|技术路线
  • uniapp+vue3移动端实现输入验证码
  • 数据中台-数据质量管理系统:从架构到实战
  • 函数重载(Function Overloading)
  • 什么是 低秩矩阵(Low-Rank)
  • 多级缓存架构深度解析:从设计原理到生产实践
  • AI时代的能力重构与终身进化
  • Spring Boot 自动配置深度解析:从源码结构到设计哲学
  • 2025上海车展 | 移远通信全栈车载智能解决方案重磅亮相,重构“全域智能”出行新范式
  • 关于QT信号、槽、槽函数的讲解
  • mongo客户端操作mongodb记录
  • 现场观察·国防部记者会|美将举行大演习“应对中国”,备战太平洋引发关注
  • 全国党委和政府秘书长会议在京召开,蔡奇出席并讲话
  • 中国田协通报苏州马拉松“方便门”处理情况:涉事10人禁赛3年
  • 著名哲学家、中山大学哲学系原系主任李锦全逝世
  • 一季度沪苏浙皖GDP增速均快于去年全年,新兴动能持续壮大
  • 习近平向气候和公正转型领导人峰会发表致辞