- curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
- && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
- sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
- sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list \
- && \
- sudo apt-get update
sudo apt-get install -y nvidia-container-toolkit
Configure the container runtime by using the nvidia-ctk command:
sudo nvidia-ctk runtime configure --runtime=docker
The nvidia-ctk command modifies the /etc/docker/daemon.json file on the host. The file is updated so that Docker can use the NVIDIA Container Runtime.
Restart the Docker daemon:
sudo systemctl restart docker
vi /etc/docker/daemon.json
- {
- "runtimes": {
- "nvidia": {
- "args": [],
- "path": "nvidia-container-runtime"
- }
- },
- "registry-mirrors":[
- "https://mirror.ccs.tencentyun.com",
- "http://registry.docker-cn.com",
- "http://docker.mirrors.ustc.edu.cn",
- "http://hub-mirror.c.163.com",
- "https://3laho3y3.mirror.aliyuncs.com",
- "http://f1361db2.m.daocloud.io",
- "https://docker.mirrors.sjtug.sjtu.edu.cn",
- "https://docker.nju.edu.cn",
- "https://dockerproxy.com",
- "https://mirror.baidubce.com"
- ],
- "insecure-registries":[
- "registry.docker-cn.com",
- "docker.mirrors.ustc.edu.cn"
- ],
- "debug":true,
- "experimental":true
- }
sudo docker pull nvidia/cuda:11.7.1-cudnn8-devel-ubuntu20.04
需要等待较长时间1-2小时。
docker run -it -d --gpus all --name NLP_env --hostname NLP_env --shm-size 8g -e NVIDIA_DRIVER_CAPABILITIES=compute,utility,video -v /home/modeldata:/containerdata -p 6667:22 --restart always nvidia/cuda:11.7.1-cudnn8-devel-ubuntu20.04 /bin/bash
-d 后台运行
--gpus all 选择所有gpu
--name 容器名称
--hostname 主机名称
--shm-size 共享内存
-e 设置环境变量;这里的 NVIDIA_DRIVER_CAPABILITIES 主要用于保证容器内能够查看并使用显卡
-v 目录映射(宿主机:容器内)
-p 端口映射(宿主机:容器内)
--restart always 重启策略:容器退出后自动重启,Docker 守护进程重启后容器也会自动启动
运行
docker ps -a
显示
测试通过
docker exec -it NLP_env nvidia-smi
查看显卡是否在容器内运行正常。
docker exec -it NLP_env bash
- apt update
- apt install openssh-server
- apt install vim
- vim /etc/ssh/sshd_config
将 sshd_config 中的 PermitRootLogin 一项修改为:PermitRootLogin yes
- service ssh restart
- passwd
输入登录密码并确认。
之后即可通过 ssh 登录容器:使用宿主机 IP、端口号 6667、账号 root,以及上一步设置的密码。