已经根据官网提供的命令安装mindspore,之前自己的GPU机器已经有了cuda10.1,后来按照官网提供的
conda install mindspore-gpu=1.5.0 cudatoolkit=10.1 -c mindspore -c conda-forge进行安装,没有问题,可是导入mindspore出现ERROR
【截图信息】
查看自己cuda文件夹下内容,发现cuda配置是正确的
但是cuda/lib64下原来没有libcuda.so,后来在lib64/stubs下找到了libcuda.so,将其拷贝到lib64/下之后,仍然报图1中的错误
conda list下发现这些包
我到mindspore/run_check/_check_version.py中找到了对应的代码:它会向_get_lib_path传入libcu和libcudnn,可是这几个库文件在cuda10.1目录下都没有找到(只有lib64这个文件夹),该怎么办?
class GPUEnvChecker(EnvChecker):
    """GPU environment check.

    On construction, the CUDA and cuDNN library directories are discovered by
    running ``ldd`` on the ``_c_expression*.so*`` file(s) in the parent
    directory of this file. ``check_version`` then logs warnings when the
    detected CUDA runtime, nvcc or cuDNN versions do not match the supported
    versions listed in ``self.version``.
    """

    def __init__(self):
        # CUDA "major.minor" versions accepted by the checks below.
        self.version = ["10.1", "11.1"]
        # Maps the grep key passed to `_get_lib_path` to the library name shown
        # in the error log. Every key used in this constructor needs an entry;
        # previously 'libcudnn' was missing, which raised KeyError instead of
        # logging when the ldd lookup for libcudnn failed.
        self.lib_key_to_lib_name = {'libcu': 'libcuda.so', 'libcudnn': 'libcudnn.so'}
        # env
        self.path = os.getenv("PATH")
        self.ld_lib_path = os.getenv("LD_LIBRARY_PATH")

        # check
        # "0" is the sentinel for "no cuda runtime version detected yet".
        self.v = "0"
        self.cuda_lib_path = self._get_lib_path("libcu")
        self.cuda_bin_path = self._get_bin_path("cuda")
        self.cudnn_lib_path = self._get_lib_path("libcudnn")

    def check_env(self, e):
        """Re-raise the exception `e` passed in by the caller."""
        raise e

    def set_env(self):
        """Do nothing; no environment setup is performed here."""
        return

    def _get_bin_path(self, bin_name):
        """Get bin path by bin name.

        Only the name "cuda" is recognised; any other name yields an empty list.
        """
        if bin_name == "cuda":
            return self._get_cuda_bin_path()
        return []

    def _get_cuda_bin_path(self):
        """Get cuda bin path by lib path.

        Returns:
            The unique existing ``<cuda root>/bin`` directories derived from
            ``self.cuda_lib_path`` (as returned by ``np.unique``).
        """
        path_list = []
        for path in self.cuda_lib_path:
            path = os.path.abspath(path.strip() + "/bin/")
            if Path(path).is_dir():
                path_list.append(path)
        return np.unique(path_list)

    def _get_nvcc_version(self, is_set_env):
        """Get cuda version by nvcc command.

        Args:
            is_set_env: True when PATH has already been extended with a cuda
                bin directory, so no further retry is attempted.

        Returns:
            The text between "release" and the following comma in the first
            non-empty line of the ``nvcc --version | grep release`` output,
            or "" when it cannot be obtained.
        """
        nvcc_result = subprocess.run(["nvcc --version | grep release"],
                                     timeout=3, text=True, capture_output=True, check=False, shell=True)
        if nvcc_result.returncode:
            if not is_set_env:
                # nvcc is not on PATH: prepend the first discovered cuda bin
                # directory that contains nvcc and retry exactly once.
                for path in self.cuda_bin_path:
                    if Path(path + "/nvcc").is_file():
                        os.environ['PATH'] = path + ":" + os.environ['PATH']
                        return self._get_nvcc_version(True)
            return ""
        result = nvcc_result.stdout
        for line in result.split('\n'):
            if line:
                return line.strip().split("release")[1].split(",")[0].strip()
        return ""

    def _get_cudnn_version(self):
        """Get cudnn version by libcudnn.so.

        Returns:
            Up to the first three characters of the version components joined
            together (e.g. "765" for libcudnn.so.7.6.5), or "" when no
            matching file is found.
        """
        cudnn_version = []
        for path in self.cudnn_lib_path:
            ls_cudnn = subprocess.run(["ls " + path + "/lib*/libcudnn.so.*.*"], timeout=10, text=True,
                                      capture_output=True, check=False, shell=True)
            if ls_cudnn.returncode == 0:
                # Take the last path component of the output and drop the
                # "libcudnn.so." prefix. partition() removes the exact prefix,
                # whereas str.strip(chars) would treat it as a character set.
                file_name = ls_cudnn.stdout.split('/')[-1]
                cudnn_version = file_name.partition('libcudnn.so.')[2].strip().split('.')
                if len(cudnn_version) == 2:
                    # Pad "major.minor" to three components.
                    cudnn_version.append('0')
                break
        version_str = ''.join(cudnn_version)
        return version_str[0:3]

    def _get_cudart_version(self):
        """Get cuda runtime version by libcudart.so.

        Stores the detected version in ``self.v`` and returns it; ``self.v``
        is left unchanged when no matching file is found.
        """
        for path in self.cuda_lib_path:
            ls_cudart = subprocess.run(["ls " + path + "/lib*/libcudart.so.*.*.*"], timeout=10, text=True,
                                       capture_output=True, check=False, shell=True)
            if ls_cudart.returncode == 0:
                # Exact-prefix removal; see the note in `_get_cudnn_version`.
                file_name = ls_cudart.stdout.split('/')[-1]
                self.v = file_name.partition('libcudart.so.')[2].strip()
                break
        return self.v

    def check_version(self):
        """Check cuda version.

        Logs a warning for each mismatch found among the cuda runtime, nvcc
        and cuDNN versions; nothing is returned.
        """
        if not self._check_version():
            if self.v == "0":
                logger.warning("Can not found cuda libs, please confirm that the correct "
                               "cuda version has been installed, you can refer to the "
                               "installation guidelines: https://www.mindspore.cn/install")
            else:
                logger.warning(f"MindSpore version {__version__} and cuda version {self.v} does not match, "
                               "please refer to the installation guide for version matching "
                               "information: https://www.mindspore.cn/install")
        nvcc_version = self._get_nvcc_version(False)
        if nvcc_version and (nvcc_version not in self.version):
            logger.warning(f"MindSpore version {__version__} and nvcc(cuda bin) version {nvcc_version} "
                           "does not match, please refer to the installation guide for version matching "
                           "information: https://www.mindspore.cn/install")
        cudnn_version = self._get_cudnn_version()
        if cudnn_version and int(cudnn_version) < 760:
            logger.warning(f"MindSpore version {__version__} and cudDNN version {cudnn_version} "
                           "does not match, please refer to the installation guide for version matching "
                           "information: https://www.mindspore.cn/install. The recommended version is "
                           "CUDA10.1 with cuDNN7.6.x and CUAD11.1 with cuDNN8.0.x")
        # A cuda major version above 10 combined with a cuDNN version below 800.
        if cudnn_version and int(cudnn_version) < 800 and int(str(self.v).split('.')[0]) > 10:
            logger.warning(f"CUDA version {self.v} and cuDNN version {cudnn_version} "
                           "does not match, please refer to the installation guide for version matching "
                           "information: https://www.mindspore.cn/install. The recommended version is "
                           "CUAD11.1 with cuDNN8.0.x")

    def _check_version(self):
        """Check cuda version.

        Returns:
            True when the "major.minor" part of the cuda runtime version is
            listed in ``self.version``, False otherwise.
        """
        v = version.parse(self._get_cudart_version())
        v_str = str(v.major) + "." + str(v.minor)
        return v_str in self.version

    def _get_lib_path(self, lib_name):
        """Get gpu lib path by ldd command.

        Args:
            lib_name: Substring used to grep the ``ldd`` output (e.g. "libcu").

        Returns:
            The unique parent directories of the directories holding the
            matched libraries, or an empty list when ``ldd`` finds no match
            or times out.
        """
        path_list = []
        current_path = os.path.split(os.path.realpath(__file__))[0]
        mindspore_path = os.path.join(current_path, "../")
        try:
            ldd_result = subprocess.run(["ldd " + mindspore_path + "/_c_expression*.so* | grep " + lib_name],
                                        timeout=10, text=True, capture_output=True, check=False, shell=True)
            if ldd_result.returncode:
                # Fall back to the raw key so that an unmapped key can never
                # turn this diagnostic into a KeyError.
                display_name = self.lib_key_to_lib_name.get(lib_name, lib_name)
                logger.error(f"{display_name} (need by mindspore-gpu) is not found, please "
                             f"confirm that _c_expression.so is in directory:{mindspore_path} and the correct cuda "
                             "version has been installed, you can refer to the installation "
                             "guidelines: https://www.mindspore.cn/install")
                return path_list
            result = ldd_result.stdout
            for i in result.split('\n'):
                # ldd lines look like "<name> => <path> (<addr>)".
                path = i.partition("=>")[2]
                if path.lower().find("not found") > 0:
                    logger.warning(f"Cuda {self.version} version(need by mindspore-gpu) is not found, please confirm "
                                   "that the path of cuda is set to the env LD_LIBRARY_PATH, please refer to the "
                                   "installation guidelines: https://www.mindspore.cn/install")
                    continue
                # Keep the directory part before the library file name, then
                # step one level up from it.
                path = path.partition(lib_name)[0]
                if path:
                    path_list.append(os.path.abspath(path.strip() + "../"))
            return np.unique(path_list)
        except subprocess.TimeoutExpired:
            logger.warning("Failed to check cuda version due to the ldd command timeout, please confirm that "
                           "the correct cuda version has been installed, you can refer to the "
                           "installation guidelines: https://www.mindspore.cn/install")
            return path_list

    def _read_version(self, file_path):
        """Get gpu version info in version.txt.

        Args:
            file_path: Path of the text file to scan.

        Returns:
            The text following "CUDA Version" on the first line that starts
            with it (also stored in ``self.v``), or the current ``self.v``
            when no such line exists.
        """
        with open(file_path, 'r') as f:
            all_info = f.readlines()
            for line in all_info:
                if line.startswith("CUDA Version"):
                    self.v = line.strip().split("CUDA Version")[1]
                    return self.v
        return self.v
cudnn应该是你没装。要去官网另外装的。
可以参考下面这篇博客中关于cudnn的部分,照着装一下:
《且看张小白如何用暗影精灵玩转MindSpore(三)3080的崛起》-云社区-华为云
至于cuda 10.1的so为什么没找到,这个就不清楚了。按理说,你用conda装了cuda10.1的toolkit,应该能在conda环境的目录下找到cuda10.1对应的so文件,它的位置未必是/usr/local/cuda10.1。
conda环境跟系统环境是属于两套环境。
当然,你也可以重新到官网下载cuda10.1的包重装一下。安装方式也可以参考我上面的链接。
装cuda的时候别装驱动只装toolkit。