• 【mindspore1.5.0】安装mindspore报libcuda.so没找到和libcudnn没找到


    已经根据官网提供的命令安装mindspore,之前自己的GPU机器已经有了cuda10.1,后来按照官网提供的

    conda install mindspore-gpu=1.5.0 cudatoolkit=10.1 -c mindspore -c conda-forge进行安装,没有问题,可是导入mindspore出现ERROR

    【截图信息】

    查看自己cuda文件夹下内容,发现cuda配置是正确的

    但是cuda/lib64下之前没有libcuda.so,后来在lib64/stubs下找到了libcuda.so,将其拷贝到lib64/下,还是报图1中的错误。

    conda list下发现这些包

    自己去mindspore/run_check/_check_version.py找到了相对应的内容,它会传入libcu和libcudnn,可是这几个文件都没在cuda10.1中找到,只有lib64这个文件夹,该怎么办啊?

    class GPUEnvChecker(EnvChecker):
        """GPU environment check.

        On construction the checker runs ``ldd`` against the MindSpore
        ``_c_expression`` shared object to find the directories that provide
        the CUDA and cuDNN libraries, then ``check_version`` compares the
        versions found there (and the ``nvcc`` on ``PATH``) with the versions
        listed in ``self.version``. Mismatches are reported through ``logger``
        warnings; nothing is raised for a mismatch.
        """

        def __init__(self):
            # CUDA "major.minor" versions accepted by this checker.
            self.version = ["10.1", "11.1"]
            # Maps the grep key handed to ``_get_lib_path`` to the library name
            # shown in the error message. Every key used below must be present,
            # otherwise building the "not found" message fails with KeyError.
            self.lib_key_to_lib_name = {'libcu': 'libcuda.so', 'libcudnn': 'libcudnn.so'}
            # env
            self.path = os.getenv("PATH")
            self.ld_lib_path = os.getenv("LD_LIBRARY_PATH")
            # check
            # "0" is the sentinel for "no CUDA runtime library found yet".
            self.v = "0"
            # Order matters: the bin path lookup reads ``self.cuda_lib_path``.
            self.cuda_lib_path = self._get_lib_path("libcu")
            self.cuda_bin_path = self._get_bin_path("cuda")
            self.cudnn_lib_path = self._get_lib_path("libcudnn")

        def check_env(self, e):
            """Re-raise the exception ``e`` passed in by the caller."""
            raise e

        def set_env(self):
            """Do nothing; this checker does not set any environment."""
            return

        def _get_bin_path(self, bin_name):
            """Get bin path by bin name.

            Only ``"cuda"`` is supported; any other name yields an empty list.
            """
            if bin_name == "cuda":
                return self._get_cuda_bin_path()
            return []

        def _get_cuda_bin_path(self):
            """Get cuda bin path by lib path.

            Returns the de-duplicated ``<lib path>/bin`` directories that exist.
            """
            path_list = []
            for path in self.cuda_lib_path:
                path = os.path.abspath(path.strip() + "/bin/")
                if Path(path).is_dir():
                    path_list.append(path)
            return np.unique(path_list)

        def _get_nvcc_version(self, is_set_env):
            """Get cuda version by nvcc command.

            Args:
                is_set_env: ``True`` when ``PATH`` has already been extended with
                    a detected CUDA bin directory, which stops a second retry.

            Returns:
                The release string printed by ``nvcc --version`` (e.g. "10.1"),
                or ``""`` when nvcc cannot be run or times out.
            """
            try:
                nvcc_result = subprocess.run(["nvcc --version | grep release"],
                                             timeout=3, text=True, capture_output=True, check=False, shell=True)
            except subprocess.TimeoutExpired:
                # Same policy as ``_get_lib_path``: a slow tool must not abort the check.
                logger.warning("Failed to check nvcc version due to the nvcc command timeout.")
                return ""
            if nvcc_result.returncode:
                if not is_set_env:
                    # nvcc is not on PATH: retry once with a detected CUDA bin dir prepended.
                    for path in self.cuda_bin_path:
                        if Path(path + "/nvcc").is_file():
                            os.environ['PATH'] = path + ":" + os.environ['PATH']
                            return self._get_nvcc_version(True)
                return ""
            for line in nvcc_result.stdout.split('\n'):
                if line:
                    # Take the text between "release" and the following comma.
                    return line.strip().split("release")[1].split(",")[0].strip()
            return ""

        def _get_cudnn_version(self):
            """Get cudnn version by libcudnn.so.

            Returns:
                Up to three characters built by joining the dotted components of
                the ``libcudnn.so.<...>`` file name (e.g. "765"), or ``""`` when
                no such file is found.
            """
            cudnn_version = []
            for path in self.cudnn_lib_path:
                # NOTE(review): ``path`` is interpolated into a shell command; it comes
                # from ldd output, not from user input, as far as this class shows.
                try:
                    ls_cudnn = subprocess.run(["ls " + path + "/lib*/libcudnn.so.*.*"], timeout=10, text=True,
                                              capture_output=True, check=False, shell=True)
                except subprocess.TimeoutExpired:
                    logger.warning("Failed to check cudnn version due to the ls command timeout.")
                    continue
                if ls_cudnn.returncode == 0:
                    file_name = ls_cudnn.stdout.split('/')[-1]
                    # Drop the "libcudnn.so." prefix as a prefix; str.strip() would
                    # treat the argument as a set of characters instead.
                    cudnn_version = file_name.partition('libcudnn.so.')[2].strip().split('.')
                    if len(cudnn_version) == 2:
                        cudnn_version.append('0')
                    break
            version_str = ''.join(cudnn_version)
            return version_str[0:3]

        def _get_cudart_version(self):
            """Get cuda runtime version by libcudart.so.

            Stores the version taken from the ``libcudart.so.<x.y.z>`` file name
            in ``self.v`` and returns it; ``self.v`` is left unchanged when no
            such file is found.
            """
            for path in self.cuda_lib_path:
                try:
                    ls_cudart = subprocess.run(["ls " + path + "/lib*/libcudart.so.*.*.*"], timeout=10, text=True,
                                               capture_output=True, check=False, shell=True)
                except subprocess.TimeoutExpired:
                    logger.warning("Failed to check cuda runtime version due to the ls command timeout.")
                    continue
                if ls_cudart.returncode == 0:
                    file_name = ls_cudart.stdout.split('/')[-1]
                    # Prefix removal, not character-set stripping.
                    self.v = file_name.partition('libcudart.so.')[2].strip()
                    break
            return self.v

        def check_version(self):
            """Check cuda version.

            Logs a warning for each of: CUDA runtime missing or unsupported,
            nvcc version unsupported, cuDNN older than 7.6.0, and cuDNN older
            than 8.0.0 combined with a CUDA major version above 10.
            """
            if not self._check_version():
                if self.v == "0":
                    logger.warning("Can not found cuda libs, please confirm that the correct "
                                   "cuda version has been installed, you can refer to the "
                                   "installation guidelines: https://www.mindspore.cn/install")
                else:
                    logger.warning(f"MindSpore version {__version__} and cuda version {self.v} does not match, "
                                   "please refer to the installation guide for version matching "
                                   "information: https://www.mindspore.cn/install")
            nvcc_version = self._get_nvcc_version(False)
            if nvcc_version and (nvcc_version not in self.version):
                logger.warning(f"MindSpore version {__version__} and nvcc(cuda bin) version {nvcc_version} "
                               "does not match, please refer to the installation guide for version matching "
                               "information: https://www.mindspore.cn/install")
            cudnn_version = self._get_cudnn_version()
            if cudnn_version and int(cudnn_version) < 760:
                logger.warning(f"MindSpore version {__version__} and cuDNN version {cudnn_version} "
                               "does not match, please refer to the installation guide for version matching "
                               "information: https://www.mindspore.cn/install. The recommended version is "
                               "CUDA10.1 with cuDNN7.6.x and CUDA11.1 with cuDNN8.0.x")
            if cudnn_version and int(cudnn_version) < 800 and int(str(self.v).split('.')[0]) > 10:
                logger.warning(f"CUDA version {self.v} and cuDNN version {cudnn_version} "
                               "does not match, please refer to the installation guide for version matching "
                               "information: https://www.mindspore.cn/install. The recommended version is "
                               "CUDA11.1 with cuDNN8.0.x")

        def _check_version(self):
            """Check cuda version.

            Returns:
                ``True`` when the "major.minor" of the detected CUDA runtime is
                listed in ``self.version``, otherwise ``False``.
            """
            v = version.parse(self._get_cudart_version())
            v_str = str(v.major) + "." + str(v.minor)
            return v_str in self.version

        def _get_lib_path(self, lib_name):
            """Get gpu lib path by ldd command.

            Args:
                lib_name: Substring grepped for in the ``ldd`` output of the
                    ``_c_expression`` shared object (e.g. "libcu", "libcudnn").

            Returns:
                The de-duplicated parent directories of the directories holding
                the matching libraries, or an empty list when nothing matches
                or the command times out.
            """
            path_list = []
            current_path = os.path.split(os.path.realpath(__file__))[0]
            mindspore_path = os.path.join(current_path, "../")
            try:
                ldd_result = subprocess.run(["ldd " + mindspore_path + "/_c_expression*.so* | grep " + lib_name],
                                            timeout=10, text=True, capture_output=True, check=False, shell=True)
                if ldd_result.returncode:
                    # Fall back to the raw key so an unmapped name cannot raise
                    # KeyError while the error message is being built.
                    lib_file = self.lib_key_to_lib_name.get(lib_name, lib_name)
                    logger.error(f"{lib_file} (need by mindspore-gpu) is not found, please "
                                 f"confirm that _c_expression.so is in directory:{mindspore_path} and the correct cuda "
                                 "version has been installed, you can refer to the installation "
                                 "guidelines: https://www.mindspore.cn/install")
                    return path_list
                for line in ldd_result.stdout.split('\n'):
                    # ldd prints "<name> => <resolved path> (<addr>)" or "<name> => not found".
                    path = line.partition("=>")[2]
                    if "not found" in path.lower():
                        logger.warning(f"Cuda {self.version} version(need by mindspore-gpu) is not found, please confirm "
                                       "that the path of cuda is set to the env LD_LIBRARY_PATH, please refer to the "
                                       "installation guidelines: https://www.mindspore.cn/install")
                        continue
                    # Keep the directory part in front of the library file name.
                    path = path.partition(lib_name)[0]
                    if path:
                        path_list.append(os.path.abspath(path.strip() + "../"))
                return np.unique(path_list)
            except subprocess.TimeoutExpired:
                logger.warning("Failed to check cuda version due to the ldd command timeout, please confirm that "
                               "the correct cuda version has been installed, you can refer to the "
                               "installation guidelines: https://www.mindspore.cn/install")
                return path_list

        def _read_version(self, file_path):
            """Get gpu version info in version.txt.

            Sets ``self.v`` to the text following "CUDA Version" on the first
            line that starts with it, and returns ``self.v`` (unchanged when no
            such line exists).
            """
            with open(file_path, 'r') as f:
                for line in f:
                    if line.startswith("CUDA Version"):
                        self.v = line.strip().split("CUDA Version")[1]
                        return self.v
            return self.v

    cudnn应该是你没装。要去官网另外装的。

    可以参考下面这篇博客中关于cudnn的部分,把cudnn装上:

    且看张小白如何用暗影精灵玩转MindSpore(三)3080的崛起-云社区-华为云

    至于cuda 10.1的so为啥没找到。这个就不清楚了。按理说,你conda装了cuda10.1的toolkit,应该在conda环境的目录下能找到cuda10.1对应的so文件。它的位置未必是/usr/local/cuda10.1

    conda环境跟系统环境是属于两套环境。

    当然,你也可以重新到官网下载cuda10.1的包重装一下。安装方式也可以参考我上面的链接。

    装cuda的时候别装驱动只装toolkit。

  • 相关阅读:
    k8s--基础--02--组件
    【etcd】的限流设计
    上午卷-6.网络与多媒体基础知识-软件设计师
    解决报错:gnutls_handshake() failed: The TLS connection was non-properly terminated.
    Redis分页+多条件模糊查询组合实现思路
    【AI学习指南】八、PaddlePaddle自然语言处理-PaddleNLP的进阶应用
    Android 14 系统启动流程 之 启动init进程、启动Zygote进程
    【面试题】如何理解 前端设计模式-测策略模式?
    RocketMQ(13)——指定NameServer的四种方式
    SSM - Springboot - MyBatis-Plus 全栈体系(二十一)
  • 原文地址:https://blog.csdn.net/weixin_45666880/article/details/126421155