• NCCL源码解析①:初始化及ncclUniqueId的产生


    1791bffd02c61c39108f3a16b915e3e8.png


    作者|KIDGINBROOK
    更新|潘丽晨

     

    NCCL是英伟达开源的GPU通信库,支持集合通信和点对点通信。

    看下官方给的一个demo:

    1. #include <stdio.h>
    2. #include "cuda_runtime.h"
    3. #include "nccl.h"
    4. #include "mpi.h"
    5. #include <unistd.h>
    6. #include <stdint.h>
    7. #define MPICHECK(cmd) do { \
    8. int e = cmd; \
    9. if( e != MPI_SUCCESS ) { \
    10. printf("Failed: MPI error %s:%d '%d'\n", \
    11. __FILE__,__LINE__, e); \
    12. exit(EXIT_FAILURE); \
    13. } \
    14. } while(0)
    15. #define CUDACHECK(cmd) do { \
    16. cudaError_t e = cmd; \
    17. if( e != cudaSuccess ) { \
    18. printf("Failed: Cuda error %s:%d '%s'\n", \
    19. __FILE__,__LINE__,cudaGetErrorString(e)); \
    20. exit(EXIT_FAILURE); \
    21. } \
    22. } while(0)
    23. #define NCCLCHECK(cmd) do { \
    24. ncclResult_t r = cmd; \
    25. if (r!= ncclSuccess) { \
    26. printf("Failed, NCCL error %s:%d '%s'\n", \
    27. __FILE__,__LINE__,ncclGetErrorString(r)); \
    28. exit(EXIT_FAILURE); \
    29. } \
    30. } while(0)
    31. static uint64_t getHostHash(const char* string) {
    32. // Based on DJB2a, result = result * 33 ^ char
    33. uint64_t result = 5381;
    34. for (int c = 0; string[c] != '\0'; c++){
    35. result = ((result << 5) + result) ^ string[c];
    36. }
    37. return result;
    38. }
    39. static void getHostName(char* hostname, int maxlen) {
    40. gethostname(hostname, maxlen);
    41. for (int i=0; i< maxlen; i++) {
    42. if (hostname[i] == '.') {
    43. hostname[i] = '\0';
    44. return;
    45. }
    46. }
    47. }
    48. int main(int argc, char* argv[])
    49. {
    50. int size = 32*1024*1024;
    51. int myRank, nRanks, localRank = 0;
    52. //initializing MPI
    53. MPICHECK(MPI_Init(&argc, &argv));
    54. MPICHECK(MPI_Comm_rank(MPI_COMM_WORLD, &myRank));
    55. MPICHECK(MPI_Comm_size(MPI_COMM_WORLD, &nRanks));
    56. //calculating localRank which is used in selecting a GPU
    57. uint64_t hostHashs[nRanks];
    58. char hostname[1024];
    59. getHostName(hostname, 1024);
    60. hostHashs[myRank] = getHostHash(hostname);
    61. MPICHECK(MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, hostHashs, sizeof(uint64_t), MPI_BYTE, MPI_COMM_WORLD));
    62. for (int p=0; p<nRanks; p++) {
    63. if (p == myRank) break;
    64. if (hostHashs[p] == hostHashs[myRank]) localRank++;
    65. }
    66. //each process is using two GPUs
    67. int nDev = 2;
    68. float** sendbuff = (float**)malloc(nDev * sizeof(float*));
    69. float** recvbuff = (float**)malloc(nDev * sizeof(float*));
    70. cudaStream_t* s = (cudaStream_t*)malloc(sizeof(cudaStream_t)*nDev);
    71. //picking GPUs based on localRank
    72. for (int i = 0; i < nDev; ++i) {
    73. CUDACHECK(cudaSetDevice(localRank*nDev + i));
    74. CUDACHECK(cudaMalloc(sendbuff + i, size * sizeof(float)));
    75. CUDACHECK(cudaMalloc(recvbuff + i, size * sizeof(float)));
    76. CUDACHECK(cudaMemset(sendbuff[i], 1, size * sizeof(float)));
    77. CUDACHECK(cudaMemset(recvbuff[i], 0, size * sizeof(float)));
    78. CUDACHECK(cudaStreamCreate(s+i));
    79. }
    80. ncclUniqueId id;
    81. ncclComm_t comms[nDev];
    82. //generating NCCL unique ID at one process and broadcasting it to all
  • 相关阅读:
    Linux系统网卡配置详细教程!
    Spring入门
    linux入门---用匿名管道实现一个功能
    Backblaze发布2023中期SSD故障数据质量报告
    网络问题排查
    Kotlin编程实战——与Java互操作(10)
    c#数组次序统计系列1
    8.7 typedef关键字
    HorizontalScrollView滚到当前tab,并且居中显示
    SQL binary 轉float 絕對好用
  • 原文地址:https://blog.csdn.net/OneFlow_Official/article/details/129543775