• cuda实践之一(hello world)


    1.实践点

    • Grid和Block的内部三维索引

    1. blockIdx.x, blockIdx.y, blockIdx.z;
    2. threadIdx.x, threadIdx.y, threadIdx.z;
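    • 核函数的定义:使用 __global__ 限定符声明、返回类型为 void 的函数,例如 __global__ void kernel_name(参数列表);在主机端通过 kernel_name<<<grid, block>>>(参数列表) 启动。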

    2. 代码

    • common.h

    1. #include <sys/time.h>
    2. #ifndef _COMMON_H
    3. #define _COMMON_H
    4. #define CHECK(call) \
    5. { \
    6. const cudaError_t error = call; \
    7. if (error != cudaSuccess) \
    8. { \
    9. fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
    10. fprintf(stderr, "code: %d, reason: %s\n", error, \
    11. cudaGetErrorString(error)); \
    12. exit(1); \
    13. } \
    14. }
    15. #define CHECK_CUBLAS(call) \
    16. { \
    17. cublasStatus_t err; \
    18. if ((err = (call)) != CUBLAS_STATUS_SUCCESS) \
    19. { \
    20. fprintf(stderr, "Got CUBLAS error %d at %s:%d\n", err, __FILE__, \
    21. __LINE__); \
    22. exit(1); \
    23. } \
    24. }
    25. #define CHECK_CURAND(call) \
    26. { \
    27. curandStatus_t err; \
    28. if ((err = (call)) != CURAND_STATUS_SUCCESS) \
    29. { \
    30. fprintf(stderr, "Got CURAND error %d at %s:%d\n", err, __FILE__, \
    31. __LINE__); \
    32. exit(1); \
    33. } \
    34. }
    35. #define CHECK_CUFFT(call) \
    36. { \
    37. cufftResult err; \
    38. if ( (err = (call)) != CUFFT_SUCCESS) \
    39. { \
    40. fprintf(stderr, "Got CUFFT error %d at %s:%d\n", err, __FILE__, \
    41. __LINE__); \
    42. exit(1); \
    43. } \
    44. }
    45. #define CHECK_CUSPARSE(call) \
    46. { \
    47. cusparseStatus_t err; \
    48. if ((err = (call)) != CUSPARSE_STATUS_SUCCESS) \
    49. { \
    50. fprintf(stderr, "Got error %d at %s:%d\n", err, __FILE__, __LINE__); \
    51. cudaError_t cuda_err = cudaGetLastError(); \
    52. if (cuda_err != cudaSuccess) \
    53. { \
    54. fprintf(stderr, " CUDA error \"%s\" also detected\n", \
    55. cudaGetErrorString(cuda_err)); \
    56. } \
    57. exit(1); \
    58. } \
    59. }
    60. inline double seconds() // Returns the current wall-clock time from gettimeofday as seconds (with a microsecond fraction).
    61. {
    62. struct timeval tp;
    63. // The timezone argument of gettimeofday is obsolete; POSIX leaves non-NULL behavior unspecified.
    64. gettimeofday(&tp, NULL);
    65. return ((double)tp.tv_sec + (double)tp.tv_usec * 1.e-6); // whole seconds plus microseconds scaled to seconds
    66. }
    67. #endif // _COMMON_H
    • hello.cu

      1. #include "../common/common.h"
      2. #include <stdio.h>
      3. /*
      4. * A simple introduction to programming in CUDA. Every CUDA thread that runs this
      5. * kernel prints "Hello World from GPU ..." followed by its block and thread indices.
      6. */
      7. __global__ void helloFromGPU()
      8. {
      9. printf("Hello World from GPU block(%u, %u, %u) thread (%u, %u, %u)!\n", blockIdx.x, blockIdx.y, blockIdx.z, // index components are unsigned, hence %u
      10. threadIdx.x, threadIdx.y, threadIdx.z); // device-side printf writes to a buffer that is flushed to the host at synchronization points
      11. }
      12. // Host entry point: prints a greeting from the CPU, launches helloFromGPU, and waits for it to finish.
      13. int main(int argc, char **argv)
      14. {
      15. printf("Hello World from CPU!\n");
      16. // grid(1,1,1) with only 1 block, 20 threads in that block (20, 1, 1)
      17. helloFromGPU<<<1, 20>>>();
      18. // A kernel launch returns no status; query it explicitly to catch launch-configuration errors.
      19. CHECK(cudaGetLastError());
      20. // CHECK(cudaDeviceReset());
      21. // Block until the kernel has finished so its printf output is flushed before the program exits.
      22. CHECK(cudaDeviceSynchronize());
      23. return 0;
      24. }
    • Makefile

    1. # Builds every program listed in APPS from the .cu file of the same name using nvcc.
    2. APPS=hello
    3. # all and clean are not files; declare them phony so same-named files cannot shadow them.
    4. .PHONY: all clean
    5. all: ${APPS}
    6. # NOTE: in the real Makefile the recipe lines (nvcc ..., rm ...) must begin with a TAB character.
    7. %: %.cu
    8. nvcc -O2 -arch=sm_60 -o $@ $<
    9. clean:
    10. rm -f ${APPS}

    3. QA:

    • 核函数是在GPU设备上执行的,为什么能执行printf函数,其实现原理是什么样的?

  • 相关阅读:
    ElementUI实现增删改功能以及表单验证
    水产行业智能供应链管理平台解决方案:支撑企业供应链数字化,提升企业管理效益
    spark分布式计算框架
    DHTMLX Scheduler 6.0.3 Crack
    【广州华锐互动】工业零件拆装VR培训:无需前往现场,提高学习效率
    【JavaEE基础与高级 第56章】Java中的打印流、属性集、IO流异常的处理详细使用介绍
    软件安全测试-软件安全测试概述
    华为OD机试 - 螺旋数字矩阵
    springboot+vue球员数据统计分析系统java
    Flink之KeyedState
  • 原文地址:https://blog.csdn.net/landy_john/article/details/126752596