cuda实践之一（hello world）

1.实践点

cuda核函数调用方式：<<< ... >>>

Grid和Block的内部三维索引


blockIdx.x, blockIdx.y, blockIdx.z; 
threadIdx.x, threadIdx.y, threadIdx.z

核函数的定义：使用 __global__ 限定符声明，返回类型必须为 void，例如 __global__ void helloFromGPU()。

2. 代码

common.h


#include <sys/time.h>
 
#ifndef _COMMON_H
#define _COMMON_H
 
/*
 * CHECK(call): evaluates `call` exactly once and compares the resulting
 * cudaError_t with cudaSuccess. On mismatch it prints the file, line, numeric
 * error code and the cudaGetErrorString() text to stderr, then calls exit(1).
 *
 * The body is wrapped in do { ... } while (0) so that `CHECK(x);` is a single
 * statement and can be used safely as the unbraced branch of an if/else.
 * The argument is parenthesized so any expression can be passed.
 */
#define CHECK(call)                                                            \
do {                                                                           \
    const cudaError_t check_error_ = (call);                                   \
    if (check_error_ != cudaSuccess)                                           \
    {                                                                          \
        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                 \
        fprintf(stderr, "code: %d, reason: %s\n", (int)check_error_,           \
                cudaGetErrorString(check_error_));                             \
        exit(1);                                                               \
    }                                                                          \
} while (0)
 
/*
 * CHECK_CUBLAS(call): evaluates `call` once and compares the resulting
 * cublasStatus_t with CUBLAS_STATUS_SUCCESS. On mismatch it prints the numeric
 * status together with the file and line to stderr, then calls exit(1).
 *
 * Wrapped in do { ... } while (0) so `CHECK_CUBLAS(x);` is a single statement
 * and is safe inside an unbraced if/else.
 */
#define CHECK_CUBLAS(call)                                                     \
do {                                                                           \
    const cublasStatus_t cublas_status_ = (call);                              \
    if (cublas_status_ != CUBLAS_STATUS_SUCCESS)                               \
    {                                                                          \
        fprintf(stderr, "Got CUBLAS error %d at %s:%d\n",                      \
                (int)cublas_status_, __FILE__, __LINE__);                      \
        exit(1);                                                               \
    }                                                                          \
} while (0)
 
/*
 * CHECK_CURAND(call): evaluates `call` once and compares the resulting
 * curandStatus_t with CURAND_STATUS_SUCCESS. On mismatch it prints the numeric
 * status together with the file and line to stderr, then calls exit(1).
 *
 * Wrapped in do { ... } while (0) so `CHECK_CURAND(x);` is a single statement
 * and is safe inside an unbraced if/else.
 */
#define CHECK_CURAND(call)                                                     \
do {                                                                           \
    const curandStatus_t curand_status_ = (call);                              \
    if (curand_status_ != CURAND_STATUS_SUCCESS)                               \
    {                                                                          \
        fprintf(stderr, "Got CURAND error %d at %s:%d\n",                      \
                (int)curand_status_, __FILE__, __LINE__);                      \
        exit(1);                                                               \
    }                                                                          \
} while (0)
 
/*
 * CHECK_CUFFT(call): evaluates `call` once and compares the resulting
 * cufftResult with CUFFT_SUCCESS. On mismatch it prints the numeric result
 * together with the file and line to stderr, then calls exit(1).
 *
 * Wrapped in do { ... } while (0) so `CHECK_CUFFT(x);` is a single statement
 * and is safe inside an unbraced if/else.
 */
#define CHECK_CUFFT(call)                                                      \
do {                                                                           \
    const cufftResult cufft_result_ = (call);                                  \
    if (cufft_result_ != CUFFT_SUCCESS)                                        \
    {                                                                          \
        fprintf(stderr, "Got CUFFT error %d at %s:%d\n",                       \
                (int)cufft_result_, __FILE__, __LINE__);                       \
        exit(1);                                                               \
    }                                                                          \
} while (0)
 
/*
 * CHECK_CUSPARSE(call): evaluates `call` once and compares the resulting
 * cusparseStatus_t with CUSPARSE_STATUS_SUCCESS. On mismatch it prints the
 * numeric status with the file and line to stderr, additionally reports the
 * text of cudaGetLastError() when that is not cudaSuccess, then calls exit(1).
 *
 * Wrapped in do { ... } while (0) so `CHECK_CUSPARSE(x);` is a single
 * statement and is safe inside an unbraced if/else.
 */
#define CHECK_CUSPARSE(call)                                                   \
do {                                                                           \
    const cusparseStatus_t cusparse_status_ = (call);                          \
    if (cusparse_status_ != CUSPARSE_STATUS_SUCCESS)                           \
    {                                                                          \
        fprintf(stderr, "Got error %d at %s:%d\n", (int)cusparse_status_,      \
                __FILE__, __LINE__);                                           \
        const cudaError_t cuda_err_ = cudaGetLastError();                      \
        if (cuda_err_ != cudaSuccess)                                          \
        {                                                                      \
            fprintf(stderr, "  CUDA error \"%s\" also detected\n",             \
                    cudaGetErrorString(cuda_err_));                            \
        }                                                                      \
        exit(1);                                                               \
    }                                                                          \
} while (0)
 
/*
 * Returns the current time reported by gettimeofday() as a double number of
 * seconds: the whole-second field plus the microsecond field scaled by 1e-6.
 * Intended for measuring elapsed time by subtracting two readings.
 *
 * Returns 0.0 if gettimeofday() reports failure, instead of reading an
 * uninitialized timeval.
 */
inline double seconds()
{
    struct timeval now;

    // The timezone argument is obsolete; a null pointer is the portable value.
    if (gettimeofday(&now, 0) != 0)
    {
        return 0.0;
    }

    return ((double)now.tv_sec + (double)now.tv_usec * 1.e-6);
}
 
#endif // _COMMON_H

hello.cu


#include "../common/common.h"
#include <stdio.h>
 
/*
 * A simple introduction to programming in CUDA. This program prints a
 * "Hello World from GPU" line from each of the 20 CUDA threads it launches.
 */
 
/*
 * Kernel: every thread that executes it prints one line containing its block
 * index (blockIdx.x/y/z) and its thread index (threadIdx.x/y/z).
 * Takes no arguments; printf is called directly from device code.
 */
__global__ void helloFromGPU()
{
    // Block coordinates of the calling thread.
    const unsigned int bx = blockIdx.x;
    const unsigned int by = blockIdx.y;
    const unsigned int bz = blockIdx.z;

    // Thread coordinates of the calling thread.
    const unsigned int tx = threadIdx.x;
    const unsigned int ty = threadIdx.y;
    const unsigned int tz = threadIdx.z;

    printf("Hello World from GPU block(%d, %d, %d) thread (%d, %d, %d)!\n",
           bx, by, bz, tx, ty, tz);
}
 
/*
 * Host entry point: prints a greeting from the CPU, launches helloFromGPU on
 * one block of 20 threads, then waits for the device to finish.
 *
 * argc and argv are not used.
 * Returns 0 on success; CHECK terminates the process with exit(1) on any
 * CUDA error.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    printf("Hello World from CPU!\n");

    // grid(1,1,1): a single block; block(20,1,1): 20 threads in that block.
    helloFromGPU<<<1, 20>>>();

    // A kernel launch returns no status itself; an invalid launch
    // configuration is only reported through cudaGetLastError().
    CHECK(cudaGetLastError());

    // Wait for the kernel to complete. This synchronization is also what
    // makes the device-side printf output appear before the program exits.
    CHECK(cudaDeviceSynchronize());
    return 0;
}

Makefile


# Programs to build; each NAME is compiled from NAME.cu.
APPS=hello

# Toolchain settings; override on the command line, e.g. `make ARCH=sm_80`.
NVCC ?= nvcc
ARCH ?= sm_60
NVCCFLAGS ?= -O2 -arch=$(ARCH)

# `all` and `clean` are actions, not files.
.PHONY: all clean

all: ${APPS}

# Pattern rule: build an executable from the .cu file with the same stem.
# Recipe lines must start with a TAB character.
%: %.cu
	$(NVCC) $(NVCCFLAGS) -o $@ $<

clean:
	rm -f ${APPS}

3. QA：

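核函数是在GPU设备上执行的，为什么能执行printf函数，其实现原理是什么样的？

答：设备端的printf并不直接向终端输出。每个线程把格式串和参数写入设备全局内存中的一个固定大小的环形缓冲区，主机端运行时在同步点（例如 cudaDeviceSynchronize()、阻塞式 cudaMemcpy、下一次核函数启动等）读取该缓冲区，再在主机上完成格式化并输出到 stdout。因此核函数启动之后必须进行同步，否则程序可能在输出被刷新之前就退出；缓冲区大小可以通过 cudaDeviceSetLimit(cudaLimitPrintfFifoSize, size) 调整。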

相关阅读:
ElementUI实现增删改功能以及表单验证
 水产行业智能供应链管理平台解决方案：支撑企业供应链数字化，提升企业管理效益
 spark分布式计算框架
 DHTMLX Scheduler 6.0.3 Crack
【广州华锐互动】工业零件拆装VR培训：无需前往现场，提高学习效率
 【JavaEE基础与高级第56章】Java中的打印流、属性集、IO流异常的处理详细使用介绍
 软件安全测试-软件安全测试概述
 华为OD机试 - 螺旋数字矩阵
 springboot+vue球员数据统计分析系统java
Flink之KeyedState
原文地址：https://blog.csdn.net/landy_john/article/details/126752596