CUDA核函数调用方式:kernel_name<<<grid, block>>>(参数列表)
Grid和Block的内部三维索引
- blockIdx.x, blockIdx.y, blockIdx.z;
- threadIdx.x, threadIdx.y, threadIdx.z;
核函数的定义:
common.h
- #include <sys/time.h>
-
- #ifndef _COMMON_H
- #define _COMMON_H
-
- // CHECK(call): evaluates a CUDA runtime call that yields a cudaError_t. If the
- // result is not cudaSuccess, prints the file, line, numeric code and the
- // cudaGetErrorString() text to stderr, then terminates with exit(1).
- // The status lives in a macro-private name so that an argument mentioning a
- // caller variable called `error` (e.g. CHECK(error)) is not shadowed by the
- // expansion, and the argument is parenthesised like in the sibling macros.
- #define CHECK(call) \
-     { \
-         const cudaError_t check_cuda_status_ = (call); \
-         if (check_cuda_status_ != cudaSuccess) \
-         { \
-             fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
-             fprintf(stderr, "code: %d, reason: %s\n", \
-                     (int)check_cuda_status_, \
-                     cudaGetErrorString(check_cuda_status_)); \
-             exit(1); \
-         } \
-     }
-
- // CHECK_CUBLAS(call): evaluates a cuBLAS call that yields a cublasStatus_t.
- // Anything other than CUBLAS_STATUS_SUCCESS is reported on stderr with the
- // numeric status, file and line, followed by exit(1).
- // The status is stored in a macro-private const so an argument that refers to
- // a caller variable named `err` is not captured by the expansion.
- #define CHECK_CUBLAS(call) \
-     { \
-         const cublasStatus_t check_cublas_status_ = (call); \
-         if (check_cublas_status_ != CUBLAS_STATUS_SUCCESS) \
-         { \
-             fprintf(stderr, "Got CUBLAS error %d at %s:%d\n", \
-                     (int)check_cublas_status_, __FILE__, __LINE__); \
-             exit(1); \
-         } \
-     }
-
- // CHECK_CURAND(call): evaluates a cuRAND call that yields a curandStatus_t.
- // Anything other than CURAND_STATUS_SUCCESS is reported on stderr with the
- // numeric status, file and line, followed by exit(1).
- // The status is stored in a macro-private const so an argument that refers to
- // a caller variable named `err` is not captured by the expansion.
- #define CHECK_CURAND(call) \
-     { \
-         const curandStatus_t check_curand_status_ = (call); \
-         if (check_curand_status_ != CURAND_STATUS_SUCCESS) \
-         { \
-             fprintf(stderr, "Got CURAND error %d at %s:%d\n", \
-                     (int)check_curand_status_, __FILE__, __LINE__); \
-             exit(1); \
-         } \
-     }
-
- // CHECK_CUFFT(call): evaluates a cuFFT call that yields a cufftResult.
- // Anything other than CUFFT_SUCCESS is reported on stderr with the numeric
- // status, file and line, followed by exit(1).
- // The status is stored in a macro-private const so an argument that refers to
- // a caller variable named `err` is not captured by the expansion.
- #define CHECK_CUFFT(call) \
-     { \
-         const cufftResult check_cufft_status_ = (call); \
-         if (check_cufft_status_ != CUFFT_SUCCESS) \
-         { \
-             fprintf(stderr, "Got CUFFT error %d at %s:%d\n", \
-                     (int)check_cufft_status_, __FILE__, __LINE__); \
-             exit(1); \
-         } \
-     }
-
- // CHECK_CUSPARSE(call): evaluates a cuSPARSE call that yields a
- // cusparseStatus_t. On anything other than CUSPARSE_STATUS_SUCCESS it prints
- // the numeric status, file and line to stderr, then also queries
- // cudaGetLastError() and prints its message if a CUDA runtime error is
- // pending, and finally calls exit(1).
- // Both temporaries use macro-private names so arguments that mention caller
- // variables named `err` or `cuda_err` are not captured by the expansion.
- #define CHECK_CUSPARSE(call) \
-     { \
-         const cusparseStatus_t check_cusparse_status_ = (call); \
-         if (check_cusparse_status_ != CUSPARSE_STATUS_SUCCESS) \
-         { \
-             fprintf(stderr, "Got error %d at %s:%d\n", \
-                     (int)check_cusparse_status_, __FILE__, __LINE__); \
-             const cudaError_t check_cusparse_cuda_err_ = cudaGetLastError(); \
-             if (check_cusparse_cuda_err_ != cudaSuccess) \
-             { \
-                 fprintf(stderr, " CUDA error \"%s\" also detected\n", \
-                         cudaGetErrorString(check_cusparse_cuda_err_)); \
-             } \
-             exit(1); \
-         } \
-     }
-
- // Returns the current time reported by gettimeofday() as a double number of
- // seconds; the microsecond field becomes the fractional part.
- inline double seconds()
- {
-     // Zero-initialised so a failed call yields 0.0 instead of garbage.
-     struct timeval tp = {0, 0};
-     // POSIX leaves the behaviour unspecified for a non-null timezone
-     // argument, so pass NULL. The status is deliberately ignored, as before.
-     (void)gettimeofday(&tp, NULL);
-     return (double)tp.tv_sec + (double)tp.tv_usec * 1.e-6;
- }
-
- #endif // _COMMON_H
hello.cu
- #include "../common/common.h"
- #include <stdio.h>
-
- /*
- * A simple introduction to programming in CUDA. This program prints a
- * "Hello World from GPU" line from each of the 20 CUDA threads running on the GPU.
- */
-
- /*
-  * Kernel: each launched thread prints one line containing its block
-  * coordinates and its thread coordinates. Takes no arguments.
-  *
-  * printf is usable in device code because the CUDA runtime provides a
-  * device-side implementation: each call appends the format string and its
-  * arguments to a fixed-size buffer, and the host formats and prints that
-  * buffer at synchronisation points (for example cudaDeviceSynchronize()).
-  * Output is therefore not immediate and is meant for debugging only.
-  */
- __global__ void helloFromGPU()
- {
-     // The blockIdx/threadIdx components are unsigned int, hence %u.
-     printf("Hello World from GPU block(%u, %u, %u) thread (%u, %u, %u)!\n",
-            blockIdx.x, blockIdx.y, blockIdx.z,
-            threadIdx.x, threadIdx.y, threadIdx.z);
- }
-
- /*
-  * Host entry point: prints a greeting from the CPU, launches helloFromGPU
-  * on one block of 20 threads and waits for the kernel to complete.
-  * argc/argv are accepted but not used. Returns 0 on success; a CUDA failure
-  * is reported by CHECK, which terminates the process.
-  */
- int main(int argc, char **argv)
- {
-     printf("Hello World from CPU!\n");
-
-     // grid (1,1,1): a single block; block (20,1,1): 20 threads in that block
-     helloFromGPU<<<1, 20>>>();
-     // A kernel launch returns no status, so launch-configuration errors have
-     // to be fetched explicitly.
-     CHECK(cudaGetLastError());
-     // Block until the kernel has finished; this is also the point where the
-     // device-side printf output is delivered to the host.
-     // CHECK(cudaDeviceReset());
-     CHECK(cudaDeviceSynchronize());
-     return 0;
- }
Makefile
- # Builds every program listed in APPS from the matching <name>.cu file.
- # NVCC and ARCH keep their previous values by default and can be overridden,
- # e.g. `make ARCH=sm_70`. Recipe lines must start with a tab character.
- APPS=hello
- NVCC ?= nvcc
- ARCH ?= sm_60
-
- # all and clean are command names, not files to be built
- .PHONY: all clean
-
- all: ${APPS}
-
- %: %.cu
- 	${NVCC} -O2 -arch=${ARCH} -o $@ $<
-
- clean:
- 	rm -f ${APPS}
核函数是在GPU设备上执行的,为什么能执行printf函数,其实现原理是什么样的?