• 【GPU并行计算】Ubuntu安装GPU驱动和CUDA+CMakeLists.txt的编写+RGB图像转灰度CUDA程序


    Ubuntu安装CUDA和GPU驱动

    CUDA安装方法

    方法一:如果没有装GPU驱动

    1. wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
    2. sudo mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
    3. wget https://developer.download.nvidia.com/compute/cuda/11.2.0/local_installers/cuda-repo-ubuntu1804-11-2-local_11.2.0-460.27.04-1_amd64.deb
    4. sudo dpkg -i cuda-repo-ubuntu1804-11-2-local_11.2.0-460.27.04-1_amd64.deb
    5. sudo apt-key add /var/cuda-repo-ubuntu1804-11-2-local/7fa2af80.pub
    6. sudo apt-get update
    7. sudo apt-get -y install cuda

    如果用wget下载不到deb包,也可以在浏览器中从NVIDIA官网手动下载CUDA的deb包,再按上面第4步起的命令安装。

    方法二:已经装了GPU驱动

    通过在官网下载cuda的run脚本安装。安装时,取消安装显卡驱动。

    1. wget https://developer.download.nvidia.com/compute/cuda/11.2.0/local_installers/cuda_11.2.0_460.27.04_linux.run
    2. sudo sh cuda_11.2.0_460.27.04_linux.run

    GPU驱动安装方法

    1.在显卡官网下载驱动NVIDIA*run.sh文件

    2.屏蔽掉nouveau

    sudo gedit /etc/modprobe.d/blacklist-nouveau.conf

    填入以下两行:

    blacklist nouveau

    options nouveau modeset=0

    3.执行命令

    sudo update-initramfs -u

    4.reboot重启电脑

    5.CTRL+ALT+F1进入字符界面,执行

    sudo service lightdm stop

    6.执行安装显卡脚本 sudo ./NVIDIA*run.sh

    7.reboot重启

    CMakeLists.txt的编写

    # Minimum CMake version required.
    cmake_minimum_required(VERSION 2.8)

    # Project name; cached so it can be overridden with -DPROJECT_NAME=<name>.
    set(PROJECT_NAME "demo" CACHE STRING "project's name")
    project(${PROJECT_NAME})

    # Compiler flags: C++11, and an optimized Release build by default.
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -DNDEBUG ")
    set(CMAKE_CONFIGURATION_TYPES "Release" CACHE STRING "" FORCE)
    set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE)

    # OpenCV: only the given build tree is searched (NO_DEFAULT_PATH).
    # Adjust the path to your own OpenCV build directory.
    find_package(OpenCV REQUIRED NO_MODULE PATHS /home/leonjin/project/master-opencv-4.5.2/opencv-4.5.2/build NO_DEFAULT_PATH)
    include_directories(${OpenCV_INCLUDE_DIRS})
    message("OpenCV_INCLUDE_DIRS : ${OpenCV_INCLUDE_DIRS}")
    message("OpenCV_LIBS : ${OpenCV_LIBS}")

    # OpenMP. Older FindOpenMP versions report OPENMP_FOUND, newer ones
    # OpenMP_FOUND, so both spellings are accepted.
    find_package(OpenMP REQUIRED)
    if(OpenMP_FOUND OR OPENMP_FOUND)
      message(STATUS "found openmp")
      # The result variables are spelled OpenMP_<lang>_FLAGS (CMake variables are
      # case-sensitive). The values are quoted so the flags stay one
      # space-separated string instead of becoming a ';'-separated list.
      set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
      # NOTE(review): OpenMP_EXE_LINKER_FLAGS may not be defined by FindOpenMP;
      # in that case it expands to nothing and this line is a no-op.
      set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
    else()
      message(FATAL_ERROR "openmp not found!")
    endif()

    # CUDA: cuda_add_executable compiles the .cu sources with nvcc and the
    # remaining sources with the host compiler.
    find_package(CUDA REQUIRED)
    message("CUDA_INCLUDE_DIRS: ${CUDA_INCLUDE_DIRS}")
    cuda_add_executable(image_load_save image_load_save.cpp rgb_to_grey.h rgb_to_grey.cu)
    target_link_libraries(image_load_save ${OpenCV_LIBS})

    采用cuda_add_executable构建可执行文件(正常是add_executable)

    采用cuda_add_library构建链接库(正常是add_library)

    理解CUDA中的网格(Grid),线程块(Block)和线程(thread)

    https://www.cnblogs.com/QZ-CMD/articles/15983695.html

    CUDA的软件架构在逻辑上分为三个层次,每个层次都可以用三个维度(x, y, z)来组织;这三个层次从小到大依次是Thread(线程)、Block(线程块)、Grid(网格)。

    一个kernel对应一个Grid,该Grid又包含若干个Block,每个Block内包含若干个thread。一个Grid在GPU上运行时,可能独占整个GPU,也可能与其他kernel的Grid并发共享同一个GPU。
     

    RGB图像转灰度CUDA程序

    image_load_save.cpp

    1. #include <stdio.h>
    2. #include <stdlib.h>
    3. #include <string>
    4. #include <sys/types.h>
    5. #include <opencv2/opencv.hpp>
    6. #include <cuda_runtime.h>
    7. #include "rgb_to_grey.h"
    8. void runTest(int argc, char **argv);
    9. void load_image(std::string image_path, unsigned char **img_data, unsigned int& numData, int& width, int& height)
    10. {
    11. cv::Mat _image_mat = cv::imread(image_path);
    12. width = _image_mat.cols;
    13. height = _image_mat.rows;
    14. numData = _image_mat.rows * _image_mat.cols;
    15. int channelNums = _image_mat.channels();
    16. *img_data = (unsigned char *)malloc(sizeof(unsigned char) * numData * 3);
    17. for (int row_id = 0; row_id < height; ++row_id)
    18. {
    19. for (int col_id = 0; col_id < width; ++col_id)
    20. {
    21. cv::Vec3b _color = _image_mat.at(row_id, col_id);
    22. *(*img_data+(row_id*width + col_id)*3 + 0) = _color[0];
    23. *(*img_data+(row_id*width + col_id)*3 + 1) = _color[1];
    24. *(*img_data+(row_id*width + col_id)*3 + 2) = _color[2];
    25. }
    26. }
    27. }
    28. void save_image(std::string image_path, unsigned char **img_data, int width, int height)
    29. {
    30. cv::Mat _output_mat(height, width, CV_8UC1);
    31. _output_mat.data = *img_data;
    32. cv::imshow("_output_mat",_output_mat);
    33. cv::imwrite(image_path, _output_mat);
    34. cv::waitKey(0);
    35. }
    36. int
    37. main(int argc, char **argv)
    38. {
    39. runTest(argc, argv);
    40. }
    41. void readGPUMemory(){
    42. size_t avail;
    43. size_t total;
    44. int deviceCount=0;
    45. cudaGetDeviceCount(&deviceCount); // 用deviceCount获取显卡总数量
    46. for(int i_dev=0;i_dev
    47. {
    48. cudaSetDevice(i_dev); // 使用第i_dev张显卡作为使用的显卡
    49. cudaMemGetInfo(&avail, &total); // 获取可用和总显存大小
    50. printf("Device %d Memeory:\n",i_dev);
    51. printf("Avaliable Memery = %dm Total Memory = %dm\n", int(avail/1024/1024), int(total / 1024 / 1024));
    52. printf("\n");
    53. }
    54. }
    55. void
    56. runTest(int argc, char **argv)
    57. {
    58. readGPUMemory();//读取当前可用显存
    59. std::string _input_image = "../data/fg.jpg";
    60. std::string _output_image = "output.jpg";
    61. unsigned char *src_img = NULL;
    62. unsigned int numData;
    63. int width, height;
    64. load_image(_input_image, &src_img, numData, width, height);//加载图像
    65. unsigned int memSize = sizeof(u_char) * numData;
    66. unsigned int input_memSize = sizeof(u_char) * numData * 3;
    67. unsigned char *d_img;
    68. checkCudaErrors(cudaMalloc((void **) &d_img, input_memSize));//在GPU显存中开辟数据存放空间
    69. checkCudaErrors(cudaMemcpy(d_img, src_img, input_memSize, cudaMemcpyHostToDevice));//从host端拷贝数据到device端
    70. // 定义灰度图
    71. unsigned char *d_odata;
    72. checkCudaErrors(cudaMalloc((void **) &d_odata, memSize));//在GPU显存中开辟数据存放空间
    73. unsigned char *h_odata = (unsigned char *)malloc(memSize);
    74. for (unsigned int i = 0; i < numData; i++)
    75. h_odata[i] = 0;
    76. checkCudaErrors(cudaMemcpy(d_odata, h_odata, memSize, cudaMemcpyHostToDevice));//从host端拷贝数据到device端
    77. readGPUMemory();
    78. convertGreyInterface(d_img, d_odata, width, height);//运行封装的cuda函数
    79. checkCudaErrors(cudaMemcpy(h_odata, d_odata, memSize, cudaMemcpyDeviceToHost));//从device端拷贝数据到host端
    80. save_image(_output_image, &h_odata, width, height);
    81. checkCudaErrors(cudaFree(d_odata));//释放显存
    82. checkCudaErrors(cudaFree(d_img));
    83. if (h_odata != NULL)
    84. free(h_odata);//释放内存
    85. if (src_img != NULL)
    86. free(src_img);
    87. readGPUMemory();
    88. }

    rgb_to_grey.h

    1. #include
    2. #define blockSize_x 32
    3. #define blockSize_y 32
    4. #define checkCudaErrors(res) if(res!=cudaSuccess){exit(-1);}
    5. extern "C" {
    6. void convertGreyInterface(unsigned char* d_img, unsigned char *d_odata, int width, int height);
    7. }

    rgb_to_grey.cu

    1. #include <stdlib.h>
    2. #include <stdio.h>
    3. #include <string.h>
    4. #include <math.h>
    5. #include <cuda_runtime.h>
    6. #include "rgb_to_grey.h"
    // Divides a by b and adds one when the division leaves a remainder
    // (i.e. rounds the quotient up for non-negative a and positive b).
    int iDivUp(int a, int b)
    {
        const int quotient = a / b;
        const bool hasRemainder = (a % b) != 0;
        return hasRemainder ? quotient + 1 : quotient;
    }
    // Returns the greyscale weight for channel index c:
    // 0 -> 0.114, 1 -> 0.587, 2 -> 0.299, anything else -> 0.
    __device__ float getalpha(int c){
        switch (c)
        {
            case 0:  return 0.114f;
            case 1:  return 0.587f;
            case 2:  return 0.299f;
            default: return 0.0f;
        }
    }
    /**
     * Kernel: converts an interleaved 3-bytes-per-pixel image into an 8-bit
     * grey image, one thread per pixel.
     *
     * Launch layout: 2D grid of 2D blocks covering at least width x height
     * threads; x indexes columns, y indexes rows. Threads that fall outside
     * the image do nothing.
     *
     * @param d_img    input, width * height * 3 bytes, row-major; the three
     *                 bytes of a pixel are weighted with getalpha(0..2)
     * @param d_odata  output, width * height bytes, row-major
     * @param width    image width in pixels
     * @param height   image height in pixels
     */
    __global__ void
    convertGrey(unsigned char* d_img, unsigned char *d_odata, int width, int height)
    {
        // Global pixel coordinates of this thread.
        const int idx = blockDim.x * blockIdx.x + threadIdx.x;
        const int idy = blockDim.y * blockIdx.y + threadIdx.y;

        // The grid is rounded up to whole blocks, so the last blocks in each
        // dimension can reach past the image: skip those threads to avoid
        // out-of-bounds reads and writes.
        if (idx >= width || idy >= height)
            return;

        // size_t keeps offset * 3 from overflowing int on very large images.
        const size_t offset = (size_t)idy * (size_t)width + (size_t)idx;
        const unsigned char *pixel = d_img + offset * 3;

        const unsigned char _val_b = pixel[0];
        const unsigned char _val_g = pixel[1];
        const unsigned char _val_r = pixel[2];

        d_odata[offset] = (unsigned char)(_val_b * getalpha(0) + _val_g * getalpha(1) + _val_r * getalpha(2));
    }
    /**
     * Host wrapper that launches convertGrey over the whole image.
     *
     * Uses blockSize_x x blockSize_y threads per block and enough blocks
     * (rounded up) to cover width x height pixels. The launch is asynchronous
     * and this function returns no status: callers should check
     * cudaGetLastError() afterwards and synchronize (for example through a
     * blocking cudaMemcpy) before reading the result.
     *
     * @param d_img    device pointer to the input image (3 bytes per pixel)
     * @param d_odata  device pointer to the output image (1 byte per pixel)
     * @param width    image width in pixels
     * @param height   image height in pixels
     */
    extern "C" void convertGreyInterface(unsigned char* d_img, unsigned char *d_odata, int width, int height)
    {
        // An empty image would produce a zero-sized grid, which is an invalid
        // launch configuration; there is nothing to convert in that case.
        if (!d_img || !d_odata || width <= 0 || height <= 0)
            return;

        const dim3 numThreads(blockSize_x, blockSize_y, 1);
        const dim3 numBlocks(iDivUp(width, numThreads.x), iDivUp(height, numThreads.y), 1);
        convertGrey<<<numBlocks, numThreads>>>(d_img, d_odata, width, height);
    }

  • 相关阅读:
    快手资讯 | 快手前CEO宿华业务助理彭佳瞳、商业算法策略负责人李勇保被曝离职
    SkyWalking内置MQE语法
    Node.js |(五)包管理工具 | 尚硅谷2023版Node.js零基础视频教程
    [6368] 20 接手新团队:士气低、交付迟、事故多发,如何下手解决?
    Java带图片的excel数据导入
    MySQL间隙锁死锁问题
    力扣(LeetCode)82. 删除排序链表中的重复元素 II(C语言)
    汇聚荣拼多多运营策略是怎么样的?
    mybatis foeahe 批量插入 删除 修改
    python调整colorbar宽度的两种方法
  • 原文地址:https://blog.csdn.net/jin739738709/article/details/126033393