• CUDA小白 - NPP(4) 图像处理 Data Exchange and Initialization(1)


    cuda小白
    原始API链接 NPP

    GPU架构近些年也有不少的变化,具体的可以参考别的博主的介绍,都比较详细。还有一些cuda中的专有名词的含义,可以参考《详解CUDA的Context、Stream、Warp、SM、SP、Kernel、Block、Grid》

    常见的NppStatus,可以看这里

    如有问题,请指出,谢谢

    Image Set Operations

    当前模块的主要功能是设置(set)图像中的像素值,主要分为三个大类:将ROI区域内的所有像素设置为一个指定的值(Set)、按mask赋值(Masked Set),以及单通道赋值(Channel Set)。
    下面分别以三通道uint8_t图像对应的接口为例,对这三个大类做简单介绍。

    // Set: every pixel of the 3-channel image inside the ROI is set to aValue.
    NppStatus nppiSet_8u_C3R(const Npp8u aValue[3],
    					 	 Npp8u *pDst,
    						 int nDstStep,
    						 NppiSize oSizeROI);
    // Masked set: pMask controls which pixels inside the ROI are set to aValue.
    NppStatus nppiSet_8u_C3MR(const Npp8u aValue[3],
    						  Npp8u *pDst,
    						  int nDstStep,
    						  NppiSize oSizeROI,
    						  const Npp8u *pMask,
    						  int nMaskStep);	
    // Channel set: a single channel is set to the fixed value nValue; the channel
    // is selected by the starting position of the pDst pointer.
    NppStatus nppiSet_8u_C3CR(Npp8u nValue,
    						  Npp8u *pDst,
    						  int nDstStep,
    						  NppiSize oSizeROI);
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    code
    #include <iostream>
    #include <cuda_runtime.h>
    #include <npp.h>
    #include <opencv2/opencv.hpp>
    
    // Frees a device allocation with cudaFree and resets the pointer to nullptr.
    // Does nothing when the pointer is already nullptr.
    #define CUDA_FREE(ptr)    \
      {                       \
        if (nullptr != ptr) { \
          cudaFree(ptr);      \
          ptr = nullptr;      \
        }                     \
      }
    
    int main() {
      std::string directory = "../";
    
      // =============== load image ===============
      cv::Mat image_dog = cv::imread(directory + "dog.png");
      int image_width = image_dog.cols;
      int image_height = image_dog.rows;
      int image_size = image_width * image_height;
    
      // =============== device memory ===============
      uint8_t *out_ptr1, *out_ptr2, *out_ptr3;
      cudaMalloc((void**)&out_ptr1, image_size * 3 * sizeof(uint8_t));
      cudaMalloc((void**)&out_ptr2, image_size * 3 * sizeof(uint8_t));
      cudaMalloc((void**)&out_ptr3, image_size * 3 * sizeof(uint8_t));
      cudaMemcpy(out_ptr1, image_dog.data, image_size * 3 * sizeof(uint8_t), cudaMemcpyHostToDevice);
      cudaMemcpy(out_ptr2, image_dog.data, image_size * 3 * sizeof(uint8_t), cudaMemcpyHostToDevice);
      cudaMemcpy(out_ptr3, image_dog.data, image_size * 3 * sizeof(uint8_t), cudaMemcpyHostToDevice);
    
      cv::Mat mask = cv::Mat::zeros(image_height, image_width, CV_8UC1);
      cv::Mat mask1 = cv::Mat::ones(image_height * 3 / 4, image_width * 3 / 4, CV_8UC1);
      cv::Rect rc1 = cv::Rect(image_width / 4, image_height / 4, image_width * 3 / 4, image_height * 3 / 4);
      mask1.copyTo(mask(rc1));
    
      uint8_t *gpu_mask;
      cudaMalloc((void**)&gpu_mask, image_size * sizeof(uint8_t));
      cudaMemcpy(gpu_mask, mask.data, image_size * sizeof(uint8_t), cudaMemcpyHostToDevice);
    
      NppiSize roi1, roi2;
      roi1.width = image_width;
      roi1.height = image_height;
      roi2.width = image_width / 2;
      roi2.height = image_height / 2;
    
      cv::Mat out_image = cv::Mat::zeros(image_height, image_width, CV_8UC3);
      NppStatus status;
      // =============== nppiSet_8u_C3R ===============
      uint8_t value[3] = { 255, 0, 0 };
      status = nppiSet_8u_C3R(value, out_ptr1, image_width * 3, roi1);
      if (status != NPP_SUCCESS) {
        std::cout << "[GPU] ERROR nppiSet_8u_C3R failed, status = " << status << std::endl;
        return false;
      }
      cudaMemcpy(out_image.data, out_ptr1, image_size * 3, cudaMemcpyDeviceToHost);
      cv::imwrite(directory + "set.jpg", out_image);
    
      // =============== nppiSet_8u_C3R ===============
      uint8_t value2[3] = { 0, 0, 255 };
      status = nppiSet_8u_C3MR(value2, out_ptr2, image_width * 3, roi1, gpu_mask, image_width);
      if (status != NPP_SUCCESS) {
        std::cout << "[GPU] ERROR nppiSet_8u_C3MR failed, status = " << status << std::endl;
        return false;
      }
      cudaMemcpy(out_image.data, out_ptr2, image_size * 3, cudaMemcpyD![请添加图片描述](https://img-blog.csdnimg.cn/9da721ce7d4649839ef40228bb3937e1.png)
    eviceToHost);
      cv::imwrite(directory + "set_mask.jpg", out_image);
      
      // green
      status = nppiSet_8u_C3CR(255, out_ptr3 + image_width * 3 * 200 + 1, image_width * 3, roi1);
      if (status != NPP_SUCCESS) {
        std::cout << "[GPU] ERROR nppiSet_8u_C3CR failed, status = " << status << std::endl;
        return false;
      }
      cudaMemcpy(out_image.data, out_ptr3, image_size * 3, cudaMemcpyDeviceToHost);
      cv::imwrite(directory + "set_channel.jpg", out_image);
    
    
      // free
      CUDA_FREE(out_ptr1)
      CUDA_FREE(out_ptr2)
      CUDA_FREE(out_ptr3)
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    make
    cmake_minimum_required(VERSION 3.20)
    project(test)
    
    find_package(OpenCV REQUIRED)
    include_directories(${OpenCV_INCLUDE_DIRS})
    
    # FindCUDAToolkit (available since CMake 3.17) replaces the deprecated FindCUDA
    # module. Its imported targets carry the CUDA include directories, so no
    # hard-coded toolkit path or include_directories() call is needed.
    find_package(CUDAToolkit REQUIRED)
    
    add_executable(test test.cpp)
    # Link only the libraries the demo uses instead of every .so in the toolkit:
    # the CUDA runtime, the NPP core library and the NPP "data exchange and
    # initialization" image library that provides the nppiSet_* functions.
    target_link_libraries(test
                          ${OpenCV_LIBS}
                          CUDA::cudart
                          CUDA::nppc
                          CUDA::nppidei
    )
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    result

    请添加图片描述
    注意:

    1. mask使用的是单通道的,仅表示哪些像素需要进行set,哪些不需要。
    2. 通道的set,通过指针来表示是针对哪个通道进行设置。输入指针表示开始set的起始位置,对于三通道的图像而言,则是每隔两个元素set一次。
    Image Copy Operations

    除了比较常见的copy操作(copy、masked copy、channel copy)之外,还有planar和packed格式之间的相互拷贝、拷贝的同时添加border的接口,以及Copy Sub-pixel(没接触过)。

    // Plain copy.
    NppStatus nppiCopy_8u_C3R(const Npp8u *pSrc,
    						  int nSrcStep,
    						  Npp8u *pDst,
    						  int nDstStep,
    						  NppiSize oSizeROI);	
    // Masked copy: pixels are copied selectively according to pMask.
    NppStatus nppiCopy_8u_C3MR(const Npp8u *pSrc,
    						   int nSrcStep,
    						   Npp8u *pDst,
    						   int nDstStep,
    						   NppiSize oSizeROI,
    						   const Npp8u *pMask,
    						   int nMaskStep);	
    // Channel copy: copies one channel of a multi-channel image into one channel
    // of another multi-channel image.
    NppStatus nppiCopy_8u_C3CR(const Npp8u *pSrc,
    						   int nSrcStep,
    						   Npp8u *pDst,
    						   int nDstStep,
    						   NppiSize oSizeROI);	
    // Extract channel copy: copies one channel of a multi-channel image into a
    // single-channel image.
    NppStatus nppiCopy_8u_C3C1R(const Npp8u * pSrc,
    						    int nSrcStep,
    							Npp8u *pDst,
    							int nDstStep,
    							NppiSize oSizeROI);	
    // Insert channel copy: copies a single-channel image into one channel of a
    // multi-channel image.
    NppStatus nppiCopy_8u_C1C3R(const Npp8u * pSrc,
    							int nSrcStep,
    							Npp8u * pDst,
    							int nDstStep,
    							NppiSize oSizeROI);
    // The remaining interfaces are rarely used in practice, so they are not
    // covered in detail here.
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    code
    #include <iostream>
    #include <cuda_runtime.h>
    #include <npp.h>
    #include <opencv2/opencv.hpp>
    
    // Frees a device allocation with cudaFree and resets the pointer to nullptr.
    // Does nothing when the pointer is already nullptr.
    #define CUDA_FREE(ptr)    \
      {                       \
        if (nullptr != ptr) { \
          cudaFree(ptr);      \
          ptr = nullptr;      \
        }                     \
      }
    
    int main() {
      std::string directory = "../";
    
      // =============== load image ===============
      cv::Mat image_dog = cv::imread(directory + "dog.png");
      cv::Mat image_dog_gray;
      cv::cvtColor(image_dog, image_dog_gray, CV_RGB2GRAY);
    
      int image_width = image_dog.cols;
      int image_height = image_dog.rows;
      int image_size = image_width * image_height;
    
      // =============== device memory ===============
      uint8_t *in_image, *in_img_gray;
      cudaMalloc((void**)&in_image, image_size * 3 * sizeof(uint8_t));
      cudaMalloc((void**)&in_img_gray, image_size * sizeof(uint8_t));
      cudaMemcpy(in_image, image_dog.data, image_size * 3 * sizeof(uint8_t), cudaMemcpyHostToDevice);
      cudaMemcpy(in_img_gray, image_dog_gray.data, image_size * sizeof(uint8_t), cudaMemcpyHostToDevice);
    
      uint8_t *out_ptr1, *out_ptr2, *out_ptr3, *out_ptr4, *out_ptr5;
      cudaMalloc((void**)&out_ptr1, image_size * 3 * sizeof(uint8_t));  // 三通道
      cudaMalloc((void**)&out_ptr2, image_size * 3 * sizeof(uint8_t));  // 三通道
      cudaMalloc((void**)&out_ptr3, image_size * 3 * sizeof(uint8_t));  // 三通道
      cudaMalloc((void**)&out_ptr4, image_size * sizeof(uint8_t));  // 单通道
      cudaMalloc((void**)&out_ptr5, image_size * 3 * sizeof(uint8_t));  // 三通道
    
      // mask
      cv::Mat mask = cv::Mat::zeros(image_height, image_width, CV_8UC1);
      cv::Mat mask1 = cv::Mat::ones(image_height * 3 / 4, image_width * 3 / 4, CV_8UC1);
      cv::Rect rc1 = cv::Rect(image_width / 4, image_height / 4, image_width * 3 / 4, image_height * 3 / 4);
      mask1.copyTo(mask(rc1));
    
      uint8_t *gpu_mask;
      cudaMalloc((void**)&gpu_mask, image_size * sizeof(uint8_t));
      cudaMemcpy(gpu_mask, mask.data, image_size * sizeof(uint8_t), cudaMemcpyHostToDevice);
    
      NppiSize roi1, roi2;
      roi1.width = image_width;
      roi1.height = image_height;
      roi2.width = image_width / 2;
      roi2.height = image_height / 2;
    
      cv::Mat out_image = cv::Mat::zeros(image_height, image_width, CV_8UC3);
      cv::Mat out_single = cv::Mat::zeros(image_height, image_width, CV_8UC1);
      NppStatus status;
      // =============== nppiCopy_8u_C3R ===============
      status = nppiCopy_8u_C3R(in_image, image_width * 3, out_ptr1, image_width * 3, roi1);
      if (status != NPP_SUCCESS) {
        std::cout << "[GPU] ERROR nppiCopy_8u_C3R failed, status = " << status << std::endl;
        return false;
      }
      cudaMemcpy(out_image.data, out_ptr1, image_size * 3, cudaMemcpyDeviceToHost);
      cv::imwrite(directory + "copy.jpg", out_image);
    
      // =============== nppiCopy_8u_C3MR ===============
      status = nppiCopy_8u_C3MR(in_image, image_width * 3, out_ptr2, image_width * 3, roi1, gpu_mask, image_width);
      if (status != NPP_SUCCESS) {
        std::cout << "[GPU] ERROR nppiCopy_8u_C3MR failed, status = " << status << std::endl;
        return false;
      }
      cudaMemcpy(out_image.data, out_ptr2, image_size * 3, cudaMemcpyDeviceToHost);
      cv::imwrite(directory + "copy_mask.jpg", out_image);
      
      // =============== nppiCopy_8u_C3CR ===============
      status = nppiCopy_8u_C3CR(in_image, image_width * 3, out_ptr3, image_width * 3, roi1);
      if (status != NPP_SUCCESS) {
        std::cout << "[GPU] ERROR nppiCopy_8u_C3CR failed, status = " << status << std::endl;
        return false;
      }
      cudaMemcpy(out_image.data, out_ptr3, image_size * 3, cudaMemcpyDeviceToHost);
      cv::imwrite(directory + "copy_channel.jpg", out_image);
    
      // =============== nppiCopy_8u_C3C1R ===============
      status = nppiCopy_8u_C3C1R(in_image, image_width * 3, out_ptr4, image_width, roi1);
      if (status != NPP_SUCCESS) {
        std::cout << "[GPU] ERROR nppiCopy_8u_C3C1R failed, status = " << status << std::endl;
        return false;
      }
      cudaMemcpy(out_single.data, out_ptr4, image_size, cudaMemcpyDeviceToHost);
      cv::imwrite(directory + "copy_channel_extract.jpg", out_single);
    
      // =============== nppiCopy_8u_C1C3R ===============
      status = nppiCopy_8u_C1C3R(in_img_gray, image_width, out_ptr5, image_width * 3, roi1);
      if (status != NPP_SUCCESS) {
        std::cout << "[GPU] ERROR nppiCopy_8u_C1C3R failed, status = " << status << std::endl;
        return false;
      }
      cudaMemcpy(out_image.data, out_ptr5, image_size * 3, cudaMemcpyDeviceToHost);
      cv::imwrite(directory + "copy_channel_insert.jpg", out_image);
    
      // free
      CUDA_FREE(in_image)
      CUDA_FREE(in_img_gray)
      CUDA_FREE(out_ptr1)
      CUDA_FREE(out_ptr2)
      CUDA_FREE(out_ptr3)
      CUDA_FREE(out_ptr4)
      CUDA_FREE(out_ptr5)
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93
    • 94
    • 95
    • 96
    • 97
    • 98
    • 99
    • 100
    • 101
    • 102
    • 103
    • 104
    • 105
    • 106
    make
    cmake_minimum_required(VERSION 3.20)
    project(test)
    
    find_package(OpenCV REQUIRED)
    include_directories(${OpenCV_INCLUDE_DIRS})
    
    # FindCUDAToolkit (available since CMake 3.17) replaces the deprecated FindCUDA
    # module. Its imported targets carry the CUDA include directories, so no
    # hard-coded toolkit path or include_directories() call is needed.
    find_package(CUDAToolkit REQUIRED)
    
    add_executable(test test.cpp)
    # Link only the libraries the demo uses instead of every .so in the toolkit:
    # the CUDA runtime, the NPP core library and the NPP "data exchange and
    # initialization" image library that provides the nppiCopy_* functions.
    target_link_libraries(test
                          ${OpenCV_LIBS}
                          CUDA::cudart
                          CUDA::nppc
                          CUDA::nppidei
    )
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    result

    请添加图片描述
    注意:

    1. 由于是从三通道图像中提取单个通道进行copy,存图的时候只有单个通道,因此呈现出来的结果是灰色的。
  • 相关阅读:
    ☆☆如何学习MATLAB☆☆
    如何实现安卓屏幕分享及视频聊天?(源码)
    ubuntu20.04+ROS noetic在线运行单USB双目ORB_SLAM
    计算机视觉学习——表面检测
    Excel If函数
    (典题)线段计数 CF690 div3 F
    声音好听,颜值能打,基于PaddleGAN给人工智能AI语音模型配上动态画面(Python3.10)
    Rockchip RK3399 - DRM crtc基础知识
    ES 集群常用排查命令
    新品发布 | Cloudpods 3.9.1 版本上线
  • 原文地址:https://blog.csdn.net/u011732139/article/details/132606436