• CUDA小白 - NPP(11) 图像处理 Comparison Operations


    cuda小白
    原始API链接 NPP

    GPU架构近些年也有不少的变化,具体的可以参考别的博主的介绍,都比较详细。还有一些cuda中的专有名词的含义,可以参考《详解CUDA的Context、Stream、Warp、SM、SP、Kernel、Block、Grid》

    常见的NppStatus,可以看这里

    Thresholding Operations

    分通道、逐像素地将源像素与阈值进行比较:如果满足指定的比较条件(Operation),则把该像素更新为阈值(或者调用者指定的值),否则保持原值不变。当前模块的接口分为两大类:一类直接在原地址上进行操作(in-place),另一类则把结果写到指定的输出地址。

    /*
    Comparison operators understood by the NPP threshold / compare functions:
    enum NppCmpOp {
      NPP_CMP_LESS,
      NPP_CMP_LESS_EQ,
      NPP_CMP_EQ,
      NPP_CMP_GREATER_EQ,
      NPP_CMP_GREATER
    }; 
    */
    // Generic form: for every channel of every pixel in the ROI, if the comparison
    // "source pixel <op> rThresholds[c]" holds, the output is set to the threshold;
    // otherwise the source value is copied unchanged.
    // NOTE(review): per the NPP documentation only NPP_CMP_LESS and NPP_CMP_GREATER
    // are accepted by the threshold functions - confirm for your toolkit version.
    NppStatus nppiThreshold_8u_C3R(const Npp8u *pSrc,
    							   int nSrcStep,
    							   Npp8u *pDst,
    							   int nDstStep,
    							   NppiSize oSizeROI,
    							   const Npp8u rThresholds[3],
    							   NppCmpOp eComparisonOperation);
    // Greater-than variant: behaves like the generic form with NPP_CMP_GREATER
    // (strictly greater, not NPP_CMP_GREATER_EQ).
    NppStatus nppiThreshold_GT_8u_C3R(const Npp8u *pSrc,
    								  int nSrcStep,
    							      Npp8u *pDst,
    							      int nDstStep,
    								  NppiSize oSizeROI,
    							      const Npp8u rThresholds[3]);
    // Less-than variant: behaves like the generic form with NPP_CMP_LESS
    // (strictly less, not NPP_CMP_LESS_EQ).
    NppStatus nppiThreshold_LT_8u_C3R(const Npp8u *pSrc,
    							      int nSrcStep,
    								  Npp8u *pDst,
    								  int nDstStep,
    								  NppiSize oSizeROI,
    								  const Npp8u rThresholds[3]);
    // "Val" variants: where the comparison holds, the output is set to the
    // caller-supplied rValues[c] instead of the threshold itself.
    NppStatus nppiThreshold_Val_8u_C3R(const Npp8u *pSrc,
    								   int nSrcStep,
    								   Npp8u *pDst,
    								   int nDstStep,
    								   NppiSize oSizeROI,
    								   const Npp8u rThresholds[3],
    								   const Npp8u rValues[3],
    								   NppCmpOp eComparisonOperation);
    // Greater-than comparison, writing rValues where it holds.
    NppStatus nppiThreshold_GTVal_8u_C3R(const Npp8u * pSrc,
    									 int nSrcStep,
    									 Npp8u *pDst,
    									 int nDstStep,
    									 NppiSize oSizeROI,
    									 const Npp8u rThresholds[3],
    									 const Npp8u rValues[3]);
    // Less-than comparison, writing rValues where it holds.
    NppStatus nppiThreshold_LTVal_8u_C3R(const Npp8u *pSrc,
    									 int nSrcStep,
    									 Npp8u *pDst,
    									 int nDstStep,
    									 NppiSize oSizeROI,
    									 const Npp8u rThresholds[3],
    									 const Npp8u rValues[3]);
    // Lower and upper bound in one call: pixels below rThresholdsLT become
    // rValuesLT, pixels above rThresholdsGT become rValuesGT.
    NppStatus nppiThreshold_LTValGTVal_8u_C3R(const Npp8u *pSrc,
    										  int nSrcStep,
    										  Npp8u *pDst,
    										  int nDstStep,
    										  NppiSize oSizeROI,
    										  const Npp8u rThresholdsLT[3],
    										  const Npp8u rValuesLT[3],
    										  const Npp8u rThresholdsGT[3],
    										  const Npp8u rValuesGT[3]);
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64

    下面选用 nppiThreshold_GT_8u_C3R 和 nppiThreshold_GTVal_8u_C3R 两个接口作为示例

    code
    #include <iostream>
    #include <cuda_runtime.h>
    #include <npp.h>
    #include <opencv2/opencv.hpp>
    
    #define CUDA_FREE(ptr) { if (ptr != nullptr) { cudaFree(ptr); ptr = nullptr; } }
    
    int main() {
      std::string directory = "../";
      cv::Mat image_dog = cv::imread(directory + "dog.png");
      int image_width = image_dog.cols;
      int image_height = image_dog.rows;
      int image_size = image_width * image_height;
    
      // =============== device memory ===============
      // input
      uint8_t *in_image;
      cudaMalloc((void**)&in_image, image_size * 3 * sizeof(uint8_t));
      cudaMemcpy(in_image, image_dog.data, image_size * 3 * sizeof(uint8_t), cudaMemcpyHostToDevice);
    
      // output
      uint8_t *out_ptr1, *out_ptr2;
      cudaMalloc((void**)&out_ptr1, image_size * 3 * sizeof(uint8_t));  // 三通道
      cudaMalloc((void**)&out_ptr2, image_size * 3 * sizeof(uint8_t));  // 三通道
    
      NppiSize in_size;
      in_size.width = image_width;
      in_size.height = image_height;
    
      uint8_t threshold[3] = {150, 150, 150};
      cv::Mat out_image = cv::Mat::zeros(image_height, image_width, CV_8UC3);
      // =============== nppiThreshold_GT_8u_C3R ===============
      NppStatus status;
      status = nppiThreshold_GT_8u_C3R(in_image, image_width * 3, out_ptr1, image_width * 3, 
                                       in_size, threshold);
      if (status != NPP_SUCCESS) {
        std::cout << "[GPU] ERROR nppiThreshold_GT_8u_C3R failed, status = " << status << std::endl;
        return false;
      }
      cudaMemcpy(out_image.data, out_ptr1, image_size * 3, cudaMemcpyDeviceToHost);
      cv::imwrite(directory + "threshold_gt.jpg", out_image);
    
      // =============== nppiThreshold_GTVal_8u_C3R ===============
      uint8_t value[3] = {255, 255, 255};
      status = nppiThreshold_GTVal_8u_C3R(in_image, image_width * 3, out_ptr2, image_width * 3, 
                                          in_size, threshold, value);
      if (status != NPP_SUCCESS) {
        std::cout << "[GPU] ERROR nppiThreshold_GTVal_8u_C3R failed, status = " << status << std::endl;
        return false;
      }
      cudaMemcpy(out_image.data, out_ptr2, image_size * 3, cudaMemcpyDeviceToHost);
      cv::imwrite(directory + "threshold_gt_value.jpg", out_image);
    
      // free
      CUDA_FREE(in_image)
      CUDA_FREE(out_ptr1)
      CUDA_FREE(out_ptr2)
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    make
    cmake_minimum_required(VERSION 3.20)
    project(test)
    
    find_package(OpenCV REQUIRED)
    # FindCUDAToolkit (CMake >= 3.17) replaces the deprecated FindCUDA module and
    # provides imported targets that carry their own include and link settings.
    find_package(CUDAToolkit REQUIRED)
    
    add_executable(test test.cpp)
    target_include_directories(test PRIVATE ${OpenCV_INCLUDE_DIRS})
    # Link only what the example uses instead of globbing every .so under a
    # hard-coded /usr/local/cuda/lib64: the CUDA runtime, the NPP core library and
    # the NPP threshold-and-compare library.
    target_link_libraries(test PRIVATE
                          ${OpenCV_LIBS}
                          CUDA::cudart
                          CUDA::nppc
                          CUDA::nppitc
    )
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    result

    请添加图片描述

    Comparison Operations

    本文到此就只阐述比较简单的两个接口,其他的结果按需索取

    // Compares two 3-channel 8-bit images pixel by pixel using
    // eComparisonOperation and writes the outcome to pDst, which the demo below
    // allocates as a single-channel 8-bit image (one byte per pixel).
    NppStatus nppiCompare_8u_C3R(const Npp8u *pSrc1,
    							 int nSrc1Step,
    							 const Npp8u *pSrc2,
    							 int nSrc2Step,
    							 Npp8u *pDst,
    							 int nDstStep,
    							 NppiSize oSizeROI,
    							 NppCmpOp eComparisonOperation);
    // Same comparison, but against per-channel constants (pConstants) instead of
    // a second image. The demo below passes a host array of three values.
    NppStatus nppiCompareC_8u_C3R(const Npp8u *pSrc,
    							  int nSrcStep,
    							  const Npp8u *pConstants,
    							  Npp8u * pDst,
    							  int nDstStep,
    							  NppiSize oSizeROI,
    							  NppCmpOp eComparisonOperation);
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15

    其目的是将两张图像逐像素进行比较,或者将一张图像与一组常量(constant)进行比较,并生成一张二值结果图像。结果图像的类型是8UC1:如果比较条件成立(对于多通道图像,要求所有通道都满足条件),则该像素被设置为uint8_t的最大值(255),否则设置为0。

    code
    #include <iostream>
    #include <cuda_runtime.h>
    #include <npp.h>
    #include <opencv2/opencv.hpp>
    
    #define CUDA_FREE(ptr) { if (ptr != nullptr) { cudaFree(ptr); ptr = nullptr; } }
    
    int main() {
      std::string directory = "../";
      cv::Mat image_dog = cv::imread(directory + "dog.png");
      int image_width = image_dog.cols;
      int image_height = image_dog.rows;
      int image_size = image_width * image_height;
    
      // =============== device memory ===============
      // input
      uint8_t *in_image1, *in_image2;
      cudaMalloc((void**)&in_image1, image_size * 3 * sizeof(uint8_t));
      cudaMalloc((void**)&in_image2, image_size * 3 * sizeof(uint8_t));
      cudaMemcpy(in_image1, image_dog.data, image_size * 3 * sizeof(uint8_t), cudaMemcpyHostToDevice);
      
      cv::Mat mask = cv::Mat::zeros(image_height, image_width, CV_8UC3);
      int step = 4;
      int step_width = image_width / step;
      cv::Mat ones = cv::Mat::ones(image_height, step_width, CV_8UC3);
      for (int i = 1; i < step; ++i) {
        cv::Rect rc1 = cv::Rect(i * step_width, 0, step_width, image_height);
        mask(rc1) = ones.clone() * 50 * i;
      }
      
      cudaMemcpy(in_image2, mask.data, image_size * 3 * sizeof(uint8_t), cudaMemcpyHostToDevice);
    
      // output
      uint8_t *out_ptr1, *out_ptr2;
      cudaMalloc((void**)&out_ptr1, image_size * sizeof(uint8_t));  // 三通道
      cudaMalloc((void**)&out_ptr2, image_size * sizeof(uint8_t));  // 三通道
    
      NppiSize in_size;
      in_size.width = image_width;
      in_size.height = image_height;
    
      cv::Mat out_image = cv::Mat::zeros(image_height, image_width, CV_8UC1);
      NppStatus status;
      // =============== nppiCompare_8u_C3R ===============
      status = nppiCompare_8u_C3R(in_image1, image_width * 3, in_image2, image_width * 3, 
                                  out_ptr1, image_width, in_size, NPP_CMP_GREATER);
      if (status != NPP_SUCCESS) {
        std::cout << "[GPU] ERROR nppiCompare_8u_C3R failed, status = " << status << std::endl;
        return false;
      }
      cudaMemcpy(out_image.data, out_ptr1, image_size, cudaMemcpyDeviceToHost);
      cv::imwrite(directory + "compare.jpg", out_image);
    
      // =============== nppiCompareC_8u_C3R ===============
      uint8_t constant[3] = {100, 100, 100};
      status = nppiCompareC_8u_C3R(in_image1, image_width * 3, constant, out_ptr2, image_width, 
                                   in_size, NPP_CMP_GREATER);
      if (status != NPP_SUCCESS) {
        std::cout << "[GPU] ERROR nppiCompareC_8u_C3R failed, status = " << status << std::endl;
        return false;
      }
      cudaMemcpy(out_image.data, out_ptr2, image_size, cudaMemcpyDeviceToHost);
      cv::imwrite(directory + "comparec.jpg", out_image);
    
      // free
      CUDA_FREE(in_image1)
      CUDA_FREE(in_image2)
      CUDA_FREE(out_ptr1)
      CUDA_FREE(out_ptr2)
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    make
    cmake_minimum_required(VERSION 3.20)
    project(test)
    
    find_package(OpenCV REQUIRED)
    # FindCUDAToolkit (CMake >= 3.17) replaces the deprecated FindCUDA module and
    # provides imported targets that carry their own include and link settings.
    find_package(CUDAToolkit REQUIRED)
    
    add_executable(test test.cpp)
    target_include_directories(test PRIVATE ${OpenCV_INCLUDE_DIRS})
    # Link only what the example uses instead of globbing every .so under a
    # hard-coded /usr/local/cuda/lib64: the CUDA runtime, the NPP core library and
    # the NPP threshold-and-compare library.
    # A markdown image link from the blog post had been spliced into this argument
    # list; it is kept here as a comment so the script parses again:
    #   https://img-blog.csdnimg.cn/81402e58c241462fa7d22d7783b5d176.png
    target_link_libraries(test PRIVATE
                          ${OpenCV_LIBS}
                          CUDA::cudart
                          CUDA::nppc
                          CUDA::nppitc
    )
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    result

    请添加图片描述

  • 相关阅读:
    [vue] vue2 Invalid Host header 解决问题
    百元开放式耳机哪款好一点耐用、百元耳放推荐
    Redis面试题
    面向对象的设计-设计模式-5种创建型模式
    炮轰特斯拉「无图」,一家老牌图商的反击
    3D行业趋势2024
    华为欧拉 openEuler 23.09 一键安装 Oracle 12CR2 单机
    刷题之小蓝吃糖果和你究竟有几个好姐妹
    json对象中对Long类型和String类型相互转换
    数据结构·顺序表
  • 原文地址:https://blog.csdn.net/u011732139/article/details/132962984