• 缩放图片算法优化 sse


    前情提要
    这里实现了打印文件的缩放算法,用于缩放打印文件(prt、prn)。
    核心功能如下:

    /// @brief Scales the print file into pTargetData by nearest-neighbour sampling.
    ///
    /// For every target row and every colour plane, one source line is read with
    /// ReadLine(); each target column then copies the source pixel located at
    /// static_cast<int>(xRatio * column) into the bit-packed target line.
    ///
    /// @param pTargetData Destination buffer. Target lines are written starting
    ///        sizeof(m_header) bytes into it, nTargetLineSize bytes per line.
    ///        NOTE(review): assumed to be large enough for the header plus
    ///        m_zoomHeight * nImageColorNum target lines — confirm with the caller.
    void CZoomPrtFile::zoomPrtFile(BYTE* pTargetData)
    {
    	const float xRatio = static_cast<float>(m_perWidth - 1) / m_zoomWidth;
    	const float yRatio = static_cast<float>(m_perHeight - 1) / m_zoomHeight;
    
    	// Bytes per bit-packed target line, rounded up to a whole byte.
    	const int nTargetLineSize = (m_zoomWidth * m_header.nImageColorDeep + 7) / 8;
    
    	BYTE* pSourceLineData = new BYTE[m_header.nBytePerLine];
    	for (size_t row = 0; row < m_zoomHeight; row++) 
    	{
    		const int srcY = static_cast<int>(yRatio * row);
    		for (int ColorNum = 0; ColorNum < m_header.nImageColorNum; ColorNum++)
    		{
    			memset(pSourceLineData, 0, m_header.nBytePerLine);
    			ReadLine(pSourceLineData, srcY * m_header.nImageColorNum + ColorNum, 0, m_header.nBytePerLine);
    
    			// The target line start depends only on (row, ColorNum), so compute it
    			// once per line. It is kept in size_t so large outputs do not overflow int.
    			const size_t lineOffset =
    				static_cast<size_t>(row * m_header.nImageColorNum + ColorNum) * nTargetLineSize + sizeof(m_header);
    			BYTE* pTargetLine = pTargetData + lineOffset;
    
    			for (size_t column = 0; column < m_zoomWidth; column++) 
    			{
    				const int srcX = static_cast<int>(xRatio * column);
    				// Fetch the nImageColorDeep-bit pixel from the source line.
    				const unsigned char srcValue = getPixel(pSourceLineData, srcX, m_header.nImageColorDeep);
    				// Store it as the nImageColorDeep-bit pixel of the target line.
    				setPixel(pTargetLine, column, srcValue, m_header.nImageColorDeep);
    			}
    		}
    	}
    
    	delete[] pSourceLineData;
    }
    
    /// @brief Extracts one bit-packed pixel from a line buffer.
    ///
    /// Pixel x starts at bit x * nImageColorDeep of the buffer; within a byte the
    /// pixel is taken from bit (x * nImageColorDeep) % 8 upwards.
    ///
    /// @param data            Line buffer holding the packed pixels.
    /// @param x               Pixel index within the line.
    /// @param nImageColorDeep Bits per pixel (the original comment lists 1, 2, 4, 8).
    /// @return The pixel value, masked to nImageColorDeep bits.
    unsigned char CZoomPrtFile::getPixel(unsigned char* data, int x, int nImageColorDeep)
    {
    	const int bitOffset = x * nImageColorDeep;
    
    	// Only the byte fetch happens while fileMutex is held.
    	// NOTE(review): presumably the buffer can be touched by another thread — confirm.
    	unsigned char packed;
    	{
    		std::lock_guard<std::mutex> guard(fileMutex);
    		packed = data[bitOffset / 8];
    	}
    
    	// Low nImageColorDeep bits set.
    	const unsigned char mask = (1 << nImageColorDeep) - 1;
    	return (packed >> (bitOffset % 8)) & mask;
    }
    
    /// @brief Extracts one bit-packed pixel from a single byte.
    ///
    /// The pixel is read starting at bit (x * nImageColorDeep) % 8 of @p data.
    ///
    /// @param data            Byte holding 8 / nImageColorDeep packed pixels.
    /// @param x               Pixel index; only its bit position within the byte is used.
    /// @param nImageColorDeep Bits per pixel (the original comment lists 1, 2, 4, 8).
    /// @return The pixel value, masked to nImageColorDeep bits.
    unsigned char CZoomPrtFile::getPixel(unsigned char data, int x, int nImageColorDeep)
    {
    	// Bit position of the pixel inside the byte. No byte index is needed
    	// because the caller already passes the byte itself.
    	const int bitIndex = (x * nImageColorDeep) % 8;
    
    	// Low nImageColorDeep bits set.
    	const unsigned char mask = (1 << nImageColorDeep) - 1;
    
    	return (data >> bitIndex) & mask;
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68

    使用sse 128

    /// @brief Nearest-neighbour scaling of the print file into pTargetData, with the
    ///        source/target bit offsets computed for four columns at a time.
    ///
    /// For every target row and every colour plane, one source line is read with
    /// ReadLine(). Columns are processed in groups of four: the source pixel index
    /// static_cast<int>(xRatio * column) and the byte/bit positions of the source
    /// and target pixels are computed with 128-bit integer/float intrinsics, then
    /// the four pixels are copied with scalar loads and stores. Columns left over
    /// when m_zoomWidth is not a multiple of four are handled by a scalar tail
    /// loop so nothing is read or written past the end of a line.
    ///
    /// @param pTargetData Destination buffer. Target lines are written starting
    ///        sizeof(m_header) bytes into it, nTargetLineSize bytes per line.
    ///        NOTE(review): assumed to be large enough for the header plus
    ///        m_zoomHeight * nImageColorNum target lines — confirm with the caller.
    ///
    /// @note _mm_mullo_epi32 is an SSE4.1 intrinsic, so SSE4.1 support is required.
    void CZoomPrtFile::zoomPrtFileSSE128(BYTE* pTargetData)
    {
    	const float xRatio = static_cast<float>(m_perWidth - 1) / m_zoomWidth;
    	const float yRatio = static_cast<float>(m_perHeight - 1) / m_zoomHeight;
    
    	const int nColorDeep = m_header.nImageColorDeep;
    	const __m128 xmmRatio = _mm_set1_ps(xRatio);
    	const __m128i colorDeep = _mm_set1_epi32(nColorDeep);
    
    	// The bit positions live in 32-bit lanes, so the "mod 8" mask must be 7 per
    	// 32-bit lane. A per-byte 0x07 would also keep bits 8-10, 16-18 and 24-26.
    	const __m128i bitMask = _mm_set1_epi32(0x07);
    	// Pixel mask: low nImageColorDeep bits set (bits per pixel, not the plane count).
    	const unsigned char mask = static_cast<unsigned char>((1 << nColorDeep) - 1);
    
    	const int nZoomWidth = static_cast<int>(m_zoomWidth);
    	// Last column (exclusive) that can be processed in full groups of four.
    	const int nVectorEnd = nZoomWidth - nZoomWidth % 4;
    	// Bytes per bit-packed target line, rounded up to a whole byte.
    	const int nTargetLineSize = (m_zoomWidth * m_header.nImageColorDeep + 7) / 8;
    
    	BYTE* pSourceLineData = new BYTE[m_header.nBytePerLine];
    
    	// Copies one pixel from the source line into the given target line.
    	const auto copyPixel = [pSourceLineData, mask](BYTE* desLine, int srcByte, int srcBit, int desByte, int desBit)
    	{
    		const unsigned char pixel = (pSourceLineData[srcByte] >> srcBit) & mask;
    		desLine[desByte] = static_cast<BYTE>((desLine[desByte] & ~(mask << desBit)) | (pixel << desBit));
    	};
    
    	for (int row = 0; row < m_zoomHeight; row++)
    	{
    		const int srcY = static_cast<int>(yRatio * row);
    
    		for (int ColorNum = 0; ColorNum < m_header.nImageColorNum; ColorNum++)
    		{
    			memset(pSourceLineData, 0, m_header.nBytePerLine);
    			ReadLine(pSourceLineData, srcY * m_header.nImageColorNum + ColorNum, 0, m_header.nBytePerLine);
    
    			// The target line start depends only on (row, ColorNum), so compute it
    			// once per line. It is kept in size_t so large outputs do not overflow int.
    			const size_t lineOffset =
    				static_cast<size_t>(row * m_header.nImageColorNum + ColorNum) * nTargetLineSize + sizeof(m_header);
    			BYTE* desDataPointer = pTargetData + lineOffset;
    
    			int column = 0;
    			for (; column < nVectorEnd; column += 4)
    			{
    				// Four consecutive target column indices.
    				const __m128i xmmColumn = _mm_setr_epi32(column, column + 1, column + 2, column + 3);
    				// Scale to source columns: int -> float, multiply, truncate back to int.
    				const __m128i xmmSrcX = _mm_cvttps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(xmmColumn), xmmRatio));
    
    				// Source pixel: absolute bit offset, then byte index (>> 3) and bit index (& 7).
    				const __m128i xmmSrcBits = _mm_mullo_epi32(xmmSrcX, colorDeep);
    				alignas(16) int srcXByteIndex[4];
    				_mm_store_si128((__m128i*)srcXByteIndex, _mm_srli_epi32(xmmSrcBits, 3));
    				alignas(16) int srcXBitIndex[4];
    				_mm_store_si128((__m128i*)srcXBitIndex, _mm_and_si128(xmmSrcBits, bitMask));
    
    				// Target pixel: same decomposition for the target column.
    				const __m128i xmmDesBits = _mm_mullo_epi32(xmmColumn, colorDeep);
    				alignas(16) int DesXByteIndex[4];
    				_mm_store_si128((__m128i*)DesXByteIndex, _mm_srli_epi32(xmmDesBits, 3));
    				alignas(16) int DesXBitIndex[4];
    				_mm_store_si128((__m128i*)DesXBitIndex, _mm_and_si128(xmmDesBits, bitMask));
    
    				for (int lane = 0; lane < 4; lane++)
    				{
    					copyPixel(desDataPointer, srcXByteIndex[lane], srcXBitIndex[lane],
    						DesXByteIndex[lane], DesXBitIndex[lane]);
    				}
    			}
    
    			// Scalar tail for the last (m_zoomWidth % 4) columns.
    			for (; column < nZoomWidth; column++)
    			{
    				const int srcBits = static_cast<int>(xRatio * column) * nColorDeep;
    				const int desBits = column * nColorDeep;
    				copyPixel(desDataPointer, srcBits >> 3, srcBits & 7, desBits >> 3, desBits & 7);
    			}
    		}
    	}
    
    	// Release the line buffer.
    	delete[] pSourceLineData;
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79

    本来提取像素和设置像素也都应该用 SSE 来写,但是折磨了 ChatGPT 好几天,也没有写出来。
    所以这里仅用 SSE 优化了源数据以及目标数据坐标的计算。
    即使只是这样,优化效果也很好:
    在这里插入图片描述

  • 相关阅读:
    ceph delete pool
    网站的静态资源怎么获取?
    算法套路学习笔记(第二章) 动态规划系列 2.13-2.19
    喹啉羧酸类 DHODH 抑制剂用于治疗急性髓系白血病
    NET 3行代码实现文字转语音功能
    golang的垃圾回收算法之七标记过程
    KMP算法——通俗易懂讲好KMP算法:实例图解分析+详细代码注解 --》你的所有疑惑在本文都能得到解答
    金融与大模型:引领行业未来的创新融合
    【开源微服务项目】论如何在微服务中优雅的实现Redis序列化配置
    laravel练习03
  • 原文地址:https://blog.csdn.net/fuyouzhiyi/article/details/134445033