计算一组Tensor的直方图C算法实现

Tensor量化中，计算信息损失的一个重要算法叫做KL散度算法，关于其介绍请参考博客：

模型量化中的KL散度扫盲_papaofdoudou的博客-CSDN博客_kl三度

本文介绍其程序实现：

首先构造一组TENSOR向量,维度为150528的列向量。

观察其原始的直方图分布，其分布特点如下图所示：

程序实现：


#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
 
#define DBG(fmt, ...)   do { printf("%s line %d, "fmt"\n", __func__, __LINE__, ##__VA_ARGS__); } while (0)
#define min(x,y) ({ \
    typeof(x) _x = (x);    \
    typeof(y) _y = (y);    \
    (void) (&_x == &_y);    \
    _x < _y ? _x : _y; })
 
/*
 * Read whitespace-separated floating point values from a text file.
 *
 * file_path: path of the text file to parse.
 * buf:       out parameter; receives a newly malloc'ed array holding the
 *            values that were read. The caller owns it and must free() it.
 *
 * Returns the number of values stored in *buf (always > 0).
 *
 * The process exits with -1 if the file cannot be opened, if no value could
 * be read, or if an allocation fails. Parsing stops at end of file, at the
 * first token that is not a number, or after max_len values; anything after
 * that point is ignored.
 */
int get_tensor_from_txt_file(char *file_path, float **buf)
{
    int len = 0;
    /* Scratch buffer, allocated on first use and reused by later calls. */
    static float *memory = NULL;
    static int max_len = 10 * 1024 * 1024;
    FILE *fp = NULL;

    if(memory == NULL)
    {
        memory = (float*) malloc(max_len * sizeof(float));
        if(memory == NULL)
        {
            DBG("malloc scratch buffer error.");
            exit(-1);
        }
    }

    if((fp = fopen(file_path, "r")) == NULL)
    {
        DBG("open tensor error.");
        exit(-1);
    }

    /*
     * Count a value only when fscanf really converted one. Testing feof()
     * before reading counts a phantom element at end of file and never
     * terminates on a malformed token. The len < max_len test keeps the
     * writes inside the scratch buffer.
     */
    while(len < max_len && fscanf(fp, "%f", &memory[len]) == 1)
    {
        len ++;
    }

    fclose(fp);

    *buf = (len > 0) ? (float*)malloc(len * sizeof(float)) : NULL;
    if(len == 0 || *buf == NULL)
    {
        DBG("read tensor error, len %d, *buf %p", len, (void*)*buf);
        exit(-1);
    }
    memcpy(*buf, memory, len * sizeof(float));

    return len;
}
 
/*
 * Compute and print the histogram of the absolute values of a tensor.
 *
 * Usage: program <tensor_file> <bins>
 *   tensor_file: text file with whitespace-separated float values.
 *   bins:        number of histogram bins, a positive integer.
 *
 * Values are binned by |value| / absmax over [0, absmax]; elements equal to
 * zero are skipped. The raw counts are printed first, then the counts
 * normalized so that they sum to 1.
 *
 * Returns 0 on success; exits with -1 on bad arguments or allocation failure.
 */
int main(int argc, char **argv)
{
    DBG("in");

    if(argc != 3)
    {
        DBG("input error, you should use this program like that: program tensor binsnum.");
        exit(-1);
    }

    /* A bin count <= 0 would make (bins - 1) a negative array index below. */
    int bins = atoi(argv[2]);
    if(bins <= 0)
    {
        DBG("input error, binsnum must be a positive integer, got %s.", argv[2]);
        exit(-1);
    }

    float *tensor0_dat = NULL;
    int tensor0_len = get_tensor_from_txt_file(argv[1], &tensor0_dat);
    DBG("tensor len %d.", tensor0_len);

    float absmax = 0.f;
    int i = 0;

    /* Valid indices are 0 .. tensor0_len - 1. */
    for(i = 0; i < tensor0_len; i ++)
    {
        if(fabs(tensor0_dat[i]) > absmax)
            absmax = fabs(tensor0_dat[i]);
    }

    DBG("abs = %f.", absmax);

    int *histogram = calloc(bins, sizeof(int));
    float *histogram_norm = malloc(bins * sizeof(float));
    if(histogram == NULL || histogram_norm == NULL)
    {
        DBG("fatal error, malloc histogram failure.");
        exit(-1);
    }

    for(i = 0; i < tensor0_len; i ++)
    {
        /* Zeros are left out of the histogram; this also means absmax is
         * non-zero whenever the division below is reached. */
        if (tensor0_dat[i] == 0.f) continue;

        /* The element equal to absmax maps to index `bins`; clamp it into
         * the last bin. */
        const int index = min((int)(fabs(tensor0_dat[i]) / absmax * bins), (bins - 1));
        histogram[index] += 1;
    }

    for(i = 0; i < bins; i ++)
    {
        DBG("histogram[%d] = %d.", i, histogram[i]);
    }

    /* Normalize the histogram so that the bins sum to 1. */
    int sum = 0;

    for(i = 0; i < bins; i ++)
    {
        sum += histogram[i];
    }

    for(i = 0; i < bins; i ++)
    {
        /* sum is 0 when every element was zero; report an empty histogram
         * instead of dividing 0 by 0. */
        histogram_norm[i] = (sum > 0) ? (float)histogram[i] / (float)sum : 0.f;
    }

    for(i = 0; i < bins; i ++)
    {
        DBG("histogram[%d] = %f.", i, histogram_norm[i]);
    }

    free(histogram_norm);
    free(histogram);
    free(tensor0_dat);

    DBG("out");
    return 0;
}

运行：

3BINS：

100BINS：

100BINS的数据可视化：

和上面有所差异，原因可能是程序中绝对值的处理，将负值也作为正值处理了。为了验证，我们找到这批数据中的最小值，它是负的，我们将其加上一个偏移，正好变为0，这样所有的值都变为了正数，平移数据范围不会影响直方图的分布，所以我们就可以验证我们的猜测是否正确。

代码：


#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
 
#define DBG(fmt, ...)   do { printf("%s line %d, "fmt"\n", __func__, __LINE__, ##__VA_ARGS__); } while (0)
#define min(x,y) ({ \
    typeof(x) _x = (x);    \
    typeof(y) _y = (y);    \
    (void) (&_x == &_y);    \
    _x < _y ? _x : _y; })
 
/*
 * Read whitespace-separated floating point values from a text file.
 *
 * file_path: path of the text file to parse.
 * buf:       out parameter; receives a newly malloc'ed array holding the
 *            values that were read. The caller owns it and must free() it.
 *
 * Returns the number of values stored in *buf (always > 0).
 *
 * The process exits with -1 if the file cannot be opened, if no value could
 * be read, or if an allocation fails. Parsing stops at end of file, at the
 * first token that is not a number, or after max_len values; anything after
 * that point is ignored.
 */
int get_tensor_from_txt_file(char *file_path, float **buf)
{
    int len = 0;
    /* Scratch buffer, allocated on first use and reused by later calls. */
    static float *memory = NULL;
    static int max_len = 10 * 1024 * 1024;
    FILE *fp = NULL;

    if(memory == NULL)
    {
        memory = (float*) malloc(max_len * sizeof(float));
        if(memory == NULL)
        {
            DBG("malloc scratch buffer error.");
            exit(-1);
        }
    }

    if((fp = fopen(file_path, "r")) == NULL)
    {
        DBG("open tensor error.");
        exit(-1);
    }

    /*
     * Count a value only when fscanf really converted one. Testing feof()
     * before reading counts a phantom element at end of file and never
     * terminates on a malformed token. The len < max_len test keeps the
     * writes inside the scratch buffer.
     */
    while(len < max_len && fscanf(fp, "%f", &memory[len]) == 1)
    {
        len ++;
    }

    fclose(fp);

    *buf = (len > 0) ? (float*)malloc(len * sizeof(float)) : NULL;
    if(len == 0 || *buf == NULL)
    {
        DBG("read tensor error, len %d, *buf %p", len, (void*)*buf);
        exit(-1);
    }
    memcpy(*buf, memory, len * sizeof(float));

    return len;
}
 
/*
 * Compute and print the normalized histogram of a tensor after shifting
 * every element by a fixed positive offset.
 *
 * Usage: program <tensor_file> <bins>
 *   tensor_file: text file with whitespace-separated float values.
 *   bins:        number of histogram bins, a positive integer.
 *
 * The raw counts are printed through DBG; the normalized values are printed
 * one per line with plain printf.
 *
 * Returns 0 on success; exits with -1 on bad arguments or allocation failure.
 */
int main(int argc, char **argv)
{
    /* Offset added to every element before binning. */
    const double offset = 87.939552;

    DBG("in");

    if(argc != 3)
    {
        DBG("input error, you should use this program like that: program tensor binsnum.");
        exit(-1);
    }

    /* A bin count <= 0 would make (bins - 1) a negative array index below. */
    int bins = atoi(argv[2]);
    if(bins <= 0)
    {
        DBG("input error, binsnum must be a positive integer, got %s.", argv[2]);
        exit(-1);
    }

    float *tensor0_dat = NULL;
    int tensor0_len = get_tensor_from_txt_file(argv[1], &tensor0_dat);
    DBG("tensor len %d.", tensor0_len);

    float absmax = 0.f;
    /* Named min_val so it is not confused with the min() macro. It starts
     * at 0, so it only moves if a shifted element is still negative. */
    float min_val = 0.f;
    int i = 0;

    /* Valid indices are 0 .. tensor0_len - 1; going one further would write
     * past the end of the heap buffer. */
    for(i = 0; i < tensor0_len; i ++)
    {
        tensor0_dat[i] += offset;
    }

    for(i = 0; i < tensor0_len; i ++)
    {
        if(fabs(tensor0_dat[i]) > absmax)
            absmax = fabs(tensor0_dat[i]);

        if(tensor0_dat[i] < min_val)
            min_val = tensor0_dat[i];
    }

    DBG("abs = %f, min %f.", absmax, min_val);

    int *histogram = calloc(bins, sizeof(int));
    float *histogram_norm = malloc(bins * sizeof(float));
    if(histogram == NULL || histogram_norm == NULL)
    {
        DBG("fatal error, malloc histogram failure.");
        exit(-1);
    }

    for(i = 0; i < tensor0_len; i ++)
    {
        /* Zeros are left out of the histogram; this also means absmax is
         * non-zero whenever the division below is reached. */
        if (tensor0_dat[i] == 0.f) continue;

        /* The element equal to absmax maps to index `bins`; clamp it into
         * the last bin. */
        const int index = min((int)(fabs(tensor0_dat[i]) / absmax * bins), (bins - 1));
        histogram[index] += 1;
    }

    for(i = 0; i < bins; i ++)
    {
        DBG("histogram[%d] = %d.", i, histogram[i]);
    }

    /* Normalize the histogram so that the bins sum to 1. */
    int sum = 0;

    for(i = 0; i < bins; i ++)
    {
        sum += histogram[i];
    }

    for(i = 0; i < bins; i ++)
    {
        /* sum is 0 when every shifted element was zero; report an empty
         * histogram instead of dividing 0 by 0. */
        histogram_norm[i] = (sum > 0) ? (float)histogram[i] / (float)sum : 0.f;
    }

    /* Plain printf: one bare value per line. */
    for(i = 0; i < bins; i ++)
    {
        printf("%f\n", histogram_norm[i]);
    }

    free(histogram_norm);
    free(histogram);
    free(tensor0_dat);

    DBG("out");
    return 0;
}

这次直方图曲线和本篇开头的直方图符合了：

数据可视化部分的PYTHON代码：


import numpy as np
import linecache
import matplotlib.pyplot as plt
 
filename = "output.tensor" # input text file passed to dat_to_matrix
cols = 1 # number of columns per line (not referenced by the code shown here)
divided_ch = ' ' # delimiter between numbers on a line
 
def dat_to_matrix(filename, delimiter=None):
    """Load a text file holding one number per line into a 1-D float array.

    Args:
        filename: Path of the text file to read.
        delimiter: Separator used to split each line. Defaults to the
            module-level ``divided_ch`` when omitted.

    Returns:
        A 1-D ``numpy.ndarray`` of ``float64`` with one entry per non-blank
        line, in file order.

    Raises:
        ValueError: If a line holds more than one field or a field is not a
            valid number.
    """
    if delimiter is None:
        delimiter = divided_ch

    values = []
    # The context manager closes the file even when parsing fails.
    with open(filename) as file:
        for line_no, line in enumerate(file, start=1):
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines, e.g. a trailing newline at end of file.
                continue
            fields = stripped.split(delimiter)
            if len(fields) != 1:
                raise ValueError(
                    f"{filename}:{line_no}: expected 1 value per line, "
                    f"got {len(fields)}"
                )
            values.append(float(fields[0]))

    return np.array(values, dtype=float)
 
 
data = dat_to_matrix(filename)
# Spread the samples evenly over [0, 1]. The number of points follows the
# data, so the plot works for any bin count instead of only exactly 100.
X = np.linspace(0, 1, len(data))  # x-axis coordinates
plt.figure(figsize=(8, 6))        # figure size
plt.plot(X, data)                 # draw the curve
plt.show()

总结：

1.直方图平移后会被压缩，但是曲线的变化趋势不变，可以作计算导数的思想实验来验证这一点。

2.对于AI训练和推理来说，数据本身的分布形状比数据本身要重要得多。

当OFFSET过大的时候，以山峰高度为例，相当于我们选取的基础海平面太低，以至于无法体现地表山峰的高度趋势了，这个时候，计算的直方图会被压缩。如下图将OFFSET从87改为870

产生的直方图为,对比上图，可以看到图像形状没有变化，但是被压缩了。

证明很简单，设 n>m>0，a>0：

$\frac{m}{n} \ ?\ \frac{a+m}{a+n}$

?应该是什么呢？先通分。

$$\begin{aligned} \frac{m}{n} = \frac{m(a+n)}{n(a+n)} \ &?\ \frac{n(a+m)}{n(a+n)} = \frac{a+m}{a+n} \\ \because\ m(a+n) = ma+mn &< na+mn = n(a+m) \\ \therefore\ \frac{m}{n} &< \frac{a+m}{a+n} \end{aligned}$$

总结：

很多时候，量化重要的一步是找出tensor的值域边界，得到每层tensor值的上下边界，在此基础上确定threshold。

参考博客：

使用NCNN的INT8量化方式进行推理_papaofdoudou的博客-CSDN博客_int8量化 ncnn

结束

相关阅读:
苹果秋季发布会官宣，新款Mac将搭载M3芯片，来势迅猛！
SW - 清除零件实体表面上无用的凸起
 Spring MVC 返回JSON数据
 高防服务器有用么？
RocketMQ
SLAM第１１讲
 微电网两阶段鲁棒优化（Matlab代码实现）
使用Kubernetes部署Kubernetes集群
 【数据结构】字符串匹配（暴力匹配）
分权分域有啥内容？
原文地址：https://blog.csdn.net/tugouxp/article/details/125900447