• 计算一组Tensor的直方图C算法实现


    Tensor量化中,衡量信息损失的一个重要算法叫做KL散度算法,关于其介绍请参考博客:

    模型量化中的KL散度扫盲_papaofdoudou的博客-CSDN博客_kl三度

    本文介绍其程序实现:

    首先构造一组TENSOR向量,维度为150528的列向量。

    观察其原始的直方图分布,其分布特点如下图所示:

    程序实现:

    1. #include
    2. #include
    3. #include
    4. #include
    5. #include
    6. #include
    7. #include
    8. #include
    9. #include
    /*
     * DBG(fmt, ...): print a printf-style message to stdout, prefixed with the
     * name of the calling function and the source line, and terminated by a
     * newline. `fmt` must be a string literal because it is concatenated with
     * the adjacent literals. `##__VA_ARGS__` is a GNU extension that removes the
     * trailing comma when no arguments follow the format string.
     */
    #define DBG(fmt, ...) do { printf("%s line %d, " fmt "\n", __func__, __LINE__, ##__VA_ARGS__); } while (0)

    /*
     * min(x, y): the smaller of x and y, evaluating each argument exactly once.
     * Uses a GNU statement expression. `__typeof__` is spelled with underscores
     * so the macro still compiles in strict ISO modes such as -std=c11, where
     * plain `typeof` is not a keyword. The pointer comparison has no runtime
     * effect; it only makes the compiler warn when x and y have different types.
     */
    #define min(x,y) ({ \
        __typeof__(x) _x = (x); \
        __typeof__(y) _y = (y); \
        (void) (&_x == &_y); \
        _x < _y ? _x : _y; })
    /*
     * Read whitespace-separated floating-point values from a text file.
     *
     * file_path: path of the text file to read.
     * buf:       out-parameter; on return *buf points to a newly malloc'd array
     *            holding the parsed values. The caller owns it and must free() it.
     *
     * Returns the number of values stored in *buf (always > 0).
     * If the file cannot be opened, memory cannot be allocated, or no value can
     * be parsed, a message is printed and the process exits with status -1.
     * At most max_len values are read; anything beyond that is ignored.
     */
    int get_tensor_from_txt_file(char *file_path, float **buf)
    {
        /* Scratch area kept across calls, so it is allocated only once. */
        static float *memory = NULL;
        static int max_len = 10 * 1024 * 1024;
        FILE *fp = NULL;
        int len = 0;
        float value = 0.f;

        if (memory == NULL) {
            memory = malloc(max_len * sizeof(float));
            if (memory == NULL) {
                DBG("malloc scratch buffer error.");
                exit(-1);
            }
        }

        if ((fp = fopen(file_path, "r")) == NULL) {
            DBG("open tensor error.");
            exit(-1);
        }

        /*
         * Loop on the fscanf() result rather than on feof(): feof() only turns
         * true after a read has already failed, so testing it first counts one
         * phantom element after a trailing newline and never terminates on a
         * token that is not a number. The length test keeps writes inside the
         * scratch buffer.
         */
        while (len < max_len && fscanf(fp, "%f", &value) == 1) {
            memory[len++] = value;
        }
        fclose(fp);

        *buf = (len > 0) ? malloc(len * sizeof(float)) : NULL;
        if (*buf == NULL) {
            DBG("read tensor error, len %d, *buf %p", len, (void *)*buf);
            exit(-1);
        }

        memcpy(*buf, memory, len * sizeof(float));
        return len;
    }
    /*
     * Build and print a histogram of the absolute values of a tensor.
     *
     * Usage: program <tensor-file> <bins>
     *   tensor-file: text file of whitespace-separated floats.
     *   bins:        number of histogram bins, a positive integer.
     *
     * Each non-zero element x is counted in bin (int)(|x| / absmax * bins),
     * clamped to the last bin, where absmax is the largest absolute value in
     * the tensor. Elements equal to zero are skipped. The raw counts are printed
     * first, followed by the counts divided by their total.
     *
     * Returns 0 on success; exits with status -1 on bad arguments or failed
     * allocation.
     */
    int main(int argc, char **argv)
    {
        DBG("in");
        if (argc != 3) {
            DBG("input error, you should use this program like that: program tensor binsnum.");
            exit(-1);
        }

        int bins = atoi(argv[2]);
        if (bins <= 0) {
            /* atoi() yields 0 for non-numeric text; zero or negative bins make no sense. */
            DBG("input error, binsnum must be a positive integer.");
            exit(-1);
        }

        float *tensor0_dat = NULL;
        int tensor0_len = get_tensor_from_txt_file(argv[1], &tensor0_dat);
        DBG("tensor len %d.", tensor0_len);

        /* Largest magnitude; valid indices are 0 .. tensor0_len - 1. */
        float absmax = 0.f;
        int i = 0;
        for (i = 0; i < tensor0_len; i++) {
            if (fabs(tensor0_dat[i]) > absmax)
                absmax = fabs(tensor0_dat[i]);
        }
        DBG("abs = %f.", absmax);

        /* calloc() zero-fills, so every bin starts at 0. */
        int *histogram = calloc(bins, sizeof(int));
        float *histogram_norm = calloc(bins, sizeof(float));
        if (histogram == NULL || histogram_norm == NULL) {
            DBG("fatal error, malloc histogram failure.");
            exit(-1);
        }

        for (i = 0; i < tensor0_len; i++) {
            /* Skipping zeros also means absmax is non-zero whenever we divide. */
            if (tensor0_dat[i] == 0.f)
                continue;
            /* |x| == absmax maps to index `bins`, hence the clamp to bins - 1. */
            const int index = min((int)(fabs(tensor0_dat[i]) / absmax * bins), (bins - 1));
            histogram[index] += 1;
        }
        for (i = 0; i < bins; i++) {
            DBG("histogram[%d] = %d.", i, histogram[i]);
        }

        /* Normalize the histogram so that the bins sum to 1. */
        int sum = 0;
        for (i = 0; i < bins; i++) {
            sum += histogram[i];
        }
        for (i = 0; i < bins; i++) {
            /* An all-zero tensor gives sum == 0; keep zeros instead of 0/0. */
            histogram_norm[i] = (sum > 0) ? (float)histogram[i] / (float)sum : 0.f;
        }
        for (i = 0; i < bins; i++) {
            DBG("histogram[%d] = %f.", i, histogram_norm[i]);
        }

        free(histogram_norm);
        free(histogram);
        free(tensor0_dat);
        DBG("out");
        return 0;
    }

    运行:

    3BINS:

     100BINS:

    100BINS的数据可视化:

     和上面有所差异,原因可能是程序中绝对值的处理,将负值也作为正值处理了。为了验证,我们找到这批数据中的最小值,它是负的,我们将其加上一个偏移,正好变为0,这样所有的值都变为了正数,平移数据范围不会影响直方图的分布,所以我们就可以验证我们的猜测是否正确。

    代码:

    1. #include
    2. #include
    3. #include
    4. #include
    5. #include
    6. #include
    7. #include
    8. #include
    9. #include
    /*
     * DBG(fmt, ...): print a printf-style message to stdout, prefixed with the
     * name of the calling function and the source line, and terminated by a
     * newline. `fmt` must be a string literal because it is concatenated with
     * the adjacent literals. `##__VA_ARGS__` is a GNU extension that removes the
     * trailing comma when no arguments follow the format string.
     */
    #define DBG(fmt, ...) do { printf("%s line %d, " fmt "\n", __func__, __LINE__, ##__VA_ARGS__); } while (0)

    /*
     * min(x, y): the smaller of x and y, evaluating each argument exactly once.
     * Uses a GNU statement expression. `__typeof__` is spelled with underscores
     * so the macro still compiles in strict ISO modes such as -std=c11, where
     * plain `typeof` is not a keyword. The pointer comparison has no runtime
     * effect; it only makes the compiler warn when x and y have different types.
     */
    #define min(x,y) ({ \
        __typeof__(x) _x = (x); \
        __typeof__(y) _y = (y); \
        (void) (&_x == &_y); \
        _x < _y ? _x : _y; })
    /*
     * Read whitespace-separated floating-point values from a text file.
     *
     * file_path: path of the text file to read.
     * buf:       out-parameter; on return *buf points to a newly malloc'd array
     *            holding the parsed values. The caller owns it and must free() it.
     *
     * Returns the number of values stored in *buf (always > 0).
     * If the file cannot be opened, memory cannot be allocated, or no value can
     * be parsed, a message is printed and the process exits with status -1.
     * At most max_len values are read; anything beyond that is ignored.
     */
    int get_tensor_from_txt_file(char *file_path, float **buf)
    {
        /* Scratch area kept across calls, so it is allocated only once. */
        static float *memory = NULL;
        static int max_len = 10 * 1024 * 1024;
        FILE *fp = NULL;
        int len = 0;
        float value = 0.f;

        if (memory == NULL) {
            memory = malloc(max_len * sizeof(float));
            if (memory == NULL) {
                DBG("malloc scratch buffer error.");
                exit(-1);
            }
        }

        if ((fp = fopen(file_path, "r")) == NULL) {
            DBG("open tensor error.");
            exit(-1);
        }

        /*
         * Loop on the fscanf() result rather than on feof(): feof() only turns
         * true after a read has already failed, so testing it first counts one
         * phantom element after a trailing newline and never terminates on a
         * token that is not a number. The length test keeps writes inside the
         * scratch buffer.
         */
        while (len < max_len && fscanf(fp, "%f", &value) == 1) {
            memory[len++] = value;
        }
        fclose(fp);

        *buf = (len > 0) ? malloc(len * sizeof(float)) : NULL;
        if (*buf == NULL) {
            DBG("read tensor error, len %d, *buf %p", len, (void *)*buf);
            exit(-1);
        }

        memcpy(*buf, memory, len * sizeof(float));
        return len;
    }
    /*
     * Shift a tensor by a fixed offset, then build and print the normalized
     * histogram of its absolute values.
     *
     * Usage: program <tensor-file> <bins>
     *   tensor-file: text file of whitespace-separated floats.
     *   bins:        number of histogram bins, a positive integer.
     *
     * After the shift, each non-zero element x is counted in bin
     * (int)(|x| / absmax * bins), clamped to the last bin, where absmax is the
     * largest absolute value of the shifted data. Elements equal to zero are
     * skipped. The raw counts are printed through DBG; the normalized values are
     * printed one per line with no prefix.
     *
     * Returns 0 on success; exits with status -1 on bad arguments or failed
     * allocation.
     */
    int main(int argc, char **argv)
    {
        /*
         * Offset added to every element. Per the accompanying article it was
         * chosen so that the minimum of the sample data set lands on zero; other
         * data sets need a different value.
         */
        const double offset = 87.939552;

        DBG("in");
        if (argc != 3) {
            DBG("input error, you should use this program like that: program tensor binsnum.");
            exit(-1);
        }

        int bins = atoi(argv[2]);
        if (bins <= 0) {
            /* atoi() yields 0 for non-numeric text; zero or negative bins make no sense. */
            DBG("input error, binsnum must be a positive integer.");
            exit(-1);
        }

        float *tensor0_dat = NULL;
        int tensor0_len = get_tensor_from_txt_file(argv[1], &tensor0_dat);
        DBG("tensor len %d.", tensor0_len);

        /* Valid indices are 0 .. tensor0_len - 1 in every loop below. */
        int i = 0;
        for (i = 0; i < tensor0_len; i++) {
            tensor0_dat[i] += offset;
        }

        /* Largest magnitude and smallest value of the shifted data. */
        float absmax = 0.f;
        float minval = 0.f;
        for (i = 0; i < tensor0_len; i++) {
            if (fabs(tensor0_dat[i]) > absmax)
                absmax = fabs(tensor0_dat[i]);
            if (tensor0_dat[i] < minval)
                minval = tensor0_dat[i];
        }
        DBG("abs = %f, min %f.", absmax, minval);

        /* calloc() zero-fills, so every bin starts at 0. */
        int *histogram = calloc(bins, sizeof(int));
        float *histogram_norm = calloc(bins, sizeof(float));
        if (histogram == NULL || histogram_norm == NULL) {
            DBG("fatal error, malloc histogram failure.");
            exit(-1);
        }

        for (i = 0; i < tensor0_len; i++) {
            /* Skipping zeros also means absmax is non-zero whenever we divide. */
            if (tensor0_dat[i] == 0.f)
                continue;
            /* |x| == absmax maps to index `bins`, hence the clamp to bins - 1. */
            const int index = min((int)(fabs(tensor0_dat[i]) / absmax * bins), (bins - 1));
            histogram[index] += 1;
        }
        for (i = 0; i < bins; i++) {
            DBG("histogram[%d] = %d.", i, histogram[i]);
        }

        /* Normalize the histogram so that the bins sum to 1. */
        int sum = 0;
        for (i = 0; i < bins; i++) {
            sum += histogram[i];
        }
        for (i = 0; i < bins; i++) {
            /* If nothing was counted, sum == 0; keep zeros instead of 0/0. */
            histogram_norm[i] = (sum > 0) ? (float)histogram[i] / (float)sum : 0.f;
        }
        /* Bare values, one per line, so the output can be parsed by other tools. */
        for (i = 0; i < bins; i++) {
            printf("%f\n", histogram_norm[i]);
        }

        free(histogram_norm);
        free(histogram);
        free(tensor0_dat);
        DBG("out");
        return 0;
    }

    这次直方图曲线和本篇开头的直方图符合了:

    数据可视化部分的PYTHON代码:

    import numpy as np
    import linecache
    import matplotlib.pyplot as plt

    filename = "output.tensor"
    cols = 1  # number of columns
    divided_ch = ' '  # separator between numbers on one line


    def dat_to_matrix(filename):
        """Load the first number of every non-blank line of a text file.

        Args:
            filename: path of a text file with one value per line; fields on a
                line are separated by ``divided_ch``.

        Returns:
            A 1-D float ``numpy`` array with one entry per non-blank line.

        Raises:
            OSError: if the file cannot be opened.
            ValueError: if a field cannot be converted to float.
        """
        # The context manager closes the file even when parsing fails.
        with open(filename) as file:
            lines = [line.strip() for line in file]
        # Blank lines (e.g. a trailing newline) carry no value and are skipped.
        values = [float(line.split(divided_ch)[0]) for line in lines if line]
        return np.array(values, dtype=float)


    data = dat_to_matrix(filename)
    # print(data)
    # X-axis coordinates: one point per value, so any bin count can be plotted.
    X = np.linspace(0, 1, len(data))
    plt.figure(figsize=(8, 6))  # figure size
    plt.plot(X, data)  # draw the curve
    plt.show()

    总结:

    1.直方图平移后会被压缩,但是曲线的变化趋势不变,可以作计算导数的思想实验来验证这一点。

    2.对于AI训练和推理来说,数据本身的分布形状比数据本身要重要的多.

    当OFFSET过大的时候,以山峰高度为例,相当于我们选取的基础海平面太低,以至于无法体现地表山峰的高度趋势了,这个时候,计算的直方图会被压缩。如下图将OFFSET从87改为870

    产生的直方图为,对比上图,可以看到图像形状没有变化,但是被压缩了。

    证明很简单,设 n > m > 0,a > 0

    \frac{m}{n} \ ?\ \frac{a+m}{a+n}

    ?应该是什么呢?先通分。

    \\ \frac{m(a+n)}{n(a+n)} ? \frac{n(a+m)}{n(a+n)} \\ \because m(a+n)=ma+mn<na+mn=n(a+m) \\ \therefore \frac{m}{n} \ < \ \frac{a+m}{a+n}

    总结:

    很多时候,量化重要的一步是找出tensor的值域边界,得到每层tensor值的上下边界,在此基础上确定threshold。

    参考博客:

    使用NCNN的INT8量化方式进行推理_papaofdoudou的博客-CSDN博客_int8量化 ncnn


    结束 

  • 相关阅读:
    制定项目管理计划
    rsync远程同步
    1行代码提取6种TCGA表达矩阵和临床信息
    常见排序算法之选择排序
    【MATLAB】 辛几何模态分解信号分解+FFT傅里叶频谱变换组合算法
    C++实现轻量级RPC分布式网络通信框架
    校园棒球运动会运营策划案·棒球联盟
    javascript算法排序之希尔排序
    Windows c/c++开发环境安装以及配置VSCode
    MFC随记:AfxBeginThread创建工作线程
  • 原文地址:https://blog.csdn.net/tugouxp/article/details/125900447