• 几个排序器的verilog及其资源占用、延时分析


    提示:文章写完后,目录可以自动生成,如何生成可参考右边的帮助文档


    前言

    因为课题需要,调研了几个快速排序方法,并手写或者改进了若干待测试对象,包括记分板型冒泡排序(这个是别人的)、插入排序(这个是我写的)、双调排序(这个我改了又改,可能还会接着改进)、堆排序(这个是别人的)。以上都在7035开发板上测试了资源。除了插入排序截了时序图之外,其他几个我就直接给代码,外加资源占用情况和延迟(这个延迟是给定例子的延迟)。不介绍原理了,默认都懂!

    这个领域比较小众,能查到的文献相比时髦的深度学习硬件加速器要少得多得多,但还是有存在价值。外加上开源的排序器硬件加速方案,按照作者的说法要么就是纯粹从原理介绍到电路设计,要么就额外添加几句自己的设计很牛,总之缺乏统一的对比,说服力不够。

    我在这篇博客做的工作就是:

    1、改进双调排序;

    2、纯手撸一个4状态的插入排序;

    3、合理比较。

    书写、修改、调试不易,请大家多多惠存~


    一、记分板型冒泡排序

    以下是.v

    1. // reference: https://mp.weixin.qq.com/s/BesXJzfle_ZvW__C6DBNrA
    2. module sort_bubble #(parameter BITWIDTH = 8, parameter ELEMENTS = 32)(
    3. input clk ,
    4. input rst ,
    5. input [BITWIDTH*ELEMENTS-1:0] data_in ,
    6. input data_in_valid ,
    7. output [BITWIDTH*ELEMENTS-1:0] data_out ,
    8. output data_out_valid
    9. );
    10. reg [ 5:0] data_in_valid_ff;
    11. reg [BITWIDTH*ELEMENTS-1:0] data_in_ff[5:0] ;
    12. reg v[ELEMENTS-1:0][ELEMENTS-1:0] ;
    13. reg [ 1:0] sum_1[31:0][15:0] ;
    14. reg [ 2:0] sum_2[31:0][7:0] ;
    15. reg [ 3:0] sum_3[31:0][3:0] ;
    16. reg [ 4:0] sum_4[31:0][1:0] ;
    17. reg [ 5:0] sum_5[31:0] ;
    18. reg [BITWIDTH-1:0] data_out_temp[ELEMENTS-1:0] ;
    19. reg data_out_valid_temp;
    20. genvar i;
    21. genvar j;
    22. always @(posedge clk ) begin
    23. if(rst == 1'b1)begin
    24. data_in_valid_ff <= 6'b0;
    25. end
    26. else begin
    27. data_in_valid_ff <= {data_in_valid_ff[4:0], data_in_valid};
    28. end
    29. end
    30. always @(posedge clk ) begin
    31. data_in_ff[0] <= data_in;
    32. end
    33. generate
    34. for ( i = 0; i < 5 ; i = i + 1 ) begin : LOOP_DATA_IN
    35. always @(posedge clk ) begin
    36. data_in_ff[i+1] <= data_in_ff[i];
    37. end
    38. end
    39. endgenerate
    40. generate
    41. for ( i = 0 ; i < 32 ; i = i + 1 ) begin : LOOP_V_I
    42. for ( j = i ; j < 32 ; j = j + 1) begin : LOOP_V_J
    43. always @(posedge clk ) begin
    44. if(data_in_valid == 1'b1)begin
    45. v[i][j] <= data_in[i*8 +: 8] >= data_in[j*8 +: 8]; // 2D Parallel
    46. v[j][i] <= data_in[i*8 +: 8] < data_in[j*8 +: 8]; // 2D Parallel
    47. end
    48. end
    49. end
    50. end
    51. endgenerate
    52. generate
    53. for ( i = 0 ; i < 32 ; i = i + 1 ) begin : LOOP_SUM_1_I
    54. for ( j = 0 ; j < 16 ; j = j + 1) begin : LOOP_SUM_1_J
    55. always @(posedge clk ) begin
    56. if(data_in_valid_ff[0] == 1'b1)begin
    57. sum_1[i][j] <= v[i][j*2] + v[i][j*2 + 1];
    58. end
    59. end
    60. end
    61. end
    62. endgenerate
    63. generate
    64. for ( i = 0 ; i < 32 ; i = i + 1 ) begin : LOOP_SUM_2_I
    65. for ( j = 0 ; j < 8 ; j = j + 1) begin : LOOP_SUM_2_J
    66. always @(posedge clk ) begin
    67. if(data_in_valid_ff[1] == 1'b1)begin
    68. sum_2[i][j] <= sum_1[i][j*2] + sum_1[i][j*2 + 1];
    69. end
    70. end
    71. end
    72. end
    73. endgenerate
    74. generate
    75. for ( i = 0 ; i < 32 ; i = i + 1 ) begin : LOOP_SUM_3_I
    76. for ( j = 0 ; j < 4 ; j = j + 1) begin : LOOP_SUM_3_J
    77. always @(posedge clk ) begin
    78. if(data_in_valid_ff[2] == 1'b1)begin
    79. sum_3[i][j] <= sum_2[i][j*2] + sum_2[i][j*2 + 1];
    80. end
    81. end
    82. end
    83. end
    84. endgenerate
    85. generate
    86. for ( i = 0 ; i < 32 ; i = i + 1 ) begin : LOOP_SUM_4_I
    87. for ( j = 0 ; j < 2 ; j = j + 1) begin : LOOP_SUM_4_J
    88. always @(posedge clk ) begin
    89. if(data_in_valid_ff[3] == 1'b1)begin
    90. sum_4[i][j] <= sum_3[i][j*2] + sum_3[i][j*2 + 1];
    91. end
    92. end
    93. end
    94. end
    95. endgenerate
    96. generate
    97. for ( i = 0 ; i < 32 ; i = i + 1 ) begin : LOOP_SUM_5_I
    98. always @(posedge clk ) begin
    99. if(data_in_valid_ff[4] == 1'b1)begin
    100. sum_5[i] <= sum_4[i][0] + sum_4[i][1];
    101. end
    102. end
    103. end
    104. endgenerate
    105. always @(posedge clk ) begin : LOOP_DATA_OUT_TEMP_CLK
    106. integer k;
    107. for ( k = 0; k < 32; k = k + 1) begin : LOOP_DATA_OUT_TEMP
    108. if(data_in_valid_ff[5] == 1'b1)begin
    109. data_out_temp[sum_5[k]] <= data_in_ff[5][k*8 +: 8];
    110. data_out_valid_temp <= 1'b1;
    111. end
    112. else begin
    113. data_out_temp[sum_5[k]] <= 8'd0;
    114. data_out_valid_temp <= 1'b0;
    115. end
    116. end
    117. end
    118. generate
    119. for ( i = 0 ; i < 32 ; i = i + 1) begin : LOOP_DATA_OUT
    120. assign data_out[i*8 +: 8] = data_out_temp[i] ;
    121. assign data_out_valid = data_out_valid_temp;
    122. end
    123. endgenerate
    124. endmodule

    以下是testbench 

    1. `timescale 1ns / 1ps
    2. module tb_bubble(
    3. );
    4. reg clk;
    5. reg rst;
    6. reg [7:0] data_in_0;
    7. reg [7:0] data_in_1;
    8. reg [7:0] data_in_2;
    9. reg [7:0] data_in_3;
    10. reg [7:0] data_in_4;
    11. reg [7:0] data_in_5;
    12. reg [7:0] data_in_6;
    13. reg [7:0] data_in_7;
    14. reg [7:0] data_in_8;
    15. reg [7:0] data_in_9;
    16. reg [7:0] data_in_10;
    17. reg [7:0] data_in_11;
    18. reg [7:0] data_in_12;
    19. reg [7:0] data_in_13;
    20. reg [7:0] data_in_14;
    21. reg [7:0] data_in_15;
    22. reg [7:0] data_in_16;
    23. reg [7:0] data_in_17;
    24. reg [7:0] data_in_18;
    25. reg [7:0] data_in_19;
    26. reg [7:0] data_in_20;
    27. reg [7:0] data_in_21;
    28. reg [7:0] data_in_22;
    29. reg [7:0] data_in_23;
    30. reg [7:0] data_in_24;
    31. reg [7:0] data_in_25;
    32. reg [7:0] data_in_26;
    33. reg [7:0] data_in_27;
    34. reg [7:0] data_in_28;
    35. reg [7:0] data_in_29;
    36. reg [7:0] data_in_30;
    37. reg [7:0] data_in_31;
    38. wire [255:0] data_in;
    39. reg data_in_valid;
    40. wire [255:0] data_out;
    41. wire data_out_valid;
    42. initial begin
    43. clk = 1'b0;
    44. rst = 1'b1;
    45. #50
    46. rst = 1'b0;
    47. end
    48. always #5 clk = !clk;
    49. always @(posedge clk ) begin
    50. if(rst == 1'b1)begin
    51. data_in_0 <= 8'd0;
    52. data_in_1 <= 8'd0;
    53. data_in_2 <= 8'd0;
    54. data_in_3 <= 8'd0;
    55. data_in_4 <= 8'd0;
    56. data_in_5 <= 8'd0;
    57. data_in_6 <= 8'd0;
    58. data_in_7 <= 8'd0;
    59. data_in_8 <= 8'd0;
    60. data_in_9 <= 8'd0;
    61. data_in_10 <= 8'd0;
    62. data_in_11 <= 8'd0;
    63. data_in_12 <= 8'd0;
    64. data_in_13 <= 8'd0;
    65. data_in_14 <= 8'd0;
    66. data_in_15 <= 8'd0;
    67. data_in_16 <= 8'd0;
    68. data_in_17 <= 8'd0;
    69. data_in_18 <= 8'd0;
    70. data_in_19 <= 8'd0;
    71. data_in_20 <= 8'd0;
    72. data_in_21 <= 8'd0;
    73. data_in_22 <= 8'd0;
    74. data_in_23 <= 8'd0;
    75. data_in_24 <= 8'd0;
    76. data_in_25 <= 8'd0;
    77. data_in_26 <= 8'd0;
    78. data_in_27 <= 8'd0;
    79. data_in_28 <= 8'd0;
    80. data_in_29 <= 8'd0;
    81. data_in_30 <= 8'd0;
    82. data_in_31 <= 8'd0;
    83. data_in_valid <= 1'b0;
    84. end
    85. else begin
    86. data_in_0 <= {$random} % 255;
    87. data_in_1 <= {$random} % 255;
    88. data_in_2 <= {$random} % 255;
    89. data_in_3 <= {$random} % 255;
    90. data_in_4 <= {$random} % 255;
    91. data_in_5 <= {$random} % 255;
    92. data_in_6 <= {$random} % 255;
    93. data_in_7 <= {$random} % 255;
    94. data_in_8 <= {$random} % 255;
    95. data_in_9 <= {$random} % 255;
    96. data_in_10 <= {$random} % 255;
    97. data_in_11 <= {$random} % 255;
    98. data_in_12 <= {$random} % 255;
    99. data_in_13 <= {$random} % 255;
    100. data_in_14 <= {$random} % 255;
    101. data_in_15 <= {$random} % 255;
    102. data_in_16 <= {$random} % 255;
    103. data_in_17 <= {$random} % 255;
    104. data_in_18 <= {$random} % 255;
    105. data_in_19 <= {$random} % 255;
    106. data_in_20 <= {$random} % 255;
    107. data_in_21 <= {$random} % 255;
    108. data_in_22 <= {$random} % 255;
    109. data_in_23 <= {$random} % 255;
    110. data_in_24 <= {$random} % 255;
    111. data_in_25 <= {$random} % 255;
    112. data_in_26 <= {$random} % 255;
    113. data_in_27 <= {$random} % 255;
    114. data_in_28 <= {$random} % 255;
    115. data_in_29 <= {$random} % 255;
    116. data_in_30 <= {$random} % 255;
    117. data_in_31 <= {$random} % 255;
    118. data_in_valid <= 1'b1;
    119. end
    120. end
    121. assign data_in = {data_in_0, data_in_1, data_in_2, data_in_3, data_in_4, data_in_5, data_in_6, data_in_7, data_in_8, data_in_9, data_in_10, data_in_11, data_in_12, data_in_13, data_in_14, data_in_15,data_in_16, data_in_17, data_in_18, data_in_19, data_in_20, data_in_21, data_in_22, data_in_23,data_in_24, data_in_25, data_in_26, data_in_27, data_in_28, data_in_29, data_in_30, data_in_31};
    122. sort_bubble sort_bubble_u(
    123. .clk (clk ),
    124. .rst (rst ),
    125. .data_in (data_in ),
    126. .data_in_valid (data_in_valid ),
    127. .data_out (data_out ),
    128. .data_out_valid (data_out_valid)
    129. );
    130. endmodule

    二、插入排序

    以下是.v

    1. module sort_insertion #(parameter BITWIDTH = 8, parameter ELEMENTS = 32)(
    2. input clk,
    3. input rst_n,
    4. input [BITWIDTH*ELEMENTS-1:0] data_in,
    5. output reg [BITWIDTH*ELEMENTS-1:0] data_out
    6. );
    7. reg [7:0] array [0:31];
    8. reg [5:0] i, j;
    9. reg [7:0] key;
    10. // State machine states
    11. reg [2:0] state_c;
    12. reg [2:0] state_n;
    13. parameter IDLE = 3'b000,
    14. SORT = 3'b001,
    15. BREAK= 3'b010,
    16. DONE = 3'b011;
    17. genvar p;
    18. generate
    19. for (p=0; p<32; p=p+1) begin: ASSIGN_FOR_DATA_OUT
    20. always @(posedge clk or negedge rst_n) begin
    21. data_out[p*8 +: 8] <= array[p];
    22. end
    23. end
    24. endgenerate
    25. // State Transfer - First
    26. always @(posedge clk or negedge rst_n) begin
    27. if (!rst_n) begin
    28. state_c <= IDLE;
    29. end
    30. else begin
    31. state_c <= state_n;
    32. end
    33. end
    34. // State Transfer - Second
    35. always @(*) begin
    36. case (state_c)
    37. IDLE: begin
    38. state_n = BREAK;
    39. end
    40. SORT: begin
    41. if (j == 0 || array[j] <= key) state_n = BREAK;
    42. else state_n = state_c;
    43. end
    44. BREAK: begin
    45. if (i > 0 && i < 31) state_n = SORT;
    46. else if (i == 31) state_n = DONE;
    47. else state_n = state_c;
    48. end
    49. default: state_n = DONE;
    50. endcase
    51. end
    52. // State Transfer - Third
    53. always @(posedge clk or negedge rst_n) begin
    54. if (!rst_n) begin i <= 0; j <= 0; key <= 8'd0; end
    55. else if (state_c == SORT) begin
    56. if (j == 0 || array[j] <= key) begin
    57. i <= i; j <= j - 1; key <= array[i]; // 同时跳转到break
    58. end
    59. else begin
    60. i <= i; j <= j - 1; key <= key; // 接着比较
    61. end
    62. end
    63. else if (state_c == BREAK) begin
    64. if ( i <= 30) begin
    65. i <= i + 1; j <= i; key <= array[i+1];
    66. end
    67. else begin
    68. i <= i + 1; j <= 0; key <= 8'd0;
    69. end
    70. end
    71. else begin i <= 0; j <= 0; key <= 8'd0; end
    72. end
    73. always @(posedge clk or negedge rst_n) begin
    74. if (!rst_n) begin
    75. array[0] <= 0;
    76. array[1] <= 0;
    77. array[2] <= 0;
    78. array[3] <= 0;
    79. array[4] <= 0;
    80. array[5] <= 0;
    81. array[6] <= 0;
    82. array[7] <= 0;
    83. array[8] <= 0;
    84. array[9] <= 0;
    85. array[10] <= 0;
    86. array[11] <= 0;
    87. array[12] <= 0;
    88. array[13] <= 0;
    89. array[14] <= 0;
    90. array[15] <= 0;
    91. array[16] <= 0;
    92. array[17] <= 0;
    93. array[18] <= 0;
    94. array[19] <= 0;
    95. array[20] <= 0;
    96. array[21] <= 0;
    97. array[22] <= 0;
    98. array[23] <= 0;
    99. array[24] <= 0;
    100. array[25] <= 0;
    101. array[26] <= 0;
    102. array[27] <= 0;
    103. array[28] <= 0;
    104. array[29] <= 0;
    105. array[30] <= 0;
    106. array[31] <= 0;
    107. end
    108. else begin
    109. if (state_c == IDLE) begin
    110. array[0] <= data_in[0*8 +: 8];
    111. array[1] <= data_in[1*8 +: 8];
    112. array[2] <= data_in[2*8 +: 8];
    113. array[3] <= data_in[3*8 +: 8];
    114. array[4] <= data_in[4*8 +: 8];
    115. array[5] <= data_in[5*8 +: 8];
    116. array[6] <= data_in[6*8 +: 8];
    117. array[7] <= data_in[7*8 +: 8];
    118. array[8] <= data_in[8*8 +: 8];
    119. array[9] <= data_in[9*8 +: 8];
    120. array[10] <= data_in[10*8 +: 8];
    121. array[11] <= data_in[11*8 +: 8];
    122. array[12] <= data_in[12*8 +: 8];
    123. array[13] <= data_in[13*8 +: 8];
    124. array[14] <= data_in[14*8 +: 8];
    125. array[15] <= data_in[15*8 +: 8];
    126. array[16] <= data_in[16*8 +: 8];
    127. array[17] <= data_in[17*8 +: 8];
    128. array[18] <= data_in[18*8 +: 8];
    129. array[19] <= data_in[19*8 +: 8];
    130. array[20] <= data_in[20*8 +: 8];
    131. array[21] <= data_in[21*8 +: 8];
    132. array[22] <= data_in[22*8 +: 8];
    133. array[23] <= data_in[23*8 +: 8];
    134. array[24] <= data_in[24*8 +: 8];
    135. array[25] <= data_in[25*8 +: 8];
    136. array[26] <= data_in[26*8 +: 8];
    137. array[27] <= data_in[27*8 +: 8];
    138. array[28] <= data_in[28*8 +: 8];
    139. array[29] <= data_in[29*8 +: 8];
    140. array[30] <= data_in[30*8 +: 8];
    141. array[31] <= data_in[31*8 +: 8];
    142. end
    143. else if (state_c == BREAK && i == 1 && array[j] > key) begin
    144. array[j + 1] <= array[j];
    145. array[j] <= key;
    146. end
    147. else if (state_c == SORT && array[j] > key) begin
    148. array[j + 1] <= array[j];
    149. array[j] <= key;
    150. end
    151. end
    152. end
    153. endmodule

    以下是testbench:

    1. module sort_insertion_tb #(parameter BITWIDTH = 8, parameter ELEMENTS = 32)();
    2. reg clk;
    3. reg rst_n;
    4. reg [BITWIDTH*ELEMENTS-1:0] data_in;
    5. wire [BITWIDTH*ELEMENTS-1:0] data_out;
    6. sort_insertion #(BITWIDTH, ELEMENTS) uut (
    7. .clk(clk),
    8. .rst_n(rst_n),
    9. .data_in(data_in),
    10. .data_out(data_out)
    11. );
    12. // Clock generation
    13. always #5 clk = ~clk;
    14. initial begin
    15. // Initialize inputs
    16. clk = 0;
    17. rst_n = 0;
    18. // Apply reset
    19. #10;
    20. rst_n = 1;
    21. // Load test data
    22. data_in[BITWIDTH*0 +: BITWIDTH] = 8'd12;
    23. data_in[BITWIDTH*1 +: BITWIDTH] = 8'd3;
    24. data_in[BITWIDTH*2 +: BITWIDTH] = 8'd25;
    25. data_in[BITWIDTH*3 +: BITWIDTH] = 8'd8;
    26. data_in[BITWIDTH*4 +: BITWIDTH] = 8'd15;
    27. data_in[BITWIDTH*5 +: BITWIDTH] = 8'd18;
    28. data_in[BITWIDTH*6 +: BITWIDTH] = 8'd7;
    29. data_in[BITWIDTH*7 +: BITWIDTH] = 8'd1;
    30. data_in[BITWIDTH*8 +: BITWIDTH] = 8'd31;
    31. data_in[BITWIDTH*9 +: BITWIDTH] = 8'd14;
    32. data_in[BITWIDTH*10 +: BITWIDTH] = 8'd6;
    33. data_in[BITWIDTH*11 +: BITWIDTH] = 8'd22;
    34. data_in[BITWIDTH*12 +: BITWIDTH] = 8'd27;
    35. data_in[BITWIDTH*13 +: BITWIDTH] = 8'd20;
    36. data_in[BITWIDTH*14 +: BITWIDTH] = 8'd5;
    37. data_in[BITWIDTH*15 +: BITWIDTH] = 8'd9;
    38. data_in[BITWIDTH*16 +: BITWIDTH] = 8'd4;
    39. data_in[BITWIDTH*17 +: BITWIDTH] = 8'd17;
    40. data_in[BITWIDTH*18 +: BITWIDTH] = 8'd2;
    41. data_in[BITWIDTH*19 +: BITWIDTH] = 8'd10;
    42. data_in[BITWIDTH*20 +: BITWIDTH] = 8'd11;
    43. data_in[BITWIDTH*21 +: BITWIDTH] = 8'd13;
    44. data_in[BITWIDTH*22 +: BITWIDTH] = 8'd24;
    45. data_in[BITWIDTH*23 +: BITWIDTH] = 8'd28;
    46. data_in[BITWIDTH*24 +: BITWIDTH] = 8'd19;
    47. data_in[BITWIDTH*25 +: BITWIDTH] = 8'd26;
    48. data_in[BITWIDTH*26 +: BITWIDTH] = 8'd23;
    49. data_in[BITWIDTH*27 +: BITWIDTH] = 8'd30;
    50. data_in[BITWIDTH*28 +: BITWIDTH] = 8'd29;
    51. data_in[BITWIDTH*29 +: BITWIDTH] = 8'd21;
    52. data_in[BITWIDTH*30 +: BITWIDTH] = 8'd16;
    53. data_in[BITWIDTH*31 +: BITWIDTH] = 8'd0;
    54. // Wait for the sorting to complete
    55. #3000;
    56. // Display sorted data
    57. $display("Sorted data:");
    58. $display("data_out[0] = %d", data_out[BITWIDTH*0 +: BITWIDTH]);
    59. $display("data_out[1] = %d", data_out[BITWIDTH*1 +: BITWIDTH]);
    60. $display("data_out[2] = %d", data_out[BITWIDTH*2 +: BITWIDTH]);
    61. $display("data_out[3] = %d", data_out[BITWIDTH*3 +: BITWIDTH]);
    62. $display("data_out[4] = %d", data_out[BITWIDTH*4 +: BITWIDTH]);
    63. $display("data_out[5] = %d", data_out[BITWIDTH*5 +: BITWIDTH]);
    64. $display("data_out[6] = %d", data_out[BITWIDTH*6 +: BITWIDTH]);
    65. $display("data_out[7] = %d", data_out[BITWIDTH*7 +: BITWIDTH]);
    66. $display("data_out[8] = %d", data_out[BITWIDTH*8 +: BITWIDTH]);
    67. $display("data_out[9] = %d", data_out[BITWIDTH*9 +: BITWIDTH]);
    68. $display("data_out[10] = %d", data_out[BITWIDTH*10 +: BITWIDTH]);
    69. $display("data_out[11] = %d", data_out[BITWIDTH*11 +: BITWIDTH]);
    70. $display("data_out[12] = %d", data_out[BITWIDTH*12 +: BITWIDTH]);
    71. $display("data_out[13] = %d", data_out[BITWIDTH*13 +: BITWIDTH]);
    72. $display("data_out[14] = %d", data_out[BITWIDTH*14 +: BITWIDTH]);
    73. $display("data_out[15] = %d", data_out[BITWIDTH*15 +: BITWIDTH]);
    74. $display("data_out[16] = %d", data_out[BITWIDTH*16 +: BITWIDTH]);
    75. $display("data_out[17] = %d", data_out[BITWIDTH*17 +: BITWIDTH]);
    76. $display("data_out[18] = %d", data_out[BITWIDTH*18 +: BITWIDTH]);
    77. $display("data_out[19] = %d", data_out[BITWIDTH*19 +: BITWIDTH]);
    78. $display("data_out[20] = %d", data_out[BITWIDTH*20 +: BITWIDTH]);
    79. $display("data_out[21] = %d", data_out[BITWIDTH*21 +: BITWIDTH]);
    80. $display("data_out[22] = %d", data_out[BITWIDTH*22 +: BITWIDTH]);
    81. $display("data_out[23] = %d", data_out[BITWIDTH*23 +: BITWIDTH]);
    82. $display("data_out[24] = %d", data_out[BITWIDTH*24 +: BITWIDTH]);
    83. $display("data_out[25] = %d", data_out[BITWIDTH*25 +: BITWIDTH]);
    84. $display("data_out[26] = %d", data_out[BITWIDTH*26 +: BITWIDTH]);
    85. $display("data_out[27] = %d", data_out[BITWIDTH*27 +: BITWIDTH]);
    86. $display("data_out[28] = %d", data_out[BITWIDTH*28 +: BITWIDTH]);
    87. $display("data_out[29] = %d", data_out[BITWIDTH*29 +: BITWIDTH]);
    88. $display("data_out[30] = %d", data_out[BITWIDTH*30 +: BITWIDTH]);
    89. $display("data_out[31] = %d", data_out[BITWIDTH*31 +: BITWIDTH]);
    90. $finish;
    91. end
    92. endmodule

    看一张调了很久的波形:

    三、双调排序

    以下是双调排序.v(目前来看,这段代码还有很大改进空间):

    1. //
    2. //
    3. // Create Date: 22/03/2022
    4. // Author: Bala Dhinesh
    5. // Module Name: BitonicSortScalable
    6. // Project Name: Bitonic Sorting in Verilog
    7. //
    8. //
    9. // This code can work with any number of elements in the powers of 2. There are three primary states in the code, namely SORT, MERGE_SETUP, MERGE.
    10. // **SORT**: Sort the array for every eight elements.
    11. // **MERGE_SETUP**: This will make a bitonic sequence for the entire array.
    12. // **MERGE**: Do the bitonic sort from the bitonic sequence array obtained from MERGE_SETUP.
    13. // This code uses eight elements as a base for hardware and scaled from it.
    14. // reference: https://github.com/BalaDhinesh/Bitonic-Sorting-In-Verilog
    15. // another reference: https://github.com/john9636/SortingNetwork
    16. module sort_bitonic #(parameter BITWIDTH = 8, parameter ELEMENTS = 32)(
    17. input clk,
    18. input rst_n,
    19. input en_i,
    20. input [ELEMENTS*BITWIDTH-1:0] in,
    21. // add signal for max
    22. // 32-max1
    23. input enable_SORT32_max1,
    24. // 16-max1
    25. input enable_SORT16_max1,
    26. // The reason of multipling 8 is 8 group of 4-max1
    27. // 4 group of 8-max1, 2 group of 16-max1, 1 group of 32-max1
    28. output reg [BITWIDTH*8-1:0] sort_max,
    29. output reg done_o,
    30. output reg [ELEMENTS*BITWIDTH-1:0] out
    31. );
    32. // FSM states
    33. localparam START = 3'b000, // 0
    34. SETUP = 3'b001, // 1
    35. SORT = 3'b010, // 2
    36. DONE = 3'b011, // 3
    37. MERGE_SETUP = 3'b100, // 4
    38. MERGE = 3'b101, // 5
    39. IDLE = 3'b111; // 7
    40. reg positive; // sort ascending or descending for intermediate sub-arrays
    41. reg [2:0] state; // state of FSM
    42. reg [$clog2(ELEMENTS)-1:0] stage;
    43. reg [7:0] d[0:7]; // temporary register array
    44. reg [2:0] step;
    45. // Register variables for Bitonic merge
    46. reg [$clog2(ELEMENTS):0] compare;
    47. reg [$clog2(ELEMENTS)-1:0] i_MERGE;
    48. reg [$clog2(ELEMENTS)-1:0] sum;
    49. reg [$clog2(ELEMENTS)-1:0] sum_max;
    50. reg [$clog2(ELEMENTS):0] STAGES = ELEMENTS/16;
    51. reg [$clog2(ELEMENTS):0] STAGES_FIXED = ELEMENTS/16;
    52. always @(posedge clk or negedge rst_n)
    53. if (!rst_n) begin
    54. out <= 0;
    55. step <= 4'd0;
    56. done_o <= 1'd0;
    57. state <= START;
    58. end else begin
    59. case(state)
    60. START:
    61. begin
    62. step <= 0;
    63. done_o <= 1'd0;
    64. compare <= ELEMENTS;
    65. i_MERGE <= 0;
    66. positive <= 1;
    67. sum <= 8;
    68. sum_max <= 8;
    69. out <= in;
    70. if(en_i) begin
    71. state <= SETUP;
    72. stage <= 0;
    73. end
    74. end
    75. SETUP:
    76. begin
    77. if(stage <= (ELEMENTS/8)) begin
    78. d[0] <= in[stage*8*BITWIDTH + 0*BITWIDTH +: 8];
    79. d[1] <= in[stage*8*BITWIDTH + 1*BITWIDTH +: 8];
    80. d[2] <= in[stage*8*BITWIDTH + 2*BITWIDTH +: 8];
    81. d[3] <= in[stage*8*BITWIDTH + 3*BITWIDTH +: 8];
    82. d[4] <= in[stage*8*BITWIDTH + 4*BITWIDTH +: 8];
    83. d[5] <= in[stage*8*BITWIDTH + 5*BITWIDTH +: 8];
    84. d[6] <= in[stage*8*BITWIDTH + 6*BITWIDTH +: 8];
    85. d[7] <= in[stage*8*BITWIDTH + 7*BITWIDTH +: 8];
    86. state <= SORT;
    87. end
    88. else begin
    89. state <= START;
    90. end
    91. end
    92. SORT:
    93. begin
    94. case(step)
    95. 0: begin
    96. if(d[0] > d[1]) begin
    97. d[0] <= d[1];
    98. d[1] <= d[0];
    99. end
    100. if(d[2] < d[3]) begin
    101. d[2] <= d[3];
    102. d[3] <= d[2];
    103. end
    104. if(d[4] > d[5]) begin
    105. d[4] <= d[5];
    106. d[5] <= d[4];
    107. end
    108. if(d[6] < d[7]) begin
    109. d[6] <= d[7];
    110. d[7] <= d[6];
    111. end
    112. step <= step + 1;
    113. end
    114. 1: begin
    115. if(d[0] > d[2]) begin
    116. d[0] <= d[2];
    117. d[2] <= d[0];
    118. end
    119. if(d[1] > d[3]) begin
    120. d[1] <= d[3];
    121. d[3] <= d[1];
    122. end
    123. if(d[4] < d[6]) begin
    124. d[4] <= d[6];
    125. d[6] <= d[4];
    126. end
    127. if(d[5] < d[7]) begin
    128. d[5] <= d[7];
    129. d[7] <= d[5];
    130. end
    131. step <= step + 1;
    132. end
    133. 2: begin
    134. if(d[0] > d[1]) begin
    135. d[0] <= d[1];
    136. d[1] <= d[0];
    137. end
    138. if(d[2] > d[3]) begin
    139. d[2] <= d[3];
    140. d[3] <= d[2];
    141. end
    142. if(d[4] < d[5]) begin
    143. d[4] <= d[5];
    144. d[5] <= d[4];
    145. end
    146. if(d[6] < d[7]) begin
    147. d[6] <= d[7];
    148. d[7] <= d[6];
    149. end
    150. step <= step + 1;
    151. end
    152. 3: begin
    153. if(stage%2 ==0) begin
    154. if(d[0] > d[4]) begin
    155. d[0] <= d[4];
    156. d[4] <= d[0];
    157. end
    158. if(d[1] > d[5]) begin
    159. d[1] <= d[5];
    160. d[5] <= d[1];
    161. end
    162. if(d[2] > d[6]) begin
    163. d[2] <= d[6];
    164. d[6] <= d[2];
    165. end
    166. if(d[3] > d[7]) begin
    167. d[3] <= d[7];
    168. d[7] <= d[3];
    169. end
    170. end
    171. else begin
    172. if(d[0] < d[4]) begin
    173. d[0] <= d[4];
    174. d[4] <= d[0];
    175. end
    176. if(d[1] < d[5]) begin
    177. d[1] <= d[5];
    178. d[5] <= d[1];
    179. end
    180. if(d[2] < d[6]) begin
    181. d[2] <= d[6];
    182. d[6] <= d[2];
    183. end
    184. if(d[3] < d[7]) begin
    185. d[3] <= d[7];
    186. d[7] <= d[3];
    187. end
    188. end
    189. step <= step + 1;
    190. end
    191. 4: begin
    192. if(stage%2 ==0) begin
    193. if(d[0] > d[2]) begin
    194. d[0] <= d[2];
    195. d[2] <= d[0];
    196. end
    197. if(d[1] > d[3]) begin
    198. d[1] <= d[3];
    199. d[3] <= d[1];
    200. end
    201. if(d[4] > d[6]) begin
    202. d[4] <= d[6];
    203. d[6] <= d[4];
    204. end
    205. if(d[5] > d[7]) begin
    206. d[5] <= d[7];
    207. d[7] <= d[5];
    208. end
    209. end
    210. else begin
    211. if(d[0] < d[2]) begin
    212. d[0] <= d[2];
    213. d[2] <= d[0];
    214. end
    215. if(d[1] < d[3]) begin
    216. d[1] <= d[3];
    217. d[3] <= d[1];
    218. end
    219. if(d[4] < d[6]) begin
    220. d[4] <= d[6];
    221. d[6] <= d[4];
    222. end
    223. if(d[5] < d[7]) begin
    224. d[5] <= d[7];
    225. d[7] <= d[5];
    226. end
    227. end
    228. step <= step + 1;
    229. end
    230. 5: begin
    231. if(stage%2 ==0) begin
    232. if(d[0] > d[1]) begin
    233. d[0] <= d[1];
    234. d[1] <= d[0];
    235. end
    236. if(d[2] > d[3]) begin
    237. d[2] <= d[3];
    238. d[3] <= d[2];
    239. end
    240. if(d[4] > d[5]) begin
    241. d[4] <= d[5];
    242. d[5] <= d[4];
    243. end
    244. if(d[6] > d[7]) begin
    245. d[6] <= d[7];
    246. d[7] <= d[6];
    247. end
    248. end
    249. else begin
    250. if(d[0] < d[1]) begin
    251. d[0] <= d[1];
    252. d[1] <= d[0];
    253. end
    254. if(d[2] < d[3]) begin
    255. d[2] <= d[3];
    256. d[3] <= d[2];
    257. end
    258. if(d[4] < d[5]) begin
    259. d[4] <= d[5];
    260. d[5] <= d[4];
    261. end
    262. if(d[6] < d[7]) begin
    263. d[6] <= d[7];
    264. d[7] <= d[6];
    265. end
    266. end
    267. step <= 4'd0;
    268. state <= DONE;
    269. end
    270. default: step <= 4'd0;
    271. endcase
    272. end
    273. DONE: begin
    274. if(stage == (ELEMENTS/8 - 1)) begin
    275. out[stage*8*BITWIDTH + 0*BITWIDTH +: 8] <= d[0];
    276. out[stage*8*BITWIDTH + 1*BITWIDTH +: 8] <= d[1];
    277. out[stage*8*BITWIDTH + 2*BITWIDTH +: 8] <= d[2];
    278. out[stage*8*BITWIDTH + 3*BITWIDTH +: 8] <= d[3];
    279. out[stage*8*BITWIDTH + 4*BITWIDTH +: 8] <= d[4];
    280. out[stage*8*BITWIDTH + 5*BITWIDTH +: 8] <= d[5];
    281. out[stage*8*BITWIDTH + 6*BITWIDTH +: 8] <= d[6];
    282. out[stage*8*BITWIDTH + 7*BITWIDTH +: 8] <= d[7];
    283. if(ELEMENTS == 8) state <= IDLE;
    284. // add code by dention 20240514 start
    285. // add 2 group of 16-max1
    286. else begin
    287. if (enable_SORT16_max1) begin
    288. if (out[0*8*BITWIDTH + 7*BITWIDTH +: 8] > out[1*8*BITWIDTH + 0*BITWIDTH +: 8]) begin
    289. sort_max[BITWIDTH-1:0] <= out[0*8*BITWIDTH + 7*BITWIDTH +: 8]; // 16-max1
    290. end
    291. else begin
    292. sort_max[BITWIDTH-1:0] <= out[1*8*BITWIDTH + 0*BITWIDTH +: 8];
    293. end
    294. if (out[2*8*BITWIDTH + 7*BITWIDTH +: 8] > out[3*8*BITWIDTH + 0*BITWIDTH +: 8]) begin
    295. sort_max[2*BITWIDTH-1:BITWIDTH] <= out[2*8*BITWIDTH + 7*BITWIDTH +: 8]; // 16-max1
    296. end
    297. else begin
    298. sort_max[2*BITWIDTH-1:BITWIDTH] <= out[3*8*BITWIDTH + 0*BITWIDTH +: 8];
    299. end
    300. sort_max[8*BITWIDTH-1:2*BITWIDTH] <= 0;
    301. state <= IDLE;
    302. done_o <= 1;
    303. end
    304. else begin
    305. sort_max[8*BITWIDTH-1:0] <= 0;
    306. state <= MERGE_SETUP;
    307. end
    308. end
    309. stage <= 0;
    310. sum <= 8;
    311. i_MERGE <= 0;
    312. compare <= 16;
    313. end
    314. else if(stage < (ELEMENTS/8)) begin
    315. out[stage*8*BITWIDTH + 0*BITWIDTH +: 8] <= d[0];
    316. out[stage*8*BITWIDTH + 1*BITWIDTH +: 8] <= d[1];
    317. out[stage*8*BITWIDTH + 2*BITWIDTH +: 8] <= d[2];
    318. out[stage*8*BITWIDTH + 3*BITWIDTH +: 8] <= d[3];
    319. out[stage*8*BITWIDTH + 4*BITWIDTH +: 8] <= d[4];
    320. out[stage*8*BITWIDTH + 5*BITWIDTH +: 8] <= d[5];
    321. out[stage*8*BITWIDTH + 6*BITWIDTH +: 8] <= d[6];
    322. out[stage*8*BITWIDTH + 7*BITWIDTH +: 8] <= d[7];
    323. state <= SETUP;
    324. stage <= stage + 1;
    325. end
    326. else begin
    327. out <= 110;
    328. state <= IDLE;
    329. end
    330. end
    331. MERGE_SETUP:
    332. begin
    333. if(STAGES == ELEMENTS | STAGES_FIXED == 1) begin
    334. if(sum == ELEMENTS/2) begin
    335. // add code by dention 20240514 start
    336. // add 32-max1
    337. if (enable_SORT32_max1) begin
    338. // choose double sort bitonic 16 max1 and compare two max1 to gain the max in 32
    339. if (out[1*8*BITWIDTH + 7*BITWIDTH +: 8] > out[2*8*BITWIDTH + 0*BITWIDTH +: 8]) begin
    340. sort_max[BITWIDTH-1:0] <= out[1*8*BITWIDTH + 7*BITWIDTH +: 8]; // 32-max1
    341. end
    342. else begin
    343. sort_max[BITWIDTH-1:0] <= out[2*8*BITWIDTH + 0*BITWIDTH +: 8]; // 32-max1
    344. end
    345. sort_max[BITWIDTH*8-1:BITWIDTH] <= 0;
    346. state <= IDLE;
    347. done_o <= 1;
    348. end
    349. // add code by dention 20240514 end
    350. else begin
    351. sort_max[BITWIDTH*8-1:0] <= 0;
    352. state <= MERGE;
    353. end
    354. end
    355. else begin
    356. sum <= sum_max * 2; //16
    357. sum_max <= sum_max * 2; //16
    358. state <= MERGE_SETUP;
    359. i_MERGE <= 0;
    360. compare <= sum_max*4; //64
    361. positive <= 1;
    362. stage <= 0;
    363. STAGES <= STAGES_FIXED / 2; // 2
    364. STAGES_FIXED <= STAGES_FIXED / 2; // 1
    365. end
    366. end
    367. // across-0 min-max-min-max 1 period
    368. // across-1 min-max-min-max 2 period
    369. // across-3 min-max-min-max 3 period
    370. // across-7 min-max-min-max 4 period
    371. else begin
    372. if((sum + i_MERGE) < compare && (compare <= ELEMENTS) && (stage < STAGES)) begin
    373. if(positive) begin
    374. if(out[i_MERGE*BITWIDTH +: 8] > out[(i_MERGE+sum)*BITWIDTH +: 8]) begin
    375. out[i_MERGE*BITWIDTH +: 8] <= out[(i_MERGE+sum)*BITWIDTH +: 8];
    376. out[(i_MERGE+sum)*BITWIDTH +: 8] <= out[i_MERGE*BITWIDTH +: 8];
    377. end
    378. end
    379. else begin
    380. if(out[i_MERGE*BITWIDTH +: 8] < out[(i_MERGE+sum)*BITWIDTH +: 8]) begin
    381. out[i_MERGE*BITWIDTH +: 8] <= out[(i_MERGE+sum)*BITWIDTH +: 8];
    382. out[(i_MERGE+sum)*BITWIDTH +: 8] <= out[i_MERGE*BITWIDTH +: 8];
    383. end
    384. end
    385. if ((sum + i_MERGE) >= (compare - 1)) begin
    386. i_MERGE <= compare;
    387. compare <= compare + 2*sum;
    388. stage = stage + 1;
    389. if(STAGES == 2) begin
    390. if(stage == 0) positive <= 1;
    391. else positive <= 0;
    392. end
    393. else begin
    394. if((stage%(STAGES*2/STAGES_FIXED)) < STAGES/STAGES_FIXED) positive <= 1;
    395. else positive <= 0;
    396. end
    397. state <= MERGE_SETUP;
    398. end
    399. else begin
    400. i_MERGE = i_MERGE + 1;
    401. state <= MERGE_SETUP;
    402. end
    403. end
    404. else begin
    405. state <= MERGE_SETUP;
    406. i_MERGE <= 0;
    407. positive <= 1;
    408. sum <= sum / 2;
    409. compare <= sum;
    410. stage <= 0;
    411. STAGES <= STAGES * 2;
    412. end
    413. end
    414. end
    415. MERGE:
    416. begin
    417. if(sum == 1) begin
    418. state <= IDLE;
    419. done_o <= 1;
    420. end
    421. else begin
    422. if((sum + i_MERGE) < ELEMENTS) begin
    423. if(out[i_MERGE*BITWIDTH +: 8] > out[(i_MERGE+sum)*BITWIDTH +: 8]) begin
    424. out[i_MERGE*BITWIDTH +: 8] <= out[(i_MERGE+sum)*BITWIDTH +: 8];
    425. out[(i_MERGE+sum)*BITWIDTH +: 8] <= out[i_MERGE*BITWIDTH +: 8];
    426. end
    427. if ((sum + i_MERGE) >= (compare - 1)) begin
    428. i_MERGE <= compare;
    429. compare <= compare * 2;
    430. end
    431. else begin
    432. i_MERGE = i_MERGE + 1;
    433. state <= MERGE;
    434. end
    435. end
    436. else begin
    437. state <= MERGE;
    438. i_MERGE <= 0;
    439. sum <= sum / 2;
    440. compare <= sum;
    441. end
    442. end
    443. end
    444. IDLE: state <= IDLE;
    445. default: state <= START;
    446. endcase
    447. end
    448. endmodule

    以下是testbench:

    1. `timescale 1ns/1ps
    2. `define clk_period 20
    3. module BitonicSortScalable_tb #(
    4. parameter BITWIDTH = 8, // Bitwidth of each element
    5. parameter ELEMENTS = 32 // Number of elements to be sorted. This value must be a powers of two
    6. )
    7. ();
    8. reg clk, rst_n, en_i;
    9. reg [ELEMENTS*BITWIDTH-1:0] in;
    10. wire done_o;
    11. wire [ELEMENTS*BITWIDTH-1:0] out;
    12. reg enable_SORT32_max1;
    13. reg enable_SORT16_max1;
    14. wire [BITWIDTH*8-1:0] sort_max;
    15. sort_bitonic bitonic_SORT_u(
    16. clk,
    17. rst_n,
    18. en_i,
    19. in,
    20. enable_SORT32_max1,
    21. enable_SORT16_max1,
    22. sort_max,
    23. done_o,
    24. out
    25. );
    26. integer i;
    27. initial begin
    28. clk = 1'b1;
    29. end
    30. always #(`clk_period/2) begin
    31. clk = ~clk;
    32. end
    33. initial begin
    34. rst_n = 0;
    35. en_i = 0;
    36. in = 0;
    37. enable_SORT16_max1 = 0;
    38. enable_SORT32_max1 = 0;
    39. #(`clk_period);
    40. rst_n = 1;
    41. en_i = 1;
    42. enable_SORT16_max1 = 1;
    43. // Input array vector to sort.
    44. // Increase the number of elements based on ELEMENTS parameter
    45. in ={ 8'd28, 8'd23, 8'd24, 8'd16, 8'd11, 8'd25, 8'd29, 8'd1,
    46. 8'd10, 8'd32, 8'd21, 8'd2, 8'd27, 8'd31, 8'd3, 8'd30,
    47. 8'd15, 8'd13, 8'd0, 8'd8, 8'd5, 8'd18, 8'd22, 8'd26,
    48. 8'd4, 8'd6, 8'd9, 8'd19, 8'd20, 8'd7, 8'd14, 8'd17
    49. };
    50. #(`clk_period);
    51. en_i = 0;
    52. #(`clk_period*10000);
    53. $display("Input array:");
    54. for(i=0;i
    55. $write(in[i*BITWIDTH +: 8]);
    56. $display("\nOutput sorted array:");
    57. for(i=0;i
    58. $write(out[i*BITWIDTH +: 8]);
    59. $display("\ndone %d", done_o);
    60. $finish;
    61. end
    62. endmodule

    四、堆排序

    以下是堆排序的.v代码:

    1. // reference: https://zhuanlan.zhihu.com/p/32166363
    2. `timescale 1ns / 1ps
    3. module sort_heap
    4. #(
    5. parameter addr_width = 5,
    6. parameter data_width = 8
    7. )
    8. (
    9. input clk,
    10. input rst_n,
    11. input en,
    12. input clr,
    13. output reg done,
    14. input [addr_width - 1:0] parent,
    15. input [addr_width - 1:0] length,
    16. output reg wea,
    17. output reg [ addr_width - 1:0 ] addra,
    18. output reg [ data_width - 1:0 ] data_we,
    19. input [ data_width - 1:0 ] data_re
    20. );
    21. reg [data_width - 1:0] temp;
    22. reg [addr_width :0] parent_r;//attention: For recognize the parent, we must expand data width of it
    23. reg [addr_width :0] child_r;
    24. reg [addr_width :0] length_r;
    25. parameter IDLE = 6'b000001;
    26. parameter BEGINA = 6'b000010;
    27. parameter GET = 6'b000100;
    28. parameter COMPARE = 6'b001000;
    29. parameter WRITE = 6'b010000;
    30. parameter COMPLETE= 6'b100000;
    31. reg [5:0] state;
    32. reg [5:0] next_state;
    33. reg [7:0] cnt;
    34. reg [data_width - 1:0] child_compare;
    35. always@(posedge clk or negedge rst_n)
    36. begin
    37. if(!rst_n) begin state <= IDLE; end
    38. else begin state <= next_state; end
    39. end
    40. always@(*)
    41. begin
    42. case(state)
    43. IDLE: begin
    44. if(en) begin next_state = BEGINA; end
    45. else begin next_state = IDLE; end
    46. end
    47. BEGINA:begin
    48. if(cnt == 8'd2) begin next_state = GET; end
    49. else begin next_state = BEGINA; end
    50. end
    51. GET: begin
    52. if(child_r >= length_r) begin next_state = COMPLETE; end
    53. else if(cnt == 8'd4) begin next_state = COMPARE; end
    54. else begin next_state = GET; end
    55. end
    56. COMPARE: begin
    57. if(temp >= child_compare) begin next_state = COMPLETE; end
    58. else begin next_state = WRITE; end
    59. end
    60. WRITE: begin
    61. if(cnt == 8'd1) begin next_state = GET; end
    62. else begin next_state = WRITE; end
    63. end
    64. COMPLETE:begin
    65. if(clr) begin next_state = IDLE; end
    66. else begin next_state = COMPLETE; end
    67. end
    68. endcase
    69. end
    70. reg [data_width - 1:0] child_R;
    71. reg [data_width - 1:0] child_L;
    72. always@(posedge clk or negedge rst_n)
    73. begin
    74. if(!rst_n) begin done <= 1'b0; end
    75. else
    76. begin
    77. case(state)
    78. IDLE: begin
    79. parent_r <= {1'b0, parent};
    80. length_r <= {1'b0, length};
    81. child_r <= 2*parent + 1'b1;
    82. cnt <= 8'd0; child_R <= 0; child_L <= 0;
    83. done <= 1'b0;
    84. end
    85. BEGINA:begin
    86. if(cnt == 8'd0) begin addra <= parent_r; cnt <= cnt + 1'b1; end
    87. else if(cnt == 8'd2) begin temp <= data_re; cnt <= 1'b0; end
    88. else begin cnt <= cnt + 1'b1; end
    89. end
    90. GET: begin
    91. if(child_r >= length_r) begin addra <= addra; end
    92. else
    93. begin
    94. if(cnt == 8'd0) begin addra <= child_r; cnt <= cnt + 1'b1; end
    95. else if(cnt == 8'd1) begin addra <= child_r + 1'b1; cnt <= cnt + 1'b1; end
    96. else if(cnt == 8'd2) begin child_L <= data_re; cnt <= cnt + 1'b1; end
    97. else if(cnt == 8'd3) begin child_R <= data_re; cnt <= cnt + 1'b1; end
    98. else if(cnt == 8'd4)
    99. begin
    100. if( (child_r + 1'b1 < length_r) && (child_R > child_L) )
    101. begin
    102. child_r <= child_r + 1'b1;
    103. child_compare <= child_R;
    104. end
    105. else
    106. begin
    107. child_r <= child_r;
    108. child_compare <= child_L;
    109. end
    110. cnt <= 8'd0;
    111. end
    112. else begin cnt <= cnt + 1'b1; end
    113. end
    114. end
    115. COMPARE: begin end
    116. WRITE: begin
    117. if(cnt == 8'd0) begin
    118. addra <= parent_r; wea <= 1'b1;
    119. data_we <= child_compare; cnt <= cnt + 1'b1;
    120. end
    121. else if(cnt == 8'd1) begin
    122. wea <= 1'b0; cnt <= 8'd0;
    123. parent_r <= child_r;
    124. child_r <= child_r*2 + 1'b1;
    125. end
    126. else begin cnt <= cnt; end
    127. end
    128. COMPLETE: begin
    129. if(cnt == 8'd0) begin
    130. wea <= 1'b1; addra <= parent_r;
    131. data_we <= temp; cnt <= cnt + 1'b1;
    132. end
    133. else if(cnt == 8'd1)
    134. begin
    135. wea <= 1'b0;
    136. cnt <= cnt + 1'b1;
    137. done <= 1'b1;
    138. end
    139. else if(cnt == 8'd2)
    140. begin
    141. done <= 1'b0;
    142. cnt <= 8'd2;
    143. end
    144. end
    145. endcase
    146. end
    147. end
    148. endmodule

    以下是堆排序的top文件(注意事先在vivado的库内例化ROM,并载入coe,网上有大量关于coe的语法,我这边也贴上)

    1. `timescale 1ns / 1ps
    2. module sort_heap_top
    3. #(
    4. parameter addr_width = 5, //stack address width
    5. parameter data_width = 8, //stack data width
    6. parameter stack_deepth = 31 //stack deepth
    7. )
    8. (
    9. input clk,
    10. input rst_n
    11. );
    12. reg en; //initial module input: Enable initial process
    13. reg clr; //initial module input: Reset initial process
    14. wire done; //initial module output: One initial process have done
    15. reg [addr_width - 1:0] parent; //initial module input: Parent
    16. reg [addr_width - 1:0] length; //initial module input: Length of list
    17. wire wea; //RAM module input: write enable
    18. wire [addr_width - 1:0] addra; //RAM module input: write/read address
    19. wire [data_width - 1:0] data_we; //RAM module input: write data
    20. wire [data_width - 1:0] data_re; //RAM module output: read data
    21. parameter BEGINA = 9'b0_0000_0001;//stage 1: stack initial
    22. parameter RANK = 9'b0_0000_0010;
    23. parameter FINISH = 9'b0_0000_0100;
    24. parameter DONE = 9'b0_0000_1000;
    25. parameter READ = 9'b0_0001_0000;//stage 2: rank of stack
    26. parameter WRITE = 9'b0_0010_0000;
    27. parameter RANK_2 = 9'b0_0100_0000;
    28. parameter FINISH_2= 9'b0_1000_0000;
    29. parameter DONE_2 = 9'b1_0000_0000;
    30. reg [addr_width - 1:0] cnt; //counter in FSM stage 1/2
    31. reg [addr_width - 1:0] cnt2; //counter in FSM stage 2
    32. reg [8:0] state; //FSM state
    33. reg [8:0] next_state; //FSM next state
    34. reg [addr_width - 1:0] addr; //stack inital process read RAM address
    35. reg initial_done; //stack initial done
    36. reg [data_width - 1:0] list_i; //RANK process reg
    37. reg [data_width - 1:0] list_0; //RANK process reg
    38. reg wea_FSM; //wea signal from FSM
    39. reg [data_width - 1:0] data_we_FSM; //write data form FSM
    40. //FSM stage 1: state transform
    41. always@(posedge clk or negedge rst_n)
    42. begin
    43. if(!rst_n) begin state <= BEGINA; end
    44. else begin state <= next_state; end
    45. end
    46. //FSM stage 2: state change
    47. always@(*)
    48. begin
    49. case(state)
    50. BEGINA: begin next_state = RANK; end //stack initial process begin
    51. RANK: begin
    52. if(done) begin next_state = FINISH; end
    53. else begin next_state = RANK; end
    54. end
    55. FINISH:begin
    56. if(addr == stack_deepth - 1 & cnt != {addr_width{1'b1}}) begin next_state = BEGINA; end
    57. else if(addr == stack_deepth - 1 & cnt == {addr_width{1'b1}} ) begin next_state = DONE; end
    58. else begin next_state = FINISH; end
    59. end
    60. DONE: begin next_state = READ; end //stack initial process have done
    61. READ: begin //stack rank process begin
    62. if(cnt == 3) begin next_state = WRITE; end
    63. else begin next_state = READ; end
    64. end
    65. WRITE:begin
    66. if(cnt == 2) begin next_state = RANK_2; end
    67. else begin next_state = WRITE; end
    68. end
    69. RANK_2:begin
    70. if(done) begin next_state = FINISH_2; end
    71. else begin next_state = RANK_2; end
    72. end
    73. FINISH_2:begin
    74. if(addr == stack_deepth - 1 & cnt2 != 0) begin next_state = READ; end
    75. else if(addr == stack_deepth - 1 & cnt2 == 0) begin next_state = DONE_2; end
    76. else begin next_state = FINISH_2; end
    77. end
    78. DONE_2:begin next_state = DONE_2; end//stack rank process done
    79. endcase
    80. end
    81. //FSM stage 3: state output
    82. always@(posedge clk or negedge rst_n)
    83. begin
    84. if(!rst_n) begin cnt <= stack_deepth/2; addr <= {addr_width{1'b1}}; initial_done <= 1'b0; wea_FSM <= 1'b0; end
    85. else
    86. begin
    87. case(state)
    88. BEGINA: begin //stack initial begin
    89. en <= 1'b1;
    90. clr <= 1'b0;
    91. parent <= cnt;
    92. length <= stack_deepth;
    93. end
    94. RANK: begin
    95. clr <= 1'b0;
    96. if(done) begin cnt <= cnt - 1'b1; clr <= 1'b1; en <= 1'b0; addr <= 4'd0; end
    97. end
    98. FINISH:begin clr <= 1'b0; addr <= addr + 1'b1; end
    99. DONE: begin
    100. initial_done <= 1'b1; //stack initial have done
    101. cnt2 <= stack_deepth - 1;
    102. cnt <= 0;
    103. end
    104. READ: begin //stack rank process begin
    105. if(cnt == 0) begin addr <= 0; cnt <= cnt + 1'b1; end
    106. else if(cnt == 1) begin addr <= cnt2; cnt <= cnt + 1'b1; end
    107. else if(cnt == 2) begin list_0 <= data_re; cnt <= cnt + 1'b1; end
    108. else if(cnt == 3) begin list_i <= data_re; cnt <= 0; end
    109. else begin cnt <= cnt; end
    110. end
    111. WRITE:begin
    112. if(cnt == 0) begin
    113. wea_FSM <= 1'b1;
    114. addr <= 0; data_we_FSM <= list_i;
    115. cnt <= cnt + 1'b1;
    116. end
    117. else if(cnt == 1) begin
    118. wea_FSM <= 1'b1;
    119. addr <= cnt2; data_we_FSM <= list_0;
    120. cnt <= cnt + 1'b1;
    121. end
    122. else if(cnt == 2) begin wea_FSM <= 1'b0; cnt <= 0; parent <= 0; length <= cnt2; en <= 1'b1; end
    123. else begin cnt <= cnt; end
    124. end
    125. RANK_2:begin
    126. if(done) begin cnt2 <= cnt2 - 1'b1; clr <= 1'b1; en <= 1'b0; addr <= 0; end
    127. end
    128. FINISH_2:begin
    129. clr <= 1'b0; addr <= addr + 1'b1;
    130. end
    131. endcase
    132. end
    133. end
    134. wire wea_initial;
    135. wire [data_width - 1:0] data_we_initial;
    136. //stack initial process
    137. sort_heap U1
    138. (
    139. .clk(clk),
    140. .rst_n(rst_n),
    141. .en(en),
    142. .clr(clr),
    143. .done(done),
    144. .parent(parent),
    145. .length(length),
    146. .wea(wea_initial),
    147. .addra(addra),
    148. .data_we(data_we_initial),
    149. .data_re(data_re)
    150. );
    151. wire [addr_width - 1:0] RAM_addr;
    152. assign wea = (state == WRITE) ? wea_FSM:wea_initial;
    153. assign RAM_addr = (state == FINISH || state == READ || state == WRITE || state == FINISH_2) ? addr:addra;
    154. assign data_we = (state == WRITE) ? data_we_FSM:data_we_initial;
    155. //RAM module
    156. blk_mem_gen_0 RAM1
    157. (
    158. .clka(clk),
    159. .wea(wea),
    160. .addra(RAM_addr),
    161. .dina(data_we),
    162. .douta(data_re)
    163. );
    164. endmodule

    对应的coe文件如下:

    1. ;分号后的代码都被认为是注释内容
    2. memory_initialization_radix=10;
    3. memory_initialization_vector=
    4. 1,
    5. 3,
    6. 4,
    7. 5,
    8. 2,
    9. 6,
    10. 9,
    11. 7,
    12. 8,
    13. 0,
    14. 11,
    15. 15,
    16. 13,
    17. 19,
    18. 20,
    19. 16,
    20. 12,
    21. 10,
    22. 14,
    23. 11,
    24. 25,
    25. 21,
    26. 23,
    27. 22,
    28. 18,
    29. 27,
    30. 36,
    31. 29,
    32. 17,
    33. 24,
    34. 26,
    35. 30;

     以下为对应的testbench:

    1. `define clk_period 20
    2. module sort_hep_top_tb
    3. ();
    4. reg clk;
    5. reg rst_n;
    6. initial begin
    7. clk = 1'b1;
    8. end
    9. always #(`clk_period/2) begin
    10. clk = ~clk;
    11. end
    12. initial begin
    13. rst_n = 0;
    14. #(`clk_period);
    15. rst_n = 1;
    16. #(`clk_period*100000);
    17. $finish;
    18. end
    19. sort_heap_top sort_heap_top_u(
    20. clk,
    21. rst_n
    22. );
    23. endmodule

    五、资源占用和延时对比

    直接贴图表示,不多废话!

  • 相关阅读:
    【mysql】--记一次delete删除语句使用别名的坑
    HBRD-212/5电源监视继电器
    [附源码]java毕业设计医学季节性疾病筛查系统
    Vue3 + Nodejs 实战 ,文件上传项目--大文件分片上传+断点续传
    HttpServletResponse HttpServletRequest
    C++ 逻辑运算符
    【21天Python进阶学习挑战赛】[day3]json标准库大总结
    定时任务cron
    nginx服务中使用用户认证功能,对Web后台url进行用户验证【加一层认证】
    HTTP 到 HTTPS 再到 HSTS 的转变
  • 原文地址:https://blog.csdn.net/weixin_41029027/article/details/138925616