• C/C++、C#、F#、Go AMD x86-64 编译器内部实现乘法运算


    我们知道,任何编译型语言的编译器都不是直接把源码编译为“目标CPU平台的机器字节码”,哪怕是 C/C++ 编译器也一样:需要先把源码编译为ASM汇编文本源码,再提交给ASM汇编器进行静态编译(汇编),例如 VC++ 的 CL 内部集成了 MASM 汇编器。

    而本文所获取的各语言的汇编代码,就是编译器在这一步骤中生成的汇编源码。

    原型表达式:C/C++

    int mul(int x, int y) { return x * y; }

    原型表达式:Go

    1. package main
    2. func mul(x int, y int) int {
    3. return x * y
    4. }
    5. func main() {}

    原型表达式:C#

    1. class Program
    2. {
    3. static int mul(int x, int y) => x * y;
    4. }

    原型表达式:F#

    1. module Program
    2. let mul(x, y) = x * y

    C/C++ on x86-64 clang 6.0.0

    1. mul(int, int):                               # @mul(int, int)
    2.         imul    edi, esi
    3.         mov     eax, edi
    4.         ret

    C/C++ on MSVC v19.10 WINE(VC++ 2019;注:VC++ 2022 的版本号大于 19.29.30140.0)

    1. x$ = 8
    2. y$ = 16
    3. int mul(int,int) PROC ; mul
    4. imul ecx, edx
    5. mov eax, ecx
    6. ret 0
    7. int mul(int,int) ENDP ; mul

    C# .NET 6.0(dotnet core)

    1. Program:.ctor():this:
    2. ret
    3. Program:mul(int,int):int:
    4. mov eax, edi
    5. imul eax, esi
    6. ret

    C# .NET Framework 4.0 JIT(Intel x86-32)DEBUG【会包含冗余的CPU指令】

    1. static int mul(int x, int y) => x * y;
    2. ## 完整函数实现(等价于上面其它语言ASM被编译为最终机器代码)
    3. 0516B040 55 push ebp
    4. 0516B041 8B EC mov ebp,esp // 部署函数堆栈
    5. 0516B043 57 push edi
    6. 0516B044 56 push esi
    7. 0516B045 53 push ebx
    8. 0516B046 83 EC 34 sub esp,34h // 扩大52字节计算堆栈
    9. 0516B049 33 C0 xor eax,eax // 置空(位运算同值异或)
    10. ## 初始化函数的局部栈空间(清零),并将(X,Y)两个参数的值(位于 ECX、EDX 寄存器)复制到函数的局部变量栈上
    11. 0516B04B 89 45 F0 mov dword ptr [ebp-10h],eax
    12. 0516B04E 89 45 E4 mov dword ptr [ebp-1Ch],eax
    13. 0516B051 89 4D C4 mov dword ptr [ebp-3Ch],ecx
    14. 0516B054 89 55 C0 mov dword ptr [ebp-40h],edx
    15. 0516B057 83 3D F0 42 E8 00 00 cmp dword ptr ds:[0E842F0h],0
    16. 0516B05E 74 05 je Ppp.Windows.PppApplication+Program.mul(Int32, Int32)+025h (0516B065h)
    17. 0516B060 E8 0B 2C EB 6D call 7301DC70
    18. ## 该C#函数执行乘法运算的汇编指令(很清晰):
    19. 0516B065 8B 45 C0 mov eax,dword ptr [ebp-40h]
    20. 0516B068 0F AF 45 C4 imul eax,dword ptr [ebp-3Ch]
    21. ## 平衡函数堆栈并返回,注:EAX累加寄存器在X86汇编中常用于代表返回值
    22. 0516B06C 8D 65 F4 lea esp,[ebp-0Ch]
    23. 0516B06F 5B pop ebx
    24. 0516B070 5E pop esi
    25. 0516B071 5F pop edi
    26. 0516B072 5D pop ebp
    27. 0516B073 C3 ret ## 等价:RETN 0(也就是上面尚未汇编为机器码的汇编源文本形式:ret 0)

    F# on dotNET Native AOT

    1. Program:mul(int,int):int:
    2. mov eax, edi
    3. imul eax, esi
    4. ret

    Golang on x86 gccgo 12.2.20(打开编译器最大代码编译优化级别)

    剔除其它代码后,至少也需要执行以下汇编指令,而这只是做一个 X、Y 的简单乘法运算而已……

    1. main.mul:
    2. cmp rsp, QWORD PTR fs:112
    3. jb .L125
    4. .L124:
    5. mov rax, rdi
    6. imul rax, rsi
    7. ret
    8. .L125:
    9. xor r10d, r10d
    10. xor r11d, r11d
    11. call __morestack
    12. ret
    13. jmp .L124

    编译器生成完整汇编源代码:

    1. main.struct_4runtime_0gList_cruntime_0n_bint32_5..eq:
    2. cmp rsp, QWORD PTR fs:112
    3. jb .L6
    4. .L4:
    5. mov rdx, QWORD PTR [rsi]
    6. xor eax, eax
    7. cmp QWORD PTR [rdi], rdx
    8. jne .L1
    9. mov eax, DWORD PTR [rsi+8]
    10. cmp DWORD PTR [rdi+8], eax
    11. sete al
    12. .L1:
    13. ret
    14. .L6:
    15. xor r10d, r10d
    16. xor r11d, r11d
    17. call __morestack
    18. ret
    19. jmp .L4
    20. main._661_7struct_4Size_buint32_cMallocs_buint64_cFrees_buint64_5..eq:
    21. cmp rsp, QWORD PTR fs:112
    22. jb .L16
    23. .L15:
    24. xor edx, edx
    25. .L9:
    26. mov r9, QWORD PTR [rdi+8+rdx]
    27. mov r8, QWORD PTR [rdi+16+rdx]
    28. mov rax, QWORD PTR [rsi+8+rdx]
    29. mov rcx, QWORD PTR [rsi+16+rdx]
    30. mov r10d, DWORD PTR [rsi+rdx]
    31. cmp DWORD PTR [rdi+rdx], r10d
    32. je .L17
    33. xor eax, eax
    34. .L7:
    35. ret
    36. .L17:
    37. cmp r9, rax
    38. sete al
    39. cmp r8, rcx
    40. sete cl
    41. and al, cl
    42. je .L7
    43. add rdx, 24
    44. cmp rdx, 1464
    45. jne .L9
    46. ret
    47. .L16:
    48. xor r10d, r10d
    49. xor r11d, r11d
    50. call __morestack
    51. ret
    52. jmp .L15
    53. main.struct_4Size_buint32_cMallocs_buint64_cFrees_buint64_5..eq:
    54. cmp rsp, QWORD PTR fs:112
    55. jb .L23
    56. .L22:
    57. mov edx, DWORD PTR [rsi]
    58. xor eax, eax
    59. cmp DWORD PTR [rdi], edx
    60. jne .L18
    61. mov rcx, QWORD PTR [rsi+8]
    62. cmp QWORD PTR [rdi+8], rcx
    63. je .L24
    64. .L18:
    65. ret
    66. .L24:
    67. mov rax, QWORD PTR [rsi+16]
    68. cmp QWORD PTR [rdi+16], rax
    69. sete al
    70. ret
    71. .L23:
    72. xor r10d, r10d
    73. xor r11d, r11d
    74. call __morestack
    75. ret
    76. jmp .L22
    77. main._633_7float64..eq:
    78. cmp rsp, QWORD PTR fs:112
    79. jb .L33
    80. .L32:
    81. xor eax, eax
    82. jmp .L28
    83. .L35:
    84. add rax, 8
    85. cmp rax, 264
    86. je .L34
    87. .L28:
    88. movsd xmm0, QWORD PTR [rdi+rax]
    89. ucomisd xmm0, QWORD PTR [rsi+rax]
    90. jp .L29
    91. je .L35
    92. .L29:
    93. xor eax, eax
    94. ret
    95. .L34:
    96. mov eax, 1
    97. ret
    98. .L33:
    99. xor r10d, r10d
    100. xor r11d, r11d
    101. call __morestack
    102. ret
    103. jmp .L32
    104. main._632_7uintptr..eq:
    105. cmp rsp, QWORD PTR fs:112
    106. jb .L39
    107. .L37:
    108. sub rsp, 8
    109. mov edx, 256
    110. call runtime.memequal
    111. add rsp, 8
    112. ret
    113. .L39:
    114. mov r10d, 8
    115. xor r11d, r11d
    116. call __morestack
    117. ret
    118. jmp .L37
    119. main._6256_7uint64..eq:
    120. cmp rsp, QWORD PTR fs:112
    121. jb .L43
    122. .L41:
    123. sub rsp, 8
    124. mov edx, 2048
    125. call runtime.memequal
    126. add rsp, 8
    127. ret
    128. .L43:
    129. mov r10d, 8
    130. xor r11d, r11d
    131. call __morestack
    132. ret
    133. jmp .L41
    134. main._6122_7uintptr..eq:
    135. cmp rsp, QWORD PTR fs:112
    136. jb .L47
    137. .L45:
    138. sub rsp, 8
    139. mov edx, 976
    140. call runtime.memequal
    141. add rsp, 8
    142. ret
    143. .L47:
    144. mov r10d, 8
    145. xor r11d, r11d
    146. call __morestack
    147. ret
    148. jmp .L45
    149. main._68_7uint64..eq:
    150. cmp rsp, QWORD PTR fs:112
    151. jb .L51
    152. .L49:
    153. sub rsp, 8
    154. mov edx, 64
    155. call runtime.memequal
    156. add rsp, 8
    157. ret
    158. .L51:
    159. mov r10d, 8
    160. xor r11d, r11d
    161. call __morestack
    162. ret
    163. jmp .L49
    164. main._6128_7uint8..eq:
    165. cmp rsp, QWORD PTR fs:112
    166. jb .L55
    167. .L53:
    168. sub rsp, 8
    169. mov edx, 128
    170. call runtime.memequal
    171. add rsp, 8
    172. ret
    173. .L55:
    174. mov r10d, 8
    175. xor r11d, r11d
    176. call __morestack
    177. ret
    178. jmp .L53
    179. main._64096_7uint8..eq:
    180. cmp rsp, QWORD PTR fs:112
    181. jb .L59
    182. .L57:
    183. sub rsp, 8
    184. mov edx, 4096
    185. call runtime.memequal
    186. add rsp, 8
    187. ret
    188. .L59:
    189. mov r10d, 8
    190. xor r11d, r11d
    191. call __morestack
    192. ret
    193. jmp .L57
    194. main._668_7uint16..eq:
    195. cmp rsp, QWORD PTR fs:112
    196. jb .L63
    197. .L61:
    198. sub rsp, 8
    199. mov edx, 136
    200. call runtime.memequal
    201. add rsp, 8
    202. ret
    203. .L63:
    204. mov r10d, 8
    205. xor r11d, r11d
    206. call __morestack
    207. ret
    208. jmp .L61
    209. main._665_7uint32..eq:
    210. cmp rsp, QWORD PTR fs:112
    211. jb .L67
    212. .L65:
    213. sub rsp, 8
    214. mov edx, 260
    215. call runtime.memequal
    216. add rsp, 8
    217. ret
    218. .L67:
    219. mov r10d, 8
    220. xor r11d, r11d
    221. call __morestack
    222. ret
    223. jmp .L65
    224. main._64_7uintptr..eq:
    225. cmp rsp, QWORD PTR fs:112
    226. jb .L71
    227. .L69:
    228. sub rsp, 8
    229. mov edx, 32
    230. call runtime.memequal
    231. add rsp, 8
    232. ret
    233. .L71:
    234. mov r10d, 8
    235. xor r11d, r11d
    236. call __morestack
    237. ret
    238. jmp .L69
    239. main._65_7uint..eq:
    240. cmp rsp, QWORD PTR fs:112
    241. jb .L75
    242. .L73:
    243. sub rsp, 8
    244. mov edx, 40
    245. call runtime.memequal
    246. add rsp, 8
    247. ret
    248. .L75:
    249. mov r10d, 8
    250. xor r11d, r11d
    251. call __morestack
    252. ret
    253. jmp .L73
    254. main._6512_7uint8..eq:
    255. cmp rsp, QWORD PTR fs:112
    256. jb .L79
    257. .L77:
    258. sub rsp, 8
    259. mov edx, 512
    260. call runtime.memequal
    261. add rsp, 8
    262. ret
    263. .L79:
    264. mov r10d, 8
    265. xor r11d, r11d
    266. call __morestack
    267. ret
    268. jmp .L77
    269. main._6249_7uint8..eq:
    270. cmp rsp, QWORD PTR fs:112
    271. jb .L83
    272. .L81:
    273. sub rsp, 8
    274. mov edx, 249
    275. call runtime.memequal
    276. add rsp, 8
    277. ret
    278. .L83:
    279. mov r10d, 8
    280. xor r11d, r11d
    281. call __morestack
    282. ret
    283. jmp .L81
    284. main._6129_7uint8..eq:
    285. cmp rsp, QWORD PTR fs:112
    286. jb .L87
    287. .L85:
    288. sub rsp, 8
    289. mov edx, 129
    290. call runtime.memequal
    291. add rsp, 8
    292. ret
    293. .L87:
    294. mov r10d, 8
    295. xor r11d, r11d
    296. call __morestack
    297. ret
    298. jmp .L85
    299. main._632_7uint8..eq:
    300. cmp rsp, QWORD PTR fs:112
    301. jb .L91
    302. .L89:
    303. sub rsp, 8
    304. mov edx, 32
    305. call runtime.memequal
    306. add rsp, 8
    307. ret
    308. .L91:
    309. mov r10d, 8
    310. xor r11d, r11d
    311. call __morestack
    312. ret
    313. jmp .L89
    314. main._627_7string..eq:
    315. cmp rsp, QWORD PTR fs:112
    316. jb .L104
    317. .L102:
    318. push r12
    319. mov r12, rdi
    320. push rbp
    321. mov rbp, rsi
    322. push rbx
    323. xor ebx, ebx
    324. jmp .L95
    325. .L106:
    326. cmp rdi, rsi
    327. je .L97
    328. call memcmp
    329. test eax, eax
    330. jne .L93
    331. .L97:
    332. add rbx, 16
    333. cmp rbx, 432
    334. je .L105
    335. .L95:
    336. movdqu xmm0, XMMWORD PTR [r12+rbx]
    337. mov rdi, QWORD PTR [r12+rbx]
    338. movdqu xmm0, XMMWORD PTR [rbp+0+rbx]
    339. mov rsi, QWORD PTR [rbp+0+rbx]
    340. mov rdx, QWORD PTR [rbp+8+rbx]
    341. cmp rdx, QWORD PTR [r12+8+rbx]
    342. je .L106
    343. .L93:
    344. xor eax, eax
    345. pop rbx
    346. pop rbp
    347. pop r12
    348. ret
    349. .L105:
    350. mov eax, 1
    351. pop rbx
    352. pop rbp
    353. pop r12
    354. ret
    355. .L104:
    356. mov r10d, 24
    357. xor r11d, r11d
    358. call __morestack
    359. ret
    360. jmp .L102
    361. main._61024_7uint8..eq:
    362. cmp rsp, QWORD PTR fs:112
    363. jb .L110
    364. .L108:
    365. sub rsp, 8
    366. mov edx, 1024
    367. call runtime.memequal
    368. add rsp, 8
    369. ret
    370. .L110:
    371. mov r10d, 8
    372. xor r11d, r11d
    373. call __morestack
    374. ret
    375. jmp .L108
    376. main._62_7int32..eq:
    377. cmp rsp, QWORD PTR fs:112
    378. jb .L114
    379. .L112:
    380. sub rsp, 8
    381. mov edx, 8
    382. call runtime.memequal
    383. add rsp, 8
    384. ret
    385. .L114:
    386. mov r10d, 8
    387. xor r11d, r11d
    388. call __morestack
    389. ret
    390. jmp .L112
    391. main._664_7uint8..eq:
    392. cmp rsp, QWORD PTR fs:112
    393. jb .L118
    394. .L116:
    395. sub rsp, 8
    396. mov edx, 64
    397. call runtime.memequal
    398. add rsp, 8
    399. ret
    400. .L118:
    401. mov r10d, 8
    402. xor r11d, r11d
    403. call __morestack
    404. ret
    405. jmp .L116
    406. main._6256_7uint8..eq:
    407. cmp rsp, QWORD PTR fs:112
    408. jb .L122
    409. .L120:
    410. sub rsp, 8
    411. mov edx, 256
    412. call runtime.memequal
    413. add rsp, 8
    414. ret
    415. .L122:
    416. mov r10d, 8
    417. xor r11d, r11d
    418. call __morestack
    419. ret
    420. jmp .L120
    421. main.mul:
    422. cmp rsp, QWORD PTR fs:112
    423. jb .L125
    424. .L124:
    425. mov rax, rdi
    426. imul rax, rsi
    427. ret
    428. .L125:
    429. xor r10d, r10d
    430. xor r11d, r11d
    431. call __morestack
    432. ret
    433. jmp .L124
    434. main.main:
    435. cmp rsp, QWORD PTR fs:112
    436. jb .L128
    437. ret
    438. .L128:
    439. xor r10d, r10d
    440. xor r11d, r11d
    441. call __morestack
    442. ret
    443. ret
    444. __go_init_main:
    445. cmp rsp, QWORD PTR fs:112
    446. jb .L132
    447. .L130:
    448. sub rsp, 8
    449. mov esi, OFFSET FLAT:go..typelists
    450. mov edi, 11
    451. call runtime.registerTypeDescriptors
    452. call internal_1cpu..import
    453. call runtime..import
    454. add rsp, 8
    455. ret
    456. .L132:
    457. mov r10d, 8
    458. xor r11d, r11d
    459. call __morestack
    460. ret
    461. jmp .L130
    462. go..typelists:
    463. .quad internal_1cpu..types
    464. .quad runtime..types
    465. .quad internal_1abi..types
    466. .quad internal_1bytealg..types
    467. .quad internal_1goarch..types
    468. .quad internal_1goexperiment..types
    469. .quad internal_1goos..types
    470. .quad runtime_1internal_1atomic..types
    471. .quad runtime_1internal_1math..types
    472. .quad runtime_1internal_1sys..types
    473. .quad main..types
    474. main..types:
    475. .zero 16
    476. main._6256_7uint8..eq..f:
    477. .quad main._6256_7uint8..eq
    478. main._664_7uint8..eq..f:
    479. .quad main._664_7uint8..eq
    480. main._62_7int32..eq..f:
    481. .quad main._62_7int32..eq
    482. main._61024_7uint8..eq..f:
    483. .quad main._61024_7uint8..eq
    484. main._627_7string..eq..f:
    485. .quad main._627_7string..eq
    486. main._632_7uint8..eq..f:
    487. .quad main._632_7uint8..eq
    488. main._6129_7uint8..eq..f:
    489. .quad main._6129_7uint8..eq
    490. main._6249_7uint8..eq..f:
    491. .quad main._6249_7uint8..eq
    492. main._6512_7uint8..eq..f:
    493. .quad main._6512_7uint8..eq
    494. main._65_7uint..eq..f:
    495. .quad main._65_7uint..eq
    496. main._64_7uintptr..eq..f:
    497. .quad main._64_7uintptr..eq
    498. main._665_7uint32..eq..f:
    499. .quad main._665_7uint32..eq
    500. main._633_7float64..eq..f:
    501. .quad main._633_7float64..eq
    502. main._668_7uint16..eq..f:
    503. .quad main._668_7uint16..eq
    504. main._64096_7uint8..eq..f:
    505. .quad main._64096_7uint8..eq
    506. main._6128_7uint8..eq..f:
    507. .quad main._6128_7uint8..eq
    508. main._68_7uint64..eq..f:
    509. .quad main._68_7uint64..eq
    510. main._6122_7uintptr..eq..f:
    511. .quad main._6122_7uintptr..eq
    512. main.struct_4Size_buint32_cMallocs_buint64_cFrees_buint64_5..eq..f:
    513. .quad main.struct_4Size_buint32_cMallocs_buint64_cFrees_buint64_5..eq
    514. main._661_7struct_4Size_buint32_cMallocs_buint64_cFrees_buint64_5..eq..f:
    515. .quad main._661_7struct_4Size_buint32_cMallocs_buint64_cFrees_buint64_5..eq
    516. main._6256_7uint64..eq..f:
    517. .quad main._6256_7uint64..eq
    518. main._632_7uintptr..eq..f:
    519. .quad main._632_7uintptr..eq
    520. main.struct_4runtime_0gList_cruntime_0n_bint32_5..eq..f:
    521. .quad main.struct_4runtime_0gList_cruntime_0n_bint32_5..eq

    从上述内容中,大家可以自行看出很多门道;如果不严格地从目标平台CPU实际执行的机器汇编代码出发,去判断某种编程语言编译后代码的执行效率,是没有意义的。

  • 相关阅读:
    长文讲解Linux内核性能优化的思路和步骤
    回溯算法中组合问题的去重通用方案
    PHP跌出前十,Python依然霸占榜首,C#有望摘得年度编程语言 TIOBE 12 月编程语言排行榜
    时间序列分析2--时间序列数据的处理和绘制时序图
    B. Bin Packing Problem(线段树+multiset)
    基于Unity3D的AR射击游戏设计与实现
    springMVC 源码学习-请求映射原理
    【java】打印小火车
    [附源码]Python计算机毕业设计宠物领养系统
    C++ 【类和对象: 初始化列表,Static成员 --3】
  • 原文地址:https://blog.csdn.net/liulilittle/article/details/127700862