• 【TVM源码学习笔记】3.1.3 工作空间更新


    CodeGen中在分配内存后,即执行工作空间更新

    1. backend::FunctionInfo func_info;
    2. // defined()判断memory_plan_的数据是否为空,这里表示内存分配是否成功
    3. if (memory_plan_.defined()) {
    4. // TODO(@electriclilies, @jroesch): remove UpdateMainWorkspaceSize
    5. // 使用新的内存分配更新mod工作空间大小
    6. func_info =
    7. relay::tec::UpdateMainWorkspaceSize(mod, config_, memory_plan_->expr_to_storage_info);
    8. // 给mod加一个main_func_info属性,值为刚才更新后的函数信息
    9. mod = WithAttr(mod, "main_func_info", func_info);
    10. }

     UpdateMainWorkspaceSize的实现

    1. backend::FunctionInfo UpdateMainWorkspaceSize(const IRModule& mod, const CompilationConfig& config,
    2. Map<Expr, backend::StorageInfo> storage_info_map) {
    3. Function func = Downcast<Function>(mod->Lookup("main"));
    4. VLOG_CONTEXT << "UpdateMainWorkspaceSize";
    5. VLOG(1) << "calculating FunctionInfo for main:" << std::endl << PrettyPrint(func);
    6. // This is a Map<device, Map<storage_id, size>>
    7. // TODO(mbs): Collapsing VirtualDevices to just device type.
    8. // 索引为设备类型,值是分配的内存块id和大小
    9. std::unordered_map<DLDeviceType, std::unordered_map<int, int>, backend::EnumClassHash>
    10. sid_workspace;
    11. // This is a Map<device, size_of_inputs_and_outputs>
    12. // 索引为设备类型,值为输入输出(io)占用的字节数
    13. std::unordered_map<DLDeviceType, int, backend::EnumClassHash> device_io;
    14. // This is a Map<device, size_of_constants>
    15. // 索引为设备类型,值为常量占用的字节数
    16. std::unordered_map<DLDeviceType, int, backend::EnumClassHash> device_consts;
    17. // Initialize the mapping from all storage identifiers to workspace sizes,
    18. // the amount of device io, and the device constants.
    19. // storage_info_map是分配的内存表,对应各个token分配的内存.
    20. // 这里sid_workspace,device_io,device_consts是以token的设备类型为索引
    21. for (const auto& kv : storage_info_map) {
    22. const backend::StorageInfo& storage_info = kv.second;
    23. const std::vector<int64_t>& storage_ids = storage_info->storage_ids;
    24. const std::vector<VirtualDevice>& virtual_devices = storage_info->virtual_devices;
    25. CHECK_EQ(storage_ids.size(), virtual_devices.size());
    26. for (uint32_t i = 0; i < virtual_devices.size(); i++) {
    27. DLDeviceType device_type = virtual_devices[i]->device_type();
    28. sid_workspace[device_type][storage_ids[i]] = 0;
    29. device_io[device_type] = 0;
    30. device_consts[device_type] = 0;
    31. }
    32. }
    33. // Iterate the storage map to compute all the tensor sizes in the program.
    34. // There are 3 cases in this code:
    35. //
    36. // First we need to compute the sizes of all
    37. // inline constants.
    38. //
    39. // Second we compute the size of any bound variable as these are input and output
    40. // sizes of the program.
    41. //
    42. // Finally for all other expressions we check which storage identifier they have
    43. // been assigned and we compute the maximal size of the storage, as tensors can
    44. // share storage with other tensors which are the same size or larger.
    45. //
    46. // In this final case there is only one allocation for all tensors which share storage
    47. // which will be the maximal size of all tensors which were assigned to it.
    48. /* 遍历存储映射(storage map)来计算程序中所有张量的大小
    49. 在这个代码中有3种情况:
    50. 首先,我们需要计算所有内联常数的大小;
    51. 其次,我们计算所有绑定变量的大小,因为这些是程序的输入和输出大小;
    52. 最后,我们检查所有其他表达式的存储标识符,并计算存储空间的最大大小,因为张量可以与其他大小相同或更大的张量复用存储空间.
    53. 在最后一种情况下,所有共享同一存储的张量只对应一次分配,其大小为分配到该存储的所有张量中的最大值。
    54. */
    55. for (const auto& kv : storage_info_map) {
    56. const Expr& expr = kv.first;
    57. const backend::StorageInfo& storage_info = kv.second;
    58. // 计算token tensor需要的空间大小
    59. int64_t size_bytes = backend::CalculateRelayExprSizeBytes(expr->checked_type());
    60. VLOG(1) << "expression:" << std::endl
    61. << PrettyPrint(expr) << std::endl
    62. << "of type:" << std::endl
    63. << PrettyPrint(expr->checked_type()) << std::endl
    64. << "has size " << size_bytes << " and storage info:" << std::endl
    65. << storage_info;
    66. //获取为该token分配的内存块id和设备类型
    67. const std::vector<int64_t>& storage_ids = storage_info->storage_ids;
    68. const std::vector<VirtualDevice>& virtual_devices = storage_info->virtual_devices;
    69. //如果对应的token是常量,则按设备类型统计常量所占空间大小
    70. if (expr->IsInstance<ConstantNode>()) {
    71. for (const auto& virtual_device : virtual_devices) {
    72. DLDeviceType device_type = virtual_device->device_type();
    73. ICHECK_EQ(device_consts.count(device_type), 1);
    74. device_consts[device_type] += size_bytes;
    75. }
    76. } else if (expr->IsInstance<VarNode>() || expr.same_as(func->body)) {
    77. //如果是变量或者函数体,则按照设备类型统计io所占内存大小
    78. CHECK(size_bytes == 0 || virtual_devices.size() >= 1) << "must be at least one device";
    79. for (const auto& virtual_device : virtual_devices) {
    80. DLDeviceType device_type = virtual_device->device_type();
    81. device_io[device_type] += size_bytes;
    82. }
    83. } else {
    84. // TODO(@electriclilies): This code is never being called which means sid_workspace is not
    85. // updated.. This means that storage info is probably not being created correctly. Or is not
    86. // equivalent to what was here previously
    87. for (uint32_t i = 0; i < storage_ids.size(); i++) {
    88. // Here we record the largest size of the tensor
    89. // that share the same storage id, because storage_id will
    90. // be shared between multiple tensors that are not live simultaneously.
    91. /* 如果一种设备上若干个tensor不同时存在, 那么它们复用同一块内存,
    92. 只要保证这个内存是最大的tensor大小即可, 所以这里记录最大的tensor大小*/
    93. DLDeviceType device_type = virtual_devices[i]->device_type();
    94. if (size_bytes > sid_workspace[device_type][storage_ids[i]]) {
    95. sid_workspace[device_type][storage_ids[i]] = size_bytes;
    96. }
    97. }
    98. }
    99. }
    100. // This is a Map<device, workspace_size>
    101. // 表的key是设备类型, value是工作空间大小
    102. std::unordered_map<DLDeviceType, int, backend::EnumClassHash> device_workspace;
    103. // Once we know the sizes of sids, we need to accumulate per device
    104. for (const auto& dev_sid_size : sid_workspace) {
    105. auto dev = dev_sid_size.first;
    106. device_workspace[dev] = 0;
    107. // 对每种设备,统计该设备的分配的内存块总共大小
    108. for (const auto& sid_size : dev_sid_size.second) {
    109. device_workspace[dev] += sid_size.second;
    110. }
    111. }
    112. Map<Target, Integer> workspace_sizes;
    113. Map<Target, Integer> io_sizes;
    114. Map<Target, Integer> constant_sizes;
    115. Map<Target, tir::PrimFunc> tir_primfuncs;
    116. Map<Target, Function> relay_primfuncs;
    117. // Initialize all target workspaces to zero
    118. for (const auto& target : config->primitive_targets) {
    119. workspace_sizes.Set(target, 0);
    120. }
    121. //获取分配的内存块相关设备target,设置内存块大小统计,关联relay fun和target
    122. for (const auto& dev_and_size : device_workspace) {
    123. Target target = config->FindPrimitiveTargetForDeviceOrFail(dev_and_size.first);
    124. workspace_sizes.Set(target, dev_and_size.second);
    125. relay_primfuncs.Set(target, func);
    126. }
    127. //按target记录io占用内存大小
    128. for (const auto& dev_and_size : device_io) {
    129. Target target = config->FindPrimitiveTargetForDeviceOrFail(dev_and_size.first);
    130. io_sizes.Set(target, dev_and_size.second);
    131. }
    132. //按target记录常量占用内存大小
    133. for (const auto& dev_and_size : device_consts) {
    134. Target target = config->FindPrimitiveTargetForDeviceOrFail(dev_and_size.first);
    135. ICHECK_EQ(constant_sizes.count(target), 0);
    136. constant_sizes.Set(target, dev_and_size.second);
    137. }
    138. //返回函数占用空间信息
    139. backend::FunctionInfo func_info(std::move(workspace_sizes), std::move(io_sizes),
    140. std::move(constant_sizes), std::move(tir_primfuncs),
    141. std::move(relay_primfuncs));
    142. VLOG(1) << "func_info: " << func_info;
    143. return std::move(func_info);
    144. }

    简单地说,就是按设备(target)统计一个函数的输入输出占用了多少空间,函数内部中间张量(工作空间)占用了多少空间,以及函数使用的常量占用了多少空间。

  • 相关阅读:
    在Spring中使用Redis
    链接装载与库:第八章——Linux共享库组织
    leetcode 周赛——2848. 与车相交的点
    深入理解Java虚拟机:Java内存区域与内存溢出异常
    Android Studio Giraffe解决gradle reload failed问题
    Rust入门-引用借用
    面试项目准备 | 如何向面试官展示项目?
    517-coding #2 贪心算法
    90%的程序员不适合做独立开发
    C# 9.0语法标准 “函数指针” 深度解读!
  • 原文地址:https://blog.csdn.net/zx_ros/article/details/126182950