• AlexNet: PyTorch model vs. ONNX model speed comparison


    # -*- coding: utf-8 -*-
    from tqdm import tqdm
    import pandas as pd
    import time
    import torch
    import torchvision
    import numpy as np
    import onnxruntime as ort


    def save_onnx(path, device):
        """Export a randomly initialized AlexNet to ONNX and return the model."""
        dummy_input = torch.randn(3, 3, 224, 224).to(device)
        model = torchvision.models.alexnet(pretrained=False).to(device)
        input_names = ["actual_input_1"] + ["learned_%d" % i for i in range(16)]
        output_names = ["output1"]
        # Mark the batch dimension (axis 0) as dynamic so the exported model
        # accepts arbitrary batch sizes at inference time.
        torch.onnx.export(model, dummy_input, path, verbose=False,
                          input_names=input_names, output_names=output_names,
                          dynamic_axes={'actual_input_1': [0], 'output1': [0]})
        return model


    def torch_t(model, batch_size, device):
        """Time `epoch` PyTorch forward passes; returns total seconds."""
        s = time.time()
        input_ = torch.randn(batch_size, 3, 224, 224).to(device)
        for i in range(epoch):  # epoch is a module-level global set in __main__
            model(input_)
        cost = time.time() - s
        return round(cost, 2)


    def onnx_t_fun(path, batch_size, device):
        """Time `epoch` ONNX Runtime forward passes; returns total seconds."""
        if device == 'cpu':
            ort_session = ort.InferenceSession(path, providers=['CPUExecutionProvider'])
        else:
            ort_session = ort.InferenceSession(path, providers=['CUDAExecutionProvider'])
        s = time.time()
        input_ = np.random.randn(batch_size, 3, 224, 224).astype(np.float32)
        for i in range(epoch):
            ort_session.run(
                None,  # None fetches all outputs
                {"actual_input_1": input_},
            )
        cost = time.time() - s
        return round(cost, 2)


    if __name__ == '__main__':
        batch_size_list = [2 ** i for i in range(8)]  # 1, 2, 4, ..., 128
        device_list = ['cpu', torch.device(0)]
        tuples = [(device, batch_size) for device in device_list for batch_size in batch_size_list]
        index = pd.MultiIndex.from_tuples(tuples)
        epoch = 100
        path = "alexnet.onnx"
        df = pd.DataFrame(columns=['torch', 'onnx'], index=index)
        for batch_size in tqdm(batch_size_list):
            for device in device_list:
                model = save_onnx(path, device)
                cost_torch = torch_t(model, batch_size, device)
                cost_onnx = onnx_t_fun(path, batch_size, device)
                df.loc[(device, batch_size), :] = [cost_torch, cost_onnx]
        print(df)

                 torch   onnx
    cpu     1      1.2   0.89
            2     2.47   1.15
            4     2.86   1.68
            8     3.67    2.7
            16    5.93   4.72
            32    9.47   8.85
            64   17.33  17.26
            128  32.77  34.27
    cuda:0  1     0.07   0.42
            2     0.08   0.65
            4     0.12    0.6
            8      0.2   0.27
            16    0.24    0.4
            32    0.38   0.89
            64    0.69   1.72
            128   1.26   3.12

    The times above are total seconds for epoch = 100 forward passes. On CPU, ONNX Runtime is faster at small batch sizes, but the gain shrinks as the batch size grows, and at batch_size 128 it is actually slower than PyTorch; presumably both backends end up bound by the same convolution/GEMM work once the batches are large.
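
    The CPU numbers also depend on how many intra-op threads ONNX Runtime is allowed to use, which the script above leaves at the default. A minimal sketch of pinning it explicitly (the value 8 is illustrative, not a recommendation):

    import onnxruntime as ort

    sess_options = ort.SessionOptions()
    # Cap the number of threads ONNX Runtime uses inside a single op;
    # the default varies by build and machine.
    sess_options.intra_op_num_threads = 8
    ort_session = ort.InferenceSession("alexnet.onnx", sess_options,
                                       providers=['CPUExecutionProvider'])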

    On GPU the result reverses: ONNX Runtime is slower at every batch size, and it is not obvious whether that is expected. Two plausible contributors (not verified here): PyTorch launches CUDA kernels asynchronously and the loop never calls torch.cuda.synchronize(), so the measured PyTorch times may understate the real GPU time; and each ort_session.run call is fed a NumPy array, so the CUDA execution provider pays a host-to-device copy on every iteration.
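
    A sketch of a timing loop that waits for queued CUDA work before reading the clock (torch_t_sync is a hypothetical variant of torch_t above, not part of the original script):

    import time
    import torch

    def torch_t_sync(model, batch_size, device, epoch=100):
        """Like torch_t, but synchronizes so GPU time is fully counted."""
        input_ = torch.randn(batch_size, 3, 224, 224).to(device)
        if device != 'cpu':
            torch.cuda.synchronize()  # ensure the host-to-device copy is done
        s = time.time()
        with torch.no_grad():  # inference only; skip autograd bookkeeping
            for _ in range(epoch):
                model(input_)
        if device != 'cpu':
            torch.cuda.synchronize()  # wait for all queued kernels to finish
        return round(time.time() - s, 2)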

    The CPU was identified with cat /proc/cpuinfo | grep 'model name' | uniq, which reports Intel(R) Core(TM) i7-9700K CPU @ 3.60GHz.
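
    Independently of speed, it is worth confirming that the exported model reproduces the PyTorch outputs before comparing the two backends. A minimal parity check, assuming save_onnx from the script above is in scope:

    import numpy as np
    import torch
    import onnxruntime as ort

    # save_onnx returns the exact model instance that was exported, so the
    # two backends share the same random weights.
    model = save_onnx("alexnet.onnx", "cpu").eval()  # eval(): disable dropout
    x = torch.randn(2, 3, 224, 224)
    with torch.no_grad():
        torch_out = model(x).numpy()
    sess = ort.InferenceSession("alexnet.onnx", providers=['CPUExecutionProvider'])
    (onnx_out,) = sess.run(None, {"actual_input_1": x.numpy()})
    print(np.allclose(torch_out, onnx_out, atol=1e-4))  # expect True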

• Original article: https://blog.csdn.net/qq_22526061/article/details/126976847