• 模型部署笔记--Pytorch-FX量化


    目录

    1--Pytorch-FX量化

    2--校准模型

    3--代码实例

    3-1--主函数

    3-2--prepare_dataloader函数

    3-3--训练和测试函数


    1--Pytorch-FX量化

            Pytorch在torch.quantization.quantize_fx中提供了两个API,即prepare_fx和convert_fx。

            prepare_fx的作用是准备量化,其在输入模型里按照设定的规则qconfig_dict来插入观察节点,进行的工作包括:

    1. 将nn.Module转换为GraphModule。
    2. 合并算子,例如将Conv、BN和Relu算子进行合并(通过打印模型可以查看合并的算子)。
    3. 在Conv和Linear等OP前后插入Observer,用于观测激活值(Feature map)和权重的统计特征(例如最大值、最小值),并据此计算scale和zero_point。

            convert_fx的作用是根据scale和zero_point来将模型进行量化。

    2--校准模型

            完整项目代码参考:ljf69/Model-Deployment-Notes

            对原始模型model调用prepare_fx()得到prepared_model后,一般需要先用一批数据对其进行校准(即让插入的Observer在前向推理中统计数值范围),校准完成后再调用convert_fx()完成模型的量化。

    3--代码实例

    3-1--主函数

    1. import os
    2. import copy
    3. import torch
    4. import torch.nn as nn
    5. from torchvision.models.resnet import resnet18
    6. from torch.quantization import get_default_qconfig
    7. from torch.quantization.quantize_fx import prepare_fx, convert_fx
    8. from torch.ao.quantization.fx.graph_module import ObservedGraphModule
    9. from dataloader import prepare_dataloader
    10. from train_val import train_model, evaluate_model
    # Quantize a model.
    def quant_fx(model):
        """Quantize ``model`` with PyTorch FX graph mode and save the result.

        The input model is switched to eval mode; the prepare/convert steps run
        on a deep copy of it. No calibration data is passed through the
        prepared copy before conversion. The quantized ``state_dict`` is
        written to ``r18_quant.pth`` and nothing is returned.

        Args:
            model: The float model to quantize.
        """
        model.eval()

        # Default "fbgemm" qconfig applied globally via the "" key
        # (static quantization by default, per the original author's note).
        # NOTE(review): newer PyTorch releases appear to require an
        # ``example_inputs`` argument for ``prepare_fx`` -- confirm against the
        # installed version.
        qconfig_dict = {"": get_default_qconfig("fbgemm")}

        working_copy = copy.deepcopy(model)

        # prepare_fx followed directly by convert_fx; print ``observed`` or
        # ``int8_model`` here to inspect the intermediate / final graphs.
        observed = prepare_fx(working_copy, qconfig_dict)
        int8_model = convert_fx(observed)

        # Persist the quantized weights.
        torch.save(int8_model.state_dict(), "r18_quant.pth")
    # Calibration routine.
    def calib_quant_model(model, calib_dataloader):
        """Run calibration batches through a prepared (observed) FX model.

        Args:
            model: Must be an ``ObservedGraphModule`` (checked with an
                assertion), i.e. not a plain ``nn.Module``.
            calib_dataloader: Iterable yielding ``(inputs, labels)`` pairs;
                only ``inputs`` are used.

        Raises:
            AssertionError: If ``model`` is not an ``ObservedGraphModule``.
        """
        # Guard against calibrating the original, un-prepared model.
        assert isinstance(
            model, ObservedGraphModule
        ), "model must be a prepared fx ObservedGraphModule."
        model.eval()
        # Forward passes only; the outputs themselves are discarded.
        with torch.inference_mode():
            for inputs, _labels in calib_dataloader:
                model(inputs)
        print("calib done.")
    # Compare results with and without calibration.
    def quant_calib_and_eval(model, test_loader):
        """Evaluate the float model, an uncalibrated int8 model and a calibrated one.

        The model is moved to CPU and put in eval mode. Each quantized variant
        is built from its own deep copy of ``model``. The calibrated variant is
        calibrated with ``test_loader`` and its ``state_dict`` is saved to
        ``r18_quant_calib.pth``. Nothing is returned; results are reported by
        ``evaluate_model``.

        Args:
            model: The float model to compare against.
            test_loader: Data loader used for both calibration and evaluation.
        """
        model.to(torch.device("cpu"))
        model.eval()
        qconfig_dict = {"": get_default_qconfig("fbgemm")}

        # 1) Baseline: original model before quantization.
        print("model:")
        evaluate_model(model, test_loader)

        # 2) Quantized without calibration: prepare -> convert back to back.
        uncalibrated_src = copy.deepcopy(model)
        uncalibrated_int8 = convert_fx(prepare_fx(uncalibrated_src, qconfig_dict))
        print("Not calibration model_int8:")
        evaluate_model(uncalibrated_int8, test_loader)

        # 3) Quantized with calibration: prepare (insert observers),
        #    calibrate with data, then convert.
        calibrated_src = copy.deepcopy(model)
        observed = prepare_fx(calibrated_src, qconfig_dict)
        calib_quant_model(observed, test_loader)
        calibrated_int8 = convert_fx(observed)
        torch.save(calibrated_int8.state_dict(), "r18_quant_calib.pth")

        print("Do calibration model_int8:")
        evaluate_model(calibrated_int8, test_loader)
    if __name__ == "__main__":
        # Build the training and test data loaders.
        train_loader, test_loader = prepare_dataloader()

        # ResNet-18 with the final layer replaced by a 10-way classifier.
        model = resnet18(pretrained=True)
        model.fc = nn.Linear(512, 10)

        # Reuse previously trained weights when available; otherwise train now.
        if os.path.exists("r18_row.pth"):
            model.load_state_dict(torch.load("r18_row.pth", map_location="cpu"))
        else:
            # Fall back to CPU instead of crashing on machines without CUDA.
            train_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            train_model(model, train_loader, test_loader, train_device)
            print("train finished.")
            torch.save(model.state_dict(), "r18_row.pth")

        # The "fbgemm" quantization path used below targets CPU, so make sure
        # the model is not left on the GPU after training.
        model.to(torch.device("cpu"))

        # Quantize the model (no calibration).
        quant_fx(model)

        # Compare the effect of calibrating vs. not calibrating.
        quant_calib_and_eval(model, test_loader)

    3-2--prepare_dataloader函数

    # Build the training and test data loaders.
    def prepare_dataloader(num_workers=8, train_batch_size=128, eval_batch_size=256):
        """Create CIFAR10 train/test ``DataLoader`` objects.

        Both datasets are stored under ``data`` and requested with
        ``download=True``. The training split is augmented (random 32x32 crop
        with padding 4, random horizontal flip) and sampled randomly; the test
        split is only converted and normalized, and is read sequentially.

        Args:
            num_workers: Worker count passed to both loaders.
            train_batch_size: Batch size of the training loader.
            eval_batch_size: Batch size of the test loader.

        Returns:
            A ``(train_loader, test_loader)`` tuple.
        """
        # Per-channel normalization constants shared by both pipelines
        # (presumably CIFAR-10 mean / std -- verify).
        channel_mean = (0.4914, 0.4822, 0.4465)
        channel_std = (0.2023, 0.1994, 0.2010)

        augmenting_pipeline = transforms.Compose(
            [
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(channel_mean, channel_std),
            ]
        )
        plain_pipeline = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize(channel_mean, channel_std),
            ]
        )

        train_set = torchvision.datasets.CIFAR10(
            root="data", train=True, download=True, transform=augmenting_pipeline
        )
        test_set = torchvision.datasets.CIFAR10(
            root="data", train=False, download=True, transform=plain_pipeline
        )

        train_loader = torch.utils.data.DataLoader(
            dataset=train_set,
            batch_size=train_batch_size,
            sampler=torch.utils.data.RandomSampler(train_set),
            num_workers=num_workers,
        )
        test_loader = torch.utils.data.DataLoader(
            dataset=test_set,
            batch_size=eval_batch_size,
            sampler=torch.utils.data.SequentialSampler(test_set),
            num_workers=num_workers,
        )
        return train_loader, test_loader

    3-3--训练和测试函数

    # Train the model that will later be quantized.
    def train_model(model, train_loader, test_loader, device):
        """Train ``model`` with SGD for 20 epochs, evaluating after each epoch.

        Uses cross-entropy loss and SGD (lr 1e-2, momentum 0.9, weight decay
        1e-5). After every epoch the model is evaluated on ``test_loader`` and
        a summary line is printed.

        Args:
            model: Network to train; moved to ``device`` in place.
            train_loader: Loader yielding ``(inputs, labels)`` training batches.
            test_loader: Loader passed to ``evaluate_model`` after each epoch.
            device: Device on which training and evaluation run.

        Returns:
            The same ``model`` object after training.
        """
        loss_fn = nn.CrossEntropyLoss()
        model.to(device)
        sgd = optim.SGD(
            model.parameters(), lr=1e-2, momentum=0.9, weight_decay=1e-5
        )

        for epoch in range(20):
            # ---- training pass ----
            model.train()
            loss_sum = 0
            hit_count = 0
            for batch_x, batch_y in train_loader:
                batch_x = batch_x.to(device)
                batch_y = batch_y.to(device)

                sgd.zero_grad()
                logits = model(batch_x)
                predicted = logits.max(1)[1]
                batch_loss = loss_fn(logits, batch_y)
                batch_loss.backward()
                sgd.step()

                # Weight the batch-mean loss by batch size so the epoch figure
                # is a per-sample average.
                loss_sum += batch_loss.item() * batch_x.size(0)
                hit_count += (predicted == batch_y.data).sum()

            train_loss = loss_sum / len(train_loader.dataset)
            train_accuracy = hit_count / len(train_loader.dataset)

            # ---- evaluation pass ----
            model.eval()
            eval_loss, eval_accuracy = evaluate_model(
                model=model, test_loader=test_loader, device=device, criterion=loss_fn
            )

            print("Epoch: {:02d} Train Loss: {:.3f} Train Acc: {:.3f} Eval Loss: {:.3f} Eval Acc: {:.3f}".format(
                epoch, train_loss, train_accuracy, eval_loss, eval_accuracy))

        return model
    def evaluate_model(model, test_loader, device=torch.device("cpu"), criterion=None):
        """Evaluate ``model`` on ``test_loader`` and report loss, accuracy and time.

        The model is put in eval mode and moved to ``device``. Forward passes
        run under ``torch.no_grad()`` so no autograd graph is built during
        evaluation.

        Args:
            model: Model to evaluate.
            test_loader: Loader yielding ``(inputs, labels)`` batches; its
                ``dataset`` length is used as the averaging denominator.
            device: Device to run on (defaults to CPU).
            criterion: Optional loss function called as
                ``criterion(outputs, labels)``; when ``None`` the reported loss
                is 0.

        Returns:
            A ``(eval_loss, eval_accuracy)`` tuple averaged over the dataset.
        """
        t0 = time.time()
        model.eval()
        model.to(device)
        running_loss = 0
        running_corrects = 0
        # Evaluation never calls backward(), so gradient tracking is pure overhead.
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                if criterion is not None:
                    loss = criterion(outputs, labels).item()
                else:
                    loss = 0
                # Accumulate statistics; loss is weighted by the batch size.
                running_loss += loss * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
        eval_loss = running_loss / len(test_loader.dataset)
        eval_accuracy = running_corrects / len(test_loader.dataset)
        t1 = time.time()
        print(f"eval loss: {eval_loss}, eval acc: {eval_accuracy}, cost: {t1 - t0}")
        return eval_loss, eval_accuracy

  • 相关阅读:
    用 KV 缓存量化解锁长文本生成
    2023华为杯数学建模D题——碳排放路径优化基于指数分解法的LMDI 模型
    12v24v60v高校同步降压转换芯片推荐
    SAFe大规模敏捷框架,敏捷认证培训体系(全)
    2022算能生态合作伙伴大会,英码科技应邀出席共同探讨生态合作和发展问题
    小学生python游戏编程arcade----基本知识4角色动画
    低碳环保:无服务器和 Kubernetes 原生 Java 部署实践
    open ai服务器崩溃
    Kotlin学习笔记-Kotlin基础-01
    在window10下python:ocr实战
  • 原文地址:https://blog.csdn.net/weixin_43863869/article/details/133951380