• 物体分类__pytorch


    classification

    目录

    1. 数据处理
    2. 主干网络
    3. 损失函数
    4. 训练
    5. 预测

    1. 数据处理

    1.1 获取物体类别和图片地址

    (1) 流程

    文件夹:
    datasets
        train
            cat
                cat0.jpg
                cat1.jpg
                ......
            dog
                dog0.jpg
                dog1.jpg
                ......
        test
            cat
                cat100.jpg
                cat345.jpg
                ......
            dog
                dog198.jpg
                dog209.jpg
                ......
    
    '''
    No1.
    goals  :
            读取datasets/train/cat,datasets/train/dog 文件里分类物体的类别及图片地址,
            并放在cls_train.txt中,对待test数据亦是如此。
    inputs :
            datasets/train/cat,datasets/train/dog ,datasets/test/cat,datasets/test/dog
    outputs:
            cls_train.txt,cls_test.txt.每个.txt文件里存放的是所有物体类别和图片地址。
            eg:0;/Users/LS/cls_LS/datasets/test/cat/14.jpg
    processes:
            1. 分别遍历datasets里的train和test文件,获取cat和dog的文件名
            2. 分别遍历train和test里的cat和dog文件,读取每个文件的图片名称,
               如果是cat文件,cls_id为0,写入物体类别和图片名称。
    
    
    注: pytorch里常见"两个括号"的写法,例如 nn.Conv2d(3,64,3)(x):第一个括号传入构造参数、创建模块对象,第二个括号调用该对象(即执行其forward)并传入输入张量。
    '''
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39

    (2) 代码

    import os
    
    classes = ['cat','dog']
    sets = ['train','test']
    
    def masklabels(classes,sets):
        """Write one ``cls_<split>.txt`` label file per dataset split.

        For every split name in ``sets`` the folder ``datasets/<split>`` is
        scanned. Each sub-folder whose name appears in ``classes`` is treated
        as one category, and every ``.jpg``/``.png``/``.jpeg`` file inside it
        produces one output line ``<class index>;<absolute image path>``,
        e.g. ``0;/Users/LS/cls_LS/datasets/train/cat/cat.6.jpg``.

        Args:
            classes: Sequence of category folder names; the position of a name
                in this sequence is the class index written to the file.
            sets: Iterable of split folder names located under ``datasets``.
        """
        wd = os.getcwd()
        image_suffixes = ('.jpg', '.png', '.jpeg')
        for split in sets:
            # The file is named cls_<split>.txt (not LS<split>.txt) because that
            # is the name the data-loading code opens ("./cls_train.txt").
            with open('cls_' + split + '.txt', 'w') as list_file:
                # Entries such as '.DS_Store' are skipped by the class filter.
                for type_name in os.listdir('datasets/' + split):
                    if type_name not in classes:
                        continue
                    cls_id = classes.index(type_name)

                    photos_path = os.path.join('datasets', split, type_name)
                    for photo_name in os.listdir(photos_path):
                        # splitext('cat.6.jpg') -> ('cat.6', '.jpg')
                        _, postfix = os.path.splitext(photo_name)
                        if postfix not in image_suffixes:
                            continue
                        list_file.write(str(cls_id) + ';' + '%s/%s' % (wd, os.path.join(photos_path, photo_name)) + '\n')
    
    # Script entry point: build the label files for the module-level
    # ``classes`` and ``sets`` lists when this file is run directly.
    if __name__ == '__main__':
        masklabels(classes,sets)
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27

    1.2 数据增强

    (1) 流程

    '''
    1. get_random_data()
       (1) 对图像进行缩放并且进行长和宽的扭曲;
       (2) 将图像多余的部分加上灰条。图像扭曲后,宽高发生变化,加上灰条后,图片的宽高仍是(224, 224)。
       (3) 图像翻转
       (4) 图像旋转
       (5) 色域扭曲
    
    '''
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9

    (2) 代码

    
    import cv2
    import numpy as np
    from PIL import Image
    from random import shuffle
    import torch.utils.data as data
    from utils.utils_ls import letterbox_image
    
    def _preprocess_input(x):
        # 图像数据归一化到0~1
        x /= 127.5
        x -= 1.
        return x
    
    def rand(a=0,b=1):
        """Draw one random float by scaling ``np.random.rand()`` into a..b."""
        span = b - a
        return a + span * np.random.rand()
    
    def get_random_data(image,input_shape,jitter=.3, hue=.1, sat=1.5, val=1.5):
        """Apply random training-time augmentation to one image.

        Steps, in order: convert to RGB, resize with a random aspect ratio and
        scale, paste onto a gray canvas of the target size, random horizontal
        flip, random rotation, then random saturation/value scaling in HSV.

        Args:
            image: PIL image (``convert``/``resize``/``transpose`` are called on it).
            input_shape: ``(h, w)`` of the output canvas.
            jitter: Half-width of the random aspect-ratio distortion factors.
            hue: Currently unused; the hue shift below is commented out.
            sat: Upper bound of the random saturation factor (or its inverse).
            val: Upper bound of the random value/brightness factor (or its inverse).

        Returns:
            The augmented image as an array scaled back by 255 after the
            HSV -> RGB conversion (presumably float32 of shape (h, w, 3);
            confirm against OpenCV's ``cvtColor`` behavior).
        """
        image = image.convert("RGB")
        h, w = input_shape
    
        # 1.1 Resize the image with a randomly distorted aspect ratio and a random scale
        new_ar = w/h * rand(1-jitter,1+jitter)/rand(1-jitter,1+jitter)
        scale = rand(.75, 1.25)
        if new_ar < 1:
            nh = int(scale*h)
            nw = int(nh*new_ar)
        else:
            nw = int(scale*w)
            nh = int(nw/new_ar)
        image = image.resize((nw,nh), Image.BICUBIC)
    
        # 1.2 Paste onto a gray (128,128,128) canvas so the result is always (w, h).
        #     The offsets are negative when the resized image is larger than the canvas.
        dx = int(rand(0, w-nw))
        dy = int(rand(0, h-nh))
        new_image = Image.new('RGB', (w,h), (128,128,128))
        new_image.paste(image, (dx, dy))
        image = new_image
    
        # Horizontal flip with probability 0.5
        flip = rand()<.5
        if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
        # Rotation with probability 0.5; note that image becomes a numpy array afterwards
        rotate = rand()<.5
        if rotate:
            angle = np.random.randint(-15,15)
            a,b = w/2,h/2
            M = cv2.getRotationMatrix2D((a,b),angle,1)   # rotation matrix about the canvas center
            image=cv2.warpAffine(np.array(image),M,(w,h),borderValue=[128,128,128])  # affine warp, gray border fill
    
        # Color jitter in HSV space (hue shift disabled, only saturation and value are scaled)
        # hue = rand(-hue, hue)
        sat = rand(1, sat) if rand()<.5 else 1/rand(1, sat)
        val = rand(1, val) if rand()<.5 else 1/rand(1, val)
        x = cv2.cvtColor(np.array(image,np.float32)/255, cv2.COLOR_RGB2HSV)
        # x[..., 0] *= hue
        x[..., 1] *= sat
        x[..., 2] *= val
        # Clamp: hue channel to at most 360, saturation/value to at most 1, everything to at least 0
        x[x[:,:, 0]>360, 0] = 360
        x[:, :, 1:][x[:, :, 1:]>1] = 1
        x[x<0] = 0
        image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB)*255
        return image_data
    
    
    class DataGenerator(data.Dataset):
        """Dataset that turns annotation lines into ``(image, label)`` samples.

        Every annotation line has the form ``<label>;<image path>``, e.g.
        ``0;/Users/LS/cls_LS/datasets/train/cat/cat.6.jpg``.

        Args:
            input_shape: Sequence whose first two entries are passed to the
                image transform as the target size.
            lines: List of annotation lines.
            random: When true, images go through ``get_random_data``;
                otherwise through ``letterbox_image``.
        """

        def __init__(self, input_shape, lines, random=True):
            self.input_shape, self.lines, self.random = input_shape, lines, random

        def __len__(self):
            """Return the number of annotation lines."""
            return len(self.lines)

        def get_len(self):
            """Return the number of annotation lines (same value as ``len``)."""
            return len(self.lines)

        def __getitem__(self, index):
            """Load, transform and rescale the sample at ``index``.

            Returns:
                ``(img, y)`` where ``img`` is the channel-first float32 image
                (e.g. shape (3, 224, 224)) and ``y`` is the integer label.
            """
            # Asking for the first sample reshuffles the annotation list in place.
            if index == 0:
                shuffle(self.lines)

            record = self.lines[index]
            # The path is the first whitespace-delimited token after the ';'.
            image_path = record.split(';')[1].split()[0]
            picture = Image.open(image_path)

            target_size = [self.input_shape[0], self.input_shape[1]]
            transform = get_random_data if self.random else letterbox_image
            picture = transform(picture, target_size)

            pixels = _preprocess_input(np.array(picture).astype(np.float32))
            # Channels-last -> channels-first.
            pixels = np.transpose(pixels, [2, 0, 1])

            label = int(record.split(';')[0])
            return pixels, label
    
    def detection_collate(batch):
        """Stack a batch of ``(image, label)`` pairs into two numpy arrays.

        Args:
            batch: Iterable of ``(img, y)`` pairs; it is traversed exactly once.

        Returns:
            ``(images, targets)`` as numpy arrays, one entry per sample.
        """
        # Materialize once so the source is not iterated a second time.
        samples = [(img, y) for img, y in batch]
        images = np.array([sample[0] for sample in samples])
        targets = np.array([sample[1] for sample in samples])
        return images, targets
    
    # Smoke test: load a few annotation lines, run them through the dataset
    # and the collate function, and print the resulting array shapes.
    if __name__ == '__main__':
        # from torch.utils.data import DataLoader
        input_shape = [224,224,3]
        with open(r"./cls_train.txt","r") as f:
            lines = f.readlines()
        num_val = int(len(lines)*0.1)     # e.g. 6 for a 60-line file
        num_train = len(lines) - num_val  # e.g. 54 for a 60-line file
    
        # Only the first 6 lines are used here; num_val/num_train are not used below.
        train_dataset   = DataGenerator(input_shape,lines[:6])
        # The dataset itself is passed as the "batch" and iterated sample by sample.
        images, targets = detection_collate(train_dataset)
        print(images.shape, targets.shape)
    '''
    (6, 3, 224, 224) (6,)
    '''
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93
    • 94
    • 95
    • 96
    • 97
    • 98
    • 99
    • 100
    • 101
    • 102
    • 103
    • 104
    • 105
    • 106
    • 107
    • 108
    • 109
    • 110
    • 111
    • 112
    • 113
    • 114
    • 115
    • 116
    • 117
    • 118
    • 119
    • 120
    • 121
    • 122

    2. 主干网络

    2.1 vgg16

    (1) 思路

    ''' model
    1. VGG网络构架:
        features(x) + avgpool(x) + flatten(x, 1)+ classifier(x)
    2. 代码思路:
        (1)features(x):features = make_layers(cfgs['D'])
             [(Conv2d(k=3,s=1) + (BN) + ReLU) * n + MaxPool2d(k=2,s=2)] * 5,其中5个阶段的 n 依次为 2,2,3,3,3
             [b,3,224,224] -> [b,64,224,224]-> [b,64,112,112] -> [b,128,112,112] -> [b,128,56,56] -> [b,256,56,56]->
             [b,256,28,28] -> [b,512,28,28] -> [b,512,14,14] -> [b,512,14,14] -> [b,512,7,7]
        (2)avgpool(x):  avgpool = AdaptiveAvgPool2d((7,7))
             [b,512,7,7] -> [b,512,7,7]
        (3)flatten(x, 1): [b,512,7,7]  -> [b,25088]
        (4)classifier(x): classifier = [Linear + ReLU + Dropout]*2 + Linear
             [b,25088] -> [b,4096]  -> [b,4096] -> [b,1000]
        (5)_initialize_weights
    3.vgg16
        下载模型参数后,又重新定义分类层,实现迁移学习,
        可以在原有模型参数基础上,训练自有的数据。
    
    VGG(
      (features): Sequential(
        (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): ReLU(inplace=True)
        (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (9): ReLU(inplace=True)
        (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (12): ReLU(inplace=True)
        (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (14): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (15): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (16): ReLU(inplace=True)
        (17): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (18): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (19): ReLU(inplace=True)
        (20): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (21): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (22): ReLU(inplace=True)
        (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (24): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (25): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (26): ReLU(inplace=True)
        (27): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (28): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (29): ReLU(inplace=True)
        (30): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (31): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (32): ReLU(inplace=True)
        (33): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (34): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (35): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (36): ReLU(inplace=True)
        (37): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (38): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (39): ReLU(inplace=True)
        (40): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (41): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (42): ReLU(inplace=True)
        (43): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
      (avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
      (classifier): Sequential(
        (0): Linear(in_features=25088, out_features=4096, bias=True)
        (1): ReLU(inplace=True)
        (2): Dropout(p=0.5, inplace=False)
        (3): Linear(in_features=4096, out_features=4096, bias=True)
        (4): ReLU(inplace=True)
        (5): Dropout(p=0.5, inplace=False)
        (6): Linear(in_features=4096, out_features=10, bias=True)
      )
    )
    
    '''
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79

    (2) 代码

    vgg16网络架构相对简单,但参数量非常大。它在刚出现时很有价值:用多个3x3的卷积核代替大的卷积核,两个3x3卷积的感受野相当于一个5x5卷积,既减少了参数量,也变相加深了网络深度。通过设置 cfgs 的方式,可以让模型细化成不同的版本。通过vgg16 的学习,可以学到网络架构、模型参数初始化、冻结参数、加载参数、改变分类数目等方法。

    import torch
    import torch.nn as nn
    from torchvision.models.utils import load_state_dict_from_url
    
    model_urls = {'vgg16':'https://download.pytorch.org/models/vgg16-397923af.pth'}
    
    cfgs = {
        'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    }
    
    def make_layers(cfg,batch_norm=True):
        """Build the convolutional feature extractor described by ``cfg``.

        Args:
            cfg: Sequence whose items are either the string ``'M'`` (a 2x2
                max-pool with stride 2) or an int (output channels of a 3x3
                convolution with padding 1, followed by ReLU).
            batch_norm: When true, a ``BatchNorm2d`` is inserted between each
                convolution and its ReLU.

        Returns:
            An ``nn.Sequential`` of the layers in ``cfg`` order; the first
            convolution expects 3 input channels.
        """
        modules = []
        channels = 3
        for item in cfg:
            if item == 'M':
                modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            modules.append(nn.Conv2d(channels, item, kernel_size=3, padding=1))
            if batch_norm:
                modules.append(nn.BatchNorm2d(item))
            modules.append(nn.ReLU(True))
            # The next convolution consumes what this one produced.
            channels = item
        return nn.Sequential(*modules)
    
    class VGG(nn.Module):
        """VGG classifier: feature extractor, 7x7 adaptive pooling, 3-layer head.

        Args:
            features: Module producing a feature map with 512 channels.
            num_classes: Size of the final linear layer's output.
            init_weights: When true, ``_initialize_weights`` is run after
                construction.
        """

        def __init__(self,features,num_classes=1000,init_weights=True):
            super().__init__()
            self.features = features
            self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
            hidden = 4096
            head = [
                nn.Linear(512 * 7 * 7, hidden), nn.ReLU(True), nn.Dropout(),
                nn.Linear(hidden, hidden), nn.ReLU(True), nn.Dropout(),
                nn.Linear(hidden, num_classes),
            ]
            self.classifier = nn.Sequential(*head)
            if init_weights:
                self._initialize_weights()

        def forward(self,x):
            """Return class scores for a batch of images ``x``."""
            pooled = self.avgpool(self.features(x))
            # Keep the batch dimension, flatten everything else.
            return self.classifier(torch.flatten(pooled, 1))

        def _initialize_weights(self):
            """Initialize every conv, batch-norm and linear layer in the model."""
            for layer in self.modules():
                if isinstance(layer, nn.Linear):
                    nn.init.normal_(layer.weight, 0, 0.01)
                    nn.init.constant_(layer.bias, 0)
                elif isinstance(layer, nn.BatchNorm2d):
                    nn.init.constant_(layer.weight, 1)
                    nn.init.constant_(layer.bias, 0)
                elif isinstance(layer, nn.Conv2d):
                    nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
                    if layer.bias is not None:
                        nn.init.constant_(layer.bias, 0)

        def _set_backbone_trainable(self, trainable):
            """Set ``requires_grad`` on every parameter of ``self.features``."""
            for param in self.features.parameters():
                param.requires_grad_(trainable)

        def freeze_backbone(self):
            """Stop gradient updates for the feature extractor."""
            self._set_backbone_trainable(False)

        def Unfreeze_backbone(self):
            """Re-enable gradient updates for the feature extractor."""
            self._set_backbone_trainable(True)
    
    def vgg16(pretrained= False,progress=True,num_classes=1000,batch_norm=None):
        """Build a VGG16 model, optionally starting from the downloaded weights.

        Args:
            pretrained: When true, download (into ``./model_data``) and load the
                checkpoint at ``model_urls['vgg16']``.
            progress: Whether to show a download progress bar.
            num_classes: Number of output classes. With pretrained weights and a
                value other than 1000, only the last linear layer is replaced so
                the loaded fully connected layers are kept for transfer learning.
            batch_norm: Whether the feature extractor contains BatchNorm layers.
                ``None`` (default) means BatchNorm when training from scratch and
                no BatchNorm when ``pretrained`` is true.

        Returns:
            The constructed ``VGG`` model.

        Raises:
            ValueError: If ``pretrained`` and ``batch_norm`` are both true.
        """
        if batch_norm is None:
            batch_norm = not pretrained
        if pretrained and batch_norm:
            # The checkpoint was saved from the plain (no BatchNorm) layout. In the
            # BatchNorm layout the layer indices shift, so keys such as
            # 'features.7.weight' would hit a differently shaped convolution and
            # load_state_dict would fail even with strict=False.
            raise ValueError(
                "pretrained vgg16 weights do not match the batch_norm layout; "
                "use batch_norm=False (or leave it as None) with pretrained=True")

        features = make_layers(cfgs['D'], batch_norm=batch_norm)
        if not pretrained:
            # Build the requested head directly instead of creating a 1000-class
            # head first and throwing it away.
            return VGG(features, num_classes=num_classes)

        # The checkpoint has a 1000-class head, so load into that shape first.
        model = VGG(features)
        # torch.hub provides this loader; torchvision.models.utils only re-exported it.
        state_dict = torch.hub.load_state_dict_from_url(model_urls['vgg16'],
                                                        model_dir='./model_data',
                                                        progress=progress)
        model.load_state_dict(state_dict, strict=False)
        if num_classes != 1000:
            model.classifier[-1] = nn.Linear(4096, num_classes)
        return model
    
    # Smoke test: push a random batch of two 224x224 RGB images through a
    # 10-class model and print the output shape.
    if __name__ == '__main__':
        x = torch.rand([2,3,224,224])
        model = vgg16(num_classes=10)
        y = model(x)
        print(y.shape)
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93

    2.2 resnet50

    (1) 思路

    '''
    1. ResNet :
        1.1 主要模块:ConvBlock + IdentityBlock
            ConvBlock : downsample(x) + [(cnv(1x1)+bn+relu) + (cnv(3x3)+bn+relu) + (cnv(1x1)+bn)],相加之后再做relu
            IdentityBlock : x + [(cnv(1x1)+bn+relu) + (cnv(3x3)+bn+relu) + (cnv(1x1)+bn)],相加之后再做relu
            layer : ConvBlock + IdentityBlock * n
        1.2 网络结构:
            (cnv(7x7)+bn+relu+maxpool) + layer*4 + avgpool + fc
    
    2. resnet50 :
        2.1 流程:
            model -> pretrained -> num_classes
        2.2 网络结构:
            ResNet(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              )
            )
        (1): Bottleneck(
          (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
        )
        (2): Bottleneck(
          (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
        )
      )
      (layer2): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
            (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (1): Bottleneck(
          (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
        )
        (2): Bottleneck(
          (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
        )
        (3): Bottleneck(
          (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
        )
      )
      (layer3): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
            (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (1): Bottleneck(
          (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
        )
        (2): Bottleneck(
          (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
        )
        (3): Bottleneck(
          (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
        )
        (4): Bottleneck(
          (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
        )
        (5): Bottleneck(
          (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
        )
      )
      (layer4): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
            (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (1): Bottleneck(
          (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
        )
        (2): Bottleneck(
          (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
        )
      )
      (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
      (fc): Linear(in_features=2048, out_features=2, bias=True)
    )
    
    
    '''
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93
    • 94
    • 95
    • 96
    • 97
    • 98
    • 99
    • 100
    • 101
    • 102
    • 103
    • 104
    • 105
    • 106
    • 107
    • 108
    • 109
    • 110
    • 111
    • 112
    • 113
    • 114
    • 115
    • 116
    • 117
    • 118
    • 119
    • 120
    • 121
    • 122
    • 123
    • 124
    • 125
    • 126
    • 127
    • 128
    • 129
    • 130
    • 131
    • 132
    • 133
    • 134
    • 135
    • 136
    • 137
    • 138
    • 139
    • 140
    • 141
    • 142
    • 143
    • 144
    • 145
    • 146
    • 147
    • 148
    • 149
    • 150
    • 151
    • 152
    • 153
    • 154
    • 155
    • 156
    • 157
    • 158
    • 159
    • 160
    • 161
    • 162
    • 163
    • 164
    • 165
    • 166
    • 167
    • 168
    • 169
    • 170
    • 171
    • 172
    • 173
    • 174
    • 175
    • 176
    • 177
    • 178
    • 179
    • 180
    • 181
    • 182
    • 183
    • 184
    • 185
    • 186
    • 187
    • 188
    • 189
    • 190
    • 191
    • 192

    (2) 代码

    
    import torch
    import torch.nn as nn
    from torchvision.models.utils import load_state_dict_from_url
    
    # Download URL of the pretrained checkpoint, keyed by model name; read by resnet50(pretrained=True).
    model_urls = {'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth'}
    
    def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
        """Build a bias-free 3x3 convolution layer.

        The padding is set equal to ``dilation``, so with ``stride=1`` the spatial
        size of the input is preserved.

        Args:
            in_planes: Number of input channels.
            out_planes: Number of output channels.
            stride: Convolution stride.
            groups: Number of channel groups.
            dilation: Kernel dilation (also used as the padding).

        Returns:
            An ``nn.Conv2d`` instance.
        """
        conv_options = dict(
            kernel_size=3,
            stride=stride,
            padding=dilation,
            dilation=dilation,
            groups=groups,
            bias=False,
        )
        return nn.Conv2d(in_planes, out_planes, **conv_options)
    
    def conv1x1(in_planes, out_planes, stride=1):
        """Build a bias-free 1x1 (pointwise) convolution layer.

        Args:
            in_planes: Number of input channels.
            out_planes: Number of output channels.
            stride: Convolution stride.

        Returns:
            An ``nn.Conv2d`` instance with ``kernel_size=1`` and no bias.
        """
        pointwise = nn.Conv2d(
            in_planes,
            out_planes,
            kernel_size=1,
            stride=stride,
            bias=False,
        )
        return pointwise
    
    class Bottleneck(nn.Module):
        """Residual bottleneck block: 1x1 conv -> 3x3 conv -> 1x1 conv, plus a shortcut.

        The block emits ``planes * expansion`` channels. The 3x3 convolution
        carries the stride, groups and dilation; the two 1x1 convolutions only
        change the channel count.

        Args:
            inplanes: Number of input channels.
            planes: Base channel count; the output has ``planes * expansion`` channels.
            stride: Stride applied by the 3x3 convolution.
            downsample: Optional module applied to the input to produce the
                shortcut; when ``None`` the input itself is the shortcut.
            groups: Groups of the 3x3 convolution.
            base_width: Scales the inner width as ``planes * base_width / 64``.
            dilation: Dilation of the 3x3 convolution.
            norm_layer: Callable building a normalization layer from a channel
                count; defaults to ``nn.BatchNorm2d``.
        """

        expansion = 4

        def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                     base_width=64, dilation=1, norm_layer=None):
            super().__init__()
            make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
            inner_channels = int(planes * (base_width / 64.)) * groups
            outer_channels = planes * self.expansion

            # Channel reduction.
            self.conv1 = conv1x1(inplanes, inner_channels)
            self.bn1 = make_norm(inner_channels)

            # Spatial convolution; the only layer that can change resolution.
            self.conv2 = conv3x3(inner_channels, inner_channels, stride, groups, dilation)
            self.bn2 = make_norm(inner_channels)

            # Channel expansion.
            self.conv3 = conv1x1(inner_channels, outer_channels)
            self.bn3 = make_norm(outer_channels)

            self.relu = nn.ReLU(inplace=True)
            self.downsample = downsample
            self.stride = stride

        def forward(self, x):
            """Return ``relu(main_path(x) + shortcut(x))``."""
            out = self.relu(self.bn1(self.conv1(x)))
            out = self.relu(self.bn2(self.conv2(out)))
            out = self.bn3(self.conv3(out))

            # The shortcut is the raw input unless a downsample module was supplied.
            shortcut = x if self.downsample is None else self.downsample(x)

            out += shortcut
            return self.relu(out)
    
    class ResNet(nn.Module):
        """ResNet classifier: stem, four residual stages, global average pool, linear head.

        Args:
            block: Residual block class. It must expose an ``expansion`` attribute
                and accept ``(inplanes, planes, stride, downsample, groups,
                base_width, dilation, norm_layer)``.
            layers: Number of blocks in each of the four stages (indices 0-3 are read).
            num_classes: Output size of the final linear layer.
            zero_init_residual: When true, the ``bn3`` weight of every
                ``Bottleneck`` is initialised to zero.
            groups: Forwarded to every block.
            width_per_group: Forwarded to every block as ``base_width``.
            replace_stride_with_dilation: Three flags, one each for layer2, layer3
                and layer4; a true flag makes that stage use dilation instead of
                its stride of 2. ``None`` means all false.
            norm_layer: Callable building a normalization layer from a channel
                count; defaults to ``nn.BatchNorm2d``.

        Raises:
            ValueError: If ``replace_stride_with_dilation`` is given and does not
                have exactly three elements.
        """

        def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
                     groups=1, width_per_group=64, replace_stride_with_dilation=None,
                     norm_layer=None):
            super().__init__()
            norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
            self._norm_layer = norm_layer

            # Running state consumed and updated by _make_layer.
            self.inplanes = 64
            self.dilation = 1

            if replace_stride_with_dilation is None:
                replace_stride_with_dilation = [False, False, False]
            elif len(replace_stride_with_dilation) != 3:
                raise ValueError("replace_stride_with_dilation should be None "
                                 "or a 3-element tuple, got {}".format(replace_stride_with_dilation))

            self.block = block
            self.groups = groups
            self.base_width = width_per_group

            # Shape notes below follow a [1, 3, 214, 214] example input.
            # Stem: [1, 3, 214, 214] --> [1, 64, 107, 107] --> [1, 64, 54, 54]
            self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                                   bias=False)
            self.bn1 = norm_layer(self.inplanes)
            self.relu = nn.ReLU(inplace=True)
            self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

            # Stage 1: [1, 64, 54, 54] --> [1, 256, 54, 54]
            self.layer1 = self._make_layer(block, 64, layers[0])
            # Stage 2: [1, 256, 54, 54] --> [1, 512, 27, 27]
            self.layer2 = self._make_layer(
                block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0])
            # Stage 3: [1, 512, 27, 27] --> [1, 1024, 14, 14]
            self.layer3 = self._make_layer(
                block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1])
            # Stage 4: [1, 1024, 14, 14] --> [1, 2048, 7, 7]
            self.layer4 = self._make_layer(
                block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2])

            # Head: [1, 2048, 7, 7] --> [1, 2048, 1, 1] --> flatten --> [1, num_classes]
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
            self.fc = nn.Linear(512 * block.expansion, num_classes)

            # Weight initialisation for convolutions and normalization layers.
            for module in self.modules():
                if isinstance(module, nn.Conv2d):
                    nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                    nn.init.constant_(module.weight, 1)
                    nn.init.constant_(module.bias, 0)

            if zero_init_residual:
                bottlenecks = (m for m in self.modules() if isinstance(m, Bottleneck))
                for bottleneck in bottlenecks:
                    nn.init.constant_(bottleneck.bn3.weight, 0)

        def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
            """Build one stage of ``blocks`` residual blocks and update ``self.inplanes``.

            Only the first block receives ``stride`` and, when the shape changes,
            a 1x1-conv + norm shortcut; the remaining blocks keep the shape.
            With ``dilate`` set, the stride is folded into ``self.dilation`` and
            the stage runs at stride 1.
            """
            norm_layer = self._norm_layer
            entry_dilation = self.dilation
            stage_channels = planes * block.expansion

            if dilate:
                self.dilation *= stride
                stride = 1

            # A projection shortcut is needed whenever resolution or channel count changes.
            shortcut = None
            if stride != 1 or self.inplanes != stage_channels:
                shortcut = nn.Sequential(
                    conv1x1(self.inplanes, stage_channels, stride),
                    norm_layer(stage_channels),
                )

            # First block: may change resolution and channel count.
            stage = [block(self.inplanes, planes, stride, shortcut, self.groups,
                           self.base_width, entry_dilation, norm_layer)]
            self.inplanes = stage_channels

            # Remaining blocks: shape-preserving.
            stage.extend(
                block(self.inplanes, planes, groups=self.groups,
                      base_width=self.base_width, dilation=self.dilation,
                      norm_layer=norm_layer)
                for _ in range(1, blocks)
            )
            return nn.Sequential(*stage)

        def forward(self, x):
            """Run the stem, the four stages, pooling, flatten and the linear head."""
            # Stem, e.g. [1, 3, 214, 214] --> [1, 64, 54, 54]
            x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

            # Residual stages, e.g. [1, 64, 54, 54] --> [1, 2048, 7, 7]
            for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
                x = stage(x)

            x = self.avgpool(x)        # e.g. [1, 2048, 7, 7] --> [1, 2048, 1, 1]
            x = torch.flatten(x, 1)    # e.g. [1, 2048, 1, 1] --> [1, 2048]
            return self.fc(x)          # e.g. [1, 2048] --> [1, num_classes]

        def _set_backbone_requires_grad(self, flag):
            """Set ``requires_grad`` on every parameter of the stem and the four stages."""
            backbone = (self.conv1, self.bn1, self.layer1, self.layer2, self.layer3, self.layer4)
            for part in backbone:
                for weight in part.parameters():
                    weight.requires_grad = flag

        def freeze_backbone(self):
            """Disable gradients for conv1, bn1 and layer1-layer4 (``fc`` is untouched)."""
            self._set_backbone_requires_grad(False)

        def Unfreeze_backbone(self):
            """Re-enable gradients for conv1, bn1 and layer1-layer4."""
            self._set_backbone_requires_grad(True)
    
    def resnet50(pretrained=False, progress=False, num_classes=1000):
        """Build a ResNet-50 (Bottleneck blocks, stage depths [3, 4, 6, 3]).

        The network is always created with the default 1000-way head so that the
        pretrained checkpoint can be loaded as-is; the head is swapped afterwards
        when another class count is requested.

        Args:
            pretrained: When true, download (into ``./model_data``) and load the
                checkpoint listed in ``model_urls['resnet50']``.
            progress: Forwarded to the downloader.
            num_classes: Desired output size; any value other than 1000 replaces
                ``fc`` with a new ``nn.Linear``.

        Returns:
            The constructed ``ResNet`` model.
        """
        net = ResNet(Bottleneck, [3, 4, 6, 3])

        if pretrained:
            weights = load_state_dict_from_url(
                model_urls['resnet50'],
                model_dir='./model_data',
                progress=progress,
            )
            net.load_state_dict(weights)

        if num_classes == 1000:
            return net

        # Replace the classification head to match the requested class count.
        net.fc = nn.Linear(512 * net.block.expansion, num_classes)
        return net
    
    
    if __name__ == '__main__':
        # Smoke test: push one random 214x214 RGB image through a 10-class ResNet-50.
        x = torch.rand([1,3,214,214])
        model = resnet50(num_classes=10)
        y = model(x)
        # Show the result instead of discarding it; batch size 1 and 10 classes
        # give torch.Size([1, 10]).
        print(y.shape)
    
    '''
    torch.Size([1, 10])
    Process finished with exit code 0
    '''
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93
    • 94
    • 95
    • 96
    • 97
    • 98
    • 99
    • 100
    • 101
    • 102
    • 103
    • 104
    • 105
    • 106
    • 107
    • 108
    • 109
    • 110
    • 111
    • 112
    • 113
    • 114
    • 115
    • 116
    • 117
    • 118
    • 119
    • 120
    • 121
    • 122
    • 123
    • 124
    • 125
    • 126
    • 127
    • 128
    • 129
    • 130
    • 131
    • 132
    • 133
    • 134
    • 135
    • 136
    • 137
    • 138
    • 139
    • 140
    • 141
    • 142
    • 143
    • 144
    • 145
    • 146
    • 147
    • 148
    • 149
    • 150
    • 151
    • 152
    • 153
    • 154
    • 155
    • 156
    • 157
    • 158
    • 159
    • 160
    • 161
    • 162
    • 163
    • 164
    • 165
    • 166
    • 167
    • 168
    • 169
    • 170
    • 171
    • 172
    • 173
    • 174
    • 175
    • 176
    • 177
    • 178
    • 179
    • 180
    • 181
    • 182

    2.3 mobilenet

    (1) 思路

    '''
    1. MobileNetV2:
        1.1 主要结构
             InvertedResidual : (Conv(3x3)BNReLU  -->  Conv(1x1)BN)
                                (Conv(3x3)BNReLU  -->  Conv(1x1)BN + x)
                                (Conv(1x1)BNReLU --> Conv(3x3)BNReLU --> Conv(1x1)BN )
                                (Conv(1x1)BNReLU --> Conv(3x3)BNReLU --> Conv(1x1)BN + x)
                                注: 最后的 Conv(1x1) 之后只接 BN,不接激活函数(线性瓶颈)。
        1.2 网络构架
            net : features(x) + x.mean + classifier(x)
            features(x) : ConvBNReLU + InvertedResidual*17(由 7 组配置堆叠而成) + ConvBNReLU
            x.mean : x.mean([2,3])
            classifier(x) :  Dropout + Linear
    
    2. mobilenet_v2
        2.1 流程
            (1)导入模型。(2)导入参数。(3)修改分类类别数目。
    
    
    MobileNetV2(
      (features): Sequential(
        (0): ConvBNReLU(
          (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
              (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (2): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(96, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=96, bias=False)
              (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(96, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (3): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(144, 144, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=144, bias=False)
              (1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(144, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (4): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(144, 144, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=144, bias=False)
              (1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(144, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (5): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192, bias=False)
              (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (6): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192, bias=False)
              (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (7): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(192, 192, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=192, bias=False)
              (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (8): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
              (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(384, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (9): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
              (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(384, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (10): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
              (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(384, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (11): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
              (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (12): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(576, 576, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=576, bias=False)
              (1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (13): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(576, 576, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=576, bias=False)
              (1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (14): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(576, 576, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=576, bias=False)
              (1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(576, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (15): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
              (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (16): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
              (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (17): InvertedResidual(
          (conv): Sequential(
            (0): ConvBNReLU(
              (0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): ConvBNReLU(
              (0): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
              (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (2): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (3): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (18): ConvBNReLU(
          (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(1280, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
      )
      (classifier): Sequential(
        (0): Dropout(p=0.2, inplace=False)
        (1): Linear(in_features=1280, out_features=10, bias=True)
      )
    )
    
    Process finished with exit code 0
    
    '''
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93
    • 94
    • 95
    • 96
    • 97
    • 98
    • 99
    • 100
    • 101
    • 102
    • 103
    • 104
    • 105
    • 106
    • 107
    • 108
    • 109
    • 110
    • 111
    • 112
    • 113
    • 114
    • 115
    • 116
    • 117
    • 118
    • 119
    • 120
    • 121
    • 122
    • 123
    • 124
    • 125
    • 126
    • 127
    • 128
    • 129
    • 130
    • 131
    • 132
    • 133
    • 134
    • 135
    • 136
    • 137
    • 138
    • 139
    • 140
    • 141
    • 142
    • 143
    • 144
    • 145
    • 146
    • 147
    • 148
    • 149
    • 150
    • 151
    • 152
    • 153
    • 154
    • 155
    • 156
    • 157
    • 158
    • 159
    • 160
    • 161
    • 162
    • 163
    • 164
    • 165
    • 166
    • 167
    • 168
    • 169
    • 170
    • 171
    • 172
    • 173
    • 174
    • 175
    • 176
    • 177
    • 178
    • 179
    • 180
    • 181
    • 182
    • 183
    • 184
    • 185
    • 186
    • 187
    • 188
    • 189
    • 190
    • 191
    • 192
    • 193
    • 194
    • 195
    • 196
    • 197
    • 198
    • 199
    • 200
    • 201
    • 202
    • 203
    • 204
    • 205
    • 206
    • 207
    • 208
    • 209
    • 210
    • 211
    • 212
    • 213
    • 214
    • 215
    • 216
    • 217
    • 218
    • 219
    • 220
    • 221
    • 222
    • 223
    • 224
    • 225
    • 226
    • 227
    • 228
    • 229
    • 230
    • 231
    • 232
    • 233
    • 234
    • 235
    • 236
    • 237
    • 238
    • 239
    • 240
    • 241
    • 242
    • 243
    • 244
    • 245
    • 246
    • 247
    • 248
    • 249
    • 250
    • 251
    • 252
    • 253
    • 254
    • 255
    • 256
    • 257
    • 258
    • 259
    • 260
    • 261
    • 262
    • 263
    • 264
    • 265
    • 266
    • 267
    • 268
    • 269
    • 270
    • 271
    • 272
    • 273
    • 274
    • 275
    • 276
    • 277
    • 278
    • 279
    • 280
    • 281
    • 282
    • 283
    • 284
    • 285
    • 286
    • 287
    • 288
    • 289
    • 290
    • 291
    • 292
    • 293
    • 294
    • 295
    • 296
    • 297
    • 298
    • 299
    • 300
    • 301
    • 302
    • 303
    • 304
    • 305
    • 306
    • 307

    (2) 代码

    import torch
    from torch import nn
    from torchvision.models.utils import load_state_dict_from_url
    
    # Names exported by a star-import of this module.
    __all__ = ['MobileNetV2', 'mobilenet_v2']
    
    # Download URL of the pretrained checkpoint, keyed by model name; read by mobilenet_v2(pretrained=True).
    model_urls = {
        'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth',
    }
    
    def _make_divisible(v, divisor, min_value=None):
        ''' 调整通道数,使其是 divisor 的整数倍 '''
        if min_value is None:
            min_value = divisor
        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
        if new_v < 0.9 * v:
            new_v += divisor
        return new_v
    
    class ConvBNReLU(nn.Sequential):
        """Bias-free Conv2d followed by BatchNorm2d and ReLU6, packed as a Sequential.

        The padding is ``(kernel_size - 1) // 2``.

        Args:
            in_planes: Number of input channels.
            out_planes: Number of output channels.
            kernel_size: Convolution kernel size.
            stride: Convolution stride.
            groups: Number of convolution groups.
        """

        def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
            half_kernel = (kernel_size - 1) // 2
            conv = nn.Conv2d(
                in_planes,
                out_planes,
                kernel_size=kernel_size,
                stride=stride,
                padding=half_kernel,
                groups=groups,
                bias=False,
            )
            norm = nn.BatchNorm2d(out_planes)
            activation = nn.ReLU6(inplace=True)
            super().__init__(conv, norm, activation)
    
    class InvertedResidual(nn.Module):
        """MobileNetV2 block: optional 1x1 expansion, 3x3 depthwise conv, 1x1 projection.

        The projection is a plain Conv2d + BatchNorm2d with no activation. The
        input is added back to the output only when ``stride == 1`` and
        ``inp == oup``.

        Args:
            inp: Number of input channels.
            oup: Number of output channels.
            stride: Stride of the depthwise convolution; must be 1 or 2.
            expand_ratio: Hidden width multiplier; when it equals 1 the expansion
                convolution is omitted.
        """

        def __init__(self, inp, oup, stride, expand_ratio):
            super().__init__()
            self.stride = stride
            assert stride in [1, 2]

            expanded_channels = int(round(inp * expand_ratio))
            self.use_res_connect = self.stride == 1 and inp == oup

            # Pointwise expansion, skipped when there is nothing to expand.
            expansion = []
            if expand_ratio != 1:
                expansion = [ConvBNReLU(inp, expanded_channels, kernel_size=1)]

            # Depthwise 3x3: one group per channel.
            depthwise = ConvBNReLU(expanded_channels, expanded_channels,
                                   stride=stride, groups=expanded_channels)

            # Linear pointwise projection (no activation afterwards).
            projection = [
                nn.Conv2d(expanded_channels, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
            ]

            self.conv = nn.Sequential(*expansion, depthwise, *projection)

        def forward(self, x):
            """Apply the block, adding the input back when the residual path is enabled."""
            if not self.use_res_connect:
                return self.conv(x)
            return x + self.conv(x)
    
    
    class MobileNetV2(nn.Module):
        def __init__(self, num_classes=1000, width_mult=1.0, inverted_residual_setting=None, round_nearest=8):
            """Assemble the MobileNetV2 feature extractor and classification head.

            Args:
                num_classes: Output size of the final linear layer.
                width_mult: Multiplier applied to every channel count before rounding.
                inverted_residual_setting: Optional list of ``[t, c, n, s]`` rows
                    (expand ratio, output channels, repeat count, stride of the
                    first repeat). The built-in table is used when this is None.
                round_nearest: Passed to ``_make_divisible`` together with each
                    scaled channel count.

            Raises:
                ValueError: If the table is empty or its first row does not have
                    exactly four entries.
            """
            super(MobileNetV2, self).__init__()
            stem_channels = 32
            head_channels = 1280

            if inverted_residual_setting is None:
                # Columns are t, c, n, s; shape notes assume a 224x224 input.
                inverted_residual_setting = [
                    [1, 16, 1, 1],    # 112, 112, 32 -> 112, 112, 16
                    [6, 24, 2, 2],    # 112, 112, 16 -> 56, 56, 24
                    [6, 32, 3, 2],    # 56, 56, 24 -> 28, 28, 32
                    [6, 64, 4, 2],    # 28, 28, 32 -> 14, 14, 64
                    [6, 96, 3, 1],    # 14, 14, 64 -> 14, 14, 96
                    [6, 160, 3, 2],   # 14, 14, 96 -> 7, 7, 160
                    [6, 320, 1, 1],   # 7, 7, 160 -> 7, 7, 320
                ]

            table_ok = len(inverted_residual_setting) != 0 and len(inverted_residual_setting[0]) == 4
            if not table_ok:
                raise ValueError("inverted_residual_setting should be non-empty "
                                 "or a 4-element list, got {}".format(inverted_residual_setting))

            in_channels = _make_divisible(stem_channels * width_mult, round_nearest)
            self.last_channel = _make_divisible(head_channels * max(1.0, width_mult), round_nearest)

            # 224, 224, 3 -> 112, 112, 32
            layers = [ConvBNReLU(3, in_channels, stride=2)]

            for expand, channels, repeats, first_stride in inverted_residual_setting:
                out_channels = _make_divisible(channels * width_mult, round_nearest)
                for repeat_idx in range(repeats):
                    # Only the first block of a stage uses the configured stride.
                    block_stride = 1 if repeat_idx else first_stride
                    layers.append(InvertedResidual(in_channels, out_channels, block_stride, expand_ratio=expand))
                    in_channels = out_channels

            # 7, 7, 320 -> 7, 7, 1280
            layers.append(ConvBNReLU(in_channels, self.last_channel, kernel_size=1))
            self.features = nn.Sequential(*layers)

            self.classifier = nn.Sequential(
                nn.Dropout(0.2),
                nn.Linear(self.last_channel, num_classes),
            )

            # Per-layer-type weight initialisation.
            for module in self.modules():
                if isinstance(module, nn.Conv2d):
                    nn.init.kaiming_normal_(module.weight, mode='fan_out')
                    if module.bias is not None:
                        nn.init.zeros_(module.bias)
                    continue
                if isinstance(module, nn.BatchNorm2d):
                    nn.init.ones_(module.weight)
                    nn.init.zeros_(module.bias)
                    continue
                if isinstance(module, nn.Linear):
                    nn.init.normal_(module.weight, 0, 0.01)
                    nn.init.zeros_(module.bias)
    
        def forward(self, x):
            """Apply the feature extractor, average over axes 2 and 3, then classify.

            The shape notes follow the example at the bottom of this snippet
            (batch of 2, 224x224 input, 10 classes).
            """
            feature_map = self.features(x)           # [2, 3, 224, 224] --> [2, 1280, 7, 7]
            pooled = feature_map.mean(dim=(2, 3))    # [2, 1280, 7, 7] --> [2, 1280]
            return self.classifier(pooled)           # [2, 1280] --> [2, 10]
    
        def freeze_backbone(self):
            """Turn off gradient tracking for every parameter of ``self.features``."""
            for weight in self.features.parameters():
                weight.requires_grad = False
    
        def Unfreeze_backbone(self):
            """Turn gradient tracking back on for every parameter of ``self.features``."""
            for weight in self.features.parameters():
                weight.requires_grad = True
    
    
    def mobilenet_v2(pretrained=False, progress=True, num_classes=1000):
        """Build a MobileNetV2, optionally load downloaded weights, and resize the head.

        Args:
            pretrained: When true, fetch the state dict listed under
                ``model_urls['mobilenet_v2']`` (stored in ``./model_data``) and load it.
            progress: Forwarded to ``load_state_dict_from_url``.
            num_classes: When different from 1000, the classifier is replaced by a
                fresh Dropout + Linear head with this many outputs.

        Returns:
            The constructed model.
        """
        net = MobileNetV2()
        if pretrained:
            weights = load_state_dict_from_url(model_urls['mobilenet_v2'], model_dir='./model_data',
                                               progress=progress)
            net.load_state_dict(weights)

        if num_classes == 1000:
            return net

        # The head is swapped after loading, so the 1000-class weights still fit the model they are loaded into.
        head_layers = [nn.Dropout(0.2), nn.Linear(net.last_channel, num_classes)]
        net.classifier = nn.Sequential(*head_layers)
        return net
    
    if __name__ == '__main__':
        # Smoke test: push a random batch of two 224x224 three-channel inputs
        # through a 10-class model.
        x = torch.rand(2, 3, 224, 224)
        model = mobilenet_v2(num_classes=10)
        y = model(x)
        
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93
    • 94
    • 95
    • 96
    • 97
    • 98
    • 99
    • 100
    • 101
    • 102
    • 103
    • 104
    • 105
    • 106
    • 107
    • 108
    • 109
    • 110
    • 111
    • 112
    • 113
    • 114
    • 115
    • 116
    • 117
    • 118
    • 119
    • 120
    • 121
    • 122
    • 123
    • 124
    • 125
    • 126
    • 127
    • 128
    • 129
    • 130
    • 131
    • 132
    • 133
    • 134
    • 135
    • 136
    • 137
    • 138
    • 139
    • 140
    • 141
    • 142
    • 143
    • 144
    • 145
    • 146
    • 147
    • 148
    • 149
    • 150
    • 151
    • 152

    3. 损失函数

    3.1 交叉熵

    多分类问题采用交叉熵作为损失函数。y_pred[batch_size,cls],y_true[batch_size](类别索引;写成one-hot时为[batch_size,cls])

    '''
    y_pred取值在0~1之间
    loss = (-y_true*log(y_pred)).sum(dim=1).mean()    # y_true为one-hot:先对类别求和,再对batch取平均
    
    y_pred取值实数
    loss = (-x[class]+log(exp(x).sum())).mean()
    
    '''
    if __name__ == '__main__':
        # Demonstrates four equivalent ways of computing the cross-entropy loss
        # for a batch of raw scores (logits) and integer class targets.
        import torch
        import torch.nn as nn
        torch.random.seed()  # nothing below draws random numbers, so this does not affect the printed values
        outputs = torch.tensor([[3.9383, 0.0983],
                                [0.0465, 5.9902]])
        targets = torch.Tensor([0,1]).long()
        batch_size = len(outputs)

        # method_1: built-in criterion applied to the logits
        loss    = nn.CrossEntropyLoss()(outputs, targets)
        print(loss) # tensor(0.0119)

        # method_2: log-softmax followed by negative log-likelihood
        print(nn.NLLLoss()(nn.LogSoftmax(dim=1)(outputs), targets))

        # One-hot encoding of the targets, shared by method_3 and method_4.
        y = torch.zeros_like(outputs)
        y[torch.arange(batch_size), targets] = 1

        # Per-sample normaliser. keepdim=True keeps the shape [batch, 1] so it
        # broadcasts across the class axis; without it the sums line up with
        # the wrong axis (and only happen to work for a square, diagonal case).
        row_sums = torch.exp(outputs).sum(1, keepdim=True)

        # method_3: -sum(one_hot * log(softmax)) / batch_size
        print(-(torch.log(torch.exp(outputs) / row_sums) * y).sum() / batch_size)

        # method_4: sum(one_hot * (log-sum-exp - logit)) / batch_size
        print(((torch.log(row_sums) - outputs) * y).sum() / batch_size)
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28

    4. 训练

    4.1 训练流程

    '''
    1. 设置参数
    2. 加载模型
        2.1 通过网页下载参数
        2.2 上一步不成功,参数初始化。
        2.3 迁移学习。a. 加载训练好的参数,取出未训练模型参数。b.取出模型参数和预训练模型参数shape相同的参数。c.把上一步取出的参数加载到未训练的模型上。 
    3. 读取数据及数据预处理
    4. 设置优化器和学习率
    5. 分批次训练数据
    
    '''
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11

    (1) 加载模型代码

    '''
    assert backbone in ["mobilenet", "resnet50", "vgg16"]
    # 1. pretrained = True ,则通过网页下载参数
    model = get_model_from_name[backbone](num_classes=num_classes,pretrained=pretrained)
    # 2. pretrained = False
    if not pretrained:
        weights_init(model)
    # 3. 迁移学习
    model_path =  'model_data/mobilenet_catvsdog.pth'
    print('Loading weights into state dict...')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    pretrained_dict = torch.load(model_path, map_location=device)
    model_dict = model.state_dict()
    pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) ==  np.shape(v)}
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
    
    '''
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18

    (2) 训练代码

    import torch
    import numpy as np
    from torch import nn
    from tqdm import tqdm
    import torch.optim as optim
    import torch.nn.functional as F
    import torch.backends.cudnn as cudnn
    from torch.utils.data import DataLoader
    
    from nets.mobilenet_ls import mobilenet_v2
    from nets.resnet50_ls import resnet50
    from nets.vgg16_ls import vgg16
    
    from utils.utils_ls import weights_init
    from utils.dataloader_ls import DataGenerator, detection_collate
    
    # Backbone name -> constructor imported from the nets package.
    get_model_from_name = {
        'mobilenet': mobilenet_v2,
        'resnet50': resnet50,
        'vgg16': vgg16,
    }
    
    # Per-backbone integer setting; not referenced elsewhere in this script.
    freeze_layers = {
        'mobilenet': 81,
        'resnet50': 173,
        'vgg16': 19,
    }
    
    def get_lr(optimizer):
        """Return the ``'lr'`` of the optimizer's first parameter group, or None if it has none."""
        group_rates = (group['lr'] for group in optimizer.param_groups)
        return next(group_rates, None)
    
    def get_classes(classes_path):
        """Read a text file and return its lines with surrounding whitespace stripped."""
        with open(classes_path) as handle:
            return [raw_line.strip() for raw_line in handle]
    
    def fit_one_epoch(net, epoch, epoch_size, epoch_size_val, gen, genval, Epoch, cuda):
        """Run one training epoch and one validation pass, then save a checkpoint.

        This function reads two module-level names that are not parameters:
        ``optimizer`` (stepped once per training batch) and ``model`` (whose
        ``state_dict`` is written under ``logs/``). Both must be defined before
        the call.

        Args:
            net: Network called on every image batch.
            epoch: Zero-based index of the current epoch.
            epoch_size: Maximum number of training batches taken from ``gen``.
            epoch_size_val: Maximum number of validation batches taken from ``genval``.
            gen: Iterable yielding ``(images, targets)`` NumPy array pairs for training.
            genval: Iterable yielding ``(images, targets)`` NumPy array pairs for validation.
            Epoch: Total number of epochs, used only for progress output.
            cuda: When true, each batch is moved to the GPU before the forward pass.
        """
        import os

        def _to_tensors(batch):
            # Convert one (images, targets) NumPy pair into float / long tensors.
            images, targets = batch
            with torch.no_grad():
                images = torch.from_numpy(images).type(torch.FloatTensor)
                targets = torch.from_numpy(targets).type(torch.FloatTensor).long()
                if cuda:
                    images = images.cuda()
                    targets = targets.cuda()
            return images, targets

        criterion = nn.CrossEntropyLoss()
        total_loss = 0
        total_accuracy = 0
        val_total_loss = 0
        train_batches = 0
        val_batches = 0

        with tqdm(total = epoch_size,desc=f'Epoch{epoch+1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar:
            for iteration, batch in enumerate(gen):
                if iteration >= epoch_size:
                    break
                images, targets = _to_tensors(batch)

                optimizer.zero_grad()
                outputs = net(images)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()

                total_loss += loss.item()
                train_batches += 1
                with torch.no_grad():
                    accuracy = torch.mean((torch.argmax(F.softmax(outputs, dim=-1), dim=-1) == targets).type(torch.FloatTensor))
                    total_accuracy += accuracy.item()

                pbar.set_postfix(**{'total_loss': total_loss / (iteration + 1),
                                    'accuracy'  : total_accuracy / (iteration + 1),
                                    'lr'        : get_lr(optimizer)})
                pbar.update(1)

        print('Start Validation')
        # Validate in eval mode so Dropout is disabled and BatchNorm statistics
        # are not updated by validation data; restore the previous mode afterwards.
        was_training = net.training
        net.eval()
        try:
            with tqdm(total=epoch_size_val, desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar:
                for iteration, batch in enumerate(genval):
                    if iteration >= epoch_size_val:
                        break
                    images, targets = _to_tensors(batch)
                    with torch.no_grad():
                        outputs = net(images)
                        val_loss = criterion(outputs, targets)
                        val_total_loss += val_loss.item()
                    val_batches += 1

                    pbar.set_postfix(**{'total_loss': val_total_loss / (iteration + 1),
                                        'lr'        : get_lr(optimizer)})
                    pbar.update(1)
        finally:
            net.train(was_training)

        # Average over the batches that actually ran (previously divided by
        # epoch_size + 1, which under-reported both losses).
        mean_train_loss = total_loss / max(train_batches, 1)
        mean_val_loss = val_total_loss / max(val_batches, 1)

        print('Finish Validation')
        print('Epoch:'+ str(epoch+1) + '/' + str(Epoch))
        print('Total Loss: %.4f || Val Loss: %.4f ' % (mean_train_loss, mean_val_loss))

        print('Saving state, iter:', str(epoch+1))
        # Make sure the checkpoint directory exists so a finished epoch is not lost.
        os.makedirs('logs', exist_ok=True)
        torch.save(model.state_dict(), 'logs/Epoch%d-Total_Loss%.4f-Val_Loss%.4f.pth'%((epoch+1), mean_train_loss, mean_val_loss))
    
    
    
    if __name__ == '__main__':
        # Training entry point: build the model, split the annotation file into
        # train/validation parts, then train in two phases (frozen backbone
        # first, full network afterwards).
        import os

        log_dir = './logs/'
        backbone = 'mobilenet'
        input_shape = [224,224,3]
        Cuda = False
        pretrained = False
        classes_path = './model_data/cls_classes_ls.txt'
        class_names = get_classes(classes_path)    # e.g. ['cat', 'dog']
        num_classes = len(class_names)

        # fit_one_epoch writes one checkpoint per epoch into this directory.
        os.makedirs(log_dir, exist_ok=True)

        assert backbone in ["mobilenet", "resnet50", "vgg16"]
        model = get_model_from_name[backbone](num_classes=num_classes,pretrained=pretrained)
        if not pretrained:
            weights_init(model)

        # # Optional warm start from a local checkpoint to speed up training
        # model_path = "model_data/Omniglot_vgg.pth"  # 'model_data/mobilenet_catvsdog.pth'
        # print('Loading weights into state dict...')
        # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # pretrained_dict = torch.load(model_path, map_location=device)
        # model_dict = model.state_dict()
        # pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) ==  np.shape(v)}
        # model_dict.update(pretrained_dict)
        # model.load_state_dict(model_dict)

        with open(r"./cls_train.txt","r") as f:
            lines = f.readlines()
        # Fixed seed so the train/validation split is the same on every run.
        np.random.seed(10101)
        np.random.shuffle(lines)
        np.random.seed(None)
        num_val = int(len(lines)*0.1)
        num_train = len(lines) - num_val

        net = model.train()
        if Cuda:
            net = torch.nn.DataParallel(model)
            cudnn.benchmark = True
            net = net.cuda()

        #------------------------------------------------------#
        #   Backbone features are generic, so training with the
        #   backbone frozen is faster and protects its weights
        #   early in training.
        #   Init_Epoch   : first epoch index
        #   Freeze_Epoch : last epoch of the frozen phase
        #   Epoch        : total number of epochs
        #   Reduce Batch_size on out-of-memory errors, but do not
        #   make it too small or training quality suffers.
        #------------------------------------------------------#
        Batch_size      = 32   # 128
        Init_Epoch      = 0    # 0
        Freeze_Epoch    = 50   # 50
        Epoch           = 100  # 100

        # (first epoch, end epoch, learning rate, DataLoader workers, freeze backbone?)
        phases = [
            (Init_Epoch,   Freeze_Epoch, 1e-3, 4, True),
            (Freeze_Epoch, Epoch,        1e-4, 2, False),
        ]

        for first_epoch, end_epoch, lr, num_workers, freeze in phases:
            # `optimizer` stays a module-level name because fit_one_epoch reads it as a global.
            optimizer       = optim.Adam(net.parameters(),lr,weight_decay=5e-4)
            lr_scheduler    = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

            train_dataset   = DataGenerator(input_shape,lines[:num_train])
            val_dataset     = DataGenerator(input_shape,lines[num_train:], False)
            gen             = DataLoader(train_dataset, batch_size=Batch_size, num_workers=num_workers, pin_memory=True,
                                         drop_last=True, collate_fn=detection_collate)
            gen_val         = DataLoader(val_dataset, batch_size=Batch_size, num_workers=num_workers, pin_memory=True,
                                         drop_last=True, collate_fn=detection_collate)

            epoch_size      = train_dataset.get_len()//Batch_size
            epoch_size_val  = val_dataset.get_len()//Batch_size

            if epoch_size == 0 or epoch_size_val == 0:
                raise ValueError("数据集过小,无法进行训练,请扩充数据集。")

            if freeze:
                model.freeze_backbone()
            else:
                model.Unfreeze_backbone()

            for epoch in range(first_epoch, end_epoch):
                # Pass `net` (the DataParallel wrapper when Cuda is set) rather
                # than the bare model, so the wrapper is actually used.
                fit_one_epoch(net,epoch,epoch_size,epoch_size_val,gen,gen_val,end_epoch,Cuda)
                lr_scheduler.step()
    
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93
    • 94
    • 95
    • 96
    • 97
    • 98
    • 99
    • 100
    • 101
    • 102
    • 103
    • 104
    • 105
    • 106
    • 107
    • 108
    • 109
    • 110
    • 111
    • 112
    • 113
    • 114
    • 115
    • 116
    • 117
    • 118
    • 119
    • 120
    • 121
    • 122
    • 123
    • 124
    • 125
    • 126
    • 127
    • 128
    • 129
    • 130
    • 131
    • 132
    • 133
    • 134
    • 135
    • 136
    • 137
    • 138
    • 139
    • 140
    • 141
    • 142
    • 143
    • 144
    • 145
    • 146
    • 147
    • 148
    • 149
    • 150
    • 151
    • 152
    • 153
    • 154
    • 155
    • 156
    • 157
    • 158
    • 159
    • 160
    • 161
    • 162
    • 163
    • 164
    • 165
    • 166
    • 167
    • 168
    • 169
    • 170
    • 171
    • 172
    • 173
    • 174
    • 175
    • 176
    • 177
    • 178
    • 179
    • 180
    • 181
    • 182
    • 183
    • 184
    • 185
    • 186
    • 187
    • 188
    • 189
    • 190
    • 191
    • 192
    • 193
    • 194
    • 195
    • 196
    • 197
    • 198
    • 199
    • 200
    • 201
    • 202
    • 203
    • 204
    • 205
    • 206
    • 207
    • 208
    • 209
    • 210
    • 211
    • 212
    • 213
    • 214

    5. 预测

    5.1 预测流程

    (1)流程

    '''
    1. 分类实例化
    2. 打开图片
    3. 图片识别
    
    '''
    from PIL import Image
    from classification_ls import Classification
    
    classification = Classification()

    # Interactive loop: ask for an image path, classify it, print the class name.
    while True :
        img = input('Input image filename')
        try:
            image = Image.open(img)
        except Exception:
            # Catch ordinary failures only (missing file, unreadable image);
            # Ctrl-C and SystemExit are no longer swallowed.
            print('Open Error! Try again!')
            continue

        class_name = classification.detect_image(image)
        print(class_name)
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21

    5.2 分类代码

    (1)分类流程

    '''
    1. 加载图片、加灰条、归一化
    2. 加载模型,预测
    3. 显示预测结果
    
    '''
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6

    (2)代码

    import os, copy, torch
    import numpy as np
    from torch import nn
    import matplotlib.pyplot as plt
    from torch.autograd import Variable
    
    from nets.vgg16_ls import vgg16
    from nets.resnet50_ls import resnet50
    from nets.mobilenet_ls import mobilenet_v2
    from utils.utils_ls import letterbox_image
    
    # Backbone name -> constructor imported from the nets package.
    get_model_from_name = {
        "vgg16": vgg16,
        "resnet50": resnet50,
        "mobilenet": mobilenet_v2,
    }
    
    def _preprocess_input(x):
        x /= 127.5
        x -= 1.
        return x
    
    class Classification(object):
        """Loads class names and model weights, then classifies single images.

        Settings come from ``_defaults`` and can be overridden per instance
        with keyword arguments, e.g. ``Classification(cuda=True)``.
        """

        _defaults = {
            "cuda"          : False,
            "backbone"      : 'mobilenet',
            "input_shape"   : [224,224,3],
            "classes_path"  : 'model_data/cls_classes.txt',
            "model_path"    : 'model_data/mobilenet_catvsdog.pth',
        }

        @classmethod
        def get_defaults(cls,n):
            """Return the default for setting ``n``, or a message string if ``n`` is unknown."""
            if n in cls._defaults:
                return cls._defaults[n]
            else:
                return "Unrecognized attribute name '" + n + "'"

        def __init__(self,**kwargs):
            """Apply defaults, then keyword overrides, then load class names and the model."""
            self.__dict__.update(self._defaults)
            # Keyword arguments were accepted but ignored before; they now
            # override the defaults.
            self.__dict__.update(kwargs)
            self.class_names = self._get_class()
            self.generate()

        def _get_class(self):
            """Read the class-name file (one name per line) and return the stripped names."""
            classes_path = os.path.expanduser(self.classes_path)
            with open(classes_path) as f:
                class_names = f.readlines()
            class_names = [c.strip() for c in class_names]
            return class_names

        def generate(self):
            """Build the backbone, load the weights from ``model_path`` and switch to eval mode."""
            model_path = os.path.expanduser(self.model_path)
            self.num_classes = len(self.class_names)

            assert self.backbone in ["mobilenet", "resnet50", "vgg16"]
            self.model = get_model_from_name[self.backbone](num_classes=self.num_classes, pretrained=False)
            self.model = self.model.eval()
            # Load onto the GPU only when this instance is configured to use it.
            device = torch.device('cuda' if self.cuda and torch.cuda.is_available() else 'cpu')
            # NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
            # Use the expanded path so a leading '~' in model_path works.
            state_dict = torch.load(model_path,map_location=device)
            self.model.load_state_dict(state_dict)
            if self.cuda:
                self.model = nn.DataParallel(self.model)
                self.model = self.model.cuda()
            print('{} model, and classes loaded.'.format(model_path))

        def detect_image(self,image):
            """Classify ``image``, show it with the predicted label, and return the class name.

            Args:
                image: Image object accepted by ``letterbox_image`` and
                    ``np.array`` (the callers in this file pass PIL images).

            Returns:
                The name of the class with the highest softmax probability.
            """
            # Keep an untouched copy for display.
            old_image = copy.deepcopy(image)

            crop_img = letterbox_image(image, [self.input_shape[0],self.input_shape[1]])
            photo = np.array(crop_img,dtype=np.float32)

            # Normalise, add a batch axis, and move channels in front of height/width.
            photo = np.reshape(_preprocess_input(photo),[1,self.input_shape[0],self.input_shape[1],self.input_shape[2]])
            photo = np.transpose(photo,(0,3,1,2))

            with torch.no_grad():
                photo = torch.from_numpy(photo).type(torch.FloatTensor)
                if self.cuda:
                    photo = photo.cuda()
                preds = torch.softmax(self.model(photo)[0],dim=-1).cpu().numpy()

            class_name = self.class_names[np.argmax(preds)]
            probability = np.max(preds)

            plt.subplot(1,1,1)
            plt.imshow(np.array(old_image))
            plt.title('Class:%s Probability:%.3f' %(class_name, probability))
            plt.show()
            return class_name
    
    if __name__ == '__main__':
        # Classify a single sample image using the default settings.
        from PIL import Image
        img = Image.open('img/cat.jpg')
        class_name = Classification().detect_image(img)
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93
    • 94
    • 95
    • 96
    • 97

    6. 评估

    6.1 evaluteTop1

    (1) 评估流程

    '''
    1. 导入图片流,得到预测结果。
    2. 根据预测值和真实值,计算正确预测的样本数。
    3. Top1 = 正确预测的样本数/总样本
    
    '''
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6

    (2) 代码

    ''' 评价'''
    
    import numpy as np
    import torch
    from PIL import Image
    from torch.autograd import Variable
    
    from classification_ls import Classification, _preprocess_input
    from utils.utils_ls import letterbox_image
    
    class top1_Classification(Classification):
        """Classification variant whose ``detect_image`` returns a class index instead of plotting."""

        def detect_image(self, image):
            """Return the index of the highest-probability class for ``image``."""
            height, width, channels = self.input_shape[0], self.input_shape[1], self.input_shape[2]

            boxed = letterbox_image(image, [height, width])
            pixels = _preprocess_input(np.array(boxed, dtype=np.float32))
            # Add a batch axis, then move channels in front of height/width.
            batch = np.transpose(np.reshape(pixels, [1, height, width, channels]), (0, 3, 1, 2))

            with torch.no_grad():
                batch = Variable(torch.from_numpy(batch).type(torch.FloatTensor))
                if self.cuda:
                    batch = batch.cuda()
                first_output = self.model(batch)[0]
                preds = torch.softmax(first_output, dim=-1).cpu().numpy()

            return np.argmax(preds)
    
    def evaluteTop1(classfication, lines):
        """Compute top-1 accuracy over annotation lines.

        Args:
            classfication: Object whose ``detect_image(image)`` returns the
                predicted class index.
            lines: Iterable of strings of the form ``<class_id>;<image_path>``.
                Blank lines are skipped.

        Returns:
            Fraction of samples whose prediction equals the labelled class, or
            0.0 when there are no samples.
        """
        samples = [line.strip() for line in lines if line.strip()]
        total = len(samples)
        if total == 0:
            # Nothing to evaluate; avoid dividing by zero.
            return 0.0

        correct = 0
        for index, line in enumerate(samples):
            # Split on the first ';' only and keep the rest intact, so image
            # paths containing spaces are not truncated.
            label_text, _, annotation_path = line.partition(';')
            y = int(label_text)

            # Close each image file once it has been classified.
            with Image.open(annotation_path.strip()) as x:
                pred = classfication.detect_image(x)
            correct += int(pred == y)
            if index % 100 == 0:
                print("[%d/%d]"%(index,total))
        return correct / total
    
    if __name__ == '__main__':
        # Evaluate top-1 accuracy on every entry of the test annotation file.
        classfication = top1_Classification()
        with open(r"./cls_test.txt","r") as annotation_file:
            lines = list(annotation_file)
        top1 = evaluteTop1(classfication, lines)
        print("top-1 accuracy = %.2f%%" % (top1*100))
        
    '''
    model_data/mobilenet_catvsdog.pth model, and classes loaded.
    [0/26]
    top-1 accuracy = 100.00%
    
    Process finished with exit code 0
    
    
    '''
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57

    6.2 evaluteTop5

    (1) 评估流程

    '''
    1. 导入图片流,得到预测结果。按照概率对预测结果从大到小排列,取出前5个预测结果。
    2. 如果前5个预测结果有预测正确的,作为预测正确,记录正确预测的样本数。
    3. Top5 = 正确预测的样本数/总样本
    
    
    '''
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7

    (2) 代码

    import numpy as np
    import torch
    from PIL import Image
    from torch.autograd import Variable
    
    from classification_ls import Classification, _preprocess_input
    from utils.utils_ls import letterbox_image
    
    
    class top5_Classification(Classification):
        """Classification variant whose ``detect_image`` returns the five most probable class indices."""

        def detect_image(self, image):
            """Return up to five class indices for ``image``, most probable first."""
            height, width, channels = self.input_shape[0], self.input_shape[1], self.input_shape[2]

            boxed = letterbox_image(image, [height, width])
            # Image preprocessing: normalise the pixel values.
            pixels = _preprocess_input(np.array(boxed, dtype=np.float32))
            # Add a batch axis, then move channels in front of height/width.
            batch = np.transpose(np.reshape(pixels, [1, height, width, channels]), (0, 3, 1, 2))

            with torch.no_grad():
                batch = Variable(torch.from_numpy(batch).type(torch.FloatTensor))
                if self.cuda:
                    batch = batch.cuda()
                first_output = self.model(batch)[0]
                preds = torch.softmax(first_output, dim=-1).cpu().numpy()

            # argsort is ascending, so reverse it before taking the first five.
            ranked = np.argsort(preds)[::-1]
            return ranked[:5]
    
    def evaluteTop5(classfication, lines):
        """Compute top-5 accuracy over annotation lines.

        Args:
            classfication: Object whose ``detect_image(image)`` returns a
                collection of predicted class indices.
            lines: Iterable of strings of the form ``<class_id>;<image_path>``.
                Blank lines are skipped.

        Returns:
            Fraction of samples whose labelled class is among the returned
            predictions, or 0.0 when there are no samples.
        """
        samples = [line.strip() for line in lines if line.strip()]
        total = len(samples)
        if total == 0:
            # Nothing to evaluate; avoid dividing by zero.
            return 0.0

        correct = 0
        for index, line in enumerate(samples):
            # Split on the first ';' only and keep the rest intact, so image
            # paths containing spaces are not truncated.
            label_text, _, annotation_path = line.partition(';')
            y = int(label_text)

            # Close each image file once it has been classified.
            with Image.open(annotation_path.strip()) as x:
                pred = classfication.detect_image(x)
            correct += int(y in pred)
            if index % 100 == 0:
                print("[%d/%d]"%(index,total))
        return correct / total
    
    if __name__ == '__main__':
        # Evaluate top-5 accuracy on every entry of the test annotation file.
        classfication = top5_Classification()
        with open(r"./cls_test.txt","r") as annotation_file:
            lines = list(annotation_file)
        top5 = evaluteTop5(classfication, lines)
        print("top-5 accuracy = %.2f%%" % (top5*100))
    
    
    ''' 
    model_data/mobilenet_catvsdog.pth model, and classes loaded.
    [0/26]
    top-5 accuracy = 100.00%
    
    Process finished with exit code 0
    '''
    
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
  • 相关阅读:
    【JS函数】JS函数之防抖、节流函数
    java计算机毕业设计医院人事档案管理系源代码+系统+数据库+lw文档
    【C++】C / C++ 内存管理
    目标检测YOLO实战应用案例100讲-SAR图像多尺度舰船目标检测
    Linux部署elk日志监控系统
    数组的子集能否累加出K
    Tomcat部署及优化
    JavaScript 实现每次循环都等待
    1. 使用STM32CubeMX建立STM32G030C8T6项目工程
    【C++初阶(四)aoto关键字与基于范围的for循环】
  • 原文地址:https://blog.csdn.net/qq_35732321/article/details/126729684