• PyTorch笔记 - Convolution卷积运算的原理 (3)


    卷积操作包括5个参数:input、kernel、bias、stride、padding

    • input已包括padding,计算output尺寸时,不要再加上2*padding
    • output的索引是i/stride,j/stride,因为i和j的步长是stride
    def matrix_multiplication_for_conv2d(input, kernel, bias=0, stride=1, padding=0):
        """Compute a single-channel 2D convolution with an explicit sliding window.

        Args:
            input: 2-D tensor (height, width); batch and channel dims are not handled.
            kernel: 2-D tensor (kernel_h, kernel_w).
            bias: value added to every output element.
            stride: step of the sliding window along both axes.
            padding: number of zeros padded on each of the four sides of ``input``.

        Returns:
            A tensor of shape (output_h, output_w) created with ``torch.zeros``.
        """
        if padding > 0:
            # Pad once up front, so the size formula below needs no 2*padding term.
            input = F.pad(input, (padding,) * 4)

        (input_h, input_w), (kernel_h, kernel_w) = input.shape, kernel.shape

        # Output size: floor((in - k) / stride) + 1, computed on the padded input.
        n_rows = int((input_h - kernel_h) / stride + 1)  # output height
        n_cols = int((input_w - kernel_w) / stride + 1)  # output width
        result = torch.zeros(n_rows, n_cols)

        # The enumerate counter equals window_start / stride, i.e. the output index.
        for out_r, top in enumerate(range(0, input_h - kernel_h + 1, stride)):
            for out_c, left in enumerate(range(0, input_w - kernel_w + 1, stride)):
                window = input[top:top + kernel_h, left:left + kernel_w]
                # Element-wise product summed over the window, plus bias.
                result[out_r, out_c] = (window * kernel).sum() + bias

        return result
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17

    矩阵和矩阵相乘,转换为行向量和列向量相乘,即 输入矩阵9个行向量 x kernel的列向量

    与输入矩阵尺寸相同,填充kernel为0,计算矩阵相乘,转置卷积

    torch.flatten() 操作,多维向量变成1维向量

    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    
    # torch.flatten() demo: collapse a 4-D tensor into a 1-D tensor.
    a = torch.randn(1, 1, 2, 3)
    b = torch.flatten(a)
    # Print both tensors so the sample output recorded below is actually produced.
    print(a)
    print(b)
    """
    tensor([[[[ 1.0875,  0.1187, -0.0439],
              [ 0.2802,  0.7416, -0.0189]]]])
    tensor([ 1.0875,  0.1187, -0.0439,  0.2802,  0.7416, -0.0189])
    """
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11

    torch.numel() 操作

    # numel() demo: total number of elements, i.e. the product of all dimensions.
    a = torch.randn(1, 1, 2, 3)
    b = torch.randn(2, 3, 4)
    print(torch.numel(a))
    print(torch.numel(b))
    """
    6
    24
    """
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8

    torch.reshape() 操作

    # reshape() demo: -1 lets PyTorch infer that dimension, giving a column vector.
    a = torch.randn(1, 1, 2, 3)
    b = torch.reshape(a, (-1, 1))
    print(b.shape)
    print(b)
    """
    torch.Size([6, 1])
    tensor([[ 0.8519],
            [ 0.1513],
            [-0.1491],
            [-1.2765],
            [-0.1445],
            [ 1.0137]])
    """
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13

    flatten input版本:与逐个区域直接相乘再求和的版本相比,优势是只需要1次矩阵相乘,节省计算量

    • matrix_multiplication_for_conv2d
    • matrix_multiplication_for_conv2d_flatten
    • F.conv2d
    # Step 1: 2D convolution with plain tensor operations,
    # ignoring the batch-size and channel dimensions for now.
    input = torch.randn((5, 5))  # input feature map
    kernel = torch.randn((3, 3))  # convolution kernel
    bias = torch.randn((1,))  # convolution bias; a single output channel here
    
    
    def matrix_multiplication_for_conv2d(input, kernel, bias=0, stride=1, padding=0):
        """Compute a single-channel 2D convolution with an explicit sliding window.

        Args:
            input: 2-D tensor (height, width); batch and channel dims are not handled.
            kernel: 2-D tensor (kernel_h, kernel_w).
            bias: value added to every output element.
            stride: step of the sliding window along both axes.
            padding: number of zeros padded on each of the four sides of ``input``.

        Returns:
            A tensor of shape (output_h, output_w) created with ``torch.zeros``.
        """
        if padding > 0:
            # Pad once up front, so the size formula below needs no 2*padding term.
            input = F.pad(input, (padding,) * 4)

        (input_h, input_w), (kernel_h, kernel_w) = input.shape, kernel.shape

        # Output size: floor((in - k) / stride) + 1, computed on the padded input.
        n_rows = int((input_h - kernel_h) / stride + 1)  # output height
        n_cols = int((input_w - kernel_w) / stride + 1)  # output width
        result = torch.zeros(n_rows, n_cols)

        # The enumerate counter equals window_start / stride, i.e. the output index.
        for out_r, top in enumerate(range(0, input_h - kernel_h + 1, stride)):
            for out_c, left in enumerate(range(0, input_w - kernel_w + 1, stride)):
                window = input[top:top + kernel_h, left:left + kernel_w]
                # Element-wise product summed over the window, plus bias.
                result[out_r, out_c] = (window * kernel).sum() + bias

        return result
    
      
    # flatten input 版本
    def matrix_multiplication_for_conv2d_flatten(input, kernel, bias=0, stride=1, padding=0):
        """Compute a single-channel 2D convolution with one matrix product.

        Every kernel-sized window of the (padded) input is flattened into one row
        of a region matrix; the kernel is flattened into a column vector, so a
        single ``region_matrix @ kernel_matrix`` yields all output elements.

        Args:
            input: 2-D tensor (height, width); batch and channel dims are not handled.
            kernel: 2-D tensor (kernel_h, kernel_w).
            bias: value added to every output element.
            stride: step of the sliding window along both axes.
            padding: number of zeros padded on each of the four sides of ``input``.

        Returns:
            A tensor of shape (output_h, output_w).
        """
        if padding > 0:
            # Pad once up front, so the size computation needs no 2*padding term.
            input = F.pad(input, (padding, padding, padding, padding))
        input_h, input_w = input.shape
        kernel_h, kernel_w = kernel.shape

        # Top-left corners of all windows; their counts are the output sizes,
        # i.e. floor((in - k) / stride) + 1 whenever the kernel fits.
        row_starts = range(0, input_h - kernel_h + 1, stride)
        col_starts = range(0, input_w - kernel_w + 1, stride)
        output_h, output_w = len(row_starts), len(col_starts)

        # One row per output element, one column per kernel element.
        region_matrix = torch.zeros(output_h * output_w, kernel.numel())
        kernel_matrix = kernel.reshape((-1, 1))  # kernel as a column vector

        for out_i, i in enumerate(row_starts):  # iterate over the height axis
            for out_j, j in enumerate(col_starts):  # iterate over the width axis
                region = input[i:i + kernel_h, j:j + kernel_w]
                # Row-major index into the OUTPUT grid. Indexing with the input
                # offsets (i * output_h + j) is only right for stride 1 and a
                # square output.
                region_matrix[out_i * output_w + out_j] = torch.flatten(region)

        output_matrix = region_matrix @ kernel_matrix + bias
        return output_matrix.reshape(output_h, output_w)
    
    
    # Result of the sliding-window (element-wise multiply and sum) implementation
    mat_mul_conv_output = matrix_multiplication_for_conv2d(input, kernel, padding=1, bias=bias)
    print(f'mat_mul_conv_output: \n{mat_mul_conv_output}')

    # Result of the flatten-input (single matrix product) implementation
    mat_mul_flatten_conv_output = matrix_multiplication_for_conv2d_flatten(input, kernel, padding=1, bias=bias)
    print(f'mat_mul_flatten_conv_output: \n{mat_mul_flatten_conv_output}')

    # Reference result from the PyTorch API; F.conv2d expects 4-D tensors, so two
    # leading singleton dimensions are added to the input and to the kernel.
    pytorch_api_conv_output = F.conv2d(
        input.unsqueeze(0).unsqueeze(0),
        kernel.unsqueeze(0).unsqueeze(0),
        bias=bias,
        padding=1,
    )
    print(f'F.conv2d: \n{pytorch_api_conv_output.reshape(mat_mul_conv_output.shape)}')

    # Check that both hand-written versions agree with the PyTorch API
    # (the 2-D results broadcast against the 4-D API output).
    flag1 = torch.allclose(mat_mul_conv_output, pytorch_api_conv_output)
    print(f"flag1: {flag1}")
    flag2 = torch.allclose(pytorch_api_conv_output, mat_mul_flatten_conv_output)
    print(f"flag2: {flag2}")
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67

    用原始的矩阵运算来实现二维卷积, 考虑batchsize维度和channel维度, 4维

    • bias形状和output channel是一致的
    # step3 用原始的矩阵运算来实现二维卷积, 考虑batchsize维度和channel维度, 4维
    # bias形状和output channel是一致的
    def matrix_multiplication_for_conv2d_full(input, kernel, bias=None, stride=1, padding=0):
        """Compute a batched, multi-channel 2D convolution with explicit loops.

        Args:
            input: 4-D tensor (batch, in_channel, height, width).
            kernel: 4-D tensor (out_channel, in_channel, kernel_h, kernel_w).
            bias: per-output-channel values indexable as ``bias[oc]``, or None
                for a zero bias.
            stride: step of the sliding window along both spatial axes.
            padding: number of zeros padded on each side of the two spatial axes.

        Returns:
            A tensor of shape (batch, out_channel, output_h, output_w).

        Raises:
            ValueError: if input and kernel disagree on the number of input channels.
        """
        if padding > 0:
            # Pad pairs run from the innermost dimension outwards:
            # width, height, channel, batch. Only the spatial dims are padded.
            input = F.pad(input, (padding, padding, padding, padding, 0, 0, 0, 0))

        bs, in_channel, input_h, input_w = input.shape
        out_channel, kernel_in_channel, kernel_h, kernel_w = kernel.shape
        if in_channel != kernel_in_channel:
            raise ValueError(
                f"input has {in_channel} channels but kernel expects {kernel_in_channel}"
            )

        # Identity check: `bias == None` would go through the tensor's __eq__.
        if bias is None:
            bias = torch.zeros(out_channel)

        # Top-left corners of all windows; their counts are the output sizes,
        # i.e. floor((in - k) / stride) + 1 whenever the kernel fits.
        row_starts = range(0, input_h - kernel_h + 1, stride)
        col_starts = range(0, input_w - kernel_w + 1, stride)
        output_h, output_w = len(row_starts), len(col_starts)

        output = torch.zeros(bs, out_channel, output_h, output_w)

        for ind in range(bs):
            for oc in range(out_channel):
                for ic in range(in_channel):
                    for out_i, i in enumerate(row_starts):  # height axis
                        for out_j, j in enumerate(col_starts):  # width axis
                            region = input[ind, ic, i:i + kernel_h, j:j + kernel_w]
                            # Accumulate this input channel's contribution.
                            output[ind, oc, out_i, out_j] += torch.sum(region * kernel[oc, ic])
                # Bias is added once per output channel, after all input channels.
                output[ind, oc] += bias[oc]

        return output
    
    
    input = torch.randn((2, 2, 5, 5))  # input feature map: bs * in_channel * in_h * in_w
    kernel = torch.randn((3, 2, 3, 3))  # kernel: out_channel * in_channel * kernel_h * kernel_w
    bias = torch.randn((3,))  # convolution bias, one value per output channel (3 here)

    # The hand-written full implementation should match the official PyTorch API.
    mm_conv2d_full_output = matrix_multiplication_for_conv2d_full(input, kernel, bias=bias, stride=2, padding=1)
    pytorch_conv2d_api_output = F.conv2d(input, kernel, bias=bias, stride=2, padding=1)

    flag = torch.allclose(pytorch_conv2d_api_output, mm_conv2d_full_output)
    print(f"flag: {flag}")  # True
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
  • 相关阅读:
    鲈鱼的面试题库+答案
    C语言牛客网(NowCoder)刷题篇
    拥有这个中文版CustomGPT,你也能定制自己的AI问答机器人
    【Android入门】4、数据持久化:文件、SharedPreferences 和 Sqlite
    前馈神经网络自动梯度计算和预定义算子
    Python中RotatingFileHandler、TimedRotatingFileHandler函数用法
    golang读取conf文件的两种方式(ini和Viper)
    系统集成|第十六章(笔记)
    全局大喇叭--广播机制
    用Java包com.sun.net.httpserver下面的类实现一个简单的http服务器demo
  • 原文地址:https://blog.csdn.net/u012515223/article/details/126265676