(Advanced PyTorch, Part 3) Conv2d


Preface: 2D convolution is fairly simple, so the theory is only sketched here.
[Figure: multi-channel 2D convolution, omitted]
Still, a quick recap: with in_channel = 2 and out_channel = 3 there are 6 kernels. Each output channel is produced by convolving every input channel with its own kernel and summing the resulting matrices; the matrices of all output channels are then stacked to form the output.
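A minimal check of the kernel count, using the sizes from the description above (the layer itself is just an illustration):

import torch.nn as nn
layer = nn.Conv2d(in_channels=2, out_channels=3, kernel_size=3)
# weight shape is [out_channel, in_channel, k_h, k_w]: 3*2 = 6 2D kernels
print(layer.weight.shape)  # torch.Size([3, 2, 3, 3])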

With stride 1: output size = input size - kernel size + 2*padding + 1.
So to keep the output the same size as the input, the required padding follows from this formula: for an odd kernel, padding = (kernel size - 1) / 2, e.g. padding = 1 for a 3×3 kernel.

padding='same' computes this automatically.
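A quick hedged example (padding='same' requires a reasonably recent PyTorch, 1.9+, and stride=1):

import torch
import torch.nn as nn
# padding='same' keeps the spatial size unchanged for stride=1
conv = nn.Conv2d(1, 1, kernel_size=3, padding='same')
x = torch.randn(1, 1, 4, 4)
print(conv(x).shape)  # torch.Size([1, 1, 4, 4])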


    Conv2d API

The API itself is straightforward, so only a quick demo follows.

    import torch
    import torch.nn.functional as F
    
in_channel = 1
out_channel = 1
kernel_size = 3
bias = False
height = 4
width = 4
batch_size = 1
input_size = [batch_size, in_channel, height, width]  # [1, 1, 4, 4]
# instantiate a 2D convolution layer
conv_layer = torch.nn.Conv2d(in_channel, out_channel, kernel_size, bias=bias)
conv_input = torch.randn(input_size)
print(conv_input, "# conv_input")
conv_output = conv_layer(conv_input)
print(conv_output, "# conv_output")
print(conv_layer.weight.shape, "# [out_ch, in_ch, h, w]")
# the functional API
conv_F_output = F.conv2d(conv_input, conv_layer.weight)
print(conv_F_output, "# conv_F_output")
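For reference: with a 4×4 input and a 3×3 kernel (stride 1, no padding), the formula above gives (4 - 3)/1 + 1 = 2, so conv_output and conv_F_output both have shape [1, 1, 2, 2].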
    
    

Implementing PyTorch 2D convolution with raw matrix sliding

input shape: batch_size, in_channel, h, w
kernel shape: out_channel, in_channel, kernel_h, kernel_w
bias is a scalar, but it differs per output channel
Five nested loops (only the simplest implementation here),
iterating over batch_size, out_channel, in_channel, h, w.
For each input channel, compute the region products, then accumulate the results into the out_channel dimension: so h and w are innermost, then in_channel, then out_channel; and since that only handles a single sample, wrap an outer batch_size loop around everything.

# 2D convolution implemented with raw matrix operations
import math
import torch
import torch.nn.functional as F

batch_size = 2
in_channel = 2
inh = 5
inw = 5
out_channel = 3
kh = 3
kw = 3
tmp_input = torch.ones(batch_size, in_channel, inh, inw)
tmp_kernel = torch.ones(out_channel, in_channel, kh, kw) / 2
# bias is a scalar, but differs per output channel
tmp_bias = torch.randn(out_channel)

def matrix_multiplication_for_conv2d_full(input_tensor: torch.Tensor,
                                          kernel: torch.Tensor, stride=1,
                                          padding=0, bias=None):
    """
    Handles the batch_size and channel dimensions;
    input and kernel are both 4D.
    """
    if padding > 0:
        # the batch_size and channel dimensions need no padding
        input_tensor = F.pad(input_tensor, (padding, padding, padding, padding))
    # input shape: batch_size, in_channel, h, w
    bs, in_ch, input_h, input_w = input_tensor.shape
    # kernel shape: out_channel, in_channel, kernel_h, kernel_w
    out_ch, in_ch, kernel_h, kernel_w = kernel.shape
    if bias is None:
        bias = torch.zeros(out_ch)

    output_h = math.floor((input_h - kernel_h) / stride) + 1
    output_w = math.floor((input_w - kernel_w) / stride) + 1
    # initialize the output tensor
    output = torch.zeros(bs, out_ch, output_h, output_w)
    # 5 nested loops: batch_size, out_channel, in_channel, h, w
    for ind in range(bs):
        for oc in range(out_ch):
            for ic in range(in_ch):
                for i in range(0, input_h - kernel_h + 1, stride):
                    for j in range(0, input_w - kernel_w + 1, stride):
                        # slice the input region
                        region = input_tensor[ind, ic, i:i+kernel_h, j:j+kernel_w]
                        # element-wise product with the kernel; the output
                        # channel sums over all input channels, hence +=
                        # (index the kernel by output channel oc, input channel ic)
                        output[ind, oc, i // stride, j // stride] \
                            += torch.sum(region * kernel[oc, ic])
            # bias is added to each output channel
            output[ind, oc] += bias[oc]
    return output


res1 = matrix_multiplication_for_conv2d_full(tmp_input, tmp_kernel, padding=1,
                                             bias=tmp_bias, stride=2)
res2 = F.conv2d(tmp_input, tmp_kernel,
                padding=1, bias=tmp_bias, stride=2)
flag = torch.allclose(res1, res2)
print(flag)
print(res1)
print(res2)
    

Implementing PyTorch 2D convolution from the inner-product view

2D convolution can be rewritten as a product of two matrices: each convolution region can be flattened into a row vector and the kernel into a column vector, so each region-vector times kernel-vector product yields a scalar, which is one output element.

Another approach is to pad the kernel up to the same shape as the input and multiply the matrices directly; this is also the basis of the later transposed-convolution idea.

Here we implement the first idea.

# flattened-matrix implementation of 2D convolution
# (reuses math, torch and F imported above)
tmp_input = torch.ones(5, 5)
tmp_kernel = torch.ones(3, 3) / 2
tmp_bias = torch.randn(1)

def matrix_multiplication_for_conv2d_flatten(input_tensor: torch.Tensor,
                                             kernel: torch.Tensor, stride=1,
                                             padding=0, bias=0):
    """
    Flattened version; ignores the batch_size and channel dimensions.
    """
    if padding > 0:
        input_tensor = F.pad(input_tensor, (padding, padding, padding, padding))
    input_h, input_w = input_tensor.shape
    kernel_h, kernel_w = kernel.shape
    output_h = math.floor((input_h - kernel_h) / stride) + 1
    output_w = math.floor((input_w - kernel_w) / stride) + 1
    # initialize the output tensor
    output = torch.zeros(output_h, output_w)
    # stores the flattened feature regions
    region_matrix = torch.zeros(output.numel(), kernel.numel())
    # turn the kernel into a column vector
    kernel_matrix = kernel.reshape((kernel.numel(), 1))
    row_index = 0
    for i in range(0, input_h - kernel_h + 1, stride):
        for j in range(0, input_w - kernel_w + 1, stride):
            # slice the input region and flatten it into a row
            region = input_tensor[i:i+kernel_h, j:j+kernel_w]
            region_matrix[row_index] = torch.flatten(region)
            row_index += 1
    # one matrix multiplication produces all outputs at once
    output_matrix = region_matrix @ kernel_matrix
    output = output_matrix.reshape((output_h, output_w)) + bias
    return output


res1 = matrix_multiplication_for_conv2d_flatten(tmp_input, tmp_kernel, padding=1,
                                                bias=tmp_bias)
res2 = F.conv2d(tmp_input.reshape(1, 1, tmp_input.shape[0], tmp_input.shape[1]),
                tmp_kernel.reshape(1, 1, tmp_kernel.shape[0], tmp_kernel.shape[1]),
                padding=1, bias=tmp_bias)
flag = torch.allclose(res1, res2)
print(flag)
print(res1)
print(res2)
    

This can also be done with the nn.Unfold API. Unfold does essentially what the flatten code above does: it extracts the sliding local blocks from a batch of inputs, i.e., the sliding windows that the kernel filter visits during convolution.

The kernel region size is [2, 3].
The input shape (batch, in_channel, h, w) is 2, 5, 3, 4.

Each extracted convolution region therefore holds in_channel * kernel_h * kernel_w = 5 * 2 * 3 = 30 values.

With a 3×4 input, a 2×3 kernel, default stride=1 and padding=0, there are 4 blocks, i.e., a single convolution output is 2×2.

So the Unfold output shape is [2, 30, 4].

How to use it: ignoring the batch_size dimension, the Unfold output is [30, 4]; transpose it to [4, 30] as a.
Assume out_channel = 1, so the kernel size is [1, 5, 2, 3] (out_ch=1, in_ch=5, kernel_h=2, kernel_w=3); reshape [1, 5, 2, 3] to [1, 30] and transpose to [30, 1] as b.
Then one matrix multiplication gives the result: res = a @ b.

import torch
import torch.nn as nn

unfold = nn.Unfold(kernel_size=(2, 3))
input = torch.randn(2, 5, 3, 4)
output = unfold(input)
# each patch contains 30 values (2x3=6 vectors, each of 5 channels)
# 4 blocks (2x3 kernels) in total in the 3x4 input
print(output.size())  # torch.Size([2, 30, 4])

# Convolution is equivalent to Unfold + Matrix Multiplication + Fold
# (or view to output shape)
inp = torch.randn(1, 3, 10, 12)
w = torch.randn(2, 3, 4, 5)
inp_unf = torch.nn.functional.unfold(inp, (4, 5))
out_unf = inp_unf.transpose(1, 2).matmul(w.view(w.size(0), -1).t()).transpose(1, 2)
out = torch.nn.functional.fold(out_unf, (7, 8), (1, 1))
# or equivalently (and avoiding a copy):
# out = out_unf.view(1, 2, 7, 8)
print((torch.nn.functional.conv2d(inp, w) - out).abs().max())
    

Implementing PyTorch 2D transposed convolution from the kernel-unrolling view

Transposed convolution, also called deconvolution, performs upsampling, restoring the input's spatial size.

Unroll the kernel and flatten the input: a 4×4 input becomes 16×1. For each step, pad the kernel out to a length-16 vector, and stack these vectors into a matrix of shape [(input_h - kernel_h + 1) * (input_w - kernel_w + 1), input_h * input_w]. Multiplying this matrix ([4, 16]) with the flattened input ([16, 1]) also yields the 2D convolution result ([4, 1]).

Transpose the unrolled kernel matrix ([16, 4]) and multiply it with the output ([4, 1]) to get a [16, 1] result. This achieves upsampling in terms of shape, i.e., it restores the input's shape (the shape only, not the values).

# unroll the kernel into a matrix
def get_kernel_matrix(kernel, input_tensor):
    """Ignore batch, channel and padding for now, and assume stride=1.
    Build the kernel matrix: at every sliding position, zero-pad the
    kernel to the input size (e.g. a 3x3 kernel padded to 4x4) and
    flatten it; stack all the flattened vectors into a matrix.
    """
    kernel_h, kernel_w = kernel.shape
    input_h, input_w = input_tensor.shape
    num_out_feat_map = (input_h - kernel_h + 1) * (input_w - kernel_w + 1)
    # initialize the result matrix
    result = torch.zeros((num_out_feat_map, input_h * input_w))
    count = 0
    # loop over the height and width dimensions
    for i in range(0, input_h - kernel_h + 1, 1):
        for j in range(0, input_w - kernel_w + 1, 1):
            # pad with zeros (left/right first, then top/bottom)
            # so the padded kernel matches the input size
            padded_kernel = F.pad(kernel, [j, input_w - kernel_w - j,
                                           i, input_h - kernel_h - i])
            # flatten each padded kernel into a row of result
            result[count] = padded_kernel.flatten()
            count += 1
    return result

def test_get_kernel_matrix():
    kernel = torch.randn(3, 3)
    input = torch.randn(4, 4)
    kernel_matrix = get_kernel_matrix(kernel, input)
    # print(kernel_matrix, '\n', kernel_matrix.shape)
    mm_conv2d_output = kernel_matrix @ input.reshape((-1, 1))
    pytorch_conv2d_output = F.conv2d(input.unsqueeze(0).unsqueeze(0),
                                     kernel.unsqueeze(0).unsqueeze(0))
    print(mm_conv2d_output, "\n", pytorch_conv2d_output)

test_get_kernel_matrix()

def test_transpose_conv2d_demo():
    # upsampling via transposed convolution
    kernel = torch.randn(3, 3)
    input = torch.randn(4, 4)
    kernel_matrix = get_kernel_matrix(kernel, input)
    mm_conv2d_output = kernel_matrix @ input.reshape((-1, 1))
    pytorch_conv2d_output = F.conv2d(input.unsqueeze(0).unsqueeze(0),
                                     kernel.unsqueeze(0).unsqueeze(0))
    # [16, 4] @ [4, 1]
    mm_transposed_conv2d_output = kernel_matrix.transpose(-1, -2) @ mm_conv2d_output
    py_transpose_conv2d_output = F.conv_transpose2d(pytorch_conv2d_output,
                                                    kernel.unsqueeze(0).unsqueeze(0))
    print(mm_transposed_conv2d_output.reshape(4, 4))
    print(py_transpose_conv2d_output)

test_transpose_conv2d_demo()
    

Summary

Note that F.pad pads dimensions from the innermost (last) one outward, and each dimension is padded (left, right):
F.pad(input, pad), where pad is a flat sequence such as (left, right, top, bottom)
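A minimal example of this ordering (shapes chosen arbitrarily):

import torch
import torch.nn.functional as F
x = torch.ones(1, 1, 2, 2)
# (1, 1) pads the last dim (w) left/right, (2, 2) pads h top/bottom
print(F.pad(x, (1, 1, 2, 2)).shape)  # torch.Size([1, 1, 6, 4])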
Input shape:
input shape = [batch, in_channel, i_h, i_w]

Kernel shape:
kernel shape = [out_channel, in_channel, k_h, k_w]

Output height:
out_h = floor((i_h - k_h)/stride) + 1

Output width:
out_w = floor((i_w - k_w)/stride) + 1

Output shape = [bs, oc, out_h, out_w]
Number of sliding blocks: out_n = out_h * out_w (each block holds in_channel * k_h * k_w values)

Sliding-window loops:
i in range(0, i_h - k_h + 1, stride)
j in range(0, i_w - k_w + 1, stride)

Region slice:
region = input[bs, ic, i:i+k_h, j:j+k_w]

Kernel index:
kernel[oc, ic]

Output position:
output[bs, oc, i // stride, j // stride]

Convolution step:
sum(region * kernel[oc, ic])

Convolution from the input-flattening view:
region_flatten_vector shape = [k_n,]

output = region_flatten_matrix @ kernel_vector (shape = [out_n, k_n] @ [k_n, 1] = [out_n, 1])

output.reshape((out_h, out_w))

Convolution from the kernel-unrolling view:
input_vector shape = [-1, 1] ([i_h * i_w, 1])

kernel_padded = F.pad(kernel, (j, i_w - k_w - j, i, i_h - k_h - i)).flatten() (shape = [i_h * i_w,])

kernel_matrix[count] = kernel_padded

kernel_matrix shape = [out_n, i_h * i_w]

output = kernel_matrix @ input_vector (shape = [out_n, 1])

reshape((out_h, out_w))

Upsampling with transposed (de-)convolution:
kernel_matrix shape = [out_n, i_h * i_w]
output shape = [out_n, 1]

transposed_map = kernel_matrix.transpose(-1, -2) @ output (shape = [i_h * i_w, out_n] @ [out_n, 1])

Equivalently, use the API instead of building kernel_matrix yourself:
F.conv_transpose2d(output, kernel)

Why this works, from the matrix angle:
y = W x
∂y/∂x = W^T
y' = W^T y
so y' has the same shape as x:
y[m, n] = W[m, p] @ x[p, n]
W^T[p, m] @ y[m, n] = y'[p, n]
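A minimal numeric sketch of this matrix view (W below is a stand-in for the unrolled kernel matrix of the 4×4-input, 3×3-kernel example; values are random, only the shapes matter):

import torch
W = torch.randn(4, 16)   # unrolled kernel matrix, [out_n, i_h * i_w]
x = torch.randn(16, 1)   # flattened 4x4 input
y = W @ x                # "convolution" output, shape [4, 1]
x_up = W.T @ y           # the transpose restores the shape, [16, 1]
print(y.shape, x_up.shape)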

Dilated convolution

Dilation
The dilation parameter of nn.Conv2d defaults to 1.

An ordinary convolution takes a contiguous, kernel-sized region of the input: that is dilation=1, adjacent sampled elements being 1 index apart.

With dilation=2, adjacent sampled elements are 2 indices apart.

In other words, dilation determines whether the sampled region is contiguous.

The goal is to enlarge the receptive field while keeping the amount of computation unchanged; the effective kernel extent becomes dilation*(k-1)+1.

def dilation_demo():
    a = torch.randn(7, 7)
    print(a)
    # dilation = 1: a contiguous 3x3 region
    print(a[0:3, 0:3])
    # dilation = 2: sampled elements are 2 indices apart
    print(a[0:5:2, 0:5:2])
    # dilation = 3: sampled elements are 3 indices apart
    print(a[0:7:3, 0:7:3])

dilation_demo()
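To tie the slicing above back to the API, a small hedged check (shapes are assumptions): the first output element of a dilated F.conv2d should equal the dilation=2 slice multiplied element-wise by the kernel and summed.

import torch
import torch.nn.functional as F
a = torch.randn(1, 1, 7, 7)
k = torch.randn(1, 1, 3, 3)
out = F.conv2d(a, k, dilation=2)
# the window at (0, 0) covers rows/cols 0, 2, 4
manual = torch.sum(a[0, 0, 0:5:2, 0:5:2] * k[0, 0])
print(torch.allclose(out[0, 0, 0, 0], manual))  # True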
    

Grouped convolution

With groups=1, every input channel is convolved and the results are summed into each output channel.

With groups>1, one large convolution is treated as several smaller ones.
Suppose in_channel = 2 and out_channel = 4.

With groups=1
there are 8 2D kernels.

With groups=2
the input and output channels are split into two groups:
sub in_channel = 1
sub out_channel = 2
Each group then has 2 kernels, and with two groups there are 4 kernels in total,
halving the convolution parameters.
But this introduces an inductive bias (a prior assumption): we only need to model interactions within a small subset of channels, not between every channel and all the others. With groups=1, every input channel is mixed into each output; with groups>1, each convolution mixes only the channels inside its group, the next group's channels are convolved separately, and the results are concatenated. Channel mixing is therefore incomplete: channels mix within a group but never across groups.

Finally, a 1×1 point-wise convolution can mix all the channels together, as sketched below.
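A minimal sketch of that combination (the layer sizes are assumptions matching the groups=2 example above): a grouped 3×3 convolution followed by a 1×1 point-wise convolution that re-mixes channels across groups, as in depthwise-separable convolutions.

import torch
import torch.nn as nn

x = torch.randn(1, 2, 5, 5)
grouped = nn.Conv2d(2, 4, kernel_size=3, padding=1, groups=2)  # 4 kernels instead of 8
pointwise = nn.Conv2d(4, 4, kernel_size=1)  # mixes channels across groups
y = pointwise(grouped(x))
print(y.shape)  # torch.Size([1, 4, 5, 5])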

Implementing dilated and grouped convolution

Padding comes first, same as in the regular convolution. To handle groups, make sure ic and oc are divisible by groups, reshape the input and kernel to split out the groups dimension, compute the (dilated) kernel height and width, then the output height and width. The rest is the usual convolution traversal.

import torch
import torch.nn as nn
import torch.nn.functional as F
import math


def matrix_multiplication_for_conv2d_final(input, kernel, bias=None, stride=1,
                                           padding=0, dilation=1, groups=1):
    if padding > 0:
        input = F.pad(input, (padding, padding, padding, padding))

    # batch_size, in_channel, input h, input w
    bs, ic, ih, iw = input.shape
    # out_channel, _, kernel h, kernel w
    oc, _ic, kh, kw = kernel.shape
    if bias is None:
        bias = torch.zeros(oc)
    # handle groups: both ic and oc must be divisible by groups
    assert oc % groups == 0 and ic % groups == 0, \
        "both channel counts must be divisible by groups!"
    # reshape to split out the groups dimension
    input = input.reshape((bs, groups, ic // groups, ih, iw))
    kernel = kernel.reshape((groups, oc // groups, ic // groups, kh, kw))
    # dilation-1 holes are inserted between adjacent taps, kh-1 times,
    # so the effective kernel extent becomes
    kh = (dilation - 1) * (kh - 1) + kh
    kw = (dilation - 1) * (kw - 1) + kw
    # output height and width; dilation is already baked into kh and kw
    oh = int(math.floor((ih - kh) / stride)) + 1
    ow = int(math.floor((iw - kw) / stride)) + 1
    output_shape = (bs, groups, oc // groups, oh, ow)
    # initialize the output
    output = torch.zeros(output_shape)
    for ind in range(bs):                           # batch
        for g in range(groups):                     # group
            for oc_ind in range(oc // groups):      # output channels within the group
                for ic_ind in range(ic // groups):  # input channels within the group
                    for i in range(0, ih - kh + 1, stride):      # height
                        for j in range(0, iw - kw + 1, stride):  # width
                            # slice the (dilated) region
                            region = input[ind, g, ic_ind,
                                           i:i+kh:dilation, j:j+kw:dilation]
                            output[ind, g, oc_ind, i // stride, j // stride] += \
                                torch.sum(region * kernel[g, oc_ind, ic_ind])
                # bias: index by how many output channels precede this one
                output[ind, g, oc_ind] += bias[g * (oc // groups) + oc_ind]
    # restore the 4D shape
    output = output.reshape((bs, oc, oh, ow))
    return output


def test_conv2d_final():
    bs, ic, ih, iw = 2, 2, 5, 5
    kh, kw = 3, 3
    oc = 4
    groups, dilation, stride = 2, 2, 2
    padding = 1

    input = torch.randn(bs, ic, ih, iw)
    # with groups > 1 there are fewer kernels: fewer input channels per filter
    kernel = torch.randn(oc, ic // groups, kh, kw)
    bias = torch.randn(oc)

    py_res = F.conv2d(input, kernel, bias=bias, padding=padding, stride=stride,
                      dilation=dilation, groups=groups)
    my_res = matrix_multiplication_for_conv2d_final(
        input, kernel, bias=bias, padding=padding, stride=stride,
        dilation=dilation, groups=groups)

    flag = torch.allclose(py_res, my_res)
    print(flag)

test_conv2d_final()
    
Original article: https://blog.csdn.net/qq_19841133/article/details/125561026