卷积操作包括5个参数:input、kernel、bias、stride、padding
def matrix_multiplication_for_conv2d(input, kernel, bias=0, stride=1, padding=0):
    """Compute a single-channel 2-D convolution with an explicit sliding window.

    Batch and channel dimensions are not handled: ``input`` and ``kernel``
    are both unpacked as 2-D ``(height, width)`` tensors.

    Args:
        input: 2-D feature map.
        kernel: 2-D convolution kernel.
        bias: Value added to every output element (a number or a
            one-element tensor).
        stride: Step of the sliding window along both axes.
        padding: Number of zeros added on each of the four sides.

    Returns:
        A ``torch.zeros``-allocated tensor of shape ``(output_h, output_w)``
        holding the convolution result. It is empty when the kernel is
        larger than the padded input.
    """
    if padding > 0:
        # The pad tuple is (left, right, top, bottom) for the last two dims.
        input = F.pad(input, (padding, padding, padding, padding))

    input_h, input_w = input.shape
    kernel_h, kernel_w = kernel.shape

    # Padding is already baked into `input`, so it does not appear in the
    # size formula. Integer floor division avoids float rounding.
    output_h = max((input_h - kernel_h) // stride + 1, 0)  # output height
    output_w = max((input_w - kernel_w) // stride + 1, 0)  # output width
    output = torch.zeros(output_h, output_w)

    # enumerate() yields the output index directly instead of recovering it
    # from the input offset with a division.
    for out_i, i in enumerate(range(0, input_h - kernel_h + 1, stride)):
        for out_j, j in enumerate(range(0, input_w - kernel_w + 1, stride)):
            region = input[i:i + kernel_h, j:j + kernel_w]
            # Element-wise product summed over the window, plus the bias.
            output[out_i, out_j] = torch.sum(region * kernel) + bias
    return output
将卷积转换为矩阵相乘:把输入的每个滑动窗口区域拉平为一个行向量,把 kernel 拉平为一个列向量,再让行向量与列向量相乘,即 输入矩阵的 9 个行向量 x kernel 的列向量
另一种思路:将 kernel 用 0 填充到与输入矩阵相同的尺寸,再计算矩阵相乘;这一思路可以引出转置卷积
torch.flatten() 操作,多维向量变成1维向量
import torch
import torch.nn as nn
import torch.nn.functional as F
# Demo: collapse a random 4-D tensor into a single dimension.
a = torch.randn((1, 1, 2, 3))
# start_dim=0 and end_dim=-1 are spelled out here; they merge every dimension.
b = torch.flatten(a, start_dim=0, end_dim=-1)
# The string below records sample values of `a` and `b` from one run.
"""
tensor([[[[ 1.0875, 0.1187, -0.0439],
[ 0.2802, 0.7416, -0.0189]]]])
tensor([ 1.0875, 0.1187, -0.0439, 0.2802, 0.7416, -0.0189])
"""
torch.numel() 操作
# Demo: torch.numel reports the total number of elements in a tensor.
a = torch.randn((1, 1, 2, 3))
b = torch.randn((2, 3, 4))
print(torch.numel(a))
print(torch.numel(b))
# The string below records the two printed counts.
"""
6
24
"""
torch.reshape() 操作
# Demo: reshape a random 4-D tensor into a column; -1 lets the row count
# be inferred from the number of elements.
a = torch.randn((1, 1, 2, 3))
b = torch.reshape(a, (-1, 1))
print(b.shape)
print(b)
# The string below records the printed shape and sample values from one run.
"""
torch.Size([6, 1])
tensor([[ 0.8519],
[ 0.1513],
[-0.1491],
[-1.2765],
[-0.1445],
[ 1.0137]])
"""
flatten input 版本:与逐个区域做点乘的实现相比,优势在于只需要 1 次矩阵相乘,节省计算量。
matrix_multiplication_for_conv2d
matrix_multiplication_for_conv2d_flatten
F.conv2d
# Step 1: implement 2-D convolution with plain tensor operations, ignoring
# the batch and channel dimensions for now.
input = torch.randn((5, 5))   # input feature map
kernel = torch.randn((3, 3))  # convolution kernel
bias = torch.randn((1,))      # convolution bias: one value for the single output channel
def matrix_multiplication_for_conv2d(input, kernel, bias=0, stride=1, padding=0):
    """Compute a single-channel 2-D convolution with an explicit sliding window.

    Batch and channel dimensions are not handled: ``input`` and ``kernel``
    are both unpacked as 2-D ``(height, width)`` tensors.

    Args:
        input: 2-D feature map.
        kernel: 2-D convolution kernel.
        bias: Value added to every output element (a number or a
            one-element tensor).
        stride: Step of the sliding window along both axes.
        padding: Number of zeros added on each of the four sides.

    Returns:
        A ``torch.zeros``-allocated tensor of shape ``(output_h, output_w)``
        holding the convolution result. It is empty when the kernel is
        larger than the padded input.
    """
    if padding > 0:
        # The pad tuple is (left, right, top, bottom) for the last two dims.
        input = F.pad(input, (padding, padding, padding, padding))

    input_h, input_w = input.shape
    kernel_h, kernel_w = kernel.shape

    # Padding is already baked into `input`, so it does not appear in the
    # size formula. Integer floor division avoids float rounding.
    output_h = max((input_h - kernel_h) // stride + 1, 0)  # output height
    output_w = max((input_w - kernel_w) // stride + 1, 0)  # output width
    output = torch.zeros(output_h, output_w)

    # enumerate() yields the output index directly instead of recovering it
    # from the input offset with a division.
    for out_i, i in enumerate(range(0, input_h - kernel_h + 1, stride)):
        for out_j, j in enumerate(range(0, input_w - kernel_w + 1, stride)):
            region = input[i:i + kernel_h, j:j + kernel_w]
            # Element-wise product summed over the window, plus the bias.
            output[out_i, out_j] = torch.sum(region * kernel) + bias
    return output
# flatten input 版本
def matrix_multiplication_for_conv2d_flatten(input, kernel, bias=0, stride=1, padding=0):
    """Compute a single-channel 2-D convolution with one matrix product.

    Every sliding-window region of ``input`` is flattened into one row of a
    matrix, the kernel is reshaped into a column, and a single ``@`` produces
    all output elements at once.

    Args:
        input: 2-D feature map.
        kernel: 2-D convolution kernel.
        bias: Value added to every output element (a number or a
            one-element tensor).
        stride: Step of the sliding window along both axes.
        padding: Number of zeros added on each of the four sides.

    Returns:
        A tensor of shape ``(output_h, output_w)`` holding the convolution
        result.
    """
    if padding > 0:
        # The pad tuple is (left, right, top, bottom) for the last two dims.
        input = F.pad(input, (padding, padding, padding, padding))

    input_h, input_w = input.shape
    kernel_h, kernel_w = kernel.shape

    # Padding is already baked into `input`, so it does not appear in the
    # size formula. Integer floor division avoids float rounding.
    output_h = max((input_h - kernel_h) // stride + 1, 0)  # output height
    output_w = max((input_w - kernel_w) // stride + 1, 0)  # output width

    # One row per output element; each row is one flattened window.
    region_matrix = torch.zeros(output_h * output_w, kernel.numel())
    # Kernel as a (kernel.numel(), 1) column so the product yields one value per row.
    kernel_matrix = kernel.reshape((-1, 1))

    for out_i, i in enumerate(range(0, input_h - kernel_h + 1, stride)):
        for out_j, j in enumerate(range(0, input_w - kernel_w + 1, stride)):
            region = input[i:i + kernel_h, j:j + kernel_w]
            # Row-major position of output element (out_i, out_j). It must be
            # computed from output indices and the output *width*; using the
            # input offsets or the height misplaces rows whenever stride > 1
            # or the output is not square.
            region_matrix[out_i * output_w + out_j] = torch.flatten(region)

    output_matrix = region_matrix @ kernel_matrix + bias
    return output_matrix.reshape(output_h, output_w)
# Convolution via the explicit sliding-window implementation.
mat_mul_conv_output = matrix_multiplication_for_conv2d(
    input, kernel, bias=bias, padding=1
)
print(f'mat_mul_conv_output: \n{mat_mul_conv_output}')

# Convolution via the flattened-window (single matrix product) implementation.
mat_mul_flatten_conv_output = matrix_multiplication_for_conv2d_flatten(
    input, kernel, bias=bias, padding=1
)
print(f'mat_mul_flatten_conv_output: \n{mat_mul_flatten_conv_output}')

# Reference result from the PyTorch API. Two leading singleton dimensions
# are added to the 2-D tensors to form the 4-D layout passed to F.conv2d.
pytorch_api_conv_output = F.conv2d(
    input[None, None], kernel[None, None], bias=bias, padding=1
)
print(f'F.conv2d: \n{pytorch_api_conv_output.reshape(mat_mul_conv_output.shape)}')

# Check that both hand-written versions agree with the PyTorch API
# (the 2-D results broadcast against the 4-D API output).
flag1 = torch.allclose(mat_mul_conv_output, pytorch_api_conv_output)
print(f"flag1: {flag1}")
flag2 = torch.allclose(pytorch_api_conv_output, mat_mul_flatten_conv_output)
print(f"flag2: {flag2}")
用原始的矩阵运算来实现二维卷积, 考虑batchsize维度和channel维度, 4维
# step3 用原始的矩阵运算来实现二维卷积, 考虑batchsize维度和channel维度, 4维
# bias形状和output channel是一致的
def matrix_multiplication_for_conv2d_full(input, kernel, bias, stride=1, padding=0):
    """Compute a batched, multi-channel 2-D convolution with explicit loops.

    Args:
        input: 4-D tensor unpacked as ``(batch, in_channel, height, width)``.
        kernel: 4-D tensor unpacked as
            ``(out_channel, in_channel, kernel_h, kernel_w)``.
        bias: Tensor indexed per output channel (``bias[oc]``), or ``None``
            to use zeros.
        stride: Step of the sliding window along height and width.
        padding: Number of zeros added on each side of height and width.

    Returns:
        A ``torch.zeros``-allocated tensor of shape
        ``(batch, out_channel, output_h, output_w)`` with the result.

    Raises:
        ValueError: If ``input`` and ``kernel`` disagree on ``in_channel``.
    """
    if padding > 0:
        # Pad pairs run from the last dimension outwards: width, height,
        # channel, batch. Only width and height receive padding.
        input = F.pad(input, (padding, padding, padding, padding, 0, 0, 0, 0))

    bs, in_channel, input_h, input_w = input.shape
    out_channel, kernel_in_channel, kernel_h, kernel_w = kernel.shape
    if kernel_in_channel != in_channel:
        # Previously the kernel's value silently replaced the input's, which
        # either ignored input channels or failed later with an IndexError.
        raise ValueError(
            f"input has {in_channel} channels but kernel expects {kernel_in_channel}"
        )

    # Identity check: `== None` on a tensor is an element-wise comparison.
    if bias is None:
        bias = torch.zeros(out_channel)

    # Padding is already baked into `input`, so it does not appear in the
    # size formula. Integer floor division avoids float rounding.
    output_h = max((input_h - kernel_h) // stride + 1, 0)  # output height
    output_w = max((input_w - kernel_w) // stride + 1, 0)  # output width
    output = torch.zeros(bs, out_channel, output_h, output_w)

    row_starts = range(0, input_h - kernel_h + 1, stride)
    col_starts = range(0, input_w - kernel_w + 1, stride)

    for ind in range(bs):
        for oc in range(out_channel):
            for ic in range(in_channel):
                for out_i, i in enumerate(row_starts):
                    for out_j, j in enumerate(col_starts):
                        region = input[ind, ic, i:i + kernel_h, j:j + kernel_w]
                        # Accumulate each input channel's contribution.
                        output[ind, oc, out_i, out_j] += torch.sum(region * kernel[oc, ic])
            # Bias is added once per output channel, after all input
            # channels have been summed.
            output[ind, oc] += bias[oc]
    return output
# Input feature map: bs * in_channel * in_h * in_w.
input = torch.randn((2, 2, 5, 5))
# Kernel: out_channel * in_channel * kernel_h * kernel_w.
kernel = torch.randn((3, 2, 3, 3))
# Bias: one value per output channel (three here).
bias = torch.randn((3,))

# Check that matrix_multiplication_for_conv2d_full matches the official
# PyTorch API under the same stride and padding.
pytorch_conv2d_api_output = F.conv2d(
    input, kernel, bias=bias, stride=2, padding=1
)
mm_conv2d_full_output = matrix_multiplication_for_conv2d_full(
    input, kernel, bias=bias, stride=2, padding=1
)
flag = torch.allclose(pytorch_conv2d_api_output, mm_conv2d_full_output)
print(f"flag: {flag}")  # expected: True