In hands-on programming the fundamentals matter most, so this post is all about consolidating the basics, haha~
Enough chatter. If you like it, read on; it is also a summary for myself, so I can look things up later~
I still think typing the code out yourself once gives you a different feel for it~
Related content:
由浅入深,走进深度学习(补充篇:神经网络结构层基础)-CSDN博客
Now the real content begins!!!
The code below covers several important parts:
custom blocks
instantiating blocks
sequential blocks
forward propagation
mixed blocks
The code, with explanations, is given below:
```python
import torch
from torch import nn
from torch.nn import functional as F

net = nn.Sequential(nn.Linear(20, 256),
                    nn.ReLU(),
                    nn.Linear(256, 10))

x = torch.rand(4, 20)  # the second dim must be 20 to match the in_features of the first Linear
print('x:', x)
print('x.shape:', x.shape)
output = net(x)
print('output:', output)

# Custom block
print('--------------------------------------')
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()  # call the parent class's __init__
        self.hidden = nn.Linear(20, 256)
        self.output = nn.Linear(256, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.hidden(x)
        x = self.relu(x)
        x = self.output(x)
        return x

# Instantiate the MLP's layers, then call them on every forward pass
net = MLP()
x = torch.rand(2, 20)
output = net(x)
print('output:', output)
```
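Because the layers are assigned as attributes in `__init__`, they are registered automatically and `net.parameters()` exposes them to an optimizer. A minimal one-step training sketch (my addition; the learning rate and the MSE target are illustrative assumptions, not from the original post):

```python
# A minimal sketch: one SGD step on the custom MLP (lr and target are illustrative)
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)
target = torch.rand(2, 10)         # hypothetical regression target
loss = F.mse_loss(net(x), target)  # any differentiable loss works here
optimizer.zero_grad()
loss.backward()
optimizer.step()                   # updates both hidden and output layers
```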
```python
# Sequential block
print('- - - - - - - - - - - - - - - - - - - - - -')
class MySequential(nn.Module):
    def __init__(self, *args):
        super(MySequential, self).__init__()
        for idx, block in enumerate(args):
            # _modules is an OrderedDict of layers; its keys must be strings,
            # so we use the index. Registering blocks here is what lets
            # PyTorch find their parameters.
            self._modules[str(idx)] = block

    def forward(self, x):
        for block in self._modules.values():
            print('block:', block)
            x = block(x)
        return x

net = MySequential(nn.Linear(20, 256),
                   nn.ReLU(),
                   nn.Linear(256, 10))
x = torch.rand(2, 20)
output = net(x)
print('output:', output)
```
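`MySequential` mirrors what `nn.Sequential` does internally. The built-in version also accepts an `OrderedDict` when you want named sub-layers (this is a real PyTorch API; the example itself is my addition):

```python
# nn.Sequential also accepts an OrderedDict, giving each layer a readable name
from collections import OrderedDict
named_net = nn.Sequential(OrderedDict([
    ('fc1', nn.Linear(20, 256)),
    ('act', nn.ReLU()),
    ('fc2', nn.Linear(256, 10)),
]))
print(named_net.fc1)  # layers are then accessible by name as attributes
```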
```python
# Forward propagation
# Executing arbitrary code in the forward pass
print('-----------------------------------------')
class FixedHidden(nn.Module):
    def __init__(self):
        super(FixedHidden, self).__init__()
        # A constant weight: a plain tensor, not an nn.Parameter,
        # so it is never updated during training
        self.rand_weight = torch.rand((20, 20), requires_grad=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, x):
        x = self.linear(x)
        x = F.relu(torch.mm(x, self.rand_weight) + 1)
        x = self.linear(x)  # the same Linear layer is reused, so its weights are shared
        # Control flow is allowed in forward: halve x until its L1 norm is at most 1
        while x.abs().sum() > 1:
            x /= 2
        return x.sum()

net = FixedHidden()
a = torch.rand(2, 20)
y = net(a)
print('y:', y)
```
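Even with the Python loop in `forward`, autograd still tracks the computation, so the scalar output can be backpropagated. A quick sanity check (my addition, not in the original post):

```python
# Sanity check: gradients flow through the dynamic control flow above
y.backward()
print(net.linear.weight.grad is not None)  # True: the Linear layer received gradients
print(net.rand_weight.grad is None)        # True: the constant tensor has no gradient
```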
```python
# Mixing and composing blocks
print('------------------------------------------------------------------')
class Mixmodel(nn.Module):
    def __init__(self):
        super(Mixmodel, self).__init__()
        self.net = nn.Sequential(nn.Linear(20, 64),
                                 nn.ReLU(),
                                 nn.Linear(64, 16),
                                 nn.ReLU())
        self.linear = nn.Linear(16, 32)

    def forward(self, xx):
        xx = self.net(xx)
        xx = self.linear(xx)
        return xx

# An empty MySequential holds no blocks, so it simply passes its input through
mixnet = nn.Sequential(Mixmodel(),
                       nn.Linear(32, 20),
                       MySequential())
aa = torch.rand(3, 20)
out1 = mixnet(aa)
print('out1:', out1)
```
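Nesting registers everything: printing the container shows the whole module tree, and `parameters()` walks it recursively. A small check (my addition):

```python
# The nested structure is fully registered: print the module tree and count parameters
print(mixnet)
total = sum(p.numel() for p in mixnet.parameters())
print('total trainable parameters:', total)
```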
This part covers three topics: parameter management, parameter replacement, and parameter tying.
The code, with explanations, is as follows:
```python
# Parameter management
# Start with an MLP that has a single hidden layer
import torch
from torch import nn
from torch.nn import functional as F

net = nn.Sequential(nn.Linear(4, 8),
                    nn.ReLU(),
                    nn.Linear(8, 1))
x = torch.rand(size=(2, 4))
print('net(x):', net(x))
print('net[2].state_dict:', net[2].state_dict())  # access parameters; net[2] is the final output layer
print('net[2].bias:', type(net[2].bias))  # a target parameter (an nn.Parameter)
print(net[2].bias)
print(net[2].bias.data)
print(net[2].weight.grad == None)  # no backward pass has run yet, so grad is None
print('------------', *[(name, param.shape) for name, param in net[0].named_parameters()])  # access all parameters of one layer at once
print('- - - - - - - -', *[(name, param.shape) for name, param in net.named_parameters()])  # '0' is the first layer; '1' is the ReLU, which has no parameters
print('*************', net.state_dict()['2.bias'].data)  # fetch a parameter by name
```
```python
# Nested blocks
# Collecting parameters from nested blocks
def block1():
    return nn.Sequential(nn.Linear(4, 8),
                         nn.ReLU(),
                         nn.Linear(8, 4),
                         nn.ReLU())

def block2():
    net = nn.Sequential()
    for i in range(4):
        net.add_module(f'block{i}', block1())  # add_module takes a string name; block2 nests four copies of block1
    return net

regnet = nn.Sequential(block2(),
                       nn.Linear(4, 1))
xx = torch.rand(size=(2, 4))
yy = torch.rand(2, 4)
print('xx:', xx)
print('yy:', yy)
print('regnet(xx):', regnet(xx))
print('regnet(yy):', regnet(yy))
print('regnet:', regnet)
```
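Because the blocks are nested, you can index into them the same way you index nested lists, following the structure printed above (this indexing sketch is my addition):

```python
# Index into the nested structure: the first block2, its second block1, that block's first Linear
print(regnet[0][1][0].bias.data)
```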
```python
# Built-in initialization
print('**********************************************************')
net1 = nn.Sequential(nn.Linear(4, 8),
                     nn.ReLU(),
                     nn.Linear(8, 4),
                     nn.ReLU())

def init_normal(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, mean=0, std=0.01)  # the trailing underscore means m.weight is replaced in place
        nn.init.zeros_(m.bias)

net1.apply(init_normal)  # apply recurses until every submodule has been initialized
print('net1[0].weight.data[0]:', net1[0].weight.data[0])
print('net1[0].bias.data[0]:', net1[0].bias.data[0])

net2 = nn.Sequential(nn.Linear(4, 8),
                     nn.ReLU(),
                     nn.Linear(8, 1))

def init_constant(m):
    if type(m) == nn.Linear:
        nn.init.constant_(m.weight, 1)
        nn.init.zeros_(m.bias)

net2.apply(init_constant)
print('net2[0].weight.data[0]:', net2[0].weight.data[0])
print('net2[0].bias.data[0]:', net2[0].bias.data[0])

# Apply different initializations to different blocks
def xavier(m):
    if type(m) == nn.Linear:
        nn.init.xavier_uniform_(m.weight)

def init_42(m):
    if type(m) == nn.Linear:
        nn.init.constant_(m.weight, 42)

net1[0].apply(xavier)
net1[2].apply(init_42)
print('net1[0].weight.data[0]:', net1[0].weight.data[0])
print('net1[2].weight.data:', net1[2].weight.data)
```
```python
# Parameter replacement
# Custom initialization
def my_init(m):
    if type(m) == nn.Linear:
        print("Init", *[(name, param.shape) for name, param in m.named_parameters()][0])  # print the name and shape of the first parameter
        nn.init.uniform_(m.weight, -10, 10)
        # The *= below first builds a boolean mask (abs >= 5), then multiplies it
        # elementwise into the original matrix: weights with absolute value >= 5
        # are kept, the rest are zeroed out
        m.weight.data *= m.weight.data.abs() >= 5

net2.apply(my_init)
print('net2[0].weight[:2]:', net2[0].weight[:2])
net2[0].weight.data[:] += 1  # parameter replacement: write directly into the data
net2[0].weight.data[0, 0] = 42
print('net2[0].weight.data[0]:', net2[0].weight.data[0])
```
```python
# Parameter tying
shared = nn.Linear(8, 8)
net3 = nn.Sequential(nn.Linear(4, 8),
                     nn.ReLU(), shared,
                     nn.ReLU(), shared,
                     nn.ReLU(),
                     nn.Linear(8, 1))  # the 2nd and 3rd hidden layers share weights; the 1st and 4th have their own
print(net3)
net3(torch.rand(2, 4))
print(net3[2].weight.data[0] == net3[4].weight.data[0])
net3[2].weight.data[0, 0] = 100  # changing one changes the other: they are the same object
print(net3[2].weight.data[0] == net3[4].weight.data[0])
```
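Since the two positions hold the same `nn.Linear` object, a backward pass accumulates gradient contributions from both uses into the one shared weight. A minimal check (my addition; the "loss" here is just the sum of the outputs):

```python
# Sanity check: the shared layer receives gradients from both positions
out = net3(torch.rand(2, 4)).sum()  # a toy scalar loss
out.backward()
print(net3[2].weight.grad is net3[4].weight.grad)  # True: literally the same gradient tensor
```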
In this part we construct both layers without parameters and layers with parameters:
```python
# Custom layers
# Build a custom layer that has no parameters
import torch
import torch.nn.functional as F
from torch import nn

class MyLayer(nn.Module):
    def __init__(self):
        super(MyLayer, self).__init__()

    def forward(self, x):
        return x - x.mean()  # center the input by subtracting its mean

net = MyLayer()
z = torch.tensor([1, 2, 3, 4, 5], dtype=torch.float32)
print('net(z):', net(z))

# Use the layer as a component when building more complex models
net1 = nn.Sequential(nn.Linear(8, 128),
                     MyLayer())
zz = torch.rand(4, 8)
print('net1(zz):', net1(zz))
print('net1(zz).mean:', net1(zz).mean())  # the mean should be (numerically) zero
```
```python
# A layer with parameters
class MyLinear(nn.Module):
    def __init__(self, in_units, units):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_units, units))  # nn.Parameter registers the tensor and enables gradients
        self.bias = nn.Parameter(torch.randn(units,))

    def forward(self, X):
        # Note: using .data here bypasses autograd, so this layer would not train
        linear = torch.matmul(X, self.weight.data) + self.bias.data
        return F.relu(linear)

dense = MyLinear(5, 3)
print('dense.weight', dense.weight)

# Run forward propagation directly with the custom layer
print('dense(torch.rand(2,5))', dense(torch.rand(2, 5)))
# Build a model out of custom layers
net = nn.Sequential(MyLinear(64, 8),
                    MyLinear(8, 1))
print('net(torch.rand(2,64))', net(torch.rand(2, 64)))
```
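Because `weight` and `bias` are wrapped in `nn.Parameter`, they show up in `named_parameters()` and an optimizer would see them. To let gradients actually flow through the layer, drop the `.data` in `forward`. A variant sketch (my addition):

```python
# The parameters are registered automatically
print([name for name, _ in dense.named_parameters()])  # ['weight', 'bias']

# Variant that keeps the layer trainable: use the parameters directly, not .data
class MyTrainableLinear(MyLinear):
    def forward(self, X):
        return F.relu(torch.matmul(X, self.weight) + self.bias)

trainable = MyTrainableLinear(5, 3)
print(trainable(torch.rand(2, 5)))
```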
This part covers saving and reading files with torch. Later, when we design and train models, we can save the model this way and load it back for the final evaluation!!!
The code is as follows:
```python
# Reading and writing files
# Loading and saving tensors
import torch
from torch import nn
from torch.nn import functional as F

x = torch.arange(4)
torch.save(x, 'x_file')
x1 = torch.load('x_file')
print('x1:', x1)

# Save a list of tensors, then read them back into memory
y = torch.zeros(4)
torch.save([x, y], 'x-file')
x2, y2 = torch.load('x-file')
print('x2:', x2)
print('y2:', y2)

# Write and read a dictionary that maps strings to tensors
mydict = {'x': x, 'y': y}
torch.save(mydict, 'mydict')
mydict1 = torch.load('mydict')
print('mydict1:', mydict1)
```
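One practical detail worth knowing: if a tensor was saved on a GPU and you load it on a machine without one, `torch.load` accepts a `map_location` argument (a real PyTorch option) to remap the storage:

```python
# Load onto the CPU regardless of the device the tensor was saved from
mydict_cpu = torch.load('mydict', map_location='cpu')
print('mydict_cpu:', mydict_cpu)
```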
Loading and saving model parameters
```python
# Loading and saving model parameters
import torch
from torch import nn
from torch.nn import functional as F

class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()  # call the parent class's __init__
        self.hidden = nn.Linear(20, 256)
        self.output = nn.Linear(256, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.hidden(x)
        x = self.relu(x)
        x = self.output(x)
        return x

# Instantiate the MLP's layers, then call them on every forward pass
net = MLP()
x = torch.rand(2, 20)
output = net(x)
print('output:', output)

# Store the model's parameters in a file called "MLP.params"
torch.save(net.state_dict(), 'MLP.params')

# Instantiate a fresh copy of the original MLP and read the stored parameters straight from the file
clone = MLP()  # the model must be constructed first; only then can the parameters be loaded
clone.load_state_dict(torch.load('MLP.params'))
print('MLP eval', clone.eval())  # eval() switches to evaluation mode

output_clone = clone(x)
print(output_clone == output)  # all True: identical parameters give identical outputs
```
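In a real training loop you usually save more than the weights. A common pattern (a sketch; the epoch value and the optimizer here are illustrative assumptions) is to bundle everything into one checkpoint dict:

```python
# A common checkpoint pattern: bundle model state, optimizer state, and progress
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)  # hypothetical optimizer
checkpoint = {
    'epoch': 10,  # illustrative value
    'model_state': net.state_dict(),
    'optimizer_state': optimizer.state_dict(),
}
torch.save(checkpoint, 'mlp_checkpoint.pt')

# Restore: rebuild the objects first, then load the states back in
ckpt = torch.load('mlp_checkpoint.pt')
clone.load_state_dict(ckpt['model_state'])
optimizer.load_state_dict(ckpt['optimizer_state'])
```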
Note: the material above draws on the videos of the Bilibili uploader "我是土堆", on Andrew Ng's deep learning and machine learning courses, and on Mu Li's Dive into Deep Learning (动手学深度学习)!!!