import torch
from torch import nn
from d2l import torch as d2l

# Hyperparameters
dropout1, dropout2 = 0.2, 0.5
num_epochs, lr, batch_size = 10, 0.5, 256

# Data
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# Model
# nn.Flatten reshapes each input image into a vector, i.e. (-1, 784)
net = nn.Sequential(nn.Flatten(), nn.Linear(784, 256), nn.ReLU(),
                    nn.Dropout(dropout1), nn.Linear(256, 256), nn.ReLU(),
                    nn.Dropout(dropout2), nn.Linear(256, 10))

# Parameter initialization
def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)

net.apply(init_weights)

# Loss function
loss = nn.CrossEntropyLoss(reduction='none')

# Optimizer
trainer = torch.optim.SGD(net.parameters(), lr=lr)

# Training
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)
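# A hedged follow-up (not in the original notes): put the model in eval
# mode so the Dropout layers are bypassed, then visualize a few test
# predictions with d2l's predict_ch3.
net.eval()
d2l.predict_ch3(net, test_iter)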

query
How does dropout's random zeroing affect gradient computation and backpropagation? A zeroed unit contributes nothing downstream, so the gradient flowing back through it is zero for that step; see the sketch below.
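A minimal sketch (assumed example, not from the notes): dropped units pass zero gradient back to the inputs that produced them, while surviving units carry the 1/(1-p) scale factor.

import torch
from torch import nn

torch.manual_seed(0)
x = torch.ones(1, 4, requires_grad=True)
drop = nn.Dropout(p=0.5)  # a bare module defaults to training mode, so dropout is active
y = drop(x)               # dropped entries become 0, survivors are scaled by 1/(1-p) = 2
y.sum().backward()
print(y)                  # e.g. tensor([[2., 0., 0., 2.]], ...)
print(x.grad)             # 2.0 where kept, 0.0 where dropped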
Dropout drops units at random; how can results be reproducible? Fix the random seed, as in the check below.
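A quick reproducibility check (assumed example): fixing torch's global seed makes the dropout mask identical across runs.

import torch
from torch import nn

drop = nn.Dropout(p=0.5)
torch.manual_seed(42)
a = drop(torch.ones(8))
torch.manual_seed(42)
b = drop(torch.ones(8))
print(torch.equal(a, b))  # True: same seed, same mask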
Does the random dropping compromise the network's correctness? In expectation it does not: the 1/(1-p) rescaling keeps each layer's expected output unchanged.
During prediction/inference no neurons are dropped; dropout is active only in training mode.
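Sketch of the inference behavior (assumed example): in eval mode the Dropout module is an identity map, so prediction is deterministic.

import torch
from torch import nn

drop = nn.Dropout(p=0.5)
drop.eval()                     # switch to inference mode
x = torch.ones(6)
print(torch.equal(drop(x), x))  # True: input passes through unchanged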
BN and dropout: both behave differently in training vs. inference mode, and both have a regularizing effect.
Label regularization (e.g., label smoothing).
Why dropout divides by (1 - p): rescaling the surviving activations keeps each unit's expected value unchanged.
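A minimal inverted-dropout sketch (in the spirit of d2l's dropout_layer; the function here is illustrative): dividing survivors by (1 - p) preserves the expected value of every activation.

import torch

def dropout_layer(X, p):
    assert 0 <= p <= 1
    if p == 1:
        return torch.zeros_like(X)
    if p == 0:
        return X
    mask = (torch.rand(X.shape) > p).float()  # keep each unit with prob 1 - p
    return mask * X / (1.0 - p)               # rescale so E[output] == X

X = torch.ones(2, 4)
print(dropout_layer(X, 0.5))  # kept entries become 2.0, dropped ones 0.0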
The pioneer Hinton: the earliest take on dropout was to randomly sample a sub-network each step and average them all at the end, similar to the voting idea of a random forest over many decision trees.
Dropout prevents overfitting.
Dropout drops elements of the previous layer's output, which is the next layer's input.
Dropout and weight decay are both regularizers and can be used together, as sketched below.
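An assumed variation on the training code above: dropout lives in the model, while weight decay is an L2 penalty added through the optimizer.

trainer = torch.optim.SGD(net.parameters(), lr=lr, weight_decay=1e-4)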
Common dropout values: moderate probabilities such as the 0.2 and 0.5 used for the two hidden layers above.
!!! Deep learning lets you overfit first, then rein the model back in with various adjustments.
Once dropout is introduced, the learning rate needs retuning.
Introducing dropout slows down convergence.