• 《动手学深度学习 Pytorch版》 (Dive into Deep Learning, PyTorch Edition) 4.3 Concise Implementation of Multilayer Perceptrons


    import torch
    from torch import nn
    from d2l import torch as d2l
    

    Model

    net = nn.Sequential(nn.Flatten(),
                        nn.Linear(784, 256),
                        nn.ReLU(),  # one more layer than in Section 3.7
                        nn.Linear(256, 10))
    
    def init_weights(m):
        if type(m) == nn.Linear:  # initialize weights with random values from a normal distribution
            nn.init.normal_(m.weight, std=0.01)
    
    net.apply(init_weights)
    
    Sequential(
      (0): Flatten(start_dim=1, end_dim=-1)
      (1): Linear(in_features=784, out_features=256, bias=True)
      (2): ReLU()
      (3): Linear(in_features=256, out_features=10, bias=True)
    )
    
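
    A note on the initialization above: net.apply(init_weights) visits every submodule of net and calls init_weights on each one, which is how both nn.Linear layers get their weights re-drawn from a normal distribution with standard deviation 0.01. A minimal variant (my own sketch, not from the book) that also zeroes the biases explicitly could look like this:

    def init_weights_v2(m):
        # isinstance also matches subclasses of nn.Linear, unlike the type() check above
        if isinstance(m, nn.Linear):
            nn.init.normal_(m.weight, std=0.01)  # small random weights
            nn.init.zeros_(m.bias)               # start every bias at zero

    net.apply(init_weights_v2)
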
    batch_size, lr, num_epochs = 256, 0.1, 10
    loss = nn.CrossEntropyLoss(reduction='none')
    trainer = torch.optim.SGD(net.parameters(), lr=lr)
    
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)
    


    [Figure: training loss, training accuracy, and test accuracy curves produced by d2l.train_ch3]
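
    Because the loss is built with reduction='none', d2l.train_ch3 averages the per-example losses internally before back-propagating. Below is a rough, self-contained sketch of an equivalent plain training loop (my own approximation of the setup above, not d2l's implementation, and it skips the live plotting):

    def train_plain(net, train_iter, test_iter, loss, num_epochs, trainer):
        for epoch in range(num_epochs):
            net.train()
            for X, y in train_iter:
                y_hat = net(X)
                l = loss(y_hat, y)     # per-example losses, shape (batch_size,)
                trainer.zero_grad()
                l.mean().backward()    # average over the batch, then backprop
                trainer.step()
            # evaluate test accuracy at the end of each epoch
            net.eval()
            correct, total = 0, 0
            with torch.no_grad():
                for X, y in test_iter:
                    correct += (net(X).argmax(dim=1) == y).sum().item()
                    total += y.numel()
            print(f'epoch {epoch + 1}, test acc {correct / total:.3f}')
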

    Exercises

    (1) Try adding different numbers of hidden layers (you can also change the learning rate). Which configuration works best?

    net2 = nn.Sequential(nn.Flatten(),
                        nn.Linear(784, 256),
                        nn.ReLU(),
                        nn.Linear(256, 128),
                        nn.ReLU(),
                        nn.Linear(128, 10))
    
    def init_weights(m):
        if type(m) == nn.Linear:  # initialize weights with random values from a normal distribution
            nn.init.normal_(m.weight, std=0.01)
    
    net2.apply(init_weights)
    
    batch_size2, lr2, num_epochs2 = 256, 0.3, 10
    loss2 = nn.CrossEntropyLoss(reduction='none')
    trainer2 = torch.optim.SGD(net2.parameters(), lr=lr2)
    
    train_iter2, test_iter2 = d2l.load_data_fashion_mnist(batch_size2)
    d2l.train_ch3(net2, train_iter2, test_iter2, loss2, num_epochs2, trainer2)
    


    [Figure: training curves produced by d2l.train_ch3 for net2 (two hidden layers, lr=0.3)]
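
    To compare settings more systematically, the model construction can be wrapped in a small helper and swept over a few configurations. make_mlp below is a hypothetical helper of my own, reusing the init_weights, loss, num_epochs, and data iterators already defined above:

    def make_mlp(hidden_sizes, lr):
        # Flatten -> (Linear -> ReLU) for each hidden size -> Linear(..., 10)
        layers, in_dim = [nn.Flatten()], 784
        for h in hidden_sizes:
            layers += [nn.Linear(in_dim, h), nn.ReLU()]
            in_dim = h
        layers.append(nn.Linear(in_dim, 10))
        net = nn.Sequential(*layers)
        net.apply(init_weights)
        return net, torch.optim.SGD(net.parameters(), lr=lr)

    # example sweep: one vs. two hidden layers at two learning rates
    for hidden, lr_h in [([256], 0.1), ([256], 0.3), ([256, 128], 0.1), ([256, 128], 0.3)]:
        net_h, trainer_h = make_mlp(hidden, lr_h)
        print(f'hidden={hidden}, lr={lr_h}')
        d2l.train_ch3(net_h, train_iter, test_iter, loss, num_epochs, trainer_h)

    Note that train_ch3 asserts a final training loss below 0.5, so a poorly chosen configuration will raise an AssertionError (exactly what happens with the Sigmoid net in exercise 2 below).
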


    (2) Try different activation functions. Which one works best?

    net3 = nn.Sequential(nn.Flatten(),
                        nn.Linear(784, 256),
                        nn.Sigmoid(),
                        nn.Linear(256, 10))
    
    net4 = nn.Sequential(nn.Flatten(),
                        nn.Linear(784, 256),
                        nn.Tanh(),
                        nn.Linear(256, 10))
    
    def init_weights(m):
        if type(m) == nn.Linear:
            nn.init.normal_(m.weight, std=0.01)
    
    net3.apply(init_weights)
    net4.apply(init_weights)
    
    
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    
    batch_size, lr, num_epochs = 256, 0.1, 10
    loss = nn.CrossEntropyLoss(reduction='none')
    trainer = torch.optim.SGD(net3.parameters(), lr=lr)
    d2l.train_ch3(net3, train_iter, test_iter, loss, num_epochs, trainer)
    
    ---------------------------------------------------------------------------
    
    AssertionError                            Traceback (most recent call last)
    
    Cell In[5], line 4
          2 loss = nn.CrossEntropyLoss(reduction='none')
          3 trainer = torch.optim.SGD(net3.parameters(), lr=lr)
    ----> 4 d2l.train_ch3(net3, train_iter, test_iter, loss, num_epochs, trainer)
    
    
    File c:\Software\Miniconda3\envs\d2l\lib\site-packages\d2l\torch.py:340, in train_ch3(net, train_iter, test_iter, loss, num_epochs, updater)
        338     animator.add(epoch + 1, train_metrics + (test_acc,))
        339 train_loss, train_acc = train_metrics
    --> 340 assert train_loss < 0.5, train_loss
        341 assert train_acc <= 1 and train_acc > 0.7, train_acc
        342 assert test_acc <= 1 and test_acc > 0.7, test_acc
    
    
    AssertionError: 0.5017133202234904
    

    [Figure: training curves produced by d2l.train_ch3 for net3 (Sigmoid)]
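
    The AssertionError above is not a bug in the model code: train_ch3 still runs all ten epochs (the curves are redrawn every epoch at line 338 of d2l/torch.py, before the assertion at line 340) and only then checks that the final training loss is below 0.5. With the Sigmoid network the loss ends up just above that threshold (0.5017). To keep the notebook running when a configuration underperforms, the call can be wrapped, for example:

    # the plot is still produced; we just report the failed check instead of stopping
    try:
        d2l.train_ch3(net3, train_iter, test_iter, loss, num_epochs, trainer)
    except AssertionError as err:
        print(f'train_ch3 final-loss check failed: train loss = {err}')
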

    batch_size, lr, num_epochs = 256, 0.1, 10
    loss = nn.CrossEntropyLoss(reduction='none')
    trainer = torch.optim.SGD(net4.parameters(), lr=lr)
    d2l.train_ch3(net4, train_iter, test_iter, loss, num_epochs, trainer)
    


    [Figure: training curves produced by d2l.train_ch3 for net4 (Tanh)]

    ReLU still comes out on top here: Sigmoid saturates and its gradient is at most 0.25, which matches the slower convergence and the failed loss check above, while Tanh comes close but does not beat it.
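
    The two runs above duplicate the same setup; a compact way to compare all three activations under identical hyperparameters is a loop like the following sketch (my own consolidation, not from the book):

    for name, act in [('ReLU', nn.ReLU()), ('Sigmoid', nn.Sigmoid()), ('Tanh', nn.Tanh())]:
        net_a = nn.Sequential(nn.Flatten(), nn.Linear(784, 256), act, nn.Linear(256, 10))
        net_a.apply(init_weights)
        trainer_a = torch.optim.SGD(net_a.parameters(), lr=lr)
        print(f'--- {name} ---')
        try:
            d2l.train_ch3(net_a, train_iter, test_iter, loss, num_epochs, trainer_a)
        except AssertionError as err:
            print(f'{name}: final training loss {err} did not pass the 0.5 check')
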


    (3) Try different weight-initialization schemes. Which works best?

    Tired, not going to try this one. Skipped…
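
    For reference only, a minimal sketch of what trying PyTorch's built-in Xavier (Glorot) initializer could look like (init_xavier and net5 are names of my own; the run is not reproduced here):

    def init_xavier(m):
        # Xavier/Glorot init scales the weights by the layer's fan-in and fan-out,
        # which tends to keep activations and gradients on a similar scale across layers
        if isinstance(m, nn.Linear):
            nn.init.xavier_uniform_(m.weight)
            nn.init.zeros_(m.bias)

    net5 = nn.Sequential(nn.Flatten(), nn.Linear(784, 256), nn.ReLU(), nn.Linear(256, 10))
    net5.apply(init_xavier)
    trainer5 = torch.optim.SGD(net5.parameters(), lr=lr)
    d2l.train_ch3(net5, train_iter, test_iter, loss, num_epochs, trainer5)
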

  • Original post: https://blog.csdn.net/qq_43941037/article/details/132752358