softmax回归python实现

from d2l import torch as d2lutils
import torch
import torchvision
from torch.utils import data
from torchvision import transforms

class Accumulator:
    """Accumulate running sums over a fixed number of variables.

    The instance holds one float total per variable in ``self.data``.
    Totals are read back by position with ``acc[i]``.
    """

    def __init__(self, n):
        """Create ``n`` totals, all starting at 0.0."""
        self.data = [0.0] * n

    def add(self, *args):
        """Add one increment to each total, position by position.

        Each argument is converted with ``float()`` before being added.

        Raises:
            ValueError: if the number of arguments differs from the number
                of totals. ``zip`` would otherwise silently shrink
                ``self.data`` (too few arguments) or ignore the surplus
                (too many), corrupting every later read.
        """
        if len(args) != len(self.data):
            raise ValueError(
                "expected %d values, got %d" % (len(self.data), len(args)))
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        """Set every total back to 0.0 while keeping the number of totals."""
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        """Return the total stored at position ``idx``."""
        return self.data[idx]


# ①准备数据集
def load_data_fashion_mnist(batch_size, resize=None):
    """Build the Fashion-MNIST training and test data loaders.

    Args:
        batch_size: number of samples per batch for both loaders.
        resize: optional size handed to ``transforms.Resize``; when falsy,
            images are not resized.

    Returns:
        A ``(train_loader, test_loader)`` tuple. The training loader
        shuffles; the test loader does not. Both use 4 worker processes.
    """
    # Resize (if requested) has to run before the conversion to a tensor.
    steps = [transforms.Resize(resize)] if resize else []
    steps.append(transforms.ToTensor())
    pipeline = transforms.Compose(steps)

    # Both splits live under ../data and are downloaded when missing.
    dataset_kwargs = dict(root="../data", transform=pipeline, download=True)
    train_set = torchvision.datasets.FashionMNIST(train=True, **dataset_kwargs)
    test_set = torchvision.datasets.FashionMNIST(train=False, **dataset_kwargs)

    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size, shuffle=True, num_workers=4)
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size, shuffle=False, num_workers=4)
    return (train_loader, test_loader)

# Mini-batch size: 256 samples per batch.
batch_size = 256
# Build the training and test data loaders (runs at import time).
train_iter, test_iter = load_data_fashion_mnist(batch_size)

# Step 2: each image is flattened into a vector of length 784 (28 * 28 = 784).
# The dataset has 10 classes, so the network output dimension is 10.
num_inputs = 784
num_outputs = 10
# W is a 784 x 10 matrix drawn from a normal distribution (mean 0, std 0.01);
# gradients are tracked for it.
W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)
# b is a zero vector of length 10; gradients are tracked for it as well.
b = torch.zeros(num_outputs, requires_grad=True)

# ③实现softmax
def softmax(X):
    """Apply a numerically stable softmax along dimension 1.

    Args:
        X: a tensor with at least two dimensions; each slice along
            dimension 1 is normalised independently.

    Returns:
        A tensor of the same shape as ``X`` whose entries along dimension 1
        are non-negative and sum to 1.
    """
    # Softmax is unchanged by subtracting a per-row constant. Shifting by the
    # row maximum keeps every exponent <= 0, so exp() cannot overflow to inf
    # (which previously produced inf / inf = NaN for large logits). The shift
    # is detached because it contributes nothing to the true gradient.
    row_max = X.max(dim=1, keepdim=True).values.detach()
    X_exp = torch.exp(X - row_max)
    # Sum along dimension 1, keeping it so the division broadcasts per row.
    partition = X_exp.sum(1, keepdim=True)
    return X_exp / partition


# ④实现softmax回归模型
def net(X):
    """Softmax regression forward pass using the module-level ``W`` and ``b``.

    ``X`` is reshaped to ``(N, W.shape[0])``, with N inferred from the
    number of elements. Multiplying by the 2-D weight matrix ``W`` gives
    logits of shape ``(N, W.shape[1])``; ``b`` is broadcast-added to every
    row and ``softmax`` turns each row into class probabilities.
    """
    flattened = X.reshape((-1, W.shape[0]))
    logits = torch.matmul(flattened, W) + b
    return softmax(logits)


# ⑤实现交叉熵损失函数
def cross_entropy(y_hat, y):
    """Return the per-sample cross-entropy loss.

    Args:
        y_hat: predicted probabilities, one row per sample.
        y: integer class labels, one per row of ``y_hat``.

    Returns:
        A 1-D tensor holding ``-log`` of the probability that each row
        assigns to its true class.

    Example: with ``y = [0, 2]`` and
    ``y_hat = [[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]]`` the selected
    probabilities are ``[0.1, 0.5]`` and the result is ``-log([0.1, 0.5])``.
    """
    # Pair every row index with that row's label to pick one entry per row.
    row_ids = range(y_hat.shape[0])
    true_class_prob = y_hat[row_ids, y]
    return torch.log(true_class_prob).neg()


# ⑥将预测类别与真实 y 元素进行比较
def accuracy(y_hat, y):
    """Return the number of correct predictions as a float.

    When ``y_hat`` has more than one dimension and more than one column, it
    is reduced to predicted class indices with ``argmax`` along axis 1, and
    those indices are printed. Otherwise ``y_hat`` is compared as given.
    """
    has_class_scores = y_hat.dim() > 1 and y_hat.shape[1] > 1
    if has_class_scores:
        # Index of the largest score in each row is the predicted class.
        y_hat = y_hat.argmax(axis=1)
        print("===y_hat====")
        print(y_hat)
    # Compare in the label dtype, then count the matches (True -> 1).
    matches = y_hat.type(y.dtype) == y
    num_correct = matches.type(y.dtype).sum()
    return float(num_correct)


# ⑦评估在任意模型 net 的准确率
def evaluate_accuracy(net, data_iter):
    """Compute the accuracy of ``net`` over a dataset.

    Args:
        net: a callable mapping a batch ``X`` to predictions. If it is a
            ``torch.nn.Module`` it is switched to evaluation mode first.
        data_iter: an iterable yielding ``(X, y)`` batches.

    Returns:
        Correct predictions divided by the number of labels seen.

    Raises:
        ZeroDivisionError: if ``data_iter`` yields no labels.

    Each batch prints its predictions, its labels and its correct count.
    The forward pass and the accuracy computation now run once per batch
    (previously three times and twice respectively), so the class-index
    dump printed by ``accuracy`` appears once per batch instead of twice.
    """
    if isinstance(net, torch.nn.Module):
        net.eval()
    metric = Accumulator(2)  # (correct predictions, labels seen)
    # Evaluation never back-propagates, so skip autograd bookkeeping.
    with torch.no_grad():
        for X, y in data_iter:
            y_hat = net(X)
            print("===开始=====================================================================")
            print("===net(X)===")
            print(y_hat)
            print("===y====")
            print(y)
            num_correct = accuracy(y_hat, y)
            print("===正确的个数====" + str(num_correct))
            metric.add(num_correct, y.numel())
    return metric[0] / metric[1]



# ⑨定义一个函数来训练一个迭代周期
def train_epoch_ch3(net, train_iter, loss, updater):  # @save
    """Train ``net`` for one pass over ``train_iter``.

    Args:
        net: a callable mapping a batch ``X`` to predictions. If it is a
            ``torch.nn.Module`` it is switched to training mode first.
        train_iter: an iterable yielding ``(X, y)`` batches.
        loss: a callable ``loss(y_hat, y)`` returning a tensor.
        updater: either a ``torch.optim.Optimizer`` or a callable that
            takes the batch size and applies the parameter update.

    Returns:
        ``(summed loss / labels seen, correct predictions / labels seen)``.
    """
    if isinstance(net, torch.nn.Module):
        net.train()

    uses_torch_optimizer = isinstance(updater, torch.optim.Optimizer)
    # Running totals: summed loss, correct predictions, labels seen.
    totals = Accumulator(3)
    for X, y in train_iter:
        y_hat = net(X)
        batch_loss = loss(y_hat, y)
        if not uses_torch_optimizer:
            # Custom updater: back-propagate the summed loss and hand the
            # batch size to the updater.
            batch_loss.sum().backward()
            updater(X.shape[0])
        else:
            # Built-in optimizer: clear old gradients, back-propagate the
            # mean loss, then take a step.
            updater.zero_grad()
            batch_loss.mean().backward()
            updater.step()
        totals.add(float(batch_loss.sum()), accuracy(y_hat, y), y.numel())

    loss_sum, num_correct, num_seen = totals[0], totals[1], totals[2]
    return loss_sum / num_seen, num_correct / num_seen


def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train ``net`` for ``num_epochs`` epochs and report the final metrics.

    Each epoch calls ``train_epoch_ch3`` on ``train_iter`` and then
    ``evaluate_accuracy`` on ``test_iter``. After the last epoch, the
    training loss, training accuracy and test accuracy are printed and
    checked against fixed thresholds.

    Args:
        net: the model, passed through to the helpers above.
        train_iter: iterable of training ``(X, y)`` batches.
        test_iter: iterable of test ``(X, y)`` batches.
        loss: loss callable passed to ``train_epoch_ch3``.
        num_epochs: number of passes over the training data; at least 1.
        updater: optimizer or update callable passed to ``train_epoch_ch3``.

    Raises:
        ValueError: if ``num_epochs`` is less than 1. The loop would never
            run and the reporting code would fail on unbound variables.
        AssertionError: if the final metrics miss the sanity thresholds.
    """
    if num_epochs < 1:
        raise ValueError(f"num_epochs must be at least 1, got {num_epochs}")

    for _ in range(num_epochs):
        # One pass over the training data: (average loss, accuracy).
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        # Accuracy on the held-out test data after this epoch.
        test_acc = evaluate_accuracy(net, test_iter)

    # Only the metrics of the final epoch are reported and checked.
    train_loss, train_acc = train_metrics
    print("训练损失:" + str(train_loss))
    print("训练精度:" + str(train_acc))
    print("测试精度:" + str(test_acc))
    # Sanity thresholds on the trained model, not input validation.
    assert train_loss < 0.5, train_loss
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc

# Learning rate passed to the parameter update.
lr = 0.1


def updater(batch_size):
    """Update the module-level ``W`` and ``b`` through ``d2lutils.sgd``.

    The parameters, the learning rate ``lr`` and ``batch_size`` are passed
    straight through, and whatever ``d2lutils.sgd`` returns is returned.
    """
    params = [W, b]
    return d2lutils.sgd(params, lr, batch_size)

# Train: runs only when the file is executed as a script, not on import.
if __name__ == '__main__':
    # Number of passes over the training data.
    num_epochs = 10
    # Train the hand-written softmax regression with the custom SGD updater.
    train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, updater)


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169

训练10轮后精度如下

训练损失:0.44705355275472003
训练精度:0.8476166666666667
测试精度:0.8222
1
2
3

相关阅读:
mac打不开xxx软件，因为apple 无法检查其是否包含恶意
 MySQL 创建用户并分配数据库权限
 WPF实现签名拍照功能
 @AliasFor注解详解（结合源码分析）
地线干扰的共阻干扰
 基于jsp+mysql+ssm峰值预警停车场管理系统-计算机毕业设计
 K8s----资源管理
 TPS54331DDAR —— DCDC降压设计12V 至 5.00V @ 3A【电感电容选择计算】
yolo自动化项目实例解析（一）日志格式输出、并发异步多线程、websocket、循环截图、yolo推理、3d寻路
 SpringCloud微服务【实用篇】| Eureka注册中心、Ribbon负载均衡
原文地址：https://blog.csdn.net/qq_39879126/article/details/134459588