• LSTM and RNN Model Architectures Implemented in PyTorch


    1. The LSTM Model

    import torch
    from torch import nn
    import torchvision.datasets as dsets
    import torchvision.transforms as transforms
    import matplotlib.pyplot as plt

    torch.manual_seed(1)

    # Hyper parameters
    EPOCH = 1              # how many times to train over the whole dataset; keep it at 1 to save time
    BATCH_SIZE = 64
    TIME_STEP = 28         # rnn time steps / image height
    INPUT_SIZE = 28        # rnn input size per step / pixels per image row
    LR = 0.01              # learning rate
    DOWNLOAD_MNIST = True  # set to False if you have already downloaded MNIST

    # MNIST handwritten digits
    train_data = dsets.MNIST(
        root='./mnist/',                  # where to save / load the data
        train=True,                       # this is the training set
        transform=transforms.ToTensor(),  # convert PIL.Image or numpy.ndarray to
                                          # torch.FloatTensor (C x H x W), normalized to [0.0, 1.0]
        download=DOWNLOAD_MNIST,          # download if not present, otherwise reuse
    )

    # plot one example (train_data.data / train_data.targets in newer torchvision)
    # print(train_data.train_data.size())     # (60000, 28, 28)
    # print(train_data.train_labels.size())   # (60000)
    # plt.imshow(train_data.train_data[0].numpy(), cmap='gray')
    # plt.title('%i' % train_data.train_labels[0])
    # plt.show()

    # batch training: 64 samples, 1 channel, 28x28 -> (64, 1, 28, 28)
    train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

    # to save time, evaluate on the first 2000 test samples only
    test_data = dsets.MNIST(root='./mnist/', train=False, transform=transforms.ToTensor())
    test_x = test_data.test_data.type(torch.FloatTensor)[:2000] / 255.  # shape (2000, 28, 28), values in [0, 1]
    test_y = test_data.test_labels.numpy()[:2000]                       # convert to numpy array


    class RNN(nn.Module):
        def __init__(self):
            super(RNN, self).__init__()
            self.rnn = nn.LSTM(        # an LSTM usually works much better than a plain nn.RNN()
                input_size=28,         # pixels in one image row
                hidden_size=64,        # rnn hidden units
                num_layers=1,          # number of stacked RNN layers
                batch_first=True,      # input & output have batch as the first dimension, e.g. (batch, time_step, input_size)
            )
            self.out = nn.Linear(64, 10)  # output layer

        def forward(self, x):
            # x shape     (batch, time_step, input_size)
            # r_out shape (batch, time_step, hidden_size)
            # h_n shape   (n_layers, batch, hidden_size)   the LSTM has two states: h_n is the hidden state, h_c the cell state
            # h_c shape   (n_layers, batch, hidden_size)
            r_out, (h_n, h_c) = self.rnn(x, None)  # None means the initial states are all zeros

            # take the output at the last time step
            # here r_out[:, -1, :] equals h_n
            out = self.out(r_out[:, -1, :])
            return out


    rnn = RNN()
    print(rnn)

    optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)  # optimize all parameters
    loss_func = nn.CrossEntropyLoss()                      # the target label is not one-hot encoded

    # training and testing
    for epoch in range(EPOCH):
        for step, (b_x, b_y) in enumerate(train_loader):   # gives batch data
            b_x = b_x.view(-1, 28, 28)       # reshape x to (batch, time_step, input_size)
            output = rnn(b_x)                # rnn output
            loss = loss_func(output, b_y)    # cross entropy loss
            optimizer.zero_grad()            # clear gradients for this training step
            loss.backward()                  # backpropagation, compute gradients
            optimizer.step()                 # apply gradients

            if step % 50 == 0:
                test_output = rnn(test_x)    # (samples, time_step, input_size)
                pred_y = torch.max(test_output, 1)[1].data.numpy()
                accuracy = float((pred_y == test_y).astype(int).sum()) / float(test_y.size)
                print('Epoch: ', epoch, '| train loss: %.4f' % loss.data.numpy(), '| test accuracy: %.2f' % accuracy)

    # print 10 predictions from test data
    test_output = rnn(test_x[:10].view(-1, 28, 28))
    pred_y = torch.max(test_output, 1)[1].data.numpy()
    print(pred_y, 'prediction number')
    print(test_y[:10], 'real number')

    In the code above the initial states h_n and h_c are all zeros (we pass None for them). We can also initialize them with random values instead:

    import torch
    from torch import nn


    class RNN(nn.Module):
        def __init__(self):
            super(RNN, self).__init__()
            self.rnn = nn.LSTM(        # an LSTM usually works much better than a plain nn.RNN()
                input_size=28,         # pixels in one image row
                hidden_size=64,        # rnn hidden units
                num_layers=1,          # number of stacked RNN layers
                batch_first=True,      # input & output have batch as the first dimension, e.g. (batch, time_step, input_size)
            )
            self.out = nn.Linear(64, 10)  # output layer

        def forward(self, x):
            # x shape     (batch, time_step, input_size)
            # r_out shape (batch, time_step, hidden_size)
            # h_n shape   (n_layers, batch, hidden_size)   the LSTM has two states: h_n is the hidden state, h_c the cell state
            # h_c shape   (n_layers, batch, hidden_size)

            # random initial hidden state and cell state; they have the same shape:
            # 1 LSTM layer, batch_size = x.shape[0], hidden feature dimension 64
            h_0 = torch.randn(1, x.shape[0], 64)
            c_0 = torch.randn(1, x.shape[0], 64)
            r_out, (h_n, h_c) = self.rnn(x, (h_0, c_0))  # pass the random initial states instead of None (all zeros)

            # take the output at the last time step
            # here r_out[:, -1, :] equals h_n
            out = self.out(r_out[:, -1, :])
            return out


    rnn = RNN()
    print(rnn)

    Parameters:

    class torch.nn.LSTM(*args, **kwargs)

    Its arguments are:
    input_size: feature dimension of the input x
    hidden_size: feature dimension of the hidden state
    num_layers: number of stacked LSTM layers, default 1
    bias: if False, the bias terms b_ih and b_hh are not used; default True
    batch_first: if True, input and output tensors are shaped (batch, seq, feature)
    dropout: if non-zero, applies dropout to the output of every layer except the last; default 0
    bidirectional: if True, builds a bidirectional LSTM; default False
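
    The following minimal sketch (toy sizes, not the MNIST setup above, and purely for illustration) constructs an nn.LSTM with the arguments listed above and inspects the per-layer parameter shapes:

    import torch
    from torch import nn

    # Toy dimensions, assumed purely for illustration
    lstm = nn.LSTM(
        input_size=28,        # feature dimension of x
        hidden_size=64,       # feature dimension of the hidden state
        num_layers=1,         # number of stacked LSTM layers
        bias=True,            # use the bias terms b_ih and b_hh
        batch_first=True,     # input/output tensors are (batch, seq, feature)
        dropout=0.0,          # dropout between layers (no effect with a single layer)
        bidirectional=False,  # unidirectional LSTM
    )

    # Per-layer parameters stack the four gates (i, f, g, o), so the first
    # dimension is 4 * hidden_size.
    print(lstm.weight_ih_l0.shape)  # torch.Size([256, 28])
    print(lstm.weight_hh_l0.shape)  # torch.Size([256, 64])
    print(lstm.bias_ih_l0.shape)    # torch.Size([256])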

    The other two inputs of the LSTM are h0 and c0, which can be understood as the network's initial states; generating them with random numbers is fine.

    h0: (num_layers * num_directions, batch, hidden_size)
    c0: (num_layers * num_directions, batch, hidden_size)

    where:
    num_layers: number of stacked layers
    num_directions: 1 for a unidirectional network, 2 for a bidirectional one
    batch: batch size of the input data
    hidden_size: number of hidden units
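
    A quick sketch of building h0 and c0 with this shape and feeding them through a small LSTM; the sizes are assumptions chosen only for illustration:

    import torch
    from torch import nn

    num_layers, num_directions = 1, 1
    batch, seq_len, input_size, hidden_size = 3, 28, 28, 64

    lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)

    # Initial states: (num_layers * num_directions, batch, hidden_size)
    h0 = torch.randn(num_layers * num_directions, batch, hidden_size)
    c0 = torch.randn(num_layers * num_directions, batch, hidden_size)

    x = torch.randn(batch, seq_len, input_size)  # batch-first input
    output, (hn, cn) = lstm(x, (h0, c0))
    print(output.shape)        # torch.Size([3, 28, 64])
    print(hn.shape, cn.shape)  # both torch.Size([1, 3, 64])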

    Note that even if we define the input with batch_first=True, i.e.

    input: (batch, seq_len, input_size)

    the shapes of h0 and c0 do not change: they stay (num_layers * num_directions, batch, hidden_size), exactly as in the sketch above. batch_first only affects the layout of the input and output tensors.

    The LSTM returns a tuple, as follows:

    output, (ht, ct) = net(input)
    output: the last layer's hidden-state output at every time step
    ht: the hidden state at the last time step
    ct: the cell state at the last time step
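
    As a small sanity check (hypothetical sizes), for a single-layer, unidirectional LSTM the last time step of output equals ht, while ct is the corresponding cell state:

    import torch
    from torch import nn

    lstm = nn.LSTM(input_size=10, hidden_size=20, num_layers=1)  # default layout: (seq_len, batch, feature)

    x = torch.randn(5, 3, 10)   # (seq_len, batch, input_size)
    output, (ht, ct) = lstm(x)  # zero initial states by default

    # output holds the last layer's hidden state at every time step, so its last
    # time step equals ht for a single-layer, unidirectional LSTM.
    print(torch.allclose(output[-1], ht[0]))  # True
    print(ct.shape)                           # cell state: torch.Size([1, 3, 20])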

    By default (batch_first=False) the dimensions are:

    output: (seq_len, batch, hidden_size * num_directions)
    ht: (num_layers * num_directions, batch, hidden_size)
    ct: (num_layers * num_directions, batch, hidden_size)
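
    A shape check with this default seq-first layout; the two-layer bidirectional configuration below is an assumption chosen just to make the num_layers * num_directions dimension visible:

    import torch
    from torch import nn

    lstm = nn.LSTM(input_size=10, hidden_size=20, num_layers=2, bidirectional=True)
    x = torch.randn(5, 3, 10)   # (seq_len, batch, input_size)
    output, (ht, ct) = lstm(x)

    print(output.shape)  # (seq_len, batch, hidden_size * num_directions) -> torch.Size([5, 3, 40])
    print(ht.shape)      # (num_layers * num_directions, batch, hidden_size) -> torch.Size([4, 3, 20])
    print(ct.shape)      # torch.Size([4, 3, 20])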

    As with the input, if we define the input with batch_first=True, i.e.

    input: (batch, seq_len, input_size)

    then only output changes its layout, to (batch, seq_len, hidden_size * num_directions); ht and ct keep the shape (num_layers * num_directions, batch, hidden_size).
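
    And a quick check, under the same toy assumptions, that batch_first=True only changes the layout of output while ht and ct keep their layer-first shape:

    import torch
    from torch import nn

    lstm = nn.LSTM(input_size=10, hidden_size=20, num_layers=1, batch_first=True)
    x = torch.randn(3, 5, 10)   # (batch, seq_len, input_size)
    output, (ht, ct) = lstm(x)

    print(output.shape)  # torch.Size([3, 5, 20])  batch-first, like the input
    print(ht.shape)      # torch.Size([1, 3, 20])  still (num_layers * num_directions, batch, hidden_size)
    print(ct.shape)      # torch.Size([1, 3, 20])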

    Next, let's build an LSTM by hand out of linear layers:

    import torch
    import torch.nn as nn


    class LSTM_v1(nn.Module):
        def __init__(self, input_sz, hidden_sz):
            super().__init__()
            self.input_size = input_sz
            self.hidden_size = hidden_sz
            # forget gate
            self.f_gate = nn.Linear(self.input_size + self.hidden_size, self.hidden_size)
            # input gate
            self.i_gate = nn.Linear(self.input_size + self.hidden_size, self.hidden_size)
            # cell (candidate cell state)
            self.c_cell = nn.Linear(self.input_size + self.hidden_size, self.hidden_size)
            # output gate
            self.o_gate = nn.Linear(self.input_size + self.hidden_size, self.hidden_size)
            self.init_weights()

        def init_weights(self):
            pass

        def forward(self, x, init_states=None):
            bs, seq_sz, _ = x.size()
            hidden_seq = []
            if init_states is None:
                h_t, c_t = (
                    torch.zeros(bs, self.hidden_size).to(x.device),
                    torch.zeros(bs, self.hidden_size).to(x.device),
                )
            else:
                h_t, c_t = init_states
            for t in range(seq_sz):
                x_t = x[:, t, :]
                input_t = torch.cat([x_t, h_t], dim=-1)      # concatenate input and previous hidden state
                f_t = torch.sigmoid(self.f_gate(input_t))    # forget gate
                i_t = torch.sigmoid(self.i_gate(input_t))    # input gate
                c_t_ = torch.tanh(self.c_cell(input_t))      # candidate cell state
                c_t = f_t * c_t + i_t * c_t_                 # new cell state
                o_t = torch.sigmoid(self.o_gate(input_t))    # output gate
                h_t = o_t * torch.tanh(c_t)                  # new hidden state
                hidden_seq.append(h_t.unsqueeze(0))
            hidden_seq = torch.cat(hidden_seq, dim=0)
            hidden_seq = hidden_seq.transpose(0, 1).contiguous()  # (batch, seq_len, hidden_size)
            return hidden_seq, (h_t, c_t)
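
    A brief usage sketch (random data, continuing from the definitions above) that compares LSTM_v1 against nn.LSTM to confirm the output layout matches:

    # torch, nn and LSTM_v1 are already defined above
    x = torch.randn(4, 7, 28)                # (batch, seq_len, input_size)

    custom = LSTM_v1(input_sz=28, hidden_sz=64)
    seq, (h_t, c_t) = custom(x)
    print(seq.shape, h_t.shape, c_t.shape)   # [4, 7, 64] [4, 64] [4, 64]

    ref = nn.LSTM(input_size=28, hidden_size=64, batch_first=True)
    ref_seq, (h_n, c_n) = ref(x)
    print(ref_seq.shape)                     # [4, 7, 64], same layout as seq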

    2. The RNN Model

    import torch
    from torch import nn
    import torchvision.datasets as dsets
    import torchvision.transforms as transforms
    import matplotlib.pyplot as plt

    torch.manual_seed(1)

    # Hyper parameters
    EPOCH = 1              # how many times to train over the whole dataset; keep it at 1 to save time
    BATCH_SIZE = 64
    TIME_STEP = 28         # rnn time steps / image height
    INPUT_SIZE = 28        # rnn input size per step / pixels per image row
    LR = 0.01              # learning rate
    DOWNLOAD_MNIST = True  # set to False if you have already downloaded MNIST

    # MNIST handwritten digits
    train_data = dsets.MNIST(
        root='./mnist/',                  # where to save / load the data
        train=True,                       # this is the training set
        transform=transforms.ToTensor(),  # convert PIL.Image or numpy.ndarray to
                                          # torch.FloatTensor (C x H x W), normalized to [0.0, 1.0]
        download=DOWNLOAD_MNIST,          # download if not present, otherwise reuse
    )

    # plot one example (train_data.data / train_data.targets in newer torchvision)
    # print(train_data.train_data.size())     # (60000, 28, 28)
    # print(train_data.train_labels.size())   # (60000)
    # plt.imshow(train_data.train_data[0].numpy(), cmap='gray')
    # plt.title('%i' % train_data.train_labels[0])
    # plt.show()

    # batch training: 64 samples, 1 channel, 28x28 -> (64, 1, 28, 28)
    train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

    # to save time, evaluate on the first 2000 test samples only
    test_data = dsets.MNIST(root='./mnist/', train=False, transform=transforms.ToTensor())
    test_x = test_data.test_data.type(torch.FloatTensor)[:2000] / 255.  # shape (2000, 28, 28), values in [0, 1]
    test_y = test_data.test_labels.numpy()[:2000]                       # convert to numpy array


    class RNN(nn.Module):
        def __init__(self):
            super(RNN, self).__init__()
            self.rnn = nn.RNN(
                input_size=28,     # pixels in one image row
                hidden_size=64,    # rnn hidden units
                num_layers=1,      # number of stacked RNN layers
                batch_first=True,  # input & output have batch as the first dimension, e.g. (batch, time_step, input_size)
            )
            self.out = nn.Linear(64, 10)  # output layer

        def forward(self, x):
            # x shape     (batch, time_step, input_size)
            # r_out shape (batch, time_step, hidden_size)
            # h shape     (n_layers, batch, hidden_size)   a plain RNN has a single hidden state
            r_out, h = self.rnn(x, None)  # None means the initial hidden state is all zeros

            # take the output at the last time step
            out = self.out(r_out[:, -1, :])
            return out


    rnn = RNN()
    print(rnn)

    optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)  # optimize all parameters
    loss_func = nn.CrossEntropyLoss()                      # the target label is not one-hot encoded

    # training and testing
    for epoch in range(EPOCH):
        for step, (b_x, b_y) in enumerate(train_loader):   # gives batch data
            b_x = b_x.view(-1, 28, 28)       # reshape x to (batch, time_step, input_size)
            output = rnn(b_x)                # rnn output
            loss = loss_func(output, b_y)    # cross entropy loss
            optimizer.zero_grad()            # clear gradients for this training step
            loss.backward()                  # backpropagation, compute gradients
            optimizer.step()                 # apply gradients

            if step % 50 == 0:
                test_output = rnn(test_x)    # (samples, time_step, input_size)
                pred_y = torch.max(test_output, 1)[1].data.numpy()
                accuracy = float((pred_y == test_y).astype(int).sum()) / float(test_y.size)
                print('Epoch: ', epoch, '| train loss: %.4f' % loss.data.numpy(), '| test accuracy: %.2f' % accuracy)

    # print 10 predictions from test data
    test_output = rnn(test_x[:10].view(-1, 28, 28))
    pred_y = torch.max(test_output, 1)[1].data.numpy()
    print(pred_y, 'prediction number')
    print(test_y[:10], 'real number')

    In the code above the initial hidden state h is all zeros (we pass None for it). We can also initialize it with random values instead:

    import torch
    from torch import nn


    class RNN(nn.Module):
        def __init__(self):
            super(RNN, self).__init__()
            self.rnn = nn.RNN(
                input_size=28,     # pixels in one image row
                hidden_size=64,    # rnn hidden units
                num_layers=1,      # number of stacked RNN layers
                batch_first=True,  # input & output have batch as the first dimension, e.g. (batch, time_step, input_size)
            )
            self.out = nn.Linear(64, 10)  # output layer

        def forward(self, x):
            # x shape     (batch, time_step, input_size)
            # r_out shape (batch, time_step, hidden_size)
            # h shape     (n_layers, batch, hidden_size)

            # random initial hidden state:
            # 1 RNN layer, batch_size = x.shape[0], hidden feature dimension 64
            h_0 = torch.randn(1, x.shape[0], 64)
            r_out, h = self.rnn(x, h_0)  # pass the random initial state instead of None (all zeros)

            # take the output at the last time step
            out = self.out(r_out[:, -1, :])
            return out


    rnn = RNN()
    print(rnn)

    Parameters:

    nn.RNN is PyTorch's basic recurrent neural network module. Its most important arguments are:
    input_size: feature dimension of the input.
    hidden_size: dimension of the hidden state.
    num_layers: number of stacked RNN layers.
    nonlinearity: non-linear activation function, 'tanh' by default.
    bias: whether to use bias terms, default True.
    batch_first: if True, the input is shaped (batch_size, seq_length, input_size); otherwise (seq_length, batch_size, input_size). Default False.
    dropout: if non-zero, applies dropout to the output of every layer except the last. Default 0.
    bidirectional: if True, builds a bidirectional RNN. Default False.
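
    A minimal sketch with these arguments (toy shapes assumed for illustration):

    import torch
    from torch import nn

    rnn = nn.RNN(
        input_size=28,
        hidden_size=64,
        num_layers=1,
        nonlinearity='relu',  # instead of the default 'tanh'
        bias=True,
        batch_first=True,     # input/output are (batch, seq_len, feature)
        dropout=0.0,
        bidirectional=False,
    )

    x = torch.randn(2, 28, 28)  # (batch, seq_len, input_size)
    output, h_n = rnn(x)        # zero initial hidden state by default
    print(output.shape)         # torch.Size([2, 28, 64])
    print(h_n.shape)            # torch.Size([1, 2, 64])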
  • Original article: https://blog.csdn.net/qq_45100200/article/details/133023072