Building a Recurrent Neural Network (RNN) for Text Classification, Prediction, and Loss Analysis with PyTorch (classifying words and surnames from different languages; source code and dataset included)



Below, we train a recurrent neural network on several thousand surnames originating from 18 different languages, and predict which language a name comes from based on its spelling.

1. Data Preparation and Preprocessing

There are 18 .txt files in total, one per language, and they need to be preprocessed before use. Preprocessing produces the output shown below (figure omitted).

Part of the preprocessing code is as follows:

from __future__ import unicode_literals, print_function, division
from io import open
import glob
import os
import unicodedata
import string

def findFiles(path): return glob.glob(path)

print(findFiles('data/names/*.txt'))

all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)

# Turn a Unicode string into plain ASCII, stripping accent marks
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
        and c in all_letters
    )

# Build the category_lines dictionary: a list of names per language
category_lines = {}
all_categories = []

# Read a file and split it into lines
def readLines(filename):
    lines = open(filename, encoding='utf-8').read().strip().split('\n')
    return [unicodeToAscii(line) for line in lines]

for filename in findFiles('data/names/*.txt'):
    category = os.path.splitext(os.path.basename(filename))[0]
    all_categories.append(category)
    lines = readLines(filename)
    category_lines[category] = lines

n_categories = len(all_categories)

2. Converting Names to Tensors

Now that all the names in the dataset have been organized, we need to convert them into tensors in order to use them. To represent a single letter, we use one-hot encoding: a vector of length n_letters that is all zeros except for a 1 at the index of the current letter.
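For example, a single letter becomes a <1 x n_letters> one-hot row vector; the snippet below is a minimal, self-contained version of the letterToTensor helper that appears in the full listing in Section 8:

import torch
import string

all_letters = string.ascii_letters + " .,;'"  # 52 letters plus space and punctuation
n_letters = len(all_letters)                  # 57

# Turn a letter into a <1 x n_letters> one-hot Tensor
def letterToTensor(letter):
    tensor = torch.zeros(1, n_letters)
    tensor[0][all_letters.find(letter)] = 1
    return tensor

print(letterToTensor('J'))  # all zeros except a 1 at the index of 'J'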

3. Building the Neural Network

Building a recurrent neural network in PyTorch amounts to reusing the same layer parameters across multiple time steps. The RNN layer carries a hidden state, and the gradients flowing through it are maintained automatically by PyTorch's autograd graph. This means we only need to write the feed-forward computation and never have to implement backpropagation by hand.
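Concretely, the model used in this article (defined again in the full listing in Section 8) is just two linear layers that operate on the concatenated input and hidden state, followed by a LogSoftmax over the output:

import torch
import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)  # input + hidden -> next hidden
        self.i2o = nn.Linear(input_size + hidden_size, output_size)  # input + hidden -> output
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        combined = torch.cat((input, hidden), 1)
        hidden = self.i2h(combined)
        output = self.i2o(combined)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)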

4. Training the RNN

All it takes to train this network is to show it a large number of examples, have it make guesses, and tell it when it is wrong.

Each pass through the training loop performs the following seven steps (see the condensed sketch after the list):

1. Create the input and target tensors
2. Create a zeroed initial hidden state
3. Feed in one letter
4. Pass the hidden state on to the next letter's input
5. Compare the final output to the target
6. Back-propagate
7. Return the output and the loss
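These steps map one-to-one onto the train function from the full listing in Section 8; the condensed sketch below assumes rnn, criterion (an nn.NLLLoss), and the helpers from that listing are already defined:

learning_rate = 0.005

def train(category_tensor, line_tensor):
    # Step 1 (creating the tensors) happens in randomTrainingExample(), see Section 8
    hidden = rnn.initHidden()                         # step 2: zeroed initial hidden state
    rnn.zero_grad()
    for i in range(line_tensor.size()[0]):            # steps 3-4: feed letters one by one,
        output, hidden = rnn(line_tensor[i], hidden)  # carrying the hidden state along
    loss = criterion(output, category_tensor)         # step 5: compare final output to target
    loss.backward()                                   # step 6: back-propagate
    # SGD by hand: add each parameter's gradient, scaled by -learning_rate
    for p in rnn.parameters():
        p.data.add_(p.grad.data, alpha=-learning_rate)
    return output, loss.item()                        # step 7: return output and loss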

The average loss over the course of training is shown below (figure omitted).

     

5. Plotting the Loss

Plotting the network's historical loss shows how the network learned.

As the number of training iterations increases, the loss gradually decreases.
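Since all_losses collects an averaged loss value every plot_every iterations, plotting it takes only a couple of lines with matplotlib (the same calls appear in the full listing in Section 8):

import matplotlib.pyplot as plt

plt.figure()
plt.plot(all_losses)  # one averaged loss value per plotting interval
plt.show()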

6. Evaluating the Results

To see how well the network performs on different categories, we create a confusion matrix that indicates, for every actual language, which language the network guessed. The result is shown in the figure below; bright spots off the main diagonal reveal which languages the network confuses.

Chinese/Korean and Spanish/Italian are commonly confused. The network predicts Greek names very accurately, but does poorly on English names.
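The matrix is filled by an evaluate() helper, which runs the same forward pass as train() but without backpropagation; a condensed version of the Section 8 code:

confusion = torch.zeros(n_categories, n_categories)

# Forward pass only -- no gradients or parameter updates
def evaluate(line_tensor):
    hidden = rnn.initHidden()
    for i in range(line_tensor.size()[0]):
        output, hidden = rnn(line_tensor[i], hidden)
    return output

# Row = actual language, column = predicted language
for i in range(10000):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    guess, guess_i = categoryFromOutput(evaluate(line_tensor))
    confusion[all_categories.index(category)][guess_i] += 1

# Normalize each row so it sums to 1
for i in range(n_categories):
    confusion[i] = confusion[i] / confusion[i].sum()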

7. Predicting on User Input

You can feed any name you like into the model, and the network will output the languages that name is most likely to belong to.
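For example, using the predict() function from the full listing in Section 8, which prints the top three candidate languages along with their log-likelihood scores:

predict('Dovesky')   # a Russian/Czech-looking name
predict('Jackson')   # an English-looking name
predict('Satoshi')   # a Japanese-looking name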

8. Full Code


from __future__ import unicode_literals, print_function, division
from io import open
import glob
import os
import unicodedata
import string
import torch

def findFiles(path): return glob.glob(path)

print(findFiles('data/names/*.txt'))

all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)

# Turn a Unicode string to plain ASCII, thanks to https://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
        and c in all_letters
    )

print(unicodeToAscii('Ślusàrski'))

# Build the category_lines dictionary, a list of names per language
category_lines = {}
all_categories = []

# Read a file and split into lines
def readLines(filename):
    lines = open(filename, encoding='utf-8').read().strip().split('\n')
    return [unicodeToAscii(line) for line in lines]

for filename in findFiles('data/names/*.txt'):
    category = os.path.splitext(os.path.basename(filename))[0]
    all_categories.append(category)
    lines = readLines(filename)
    category_lines[category] = lines

n_categories = len(all_categories)

#print(category_lines['Italian'][:5])

# Turning Names into Tensors

# Find letter index from all_letters, e.g. "a" = 0
def letterToIndex(letter):
    return all_letters.find(letter)

# Just for demonstration, turn a letter into a <1 x n_letters> Tensor
def letterToTensor(letter):
    tensor = torch.zeros(1, n_letters)
    tensor[0][letterToIndex(letter)] = 1
    return tensor

# Turn a line into a <line_length x 1 x n_letters>,
# or an array of one-hot letter vectors
def lineToTensor(line):
    tensor = torch.zeros(len(line), 1, n_letters)
    for li, letter in enumerate(line):
        tensor[li][0][letterToIndex(letter)] = 1
    return tensor

print(letterToTensor('J'))
print(lineToTensor('Jones').size())

# This RNN module (mostly copied from the PyTorch for Torch users tutorial:
# https://pytorch.org/tutorials/beginner/former_torchies/nn_tutorial.html#example-2-recurrent-net)
# is just 2 linear layers which operate on an input and hidden state, with
# a LogSoftmax layer after the output.
# .. figure:: https://i.imgur.com/Z2xbySO.png
import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        combined = torch.cat((input, hidden), 1)
        hidden = self.i2h(combined)
        output = self.i2o(combined)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)

n_hidden = 128
rnn = RNN(n_letters, n_hidden, n_categories)
# To run a step of this network we need to pass an input (in our case, the
# Tensor for the current letter) and a previous hidden state (which we
# initialize as zeros at first). We'll get back the output (probability of
# each language) and a next hidden state (which we keep for the next
# step).

input = letterToTensor('A')
hidden = torch.zeros(1, n_hidden)
output, next_hidden = rnn(input, hidden)

# For the sake of efficiency we don't want to be creating a new Tensor for
# every step, so we will use ``lineToTensor`` instead of
# ``letterToTensor`` and use slices. This could be further optimized by
# pre-computing batches of Tensors.

input = lineToTensor('Albert')
hidden = torch.zeros(1, n_hidden)
output, next_hidden = rnn(input[0], hidden)
print(output)
# As you can see the output is a ``<1 x n_categories>`` Tensor, where
# every item is the likelihood of that category (higher is more likely).

# Training
# ========
# Preparing for Training
# ----------------------
#
# Before going into training we should make a few helper functions. The
# first is to interpret the output of the network, which we know to be a
# likelihood of each category. We can use ``Tensor.topk`` to get the index
# of the greatest value:

def categoryFromOutput(output):
    top_n, top_i = output.topk(1)
    category_i = top_i[0].item()
    return all_categories[category_i], category_i

#print(categoryFromOutput(output))

# We will also want a quick way to get a training example (a name and its
# language):

import random

def randomChoice(l):
    return l[random.randint(0, len(l) - 1)]

def randomTrainingExample():
    category = randomChoice(all_categories)
    line = randomChoice(category_lines[category])
    category_tensor = torch.tensor([all_categories.index(category)], dtype=torch.long)
    line_tensor = lineToTensor(line)
    return category, line, category_tensor, line_tensor

for i in range(10):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    print('category =', category, '/ line =', line)

# Training the Network
# --------------------
#
# Now all it takes to train this network is show it a bunch of examples,
# have it make guesses, and tell it if it's wrong.
#
# For the loss function ``nn.NLLLoss`` is appropriate, since the last
# layer of the RNN is ``nn.LogSoftmax``.
criterion = nn.NLLLoss()

# Each loop of training will:
#
# - Create input and target tensors
# - Create a zeroed initial hidden state
# - Read each letter in and
#
#    - Keep hidden state for next letter
#
# - Compare final output to target
# - Back-propagate
# - Return the output and loss
learning_rate = 0.005 # If you set this too high, it might explode. If too low, it might not learn

def train(category_tensor, line_tensor):
    hidden = rnn.initHidden()
    rnn.zero_grad()
    for i in range(line_tensor.size()[0]):
        output, hidden = rnn(line_tensor[i], hidden)
    loss = criterion(output, category_tensor)
    loss.backward()
    # Add parameters' gradients to their values, multiplied by learning rate
    for p in rnn.parameters():
        p.data.add_(p.grad.data, alpha=-learning_rate)
    return output, loss.item()

# Now we just have to run that with a bunch of examples. Since the
# ``train`` function returns both the output and loss we can print its
# guesses and also keep track of loss for plotting. Since there are 1000s
# of examples we print only every ``print_every`` examples, and take an
# average of the loss.
import time
import math

n_iters = 100000
print_every = 5000
plot_every = 1000

# Keep track of losses for plotting
current_loss = 0
all_losses = []

def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

start = time.time()

for iter in range(1, n_iters + 1):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    output, loss = train(category_tensor, line_tensor)
    current_loss += loss
    # Print iter number, loss, name and guess
    if iter % print_every == 0:
        guess, guess_i = categoryFromOutput(output)
        correct = '✓' if guess == category else '✗ (%s)' % category
        print('%d %d%% (%s) %.4f %s / %s %s' % (iter, iter / n_iters * 100, timeSince(start), loss, line, guess, correct))
    # Add current loss avg to list of losses
    if iter % plot_every == 0:
        all_losses.append(current_loss / plot_every)
        current_loss = 0
# Plotting the Results
# --------------------
#
# Plotting the historical loss from ``all_losses`` shows the network
# learning:

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

plt.figure()
plt.plot(all_losses)
# Evaluating the Results
# ======================
#
# To see how well the network performs on different categories, we will
# create a confusion matrix, indicating for every actual language (rows)
# which language the network guesses (columns). To calculate the confusion
# matrix a bunch of samples are run through the network with
# ``evaluate()``, which is the same as ``train()`` minus the backprop.

# Keep track of correct guesses in a confusion matrix
confusion = torch.zeros(n_categories, n_categories)
n_confusion = 10000

# Just return an output given a line
def evaluate(line_tensor):
    hidden = rnn.initHidden()
    for i in range(line_tensor.size()[0]):
        output, hidden = rnn(line_tensor[i], hidden)
    return output

# Go through a bunch of examples and record which are correctly guessed
for i in range(n_confusion):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    output = evaluate(line_tensor)
    guess, guess_i = categoryFromOutput(output)
    category_i = all_categories.index(category)
    confusion[category_i][guess_i] += 1

# Normalize by dividing every row by its sum
for i in range(n_categories):
    confusion[i] = confusion[i] / confusion[i].sum()
# Set up plot
fig = plt.figure()
ax = fig.add_subplot(111)
cax = ax.matshow(confusion.numpy())
fig.colorbar(cax)

# Set up axes
ax.set_xticklabels([''] + all_categories, rotation=90)
ax.set_yticklabels([''] + all_categories)

# Force label at every tick
ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

plt.show()

# You can pick out bright spots off the main axis that show which
# languages it guesses incorrectly, e.g. Chinese for Korean, and Spanish
# for Italian. It seems to do very well with Greek, and very poorly with
# English (perhaps because of overlap with other languages).
# Running on User Input
# ---------------------

def predict(input_line, n_predictions=3):
    print('\n> %s' % input_line)
    with torch.no_grad():
        output = evaluate(lineToTensor(input_line))

        # Get top N categories
        topv, topi = output.topk(n_predictions, 1, True)
        predictions = []

        for i in range(n_predictions):
            value = topv[0][i].item()
            category_index = topi[0][i].item()
            print('(%.2f) %s' % (value, all_categories[category_index]))
            predictions.append([value, all_categories[category_index]])

predict('Dovesky')
predict('Jackson')
predict('Satoshi')
# The final versions of the scripts in the Practical PyTorch repo
# https://github.com/spro/practical-pytorch/tree/master/char-rnn-classification
# split the above code into a few files:
#
# - ``data.py`` (loads files)
# - ``model.py`` (defines the RNN)
# - ``train.py`` (runs training)
# - ``predict.py`` (runs ``predict()`` with command line arguments)
# - ``server.py`` (serve prediction as a JSON API with bottle.py)
#
# Run ``train.py`` to train and save the network.
#
# Run ``predict.py`` with a name to view predictions:
#
# ::
#
#    $ python predict.py Hazaki
#    (-0.42) Japanese
#    (-1.39) Polish
#    (-3.51) Czech
#
# Exercises
# =========
#
# - Try with a different dataset of line -> category, for example:
#
#    - Any word -> language
#    - First name -> gender
#    - Character name -> writer
#    - Page title -> blog or subreddit
#
# - Get better results with a bigger and/or better shaped network
#
#    - Add more linear layers
#    - Try the ``nn.LSTM`` and ``nn.GRU`` layers
#    - Combine multiple of these RNNs as a higher level network

