• Neural network analysis of TCM Excel data || PyTorch implementation of a neural network || five-fold cross-validation || PyTorch neural network framework || saving neural network model parameters


Note: concrete parameter values have been replaced with descriptive placeholders (shown in angle brackets below).

If you want to train on your own dataset, in most cases adjusting these placeholders is all you need.

If your data format is rather different, or you want to modify the network structure (this article uses a three-layer fully connected BP neural network), you can try adapting some of the functions below to your needs.
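
For example, here is a minimal sketch (my own illustration, not part of the original code) of how the three-layer NeuralNetwork class defined in the code below could be given one extra hidden layer; the class name and layer sizes here are arbitrary placeholders:

    import torch.nn as nn

    class DeeperNetwork(nn.Module):
        """Hypothetical variant of the article's NeuralNetwork with one extra hidden layer."""
        def __init__(self, input_size=103, hidden_size=64, num_classes=2):
            super().__init__()
            self.layers = nn.Sequential(
                nn.Linear(input_size, hidden_size), nn.Dropout(p=0.2), nn.ReLU(),
                nn.Linear(hidden_size, hidden_size), nn.Dropout(p=0.5), nn.ReLU(),
                nn.Linear(hidden_size, hidden_size), nn.Dropout(p=0.5), nn.ReLU(),  # extra hidden layer
                nn.Linear(hidden_size, num_classes),
            )

        def forward(self, x):
            return self.layers(x)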

Five-fold cross-validation, model saving, and so on are explained in the code; enable them as you see fit.

If you do not have PyTorch installed, you can try installing the CPU build of the package:

Start Locally | PyTorch: https://pytorch.org/get-started/locally/
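
Once it is installed, a quick sanity check (my own snippet, not from the original post) confirms that the CPU-only build imports and can run a forward pass:

    import torch
    import torch.nn as nn

    print(torch.__version__)            # installed PyTorch version
    print(torch.cuda.is_available())    # False is expected (and fine) for the CPU-only build
    x = torch.randn(4, 103)             # 4 fake samples with 103 features, matching this article's data
    print(nn.Linear(103, 2)(x).shape)   # torch.Size([4, 2])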

If you would rather not install it, you can go to 郭大侠's GitHub, where PyTorch is not required.

Data format

664 samples, each with 103 attributes; the last column is the label.
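
A short check (my own sketch; the file name is the one used later in the code) of the layout that open_excel expects, i.e. a sheet of shape (664, 104) whose last column is the 0/1 label:

    import pandas as pd

    df = pd.read_excel('症状_瘀血阻络证_data.xlsx', engine='openpyxl')
    print(df.shape)                       # expected: (664, 104) -> 103 feature columns + 1 label column
    print(df.iloc[:, -1].value_counts())  # class distribution of the binary label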

Results

Without five-fold cross-validation

With five-fold cross-validation

Code

Without five-fold cross-validation

    1. """
    2. BP神经网络二分类任务 瘀血阻络证 103特征
    3. """
    4. import pandas as pd
    5. import matplotlib.pyplot as plt
    6. import numpy as np
    7. import random
    8. import torch
    9. import torch.nn as nn
    10. from torch.utils.data import DataLoader, TensorDataset
    11. from sklearn.model_selection import train_test_split
    12. from sklearn.metrics import classification_report, roc_curve, auc
    def open_excel(file_url):
        """
        Open the dataset and preprocess it.
        :param file_url: file path
        :return: feature array, label array
        """
        readbook = pd.read_excel(f'{file_url}', engine='openpyxl')
        nplist = readbook.T.to_numpy()
        data = nplist[0:-1].T       # every column except the last one holds features
        data = np.float64(data)
        target = nplist[-1]         # the last column holds the labels
        return data, target

    def open_csv(file_url):
        """
        Open the dataset and preprocess it.
        :param file_url: file name (without the .csv extension)
        :return: feature array, label array
        """
        readbook = pd.read_csv(f'{file_url}.csv')
        nplist = readbook.T.to_numpy()
        data = nplist[0:-1].T       # every column except the last one holds features
        data = np.float64(data)
        target = nplist[-1]         # the last column holds the labels
        return data, target

    def random_number(data_size, key):
        """
        Build an index list 0 .. data_size-1 and shuffle it with random.shuffle() when key == 1.
        """
        number_set = []
        for i in range(data_size):
            number_set.append(i)
        if key == 1:
            random.shuffle(number_set)
        return number_set

    def inputtotensor(inputtensor, labeltensor):
        """
        Convert the dataset inputs and labels to tensors.
        :param inputtensor: dataset inputs
        :param labeltensor: dataset labels
        :return: input tensor, label tensor
        """
        inputtensor = np.array(inputtensor)
        inputtensor = torch.FloatTensor(inputtensor)
        labeltensor = np.array(labeltensor)
        labeltensor = labeltensor.astype(float)
        labeltensor = torch.LongTensor(labeltensor)
        return inputtensor, labeltensor

    # Define the BP neural network
    class NeuralNetwork(nn.Module):
        def __init__(self):
            super(NeuralNetwork, self).__init__()
            self.fc1 = nn.Linear(input_size, hidden_size)    # input layer -> hidden layer 1
            self.relu = nn.ReLU()
            self.fc2 = nn.Linear(hidden_size, hidden_size)   # hidden layer 1 -> hidden layer 2
            self.fc3 = nn.Linear(hidden_size, num_classes)   # hidden layer 2 -> output layer
            self.dropout_i = nn.Dropout(p=0.2)               # dropout on the input-layer activations
            self.dropout_h = nn.Dropout(p=0.5)               # dropout on the hidden-layer activations

        def forward(self, x):
            out = self.fc1(x)
            out = self.dropout_i(out)
            out = self.relu(out)
            out = self.fc2(out)
            out = self.dropout_h(out)
            out = self.relu(out)
            out = self.fc3(out)
            return out

    def addbatch(data_train, data_test, batchsize):
        """
        Wrap the inputs and labels into batches.
        :param data_train: inputs
        :param data_test: labels
        :param batchsize: size of one batch
        :return: a batched DataLoader over the dataset
        """
        data = TensorDataset(data_train, data_test)
        data_loader = DataLoader(data, batch_size=batchsize, shuffle=True)
        return data_loader

    def train_test(traininput, trainlabel, Epochs, batchsize):  # , testinput, testlabel
        """
        Inputs: training inputs, training labels, number of epochs, batch size.
        Trains the BP network and prints the loss periodically; the commented-out block
        below additionally evaluates the test accuracy during training.
        """
        # set up batches (the DataLoader shuffles the training set)
        traindata = addbatch(traininput, trainlabel, batchsize)
        total_step = len(traindata)
        for epoch in range(Epochs):
            for step, data in enumerate(traindata):
                net.train()
                inputs, labels = data
                # forward pass
                out = net(inputs)
                # compute the loss
                loss = loss_func(out, labels)
                # clear the gradients from the previous step
                optimizer.zero_grad()
                # backward pass
                loss.backward()
                # parameter update
                optimizer.step()
                # print the loss
                if (epoch + 1) % 100 == 0:
                    print('Epoch [{}/{}], Step [{}/{}], Loss: {:.8f}'
                          .format(epoch + 1, Epochs, step + 1, total_step, loss.item()))
            ''' # test accuracy
            net.eval()
            testout = net(testinput)
            testloss = loss_func(testout, testlabel)
            prediction = torch.max(testout, 1)[1]
            pred_y = prediction.numpy()        # if the tensors were placed on the GPU, move them back to the CPU first
            target_y = testlabel.data.numpy()
            j = 0
            for i in range(pred_y.size):
                if pred_y[i] == target_y[i]:
                    j += 1
            acc = j / pred_y.size
            if epoch % 10 == 0:
                print("Accuracy after", epoch, "training epochs:", acc)'''

    # Plot the ROC curve from the labels and predicted probabilities, and compute the AUC
    def acu_curve(y, prob):
        fpr, tpr, threshold = roc_curve(y, prob)   # false positive rate and true positive rate
        roc_auc = auc(fpr, tpr)                    # area under the ROC curve
        print("AUC:", roc_auc)
        lw = 2
        plt.figure(figsize=(6, 6))
        plt.plot(fpr, tpr, color='darkorange',
                 lw=lw, label='ROC curve (area = %0.3f)' % roc_auc)  # FPR on the x-axis, TPR on the y-axis
        plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver operating characteristic')
        plt.legend(loc="lower right")
        plt.show()
        return roc_auc

    if __name__ == "__main__":
        # read the data with open_excel
        feature, label = open_excel('症状_瘀血阻络证_data.xlsx')
        # split the data into training and test sets
        x_train, x_test, y_train, y_test = train_test_split(feature, label, test_size=<your test ratio>, random_state=<your random seed>)
        # convert the data to tensors
        traininput, trainlabel = inputtotensor(x_train, y_train)
        testinput, testlabel = inputtotensor(x_test, y_test)
        # normalization
        traininput = nn.functional.normalize(traininput)
        testinput = nn.functional.normalize(testinput)
        # hyperparameters
        Epochs = <your number of epochs>
        input_size = 103
        hidden_size = <your number of hidden-layer neurons>
        num_classes = 2
        LR = <your learning rate>
        batchsize = <your batch size>
        # define the network
        net = NeuralNetwork()
        # optimizer
        optimizer = torch.optim.Adam(net.parameters(), LR)
        # loss function
        loss_func = torch.nn.CrossEntropyLoss()
        # train; inputs: training inputs, training labels, number of epochs, batch size
        print(traininput)
        print(trainlabel)
        train_test(traininput, trainlabel, Epochs, batchsize)
        print('---------------------------------- without five-fold cross-validation ----------------------------------')
        # test results without five-fold cross-validation
        net.eval()
        test_predict1 = torch.softmax(net(testinput), dim=1).cpu().detach().numpy()  # class probabilities (softmax of the logits)
        test_predict2 = np.argmax(test_predict1, axis=1)                             # probabilities -> predicted labels
        # print(test_predict1)
        # print(test_predict2)
        print(classification_report(testlabel, test_predict2))
        test_auc = acu_curve(testlabel, test_predict1[:, 1])   # named test_auc so that sklearn's auc() is not shadowed
        ####### whether to save the model parameters #######
        save = True  # or False
        if save:
            name = 'hidden_size' + str(hidden_size) + '-batchsize' + str(batchsize) + '-Epochs' + str(Epochs) + '-auc' + str(test_auc)
            url = './' + name + '.pth'
            torch.save(net.state_dict(), url)   # save the parameters of the trained network (not of a freshly initialised one)
            print('Model parameters saved to ' + url)
        '''# load the saved model parameters and predict
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        open_model = NeuralNetwork()
        open_model.load_state_dict(torch.load('./<name>.pth'))
        open_model.to(device)
        test_predict1 = torch.softmax(open_model(testinput.to(device)), dim=1).cpu().detach().numpy()
        test_predict2 = np.argmax(test_predict1, axis=1)   # probabilities -> predicted labels
        print(classification_report(testlabel, test_predict2))
        acu_curve(testlabel, test_predict1[:, 1])
        '''

With five-fold cross-validation (the parts that change)

Everything before the __main__ block is the same as above, apart from a few extra imports; only the parts shown below change.

    import pandas as pd
    import matplotlib.pyplot as plt
    import numpy as np
    import random
    import torch
    import torch.nn as nn
    from torch.utils.data import DataLoader, TensorDataset
    from sklearn.model_selection import KFold
    from sklearn.metrics import roc_curve, precision_score, recall_score, f1_score, \
        roc_auc_score, accuracy_score, auc
    from itertools import chain

    def statistics(testlabel, test_predict1, test_predict2):
        """
        :param testlabel: test-set labels
        :param test_predict1: predicted probability array
        :param test_predict2: predicted labels
        :return: AUC of this fold
        """
        # accuracy
        print("Accuracy:", accuracy_score(testlabel, test_predict2))
        # precision
        print("Precision:", precision_score(testlabel, test_predict2, average='macro'))
        # recall
        print("Recall:", recall_score(testlabel, test_predict2, average='macro'))
        # F1 score
        print("F1-Score:", f1_score(testlabel, test_predict2, average='macro'))
        # plot the ROC curve
        fpr, tpr, thresholds = roc_curve(testlabel, test_predict1[:, 1], pos_label=1)  # pos_label=1: the positive class is labelled 1
        roc_auc = auc(fpr, tpr)  # area under the ROC curve
        print("AUC:", roc_auc)
        plt.plot(fpr, tpr, linewidth=2, color='darkorange',
                 label="ROC(AUC=%0.3f)" % roc_auc_score(testlabel, test_predict1[:, 1]))
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
        plt.ylim(0, 1.05)
        plt.xlim(0, 1.05)
        plt.legend(loc=4)
        plt.show()
        return roc_auc

    if __name__ == "__main__":
        # load the data
        df = pd.read_excel(r"症状_瘀血阻络证_data.xlsx")
        data = df.iloc[:, :-1]
        data['target'] = df.iloc[:, -1]   # re-append the label column
        data = data.to_numpy()
        # 5-fold cross-validation
        kf = KFold(n_splits=5, shuffle=True)
        for train_index, test_index in kf.split(data):
            x_train, x_test = data[train_index, :data.shape[1] - 1], data[test_index, :data.shape[1] - 1]
            y_train, y_test = data[train_index, data.shape[1] - 1:data.shape[1]], data[test_index, data.shape[1] - 1:data.shape[1]]
            # Flatten y_train / y_test to 1-D here; otherwise CrossEntropyLoss below raises
            # "RuntimeError: 1D target tensor expected, multi-target not supported"
            y_train = list(chain.from_iterable(y_train))
            y_test = list(chain.from_iterable(y_test))
            # convert the data to tensors
            traininput, trainlabel = inputtotensor(x_train, y_train)
            testinput, testlabel = inputtotensor(x_test, y_test)
            # normalization
            traininput = nn.functional.normalize(traininput)
            testinput = nn.functional.normalize(testinput)
            # hyperparameters
            Epochs = <your number of epochs>
            input_size = 103
            hidden_size = <your number of hidden-layer neurons>
            num_classes = 2
            LR = <your learning rate>
            batchsize = <your batch size>
            # define the network (a fresh one for every fold)
            net = NeuralNetwork()
            # optimizer
            optimizer = torch.optim.Adam(net.parameters(), LR)
            # Loss function: the 2-D labels produced by the KFold slicing would require a multi-label loss
            # such as MultiLabelSoftMarginLoss; since y_train / y_test were flattened above, CrossEntropyLoss works.
            # loss_func = torch.nn.MultiLabelSoftMarginLoss()
            loss_func = torch.nn.CrossEntropyLoss()
            # train; inputs: training inputs, training labels, number of epochs, batch size
            train_test(traininput, trainlabel, Epochs, batchsize)
            print('---------------------------------- with five-fold cross-validation ----------------------------------')
            # test results of this fold
            net.eval()
            test_predict1 = torch.softmax(net(testinput), dim=1).cpu().detach().numpy()  # class probabilities (softmax of the logits)
            test_predict2 = np.argmax(test_predict1, axis=1)                             # probabilities -> predicted labels
            fold_auc = statistics(testlabel, test_predict1, test_predict2)   # named fold_auc so that sklearn's auc() is not shadowed
            ####### whether to save the model parameters #######
            save = True  # or False
            if save:
                name = 'hidden_size' + str(hidden_size) + '-batchsize' + str(batchsize) + '-Epochs' + str(Epochs) + '-auc' + str(fold_auc)
                url = './' + name + '.pth'
                torch.save(net.state_dict(), url)   # save the parameters of the trained network (one file per fold)
                print('Model parameters saved to ' + url)
            '''# load the saved model parameters and predict
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
            open_model = NeuralNetwork()
            open_model.load_state_dict(torch.load('./<name>.pth'))
            open_model.to(device)
            test_predict1 = torch.softmax(open_model(testinput.to(device)), dim=1).cpu().detach().numpy()
            test_predict2 = np.argmax(test_predict1, axis=1)   # probabilities -> predicted labels
            statistics(testlabel, test_predict1, test_predict2)
            '''
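
The loop above reports the metrics of each fold separately. If you also want a single averaged figure, a minimal sketch (my own addition, using only names that already appear in the code above) is to collect the per-fold AUC values and summarise them after the loop:

    import numpy as np

    fold_aucs = []   # create this list once, before the "for train_index, test_index in kf.split(data):" loop
    # inside the loop, store each fold's result:  fold_aucs.append(statistics(testlabel, test_predict1, test_predict2))
    # after the loop has finished, report the averaged result:
    if fold_aucs:
        print('mean AUC over %d folds: %.3f (std %.3f)' % (len(fold_aucs), np.mean(fold_aucs), np.std(fold_aucs)))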

Notes (references)

1. Main reference: pytorch神经网络对Excel数据集进行处理(读取,转为tensor格式,归一化),并且以鸢尾花(iris)数据集为例,实现BP神经网络, <编程路上>的博客 (CSDN). https://blog.csdn.net/weixin_43788986/article/details/125880602

2. pytorch:批量数据分割(batch), XYKenny的博客 (CSDN). https://blog.csdn.net/XYKenny/article/details/105935836

3. Pytorch学习笔记(5)——交叉熵报错RuntimeError: 1D target tensor expected, multi-target not supported, 野指针小李的博客 (CSDN). https://blog.csdn.net/qq_35357274/article/details/121002342

4. 评估深度学习模型-在keras中使用scikit-learn-基于keras的python学习笔记(三), 人工智障之深度瞎学的博客 (CSDN). https://blog.csdn.net/weixin_44474718/article/details/86249827

5. python将二维数组变成一维数组(np.array类型), 靳航ppp的博客 (CSDN). https://blog.csdn.net/haha456487/article/details/106316116

• Original article: https://blog.csdn.net/qq_54499870/article/details/127974826