• Transformer-based cardiovascular disease prediction


    Video walkthrough: Transformer-based cardiovascular disease prediction, full data and code (bilibili)

    Data preview

    Full code:

    # pip install openpyxl -i https://pypi.tuna.tsinghua.edu.cn/simple/
    # pip install optuna -i https://pypi.tuna.tsinghua.edu.cn/simple/
    import math
    import random
    import warnings

    import numpy as np
    import pandas as pd
    from tqdm import tqdm
    import torch
    import torch.nn as nn
    import torch.utils.data as Data
    from torch.backends import cudnn
    import matplotlib.pyplot as plt

    warnings.filterwarnings("ignore")
    plt.rcParams['font.sans-serif'] = ['SimHei']  # display Chinese labels correctly
    plt.rcParams['axes.unicode_minus'] = False    # display minus signs correctly

    # Fix the random seeds so the experiment is reproducible
    SEED = 1234
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)      # for single-GPU training
    torch.cuda.manual_seed_all(SEED)  # for multi-GPU training
    cudnn.benchmark = False
    cudnn.deterministic = True
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load the heart-disease dataset and fill missing values with -1
    df = pd.read_csv("Heart Disease Dataset(12 attributes)(1).csv")
    print(df.columns)
    print(df.head())
    data = df.fillna(-1)
    data_x = data[['Age', 'RestingBP', 'Cholesterol', 'FastingBS', 'MaxHR', 'Oldpeak',
                   'HeartDisease']].values
    data_x = np.array(data_x, dtype=np.float32)
    print(data_x)

    # Slide a window of length 1 over the rows: the 7 columns of row i
    # (including its HeartDisease flag) are used to predict the HeartDisease
    # label of row i+1
    data_31_x = []
    data_31_y = []
    for i in range(0, len(data_x) - 1, 1):
        data_31_x.append(data_x[i:i + 1])
        data_31_y.append(data_x[i + 1][-1])
    print(len(data_31_x), len(data_31_y))
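    # Each element of data_31_x has shape (1, 7): a length-1 sequence over the
    # 7 selected columns; data_31_y holds the corresponding 0/1 labels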
    class DataSet(Data.Dataset):
        def __init__(self, data_inputs, data_targets):
            self.inputs = torch.FloatTensor(data_inputs)
            self.label = torch.FloatTensor(data_targets)

        def __getitem__(self, index):
            return self.inputs[index], self.label[index]

        def __len__(self):
            return len(self.inputs)

    Batch_Size = 32
    dataset = DataSet(np.array(data_31_x), list(data_31_y))

    # 80/20 random train/test split
    train_size = int(len(data_31_y) * 0.8)
    test_size = len(data_31_y) - train_size
    train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])
    TrainDataLoader = Data.DataLoader(train_dataset, batch_size=Batch_Size, shuffle=True, drop_last=True)
    TestDataLoader = Data.DataLoader(test_dataset, batch_size=Batch_Size, shuffle=True, drop_last=True)
    print("number of batches in TestDataLoader:", len(TestDataLoader))
    print("number of batches in TrainDataLoader:", len(TrainDataLoader))
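    # Each batch now yields inputs of shape [32, 1, 7] and targets of shape
    # [32]; drop_last=True discards the final partial batch so shapes stay fixed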
    class PositionalEncoding(nn.Module):
        """Classic sinusoidal positional encoding (kept from the original
        template; the Transformer below actually uses learned positional
        embeddings instead)."""
        def __init__(self, d_model, max_len=5000):
            super(PositionalEncoding, self).__init__()
            pe = torch.zeros(max_len, d_model)
            position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
            div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
            pe[:, 0::2] = torch.sin(position * div_term)
            pe[:, 1::2] = torch.cos(position * div_term)
            pe = pe.unsqueeze(0).transpose(0, 1)
            self.register_buffer('pe', pe)

        def forward(self, x: torch.Tensor):
            # Add the positional encoding to each feature channel separately
            chunk = x.chunk(x.size(-1), dim=2)
            out = torch.Tensor([]).to(x.device)
            for i in range(len(chunk)):
                out = torch.cat((out, chunk[i] + self.pe[:chunk[i].size(0), ...]), dim=2)
            return out
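    # For reference, the buffer above implements the standard formulas
    #   PE(pos, 2i)   = sin(pos / 10000^(2i / d_model))
    #   PE(pos, 2i+1) = cos(pos / 10000^(2i / d_model))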
    def transformer_generate_tgt_mask(length, device):
        """Build a causal (lower-triangular) attention mask for the decoder."""
        mask = torch.tril(torch.ones(length, length, device=device)) == 1
        mask = (
            mask.float()
            .masked_fill(mask == 0, float("-inf"))
            .masked_fill(mask == 1, float(0.0))
        )
        return mask
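    # For illustration, transformer_generate_tgt_mask(3, device) returns
    #   [[0., -inf, -inf],
    #    [0.,   0., -inf],
    #    [0.,   0.,   0.]]
    # so decoder step t can only attend to steps <= t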
    class Transformer(nn.Module):
        """A standard Transformer encoder-decoder with a classification head on top."""
        def __init__(self, n_encoder_inputs, n_decoder_inputs, Sequence_length,
                     d_model=512, dropout=0.1, nhead=8):
            """
            :param n_encoder_inputs: feature dimension of the encoder input
            :param n_decoder_inputs: feature dimension of the decoder input
                                     (here equal to the encoder input dimension)
            :param Sequence_length: length of the input sequence
            :param d_model: embedding dimension inside the Transformer
            :param dropout: dropout rate
            :param nhead: number of attention heads
            """
            super(Transformer, self).__init__()
            # Learned positional embeddings for encoder and decoder
            self.input_pos_embedding = torch.nn.Embedding(500, embedding_dim=d_model)
            self.target_pos_embedding = torch.nn.Embedding(500, embedding_dim=d_model)
            encoder_layer = torch.nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dropout=dropout,
                                                             dim_feedforward=4 * d_model)
            decoder_layer = torch.nn.TransformerDecoderLayer(d_model=d_model, nhead=nhead, dropout=dropout,
                                                             dim_feedforward=4 * d_model)
            self.encoder = torch.nn.TransformerEncoder(encoder_layer, num_layers=2)
            self.decoder = torch.nn.TransformerDecoder(decoder_layer, num_layers=4)
            self.input_projection = torch.nn.Linear(n_encoder_inputs, d_model)
            self.output_projection = torch.nn.Linear(n_decoder_inputs, d_model)
            self.linear = torch.nn.Linear(d_model, 1)
            # Maps the per-step outputs to 2 logits for binary classification
            self.ziji_add_linear = torch.nn.Linear(Sequence_length, 2)

        def encode_in(self, src):
            src_start = self.input_projection(src).permute(1, 0, 2)
            in_sequence_len, batch_size = src_start.size(0), src_start.size(1)
            pos_encoder = (torch.arange(0, in_sequence_len, device=src.device).unsqueeze(0).repeat(batch_size, 1))
            pos_encoder = self.input_pos_embedding(pos_encoder).permute(1, 0, 2)
            src = src_start + pos_encoder
            src = self.encoder(src) + src_start  # residual connection around the encoder
            return src

        def decode_out(self, tgt, memory):
            tgt_start = self.output_projection(tgt).permute(1, 0, 2)
            out_sequence_len, batch_size = tgt_start.size(0), tgt_start.size(1)
            pos_decoder = (torch.arange(0, out_sequence_len, device=tgt.device).unsqueeze(0).repeat(batch_size, 1))
            pos_decoder = self.target_pos_embedding(pos_decoder).permute(1, 0, 2)
            tgt = tgt_start + pos_decoder
            tgt_mask = transformer_generate_tgt_mask(out_sequence_len, tgt.device)
            out = self.decoder(tgt=tgt, memory=memory, tgt_mask=tgt_mask) + tgt_start
            out = out.permute(1, 0, 2)  # [batch_size, seq_len, d_model]
            out = self.linear(out)      # [batch_size, seq_len, 1]
            return out

        def forward(self, src, target_in):
            src = self.encode_in(src)
            out = self.decode_out(tgt=target_in, memory=src)  # [batch, seq_len, 1]
            # The original template returned the sequence output directly, which
            # suits many tasks; here a final linear layer turns it into 2 class
            # logits, i.e. a Transformer-based binary classifier
            out = out.squeeze(2)             # [batch, seq_len]
            out = self.ziji_add_linear(out)  # [batch, 2]
            return out
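    # Shape walk-through for this dataset: src [32, 1, 7] is projected to
    # [1, 32, 512] for the encoder; the decoder output [32, 1, 512] is mapped
    # by self.linear to [32, 1, 1], squeezed to [32, 1], and the final linear
    # layer produces [32, 2] class logits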
    # Sequence_length=1 because each sample is a single row of 7 features
    model = Transformer(n_encoder_inputs=7, n_decoder_inputs=7, Sequence_length=1).to(device)
    def test_main(model):
        val_epoch_loss = []
        with torch.no_grad():
            for index, (inputs, targets) in enumerate(TestDataLoader):
                inputs = inputs.float().to(device)
                targets = targets.long().to(device)  # CrossEntropyLoss expects class indices
                # The decoder input is random noise; only the encoder sees real data
                tgt_in = torch.rand((Batch_Size, 1, 7), device=device)
                outputs = model(inputs, tgt_in)
                loss = criterion(outputs, targets)
                val_epoch_loss.append(loss.item())
        return np.mean(val_epoch_loss)
    epochs = 50
    optimizer = torch.optim.Adamax(model.parameters(), lr=0.01)
    criterion = torch.nn.CrossEntropyLoss().to(device)

    val_loss = []
    train_loss = []
    best_test_loss = float("inf")
    for epoch in tqdm(range(epochs)):
        train_epoch_loss = []
        for index, (inputs, targets) in enumerate(TrainDataLoader):
            inputs = inputs.float().to(device)   # [batch, 1, 7]
            targets = targets.long().to(device)  # [batch]
            # Decoder input shape: [batch, sequence length, per-step feature dimension]
            tgt_in = torch.rand((Batch_Size, 1, 7), device=device)
            outputs = model(inputs, tgt_in)      # [batch, 2]
            loss = criterion(outputs, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_epoch_loss.append(loss.item())
        train_loss.append(np.mean(train_epoch_loss))
        val_epoch_loss = test_main(model)
        val_loss.append(val_epoch_loss)
        print("epoch:", epoch, "train_epoch_loss:", np.mean(train_epoch_loss), "val_epoch_loss:", val_epoch_loss)
        # Save the weights of the best model seen so far
        if val_epoch_loss < best_test_loss:
            best_test_loss = val_epoch_loss
            print("best_test_loss -------------------------------------------------", best_test_loss)
            torch.save(model.state_dict(), 'best_Transformer_trainModel.pth')
    # Plot the loss curves
    plt.figure(facecolor='white', figsize=(10, 7))
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.xlim(0, len(val_loss))
    plt.ylim(0, max(max(train_loss), max(val_loss)))
    x1 = [i for i in range(0, len(train_loss), 1)]
    y1 = val_loss
    x2 = [i for i in range(0, len(train_loss), 1)]
    y2 = train_loss
    colors1 = '#00CED4'  # point color for val_loss
    colors2 = '#DC143C'  # point color for train_loss
    area = np.pi * 4 ** 1  # point area
    plt.scatter(x1, y1, s=area, c=colors1, alpha=0.4, label='val_loss')
    plt.scatter(x2, y2, s=area, c=colors2, alpha=0.4, label='train_loss')
    plt.legend()
    plt.savefig('transformer_loss.png')
    plt.show()
    # Load the best model and evaluate on the test set
    model = Transformer(n_encoder_inputs=7, n_decoder_inputs=7, Sequence_length=1).to(device)
    model.load_state_dict(torch.load('best_Transformer_trainModel.pth'))
    model.eval()
    # During evaluation, use torch.no_grad() together with model.eval()
    y_pred = []
    y_true = []
    with torch.no_grad():
        for index, (inputs, targets) in enumerate(TestDataLoader):
            inputs = inputs.float().to(device)
            targets = targets.long().to(device)
            tgt_in = torch.rand((Batch_Size, 1, 7), device=device)
            outputs = model(inputs, tgt_in)
            outputs = outputs.cpu().numpy()
            # The predicted class is the index of the larger of the 2 logits
            for t in np.array(outputs):
                y_pred.append(np.argmax(t))
            for ii in targets:
                y_true.append(ii.item())

    from sklearn.metrics import f1_score
    from sklearn.metrics import accuracy_score
    Acc = accuracy_score(y_true, y_pred)
    print("Acc", Acc)
    Fa = f1_score(y_true, y_pred, average='macro')
    print("Fa", Fa)
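    As a usage example, here is a minimal sketch for scoring a single new record with the model loaded above. The feature values in sample are invented for illustration, and the random decoder input mirrors what the training loop uses:

    # Single-record inference sketch (the feature values below are made up);
    # order: Age, RestingBP, Cholesterol, FastingBS, MaxHR, Oldpeak, plus the
    # record's own HeartDisease column, as in the training features
    sample = np.array([[63.0, 145.0, 233.0, 1.0, 150.0, 2.3, 0.0]], dtype=np.float32)
    inputs = torch.from_numpy(sample).unsqueeze(0).to(device)  # [1, 1, 7]
    with torch.no_grad():
        tgt_in = torch.rand((1, 1, 7), device=device)
        logits = model(inputs, tgt_in)                         # [1, 2]
        pred = torch.argmax(logits, dim=1).item()
    print("predicted HeartDisease class:", pred)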

    Full code and data:

    https://download.csdn.net/download/mqdlff_python/88391921

  • Original article: https://blog.csdn.net/qq_38735017/article/details/133523451