The goal of this example is to inject pre-trained SKU embeddings into a transformer model, in order to improve the transformer's accuracy on the sales forecasting task.
1、Example of the embedding training data format:
133657,本田#第八代雅阁,1816,4
Field 1: sku_id
Field 2: car model # trim (here 本田#第八代雅阁, i.e. Honda # 8th-generation Accord)
Field 3: sequential id of the car model # trim combination
Field 4: category id of the sku_id
2、Example of the sales forecasting training data format:
0053#031188,0_0_0_0_0_0_1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_1_0_0_0_0_0_0_0_0_0_0_0_1_0_0_0_0_0_1_0_0_0_0_0_0_0_0_1_0_0_0_0_0_0_0_0_0_0_0_1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0
Field 1: store code # sku_id
Field 2: sales of each of the last N weeks
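As a quick illustration, a minimal sketch of parsing both record formats (the sales string is shortened here, and the variable names are illustrative):

# Hypothetical parsing of the two record formats above
emb_line = '133657,本田#第八代雅阁,1816,4'
sku_id, model_trim, car_id, cat_id = emb_line.split(',')

sale_line = '0053#031188,0_0_1_0_2_0'            # sale_info shortened for the example
warehouse_sku, sale_info = sale_line.split(',')
store_code, sku_code = warehouse_sku.split('#')  # '0053', '031188'
weekly_sales = [float(v) for v in sale_info.split('_')]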
1、Input stage
(1) Collect the <sku_id, car_id> pairs; one sku can correspond to multiple car_ids
(2) Build sku_car_matrix: the position of each associated car_id is marked 1 and every other position 0 (see the toy sketch after this list)
(3) Map each sku_id to its row index (dim=0) in sku_car_matrix, so that the sku's embedding can later be fetched directly by that index, and persist this mapping
(4) Wrap sku_car_matrix in a Dataset whose __getitem__() returns the pair <train_data[index], train_data[index]>, because the embedding is extracted by reconstructing sku_car_matrix -> embedding -> sku_car_matrix
(5) Load the Dataset into a DataLoader with shuffle=False, so that batch order stays aligned with the row order of sku_car_matrix; a shuffled loader would break the index-based write-back of the embeddings
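A toy sketch of steps (2) and (3), with made-up sku codes and car_ids:

import numpy as np

# hypothetical <sku_id, car_id> pairs: sku 'A' fits cars 1 and 3, sku 'B' fits car 2
pairs = [('A', 1), ('A', 3), ('B', 2)]
sku2idx = {'A': 0, 'B': 1}   # persisted, so embeddings can later be fetched by row index
car_id_num = 3

sku_car_matrix = np.zeros((len(sku2idx), car_id_num), dtype='float32')
for sku, car_id in pairs:
    sku_car_matrix[sku2idx[sku], car_id - 1] = 1
print(sku_car_matrix)        # [[1. 0. 1.], [0. 1. 0.]]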
2、Model training stage
(1) Encoder-decoder network architecture (the AutoEncoder class in the code below: the encoder compresses the one-hot car vector down to embedding_dim, the decoder reconstructs it)
(2) Loss
The MSE loss between the decoder's output and the label
(3) On every batch the intermediate encoder output (the embedding) is saved; concatenating all batches yields the full embedding matrix for that epoch. The embedding from the epoch with the lowest train_loss is taken as the best embedding and persisted.
(4) To check embedding quality, compute pairwise Euclidean distances between a few skus; skus that are actually similar should have smaller distances (see the sketch after the training code).
(5) Implementation (embedding_model_train.py):
import os
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import logging
from tqdm import trange
import transformer_utils

logger = logging.getLogger('Transformer.Embedding')


class EmbeddingTrainDataset(Dataset):
    def __init__(self, matrix_data):
        self.train_data = matrix_data
        self.train_len = len(matrix_data)

    def __len__(self):
        return self.train_len

    def __getitem__(self, index):
        # input and label are identical: the autoencoder reconstructs its input
        return self.train_data[index], self.train_data[index]


class AutoEncoder(nn.Module):
    def __init__(self, input_dim, embedding_dim):
        super(AutoEncoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, input_dim // 2),
            nn.Tanh(),
            nn.Linear(input_dim // 2, input_dim // 4),
            nn.Tanh(),
            nn.Linear(input_dim // 4, embedding_dim),
        )
        self.decoder = nn.Sequential(
            nn.Linear(embedding_dim, input_dim // 4),
            nn.Tanh(),
            nn.Linear(input_dim // 4, input_dim // 2),
            nn.Tanh(),
            nn.Linear(input_dim // 2, input_dim),
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return encoded, decoded


if __name__ == '__main__':
    embedding_dim = 100
    epochs = 10000
    lr = 0.001
    gamma = 0.95
    batch_size = 1000

    transformer_utils.set_logger(os.path.join(os.getcwd(), 'train.log'))
    data_frame = pd.read_csv(
        os.path.join(os.getcwd(), 'data', 'abs_sku_to_Car_classfication_onehot_detail.csv'),
        header=None, names=['sku_code', 'car_model', 'car_id', 'cat_id'],
        dtype={'sku_code': str, 'car_model': str, 'car_id': int, 'cat_id': int})
    sku_code_set = set(data_frame['sku_code'].drop_duplicates())
    sku2idx_dict = {}
    for i, sku_code in enumerate(sku_code_set):
        sku2idx_dict[sku_code] = i

    car_id_num = max(data_frame['car_id'])
    sku_code_num = len(sku_code_set)
    sku_code_car_matrix = np.zeros((sku_code_num, car_id_num), dtype='float32')
    np.save(os.path.join(os.getcwd(), 'data', 'sku2idx_dict'), sku2idx_dict)

    # mark every <sku, car> pair in the one-hot matrix
    for i in trange(len(data_frame)):
        sku_code = data_frame.loc[i, 'sku_code']
        car_id = data_frame.loc[i, 'car_id']
        sku_code_idx = sku2idx_dict[sku_code]
        sku_code_car_matrix[sku_code_idx, car_id - 1] = 1

    train_set = EmbeddingTrainDataset(sku_code_car_matrix)
    # shuffle must stay False so that row i of each batch lines up with row i of the embedding matrix
    train_loader = DataLoader(train_set, batch_size=batch_size, num_workers=0, shuffle=False)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    autoencoder_model = AutoEncoder(car_id_num, embedding_dim).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.AdamW(autoencoder_model.parameters(), lr=lr)

    train_loss_summary = np.zeros(epochs)
    best_evaluate_loss = float('inf')
    for epoch in trange(epochs):
        train_total_loss = 0
        sku_encoder_embedding = np.zeros((sku_code_num, embedding_dim), dtype='float32')
        train_loader_len = len(train_loader)

        for i, (x_input, x_label) in enumerate(train_loader):
            x_input = x_input.to(device)
            x_label = x_label.to(device)

            encoded, decoded = autoencoder_model(x_input)
            loss = criterion(decoded, x_label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_total_loss += loss.item()

            # collect this batch's encoder output as part of the epoch's embedding matrix
            sku_encoder_embedding[(i * batch_size):(i * batch_size + x_input.shape[0])] = \
                encoded.detach().to('cpu').numpy()

        train_avg_loss = train_total_loss / train_loader_len
        logger.info(f'epoch: {epoch + 1}, train_loss: {train_avg_loss}')

        if train_avg_loss < best_evaluate_loss:
            best_evaluate_loss = train_avg_loss
            np.save(os.path.join(os.getcwd(), 'data', 'sku2embedding'), sku_encoder_embedding)
            logger.info(f'best embedding at: {epoch + 1}')

        if epoch >= 10:  # drop the earliest epochs so they do not distort the scale of the loss curve
            train_loss_summary[epoch] = train_avg_loss
        if epoch % 10 == 1:
            transformer_utils.plot_all_epoch(train_loss_summary, train_loss_summary, epoch,
                                             'embedding_train_loss_summary.png')

    print('finish!')
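To run the sanity check from step (4), one possible sketch; the sku codes below are placeholders (replace them with codes that exist in the csv), and it assumes the two .npy files produced by the script above:

import os
import numpy as np

sku2idx = np.load(os.path.join('data', 'sku2idx_dict.npy'), allow_pickle=True).item()
sku2embedding = np.load(os.path.join('data', 'sku2embedding.npy'))

sku_codes = ['133657', '133658', '133659']   # placeholder sku codes
for i, a in enumerate(sku_codes):
    for b in sku_codes[i + 1:]:
        dist = np.linalg.norm(sku2embedding[sku2idx[a]] - sku2embedding[sku2idx[b]])
        print(f'{a} <-> {b}: {dist:.4f}')    # similar skus should show smaller distances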
1、Data preprocessing code for the transformer forecast (transformer_preprocess_data.py):
import os
import numpy as np
import pandas as pd
from tqdm import trange


# Convert the raw records into a standard format: one column per series,
# row 0 holds the warehouse_sku key, the remaining rows hold the weekly sales
def normalize_data_format(data):
    data_sale_list_series = data['sale_info'].apply(lambda row: list(map(float, row.split("_"))))
    data_frame = pd.DataFrame(item for item in data_sale_list_series)
    data_frame = pd.concat((data['warehouse_sku'], data_frame), axis=1)
    data_frame = data_frame.transpose()
    return data_frame


# Clip abnormally large values to mean + 3 * std of the positive values
def smooth_big_value(data_frame):
    columns_len = len(data_frame.columns)

    print(">>>>smooth_big_value")
    for i in trange(columns_len):
        values = data_frame.iloc[1:, i]
        value_mean = np.mean(values[values > 0])
        value_std = np.std(values[values > 0], ddof=1)
        value_std = value_std if value_std > 0 else 0
        values_new = np.round(np.where(values > value_mean + 3 * value_std,
                                       value_mean + 3 * value_std, values).astype(float))
        values_new = np.array(values_new, dtype=int).astype(str)
        data_frame.iloc[1:, i] = values_new

    return data_frame


# Build the mappings between column index and series name (warehouse_sku)
def gen_col2series(data_frame):
    columns = data_frame.values[0, :]
    id2series_dict = {}
    series2id_dict = {}

    j = 0
    for i, column in enumerate(columns):
        id2series_dict[i] = column
        if series2id_dict.get(column) is None:
            series2id_dict[column] = j
            j += 1

    return id2series_dict, series2id_dict


# Maximum sales value of each series, used later for normalization
def gen_series2maxValue(data_frame):
    # smooth_big_value stored the values back as strings, so cast before taking the max
    series_max_value = np.max(data_frame[1:].astype('float'), axis=0)
    series2maxValue = series_max_value.to_dict()
    return series2maxValue


# Slice each series into sliding windows and normalize by the series maximum
def prep_data(data, series2maxValue):
    num_series = data.shape[1]
    time_len = data.shape[0]

    windows_per_series = np.full((num_series), (time_len - backcast_len))
    total_windows = np.sum(windows_per_series)
    x_input = np.zeros((total_windows, backcast_len, 1 + 2), dtype='float32')  # sale_info + series_info + max_value
    label = np.zeros((total_windows, backcast_len), dtype='float32')

    print(">>>>prep_data")
    count = 0
    zero_count = 0
    for series_idx in trange(num_series):
        for i in range(windows_per_series[series_idx]):
            x_input_data = data[i: i + backcast_len, series_idx]
            x_input_series = series_idx
            label_data = data[i + 1: i + backcast_len + 1, series_idx]  # label = input shifted by one week

            if np.max(x_input_data) > 0:
                x_input[count, :, 0] = x_input_data
                x_input[count, :, 1] = x_input_series
                x_input[count, :, 2] = series2maxValue.get(series_idx)
                label[count] = label_data
                x_input[count, :, 0] = x_input[count, :, 0] / series2maxValue.get(series_idx)
                label[count] = label[count] / series2maxValue.get(series_idx)
                count += 1
            elif np.max(label_data) == 0 and zero_count < 2000 and np.random.choice([0, 1], p=[0.6, 0.4]) > 0:
                # keep only a limited, randomly sampled number of all-zero windows
                x_input[count, :, 0] = x_input_data
                x_input[count, :, 1] = x_input_series
                x_input[count, :, 2] = 0
                label[count] = label_data
                zero_count += 1
                count += 1

    x_input = x_input[:count]
    label = label[:count]
    return x_input, label


# Split into training and test sets
def split_train_test_data(x_input, label, train_ratio=0.8):
    x_len = x_input.shape[0]
    shuffle_idx = np.random.permutation(x_len)
    train_x_len = int(x_len * train_ratio)
    train_shuffle_idx = shuffle_idx[:train_x_len]
    test_shuffle_idx = shuffle_idx[train_x_len:]

    train_x_input = x_input[train_shuffle_idx]
    train_label = label[train_shuffle_idx]
    test_x_input = x_input[test_shuffle_idx]
    test_label = label[test_shuffle_idx]

    return train_x_input, train_label, test_x_input, test_label


if __name__ == '__main__':
    backcast_len = 12
    train_val_num = 110

    data_frame = pd.read_csv(os.path.join(os.getcwd(), 'data', 'ads_hub_sale_num_detail_simple.csv'),
                             header=None, names=['warehouse_sku', 'sale_info'])
    data_frame = normalize_data_format(data_frame)
    data_frame = data_frame[:train_val_num]
    data_frame = smooth_big_value(data_frame)

    id2series, series2id = gen_col2series(data_frame)
    series2maxValue = gen_series2maxValue(data_frame)

    x_input, label = prep_data(data_frame.values[1:].astype('float'), series2maxValue)
    train_x_input, train_label, test_x_input, test_label = split_train_test_data(x_input, label)

    np.save(os.path.join(os.getcwd(), 'data', 'train_data'), train_x_input)
    np.save(os.path.join(os.getcwd(), 'data', 'train_label'), train_label)
    np.save(os.path.join(os.getcwd(), 'data', 'test_data'), test_x_input)
    np.save(os.path.join(os.getcwd(), 'data', 'test_label'), test_label)
    np.save(os.path.join(os.getcwd(), 'data', 'series_max_value'), series2maxValue)
    np.save(os.path.join(os.getcwd(), 'data', 'series2id'), series2id)
    np.save(os.path.join(os.getcwd(), 'data', 'id2series'), id2series)

    print('finish!')
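The sliding-window logic of prep_data is easiest to see on a toy series; a minimal sketch with made-up numbers and backcast_len shortened to 4:

import numpy as np

series = np.array([3, 0, 2, 5, 1, 4, 0, 2], dtype='float32')
backcast_len = 4
for i in range(len(series) - backcast_len):
    x_window = series[i: i + backcast_len]          # input: weeks i .. i+3
    y_window = series[i + 1: i + backcast_len + 1]  # label: the same window shifted by one week
    print(x_window, '->', y_window)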
2、Dataloader implementation (transformer_dataloader.py):
import logging
import os
import numpy as np
from torch.utils.data import Dataset

logger = logging.getLogger('Transformer.Data')


class TrainDataset(Dataset):
    def __init__(self, data_path):
        self.data = np.load(os.path.join(data_path, 'data', 'train_data.npy'))
        self.label = np.load(os.path.join(data_path, 'data', 'train_label.npy'))
        # the dicts were saved with np.save, so loading them back requires allow_pickle
        self.id2series_dict = np.load(os.path.join(data_path, 'data', 'id2series.npy'), allow_pickle=True).item()
        self.sku2idx_dict = np.load(os.path.join(data_path, 'data', 'sku2idx_dict.npy'), allow_pickle=True).item()
        self.sku2embedding = np.load(os.path.join(data_path, 'data', 'sku2embedding.npy'))
        # fallback embedding for sku codes that never appeared in embedding training
        self.sku_embedding_avg = self.sku2embedding.mean(axis=0)
        self.train_len = self.data.shape[0]
        logger.info(f'train_len:{self.train_len}')
        logger.info('building datasets from train_data.npy')

    def __len__(self):
        return self.train_len

    def __getitem__(self, index):
        series_idx = int(self.data[index, 0, -2])
        series = self.id2series_dict.get(series_idx)  # e.g. '0053#031188'
        sku_code = series.split('#')[1]               # drop the store code, keep the sku_id
        sku_idx = self.sku2idx_dict.get(sku_code)
        if sku_idx is None:
            sku_embedding = self.sku_embedding_avg
        else:
            sku_embedding = self.sku2embedding[sku_idx]

        return (self.data[index, :, :-2], series_idx, sku_embedding, self.label[index])


class TestDataset(Dataset):
    def __init__(self, data_path):
        self.data = np.load(os.path.join(data_path, 'data', 'test_data.npy'))
        self.label = np.load(os.path.join(data_path, 'data', 'test_label.npy'))
        self.id2series_dict = np.load(os.path.join(data_path, 'data', 'id2series.npy'), allow_pickle=True).item()
        self.sku2idx_dict = np.load(os.path.join(data_path, 'data', 'sku2idx_dict.npy'), allow_pickle=True).item()
        self.sku2embedding = np.load(os.path.join(data_path, 'data', 'sku2embedding.npy'))
        self.sku_embedding_avg = self.sku2embedding.mean(axis=0)
        self.test_len = self.data.shape[0]
        logger.info(f'test_len:{self.test_len}')
        logger.info('building datasets from test_data.npy')

    def __len__(self):
        return self.test_len

    def __getitem__(self, index):
        series_idx = int(self.data[index, 0, -2])
        series = self.id2series_dict.get(series_idx)
        sku_code = series.split('#')[1]
        sku_idx = self.sku2idx_dict.get(sku_code)
        if sku_idx is None:
            sku_embedding = self.sku_embedding_avg
        else:
            sku_embedding = self.sku2embedding[sku_idx]

        # test items additionally return the series max value, used for the de-normalized loss
        return (self.data[index, :, :-2], series_idx, sku_embedding, self.data[index, 0, -1], self.label[index])
(1) id2series maps a sales series index to its store code # sku_id key
(2) sku2idx maps a sku_id to its row index in the embedding matrix
(3) sku2embedding maps that row index to the embedding vector
(4) In the Dataset's __getitem__() the sku_id is resolved first and then used to look up the embedding vector; a worked example of this chain follows below
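Using the sample record from the data-format section, the lookup chain can be traced like this; toy dicts stand in for the persisted .npy files:

import numpy as np

id2series = {0: '0053#031188'}                 # series index -> store code # sku_id
sku2idx = {'031188': 7}                        # sku_id -> embedding row index
sku2embedding = np.random.rand(10, 100).astype('float32')  # toy embedding matrix

series_idx = 0
sku_code = id2series[series_idx].split('#')[1]  # '031188'
sku_idx = sku2idx.get(sku_code)
sku_embedding = sku2embedding[sku_idx] if sku_idx is not None else sku2embedding.mean(axis=0)
print(sku_embedding.shape)                      # (100,)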
3、Using the embedding values in the forecast model
Implementation (transformer_train.py):
import os
import numpy as np
import torch
import torch.nn as nn
import math
import time
import transformer_utils
from transformer_dataloader import TrainDataset, TestDataset
from torch.utils.data import DataLoader
import logging

logger = logging.getLogger('Transformer.Train')


class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x, embedding):
        # x, pe and embedding broadcast to [seq_len, batch_size, embedding_size]
        return x + self.pe[:x.size(0), :] + embedding


class TransAm(nn.Module):
    def __init__(self, feature_size=100, num_layers=1, dropout=0.1):
        super(TransAm, self).__init__()
        self.model_type = 'Transformer'
        self.src_mask = None
        self.pos_encoder = PositionalEncoding(feature_size)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=feature_size, nhead=10, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        self.decoder = nn.Linear(feature_size, 1)
        self.init_weights()

    def init_weights(self):
        initrange = 0.1
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def _generate_square_subsequent_mask(self, sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

    def forward(self, src, pre_embedding):
        # rebuild the causal mask only when the sequence length changes
        if self.src_mask is None or self.src_mask.shape[0] != len(src):
            device = src.device
            mask = self._generate_square_subsequent_mask(len(src)).to(device)
            self.src_mask = mask
        src = self.pos_encoder(src, pre_embedding)
        output = self.transformer_encoder(src, self.src_mask)
        output = self.decoder(output)
        return output


def evaluate(model, test_loader):
    test_total_loss = 0
    test_total_with_max_loss = 0
    model.eval()
    test_loader_len = len(test_loader)
    with torch.no_grad():
        for i, (test_batch, idx, embedding_batch, max_value, labels) in enumerate(test_loader):
            test_batch = test_batch.permute(1, 0, 2).to(device)
            labels = labels.permute(1, 0).to(device)
            embedding_batch = torch.unsqueeze(embedding_batch, dim=0).to(device)

            test_output = model(test_batch, embedding_batch)
            test_output = torch.squeeze(test_output)
            test_output[test_output < 0] = 0  # sales cannot be negative

            # only the last step of the window is evaluated
            test_labels = labels[-1]
            test_output = test_output[-1]
            test_loss = criterion(test_output, test_labels)
            test_total_loss += test_loss.item()

            # de-normalize with the series max value to get the loss on the original scale
            max_value = max_value.to(device)
            test_with_max_labels = test_labels * max_value
            test_with_max_output = test_output * max_value
            test_with_max_loss = criterion(test_with_max_output, test_with_max_labels)
            test_total_with_max_loss += test_with_max_loss.item()
    test_avg_loss = test_total_loss / test_loader_len
    test_with_max_avg_loss = test_total_with_max_loss / test_loader_len

    return test_avg_loss, test_with_max_avg_loss


if __name__ == '__main__':
    transformer_utils.set_logger(os.path.join(os.getcwd(), 'train.log'))

    json_path = os.path.join(os.getcwd(), 'params.json')
    params = transformer_utils.Params(json_path)
    lr = params.lr
    epochs = params.epochs
    feature_size = params.feature_size
    gamma = params.gamma
    device = torch.device(params.mode)
    input_window = params.input_window

    train_set = TrainDataset(os.getcwd())
    test_set = TestDataset(os.getcwd())
    train_loader = DataLoader(train_set, batch_size=params.train_batch_size, num_workers=0)
    test_loader = DataLoader(test_set, batch_size=params.test_batch_size, num_workers=0)

    transformer_model = TransAm(feature_size=feature_size).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.AdamW(transformer_model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=gamma)

    train_loss_summary = np.zeros(epochs)
    test_loss_summary = np.zeros(epochs)

    best_evaluate_loss = float('inf')
    for epoch in range(1, epochs + 1):
        epoch_start_time = time.time()
        train_loader_len = len(train_loader)

        train_total_loss = 0
        transformer_model.train()
        for i, (train_batch, idx, embedding_batch, label_batch) in enumerate(train_loader):
            optimizer.zero_grad()
            # DataLoader yields [batch, seq_len, feature]; the transformer expects [seq_len, batch, feature]
            train_batch = train_batch.permute(1, 0, 2).to(device)
            label_batch = label_batch.permute(1, 0).to(device)
            embedding_batch = torch.unsqueeze(embedding_batch, dim=0).to(device)

            output = transformer_model(train_batch, embedding_batch)
            output = torch.squeeze(output)

            loss = criterion(output, label_batch)
            loss.backward()
            optimizer.step()
            train_total_loss += loss.item()
        train_avg_loss = train_total_loss / train_loader_len
        scheduler.step()

        test_avg_loss, test_with_max_avg_loss = evaluate(transformer_model, test_loader)
        logger.info(f'epoch: {epoch}, train_loss: {train_avg_loss}, test_loss: {test_avg_loss}, test_max_loss: {test_with_max_avg_loss}')

        is_best = False
        if test_avg_loss < best_evaluate_loss:
            is_best = True
            best_evaluate_loss = test_avg_loss
        transformer_utils.save_checkpoint({'epoch': epoch,
                                           'state_dict': transformer_model.state_dict(),
                                           'optim_dict': optimizer.state_dict()},
                                          is_best,
                                          epoch=epoch)

        train_loss_summary[epoch - 1] = train_avg_loss
        test_loss_summary[epoch - 1] = test_avg_loss

        if epoch % 20 == 1:
            transformer_utils.plot_all_epoch(train_loss_summary, test_loss_summary, epoch, 'train_test_loss_summary.png')

    print('finish!')
(1) The embedding_batch read from train_loader has shape [1200, 100], where 1200 is batch_size and 100 is embedding_size.
(2) torch.unsqueeze(embedding_batch, dim=0) turns that into shape [1, 1200, 100], i.e. (sequence_length, batch_size, embedding_size), which matches the input layout the transformer expects.
(3) In PositionalEncoding the output is: x + self.pe[:x.size(0), :] + embedding
Here x has shape [12, 1200, 1]: twelve weeks of history with a feature size of 1, the sales value itself.
self.pe[:x.size(0), :] has shape [12, 1, 100]: the positional encoding is shared by every sample in the batch, so broadcasting expands the sum x + pe to shape [12, 1200, 100].
embedding has shape [1, 1200, 100]: a sku's embedding is an intrinsic attribute that does not change over time (seq_length), so it broadcasts against the previous result, and the final shape is still [12, 1200, 100].
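A quick shape check of this broadcasting, with the dimensions quoted above:

import torch

x = torch.zeros(12, 1200, 1)      # [seq_len, batch_size, 1]: the sales value
pe = torch.zeros(12, 1, 100)      # positional encoding, shared across the batch
emb = torch.zeros(1, 1200, 100)   # sku embedding, constant across the sequence

out = x + pe + emb                # broadcasting expands every operand
print(out.shape)                  # torch.Size([12, 1200, 100])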
4、Utility code (transformer_utils.py):
import logging
import os
import torch
import json
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

logger = logging.getLogger('Transformer.Utils')


class Params:
    '''
    Class that loads hyperparameters from a json file.
    Example:
        params = Params(json_path)
        print(params.lr)
    '''

    def __init__(self, json_path):
        with open(json_path) as f:
            params = json.load(f)
            self.__dict__.update(params)


def set_logger(log_path):
    '''Set the logger to log info in terminal and file `log_path`.
    In general, it is useful to have a logger so that every output to the terminal is saved
    in a permanent file. Here we save it to `model_dir/train.log`.
    Example:
        logging.info('Starting training...')
    Args:
        log_path: (string) where to log
    '''
    _logger = logging.getLogger('Transformer')
    _logger.setLevel(logging.INFO)

    fmt = logging.Formatter('[%(asctime)s] %(name)s: %(message)s', '%H:%M:%S')

    class TqdmHandler(logging.StreamHandler):
        def __init__(self, formatter):
            logging.StreamHandler.__init__(self)
            self.setFormatter(formatter)

        def emit(self, record):
            msg = self.format(record)
            tqdm.write(msg)

    file_handler = logging.FileHandler(log_path)
    file_handler.setFormatter(fmt)
    _logger.addHandler(file_handler)
    _logger.addHandler(TqdmHandler(fmt))


def save_checkpoint(state, is_best, epoch, save_checkpoint=False, ins_name=-1):
    '''Saves model and training parameters as a per-epoch checkpoint. If is_best==True, also saves
    'best.pth.tar'.
    Args:
        state: (dict) contains model's state_dict, may contain other keys such as epoch, optimizer state_dict
        is_best: (bool) True if it is the best model seen till now
        epoch: (int) current epoch, used in the checkpoint file name
        save_checkpoint: (bool) whether to save a per-epoch checkpoint in addition to the best one
        ins_name: (int) instance index
    '''
    if save_checkpoint:
        if ins_name == -1:
            filepath = os.path.join('transformer-training-checkpoint', f'epoch_{epoch}.pth.tar')
        else:
            filepath = os.path.join('transformer-training-checkpoint', f'epoch_{epoch}_ins_{ins_name}.pth.tar')
        if not os.path.exists('transformer-training-checkpoint'):
            logger.info('Checkpoint Directory does not exist! Making directory transformer-training-checkpoint')
            os.mkdir('transformer-training-checkpoint')
        torch.save(state, filepath)
        logger.info(f'Checkpoint saved to {filepath}')
    if is_best:
        torch.save(state, os.path.join(os.getcwd(), 'base_model', 'best.pth.tar'))
        logger.info('Best checkpoint saved to best.pth.tar')


def plot_all_epoch(train_loss_summary, test_loss_summary, num_samples, png_name):
    x = np.arange(start=1, stop=num_samples + 1)
    f = plt.figure()
    plt.plot(x, train_loss_summary[:num_samples], label='train_loss', linestyle='--')
    plt.plot(x, test_loss_summary[:num_samples], label='test_loss', linestyle='-')
    plt.legend()
    f.savefig(os.path.join('base_model', png_name))
    plt.close()
5、Configuration file (params.json):
{
    "train_batch_size": 1200,
    "test_batch_size": 100,
    "lr": 0.005,
    "epochs": 1000,
    "feature_size": 100,
    "gamma": 0.95,
    "input_window": 12,
    "mode": "cuda"
}