基于子图层面的图神经网络进行 pKa 预测，结合了数据增强与迁移学习。原文：MF-SuP-pKa: Multi-fidelity modeling with subgraph pooling mechanism for pKa prediction；代码仓库：MF-SuP-pKa。原文框架如下：

def built_data_and_save_for_pka(
        origin_path: str = 'data.csv',
        save_g_attentivefp_path: str = 'data_graph.bin',
        smiles_path: str = 'data_group.csv',
        task_list_selected=None,
        acid_or_base=None) -> None:
    """Build pKa molecular graphs from a CSV and save them to disk.

    Reads the raw dataset, builds one graph (with labels/masks/subgraph
    counts) per molecule via ``build_dataset_with_idx``, then writes the
    SMILES table to *smiles_path* and the graphs + label tensors to
    *save_g_attentivefp_path* with DGL's ``save_graphs``.

    Parameters
    ----------
    origin_path : str
        Input CSV; must contain a 'smiles' column plus one column per task.
    save_g_attentivefp_path : str
        Output path for the serialized DGL graphs (.bin).
    smiles_path : str
        Output CSV holding the SMILES strings, aligned with the graphs.
    task_list_selected : list[str] | None
        Explicit task (label-column) names; when None, every column that is
        not a known metadata column is treated as a task.
    acid_or_base : str | None
        Forwarded to ``build_dataset_with_idx`` to select the ionization
        site type (e.g. acidic vs. basic) — semantics defined there.
    """
    data_origin = pd.read_csv(origin_path)
    # 123456 is the sentinel for missing labels; downstream code is expected
    # to exclude these entries via the 'mask' tensor saved below.
    data_origin = data_origin.fillna(123456)
    # Label columns = everything except SMILES/identifier/site-count
    # metadata columns (e.g. pka_acidic or pka_basic).
    labels_list = [x for x in data_origin.columns
                   if x not in ['smiles', 'SMILES', 'group', 'ChEMBL ID',
                                'num_acid_sites',
                                'num_base_sites']]
    if task_list_selected is not None:
        labels_list = task_list_selected
    smiles_name = 'smiles'
    data_set_gnn = build_dataset_with_idx(dataset_smiles=data_origin,
                                          labels_list=labels_list,
                                          smiles_name=smiles_name,
                                          acid_or_base=acid_or_base)
    # Each dataset entry is (smiles, graph, labels, mask, num_subgraphs);
    # unzip into parallel per-molecule lists.
    smiles, g_attentivefp, labels, mask, num_sg = map(list, zip(*data_set_gnn))
    graph_labels = {'labels': torch.tensor(labels),
                    'mask': torch.tensor(mask),
                    'num_sg': torch.tensor(num_sg),
                    }
    smiles_pd = pd.DataFrame(smiles, columns=['smiles'])
    smiles_pd.to_csv(smiles_path, index=False)
    print('Molecules graph is saved!')
    save_graphs(save_g_attentivefp_path, g_attentivefp, graph_labels)