• rdkit Recap、BRICS将smiles切分为片段(fragment)


    一、 BRICS切分

    主体函数 

    1. from rdkit import Chem
    2. from rdkit.Chem import BRICS
    3. import numpy as np
    # ----- Split a molecule into fragments whose attachment points carry no number (or a custom label) -----
    def fragment_recursive(mol, frags):
        """Split ``mol`` into fragments by cutting its BRICS bonds one at a time.

        On every pass the BRICS bond with the lowest bond index is cut. The
        first piece returned by ``Chem.GetMolFrags`` is recorded as a SMILES
        string and the second piece is processed further, until no BRICS bond
        is left; the remaining piece is then recorded as the last fragment.

        Args:
            mol: RDKit molecule to split. ``None`` (what ``Chem.MolFromSmiles``
                returns for an unparsable SMILES) is rejected up front.
            frags: list that receives the fragment SMILES. It is extended in
                place and is also the return value on success.

        Returns:
            ``frags`` with the fragment SMILES appended, or ``None`` when the
            molecule is missing or any RDKit call fails. On failure ``frags``
            may already contain the fragments found so far.
        """
        if mol is None:
            print("fragment_recursive: mol is None (unparsable SMILES?)")
            return None
        try:
            while True:
                bonds = list(BRICS.FindBRICSBonds(mol))
                if not bonds:
                    # Nothing left to cut: what remains is the final fragment.
                    frags.append(Chem.MolToSmiles(mol))
                    return frags
                # Each entry is ((atom1, atom2), labels); only the atom pair is
                # needed to look up the bond. Cut the lowest-indexed bond.
                first_bond_idx = min(
                    mol.GetBondBetweenAtoms(a1, a2).GetIdx()
                    for (a1, a2), _labels in bonds
                )
                # dummyLabels=(0, 0) keeps the attachment points unnumbered, so
                # they are written as a plain '*' in the SMILES.
                broken = Chem.FragmentOnBonds(
                    mol, bondIndices=[first_bond_idx], dummyLabels=[(0, 0)]
                )
                # Unpacking requires exactly two pieces; anything else raises
                # and is reported by the handler below.
                head, mol = Chem.GetMolFrags(broken, asMols=True)
                frags.append(Chem.MolToSmiles(head))
        except Exception as e:
            # Best-effort helper: report the problem and signal failure with
            # None instead of propagating the RDKit error.
            print(e)
            return None
    # ----- Remove the '*' attachment points -----
    def remove_dummy(smiles):
        """Replace every '*' in a fragment SMILES with hydrogen and re-canonicalise it.

        Args:
            smiles: fragment SMILES string, possibly containing '*' dummy atoms.

        Returns:
            The SMILES produced by ``Chem.MolToSmiles`` after every '*' has
            been replaced by '[H]', or ``None`` when ``smiles`` is not a string
            or the capped SMILES cannot be processed by RDKit.
        """
        if not isinstance(smiles, str):
            print("remove_dummy: expected a SMILES string, got", type(smiles).__name__)
            return None
        try:
            mol = Chem.MolFromSmiles(smiles.replace('*', '[H]'))
            if mol is None:
                # MolFromSmiles signals a parse failure by returning None;
                # report it here rather than via an error from MolToSmiles.
                print("remove_dummy: could not parse SMILES:", smiles)
                return None
            return Chem.MolToSmiles(mol)
        except Exception as e:
            print(e)
            return None

    1、单个smiles拆分为fragment

    # Split a single SMILES string into fragments.
    # NOTE: the SMILES below contains F and N, so it is not aspirin (it appears
    # to be olaparib); the variable is therefore named generically.
    mol = Chem.MolFromSmiles('C1CC1C(=O)N2CCN(CC2)C(=O)C3=C(C=CC(=C3)CC4=NNC(=O)C5=CC=CC=C54)F')
    # fragment_recursive returns None on failure; fall back to an empty list so
    # the comprehension below does not raise TypeError.
    fragments = fragment_recursive(mol, []) or []
    clean_fragments = [remove_dummy(smi) for smi in fragments]
    print(clean_fragments)

    2、csv文件的每一行smiles拆分为fragment + 删除重复的行 + 存入新文件中

    1. # csv文件的每一行smiles拆分为fragment + 删除重复的行
    2. import csv
    3. import pandas as pd
    # Read the source CSV file; the SMILES string is taken from the first column.
    with open('csv/double.csv', 'r', newline='') as csv_file:
        input_data = list(csv.reader(csv_file))

    # Fragment every row and keep the cleaned fragments of each molecule.
    output_data = []
    for row in input_data:
        try:
            mol = Chem.MolFromSmiles(row[0])
            fragments = fragment_recursive(mol, [])
            # fragments is None when fragmentation failed; iterating it raises
            # TypeError, which skips the row via the handler below.
            clean_fragments = [remove_dummy(smi) for smi in fragments]
            output_data.append(clean_fragments)
        except Exception:
            # Empty rows, unparsable SMILES and failed fragmentations are
            # reported and skipped so one bad row does not stop the batch.
            print("Current smiles process Error: ", row)

    # Flatten, drop fragments that could not be cleaned (None) and remove
    # duplicates while keeping first-seen order. Doing this in memory avoids
    # writing the file and reading it back, which fails on an empty result.
    new_csv_name = 'output.smi'
    unique_fragments = list(dict.fromkeys(
        smi
        for clean_fragments in output_data
        for smi in clean_fragments
        if smi
    ))

    # One fragment per line, without index or header.
    pd.Series(unique_fragments, dtype=object).to_csv(new_csv_name, index=False, header=False)

    二、 Recap切分

    rdkit的Recap算法同样基于常见的化学反应选择断键位点,并以层级结构(树)保存切分得到的片段,下面的代码使用的是Recap;上文使用的BRICS算法也是基于常见的反应,选择片段断键的位点,提供了化学合成意义上的可行性

    1. from rdkit.Chem import Recap
    2. from rdkit.Chem import AllChem as Chem
    # Build the Recap decomposition hierarchy for an example molecule.
    m = Chem.MolFromSmiles('c1ccccc1OCCOC(=O)CC')
    hierarch = Recap.RecapDecompose(m)

    # Leaf nodes: hierarch.GetLeaves()
    leaves = hierarch.GetLeaves()
    print(leaves.keys())

    # All descendant nodes: hierarch.GetAllChildren()
    descendants = hierarch.GetAllChildren()
    print(descendants.keys())

    # Ancestor nodes, returned as a list: getUltimateParents()
    ultimate_parents = hierarch.getUltimateParents()
    print(ultimate_parents[0].smiles)


     

    药物分子进行片段分解 - 知乎

    原文来源于:rdkit Recap、BRICS分子片段拆分与合成_loong_XL的博客-CSDN博客

  • 相关阅读:
    什么是jvm
    【算法】链表的基本操作和高频算法题
    甘露糖-聚乙二醇-CY5 Cy5-PEG-mannose
    springboot+jsp+ssm助农系统农产品宣传网站设计
    JavaAPI操作HBase-Day2
    mybatis循环插入
    弘辽科技:淘宝什么情况下需要提升销量?店铺怎么提升销量?
    如何通过设备维护管理系统实现全员生产维护TPM
    docker安装mysql
    01 # 重塑类型思维
  • 原文地址:https://blog.csdn.net/weixin_43135178/article/details/133144770