Reputation: 11
I'm using RDKit to compare several molecules to a reference molecule, Picéatannol. I defined its structure using its SMILES, but it seems that RDKit does not recognize it properly in my code.
Here is my code:
from rdkit.Chem import Descriptors, rdFingerprintGenerator, DataStructs
# Définir la molécule de référence (Picéatannol)
piceatannol_smiles = "C1=CC(=C(C=C1C=CC2=CC(=CC(=C2)O)O)O)O"
piceatannol_mol = Chem.MolFromSmiles(piceatannol_smiles)
# Générer l'empreinte Morgan
morgan_gen = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)
piceatannol_fp = morgan_gen.GetFingerprint(piceatannol_mol)
# Définir les sous-structures (diméthoxybenzène et catéchol)
dimethoxybenzene_pattern = Chem.MolFromSmarts("COc1ccccc1OC")
catechol_pattern = Chem.MolFromSmiles("Oc1c(O)cccc1")
# Liste des molécules avec leurs SMILES (complétée autant que possible)
molecules = {
"Papaverine": "COC1=CC2=C(C=C1OC)C=CC(=C2OC)OCCN(C)C",
"Verapamil": "CC(C)N(CC(O)COC1=CC=CC=C1)CC2=CC=CC=C2",
"Vernakalant": "CC1=NC2=C(N1CC3=CC=CC=C3)C=C(C=C2)O",
"Midodrine": "CC(CN1CCCC1)C2=CC=CC=C2",
"Cisatracurium": "CC(=O)OCCCN1CCOC2=CC=CC=C12",
"Remoxipride": "CCN1CCN(CC1)C(=O)C2=CC=C(C=C2)O",
"Infigratinib": "CC1=NC2=C(N1CC3=CC=CC=C3)C=C(C=C2)O",
"Meticillin": "COC1=CC=C(C=C1)C2=NC(=O)C(=O)N2C",
"Ubiquinol": "CCCCCCCCCCCCCCCCCC1=CC(=C(C(=C1O)O)CC2C(C(C(O2)C)O)O)O",
"Methoxamine": "COC1=CC=C(C=C1)CC(O)CNC",
"Erdafitinib": "CC1=NC2=C(N1CC3=CC=CC=C3)C=C(C=C2)O",
"Istradefylline": "CC1=CC=C(C=C1)C2=NC3=C(N2C4=CC=CC=C4)C(=NC=N3)C5=CC=CC=C5",
"Mebeverine": "CCOC(=O)CCCN1CCOC2=CC=CC=C12",
"Etoposide": "CC1=C(O2)C(C(=O)C3=CC(=C(C=C3O2)OC)OCCOCCOC)=C(C1)OC",
"Pinaverium": "CCOC(=O)CN(CCOC1=CC=CC=C1)CC2=CC=CC=C2C",
"Teniposide": "CC1=CC2=C(C(=C1)O)OC3=C(O2)C=C(C=C3O)CCOCC4C(C(C(O4)SC5=CC=CC=C5)O)O",
"Atracurium besylate": "CC(=O)OCCCN1CCOC2=CC=CC=C12",
"Terameprocol": "CC1=CC(=O)C2=C(C(=C1)O)OC3=C(O2)C=C(C=C3)CCOCCOC",
"Syringate": "COC1=CC=C(C=C1O)O",
"Gallopamil": "CC(C)N(CC(O)COC1=CC=CC=C1)CC2=CC=CC=C2",
"Piritrexim": "CC1=CC(=NC(=N1)NC2=CC=CC=C2)C3=CC=C(C=C3)OC",
"GTS-21": "CC1=CC=CC=C1C(=O)NC2=CC=CC=N2",
"1-cyclobutyl-3-(3,4-dimethoxyphenyl)-1H-pyrazolo[3,4-d]pyrimidin-4-amine": "CC1=CC=CC=C1",
"2,5-Dimethoxy-4-ethylthioamphetamine" : "CC1=CC=CC=C1",
"(Sri-9662)6-[(1Z)-2-(2,5-dimethoxyphenyl)ethenyl]-5-methylpyrido[2,3-d]pyrimidine-2,4-diamine": "CC1=CC=CC=C1",
"4-Bromo-2,5-dimethoxyamphetamine": "CC1=CC=CC=C1",
"Itopride": "COC1=CC=C(C=C1)CCNCC(=O)NC2=CC=CC=C2",
"3-(3,4-dimethoxyphenyl)propanoic acid": "CC1=CC=CC=C1",
"8-(2,5-Dimethoxy-Benzyl)-2-Fluoro-9-Pent-9h-Purin-6-Ylamine": "CC1=CC=CC=C1",
"9-Butyl-8-(2,5-Dimethoxy-Benzyl)-2-Fluoro-9h-Purin-6-Ylamine": "CC1=CC=CC=C1",
"AZD-6280": "CC1=CC=CC=C1",
"Etripamil": "CC1=CC=CC=C1",
"N-(4-AMINO-5-CYANO-6-ETHOXYPYRIDIN-2-YL)-2-(4-BROMO-2,5-DIMETHOXYPHENYL)ACETAMIDE": "CC1=CC=CC=C1",
"N-[1-(2,6-dimethoxybenzyl)piperidin-4-yl]-4-sulfanylbutanamide" : "CC1=CC=CC=C1",
"Arverapamil": "CC(C)N(CC1=CC=CC=C1)CC2=CC=CC=C2",
"2,5-Dimethoxy-4-ethylamphetamine": "CC1=CC=CC=C1",
"2,5-Dimethoxy-4-(n)-propylthiophenethylamine": "CC1=CC=CC=C1",
"{3-[3-(3,4-Dimethoxy-Phenyl)-1-(1-{1-[2-(3,4,5-Trimethoxy-Phenyl)-Butyryl]-Piperidin-2yl}-Vinyloxy)-Propyl]-Phenoxy}-Acetic Acid": "CC1=CC=CC=C1",
"N-[1-(5-bromo-2,3-dimethoxybenzyl)piperidin-4-yl]-4-sulfanylbutanamide": "CC1=CC=CC=C1",
"5-[3-(2,5-dimethoxyphenyl)prop-1-yn-1-yl]-6-ethylpyrimidine-2,4-diamine": "CC1=CC=CC=C1",
"2,5-Dimethoxyamphetamine": "CC1=CC=CC=C1",
"4-Methyl-2,5-dimethoxyamphetamine": "CC1=CC=CC=C1",
"4-Bromo-2,5-dimethoxyphenethylamine": "CC1=CC=CC=C1",
"Dimetofrine": "COC1=CC=CC(=C1)CC(O)CNC",
"Dextofisopam": "CN1CCN(CC1)C(=O)C2=CC=CC=C2",
"Tofisopam": "CN1CCN(CC1)C(=O)C2=CC=CC=C2",
"Tranilast": "CC1=CC=CC=C1C(=O)NCC(=O)O",
"Bevantolol": "CC(CN1CCCC1)C2=CC=CC=C2",
"Veralipride": "CCN1CCN(CC1)C(=O)C2=CC=CC=C2",
"Meclinertant": "CC1=CC=CC=C1CCNCC(=O)NC2=CC=CC=C2",
"Picéatannol": "OC1=CC=C(C=C1)C=C2C=CC(O)=C(O)C2",
"Firategrast": "CC1=NC(=O)C(=NC1=O)C2=CC=CC=C2",
"Trimethoprim": "CC1=NC(C=C(N1)OC2=CC=CC=C2O)=CC3=CC=C(C=C3)O"
# Ajoutez d'autres molécules ici avec leurs SMILES correspondants
}
# Stocker les résultats
results = []
for name, smiles in molecules.items():
mol = Chem.MolFromSmiles(smiles)
if mol is None:
print(f"❌ Erreur avec la molécule : {name}")
continue
# Vérifier la présence de diméthoxybenzène ou catéchol
has_dimethoxybenzene = mol.HasSubstructMatch(dimethoxybenzene_pattern)
has_catechol = mol.HasSubstructMatch(catechol_pattern)
if not (has_dimethoxybenzene or has_catechol):
continue # Passe à la molécule suivante si elle ne contient ni l'un ni l'autre
# Calcul de la similarité Tanimoto
mol_fp = morgan_gen.GetFingerprint(mol)
similarity = DataStructs.TanimotoSimilarity(piceatannol_fp, mol_fp)
# Masse moléculaire
mass = Descriptors.MolWt(mol)
# Nombre de donneurs de liaison hydrogène
h_donors = Descriptors.NumHDonors(mol)
# Ajout aux résultats
results.append((name, similarity, mass, h_donors))
# Trier par ordre décroissant de similarité
results.sort(key=lambda x: x[1], reverse=True)
# Affichage des résultats
print("🔬 **Comparaison des molécules avec le Picéatannol :**\n")
for name, similarity, mass, h_donors in results:
print(f"📌 Molécule : {name}")
print(f" - Similarité Tanimoto : {similarity:.2f}")
print(f" - Masse moléculaire : {mass:.2f} g/mol")
print(f" - Nombre de donneurs de liaisons H : {h_donors}\n")
Issue:
Picéatannol is missing from the results, even though it's in the input dataset.
The molecule is correctly loaded (Chem.MolFromSmiles does not return None).
The Tanimoto similarity calculation should include it, but it seems to be skipped.
The filtering conditions (HasSubstructMatch) might be the reason, but Picéatannol should contain catechol.
Questions:
Why is Picéatannol missing from the results?
Could there be an issue with HasSubstructMatch for the catechol pattern?
Is there a better way to ensure Picéatannol is always included in the final results?
Any insights would be greatly appreciated! Thanks in advance. 😊
Upvotes: 1
Views: 33
Reputation: 1948
Add some more print statements for debugging purposes:
# [...]
if not (has_dimethoxybenzene or has_catechol):
print(f"Skipping {name} because no catechol or no dimethoxybenzene substruct was found."
continue
# [...]
If you want to enforce that the comparison to Picéatannol is done add:
# [...]
if name != 'Picéatannol':
if not (has_dimethoxybenzene or has_catechol):
print(f"Skipping {name} because no catechol or no dimethoxybenzene substruct was found."
continue
# [...]
Upvotes: 0