Example 4 - Different Choices of Surrogate for Docking Inference
To run this notebook you need to have GNINA and AutoDock Vina via DockString installed. GNINA can be downloaded as a linux binary and made executable (see this notebook for installation).
from rdkit import Chem
from rdkit.Chem import AllChem
import subprocess
def predictGNINA_docking(smiles):
mol = Chem.MolFromSmiles(smiles)
mol = Chem.AddHs(mol)
# Generate 3D coordinates
AllChem.EmbedMolecule(mol)
AllChem.UFFOptimizeMolecule(mol)
# Save to PDB file (required format for GNINA)
output_file = "molecule.pdb"
Chem.MolToPDBFile(mol, output_file)
# Command to run GNINA
command = "../dziner/surrogates/gnina -r ../dziner/surrogates/Binding/WDR5_target.pdbqt -l molecule.pdb --autobox_ligand ../dziner/surrogates/Binding/WDR5_target.pdbqt --seed 0"
# Run the command and capture the output
result = subprocess.getoutput(command)
# Initialize a list to store affinity values
affinities = []
# Loop through the output lines to find affinity scores
for line in result.splitlines():
# Skip lines that don't start with a mode index (i.e., a number)
if line.strip() and line.split()[0].isdigit():
# Split the line and extract the affinity (2nd column)
try:
affinity = float(line.split()[1])
affinities.append(affinity)
except ValueError:
# Skip lines that don't have a valid number in the second column
continue
# Find the minimum affinity score
if affinities:
best_docking_score = min(affinities)
# print(f"Best docking score: {best_docking_score}")
else:
# print("No valid docking scores found.")
best_docking_score = None
return best_docking_score
from dockstring import load_target
def predictAutoVina_docking(smiles):
'''
This tool predicts the docking score to WDR5 for a SMILES molecule. Lower docking score means a larger binding affinity.
This model based on Autodock Vina from this paper: https://pubs.acs.org/doi/10.1021/acs.jcim.1c01334
'''
smiles = smiles.replace("\n", "").replace("'", "")
target = load_target("WDR5")
try:
score, affinities = target.dock(smiles)
except:
score = 'Invalid molecule'
return score
GNINA vs AutoDock Vina
import pandas as pd
from tqdm import tqdm
tqdm.pandas()
df = pd.read_csv('../paper/results/1.binding-WDR5/new_candidate/anthropic/WDR5-20-it(0).csv', usecols=['Iteration', 'SMILES'] )
df
|
Iteration |
SMILES |
0 |
0 |
CN1CCN(C2=C(NC(C3=CC=CC=C3Cl)=O)C=C([N+]([O-])... |
1 |
1 |
CN1CCN(C2=C(NC(C3=CC=CC=C3F)=O)C=C([N+]([O-])=... |
2 |
2 |
CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C([N+]... |
3 |
3 |
CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C#N)... |
4 |
4 |
CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C(=O... |
5 |
5 |
CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C(=O... |
6 |
6 |
CN1CCN(CC(=O)N)CC1C2=C(NC(C3=CC=CC4=CC=CC=C43)... |
7 |
7 |
CN1CCN(C2=C(NC(C3=C(F)C=CC4=CC=CC=C43)=O)C=C(C... |
8 |
8 |
CN1CCN(C2=C(NC(=O)CC3=CC=CC4=CC=CC=C43)C=C(C(=... |
9 |
9 |
CN1CCN(CC2=CC=C(NC(C3=CC=CC4=CC=CC=C43)=O)C(C(... |
10 |
10 |
CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C(=O... |
11 |
11 |
CCN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C(=... |
12 |
12 |
CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C(F)=C(C... |
13 |
13 |
CN1CCN(C2=C(NC(=N)C3=CC=CC4=CC=CC=C43)C=C(C(=O... |
14 |
14 |
CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C(=O... |
15 |
15 |
CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C(=O... |
16 |
16 |
CN1CCN(C2=C(NC(C3=CC=C(F)C4=CC=CC=C43)=O)C=C(C... |
17 |
17 |
CN1CCN(C2=C(NC(C3=CC=C(Cl)C4=CC=CC=C43)=O)C=C(... |
18 |
18 |
CCN1CCN(C2=C(NC(C3=CC=C(Cl)C4=CC=CC=C43)=O)C=C... |
19 |
19 |
CN1CCN(C2=C(NC(C3=CC=C(Cl)C4=CC=CC=C43)=O)C=C(... |
%%time ## 22.65 seconds/ molecule
df['Docking Score (GNINA)'] = df['SMILES'].progress_apply(predictGNINA_docking)
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [07:33<00:00, 22.65s/it]
CPU times: user 2.16 s, sys: 58.6 ms, total: 2.22 s
Wall time: 7min 33s
%%time ## 21.4 seconds/ molecule
df['Docking Score (AutoDock Vina)'] = df['SMILES'].progress_apply(predictAutoVina_docking)
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [07:08<00:00, 21.42s/it]
CPU times: user 2.91 s, sys: 134 ms, total: 3.04 s
Wall time: 7min 8s
import numpy as np
df['\u0394'] = np.abs(df['Docking Score (GNINA)']- df['Docking Score (AutoDock Vina)'])
df
|
Iteration |
SMILES |
Docking Score (GNINA) |
Docking Score (AutoDock Vina) |
Δ |
0 |
0 |
CN1CCN(C2=C(NC(C3=CC=CC=C3Cl)=O)C=C([N+]([O-])... |
-6.33 |
-6.7 |
0.37 |
1 |
1 |
CN1CCN(C2=C(NC(C3=CC=CC=C3F)=O)C=C([N+]([O-])=... |
-5.52 |
-6.6 |
1.08 |
2 |
2 |
CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C([N+]... |
-7.55 |
-7.6 |
0.05 |
3 |
3 |
CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C#N)... |
-7.83 |
-7.6 |
0.23 |
4 |
4 |
CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C(=O... |
-8.26 |
-8.4 |
0.14 |
5 |
5 |
CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C(=O... |
-8.22 |
-8.3 |
0.08 |
6 |
6 |
CN1CCN(CC(=O)N)CC1C2=C(NC(C3=CC=CC4=CC=CC=C43)... |
-8.88 |
-7.9 |
0.98 |
7 |
7 |
CN1CCN(C2=C(NC(C3=C(F)C=CC4=CC=CC=C43)=O)C=C(C... |
-8.15 |
-8.1 |
0.05 |
8 |
8 |
CN1CCN(C2=C(NC(=O)CC3=CC=CC4=CC=CC=C43)C=C(C(=... |
-7.82 |
-8.2 |
0.38 |
9 |
9 |
CN1CCN(CC2=CC=C(NC(C3=CC=CC4=CC=CC=C43)=O)C(C(... |
-7.36 |
-8.2 |
0.84 |
10 |
10 |
CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C(=O... |
-8.35 |
-8.2 |
0.15 |
11 |
11 |
CCN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C(=... |
-8.07 |
-7.9 |
0.17 |
12 |
12 |
CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C(F)=C(C... |
-7.55 |
-8.0 |
0.45 |
13 |
13 |
CN1CCN(C2=C(NC(=N)C3=CC=CC4=CC=CC=C43)C=C(C(=O... |
-7.96 |
-7.9 |
0.06 |
14 |
14 |
CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C(=O... |
-9.02 |
-8.7 |
0.32 |
15 |
15 |
CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C(=O... |
-8.14 |
-8.6 |
0.46 |
16 |
16 |
CN1CCN(C2=C(NC(C3=CC=C(F)C4=CC=CC=C43)=O)C=C(C... |
-8.58 |
-8.8 |
0.22 |
17 |
17 |
CN1CCN(C2=C(NC(C3=CC=C(Cl)C4=CC=CC=C43)=O)C=C(... |
-8.91 |
-8.9 |
0.01 |
18 |
18 |
CCN1CCN(C2=C(NC(C3=CC=C(Cl)C4=CC=CC=C43)=O)C=C... |
-7.51 |
-8.7 |
1.19 |
19 |
19 |
CN1CCN(C2=C(NC(C3=CC=C(Cl)C4=CC=CC=C43)=O)C=C(... |
-8.68 |
-8.3 |
0.38 |