Example 4 - Different Choices of Surrogate for Docking Inference

Example 4 - Different Choices of Surrogate for Docking Inference#

To run this notebook you need to have GNINA and AutoDock Vina via DockString installed. GNINA can be downloaded as a linux binary and made executable (see this notebook for installation).

from rdkit import Chem
from rdkit.Chem import AllChem
import subprocess

def predictGNINA_docking(smiles):
    mol = Chem.MolFromSmiles(smiles)
    mol = Chem.AddHs(mol)
    
    # Generate 3D coordinates
    AllChem.EmbedMolecule(mol)
    AllChem.UFFOptimizeMolecule(mol)
    
    # Save to PDB file (required format for GNINA)
    output_file = "molecule.pdb"
    Chem.MolToPDBFile(mol, output_file)

     # Command to run GNINA
    command = "../dziner/surrogates/gnina -r ../dziner/surrogates/Binding/WDR5_target.pdbqt -l molecule.pdb --autobox_ligand ../dziner/surrogates/Binding/WDR5_target.pdbqt --seed 0"
    
    # Run the command and capture the output
    result = subprocess.getoutput(command)
    
    # Initialize a list to store affinity values
    affinities = []
    
    # Loop through the output lines to find affinity scores
    for line in result.splitlines():
        # Skip lines that don't start with a mode index (i.e., a number)
        if line.strip() and line.split()[0].isdigit():
            # Split the line and extract the affinity (2nd column)
            try:
                affinity = float(line.split()[1])
                affinities.append(affinity)
            except ValueError:
                # Skip lines that don't have a valid number in the second column
                continue
    
    # Find the minimum affinity score
    if affinities:
        best_docking_score = min(affinities)
        # print(f"Best docking score: {best_docking_score}")
    else:
        # print("No valid docking scores found.")
        best_docking_score = None
    return best_docking_score
from dockstring import load_target

def predictAutoVina_docking(smiles):
    '''
    This tool predicts the docking score to WDR5 for a SMILES molecule. Lower docking score means a larger binding affinity.
    This model based on Autodock Vina from this paper: https://pubs.acs.org/doi/10.1021/acs.jcim.1c01334 
    '''
    smiles = smiles.replace("\n", "").replace("'", "")
    target = load_target("WDR5")
    try:
        score, affinities = target.dock(smiles)
    except:
        score = 'Invalid molecule'
    return score


GNINA vs AutoDock Vina#

import pandas as pd
from tqdm import tqdm
tqdm.pandas()

df = pd.read_csv('../paper/results/1.binding-WDR5/new_candidate/anthropic/WDR5-20-it(0).csv', usecols=['Iteration', 'SMILES'] )
df

Iteration SMILES
0 0 CN1CCN(C2=C(NC(C3=CC=CC=C3Cl)=O)C=C([N+]([O-])...
1 1 CN1CCN(C2=C(NC(C3=CC=CC=C3F)=O)C=C([N+]([O-])=...
2 2 CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C([N+]...
3 3 CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C#N)...
4 4 CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C(=O...
5 5 CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C(=O...
6 6 CN1CCN(CC(=O)N)CC1C2=C(NC(C3=CC=CC4=CC=CC=C43)...
7 7 CN1CCN(C2=C(NC(C3=C(F)C=CC4=CC=CC=C43)=O)C=C(C...
8 8 CN1CCN(C2=C(NC(=O)CC3=CC=CC4=CC=CC=C43)C=C(C(=...
9 9 CN1CCN(CC2=CC=C(NC(C3=CC=CC4=CC=CC=C43)=O)C(C(...
10 10 CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C(=O...
11 11 CCN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C(=...
12 12 CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C(F)=C(C...
13 13 CN1CCN(C2=C(NC(=N)C3=CC=CC4=CC=CC=C43)C=C(C(=O...
14 14 CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C(=O...
15 15 CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C(=O...
16 16 CN1CCN(C2=C(NC(C3=CC=C(F)C4=CC=CC=C43)=O)C=C(C...
17 17 CN1CCN(C2=C(NC(C3=CC=C(Cl)C4=CC=CC=C43)=O)C=C(...
18 18 CCN1CCN(C2=C(NC(C3=CC=C(Cl)C4=CC=CC=C43)=O)C=C...
19 19 CN1CCN(C2=C(NC(C3=CC=C(Cl)C4=CC=CC=C43)=O)C=C(...
%%time ## 22.65 seconds/ molecule
df['Docking Score (GNINA)'] = df['SMILES'].progress_apply(predictGNINA_docking)
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [07:33<00:00, 22.65s/it]

CPU times: user 2.16 s, sys: 58.6 ms, total: 2.22 s
Wall time: 7min 33s
%%time ## 21.4 seconds/ molecule
df['Docking Score (AutoDock Vina)'] = df['SMILES'].progress_apply(predictAutoVina_docking)
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [07:08<00:00, 21.42s/it]

CPU times: user 2.91 s, sys: 134 ms, total: 3.04 s
Wall time: 7min 8s
import numpy as np

df['\u0394'] = np.abs(df['Docking Score (GNINA)']- df['Docking Score (AutoDock Vina)'])
df
Iteration SMILES Docking Score (GNINA) Docking Score (AutoDock Vina) Δ
0 0 CN1CCN(C2=C(NC(C3=CC=CC=C3Cl)=O)C=C([N+]([O-])... -6.33 -6.7 0.37
1 1 CN1CCN(C2=C(NC(C3=CC=CC=C3F)=O)C=C([N+]([O-])=... -5.52 -6.6 1.08
2 2 CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C([N+]... -7.55 -7.6 0.05
3 3 CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C#N)... -7.83 -7.6 0.23
4 4 CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C(=O... -8.26 -8.4 0.14
5 5 CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C(=O... -8.22 -8.3 0.08
6 6 CN1CCN(CC(=O)N)CC1C2=C(NC(C3=CC=CC4=CC=CC=C43)... -8.88 -7.9 0.98
7 7 CN1CCN(C2=C(NC(C3=C(F)C=CC4=CC=CC=C43)=O)C=C(C... -8.15 -8.1 0.05
8 8 CN1CCN(C2=C(NC(=O)CC3=CC=CC4=CC=CC=C43)C=C(C(=... -7.82 -8.2 0.38
9 9 CN1CCN(CC2=CC=C(NC(C3=CC=CC4=CC=CC=C43)=O)C(C(... -7.36 -8.2 0.84
10 10 CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C(=O... -8.35 -8.2 0.15
11 11 CCN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C(=... -8.07 -7.9 0.17
12 12 CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C(F)=C(C... -7.55 -8.0 0.45
13 13 CN1CCN(C2=C(NC(=N)C3=CC=CC4=CC=CC=C43)C=C(C(=O... -7.96 -7.9 0.06
14 14 CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C(=O... -9.02 -8.7 0.32
15 15 CN1CCN(C2=C(NC(C3=CC=CC4=CC=CC=C43)=O)C=C(C(=O... -8.14 -8.6 0.46
16 16 CN1CCN(C2=C(NC(C3=CC=C(F)C4=CC=CC=C43)=O)C=C(C... -8.58 -8.8 0.22
17 17 CN1CCN(C2=C(NC(C3=CC=C(Cl)C4=CC=CC=C43)=O)C=C(... -8.91 -8.9 0.01
18 18 CCN1CCN(C2=C(NC(C3=CC=C(Cl)C4=CC=CC=C43)=O)C=C... -7.51 -8.7 1.19
19 19 CN1CCN(C2=C(NC(C3=CC=C(Cl)C4=CC=CC=C43)=O)C=C(... -8.68 -8.3 0.38