Create an Interactive Visualization for Approved Small Molecule Drugs

2 minute read

Published:

In this post, I want to generate an interactive visualization of a chemical space. Each point in the map represents a molecule, and points that are close together have similar structures. When you move your mouse over a point, the name and structure of the molecule will show up.

from chembl_webresource_client.new_client import new_client
import pandas as pd

from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import PandasTools
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import AllChem
from rdkit.Chem import rdFingerprintGenerator
from rdkit import DataStructs

from sklearn.manifold import TSNE

from IPython.display import SVG
from rdkit.Chem import rdDepictor
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Chem.PandasTools import ChangeMoleculeRendering

#Bokeh library for plotting
import json
from bokeh.plotting import figure, show, output_notebook, ColumnDataSource
from bokeh.models import HoverTool
from bokeh.transform import factor_cmap
from bokeh.plotting import figure, output_file, save

Getting the small molecule drugs from ChEMBL

ChEMBL is a manually curated database of bioactive molecules with drug-like properties. It brings together chemical, bioactivity and genomic data to aid the translation of genomic information into effective new drugs. Here we use the python client library of ChEMBL API to download the SMILES for all of the approved small molecule drugs and put them into a Pandas dataframe.

# Handle to the ChEMBL molecule resource exposed by the web-resource client.
molecule = new_client.molecule
# Ask ChEMBL only for entries whose max_phase is 4.
approved_drugs = molecule.filter(max_phase=4)
# Keep just the records that ChEMBL labels as small molecules.
small_molecule_drugs = list(
    filter(lambda record: record['molecule_type'] == 'Small molecule', approved_drugs)
)

Extract the information we need: (1) the drug name, (2) the ChEMBL ID, and (3) the canonical SMILES.

# Pull (name, ChEMBL ID, structure record) out of every non-empty drug entry.
struct_list = [
    (drug['pref_name'], drug['molecule_chembl_id'], drug['molecule_structures'])
    for drug in small_molecule_drugs
    if drug
]
# Drop entries that have no structure record and keep only the canonical SMILES.
smiles_list = [
    (name, chembl_id, structures['canonical_smiles'])
    for (name, chembl_id, structures) in struct_list
    if structures
]
# Name the columns in the constructor: assigning three labels afterwards raises
# ValueError when smiles_list is empty, because the frame then has no columns.
smiles_df = pd.DataFrame(smiles_list, columns=['Name', 'ChEMBL_ID', 'SMILES'])
print(f'We downloaded {smiles_df.shape[0]} small molecule drugs from ChEMBL.')
We downloaded 3147 small molecule drugs from ChEMBL.

Let's check the data.

smiles_df.head()
|   | Name | ChEMBL_ID | SMILES |
|---|------|-----------|--------|
| 0 | PRAZOSIN | CHEMBL2 | COc1cc2nc(nc(N)c2cc1OC)N3CCN(CC3)C(=O)c4occc4 |
| 1 | NICOTINE | CHEMBL3 | CN1CCC[C@H]1c2cccnc2 |
| 2 | OFLOXACIN | CHEMBL4 | CC1COc2c(N3CCN(C)CC3)c(F)cc4C(=O)C(=CN1c24)C(=O)O |
| 3 | NALIDIXIC ACID | CHEMBL5 | CCN1C=C(C(=O)O)C(=O)c2ccc(C)nc12 |
| 4 | INDOMETHACIN | CHEMBL6 | COc1ccc2c(c1)c(CC(=O)O)c(C)n2C(=O)c3ccc(Cl)cc3 |

Molecular Fingerprints

Add RDKit Mol column to the dataframe.

# Build an RDKit molecule from each entry of the 'SMILES' column; the molecules
# are stored in a new 'ROMol' column of the same dataframe.
PandasTools.AddMoleculeColumnToFrame(smiles_df,smilesCol='SMILES')
# Display the first row to confirm that the molecule column was added.
smiles_df.head(1)
|   | Name | ChEMBL_ID | SMILES | ROMol |
|---|------|-----------|--------|-------|
| 0 | PRAZOSIN | CHEMBL2 | COc1cc2nc(nc(N)c2cc1OC)N3CCN(CC3)C(=O)c4occc4 | Mol |

Generate an ECFP4 fingerprint for each drug.

# ECFP4 is computed here as a Morgan fingerprint of radius 2. The generator API
# replaces the deprecated AllChem.GetMorganFingerprintAsBitVect; fpSize=2048 is
# that function's default bit-vector length, so each drug still yields one
# 2048-bit vector.
morgan_generator = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)
ECFP4_fps = [morgan_generator.GetFingerprint(mol) for mol in smiles_df['ROMol']]

Dimensionality Reduction of Features

Use t-SNE to reduce the fingerprint features to two dimensions for visualization.

# Configure t-SNE with a fixed random seed, then embed the fingerprints.
tsne_model = TSNE(random_state=0)
tsne = tsne_model.fit_transform(ECFP4_fps)

Define some helper functions for the interactive visualization. Some of the code is adapted from mol2vec_notebook.

def _prepareMol(mol, kekulize):
    """Return a copy of ``mol`` that is ready to be drawn.

    The input molecule is never modified: all work happens on a copy rebuilt
    from its binary serialization.

    Args:
        mol: RDKit molecule to prepare.
        kekulize: When true, try to kekulize the copy before drawing.

    Returns:
        The copied molecule, kekulized when requested and possible, with 2D
        coordinates computed if it had no conformer.
    """
    mc = Chem.Mol(mol.ToBinary())
    if kekulize:
        try:
            Chem.Kekulize(mc)
        except Exception:
            # Kekulization is best-effort. On failure, discard the copy that
            # Kekulize was working on and start again from a fresh one.
            # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
            # SystemExit propagate.
            mc = Chem.Mol(mol.ToBinary())
    if not mc.GetNumConformers():
        # Without a conformer there are no coordinates to draw, so compute a
        # 2D layout.
        rdDepictor.Compute2DCoords(mc)
    return mc

def moltosvg(mol,molSize=(450,200),kekulize=True,drawer=None,**kwargs):
    """Draw ``mol`` and return the drawing as an IPython ``SVG`` object.

    Args:
        mol: RDKit molecule to draw.
        molSize: (width, height) of the canvas; only used when no ``drawer``
            is supplied.
        kekulize: Passed on to ``_prepareMol``.
        drawer: Optional drawer to use instead of a new ``MolDraw2DSVG``.
        **kwargs: Forwarded unchanged to the drawer's ``DrawMolecule``.

    Returns:
        ``SVG`` wrapping the drawing text with every ``'svg:'`` occurrence
        removed.
    """
    prepared = _prepareMol(mol, kekulize)
    canvas = drawer
    if canvas is None:
        canvas = rdMolDraw2D.MolDraw2DSVG(molSize[0], molSize[1])
    canvas.DrawMolecule(prepared, **kwargs)
    canvas.FinishDrawing()
    drawing_text = canvas.GetDrawingText()
    return SVG(drawing_text.replace('svg:', ''))

Render an SVG image of each molecule.

# Draw every molecule and keep only the SVG markup of each drawing.
svgs = [moltosvg(mol_obj).data for mol_obj in smiles_df['ROMol']]

Generate the interactive visualization.

# Switch the PandasTools molecule renderer to PNG.
ChangeMoleculeRendering(renderer='PNG')

# One entry per drug: t-SNE coordinates, drug name and SVG markup.
plot_data = {
    'x': tsne[:, 0],
    'y': tsne[:, 1],
    'desc': smiles_df.Name,
    'svgs': svgs,
}
source = ColumnDataSource(data=plot_data)

# Tooltip template: the structure drawing on top, the drug name underneath.
# The {safe} format tells Bokeh to insert the SVG markup without escaping it.
hover = HoverTool(tooltips="""
    <div>
        <div>@svgs{safe}
        </div>
        <div>
            <span style="font-size: 17px; font-weight: bold;">@desc</span>
        </div>
    </div>
    """
)

# Standard navigation tools plus the custom hover tool defined above.
toolbar = ['reset,box_zoom,wheel_zoom,zoom_in,zoom_out,pan', hover]
interactive_map = figure(
    plot_width=1000,
    plot_height=1000,
    tools=toolbar,
    title="Small Molecule Drug (ECFP4)",
)

# Plot one circle per drug at its t-SNE coordinates.
interactive_map.circle('x', 'y', size=5, source=source, fill_alpha=0.2);


Save it as an HTML file. Please click here to check the generated map.

# Point Bokeh's output at an HTML file, then write the figure to it.
output_file("interactive_map.html")
save(interactive_map)

All the code and data are available on my github.

Leave a Comment