Create an Interactive Visualization for Approved Small Molecule Drugs
Published:
In this post, I want to generate an interactive visualization of a chemical space. Each point in the map represents a molecule, and points that are close together have similar structures. When you move your mouse over a point, the name and structure of the molecule will show up.
from chembl_webresource_client.new_client import new_client
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import PandasTools
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import AllChem
from rdkit.Chem import rdFingerprintGenerator
from rdkit import DataStructs
from sklearn.manifold import TSNE
from IPython.display import SVG
from rdkit.Chem import rdDepictor
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Chem.PandasTools import ChangeMoleculeRendering
#Bokeh library for plotting
import json
from bokeh.plotting import figure, show, output_notebook, ColumnDataSource
from bokeh.models import HoverTool
from bokeh.transform import factor_cmap
from bokeh.plotting import figure, output_file, save
Getting the small molecule drugs from ChEMBL
ChEMBL is a manually curated database of bioactive molecules with drug-like properties. It brings together chemical, bioactivity and genomic data to aid the translation of genomic information into effective new drugs. Here we use the Python client library for the ChEMBL API to download the SMILES for all of the approved small molecule drugs and put them into a Pandas dataframe.
# Handle to the ChEMBL "molecule" resource exposed by the web client.
molecule = new_client.molecule
# max_phase=4 is the filter this post uses to select approved drugs.
approved_drugs = molecule.filter(max_phase=4)
# Keep only the records whose type is exactly 'Small molecule'.
small_molecule_drugs = list(
    filter(lambda entry: entry['molecule_type'] == 'Small molecule', approved_drugs)
)
Extract information we need: (1) drug name (2) CHEMBL ID and (3) Canonical SMILES.
# Collect (name, ChEMBL ID, structure record) for every non-empty drug entry.
struct_list = [
    (drug['pref_name'], drug['molecule_chembl_id'], drug['molecule_structures'])
    for drug in small_molecule_drugs
    if drug
]
# Drop entries without structure data and keep only the canonical SMILES.
smiles_list = [
    (name, chembl_id, structures['canonical_smiles'])
    for (name, chembl_id, structures) in struct_list
    if structures
]
# Name the columns in the constructor rather than assigning .columns later:
# assigning three names to the column-less frame built from an empty list
# raises a length-mismatch ValueError, whereas this yields an empty frame
# that still has the expected columns.
smiles_df = pd.DataFrame(smiles_list, columns=['Name', 'ChEMBL_ID', 'SMILES'])
print(f'We downloaded {smiles_df.shape[0]} small molecule drugs from ChEMBL.')
We downloaded 3147 small molecule drugs from ChEMBL.
Let's check the data.
# Preview the first five rows (the default for head) to sanity-check the download.
smiles_df.head(n=5)
 | Name | ChEMBL_ID | SMILES |
---|---|---|---|
0 | PRAZOSIN | CHEMBL2 | COc1cc2nc(nc(N)c2cc1OC)N3CCN(CC3)C(=O)c4occc4 |
1 | NICOTINE | CHEMBL3 | CN1CCC[C@H]1c2cccnc2 |
2 | OFLOXACIN | CHEMBL4 | CC1COc2c(N3CCN(C)CC3)c(F)cc4C(=O)C(=CN1c24)C(=O)O |
3 | NALIDIXIC ACID | CHEMBL5 | CCN1C=C(C(=O)O)C(=O)c2ccc(C)nc12 |
4 | INDOMETHACIN | CHEMBL6 | COc1ccc2c(c1)c(CC(=O)O)c(C)n2C(=O)c3ccc(Cl)cc3 |
Molecular Fingerprints
Add RDKit Mol column to the dataframe.
# Parse the 'SMILES' column into RDKit molecules, stored in a new 'ROMol' column
# (the frame is modified in place).
PandasTools.AddMoleculeColumnToFrame(smiles_df, smilesCol='SMILES', molCol='ROMol')
smiles_df.head(n=1)
 | Name | ChEMBL_ID | SMILES | ROMol |
---|---|---|---|---|
0 | PRAZOSIN | CHEMBL2 | COc1cc2nc(nc(N)c2cc1OC)N3CCN(CC3)C(=O)c4occc4 |
Generate an ECFP4 fingerprint for each drug.
# ECFP4 is a Morgan fingerprint with radius 2. AllChem.GetMorganFingerprintAsBitVect
# is deprecated in recent RDKit releases (it warns on every call), so use the
# fingerprint-generator API that this file already imports. fpSize=2048 matches
# the old function's default bit-vector length; one generator is reused for all
# molecules.
_ecfp4_generator = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)
ECFP4_fps = [_ecfp4_generator.GetFingerprint(mol) for mol in smiles_df['ROMol']]
Dimensionality Reduction of Features
Use t-SNE to reduce the features to two dimensions for visualization.
# Embed the fingerprints in two dimensions (n_components=2 is also TSNE's
# default); random_state pins the seed used for the embedding.
tsne = TSNE(n_components=2, random_state=0).fit_transform(ECFP4_fps)
Define some functions for interactive visualization. Some of the code is from mol2vec_notebook.
def _prepareMol(mol, kekulize):
    """Return a copy of ``mol`` that is ready to be drawn.

    Args:
        mol: RDKit molecule to draw. It is copied through its binary
            representation, so the input itself is never modified.
        kekulize: When true, try to kekulize the copy.

    Returns:
        The copied molecule, kekulized when requested and possible, with 2D
        coordinates computed if it had no conformers.
    """
    mc = Chem.Mol(mol.ToBinary())
    if kekulize:
        try:
            Chem.Kekulize(mc)
        except Exception:
            # Kekulization failed: start over from a fresh copy so a
            # half-modified molecule is never drawn. ``Exception`` (rather
            # than a bare ``except``) lets KeyboardInterrupt/SystemExit
            # propagate.
            mc = Chem.Mol(mol.ToBinary())
    if not mc.GetNumConformers():
        # No coordinates yet, so generate a 2D layout for the depiction.
        rdDepictor.Compute2DCoords(mc)
    return mc
def moltosvg(mol, molSize=(450, 200), kekulize=True, drawer=None, **kwargs):
    """Render ``mol`` as an SVG drawing.

    Args:
        mol: RDKit molecule to draw.
        molSize: ``(width, height)`` of the canvas, used only when no
            ``drawer`` is supplied.
        kekulize: Forwarded to ``_prepareMol``.
        drawer: Optional pre-built drawer; a ``MolDraw2DSVG`` is created
            when omitted.
        **kwargs: Passed through to ``drawer.DrawMolecule``.

    Returns:
        An ``IPython.display.SVG`` object wrapping the drawing text with
        every ``svg:`` occurrence stripped.
    """
    prepared = _prepareMol(mol, kekulize)
    if drawer is None:
        canvas_width = molSize[0]
        canvas_height = molSize[1]
        drawer = rdMolDraw2D.MolDraw2DSVG(canvas_width, canvas_height)
    drawer.DrawMolecule(prepared, **kwargs)
    drawer.FinishDrawing()
    drawing_text = drawer.GetDrawingText()
    cleaned_text = drawing_text.replace('svg:', '')
    return SVG(cleaned_text)
Get the image of molecules.
# One SVG markup string per molecule, in dataframe row order.
svgs = [moltosvg(mol).data for mol in smiles_df['ROMol']]
Generate the interactive visualization.
# Switch PandasTools molecule rendering to PNG.
ChangeMoleculeRendering(renderer='PNG')
# One entry per molecule: t-SNE coordinates, drug name and SVG depiction.
source = ColumnDataSource(data=dict(x=tsne[:,0], y=tsne[:,1], desc= smiles_df.Name,
                                    svgs=svgs))
# Tooltip template: {safe} inserts the SVG markup unescaped so it renders as
# an image, with the drug name underneath.
hover = HoverTool(tooltips="""
<div>
<div>@svgs{safe}
</div>
<div>
<span style="font-size: 17px; font-weight: bold;">@desc</span>
</div>
</div>
"""
)
# Use width/height: the plot_width/plot_height keywords were removed in
# Bokeh 3.0, while figure() accepts width/height in older releases as well.
interactive_map = figure(width=1000, height=1000,
                         tools=['reset,box_zoom,wheel_zoom,zoom_in,zoom_out,pan', hover],
                         title="Small Molecule Drug (ECFP4)")
# scatter() draws circle markers by default; circle() with a screen-unit
# `size` is deprecated in recent Bokeh releases. The trailing semicolon
# suppresses the notebook's echo of the returned renderer.
interactive_map.scatter('x', 'y', size=5, source=source, fill_alpha=0.2);
Save it as an HTML
file. Please click here to check the generated map.
# Point Bokeh's output at a standalone HTML file, then write the figure to it.
output_file(filename="interactive_map.html")
save(interactive_map)
All the code and data are available on my GitHub.
Leave a Comment