from huggingface_hub import from_pretrained_keras
import gradio as gr
from rdkit import Chem, RDLogger
from rdkit.Chem.Draw import MolsToGridImage
import numpy as np
import tensorflow as tf
from tensorflow import keras
# Config
# Two-way lookup between element symbols and their integer class indices:
# symbol -> index and index -> symbol live in the same dict.
atom_mapping = {
    key: value
    for index, symbol in enumerate(("C", "N", "O", "F"))
    for key, value in ((symbol, index), (index, symbol))
}
# Two-way lookup for bond types: bond name -> integer class index, and
# integer class index -> the RDKit BondType used when building molecules.
bond_mapping = {
    key: value
    for index, (name, bond_type) in enumerate(
        (
            ("SINGLE", Chem.BondType.SINGLE),
            ("DOUBLE", Chem.BondType.DOUBLE),
            ("TRIPLE", Chem.BondType.TRIPLE),
            ("AROMATIC", Chem.BondType.AROMATIC),
        )
    )
    for key, value in ((name, index), (index, bond_type))
}
# Maximum number of atoms (not referenced by the code visible in this file).
NUM_ATOMS = 9
# Size of the atom-type axis: the 4 element classes in atom_mapping plus one
# extra class. graph_to_molecule treats the last index (ATOM_DIM - 1) as
# "no atom" and drops those entries.
ATOM_DIM = 4 + 1
# Size of the bond-type axis: the 4 bond classes in bond_mapping plus one
# extra class. graph_to_molecule treats the last index (BOND_DIM - 1) as
# "no bond" and skips it.
BOND_DIM = 4 + 1
# Width of the random latent vectors that predict() feeds to the generator.
LATENT_DIM = 64
# Turn off RDKit logging for the "rdApp.*" loggers (presumably to keep the
# output quiet when generated molecules fail sanitization).
RDLogger.DisableLog("rdApp.*")
def graph_to_molecule(graph):
    """Convert a one-hot encoded molecular graph into an RDKit molecule.

    Args:
        graph: A pair ``(adjacency, features)``. ``adjacency`` is indexed as
            ``[bond_type, atom, atom]`` and ``features`` as
            ``[atom, atom_type]``. The last index on the bond-type axis means
            "no bond" and the last index on the atom-type axis means
            "no atom".

    Returns:
        The sanitized ``Chem.RWMol``, or ``None`` if RDKit sanitization
        reports any failure.
    """
    adjacency, features = graph

    # Keep only real atoms (not the "no atom" class) that take part in at
    # least one real bond (any bond channel except the last, "no bond").
    is_real_atom = np.argmax(features, axis=1) != ATOM_DIM - 1
    is_bonded = np.sum(adjacency[:-1], axis=(0, 1)) != 0
    kept = np.where(is_real_atom & is_bonded)[0]
    features = features[kept]
    adjacency = adjacency[:, kept, :][:, :, kept]

    # RWMol is the editable molecule type; add one atom per remaining row.
    molecule = Chem.RWMol()
    for atom_class in np.argmax(features, axis=1):
        molecule.AddAtom(Chem.Atom(atom_mapping[atom_class]))

    # The adjacency tensor is symmetric, so only the upper triangle of each
    # bond channel is visited to add every bond exactly once.
    upper_triangle_hits = np.where(np.triu(adjacency) == 1)
    for bond_class, first, second in zip(*upper_triangle_hits):
        # Skip anything on the diagonal and anything in the "no bond" channel.
        if first != second and bond_class != BOND_DIM - 1:
            molecule.AddBond(int(first), int(second), bond_mapping[bond_class])

    # Be strict: any sanitization problem rejects the molecule. See
    # https://www.rdkit.org/docs/RDKit_Book.html#molecular-sanitization
    status = Chem.SanitizeMol(molecule, catchErrors=True)
    return molecule if status == Chem.SanitizeFlags.SANITIZE_NONE else None
# Load the pretrained model from the Hugging Face Hub repo
# "keras-io/wgan-molecular-graphs" once, when the module is first executed;
# predict() reuses this single instance for every call.
generator = from_pretrained_keras("keras-io/wgan-molecular-graphs")
def predict(num_mol):
    """Sample molecules from the generator and render them as a grid image.

    Twice as many latent vectors as requested are sampled, because
    graph_to_molecule returns ``None`` for graphs that fail sanitization.
    The first ``num_mol`` surviving molecules are drawn; fewer are drawn if
    fewer survive.

    Args:
        num_mol: Number of molecules to display. Converted with ``int()`` so
            that a whole-number float (e.g. from a UI slider) is accepted.

    Returns:
        The path of the PNG file that was written (``"img.png"``).

    Raises:
        ValueError: If ``num_mol`` is smaller than 1.
    """
    # The value is used as a tensor shape and as a slice bound below, and all
    # of those need a real int.
    num_mol = int(num_mol)
    if num_mol < 1:
        raise ValueError(f"num_mol must be a positive integer, got {num_mol}")

    samples = num_mol * 2
    z = tf.random.normal((samples, LATENT_DIM))
    graph = generator.predict(z)

    # One-hot encode the highest-scoring bond class along axis 1 of graph[0].
    adjacency = tf.argmax(graph[0], axis=1)
    adjacency = tf.one_hot(adjacency, depth=BOND_DIM, axis=1)
    # Remove potential self-loops from adjacency
    adjacency = tf.linalg.set_diag(adjacency, tf.zeros(tf.shape(adjacency)[:-1]))
    # One-hot encode the highest-scoring atom class along axis 2 of graph[1].
    features = tf.argmax(graph[1], axis=2)
    features = tf.one_hot(features, depth=ATOM_DIM, axis=2)

    # Convert each tensor to NumPy once rather than once per sample.
    adjacency = adjacency.numpy()
    features = features.numpy()
    molecules = [
        graph_to_molecule([sample_adjacency, sample_features])
        for sample_adjacency, sample_features in zip(adjacency, features)
    ]

    valid = [m for m in molecules if m is not None][:num_mol]
    if not valid:
        # Every sampled graph was rejected. An empty list would ask RDKit for
        # a grid with zero rows, so draw one blank placeholder cell instead.
        valid = [Chem.Mol()]

    MolsToGridImage(
        valid, molsPerRow=5, subImgSize=(150, 150), returnPNG=False,
    ).save("img.png")
    return "img.png"
# Build the web UI around predict(): one slider selects how many molecules to
# draw and the returned image path is shown as an image. The app is launched
# immediately with request queueing enabled.
gr.Interface(
    fn=predict,
    inputs=[
        gr.inputs.Slider(5, 50, step=5, default=10, label="Number of Molecular Graphs"),
    ],
    outputs="image",
    title="Generating molecular graphs by WGAN-GP",
    description="WGAN-GP with R-GCN for the generation of small molecular graphs 🔬",
    article="Author: Vu Minh Chien. Based on the keras example from Alexander Kensert",
).launch(enable_queue=True)