# ArylHalides / app.py
# Author: AVH1993
# Commit: "Add arylation example" (8053f30), 4.24 kB
import gradio as gr
import pandas as pd
# Goal: Maximize
# Publication reporting the dataset: Ahneman et al. Predicting reaction performance in
# C–N cross-coupling using machine learning. Science 360, 186–190 (2018)
# https://www.science.org/doi/10.1126/science.aar5169#supplementary-materials
def lookup(base: str, ligand: str, additive: str, aryl_halide: str):
    """Return the recorded yield for one exact combination of components.

    The reference table is read from ``./data.csv`` on every call and
    matched against the four given values on the columns ``base``,
    ``ligand``, ``additive`` and ``aryl_halide``.

    Args:
        base: Value to match in the ``base`` column.
        ligand: Value to match in the ``ligand`` column.
        additive: Value to match in the ``additive`` column.
        aryl_halide: Value to match in the ``aryl_halide`` column.

    Returns:
        The ``yield`` entry of the single matching row.

    Raises:
        IndexError: If no row matches the combination, or if more than one
            row matches it.
    """
    data = pd.read_csv("./data.csv", delimiter=",", index_col=None)

    # One-row frame holding the requested combination. A list with a single
    # record is a documented input of the DataFrame constructor, unlike a
    # dict of scalars handed to ``from_records``.
    other = pd.DataFrame(
        [
            {
                "base": base,
                "ligand": ligand,
                "additive": additive,
                "aryl_halide": aryl_halide,
            }
        ]
    )

    # reset_index() exposes the row labels of `data` as an "index" column so
    # they survive the merge. The right join keeps the query row even when
    # nothing matches; its "index" is then NaN and the row is dropped.
    merged_df = pd.merge(
        left=data.reset_index(),
        right=other,
        on=other.columns.tolist(),
        how="right",
        sort=False,
    ).dropna(subset=["index"])
    idxs_matched = pd.Index(merged_df["index"].values)

    if len(idxs_matched) < len(other):
        raise IndexError(
            "When doing exact lookup some rows could not be found. Try "
            "approximate lookup or check reference data."
        )
    if len(idxs_matched) > len(other):
        raise IndexError(
            "When doing exact lookup some rows in the reference dataframe appear "
            "duplicated. Check reference data."
        )

    return data["yield"][idxs_matched[0]]
# Selectable bases. The keys are the names offered in the UI; the values
# appear to be SMILES strings and are not read anywhere in the visible code.
base = {
    "BTMG": "CN(C)/C(N(C)C)=N\\C(C)(C)C",
    "MTBD": "CN1CCCN2CCCN=C12",
    "P2Et": "CN(C)P(N(C)C)(N(C)C)=NP(N(C)C)(N(C)C)=NCC",
}
# Selectable ligands. The keys are the names offered in the UI; the values
# appear to be SMILES strings and are not read anywhere in the visible code.
ligand = {
    "XPhos": "CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C3CCCCC3)C4CCCCC4)C=CC=C2",
    "t-BuXPhos": "CC(C)C(C=C(C(C)C)C=C1C(C)C)=C1C2=CC=CC=C2P(C(C)(C)C)C(C)(C)C",
    "t-BuBrettPhos": "CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C(C)(C)C)C(C)(C)C)C(OC)=CC=C2OC",
    "AdBrettPhos": "CC(C1=C(C2=C(OC)C=CC(OC)=C2P(C34CC5CC(C4)CC(C5)C3)C67CC8CC(C7)CC(C8)C6)C(C(C)C)=CC(C(C)C)=C1)C",
}
# Selectable additives. The keys are the names offered in the UI; the values
# appear to be SMILES strings and are not read anywhere in the visible code.
additive = {
    "3,5-dimethylisoxazole": "Cc1onc(C)c1",
    "3-methyl-5-phenylisoxazole": "Cc1cc(on1)c2ccccc2",
    "3-methylisoxazole": "Cc1ccon1",
    "3-phenylisoxazole": "o1ccc(n1)c2ccccc2",
    "4-phenylisoxazole": "o1cc(cn1)c2ccccc2",
    "5-(2,6-difluorophenyl)isoxazole": "Fc1cccc(F)c1c2oncc2",
    "5-Phenyl-1,2,4-oxadiazole": "c1ccc(-c2ncno2)cc1",
    "5-methyl-3-(1H-pyrrol-1-yl)isoxazole": "Cc1onc(c1)n2cccc2",
    "5-methylisoxazole": "Cc1oncc1",
    "5-phenylisoxazole": "o1nccc1c2ccccc2",
    "N,N-dibenzylisoxazol-3-amine": "C(N(Cc1ccccc1)c2ccon2)c3ccccc3",
    "N,N-dibenzylisoxazol-5-amine": "C(N(Cc1ccccc1)c2oncc2)c3ccccc3",
    "benzo[c]isoxazole": "o1cc2ccccc2n1",
    "benzo[d]isoxazole": "o1ncc2ccccc12",
    "ethyl-3-methoxyisoxazole-5-carboxylate": "CCOC(=O)c1onc(OC)c1",
    "ethyl-3-methylisoxazole-5-carboxylate": "CCOC(=O)c1onc(C)c1",
    "ethyl-5-methylisoxazole-3-carboxylate": "CCOC(=O)c1cc(C)on1",
    "ethyl-5-methylisoxazole-4-carboxylate": "CCOC(=O)c1cnoc1C",
    "ethyl-isoxazole-3-carboxylate": "CCOC(=O)c1ccon1",
    "ethyl-isoxazole-4-carboxylate": "CCOC(=O)c1conc1",
    "methyl-5-(furan-2-yl)isoxazole-3-carboxylate": "COC(=O)c1cc(on1)c2occc2",
    "methyl-5-(thiophen-2-yl)isoxazole-3-carboxylate": "COC(=O)c1cc(on1)c2sccc2",
    "methyl-isoxazole-5-carboxylate": "COC(=O)c1oncc1",
}
# Selectable aryl halides. The keys are the names offered in the UI; the
# values appear to be SMILES strings and are not read anywhere in the visible
# code. The variable name is spelled "haylide" and is kept as-is because other
# module-level code refers to it by that name.
aryl_haylide = {
    "1-bromo-4-ethylbenzene": "CCc1ccc(Br)cc1",
    "1-bromo-4-methoxybenzene": "COc1ccc(Br)cc1",
    "1-chloro-4-(trifluoromethyl)benzene": "FC(F)(F)c1ccc(Cl)cc1",
    "1-chloro-4-methoxybenzene": "COc1ccc(Cl)cc1",
    "1-iodo-4-methoxybenzene": "COc1ccc(I)cc1",
    "2-bromopyridine": "Brc1ccccn1",
    "2-iodopyridine": "Ic1ccccn1",
    "3-bromopyridine": "Brc1cccnc1",
    "3-chloropyridine": "Clc1cccnc1",
}
# Initial selection for each input, in the parameter order of `lookup`:
# base, ligand, additive, aryl halide.
defaults = (
    "P2Et",
    "XPhos",
    "5-phenylisoxazole",
    "1-chloro-4-(trifluoromethyl)benzene",
)
# Gradio front end: one radio group per reaction component, wired to `lookup`.
# Each group offers the keys of the matching module-level dict and starts on
# the corresponding entry of `defaults`.
iface = gr.Interface(
    fn=lookup,
    inputs=[
        gr.Radio(label="Base", choices=list(base), value=defaults[0]),
        gr.Radio(label="Ligand", choices=list(ligand), value=defaults[1]),
        gr.Radio(label="Additive", choices=list(additive), value=defaults[2]),
        # Label spelling fixed ("Halide"); the dict keeps its original name.
        gr.Radio(
            label="Aryl Halide", choices=list(aryl_haylide), value=defaults[3]
        ),
    ],
    # The output is pre-filled with the yield of the default selection, so
    # `lookup` runs once while this module is being executed.
    outputs=gr.Number(value=lookup(*defaults), label="yield value (maximize)"),
)
iface.launch()