"""Gradio front end for looking up reaction yields in a reference CSV.

The user picks a base, ligand, additive and aryl halide; the app shows the
``yield`` value of the single row in ``./data.csv`` that matches all four.
"""
import functools

import gradio as gr
import pandas as pd

# Goal: Maximize
# Publication reporting the dataset: Ahneman et al. Predicting reaction
# performance in C–N cross-coupling using machine learning.
# Science 360, 186–190 (2018)
# https://www.science.org/doi/10.1126/science.aar5169#supplementary-materials

# Resolved against the current working directory, not this file's location.
DATA_PATH = "./data.csv"


@functools.lru_cache(maxsize=1)
def _load_data() -> pd.DataFrame:
    """Read the reference table once and reuse it for every later lookup.

    The returned frame is shared between calls, so callers must treat it as
    read-only.
    """
    return pd.read_csv(DATA_PATH, delimiter=",", index_col=None)


def lookup(base: str, ligand: str, additive: str, aryl_halide: str):
    """Return the ``yield`` of the row matching the four given components.

    Args:
        base: Value to match against the ``base`` column.
        ligand: Value to match against the ``ligand`` column.
        additive: Value to match against the ``additive`` column.
        aryl_halide: Value to match against the ``aryl_halide`` column.

    Returns:
        The entry of the ``yield`` column for the unique matching row.

    Raises:
        IndexError: If no row matches, or if more than one row matches.
    """
    data = _load_data()
    query = {
        "base": base,
        "ligand": ligand,
        "additive": additive,
        "aryl_halide": aryl_halide,
    }

    # Exact match on every queried column.
    mask = pd.Series(True, index=data.index)
    for column, value in query.items():
        mask &= data[column] == value
    matches = data.loc[mask, "yield"]

    if matches.empty:
        raise IndexError(
            "When doing exact lookup some rows could not be found. Try "
            "approximate lookup or check reference data."
        )
    if len(matches) > 1:
        raise IndexError(
            "When doing exact lookup some rows in the reference dataframe appear "
            "duplicated. Check reference data."
        )
    return matches.iloc[0]


# Choice tables: display name -> string value (the values look like SMILES).
# Only the keys are used below, as the selectable options in the UI.
base = {
    "BTMG": "CN(C)/C(N(C)C)=N\\C(C)(C)C",
    "MTBD": "CN1CCCN2CCCN=C12",
    "P2Et": "CN(C)P(N(C)C)(N(C)C)=NP(N(C)C)(N(C)C)=NCC",
}

ligand = {
    "XPhos": "CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C3CCCCC3)C4CCCCC4)C=CC=C2",
    "t-BuXPhos": "CC(C)C(C=C(C(C)C)C=C1C(C)C)=C1C2=CC=CC=C2P(C(C)(C)C)C(C)(C)C",
    "t-BuBrettPhos": "CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C(C)(C)C)C(C)(C)C)C(OC)=CC=C2OC",
    "AdBrettPhos": "CC(C1=C(C2=C(OC)C=CC(OC)=C2P(C34CC5CC(C4)CC(C5)C3)C67CC8CC(C7)CC(C8)C6)C(C(C)C)=CC(C(C)C)=C1)C",
}

additive = {
    "3,5-dimethylisoxazole": "Cc1onc(C)c1",
    "3-methyl-5-phenylisoxazole": "Cc1cc(on1)c2ccccc2",
    "3-methylisoxazole": "Cc1ccon1",
    "3-phenylisoxazole": "o1ccc(n1)c2ccccc2",
    "4-phenylisoxazole": "o1cc(cn1)c2ccccc2",
    "5-(2,6-difluorophenyl)isoxazole": "Fc1cccc(F)c1c2oncc2",
    "5-Phenyl-1,2,4-oxadiazole": "c1ccc(-c2ncno2)cc1",
    "5-methyl-3-(1H-pyrrol-1-yl)isoxazole": "Cc1onc(c1)n2cccc2",
    "5-methylisoxazole": "Cc1oncc1",
    "5-phenylisoxazole": "o1nccc1c2ccccc2",
    "N,N-dibenzylisoxazol-3-amine": "C(N(Cc1ccccc1)c2ccon2)c3ccccc3",
    "N,N-dibenzylisoxazol-5-amine": "C(N(Cc1ccccc1)c2oncc2)c3ccccc3",
    "benzo[c]isoxazole": "o1cc2ccccc2n1",
    "benzo[d]isoxazole": "o1ncc2ccccc12",
    "ethyl-3-methoxyisoxazole-5-carboxylate": "CCOC(=O)c1onc(OC)c1",
    "ethyl-3-methylisoxazole-5-carboxylate": "CCOC(=O)c1onc(C)c1",
    "ethyl-5-methylisoxazole-3-carboxylate": "CCOC(=O)c1cc(C)on1",
    "ethyl-5-methylisoxazole-4-carboxylate": "CCOC(=O)c1cnoc1C",
    "ethyl-isoxazole-3-carboxylate": "CCOC(=O)c1ccon1",
    "ethyl-isoxazole-4-carboxylate": "CCOC(=O)c1conc1",
    "methyl-5-(furan-2-yl)isoxazole-3-carboxylate": "COC(=O)c1cc(on1)c2occc2",
    "methyl-5-(thiophen-2-yl)isoxazole-3-carboxylate": "COC(=O)c1cc(on1)c2sccc2",
    "methyl-isoxazole-5-carboxylate": "COC(=O)c1oncc1",
}

# NOTE: the misspelled name is kept so existing references keep working.
aryl_haylide = {
    "1-bromo-4-ethylbenzene": "CCc1ccc(Br)cc1",
    "1-bromo-4-methoxybenzene": "COc1ccc(Br)cc1",
    "1-chloro-4-(trifluoromethyl)benzene": "FC(F)(F)c1ccc(Cl)cc1",
    "1-chloro-4-methoxybenzene": "COc1ccc(Cl)cc1",
    "1-iodo-4-methoxybenzene": "COc1ccc(I)cc1",
    "2-bromopyridine": "Brc1ccccn1",
    "2-iodopyridine": "Ic1ccccn1",
    "3-bromopyridine": "Brc1cccnc1",
    "3-chloropyridine": "Clc1cccnc1",
}

# Initial selection, in the same order as the parameters of ``lookup``.
defaults = (
    "P2Et",
    "XPhos",
    "5-phenylisoxazole",
    "1-chloro-4-(trifluoromethyl)benzene",
)

iface = gr.Interface(
    fn=lookup,
    inputs=[
        gr.Radio(label="Base", choices=list(base), value=defaults[0]),
        gr.Radio(label="Ligand", choices=list(ligand), value=defaults[1]),
        gr.Radio(label="Additive", choices=list(additive), value=defaults[2]),
        gr.Radio(
            label="Aryl Halide", choices=list(aryl_haylide), value=defaults[3]
        ),
    ],
    # Pre-fill the output with the yield for the default selection; this
    # reads the CSV at import time.
    outputs=gr.Number(lookup(*defaults), label="yield value (maximize)"),
)

iface.launch()