File size: 4,235 Bytes
8053f30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import gradio as gr
import pandas as pd


# Goal: Maximize
# Publication reporting the dataset: Ahneman et al. Predicting reaction performance in
# C–N cross-coupling using machine learning. Science 360, 186–190 (2018)
# https://www.science.org/doi/10.1126/science.aar5169#supplementary-materials
def lookup(base: str, ligand: str, additive: str, aryl_halide: str):
    """Return the ``yield`` entry of the reference row matching the given inputs.

    The reference table is read from ``./data.csv`` on every call. A row
    matches when its ``base``, ``ligand``, ``additive`` and ``aryl_halide``
    columns all equal the corresponding arguments.

    Args:
        base: Value to match against the ``base`` column.
        ligand: Value to match against the ``ligand`` column.
        additive: Value to match against the ``additive`` column.
        aryl_halide: Value to match against the ``aryl_halide`` column.

    Returns:
        The ``yield`` value of the single matching row.

    Raises:
        IndexError: If no row matches, or if more than one row matches.
    """
    reference = pd.read_csv("./data.csv", delimiter=",", index_col=None)

    # Single-row frame describing the requested combination.
    query = pd.DataFrame.from_records(
        {
            "base": base,
            "ligand": ligand,
            "additive": additive,
            "aryl_halide": aryl_halide,
        },
        index=[0],
    )
    key_columns = query.columns.tolist()

    # A right join always keeps the query row; when nothing in the reference
    # matches, its "index" entry is NaN and the row is discarded just below.
    joined = pd.merge(
        left=reference.reset_index(),
        right=query,
        on=key_columns,
        how="right",
        sort=False,
    )
    hits = joined.dropna(subset="index")
    matched_rows = pd.Index(hits["index"].values)

    n_found = len(matched_rows)
    n_wanted = len(query)

    if n_found < n_wanted:
        raise IndexError(
            "When doing exact lookup some rows could not be found. Try "
            "approximate lookup or check reference data."
        )
    if n_found > n_wanted:
        raise IndexError(
            "When doing exact lookup some rows in the reference dataframe appear "
            "duplicated. Check reference data."
        )

    first_match = matched_rows[0]
    return reference["yield"][first_match]


# Option tables for the four reaction components. Each maps a display name to
# a structure string (these look like SMILES -- TODO confirm). Key order is
# kept as-is because the keys are listed in this order as the UI choices.

base = {
    "BTMG": "CN(C)/C(N(C)C)=N\\C(C)(C)C",
    "MTBD": "CN1CCCN2CCCN=C12",
    "P2Et": "CN(C)P(N(C)C)(N(C)C)=NP(N(C)C)(N(C)C)=NCC",
}

ligand = {
    "XPhos": "CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C3CCCCC3)C4CCCCC4)C=CC=C2",
    "t-BuXPhos": "CC(C)C(C=C(C(C)C)C=C1C(C)C)=C1C2=CC=CC=C2P(C(C)(C)C)C(C)(C)C",
    "t-BuBrettPhos": "CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C(C)(C)C)C(C)(C)C)C(OC)=CC=C2OC",
    "AdBrettPhos": "CC(C1=C(C2=C(OC)C=CC(OC)=C2P(C34CC5CC(C4)CC(C5)C3)C67CC8CC(C7)CC(C8)C6)C(C(C)C)=CC(C(C)C)=C1)C",
}

additive = {
    "3,5-dimethylisoxazole": "Cc1onc(C)c1",
    "3-methyl-5-phenylisoxazole": "Cc1cc(on1)c2ccccc2",
    "3-methylisoxazole": "Cc1ccon1",
    "3-phenylisoxazole": "o1ccc(n1)c2ccccc2",
    "4-phenylisoxazole": "o1cc(cn1)c2ccccc2",
    "5-(2,6-difluorophenyl)isoxazole": "Fc1cccc(F)c1c2oncc2",
    "5-Phenyl-1,2,4-oxadiazole": "c1ccc(-c2ncno2)cc1",
    "5-methyl-3-(1H-pyrrol-1-yl)isoxazole": "Cc1onc(c1)n2cccc2",
    "5-methylisoxazole": "Cc1oncc1",
    "5-phenylisoxazole": "o1nccc1c2ccccc2",
    "N,N-dibenzylisoxazol-3-amine": "C(N(Cc1ccccc1)c2ccon2)c3ccccc3",
    "N,N-dibenzylisoxazol-5-amine": "C(N(Cc1ccccc1)c2oncc2)c3ccccc3",
    "benzo[c]isoxazole": "o1cc2ccccc2n1",
    "benzo[d]isoxazole": "o1ncc2ccccc12",
    "ethyl-3-methoxyisoxazole-5-carboxylate": "CCOC(=O)c1onc(OC)c1",
    "ethyl-3-methylisoxazole-5-carboxylate": "CCOC(=O)c1onc(C)c1",
    "ethyl-5-methylisoxazole-3-carboxylate": "CCOC(=O)c1cc(C)on1",
    "ethyl-5-methylisoxazole-4-carboxylate": "CCOC(=O)c1cnoc1C",
    "ethyl-isoxazole-3-carboxylate": "CCOC(=O)c1ccon1",
    "ethyl-isoxazole-4-carboxylate": "CCOC(=O)c1conc1",
    "methyl-5-(furan-2-yl)isoxazole-3-carboxylate": "COC(=O)c1cc(on1)c2occc2",
    "methyl-5-(thiophen-2-yl)isoxazole-3-carboxylate": "COC(=O)c1cc(on1)c2sccc2",
    "methyl-isoxazole-5-carboxylate": "COC(=O)c1oncc1",
}

# NOTE(review): the name is misspelled ("haylide"); it is kept unchanged here
# because other module-level code refers to it by this exact name.
aryl_haylide = {
    "1-bromo-4-ethylbenzene": "CCc1ccc(Br)cc1",
    "1-bromo-4-methoxybenzene": "COc1ccc(Br)cc1",
    "1-chloro-4-(trifluoromethyl)benzene": "FC(F)(F)c1ccc(Cl)cc1",
    "1-chloro-4-methoxybenzene": "COc1ccc(Cl)cc1",
    "1-iodo-4-methoxybenzene": "COc1ccc(I)cc1",
    "2-bromopyridine": "Brc1ccccn1",
    "2-iodopyridine": "Ic1ccccn1",
    "3-bromopyridine": "Brc1cccnc1",
    "3-chloropyridine": "Clc1cccnc1",
}

# Initially selected option names, ordered as (base, ligand, additive, aryl halide).
defaults = (
    "P2Et",
    "XPhos",
    "5-phenylisoxazole",
    "1-chloro-4-(trifluoromethyl)benzene",
)

# Gradio front end: one radio group per reaction component. The selected
# option names are passed positionally to `lookup` in the order
# (base, ligand, additive, aryl_halide), so the input order below matters.
iface = gr.Interface(
    fn=lookup,
    inputs=[
        gr.Radio(label="Base", choices=list(base), value=defaults[0]),
        gr.Radio(label="Ligand", choices=list(ligand), value=defaults[1]),
        gr.Radio(label="Additive", choices=list(additive), value=defaults[2]),
        # Label spelling fixed ("Haylide" -> "Halide"); the dict keeps its
        # original module-level name.
        gr.Radio(label="Aryl Halide", choices=list(aryl_haylide), value=defaults[3]),
    ],
    # Pre-fill the output with the looked-up value for the default selection,
    # so a result is shown before the user submits anything.
    outputs=gr.Number(lookup(*defaults), label="yield value (maximize)"),
)
iface.launch()