File size: 11,153 Bytes
45b3fc1
b3e9379
45b3fc1
 
 
f618644
 
 
 
 
 
 
b3e9379
 
 
45b3fc1
 
b3e9379
 
 
 
 
45b3fc1
b3e9379
 
 
 
 
 
 
 
45b3fc1
b3e9379
 
 
45b3fc1
 
b3e9379
 
 
 
 
 
45b3fc1
 
b3e9379
b0c702c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b742f8
b0c702c
 
f618644
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b0c702c
f618644
 
b0c702c
f618644
 
 
 
b0c702c
f618644
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b3e9379
b0c702c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45b3fc1
b0c702c
 
 
 
45b3fc1
b0c702c
 
45b3fc1
 
 
 
 
 
 
 
 
 
 
 
 
b0c702c
45b3fc1
de22f41
45b3fc1
 
 
 
 
 
 
 
 
 
 
de22f41
45b3fc1
 
 
 
 
 
 
 
 
 
 
b0c702c
 
 
45b3fc1
 
 
 
 
 
 
 
 
 
444bcef
45b3fc1
 
 
 
 
 
 
 
 
7ebabb5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36603fd
7ebabb5
b5d373d
7ebabb5
 
 
 
b5d373d
 
7ebabb5
 
 
45b3fc1
 
0b7d32c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
import numpy as np
import gradio as gr
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from surrogate import CrabNetSurrogateModel, PARAM_BOUNDS
from pydantic import (
    BaseModel,
    ValidationError,
    ValidationInfo,
    field_validator,
    model_validator,
)

# Surrogate model that emulates expensive CrabNet training runs.
model = CrabNetSurrogateModel()

# Define the input parameters
# Example hyperparameter set in the ORIGINAL (un-blinded) space. Used both to
# sanity-check the surrogate below and to seed the default widget values in
# get_interface().
example_parameterization = {
    "N": 3,
    "alpha": 0.5,
    "d_model": 512,
    "dim_feedforward": 2048,
    "dropout": 0.1,
    "emb_scaler": 0.5,
    "epochs_step": 10,
    "eps": 0.000001,
    "fudge": 0.02,
    "heads": 4,
    "k": 6,
    "lr": 0.001,
    "pe_resolution": 5000,
    "ple_resolution": 5000,
    "pos_scaler": 0.5,
    "weight_decay": 0,
    "batch_size": 32,
    "out_hidden4": 128,
    "betas1": 0.9,
    "betas2": 0.999,
    "bias": False,
    "criterion": "RobustL1",
    "elem_prop": "mat2vec",
    "train_frac": 0.5,
}

# Evaluate once up front so the output widget below knows the number and
# names of the objectives (y1..yN).
example_results = model.surrogate_evaluate([example_parameterization])
example_result = example_results[0]

# Min-max scalers mapping each continuous parameter's native bounds onto
# [0, 1], keyed by the original parameter name. MinMaxScaler.fit returns the
# scaler itself, so the whole mapping fits in one comprehension.
scalers = {
    spec["name"]: MinMaxScaler().fit([[bound] for bound in spec["bounds"]])
    for spec in PARAM_BOUNDS
    if spec["type"] == "range"
}

# HACK: Hardcoded
# Blinded search space: twenty unit-interval knobs (x1..x20), three
# categorical choices (c1..c3 with 2, 2, and 3 options), and one fidelity
# parameter. Built programmatically so the structure is obvious at a glance.
BLINDED_PARAM_BOUNDS = (
    [
        {"name": f"x{i}", "type": "range", "bounds": [0.0, 1.0]}
        for i in range(1, 21)
    ]
    + [
        {
            "name": f"c{i}",
            "type": "choice",
            "values": [f"c{i}_{j}" for j in range(num_options)],
        }
        for i, num_options in enumerate((2, 2, 3), start=1)
    ]
    + [{"name": "fidelity1", "type": "range", "bounds": [0.0, 1.0]}]
)


class BlindedParameterization(BaseModel):
    """Validates one point in the blinded search space.

    Continuous fields (x1..x20, fidelity1) must lie within their bounds in
    BLINDED_PARAM_BOUNDS; categorical fields (c1..c3) must be one of their
    declared values. Two inter-parameter constraints are also enforced:
    x19 <= x20 and x6 + x15 <= 1.0.
    """

    x1: float  # int
    x2: float
    x3: float  # int
    x4: float  # int
    x5: float
    x6: float
    x7: float  # int
    x8: float
    x9: float
    x10: float  # int
    x11: float  # int
    x12: float
    x13: float  # int
    x14: float  # int
    x15: float
    x16: float  # int
    x17: float  # int
    x18: float  # int
    x19: float
    x20: float
    c1: str  # bool
    c2: str
    c3: str
    fidelity1: float

    @field_validator("*")
    def check_bounds(cls, v, info: ValidationInfo):
        """Check each field against its entry in BLINDED_PARAM_BOUNDS."""
        param = next(
            (item for item in BLINDED_PARAM_BOUNDS if item["name"] == info.field_name),
            None,
        )
        if param is None:
            # No declared bounds for this field; accept it unchanged.
            return v

        if param["type"] == "range":
            min_val, max_val = param["bounds"]
            if not min_val <= v <= max_val:
                raise ValueError(
                    f"{info.field_name} must be between {min_val} and {max_val}"
                )
        elif param["type"] == "choice":
            if v not in param["values"]:
                raise ValueError(f"{info.field_name} must be one of {param['values']}")

        return v

    @model_validator(mode="after")
    def check_constraints(self) -> "BlindedParameterization":
        """Enforce the inter-parameter constraints x19 <= x20 and x6 + x15 <= 1."""
        if self.x19 > self.x20:
            raise ValueError(
                f"Received x19={self.x19} which should be less than x20={self.x20}"
            )
        if self.x6 + self.x15 > 1.0:
            raise ValueError(
                f"Received x6={self.x6} and x15={self.x15} which should sum to less than or equal to 1.0"  # noqa: E501
            )
        # BUG FIX: a pydantic v2 "after" model validator must return the model
        # instance; the original fell through and returned None.
        return self


# Conversion from original to blinded representation
def convert_to_blinded(params):
    """Map a parameterization from the original space to the blinded space.

    Range parameters are min-max scaled to [0, 1] and renamed x1..x20
    (train_frac becomes fidelity1). Choice parameters are renamed c1..c3 and
    their values encoded as e.g. "c2_1", where the trailing digit is the index
    of the chosen option in the original values list.
    """
    blinded_params = {}
    numeric_index = 1
    choice_index = 1
    for param in PARAM_BOUNDS:
        name = param["name"]
        if param["type"] == "range":
            key = "fidelity1" if name == "train_frac" else f"x{numeric_index}"
            blinded_params[key] = scalers[name].transform([[params[name]]])[0][0]
            if name != "train_frac":
                numeric_index += 1
        elif param["type"] == "choice":
            key = f"c{choice_index}"
            # BUG FIX: the original reused `choice_index` to hold the chosen
            # value's position, clobbering the running key counter and
            # producing wrong/colliding "cN" keys for later choice parameters.
            value_index = param["values"].index(params[name])
            blinded_params[key] = f"{key}_{value_index}"
            choice_index += 1
    return blinded_params


# Conversion from blinded to original representation
def convert_from_blinded(blinded_params):
    """Invert convert_to_blinded: recover original parameter names and values."""
    original_params = {}
    numeric_index = 1
    choice_index = 1
    for param in PARAM_BOUNDS:
        name = param["name"]
        if param["type"] == "range":
            if name == "train_frac":
                key = "fidelity1"
            else:
                key = f"x{numeric_index}"
                numeric_index += 1
            scaled = [[blinded_params[key]]]
            original_params[name] = scalers[name].inverse_transform(scaled)[0][0]
        elif param["type"] == "choice":
            # Blinded choice values look like "c2_1"; the trailing digit is an
            # index into the original list of allowed values.
            encoded = blinded_params[f"c{choice_index}"]
            value_index = int(encoded.rsplit("_", 1)[-1])
            original_params[name] = param["values"][value_index]
            choice_index += 1
    return original_params


def evaluate(*args):
    """Gradio callback: validate blinded inputs and evaluate the surrogate.

    Args are positional and assumed to be in the order of
    BLINDED_PARAM_BOUNDS. Returns a list of objective-value rows (one row per
    parameterization; here always exactly one).

    Raises:
        pydantic.ValidationError: if any input is out of bounds or violates
            an inter-parameter constraint.
    """
    # Assume args are in the order of BLINDED_PARAM_BOUNDS
    blinded_params = dict(zip([param["name"] for param in BLINDED_PARAM_BOUNDS], args))
    # Validate BEFORE converting: the converters assume in-bounds values, and
    # validating first surfaces a clear error message for bad inputs instead
    # of an obscure failure inside the scaling/indexing code.
    BlindedParameterization(**blinded_params)
    original_params = convert_from_blinded(blinded_params)

    params_list = [original_params]
    results = model.surrogate_evaluate(params_list)
    results_list = [list(result.values()) for result in results]
    return results_list


def get_interface(param_info, numeric_index, choice_index):
    """Build the Gradio input widget for one (blinded) parameter.

    Returns a tuple (component, next_numeric_index, next_choice_index) so the
    caller can thread the running x/c label counters through successive calls.

    Raises:
        ValueError: if param_info["type"] is neither "range" nor "choice".
    """
    key = param_info["name"]
    default_value = example_parameterization[key]
    if param_info["type"] == "range":
        # Present the parameter on a [0, 1] scale. The scalers were already
        # fit against these same bounds at module load, so no re-fit is needed.
        scaler = scalers[key]
        scaled_value = scaler.transform([[default_value]])[0][0]
        scaled_bounds = scaler.transform([[bound] for bound in param_info["bounds"]])
        label = "fidelity1" if key == "train_frac" else f"x{numeric_index}"
        return (
            gr.Slider(
                value=scaled_value,
                minimum=scaled_bounds[0][0],
                maximum=scaled_bounds[1][0],
                label=label,
                step=(scaled_bounds[1][0] - scaled_bounds[0][0]) / 100,
            ),
            numeric_index + 1,
            choice_index,
        )
    elif param_info["type"] == "choice":
        return (
            gr.Radio(
                choices=[
                    f"c{choice_index}_{i}" for i in range(len(param_info["values"]))
                ],
                label=f"c{choice_index}",
                value=f"c{choice_index}_{param_info['values'].index(default_value)}",
            ),
            numeric_index,
            choice_index + 1,
        )
    # Previously an unknown type fell through and returned None, which made
    # the caller's tuple unpacking fail with a confusing TypeError.
    raise ValueError(f"Unknown parameter type: {param_info['type']!r}")


# test the evaluate function
# Smoke-test with a mid-range point before building the UI, so a broken
# surrogate fails fast at startup rather than on first user interaction.
blinded_results = evaluate(*[0.5] * 20, "c1_0", "c2_0", "c3_0", 0.5)

# Build one input widget per original parameter, threading the running x/c
# label counters through get_interface.
numeric_index = 1
choice_index = 1
inputs = []
for param in PARAM_BOUNDS:
    # Named `component` (not `input`) to avoid shadowing the builtin input().
    component, numeric_index, choice_index = get_interface(
        param, numeric_index, choice_index
    )
    inputs.append(component)

# Assemble the Gradio app: one widget per parameter (built above) and a
# numeric table output whose width/headers come from the example surrogate
# evaluation. The description documents the blinded benchmark for users.
iface = gr.Interface(
    title="Advanced Optimization",
    fn=evaluate,
    inputs=inputs,
    outputs=gr.Numpy(
        value=np.array([list(example_result.values())]),
        headers=[f"y{i+1}" for i in range(len(example_result))],
        col_count=(len(example_result), "fixed"),
        datatype=["number"] * len(example_result),
    ),
    description="""
    ## Objectives
    
    **Minimize `y1`, `y2`, `y3`, and `y4`**
    
    ### Correlations

    - `y1` and `y2` are correlated
    - `y1` is anticorrelated with `y3`
    - `y2` is anticorrelated with `y3`
    
    ### Noise 
    
    `y1`, `y2`, and `y3` are stochastic with heteroskedastic, parameter-free
    noise, whereas `y4` is deterministic, but still considered 'black-box'. In
    other words, repeat calls with the same input arguments will result in
    different values for `y1`, `y2`, and `y3`, but the same value for `y4`.

    ### Objective thresholds
    
    If `y1` is greater than 0.2, the result is considered "bad" no matter how
    good the other values are. If `y2` is greater than 0.7, the result is
    considered "bad" no matter how good the other values are. If `y3` is greater
    than 1800, the result is considered "bad" no matter how good the other
    values are. If `y4` is greater than 40e6, the result is considered "bad" no
    matter how good the other values are.

    ## Search Space

    ### Fidelity
    
    `fidelity1` is a fidelity parameter. The lowest fidelity is 0, and the
    highest fidelity is 1. The higher the fidelity, the more expensive the
    evaluation, and the higher the quality.
    
    NOTE: `fidelity1` and `y3` are correlated.

    ### Constraints

    - x<sub>19</sub> < x<sub>20</sub>
    - x<sub>6</sub> + x<sub>15</sub> ≀ 1.0

    ### Parameter bounds

    - 0 ≀ x<sub>i</sub> ≀ 1 for i ∈ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
      14, 15, 16, 17, 18, 19, 20}
    - c<sub>1</sub> ∈ {c1_0, c1_1}
    - c<sub>2</sub> ∈ {c2_0, c2_1}
    - c<sub>3</sub> ∈ {c3_0, c3_1, c3_2}
    - 0 ≀ fidelity1 ≀ 1

    ## Notion of best

    Thresholded Pareto front hypervolume vs. running cost for three different
    budgets, and averaged over 10 search campaigns.

    ## References:
    
    1. Baird, S. G.; Liu, M.; Sparks, T. D. High-Dimensional Bayesian
        Optimization of 23 Hyperparameters over 100 Iterations for an
        Attention-Based Network to Predict Materials Property: A Case Study on
        CrabNet Using Ax Platform and SAASBO. Computational Materials Science
        2022, 211, 111505. https://doi.org/10.1016/j.commatsci.2022.111505.
        
    2. Baird, S. G.; Parikh, J. N.; Sparks, T. D. Materials Science
        Optimization Benchmark Dataset for High-Dimensional, Multi-Objective,
        Multi-Fidelity Optimization of CrabNet Hyperparameters. ChemRxiv March
        7, 2023. https://doi.org/10.26434/chemrxiv-2023-9s6r7.
    """,
)
# show_error=True surfaces Python exceptions (e.g. validation errors raised
# by evaluate) in the web UI instead of failing silently.
iface.launch(show_error=True)