from itertools import product
from pathlib import Path

import numpy as np
import pandas as pd
from ase.db import connect
from dask.distributed import Client
from dask_jobqueue import SLURMCluster
from mlip_arena.models import MLIPEnum
from mlip_arena.tasks import ELASTICITY, PHONON
from mlip_arena.tasks.optimize import run as OPT
from mlip_arena.tasks.utils import get_calculator
from numpy import linalg as LA
from prefect import flow, task
from prefect_dask import DaskTaskRunner
from tqdm.auto import tqdm
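# Benchmark workflow: screen ~1000 randomly sampled structures from the C2DB
# 2D-materials database with several MLIPs. For each (structure, model) pair:
# relax, compute the in-plane elastic tensor and Γ-point phonon frequencies,
# and cache per-model results to pickle files. Tasks are orchestrated with
# Prefect on a Dask SLURM cluster.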
select_models = [
    "ALIGNN",
    "CHGNet",
    "M3GNet",
    "MACE-MP(M)",
    "MACE-MPA",
    "MatterSim",
    "ORBv2",
    "SevenNet",
]
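# Names must match MLIPEnum member names; models not listed here are
# skipped in run_all().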
def elastic_tensor_to_voigt(C):
    """
    Reduce a rank-4 (3x3x3x3) elastic tensor to the 3x3 in-plane matrix
    used for 2D materials, in Voigt-like ordering (xx, yy, xy).

    Out-of-plane components are discarded, and the in-plane shear index
    carries a Mandel-style sqrt(2) weight on each axis so that
    eigen-analysis of the returned matrix matches that of the tensor.

    Parameters:
        C (numpy.ndarray): A 3x3x3x3 elastic tensor.

    Returns:
        numpy.ndarray: A 3x3 in-plane elastic tensor.
    """
    # Full 3D Voigt mapping, kept for reference:
    # voigt_map = {
    #     (0, 0): 0, (1, 1): 1, (2, 2): 2,  # normal components
    #     (1, 2): 3, (2, 1): 3,             # shear components
    #     (0, 2): 4, (2, 0): 4,
    #     (0, 1): 5, (1, 0): 5,
    # }
    voigt_map = {
        (0, 0): 0,  # in-plane normal components
        (1, 1): 1,
        (2, 2): -1,  # out-of-plane components (excluded for 2D)
        (1, 2): -1,
        (2, 1): -1,
        (0, 2): -1,
        (2, 0): -1,
        (0, 1): 2,  # in-plane shear components
        (1, 0): 2,
    }
    C_voigt = np.zeros((3, 3))

    for i in range(3):
        for j in range(3):
            for k in range(3):
                for l in range(3):
                    alpha = voigt_map[(i, j)]
                    beta = voigt_map[(k, l)]
                    if alpha == -1 or beta == -1:
                        continue

                    factor = 1
                    # The shear row/column each pick up sqrt(2)
                    # (in the 3D version: alpha in [3, 4, 5]).
                    if alpha == 2:
                        factor = factor * (2**0.5)
                    if beta == 2:
                        factor = factor * (2**0.5)

                    C_voigt[alpha, beta] = C[i, j, k, l] * factor

    return C_voigt
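# Worked example (hypothetical numbers): with C[0, 0, 0, 0] = 100 and
# C[0, 1, 0, 1] = 30 (plus the symmetry-equivalent entries), the returned
# matrix has C_voigt[0, 0] = 100 and C_voigt[2, 2] = sqrt(2) * sqrt(2) * 30 = 60.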
# -
@task
def run_one(model, row):
    if Path(f"{model.name}.pkl").exists():
        df = pd.read_pickle(f"{model.name}.pkl")
        # if row.key_value_pairs.get('uid', None) in df['uid'].unique():
        #     pass
    else:
        df = pd.DataFrame(columns=["model", "uid", "eigenvalues", "frequencies"])

    atoms = row.toatoms()
    # print(data := row.key_value_pairs)

    calc = get_calculator(model)

    # Relax the structure before computing properties.
    result_opt = OPT(
        atoms,
        calc,
        optimizer="FIRE",
        criterion=dict(fmax=0.05, steps=500),
        symmetry=True,
    )

    atoms = result_opt["atoms"]

    result_elastic = ELASTICITY(
        atoms,
        calc,
        optimizer="FIRE",
        criterion=dict(fmax=0.05, steps=500),
        pre_relax=False,
    )

    elastic_tensor = elastic_tensor_to_voigt(result_elastic["elastic_tensor"])
    # A positive-definite in-plane tensor corresponds to Born mechanical stability.
    eigenvalues, eigenvectors = LA.eig(elastic_tensor)

    outdir = Path(f"{model.name}") / row.key_value_pairs.get(
        "uid", atoms.get_chemical_formula()
    )
    outdir.mkdir(parents=True, exist_ok=True)

    np.savez(outdir / "elastic.npz", tensor=elastic_tensor, eigenvalues=eigenvalues)
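    # Γ-point phonon check on a 2x2x1 supercell (a single repetition along
    # the vacuum axis of the 2D slab).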
    result_phonon = PHONON(
        atoms,
        calc,
        supercell_matrix=(2, 2, 1),
        outdir=outdir,
    )

    frequencies = result_phonon["phonon"].get_frequencies(q=(0, 0, 0))
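    # phonopy reports imaginary modes as negative frequencies, so any
    # negative entry here flags a dynamical instability.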
    new_row = pd.DataFrame(
        [
            {
                "model": model.name,
                "uid": row.key_value_pairs.get("uid", None),
                "eigenvalues": eigenvalues,
                "frequencies": frequencies,
            }
        ]
    )

    df = pd.concat([df, new_row], ignore_index=True)
    df.drop_duplicates(subset=["model", "uid"], keep="last", inplace=True)
    df.to_pickle(f"{model.name}.pkl")
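# To inspect cached results later (a minimal sketch; the pickle name follows
# the f"{model.name}.pkl" convention above, and the model name here is only
# an example):
#
#     df = pd.read_pickle("MACE-MPA.pkl")
#     stable = df[df["eigenvalues"].apply(lambda e: np.all(np.real(e) > 0))]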
@flow
def run_all():
    import random

    random.seed(0)

    futures = []

    with connect("c2db.db") as db:
        # Sample 1000 structures reproducibly; a set gives O(1) membership tests.
        random_indices = set(random.sample(range(1, len(db) + 1), 1000))
        for row, model in tqdm(
            product(db.select(filter=lambda r: r["id"] in random_indices), MLIPEnum)
        ):
            if model.name not in select_models:
                continue

            future = run_one.submit(model, row)
            futures.append(future)

    # raise_on_failure=False returns exceptions instead of raising them, so
    # one failed structure does not abort the whole sweep.
    return [f.result(raise_on_failure=False) for f in futures]
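# For a quick local smoke test (a sketch; assumes a copy of c2db.db in the
# working directory), the flow can run on a temporary local Dask cluster
# instead of SLURM:
#
#     run_all.with_options(task_runner=DaskTaskRunner())()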
# +
if __name__ == "__main__":
    nodes_per_alloc = 1
    gpus_per_alloc = 1
    ntasks = 1

    cluster_kwargs = dict(
        cores=1,
        memory="64 GB",
        processes=1,
        shebang="#!/bin/bash",
        account="matgen",
        walltime="00:30:00",
        # job_cpu=128,
        job_mem="0",
        job_script_prologue=[
            "source ~/.bashrc",
            "module load python",
            "source activate /pscratch/sd/c/cyrusyc/.conda/dev",
        ],
        job_directives_skip=["-n", "--cpus-per-task", "-J"],
        job_extra_directives=[
            "-J c2db",
            "-q regular",
            f"-N {nodes_per_alloc}",
            "-C gpu",
            f"-G {gpus_per_alloc}",
        ],
    )

    cluster = SLURMCluster(**cluster_kwargs)
    print(cluster.job_script())
    # Scale the number of SLURM worker jobs with the task backlog.
    cluster.adapt(minimum_jobs=25, maximum_jobs=50)
    client = Client(cluster)
# -

    run_all.with_options(
        task_runner=DaskTaskRunner(address=client.scheduler.address), log_prints=True
    )()
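# Run this file directly from a login node: dask_jobqueue submits the GPU
# worker jobs via sbatch using the job script printed above, and Prefect
# dispatches the run_one tasks to the resulting Dask workers.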