# Spaces: atomind / mlip-arena (Running)
# File size: 5,392 Bytes
# 79edee4

from itertools import product
from pathlib import Path

import numpy as np
import pandas as pd
from dask.distributed import Client
from dask_jobqueue import SLURMCluster
from mlip_arena.models import MLIPEnum
from mlip_arena.tasks import ELASTICITY, OPT, PHONON
from mlip_arena.tasks.optimize import run as OPT
from mlip_arena.tasks.utils import get_calculator
from numpy import linalg as LA
from prefect import flow, task
from prefect_dask import DaskTaskRunner
from tqdm.auto import tqdm

from ase.db import connect

# Names of the MLIPEnum members to benchmark. run_all() compares each
# member's ``.name`` against this list and skips members that are not listed.
select_models = [
    "ALIGNN",
    "CHGNet",
    "M3GNet",
    "MACE-MP(M)",
    "MACE-MPA",
    "MatterSim",
    "ORBv2",
    "SevenNet",
]

def elastic_tensor_to_voigt(C):
    """
    Reduce the in-plane part of a rank-4 elastic tensor to a 3x3 matrix.

    Only tensor indices 0 and 1 are read; every component that involves
    index 2 is ignored. The reduced indices are 0 -> (0, 0), 1 -> (1, 1)
    and 2 -> the in-plane shear pair, which is read from element (1, 0).
    Each occurrence of the shear index multiplies the entry by sqrt(2).

    NOTE(review): despite the function name, the sqrt(2) weighting looks
    like a Mandel-style normalisation rather than plain Voigt notation --
    confirm against the intended convention.

    Parameters:
    C (numpy.ndarray): Rank-4 elastic tensor indexed as C[i, j, k, l]
        (a 3x3x3x3 array in the calling code).

    Returns:
    numpy.ndarray: A 3x3 float matrix holding the weighted in-plane components.
    """
    # Tensor index pair read for each reduced index 0, 1, 2.
    index_pairs = ((0, 0), (1, 1), (1, 0))
    # Weight attached to each reduced index; only the shear slot is scaled.
    weights = (1.0, 1.0, 2**0.5)

    C_voigt = np.zeros((3, 3))

    for alpha, (i, j) in enumerate(index_pairs):
        for beta, (k, m) in enumerate(index_pairs):
            C_voigt[alpha, beta] = C[i, j, k, m] * (weights[alpha] * weights[beta])

    return C_voigt


# -


def _append_result(pkl_path, record):
    """Merge one result record into the per-model pickle at ``pkl_path``."""
    if pkl_path.exists():
        df = pd.read_pickle(pkl_path)
    else:
        df = pd.DataFrame(columns=["model", "uid", "eigenvalues", "frequencies"])

    df = pd.concat([df, pd.DataFrame([record])], ignore_index=True)
    # A re-run of the same (model, uid) replaces the earlier entry.
    df.drop_duplicates(subset=["model", "uid"], keep="last", inplace=True)
    df.to_pickle(pkl_path)


@task
def run_one(model, row):
    """
    Relax one structure with one model, then store its elastic and phonon results.

    Steps: optimise the structure (FIRE, fmax=0.05, at most 500 steps,
    symmetry=True), compute the elastic tensor on the optimised structure
    without a further pre-relaxation, reduce it with
    ``elastic_tensor_to_voigt`` and take its eigenvalues, then run the phonon
    task with a (2, 2, 1) supercell and read the frequencies at q = (0, 0, 0).

    Outputs:
    - ``<model.name>/<uid or chemical formula>/elastic.npz`` with the reduced
      tensor and its eigenvalues; the same directory is passed to the phonon
      task as ``outdir``.
    - ``<model.name>.pkl``: a DataFrame with one row per (model, uid).

    Parameters:
    model: Member of MLIPEnum; ``model.name`` is used for output paths.
    row: Database row providing ``toatoms()`` and ``key_value_pairs``.

    Returns:
    None
    """
    atoms = row.toatoms()
    uid = row.key_value_pairs.get("uid", None)

    calc = get_calculator(model)

    result_opt = OPT(
        atoms,
        calc,
        optimizer="FIRE",
        criterion=dict(fmax=0.05, steps=500),
        symmetry=True,
    )

    atoms = result_opt["atoms"]

    result_elastic = ELASTICITY(
        atoms,
        calc,
        optimizer="FIRE",
        criterion=dict(fmax=0.05, steps=500),
        pre_relax=False,
    )

    elastic_tensor = elastic_tensor_to_voigt(result_elastic["elastic_tensor"])
    eigenvalues, _ = LA.eig(elastic_tensor)

    # Fall back to the chemical formula when the row carries no uid.
    outdir = Path(f"{model.name}") / row.key_value_pairs.get(
        "uid", atoms.get_chemical_formula()
    )
    outdir.mkdir(parents=True, exist_ok=True)

    np.savez(outdir / "elastic.npz", tensor=elastic_tensor, eigenvalues=eigenvalues)

    result_phonon = PHONON(
        atoms,
        calc,
        supercell_matrix=(2, 2, 1),
        outdir=outdir,
    )

    frequencies = result_phonon["phonon"].get_frequencies(q=(0, 0, 0))

    # Load the shared pickle only now, after the long computation, so rows
    # written by other tasks in the meantime are not overwritten by a stale copy.
    # NOTE(review): the read-merge-write is still not locked; tasks for the
    # same model that finish at the same instant can still lose a row.
    _append_result(
        Path(f"{model.name}.pkl"),
        {
            "model": model.name,
            "uid": uid,
            "eigenvalues": eigenvalues,
            "frequencies": frequencies,
        },
    )


@flow
def run_all():
    """
    Submit ``run_one`` for a fixed random sample of database rows and the selected models.

    Up to 1000 row ids are drawn with a fixed seed (0) from ``c2db.db``; when
    the database holds fewer than 1000 rows, all of them are used. One task is
    submitted per (row, model) pair for every MLIPEnum member whose name is
    listed in ``select_models``.

    Returns:
    list: The result of each submitted task, collected with
        ``raise_on_failure=False``.
    """
    import random

    random.seed(0)

    # Filter the models once instead of skipping inside the product loop.
    models = [model for model in MLIPEnum if model.name in select_models]

    futures = []
    with connect("c2db.db") as db:
        n_rows = len(db)
        # Cap the sample size: random.sample raises ValueError when asked for
        # more items than the population holds.
        # NOTE(review): assumes row ids run contiguously from 1 to len(db).
        sampled_ids = set(random.sample(range(1, n_rows + 1), min(1000, n_rows)))
        rows = db.select(filter=lambda r: r["id"] in sampled_ids)
        for row, model in tqdm(product(rows, models)):
            futures.append(run_one.submit(model, row))
    return [f.result(raise_on_failure=False) for f in futures]


# +


if __name__ == "__main__":
    # Resources requested per SLURM allocation.
    nodes_per_alloc = 1
    gpus_per_alloc = 1
    ntasks = 1

    # Shell lines placed in the job script before the Dask worker starts.
    prologue = [
        "source ~/.bashrc",
        "module load python",
        "source activate /pscratch/sd/c/cyrusyc/.conda/dev",
    ]
    # Directives listed in `skipped_directives` are dropped from the generated
    # header; the ones in `extra_directives` are added to it.
    skipped_directives = ["-n", "--cpus-per-task", "-J"]
    extra_directives = [
        "-J c2db",
        "-q regular",
        f"-N {nodes_per_alloc}",
        "-C gpu",
        f"-G {gpus_per_alloc}",
    ]

    cluster_kwargs = {
        "cores": 1,
        "memory": "64 GB",
        "processes": 1,
        "shebang": "#!/bin/bash",
        "account": "matgen",
        "walltime": "00:30:00",
        # job_cpu=128,
        "job_mem": "0",
        "job_script_prologue": prologue,
        "job_directives_skip": skipped_directives,
        "job_extra_directives": extra_directives,
    }

    cluster = SLURMCluster(**cluster_kwargs)
    # Print the generated submission script for inspection.
    print(cluster.job_script())
    # Scale between 25 and 50 jobs.
    cluster.adapt(minimum_jobs=25, maximum_jobs=50)
    client = Client(cluster)
    # -

    # Run the flow with its tasks dispatched to the Dask scheduler created above.
    runner = DaskTaskRunner(address=client.scheduler.address)
    flow_on_cluster = run_all.with_options(task_runner=runner, log_prints=True)
    flow_on_cluster()