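"""
AutoMerger: periodically samples two 7B models from the top of the Open LLM
Leaderboard, merges them with mergekit (SLERP or DARE-TIES), uploads the
result to the Hugging Face Hub, and launches a RunPod evaluation job.
"""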
import os
import time
import random
import subprocess

import yaml
import runpod
import gradio as gr
import pandas as pd
from jinja2 import Template
from huggingface_hub import ModelCard, HfApi, repo_info
from huggingface_hub.utils import RepositoryNotFoundError
# Read API tokens from the environment
HF_TOKEN = os.environ.get("HF_TOKEN")
runpod.api_key = os.environ.get("RUNPOD_TOKEN")

# Parameters
USERNAME = "automerger"
N_ROWS = 20        # number of top leaderboard rows to sample models from
WAIT_TIME = 3600   # seconds between merge iterations
def create_dataset() -> bool:
    """
    Use scrape-open-llm-leaderboard to export the leaderboard as a CSV file.
    Returns True on success.
    """
    command = ["python3", "scrape-open-llm-leaderboard/main.py", "-csv"]
    try:
        result = subprocess.run(command, check=True, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, text=True)
        print(f"scrape-open-llm-leaderboard: {result.stdout}")
        return True
    except subprocess.CalledProcessError as e:
        print(f"scrape-open-llm-leaderboard: {e.stderr}")
        return False
def merge_models() -> None:
    """
    Use mergekit to create a merge.
    """
    command = [
        "mergekit-yaml", "config.yaml", "merge",
        "--copy-tokenizer", "--allow-crimes",
        "--out-shard-size", "1B", "--lazy-unpickle",
    ]
    try:
        result = subprocess.run(command, check=True, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, text=True)
        print(f"mergekit: {result.stdout}")
    except subprocess.CalledProcessError as e:
        print(f"mergekit: {e.stderr}")
def make_df(file_path: str, n_rows: int) -> pd.DataFrame:
    """
    Create a filtered dataset from the Open LLM Leaderboard.
    """
    # Metadata columns that are no longer needed after filtering
    columns = ["Available on the hub", "Model sha", "T", "Type", "Precision",
               "Architecture", "Weight type", "Hub ❤️", "Flagged", "MoE"]
    ds = pd.read_csv(file_path)
    df = (
        ds[
            # 7.24B parameters corresponds to the Mistral-7B architecture
            (ds["#Params (B)"] == 7.24) &
            (ds["Available on the hub"] == True) &
            (ds["Flagged"] == False) &
            (ds["MoE"] == False) &
            (ds["Weight type"] == "Original")
        ]
        .drop(columns=columns)
        .drop_duplicates(subset=["Model"])
        .iloc[:n_rows]
    )
    return df
def repo_exists(repo_id: str) -> bool:
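    """
    Check whether a model repo already exists on the Hugging Face Hub.
    """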
    try:
        repo_info(repo_id)
        return True
    except RepositoryNotFoundError:
        return False
def get_name(models: list[pd.Series], username: str, version=0) -> str:
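    """
    Build a name for the merge from the two parent model names,
    bumping a version suffix until the repo name is free on the Hub.
    """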
    model_name = models[0]["Model"].split("/")[-1].split("-")[0].capitalize() \
        + models[1]["Model"].split("/")[-1].split("-")[0].capitalize() \
        + "-7B"
    if version > 0:
        model_name = model_name.split("-")[0] + f"-v{version}-7B"

    # If the name is already taken, retry with an incremented version number
    if repo_exists(f"{username}/{model_name}"):
        return get_name(models, username, version + 1)
    return model_name
def get_license(models: list[pd.Series]) -> str:
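    """
    Pick a license for the merge based on the parents' licenses.
    """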
    license1 = models[0]["Hub License"]
    license2 = models[1]["Hub License"]
    # Default to the most restrictive license
    license = "cc-by-nc-4.0"
    if license1 == "cc-by-nc-4.0" or license2 == "cc-by-nc-4.0":
        license = "cc-by-nc-4.0"
    elif license1 == "apache-2.0" or license2 == "apache-2.0":
        license = "apache-2.0"
    elif license1 == "MIT" and license2 == "MIT":
        license = "MIT"
    return license
def create_config(models: list[pd.Series]) -> str:
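    """
    Write a mergekit config for the two models, randomly choosing
    between a SLERP and a DARE-TIES merge.
    """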
    slerp_config = f"""
slices:
  - sources:
      - model: {models[0]["Model"]}
        layer_range: [0, 32]
      - model: {models[1]["Model"]}
        layer_range: [0, 32]
merge_method: slerp
base_model: {models[0]["Model"]}
parameters:
  t:
    - filter: self_attn
      value: [0, 0.5, 0.3, 0.7, 1]
    - filter: mlp
      value: [1, 0.5, 0.7, 0.3, 0]
    - value: 0.5
dtype: bfloat16
random_seed: 0
"""
    dare_config = f"""
models:
  - model: {models[0]["Model"]}
    # No parameters necessary for base model
  - model: {models[1]["Model"]}
    parameters:
      density: 0.53
      weight: 0.6
merge_method: dare_ties
base_model: {models[0]["Model"]}
parameters:
  int8_mask: true
dtype: bfloat16
random_seed: 0
"""
    # Pick DARE-TIES 60% of the time and SLERP 40% of the time
    yaml_config = random.choices([slerp_config, dare_config], weights=[0.4, 0.6], k=1)[0]
    with open("config.yaml", "w", encoding="utf-8") as f:
        f.write(yaml_config)
    return yaml_config
def create_model_card(yaml_config: str, model_name: str, username: str, license: str) -> None:
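    """
    Render a README for the merge from a Jinja template and save it
    in the merge folder.
    """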
    template_text = """
---
license: {{ license }}
base_model:
{%- for model in models %}
  - {{ model }}
{%- endfor %}
tags:
- merge
- mergekit
- lazymergekit
---

# {{ model_name }}

{{ model_name }} is an automated merge created by [Maxime Labonne](https://huggingface.co/mlabonne) using the following models:
{%- for model in models %}
* [{{ model }}](https://huggingface.co/{{ model }})
{%- endfor %}

## 🧩 Configuration

```yaml
{{- yaml_config -}}
```

## 💻 Usage

```python
!pip install -qU transformers accelerate

from transformers import AutoTokenizer
import transformers
import torch

model = "{{ username }}/{{ model_name }}"
messages = [{"role": "user", "content": "What is a large language model?"}]

tokenizer = AutoTokenizer.from_pretrained(model)
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    torch_dtype=torch.float16,
    device_map="auto",
)

outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
print(outputs[0]["generated_text"])
```
"""
    # Create a Jinja template object
    jinja_template = Template(template_text.strip())

    # Get the list of models from the config
    data = yaml.safe_load(yaml_config)
    if "models" in data:
        # dare_ties config: list the models that carry merge parameters
        models = [data["models"][i]["model"] for i in range(len(data["models"])) if "parameters" in data["models"][i]]
    elif "parameters" in data:
        # slerp config: both sources of the single slice
        models = [data["slices"][0]["sources"][i]["model"] for i in range(len(data["slices"][0]["sources"]))]
    elif "slices" in data:
        # fallback: first source of each slice
        models = [data["slices"][i]["sources"][0]["model"] for i in range(len(data["slices"]))]
    else:
        raise Exception("No models or slices found in yaml config")

    # Fill the template
    content = jinja_template.render(
        model_name=model_name,
        models=models,
        yaml_config=yaml_config,
        username=username,
        license=license,
    )

    # Save the model card
    card = ModelCard(content)
    card.save("merge/README.md")
def upload_model(api: HfApi, username: str, model_name: str) -> None:
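    """
    Create the target repo (if needed) and upload the merge folder.
    """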
    api.create_repo(
        repo_id=f"{username}/{model_name}",
        repo_type="model",
        exist_ok=True,
    )
    api.upload_folder(
        repo_id=f"{username}/{model_name}",
        folder_path="merge",
    )
def create_pod(model_name: str, username: str, n=10, wait_seconds=10):
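    """
    Launch a RunPod instance that evaluates the merged model with
    llm-autoeval on the Nous benchmark suite, retrying on failure.
    """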
    for attempt in range(n):
        try:
            pod = runpod.create_pod(
                name=f"Automerge {model_name} on Nous",
                image_name="runpod/pytorch:2.0.1-py3.10-cuda11.8.0-devel-ubuntu22.04",
                gpu_type_id="NVIDIA GeForce RTX 3090",
                cloud_type="COMMUNITY",
                gpu_count=1,
                volume_in_gb=0,
                container_disk_in_gb=50,
                template_id="au6nz6emhk",
                env={
                    "BENCHMARK": "nous",
                    "MODEL_ID": f"{username}/{model_name}",
                    "REPO": "https://github.com/mlabonne/llm-autoeval.git",
                    "TRUST_REMOTE_CODE": False,
                    "DEBUG": False,
                    "GITHUB_API_TOKEN": os.environ["GITHUB_TOKEN"],
                }
            )
            print("Pod creation succeeded.")
            return pod
        except Exception as e:
            # Community GPUs may be temporarily unavailable; wait and retry
            print(f"Attempt {attempt + 1} failed with error: {e}")
            if attempt < n - 1:
                print(f"Waiting {wait_seconds} seconds before retrying...")
                time.sleep(wait_seconds)
            else:
                print("All attempts failed. Giving up.")
                raise
def merge_loop():
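    """
    Run one full iteration: scrape the leaderboard, sample two models,
    merge them, upload the result, and kick off an evaluation pod.
    """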
    # Start HF API
    api = HfApi(token=HF_TOKEN)

    # Create dataset (proceed only if successful)
    if not create_dataset():
        print("Failed to create dataset. Skipping merge loop.")
        return
    df = make_df("open-llm-leaderboard.csv", N_ROWS)

    # Sample two models
    sample = df.sample(n=2)
    models = [sample.iloc[i] for i in range(2)]

    # Get model name
    model_name = get_name(models, USERNAME, version=0)
    print(model_name)

    # Get model license
    license = get_license(models)
    print(license)

    # Merge configs
    yaml_config = create_config(models)
    print(yaml_config)

    # Merge models
    merge_models()

    # Create model card
    create_model_card(yaml_config, model_name, USERNAME, license)

    # Upload model
    upload_model(api, USERNAME, model_name)

    # Evaluate model on Runpod
    create_pod(model_name, USERNAME)
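
# One-time setup: install the leaderboard scraper and mergekit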
command = ["git", "clone", "-q", "https://github.com/Weyaxi/scrape-open-llm-leaderboard"] | |
subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) | |
command = ["pip", "install", "-r", "scrape-open-llm-leaderboard/requirements.txt"] | |
subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) | |
command = ["git", "clone", "https://github.com/arcee-ai/mergekit.git"] | |
subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) | |
command = ["pip", "install", "-e", "mergekit"] | |
subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) | |
# Gradio interface
title = """
<div align="center">
  <p style="font-size: 36px;">♾️ AutoMerger</p>
  <p style="font-size: 20px;">📝 <a href="https://medium.com/towards-data-science/merge-large-language-models-with-mergekit-2118fb392b54">Model merging</a> • 💻 <a href="https://github.com/arcee-ai/mergekit">Mergekit</a> • 🐦 <a href="https://twitter.com/maximelabonne">Follow me on X</a></p>
  <p><em>AutoMerger selects two 7B models from the top of the Open LLM Leaderboard, combines them with a merge technique, and evaluates the resulting model.</em></p>
</div>
"""
with gr.Blocks() as demo:
    gr.Markdown(title)

# prevent_thread_lock keeps the main thread free for the merge loop below
demo.launch(server_name="0.0.0.0", prevent_thread_lock=True)
print("Start AutoMerger...") | |
# Main loop | |
while True: | |
merge_loop() | |
time.sleep(WAIT_TIME) |