File size: 2,917 Bytes
bae4131
6454c0e
 
bae4131
 
3d76e98
bae4131
 
 
 
 
 
 
6454c0e
 
5289522
bae4131
 
3d76e98
bae4131
1e75b2e
bae4131
 
 
 
 
 
 
 
 
 
3d76e98
 
 
 
 
 
6454c0e
 
 
3d76e98
 
bae4131
6454c0e
 
3d76e98
1e75b2e
bae4131
6454c0e
3d76e98
6454c0e
 
 
 
 
 
bae4131
6454c0e
bae4131
6454c0e
 
 
bae4131
6454c0e
3d76e98
6454c0e
3d76e98
 
bae4131
3d76e98
 
bae4131
6454c0e
 
3d76e98
6454c0e
 
bae4131
 
 
6454c0e
bae4131
 
6454c0e
 
 
1e75b2e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import yaml
from yourbench_space.utils import CONFIG_PATH

def generate_base_config(
    hf_org,
    hf_dataset_name,
    model_name,
    provider,
    base_url,
    model_api_key,
    max_concurrent_requests,
    private_dataset,
):
    """Build the default YourBench pipeline configuration and return it as YAML.

    Assembles the Hugging Face settings, the model roster/role assignments,
    and the full stage-by-stage pipeline definition (ingestion through answer
    judging), then serializes the result with ``yaml.dump`` preserving
    insertion order (``sort_keys=False``).

    Args:
        hf_org: Hugging Face organization that will own the dataset.
        hf_dataset_name: Name of the dataset on the Hub; also used to derive
            the ingested-documents dataset name.
        model_name: Model identifier, reused for every pipeline role.
        provider: Inference provider for the model.
        base_url: Base URL of the model endpoint.
        model_api_key: Accepted for interface compatibility; the emitted
            config references the ``$MODEL_API_KEY`` placeholder instead of
            embedding the key.
        max_concurrent_requests: Request-concurrency cap for the model.
        private_dataset: Whether the Hub dataset should be private.

    Returns:
        The configuration serialized as a YAML string.
    """
    # Secrets are written as env-var placeholders, never literal values.
    hf_section = {
        "token": "$HF_TOKEN",
        "private": private_dataset,
        "hf_organization": hf_org,
        "hf_dataset_name": hf_dataset_name,
    }

    model_entry = {
        "model_name": model_name,
        "provider": provider,
        "base_url": base_url,
        "api_key": "$MODEL_API_KEY",
        "max_concurrent_requests": max_concurrent_requests,
    }

    # One model serves every role in the pipeline.
    role_names = (
        "ingestion",
        "summarization",
        "single_shot_question_generation",
        "multi_hop_question_generation",
        "answer_generation",
        "judge_answers",
    )
    roles = {role: [model_name] for role in role_names}

    pipeline = {
        "ingestion": {
            "source_documents_dir": "/app/example/raw",
            "output_dir": "/app/example/ingested",
            "run": True,
        },
        "upload_ingest_to_hub": {
            "source_documents_dir": "/app/example/ingested",
            "hub_dataset_name": f"{hf_dataset_name}_ingested_documents",
            "run": True,
        },
        "summarization": {"run": True},
        "chunking": {
            "chunking_configuration": {
                "l_min_tokens": 64,
                "l_max_tokens": 128,
                "tau_threshold": 0.3,
                "h_min": 2,
                "h_max": 4,
            },
            "run": True,
        },
        "single_shot_question_generation": {
            "diversification_seed": "24 year old adult",
            "run": True,
        },
        "multi_hop_question_generation": {"run": True},
        "answer_generation": {
            "question_type": "single_shot",
            "run": True,
            "strategies": [
                {"name": "zeroshot", "prompt": "ZEROSHOT_QA_USER_PROMPT", "model_name": model_name},
                {"name": "gold", "prompt": "GOLD_QA_USER_PROMPT", "model_name": model_name},
            ],
        },
        "judge_answers": {
            "run": True,
            "comparing_strategies": [["zeroshot", "gold"]],
            "chunk_column_index": 0,
            "random_seed": 42,
        },
    }

    # Insertion order matters: sort_keys=False keeps the YAML in this shape.
    config = {
        "hf_configuration": hf_section,
        "local_dataset_dir": "/app/results/",
        "model_list": [model_entry],
        "model_roles": roles,
        "pipeline": pipeline,
    }
    return yaml.dump(config, sort_keys=False)

def save_config(yaml_text):
    """Write the serialized YAML configuration to ``CONFIG_PATH``.

    Args:
        yaml_text: YAML document (string) to persist, typically produced by
            ``generate_base_config``.

    Returns:
        A short status message confirming the save.
    """
    # Explicit UTF-8: the platform-default encoding (e.g. cp1252 on Windows)
    # could fail on non-ASCII content such as emoji in the config text.
    with open(CONFIG_PATH, "w", encoding="utf-8") as file:
        file.write(yaml_text)
    return "✅ Config saved!"