File size: 2,917 Bytes
bae4131
6454c0e
 
bae4131
 
3d76e98
bae4131
 
 
 
 
 
 
6454c0e
 
5289522
bae4131
 
3d76e98
bae4131
1e75b2e
bae4131
 
 
 
 
 
 
 
 
 
3d76e98
 
 
 
 
 
6454c0e
 
 
3d76e98
 
bae4131
6454c0e
 
3d76e98
1e75b2e
bae4131
6454c0e
3d76e98
6454c0e
 
 
 
 
 
bae4131
6454c0e
bae4131
6454c0e
 
 
bae4131
6454c0e
3d76e98
6454c0e
3d76e98
 
bae4131
3d76e98
 
bae4131
6454c0e
 
3d76e98
6454c0e
 
bae4131
 
 
6454c0e
bae4131
 
6454c0e
 
 
1e75b2e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import yaml
from yourbench_space.utils import CONFIG_PATH

def generate_base_config(
    hf_org,
    hf_dataset_name,
    model_name,
    provider,
    base_url,
    model_api_key,
    max_concurrent_requests,
    private_dataset,
):
    """Build the default YourBench pipeline configuration and return it as YAML.

    Assembles the Hugging Face settings, the model roster/role assignments,
    and the full stage-by-stage pipeline definition (ingestion through answer
    judging), then serializes the result with ``yaml.dump`` preserving
    insertion order (``sort_keys=False``).

    Args:
        hf_org: Hugging Face organization that will own the dataset.
        hf_dataset_name: Name of the dataset on the Hub; also used to derive
            the ingested-documents dataset name.
        model_name: Model identifier, reused for every pipeline role.
        provider: Inference provider for the model.
        base_url: Base URL of the model endpoint.
        model_api_key: Accepted for interface compatibility; the emitted
            config references the ``$MODEL_API_KEY`` placeholder instead of
            embedding the key.
        max_concurrent_requests: Request-concurrency cap for the model.
        private_dataset: Whether the Hub dataset should be private.

    Returns:
        The configuration serialized as a YAML string.
    """
    # Secrets are written as env-var placeholders, never literal values.
    hf_section = {
        "token": "$HF_TOKEN",
        "private": private_dataset,
        "hf_organization": hf_org,
        "hf_dataset_name": hf_dataset_name,
    }

    model_entry = {
        "model_name": model_name,
        "provider": provider,
        "base_url": base_url,
        "api_key": "$MODEL_API_KEY",
        "max_concurrent_requests": max_concurrent_requests,
    }

    # One model serves every role in the pipeline.
    role_names = (
        "ingestion",
        "summarization",
        "single_shot_question_generation",
        "multi_hop_question_generation",
        "answer_generation",
        "judge_answers",
    )
    roles = {role: [model_name] for role in role_names}

    pipeline = {
        "ingestion": {
            "source_documents_dir": "/app/example/raw",
            "output_dir": "/app/example/ingested",
            "run": True,
        },
        "upload_ingest_to_hub": {
            "source_documents_dir": "/app/example/ingested",
            "hub_dataset_name": f"{hf_dataset_name}_ingested_documents",
            "run": True,
        },
        "summarization": {"run": True},
        "chunking": {
            "chunking_configuration": {
                "l_min_tokens": 64,
                "l_max_tokens": 128,
                "tau_threshold": 0.3,
                "h_min": 2,
                "h_max": 4,
            },
            "run": True,
        },
        "single_shot_question_generation": {
            "diversification_seed": "24 year old adult",
            "run": True,
        },
        "multi_hop_question_generation": {"run": True},
        "answer_generation": {
            "question_type": "single_shot",
            "run": True,
            "strategies": [
                {"name": "zeroshot", "prompt": "ZEROSHOT_QA_USER_PROMPT", "model_name": model_name},
                {"name": "gold", "prompt": "GOLD_QA_USER_PROMPT", "model_name": model_name},
            ],
        },
        "judge_answers": {
            "run": True,
            "comparing_strategies": [["zeroshot", "gold"]],
            "chunk_column_index": 0,
            "random_seed": 42,
        },
    }

    # Insertion order matters: sort_keys=False keeps the YAML in this shape.
    config = {
        "hf_configuration": hf_section,
        "local_dataset_dir": "/app/results/",
        "model_list": [model_entry],
        "model_roles": roles,
        "pipeline": pipeline,
    }
    return yaml.dump(config, sort_keys=False)

def save_config(yaml_text):
    """Write the serialized YAML configuration to ``CONFIG_PATH``.

    Args:
        yaml_text: YAML document (string) to persist, typically produced by
            ``generate_base_config``.

    Returns:
        A short status message confirming the save.
    """
    # Explicit UTF-8: the platform-default encoding (e.g. cp1252 on Windows)
    # could fail on non-ASCII content such as emoji in the config text.
    with open(CONFIG_PATH, "w", encoding="utf-8") as file:
        file.write(yaml_text)
    return "✅ Config saved!"