File size: 4,364 Bytes
6454c0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import yaml
import gradio as gr

from yourbench_space.utils import CONFIG_PATH


def generate_config(hf_token: gr.OAuthToken | None, hf_org, model_name, provider, base_url, api_key, max_concurrent_requests):
    """Build the full YourBench pipeline configuration and return it as YAML text.

    Args:
        hf_token: Gradio OAuth token (or None) granting Hugging Face Hub access.
        hf_org: Hugging Face organization that will own the produced datasets.
        model_name: Model identifier reused for every pipeline role.
        provider: Inference provider for the model.
        base_url: Base URL of the inference endpoint.
        api_key: API key for the inference endpoint.
        max_concurrent_requests: Per-model concurrency limit.

    Returns:
        The configuration serialized with ``yaml.dump`` in block style.
    """
    # gr.OAuthToken is a dataclass; dumping it directly would emit a
    # !!python/object YAML tag rather than the raw token string, so unwrap
    # the .token attribute when an OAuthToken is provided.
    token = hf_token.token if isinstance(hf_token, gr.OAuthToken) else hf_token
    config = {
        "hf_configuration": {
            "token": token,
            "private": True,
            "hf_organization": hf_org
        },
        "model_list": [{
            "model_name": model_name,
            "provider": provider,
            "base_url": base_url,
            "api_key": api_key,
            "max_concurrent_requests": max_concurrent_requests
        }],
        # Every pipeline stage uses the same single model.
        "model_roles": {role: [model_name] for role in [
            "ingestion", "summarization", "single_shot_question_generation",
            "multi_hop_question_generation", "answer_generation", "judge_answers"
        ]},
        # NOTE(review): hardcoded 16 ignores the max_concurrent_requests
        # parameter used in model_list above — confirm this is intentional.
        "inference_config": {"max_concurrent_requests": 16},
        "pipeline": {
            "ingestion": {
                "source_documents_dir": "/app/uploaded_files",
                "output_dir": "/app/ingested",
                "run": True
            },
            "upload_ingest_to_hub": {
                "source_documents_dir": "/app/ingested",
                "hub_dataset_name": "test_ingested_documents",
                "local_dataset_path": "/app/ingested_dataset",
                "run": True
            },
            "summarization": {
                "source_dataset_name": "test_ingested_documents",
                "output_dataset_name": "test_summaries",
                "local_dataset_path": "/results/test_summaries",
                "concat_existing_dataset": False,
                "run": True
            },
            "chunking": {
                "source_dataset_name": "test_summaries",
                "output_dataset_name": "test_chunked_documents",
                "local_dataset_path": "/results/test_chunked_documents",
                "concat_existing_dataset": False,
                # Token-length window and hop bounds for the chunker;
                # values are passed through verbatim to the pipeline.
                "chunking_configuration": {
                    "l_min_tokens": 64,
                    "l_max_tokens": 128,
                    "tau_threshold": 0.3,
                    "h_min": 2,
                    "h_max": 4
                },
                "run": True
            },
            "single_shot_question_generation": {
                "source_dataset_name": "test_chunked_documents",
                "output_dataset_name": "test_single_shot_questions",
                "local_dataset_path": "/results/test_single_shot_questions",
                "diversification_seed": "24 year old adult",
                "concat_existing_dataset": False,
                "run": True
            },
            "multi_hop_question_generation": {
                "source_dataset_name": "test_chunked_documents",
                "output_dataset_name": "test_multi_hop_questions",
                "local_dataset_path": "/results/test_multi_hop_questions",
                "concat_existing_dataset": False,
                "run": True
            },
            "answer_generation": {
                "run": True,
                "question_dataset_name": "test_single_shot_questions",
                "output_dataset_name": "test_answered_questions",
                "local_dataset_path": "/results/test_answered_questions",
                "concat_existing_dataset": False,
                # Two answering strategies, later compared by judge_answers.
                "strategies": [{
                    "name": "zeroshot",
                    "prompt": "ZEROSHOT_QA_USER_PROMPT",
                    "model_name": model_name
                }, {
                    "name": "gold",
                    "prompt": "GOLD_QA_USER_PROMPT",
                    "model_name": model_name
                }]
            },
            "judge_answers": {
                "run": True,
                "source_judge_dataset_name": "test_answered_questions",
                "output_judged_dataset_name": "test_judged_comparisons",
                "local_dataset_path": "/results/test_judged_comparisons",
                "concat_existing_dataset": False,
                "comparing_strategies": [["zeroshot", "gold"]],
                "chunk_column_index": 0,
                "random_seed": 42
            }
        }
    }
    return yaml.dump(config, default_flow_style=False)

def save_config(yaml_text):
    """Persist the YAML configuration text to CONFIG_PATH.

    Args:
        yaml_text: Serialized YAML configuration to write verbatim.

    Returns:
        A user-facing status message for the Gradio UI.
    """
    # Explicit encoding keeps the written file stable regardless of the
    # container's locale default.
    with open(CONFIG_PATH, "w", encoding="utf-8") as file:
        file.write(yaml_text)
    return "✅ Config saved!"