# NOTE(review): lines of HuggingFace Space file-viewer chrome ("Spaces: Sleeping",
# file size, commit hash, line-number strip) were captured with this file during
# extraction; they are not Python and broke parsing, so they are commented out here.
import yaml
import gradio as gr
from yourbench_space.utils import CONFIG_PATH
def generate_config(hf_token: gr.OAuthToken | None, hf_org, model_name, provider, base_url, api_key, max_concurrent_requests):
    """Build the YourBench pipeline configuration and return it as a YAML string.

    Args:
        hf_token: Gradio OAuth token of the logged-in user, or None when logged out.
        hf_org: Hugging Face organization to publish datasets under.
        model_name: Model used for every pipeline role and answer strategy.
        provider: Inference provider for the model.
        base_url: Base URL of the inference endpoint.
        api_key: API key for the inference endpoint.
        max_concurrent_requests: Concurrency limit applied to the model and
            the global inference configuration.

    Returns:
        The configuration serialized with ``yaml.dump`` (block style).
    """
    config = {
        "hf_configuration": {
            # yaml.dump cannot represent a gr.OAuthToken object (it raises
            # RepresenterError), so extract the plain token string.
            "token": hf_token.token if hf_token else None,
            "private": True,
            "hf_organization": hf_org
        },
        "model_list": [{
            "model_name": model_name,
            "provider": provider,
            "base_url": base_url,
            "api_key": api_key,
            "max_concurrent_requests": max_concurrent_requests
        }],
        # Every pipeline role is served by the single configured model.
        "model_roles": {role: [model_name] for role in [
            "ingestion", "summarization", "single_shot_question_generation",
            "multi_hop_question_generation", "answer_generation", "judge_answers"
        ]},
        # Was hard-coded to 16 while the function already accepts a
        # max_concurrent_requests parameter; use the parameter consistently.
        "inference_config": {"max_concurrent_requests": max_concurrent_requests},
        "pipeline": {
            "ingestion": {
                "source_documents_dir": "/app/uploaded_files",
                "output_dir": "/app/ingested",
                "run": True
            },
            "upload_ingest_to_hub": {
                "source_documents_dir": "/app/ingested",
                "hub_dataset_name": "test_ingested_documents",
                "local_dataset_path": "/app/ingested_dataset",
                "run": True
            },
            "summarization": {
                "source_dataset_name": "test_ingested_documents",
                "output_dataset_name": "test_summaries",
                "local_dataset_path": "/results/test_summaries",
                "concat_existing_dataset": False,
                "run": True
            },
            "chunking": {
                "source_dataset_name": "test_summaries",
                "output_dataset_name": "test_chunked_documents",
                "local_dataset_path": "/results/test_chunked_documents",
                "concat_existing_dataset": False,
                "chunking_configuration": {
                    "l_min_tokens": 64,
                    "l_max_tokens": 128,
                    "tau_threshold": 0.3,
                    "h_min": 2,
                    "h_max": 4
                },
                "run": True
            },
            "single_shot_question_generation": {
                "source_dataset_name": "test_chunked_documents",
                "output_dataset_name": "test_single_shot_questions",
                "local_dataset_path": "/results/test_single_shot_questions",
                "diversification_seed": "24 year old adult",
                "concat_existing_dataset": False,
                "run": True
            },
            "multi_hop_question_generation": {
                "source_dataset_name": "test_chunked_documents",
                "output_dataset_name": "test_multi_hop_questions",
                "local_dataset_path": "/results/test_multi_hop_questions",
                "concat_existing_dataset": False,
                "run": True
            },
            "answer_generation": {
                "run": True,
                "question_dataset_name": "test_single_shot_questions",
                "output_dataset_name": "test_answered_questions",
                "local_dataset_path": "/results/test_answered_questions",
                "concat_existing_dataset": False,
                # Two strategies are generated so judge_answers can compare them.
                "strategies": [{
                    "name": "zeroshot",
                    "prompt": "ZEROSHOT_QA_USER_PROMPT",
                    "model_name": model_name
                }, {
                    "name": "gold",
                    "prompt": "GOLD_QA_USER_PROMPT",
                    "model_name": model_name
                }]
            },
            "judge_answers": {
                "run": True,
                "source_judge_dataset_name": "test_answered_questions",
                "output_judged_dataset_name": "test_judged_comparisons",
                "local_dataset_path": "/results/test_judged_comparisons",
                "concat_existing_dataset": False,
                "comparing_strategies": [["zeroshot", "gold"]],
                "chunk_column_index": 0,
                "random_seed": 42
            }
        }
    }
    return yaml.dump(config, default_flow_style=False)
def save_config(yaml_text):
    """Write the given YAML text to CONFIG_PATH and return a status message.

    Args:
        yaml_text: Serialized configuration to persist.

    Returns:
        A human-readable success message for display in the UI.
    """
    # Explicit UTF-8: the platform-default encoding is not guaranteed to
    # round-trip non-ASCII content in the config.
    with open(CONFIG_PATH, "w", encoding="utf-8") as file:
        file.write(yaml_text)
    return "✅ Config saved!"