import yaml

from yourbench_space.utils import CONFIG_PATH


def generate_base_config(
    hf_org,
    hf_dataset_name,
    model_name,
    provider,
    base_url,
    model_api_key,
    max_concurrent_requests,
    private_dataset,
):
    """Build the default YourBench pipeline configuration and return it as YAML text."""
    config = {
        "hf_configuration": {
            "token": "$HF_TOKEN",  # placeholder, resolved from the environment downstream
            "private": private_dataset,
            "hf_organization": hf_org,
            "hf_dataset_name": hf_dataset_name,
        },
        "local_dataset_dir": "/app/results/",
        "model_list": [
            {
                "model_name": model_name,
                "provider": provider,
                "base_url": base_url,
                # Placeholder resolved from the environment downstream; the
                # model_api_key argument itself is never embedded in the YAML.
                "api_key": "$MODEL_API_KEY",
                "max_concurrent_requests": max_concurrent_requests,
            }
        ],
        # The same model serves every pipeline role.
        "model_roles": {
            "ingestion": [model_name],
            "summarization": [model_name],
            "single_shot_question_generation": [model_name],
            "multi_hop_question_generation": [model_name],
            "answer_generation": [model_name],
            "judge_answers": [model_name],
        },
        "pipeline": {
            "ingestion": {
                "source_documents_dir": "/app/example/raw",
                "output_dir": "/app/example/ingested",
                "run": True,
            },
            "upload_ingest_to_hub": {
                "source_documents_dir": "/app/example/ingested",
                "hub_dataset_name": f"{hf_dataset_name}_ingested_documents",
                "run": True,
            },
            "summarization": {"run": True},
            "chunking": {
                "chunking_configuration": {
                    "l_min_tokens": 64,
                    "l_max_tokens": 128,
                    "tau_threshold": 0.3,
                    "h_min": 2,
                    "h_max": 4,
                },
                "run": True,
            },
            "single_shot_question_generation": {
                "diversification_seed": "24 year old adult",
                "run": True,
            },
            "multi_hop_question_generation": {"run": True},
            "answer_generation": {
                "question_type": "single_shot",
                "run": True,
                # Answers are generated under two strategies, compared later by the judge.
                "strategies": [
                    {"name": "zeroshot", "prompt": "ZEROSHOT_QA_USER_PROMPT", "model_name": model_name},
                    {"name": "gold", "prompt": "GOLD_QA_USER_PROMPT", "model_name": model_name},
                ],
            },
            "judge_answers": {
                "run": True,
                "comparing_strategies": [["zeroshot", "gold"]],
                "chunk_column_index": 0,
                "random_seed": 42,
            },
        },
    }
    return yaml.dump(config, sort_keys=False)


def save_config(yaml_text):
    """Write the YAML text to the application's config path."""
    with open(CONFIG_PATH, "w") as file:
        file.write(yaml_text)
    return "✅ Config saved!"
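

# A minimal usage sketch, assuming the module is run directly. The argument
# values below are illustrative placeholders, not project defaults; $HF_TOKEN
# and $MODEL_API_KEY are assumed to be substituted from the environment by the
# downstream pipeline, so no secret is written into the YAML here.
if __name__ == "__main__":
    yaml_text = generate_base_config(
        hf_org="my-org",
        hf_dataset_name="my_benchmark",
        model_name="meta-llama/Llama-3.1-8B-Instruct",
        provider="hf-inference",
        base_url=None,
        model_api_key="$MODEL_API_KEY",
        max_concurrent_requests=8,
        private_dataset=True,
    )
    print(yaml_text)  # inspect the generated YAML before persisting it
    print(save_config(yaml_text))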