|
{ |
|
"config_general": { |
|
"lighteval_sha": "?", |
|
"num_fewshot_seeds": 1, |
|
"override_batch_size": -1, |
|
"max_samples": null, |
|
"job_id": "", |
|
"start_time": 1342.339773502, |
|
"end_time": 6498.524528434, |
|
"total_evaluation_time_secondes": "5156.184754932", |
|
"model_name": "tinycompany/BiBo-Mini-v0.99", |
|
"model_sha": "9e66d1135c27c915ecf382453909e5bc1f6a650d", |
|
"model_dtype": "torch.bfloat16", |
|
"model_size": "3.31 GB", |
|
"config": null |
|
}, |
|
"results": { |
|
"indiceval|ARC-Challenge:hindi|10": { |
|
"acc": 0.30887372013651876, |
|
"acc_stderr": 0.013501770929344003, |
|
"acc_norm": 0.3361774744027304, |
|
"acc_norm_stderr": 0.013804855026205752 |
|
}, |
|
"indiceval|ARC-Easy:hindi|5": { |
|
"acc": 0.5627104377104377, |
|
"acc_stderr": 0.010178768429321586, |
|
"acc_norm": 0.5349326599326599, |
|
"acc_norm_stderr": 0.01023471305272368 |
|
}, |
|
"all": { |
|
"acc": 0.43579207892347827, |
|
"acc_stderr": 0.011840269679332795, |
|
"acc_norm": 0.43555506716769515, |
|
"acc_norm_stderr": 0.012019784039464717 |
|
} |
|
}, |
|
"versions": { |
|
"indiceval|ARC-Challenge:hindi|10": 0, |
|
"indiceval|ARC-Easy:hindi|5": 0 |
|
}, |
|
"config_tasks": { |
|
"indiceval|ARC-Challenge:hindi": { |
|
"name": "ARC-Challenge:hindi", |
|
"prompt_function": "arc_indic", |
|
"hf_repo": "Cognitive-Lab/Indic-ARC-Challenge", |
|
"hf_subset": "hi", |
|
"metric": [ |
|
"loglikelihood_acc", |
|
"loglikelihood_acc_norm_nospace" |
|
], |
|
"hf_avail_splits": [ |
|
"train", |
|
"validation", |
|
"test" |
|
], |
|
"evaluation_splits": [ |
|
"test" |
|
], |
|
"few_shots_split": null, |
|
"few_shots_select": "random_sampling_from_train", |
|
"generation_size": 1, |
|
"stop_sequence": [ |
|
"\n" |
|
], |
|
"output_regex": null, |
|
"frozen": false, |
|
"suite": [ |
|
"indiceval", |
|
"leaderboard", |
|
"arc" |
|
], |
|
"original_num_docs": 1172, |
|
"effective_num_docs": 1172, |
|
"trust_dataset": true, |
|
"must_remove_duplicate_docs": null |
|
}, |
|
"indiceval|ARC-Easy:hindi": { |
|
"name": "ARC-Easy:hindi", |
|
"prompt_function": "arc_indic", |
|
"hf_repo": "Cognitive-Lab/Indic-ARC-Easy", |
|
"hf_subset": "hi", |
|
"metric": [ |
|
"loglikelihood_acc", |
|
"loglikelihood_acc_norm_nospace" |
|
], |
|
"hf_avail_splits": [ |
|
"train", |
|
"validation", |
|
"test" |
|
], |
|
"evaluation_splits": [ |
|
"test" |
|
], |
|
"few_shots_split": null, |
|
"few_shots_select": "random_sampling_from_train", |
|
"generation_size": 1, |
|
"stop_sequence": [ |
|
"\n" |
|
], |
|
"output_regex": null, |
|
"frozen": false, |
|
"suite": [ |
|
"indiceval", |
|
"leaderboard", |
|
"arc" |
|
], |
|
"original_num_docs": 2376, |
|
"effective_num_docs": 2376, |
|
"trust_dataset": true, |
|
"must_remove_duplicate_docs": null |
|
} |
|
}, |
|
"summary_tasks": { |
|
"indiceval|ARC-Challenge:hindi|10": { |
|
"hashes": { |
|
"hash_examples": "ede3937107b50671", |
|
"hash_full_prompts": "939fa143e0be8e76", |
|
"hash_input_tokens": "260d85f401153bdc", |
|
"hash_cont_tokens": "b95b113bb2e57385" |
|
}, |
|
"truncated": 0, |
|
"non_truncated": 1172, |
|
"padded": 4680, |
|
"non_padded": 7, |
|
"effective_few_shots": 10.0, |
|
"num_truncated_few_shots": 0 |
|
}, |
|
"indiceval|ARC-Easy:hindi|5": { |
|
"hashes": { |
|
"hash_examples": "0186dde6d6cf5f12", |
|
"hash_full_prompts": "cbaaa6e70e1e350d", |
|
"hash_input_tokens": "5496daca90725251", |
|
"hash_cont_tokens": "f7cf5b125bc52602" |
|
}, |
|
"truncated": 0, |
|
"non_truncated": 2376, |
|
"padded": 9441, |
|
"non_padded": 60, |
|
"effective_few_shots": 5.0, |
|
"num_truncated_few_shots": 0 |
|
} |
|
}, |
|
"summary_general": { |
|
"hashes": { |
|
"hash_examples": "799a1387b6c8a4d2", |
|
"hash_full_prompts": "9938eadd006079ef", |
|
"hash_input_tokens": "13eac7a7a2c17518", |
|
"hash_cont_tokens": "14960e7bf91fd26b" |
|
}, |
|
"truncated": 0, |
|
"non_truncated": 3548, |
|
"padded": 14121, |
|
"non_padded": 67, |
|
"num_truncated_few_shots": 0 |
|
}, |
|
"email": "[email protected]", |
|
"language": "hindi" |
|
} |