Spaces:
Running
Running
File size: 4,511 Bytes
19d93fe 1045c52 19d93fe ebac224 4ad8722 ebac224 4ad8722 1045c52 19d93fe 4ad8722 19d93fe 1045c52 4ad8722 ebac224 4ad8722 ebac224 4ad8722 ebac224 4ad8722 ebac224 4ad8722 ebac224 4ad8722 ebac224 4ad8722 ebac224 1045c52 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
# Task names grouped under ViDoRe v1. Each entry is matched verbatim by
# get_datasets_nickname(), so spelling and casing must stay exact.
VIDORE_V1_MTEB_NAMES = [
    "VidoreArxivQARetrieval",
    "VidoreDocVQARetrieval",
    "VidoreInfoVQARetrieval",
    "VidoreShiftProjectRetrieval",
    "VidoreSyntheticDocQAAIRetrieval",
    "VidoreSyntheticDocQAEnergyRetrieval",
    "VidoreSyntheticDocQAGovernmentReportsRetrieval",
    "VidoreSyntheticDocQAHealthcareIndustryRetrieval",
    "VidoreTabfquadRetrieval",
    "VidoreTatdqaRetrieval",
]
# Task names grouped under ViDoRe v2. Each entry is matched verbatim by
# get_datasets_nickname(), so spelling and casing must stay exact.
VIDORE_V2_MTEB_NAMES = [
    "Vidore2BioMedicalLecturesRetrieval",
    "Vidore2EconomicsReportsRetrieval",
    "Vidore2ESGReportsHLRetrieval",
    "Vidore2ESGReportsRetrieval",
]
# Substrings that identify the older ViDoRe dataset names. These are the same
# keywords deprecated_get_datasets_nickname() searches for, in the same order.
DEPRECATED_VIDORE_DATASETS_KEYWORDS = [
    "arxivqa",
    "docvqa",
    "infovqa",
    "tabfquad",
    "tatdqa",
    "shift",
    "artificial_intelligence",
    "energy",
    "government_reports",
    "healthcare_industry",
]
# Substrings that identify the older ViDoRe 2 dataset names. Every keyword
# here is one that deprecated_get_datasets_nickname() tests for.
DEPRECATED_VIDORE_2_DATASETS_KEYWORDS = [
    "restaurant_esg",
    "rse_restaurant",
    "mit_biomedical",
    "economics_macro",
    "biomedical_lectures",
    "esg_reports",
    "economics_reports",
]
# Display nickname for each supported task name; keys are matched exactly.
_NICKNAME_BY_TASK_NAME = {
    "VidoreArxivQARetrieval": "ArxivQA",
    "VidoreDocVQARetrieval": "DocVQA",
    "VidoreInfoVQARetrieval": "InfoVQA",
    "VidoreTabfquadRetrieval": "TabFQuad",
    "VidoreTatdqaRetrieval": "TAT-DQA",
    "VidoreShiftProjectRetrieval": "Shift Project",
    "VidoreSyntheticDocQAAIRetrieval": "Artificial Intelligence",
    "VidoreSyntheticDocQAEnergyRetrieval": "Energy",
    "VidoreSyntheticDocQAGovernmentReportsRetrieval": "Government Reports",
    "VidoreSyntheticDocQAHealthcareIndustryRetrieval": "Healthcare Industry",
    "Vidore2ESGReportsHLRetrieval": "ESG Restaurant Human English",
    "Vidore2ESGReportsRetrieval": "ESG Restaurant Synthetic Multilingual",
    "Vidore2BioMedicalLecturesRetrieval": "MIT Biomedical Multilingual",
    "Vidore2EconomicsReportsRetrieval": "Economics Macro Multilingual",
}


def get_datasets_nickname(dataset_name) -> str:
    """Return the display nickname for an exact ViDoRe task name.

    Args:
        dataset_name: Task name such as ``"VidoreDocVQARetrieval"``. It must
            equal one of the known names exactly; no substring matching is done.

    Returns:
        The human-readable nickname associated with ``dataset_name``.

    Raises:
        ValueError: If ``dataset_name`` is not one of the known task names.
    """
    try:
        return _NICKNAME_BY_TASK_NAME[dataset_name]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments: they cannot equal any known
        # name either, so they are reported the same way as an unknown name.
        raise ValueError(f"Dataset {dataset_name} not found in ViDoRe") from None
# (keyword, nickname) pairs for the older single-keyword dataset names.
# Order matters: the first keyword found in the name wins.
_LEGACY_KEYWORD_NICKNAMES = (
    ("arxivqa", "ArxivQA"),
    ("docvqa", "DocVQA"),
    ("infovqa", "InfoVQA"),
    ("tabfquad", "TabFQuad"),
    ("tatdqa", "TAT-DQA"),
    ("shift", "Shift Project"),
    ("artificial_intelligence", "Artificial Intelligence"),
    ("energy", "Energy"),
    ("government_reports", "Government Reports"),
    ("healthcare_industry", "Healthcare Industry"),
)

# (old-style keyword, new-style keyword, plain nickname, multilingual nickname).
# Old-style names are multilingual when they contain "multilingual"; new-style
# names are multilingual unless they contain "_eng_".
_LEGACY_V2_FAMILIES = (
    ("rse_restaurant", "esg_reports", "ESG Restaurant Synthetic", "ESG Restaurant Synthetic Multilingual"),
    ("mit_biomedical", "biomedical_lectures", "MIT Biomedical", "MIT Biomedical Multilingual"),
    ("economics_macro", "economics_reports", "Economics Macro", "Economics Macro Multilingual"),
)


def deprecated_get_datasets_nickname(dataset_name) -> str:
    """Return the display nickname for a dataset name by substring matching.

    Args:
        dataset_name: Dataset name that is searched for known keywords.

    Returns:
        The nickname of the first keyword rule that matches ``dataset_name``.

    Raises:
        ValueError: If no keyword rule matches ``dataset_name``.
    """
    for keyword, nickname in _LEGACY_KEYWORD_NICKNAMES:
        if keyword in dataset_name:
            return nickname

    # Checked before the families below because "esg_reports_human" also
    # contains the "esg_reports" keyword.
    if "restaurant_esg" in dataset_name or "esg_reports_human" in dataset_name:
        return "ESG Restaurant Human"

    for old_keyword, new_keyword, plain_nickname, multilingual_nickname in _LEGACY_V2_FAMILIES:
        is_old_style = old_keyword in dataset_name
        is_new_style = new_keyword in dataset_name
        if not (is_old_style or is_new_style):
            continue
        if (is_old_style and "multilingual" in dataset_name) or (
            is_new_style and "_eng_" not in dataset_name
        ):
            return multilingual_nickname
        return plain_nickname

    raise ValueError(f"Dataset {dataset_name} not found in ViDoRe")
|