Spaces:

unitxt
/

metric

Running

Elron committed on Feb 4

Commit

ed33057

verified ·

1 Parent(s): 365fb61

Upload folder using huggingface_hub

Files changed (7) hide show

README.md CHANGED Viewed

@@ -40,11 +40,11 @@ https://github.com/IBM/unitxt/assets/23455264/baef9131-39d4-4164-90b2-05da52919f
 ### 🦄 Currently on Unitxt Catalog
-![NLP Tasks](https://img.shields.io/badge/NLP_tasks-48-blue)
-![Dataset Cards](https://img.shields.io/badge/Dataset_Cards-537-blue)
-![Templates](https://img.shields.io/badge/Templates-265-blue)
-![Formats](https://img.shields.io/badge/Formats-23-blue)
-![Metrics](https://img.shields.io/badge/Metrics-136-blue)
 ### 🦄 Run Unitxt Exploration Dashboard

 ### 🦄 Currently on Unitxt Catalog
+![Abstract Tasks](https://img.shields.io/badge/Abstract_Tasks-62-blue)
+![Dataset Cards](https://img.shields.io/badge/Dataset_Cards-3025-blue)
+![Templates](https://img.shields.io/badge/Templates-342-blue)
+![Benchmarks](https://img.shields.io/badge/Benchmarks-4-blue)
+![Metrics](https://img.shields.io/badge/Metrics-422-blue)
 ### 🦄 Run Unitxt Exploration Dashboard

api.py CHANGED Viewed

@@ -145,8 +145,8 @@ def _source_to_dataset(
         cache_dir = dir_to_be_deleted if not use_cache else None
         ds_builder = UnitxtDataset(
             dataset_name="unitxt",
-            config_name="recipe-" + short_hex_hash(source.to_json()),
-            hash=hash(source.to_json()),
             version=constants.version,
             cache_dir=cache_dir,
         )

         cache_dir = dir_to_be_deleted if not use_cache else None
         ds_builder = UnitxtDataset(
             dataset_name="unitxt",
+            config_name="recipe-" + short_hex_hash(repr(source)),
+            hash=hash(repr(source)),
             version=constants.version,
             cache_dir=cache_dir,
         )

metrics.py CHANGED Viewed

@@ -3355,6 +3355,8 @@ class CustomF1(GlobalMetric):
 class NER(CustomF1):
     prediction_type = List[Tuple[str, str]]
     def get_element_group(self, element, additional_input):
@@ -3364,6 +3366,18 @@ class NER(CustomF1):
         return str(element)
 def normalize_answer(s):
     """Lower text and remove punctuation, articles and extra whitespace."""

 class NER(CustomF1):
+    """F1 Metrics that receives as input a list of (Entity,EntityType) pairs."""
     prediction_type = List[Tuple[str, str]]
     def get_element_group(self, element, additional_input):
         return str(element)
+class KeyValueExtraction(CustomF1):
+    """F1 Metrics that receives as input a list of (Key,Value) pairs."""
+    prediction_type = List[Tuple[str, str]]
+    def get_element_group(self, element, additional_input):
+        return element[0]
+    def get_element_representation(self, element, additional_input):
+        return str(element)
 def normalize_answer(s):
     """Lower text and remove punctuation, articles and extra whitespace."""

struct_data_operators.py CHANGED Viewed

@@ -23,6 +23,7 @@ For key-value pairs, expected input format is:
     {"key1": "value1", "key2": value2, "key3": "value3"}
 """
 import json
 import random
 from abc import ABC, abstractmethod
@@ -31,12 +32,14 @@ from typing import (
     Dict,
     List,
     Optional,
 )
 import pandas as pd
 from .augmentors import TypeDependentAugmentor
 from .dict_utils import dict_get
 from .operators import FieldOperator, InstanceOperator
 from .random_utils import new_random_generator
 from .serializers import ImageSerializer, TableSerializer
@@ -1019,3 +1022,21 @@ class ShuffleColumnsNames(TypeDependentAugmentor):
         random.shuffle(shuffled_header)
         return {"header": shuffled_header, "rows": table["rows"]}

     {"key1": "value1", "key2": value2, "key3": "value3"}
 """
+import ast
 import json
 import random
 from abc import ABC, abstractmethod
     Dict,
     List,
     Optional,
+    Tuple,
 )
 import pandas as pd
 from .augmentors import TypeDependentAugmentor
 from .dict_utils import dict_get
+from .error_utils import UnitxtWarning
 from .operators import FieldOperator, InstanceOperator
 from .random_utils import new_random_generator
 from .serializers import ImageSerializer, TableSerializer
         random.shuffle(shuffled_header)
         return {"header": shuffled_header, "rows": table["rows"]}
+class JsonStrToListOfKeyValuePairs(FieldOperator):
+    def process_value(self, text: str) -> List[Tuple[str, str]]:
+        text = text.replace("null", "None")
+        try:
+            dict_value = ast.literal_eval(text)
+        except Exception as e:
+            UnitxtWarning(
+                f"Unable to convert input text to json format in JsonStrToListOfKeyValuePairs due to {e}. Text: {text}"
+            )
+            dict_value = {}
+        return [
+            (str(key), str(value))
+            for key, value in dict_value.items()
+            if value is not None
+        ]

templates.py CHANGED Viewed

@@ -533,7 +533,8 @@ class MultipleChoiceTemplate(InputFormatTemplate):
             input and reference dictionaries.
         target_field (str): The key under which the correct choice is stored in the
             reference dictionary (can be integer index or textual label).
-        choices_separator (str): A string used to join formatted choices (e.g. ", ").
         source_choice_format (str): A Python format string used for displaying each choice
             in the input fields (e.g. "{choice_numeral}. {choice_text}").
         target_choice_format (str): A Python format string used for displaying each choice
@@ -544,8 +545,10 @@ class MultipleChoiceTemplate(InputFormatTemplate):
             set with `shuffle_choices_seed`.
         shuffle_choices_seed (int, optional): If provided, the choices are shuffled with
             this fixed integer seed for reproducibility.
-        sort_choices_by_length (bool): If True, sorts choices by their length (ascending).
-        sort_choices_alphabetically (bool): If True, sorts choices in alphabetical order.
         reverse_choices (bool): If True, reverses the order of the choices after any
             sorting has been applied. Defaults to False to preserve backward compatibility.
     """

             input and reference dictionaries.
         target_field (str): The key under which the correct choice is stored in the
             reference dictionary (can be integer index or textual label).
+        choices_separator (str): A string used to join formatted
+            choices (e.g. ", ").
         source_choice_format (str): A Python format string used for displaying each choice
             in the input fields (e.g. "{choice_numeral}. {choice_text}").
         target_choice_format (str): A Python format string used for displaying each choice
             set with `shuffle_choices_seed`.
         shuffle_choices_seed (int, optional): If provided, the choices are shuffled with
             this fixed integer seed for reproducibility.
+        sort_choices_by_length (bool): If True, sorts choices
+            by their length (ascending).
+        sort_choices_alphabetically (bool): If True, sorts choices
+            in alphabetical order.
         reverse_choices (bool): If True, reverses the order of the choices after any
             sorting has been applied. Defaults to False to preserve backward compatibility.
     """

text_utils.py CHANGED Viewed

@@ -232,7 +232,7 @@ def construct_dict_as_yaml_lines(d, indent_delta=2) -> List[str]:
     # d1 = re.sub(r"(\n+)", r'"\1"', str(d))
     d1 = str(d).replace("\n", "\\n").replace('"', '\\"')
-    if "\\n" in d1:
         d1 = f'"{d1}"'
     return [d1]

     # d1 = re.sub(r"(\n+)", r'"\1"', str(d))
     d1 = str(d).replace("\n", "\\n").replace('"', '\\"')
+    if "\\n" in d1 or d1 == "":
         d1 = f'"{d1}"'
     return [d1]

version.py CHANGED Viewed

	@@ -1 +1 @@
1	- version = "1.17.2"


1	+ version = "1.18.0"