Elron committed on
Commit
ed33057
·
verified ·
1 Parent(s): 365fb61

Upload folder using huggingface_hub

Browse files
Files changed (7) hide show
  1. README.md +5 -5
  2. api.py +2 -2
  3. metrics.py +14 -0
  4. struct_data_operators.py +21 -0
  5. templates.py +6 -3
  6. text_utils.py +1 -1
  7. version.py +1 -1
README.md CHANGED
@@ -40,11 +40,11 @@ https://github.com/IBM/unitxt/assets/23455264/baef9131-39d4-4164-90b2-05da52919f
40
 
41
  ### 🦄 Currently on Unitxt Catalog
42
 
43
- ![NLP Tasks](https://img.shields.io/badge/NLP_tasks-48-blue)
44
- ![Dataset Cards](https://img.shields.io/badge/Dataset_Cards-537-blue)
45
- ![Templates](https://img.shields.io/badge/Templates-265-blue)
46
- ![Formats](https://img.shields.io/badge/Formats-23-blue)
47
- ![Metrics](https://img.shields.io/badge/Metrics-136-blue)
48
 
49
  ### 🦄 Run Unitxt Exploration Dashboard
50
 
 
40
 
41
  ### 🦄 Currently on Unitxt Catalog
42
 
43
+ ![Abstract Tasks](https://img.shields.io/badge/Abstract_Tasks-62-blue)
44
+ ![Dataset Cards](https://img.shields.io/badge/Dataset_Cards-3025-blue)
45
+ ![Templates](https://img.shields.io/badge/Templates-342-blue)
46
+ ![Benchmarks](https://img.shields.io/badge/Benchmarks-4-blue)
47
+ ![Metrics](https://img.shields.io/badge/Metrics-422-blue)
48
 
49
  ### 🦄 Run Unitxt Exploration Dashboard
50
 
api.py CHANGED
@@ -145,8 +145,8 @@ def _source_to_dataset(
145
  cache_dir = dir_to_be_deleted if not use_cache else None
146
  ds_builder = UnitxtDataset(
147
  dataset_name="unitxt",
148
- config_name="recipe-" + short_hex_hash(source.to_json()),
149
- hash=hash(source.to_json()),
150
  version=constants.version,
151
  cache_dir=cache_dir,
152
  )
 
145
  cache_dir = dir_to_be_deleted if not use_cache else None
146
  ds_builder = UnitxtDataset(
147
  dataset_name="unitxt",
148
+ config_name="recipe-" + short_hex_hash(repr(source)),
149
+ hash=hash(repr(source)),
150
  version=constants.version,
151
  cache_dir=cache_dir,
152
  )
metrics.py CHANGED
@@ -3355,6 +3355,8 @@ class CustomF1(GlobalMetric):
3355
 
3356
 
3357
  class NER(CustomF1):
 
 
3358
  prediction_type = List[Tuple[str, str]]
3359
 
3360
  def get_element_group(self, element, additional_input):
@@ -3364,6 +3366,18 @@ class NER(CustomF1):
3364
  return str(element)
3365
 
3366
 
 
 
 
 
 
 
 
 
 
 
 
 
3367
  def normalize_answer(s):
3368
  """Lower text and remove punctuation, articles and extra whitespace."""
3369
 
 
3355
 
3356
 
3357
  class NER(CustomF1):
3358
+ """F1 Metrics that receives as input a list of (Entity,EntityType) pairs."""
3359
+
3360
  prediction_type = List[Tuple[str, str]]
3361
 
3362
  def get_element_group(self, element, additional_input):
 
3366
  return str(element)
3367
 
3368
 
3369
+ class KeyValueExtraction(CustomF1):
3370
+ """F1 Metrics that receives as input a list of (Key,Value) pairs."""
3371
+
3372
+ prediction_type = List[Tuple[str, str]]
3373
+
3374
+ def get_element_group(self, element, additional_input):
3375
+ return element[0]
3376
+
3377
+ def get_element_representation(self, element, additional_input):
3378
+ return str(element)
3379
+
3380
+
3381
  def normalize_answer(s):
3382
  """Lower text and remove punctuation, articles and extra whitespace."""
3383
 
struct_data_operators.py CHANGED
@@ -23,6 +23,7 @@ For key-value pairs, expected input format is:
23
  {"key1": "value1", "key2": value2, "key3": "value3"}
24
  """
25
 
 
26
  import json
27
  import random
28
  from abc import ABC, abstractmethod
@@ -31,12 +32,14 @@ from typing import (
31
  Dict,
32
  List,
33
  Optional,
 
34
  )
35
 
36
  import pandas as pd
37
 
38
  from .augmentors import TypeDependentAugmentor
39
  from .dict_utils import dict_get
 
40
  from .operators import FieldOperator, InstanceOperator
41
  from .random_utils import new_random_generator
42
  from .serializers import ImageSerializer, TableSerializer
@@ -1019,3 +1022,21 @@ class ShuffleColumnsNames(TypeDependentAugmentor):
1019
  random.shuffle(shuffled_header)
1020
 
1021
  return {"header": shuffled_header, "rows": table["rows"]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  {"key1": "value1", "key2": value2, "key3": "value3"}
24
  """
25
 
26
+ import ast
27
  import json
28
  import random
29
  from abc import ABC, abstractmethod
 
32
  Dict,
33
  List,
34
  Optional,
35
+ Tuple,
36
  )
37
 
38
  import pandas as pd
39
 
40
  from .augmentors import TypeDependentAugmentor
41
  from .dict_utils import dict_get
42
+ from .error_utils import UnitxtWarning
43
  from .operators import FieldOperator, InstanceOperator
44
  from .random_utils import new_random_generator
45
  from .serializers import ImageSerializer, TableSerializer
 
1022
  random.shuffle(shuffled_header)
1023
 
1024
  return {"header": shuffled_header, "rows": table["rows"]}
1025
+
1026
+
1027
+ class JsonStrToListOfKeyValuePairs(FieldOperator):
1028
+ def process_value(self, text: str) -> List[Tuple[str, str]]:
1029
+ text = text.replace("null", "None")
1030
+
1031
+ try:
1032
+ dict_value = ast.literal_eval(text)
1033
+ except Exception as e:
1034
+ UnitxtWarning(
1035
+ f"Unable to convert input text to json format in JsonStrToListOfKeyValuePairs due to {e}. Text: {text}"
1036
+ )
1037
+ dict_value = {}
1038
+ return [
1039
+ (str(key), str(value))
1040
+ for key, value in dict_value.items()
1041
+ if value is not None
1042
+ ]
templates.py CHANGED
@@ -533,7 +533,8 @@ class MultipleChoiceTemplate(InputFormatTemplate):
533
  input and reference dictionaries.
534
  target_field (str): The key under which the correct choice is stored in the
535
  reference dictionary (can be integer index or textual label).
536
- choices_separator (str): A string used to join formatted choices (e.g. ", ").
 
537
  source_choice_format (str): A Python format string used for displaying each choice
538
  in the input fields (e.g. "{choice_numeral}. {choice_text}").
539
  target_choice_format (str): A Python format string used for displaying each choice
@@ -544,8 +545,10 @@ class MultipleChoiceTemplate(InputFormatTemplate):
544
  set with `shuffle_choices_seed`.
545
  shuffle_choices_seed (int, optional): If provided, the choices are shuffled with
546
  this fixed integer seed for reproducibility.
547
- sort_choices_by_length (bool): If True, sorts choices by their length (ascending).
548
- sort_choices_alphabetically (bool): If True, sorts choices in alphabetical order.
 
 
549
  reverse_choices (bool): If True, reverses the order of the choices after any
550
  sorting has been applied. Defaults to False to preserve backward compatibility.
551
  """
 
533
  input and reference dictionaries.
534
  target_field (str): The key under which the correct choice is stored in the
535
  reference dictionary (can be integer index or textual label).
536
+ choices_separator (str): A string used to join formatted
537
+ choices (e.g. ", ").
538
  source_choice_format (str): A Python format string used for displaying each choice
539
  in the input fields (e.g. "{choice_numeral}. {choice_text}").
540
  target_choice_format (str): A Python format string used for displaying each choice
 
545
  set with `shuffle_choices_seed`.
546
  shuffle_choices_seed (int, optional): If provided, the choices are shuffled with
547
  this fixed integer seed for reproducibility.
548
+ sort_choices_by_length (bool): If True, sorts choices
549
+ by their length (ascending).
550
+ sort_choices_alphabetically (bool): If True, sorts choices
551
+ in alphabetical order.
552
  reverse_choices (bool): If True, reverses the order of the choices after any
553
  sorting has been applied. Defaults to False to preserve backward compatibility.
554
  """
text_utils.py CHANGED
@@ -232,7 +232,7 @@ def construct_dict_as_yaml_lines(d, indent_delta=2) -> List[str]:
232
 
233
  # d1 = re.sub(r"(\n+)", r'"\1"', str(d))
234
  d1 = str(d).replace("\n", "\\n").replace('"', '\\"')
235
- if "\\n" in d1:
236
  d1 = f'"{d1}"'
237
  return [d1]
238
 
 
232
 
233
  # d1 = re.sub(r"(\n+)", r'"\1"', str(d))
234
  d1 = str(d).replace("\n", "\\n").replace('"', '\\"')
235
+ if "\\n" in d1 or d1 == "":
236
  d1 = f'"{d1}"'
237
  return [d1]
238
 
version.py CHANGED
@@ -1 +1 @@
1
- version = "1.17.2"
 
1
+ version = "1.18.0"