Elron committed on
Commit
152c60b
·
1 Parent(s): c90ec7c

Upload dataset.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. dataset.py +35 -9
dataset.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import datasets
2
 
3
  from .artifact import Artifact, UnitxtArtifactNotFoundError
@@ -27,7 +29,7 @@ from .processors import __file__ as _
27
  from .random_utils import __file__ as _
28
  from .recipe import __file__ as _
29
  from .register import __file__ as _
30
- from .register import register_all_artifacts
31
  from .renderers import __file__ as _
32
  from .schema import __file__ as _
33
  from .split_utils import __file__ as _
@@ -43,6 +45,8 @@ from .validate import __file__ as _
43
  from .version import __file__ as _
44
  from .version import version
45
 
 
 
46
 
47
  def fetch(artifact_name):
48
  try:
@@ -77,6 +81,18 @@ def parse(query: str):
77
  return result
78
 
79
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  class Dataset(datasets.GeneratorBasedBuilder):
81
  """TODO: Short description of my dataset."""
82
 
@@ -85,15 +101,25 @@ class Dataset(datasets.GeneratorBasedBuilder):
85
 
86
  @property
87
  def generators(self):
88
- register_all_artifacts()
89
  if not hasattr(self, "_generators") or self._generators is None:
90
- recipe = fetch(self.config.name)
91
- if recipe is None:
92
- args = parse(self.config.name)
93
- if "type" not in args:
94
- args["type"] = "common_recipe"
95
- recipe = Artifact.from_dict(args)
96
- self._generators = recipe()
 
 
 
 
 
 
 
 
 
 
 
97
  return self._generators
98
 
99
  def _info(self):
 
1
+ import os
2
+
3
  import datasets
4
 
5
  from .artifact import Artifact, UnitxtArtifactNotFoundError
 
29
  from .random_utils import __file__ as _
30
  from .recipe import __file__ as _
31
  from .register import __file__ as _
32
+ from .register import _reset_env_local_catalogs, register_all_artifacts
33
  from .renderers import __file__ as _
34
  from .schema import __file__ as _
35
  from .split_utils import __file__ as _
 
45
  from .version import __file__ as _
46
  from .version import version
47
 
48
+ __default_recipe__ = "common_recipe"
49
+
50
 
51
  def fetch(artifact_name):
52
  try:
 
81
  return result
82
 
83
 
84
+ def get_dataset_artifact(dataset_str):
85
+ _reset_env_local_catalogs()
86
+ register_all_artifacts()
87
+ recipe = fetch(dataset_str)
88
+ if recipe is None:
89
+ args = parse(dataset_str)
90
+ if "type" not in args:
91
+ args["type"] = os.environ.get("UNITXT_DEFAULT_RECIPE", __default_recipe__)
92
+ recipe = Artifact.from_dict(args)
93
+ return recipe
94
+
95
+
96
  class Dataset(datasets.GeneratorBasedBuilder):
97
  """TODO: Short description of my dataset."""
98
 
 
101
 
102
  @property
103
  def generators(self):
 
104
  if not hasattr(self, "_generators") or self._generators is None:
105
+ try:
106
+ from unitxt.dataset import (
107
+ get_dataset_artifact as get_dataset_artifact_installed,
108
+ )
109
+
110
+ unitxt_installed = True
111
+ except ImportError:
112
+ unitxt_installed = False
113
+
114
+ if unitxt_installed:
115
+ print("Loading with installed unitxt library...")
116
+ dataset = get_dataset_artifact_installed(self.config.name)
117
+ else:
118
+ print("Loading with installed unitxt library...")
119
+ dataset = get_dataset_artifact(self.config.name)
120
+
121
+ self._generators = dataset()
122
+
123
  return self._generators
124
 
125
  def _info(self):