Spaces:
Runtime error
Runtime error
Updated for logging messages
Browse files - load_data.py +9 -7
load_data.py
CHANGED
@@ -38,11 +38,11 @@ class LoadDatasets:
|
|
38 |
# Leer el dataset del Hub
|
39 |
try:
|
40 |
print(f"Trying to sync with {HUB_DATASET_NAME}")
|
41 |
-
|
42 |
except Exception as e:
|
43 |
print(f"Not possible to sync with {HUB_DATASET_NAME}")
|
44 |
print(e)
|
45 |
-
|
46 |
|
47 |
# dataset = load_dataset("somosnlp/somos-clean-alpaca-es", split="train")
|
48 |
|
@@ -53,17 +53,19 @@ class LoadDatasets:
|
|
53 |
# print("Concatenated dataset is:")
|
54 |
# print(dataset)
|
55 |
|
56 |
-
|
57 |
-
if not
|
|
|
58 |
return
|
59 |
-
dataset = old_ds
|
60 |
-
records = rg.DatasetForTextClassification.from_datasets(dataset)
|
61 |
|
|
|
|
|
62 |
settings = rg.TextClassificationSettings(
|
63 |
label_schema=["BAD INSTRUCTION", "BAD INPUT", "BAD OUTPUT", "INAPPROPRIATE", "BIASED", "ALL GOOD", "HALLUCINATION", "UNPROCESSABLE"]
|
64 |
)
|
65 |
rg.configure_dataset(name="somos-alpaca-es", settings=settings, workspace="team")
|
66 |
-
|
|
|
67 |
# Log the dataset
|
68 |
rg.log(
|
69 |
records,
|
|
|
38 |
# Leer el dataset del Hub
|
39 |
try:
|
40 |
print(f"Trying to sync with {HUB_DATASET_NAME}")
|
41 |
+
dataset = load_dataset(HUB_DATASET_NAME, split="train")
|
42 |
except Exception as e:
|
43 |
print(f"Not possible to sync with {HUB_DATASET_NAME}")
|
44 |
print(e)
|
45 |
+
dataset = None
|
46 |
|
47 |
# dataset = load_dataset("somosnlp/somos-clean-alpaca-es", split="train")
|
48 |
|
|
|
53 |
# print("Concatenated dataset is:")
|
54 |
# print(dataset)
|
55 |
|
56 |
+
dataset = dataset.remove_columns("metrics")
|
57 |
+
if not dataset:
|
58 |
+
print(f"There is no DATASET - Skipping!")
|
59 |
return
|
|
|
|
|
60 |
|
61 |
+
print(f"Generating records from the dataset")
|
62 |
+
records = rg.DatasetForTextClassification.from_datasets(dataset)
|
63 |
settings = rg.TextClassificationSettings(
|
64 |
label_schema=["BAD INSTRUCTION", "BAD INPUT", "BAD OUTPUT", "INAPPROPRIATE", "BIASED", "ALL GOOD", "HALLUCINATION", "UNPROCESSABLE"]
|
65 |
)
|
66 |
rg.configure_dataset(name="somos-alpaca-es", settings=settings, workspace="team")
|
67 |
+
|
68 |
+
print("Logging the dataset!")
|
69 |
# Log the dataset
|
70 |
rg.log(
|
71 |
records,
|