mserras committed on
Commit
6dbcc57
·
1 Parent(s): f06fcfd

Updated for logging messages

Browse files
Files changed (1) hide show
  1. load_data.py +9 -7
load_data.py CHANGED
@@ -38,11 +38,11 @@ class LoadDatasets:
38
  # Leer el dataset del Hub
39
  try:
40
  print(f"Trying to sync with {HUB_DATASET_NAME}")
41
- old_ds = load_dataset(HUB_DATASET_NAME, split="train")
42
  except Exception as e:
43
  print(f"Not possible to sync with {HUB_DATASET_NAME}")
44
  print(e)
45
- old_ds = None
46
 
47
  # dataset = load_dataset("somosnlp/somos-clean-alpaca-es", split="train")
48
 
@@ -53,17 +53,19 @@ class LoadDatasets:
53
  # print("Concatenated dataset is:")
54
  # print(dataset)
55
 
56
- # dataset = dataset.remove_columns("metrics")
57
- if not old_ds:
 
58
  return
59
- dataset = old_ds
60
- records = rg.DatasetForTextClassification.from_datasets(dataset)
61
 
 
 
62
  settings = rg.TextClassificationSettings(
63
  label_schema=["BAD INSTRUCTION", "BAD INPUT", "BAD OUTPUT", "INAPPROPRIATE", "BIASED", "ALL GOOD", "HALLUCINATION", "UNPROCESSABLE"]
64
  )
65
  rg.configure_dataset(name="somos-alpaca-es", settings=settings, workspace="team")
66
-
 
67
  # Log the dataset
68
  rg.log(
69
  records,
 
38
  # Leer el dataset del Hub
39
  try:
40
  print(f"Trying to sync with {HUB_DATASET_NAME}")
41
+ dataset = load_dataset(HUB_DATASET_NAME, split="train")
42
  except Exception as e:
43
  print(f"Not possible to sync with {HUB_DATASET_NAME}")
44
  print(e)
45
+ dataset = None
46
 
47
  # dataset = load_dataset("somosnlp/somos-clean-alpaca-es", split="train")
48
 
 
53
  # print("Concatenated dataset is:")
54
  # print(dataset)
55
 
56
+ dataset = dataset.remove_columns("metrics")
57
+ if not dataset:
58
+ print(f"There is no DATASET - Skipping!")
59
  return
 
 
60
 
61
+ print(f"Generating records from the dataset")
62
+ records = rg.DatasetForTextClassification.from_datasets(dataset)
63
  settings = rg.TextClassificationSettings(
64
  label_schema=["BAD INSTRUCTION", "BAD INPUT", "BAD OUTPUT", "INAPPROPRIATE", "BIASED", "ALL GOOD", "HALLUCINATION", "UNPROCESSABLE"]
65
  )
66
  rg.configure_dataset(name="somos-alpaca-es", settings=settings, workspace="team")
67
+
68
+ print("Logging the dataset!")
69
  # Log the dataset
70
  rg.log(
71
  records,