Emil Ernerfeldt committed on
Commit
4337a31
1 Parent(s): 2959351

Log any and all columns of the dataset

Browse files
Files changed (2) hide show
  1. main.py +58 -26
  2. requirements.txt +2 -0
main.py CHANGED
@@ -4,30 +4,62 @@ from __future__ import annotations
4
 
5
  import rerun as rr
6
  from datasets import load_dataset
 
 
7
 
8
- # download/load dataset in pyarrow format
9
- print("Loading dataset…")
10
- dataset = load_dataset("lerobot/pusht", split="train")
11
-
12
- # select the frames belonging to episode number 5
13
- print("Select specific episode…")
14
- ds_subset = dataset.filter(lambda frame: frame["episode_id"] == 5)
15
-
16
- print("Starting Rerun…")
17
- rr.init("rerun_example_lerobot", spawn=True)
18
-
19
- print("Logging to Rerun…")
20
- for frame_id, timestamp, image, state, action, next_reward in zip(
21
- ds_subset["frame_id"],
22
- ds_subset["timestamp"],
23
- ds_subset["observation.image"],
24
- ds_subset["observation.state"],
25
- ds_subset["action"],
26
- ds_subset["next.reward"],
27
- ):
28
- rr.set_time_sequence("frame_id", frame_id)
29
- rr.set_time_seconds("timestamp", timestamp)
30
- rr.log("observation/image", rr.Image(image))
31
- rr.log("observation/state", rr.BarChart(state))
32
- rr.log("observation/action", rr.BarChart(action))
33
- rr.log("next/reward", rr.Scalar(next_reward))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  import rerun as rr
6
  from datasets import load_dataset
7
+ from PIL import Image
8
+ from tqdm import tqdm
9
 
10
+
11
def log_dataset_to_rerun(dataset) -> None:
    """Log every row of `dataset` to Rerun, one entity per column.

    For each row, the time-like columns ("index", "frame_id", "timestamp")
    are applied as Rerun timelines, a few episode bookkeeping columns are
    skipped, and every remaining column is logged under its own column name
    with an archetype picked from the Python type of the cell:

    * PIL image      -> `rr.Image`
    * list           -> `rr.BarChart`
    * float or int   -> `rr.Scalar`
    * anything else  -> `rr.TextDocument` of `str(cell)`

    Args:
        dataset: Object exposing `column_names` and yielding one mapping of
            column name to cell value per row, e.g. what
            `datasets.load_dataset(..., split=...)` returns.
    """
    # Columns that drive Rerun timelines instead of being logged as data.
    time_like = ("index", "frame_id", "timestamp")

    # Columns that are neither timelines nor data to log.
    ignore = {"episode_data_index_from", "episode_data_index_to", "episode_id"}

    # The set of data columns is identical for every row, so compute it once.
    data_columns = [
        name
        for name in dataset.column_names
        if name not in time_like and name not in ignore
    ]

    for row in tqdm(dataset):
        # Handle time-like columns first, since they set a state
        # (time is an index in Rerun) that the log calls below rely on.
        for column_name in time_like:
            if column_name not in row:
                continue

            cell = row[column_name]
            if isinstance(cell, int):
                rr.set_time_sequence(column_name, cell)
            elif isinstance(cell, float):
                rr.set_time_seconds(column_name, cell)  # assume seconds
            else:
                print(f"Unknown time-like column {column_name} with value {cell}")

        # Now log the actual data columns.
        for column_name in data_columns:
            cell = row[column_name]
            if isinstance(cell, Image.Image):
                rr.log(column_name, rr.Image(cell))
            elif isinstance(cell, list):
                rr.log(column_name, rr.BarChart(cell))
            elif isinstance(cell, (float, int)):
                rr.log(column_name, rr.Scalar(cell))
            else:
                # Fallback: show whatever it is as text rather than dropping it.
                rr.log(column_name, rr.TextDocument(str(cell)))
47
+
48
+
49
def main(
    repo_id: str = "lerobot/aloha_sim_transfer_cube_human",
    episode_id: int = 3,
) -> None:
    """Load one episode of a dataset and log all of its columns to Rerun.

    Args:
        repo_id: Dataset identifier passed to `load_dataset`; its "train"
            split is loaded. "lerobot/pusht" is another identifier this
            script has been used with.
        episode_id: Only frames whose "episode_id" column equals this value
            are logged.
    """
    print("Loading dataset…")
    dataset = load_dataset(repo_id, split="train")

    print("Selecting specific episode…")
    ds_subset = dataset.filter(lambda frame: frame["episode_id"] == episode_id)

    print("Starting Rerun…")
    # spawn=True asks the SDK to start a viewer for this recording.
    rr.init("rerun_example_lerobot", spawn=True)

    print("Logging to Rerun…")
    log_dataset_to_rerun(ds_subset)


if __name__ == "__main__":
    main()
requirements.txt CHANGED
@@ -1,2 +1,4 @@
1
  datasets
 
2
  rerun-sdk>=0.15.0,<0.16.0
 
 
1
  datasets
2
+ Pillow
3
  rerun-sdk>=0.15.0,<0.16.0
4
+ tqdm