# All the datasets will use the same format: a collection of HDF5 files with data cubes
#   in t0_fields: scalar fields, like density, pressure, energy
#     the data is of shape (n_trajectories, n_time_steps, x, y)
#   in t1_fields: vector fields, like velocity (size=2 => vx, vy)
#     the data is of shape (n_trajectories, n_time_steps, x, y, vx/vy)
#   in t2_fields: tensor fields, like ???
#     the data is of shape (n_trajectories, n_time_steps, x, y, d1, d2), with d1, d2 in [0, 1]
#     ie, instead of 1 additional dimension for velocity: a (2,2) matrix where each component
#     (0,0),(1,0),(0,1),(1,1) can be plotted
# Size:
#   - n_trajectories: 8 to 256
#   - n_time_steps: 101
#   - x: 128 to 512
#   - y: 128 to 512
#   - physical fields: 2 to 8 (density, pressure, energy, velocity…)

import os
from contextlib import suppress
from tempfile import gettempdir, mkstemp

import av
import gradio as gr
import h5py
import numpy as np
from fsspec import url_to_fs
from matplotlib import cm
from PIL import Image

# Get the path of the system's temporary directory
temp_directory = gettempdir()
print(f"System's temporary directory is: {temp_directory}")
videos_temp_directory = os.path.join(temp_directory, "videos")
print(f"Videos are saved (and never deleted) in: {videos_temp_directory}")

# TODO: add colormap input

repo_id = "lhoestq/turbulent_radiative_layer_tcool_demo"
set_path = f"hf://datasets/{repo_id}/**/*.hdf5"
fs, _ = url_to_fs(set_path)
paths = fs.glob(set_path)
if not paths:
    # Fail early with an explicit message instead of an IndexError on paths[0]
    # when the UI defaults are computed further down.
    raise FileNotFoundError(f"No HDF5 files found at {set_path}")
# Every file is opened once at start-up and kept open for the lifetime of the
# process; the handles are shared by all the requests.
files = {path: h5py.File(fs.open(path, "rb", cache_type="none"), "r") for path in paths}


def get_scalar_fields(path: str) -> list[str]:
    """Return the names of the scalar fields ("t0_fields") stored in the file at `path`."""
    # TODO: support t1_fields (vector) and t2_fields (tensor)
    return list(files[path]["t0_fields"].keys())


def get_trajectories(path: str, field: str) -> list[int]:
    """Return the trajectory indices available for the scalar field `field` of `path`."""
    # The first dimension is the trajectory (8 to 256)
    return list(range(len(files[path]["t0_fields"][field])))


fps = 25


def _render_frames(path: str, scalar_field: str, trajectory: int) -> np.ndarray:
    """Return one trajectory of a scalar field as viridis-colored uint8 RGBA frames.

    The values are min-max normalized over the whole trajectory (all the time
    steps together), so the color scale is the same for every frame.
    """
    data = files[path]["t0_fields"][scalar_field][trajectory]
    # data = np.log(data)  # (was disabled in the original code: "not sure why")
    lowest = data.min()
    value_range = data.max() - lowest
    if value_range == 0:
        # Constant field: dividing by the zero range would only produce NaNs,
        # map everything to the bottom of the colormap instead.
        normalized = np.zeros(data.shape, dtype=np.float64)
    else:
        normalized = (data - lowest) / value_range
    return np.uint8(cm.viridis(normalized) * 255)


def create_video(
    path: str, scalar_field: str, trajectory: int, video_filename: str
) -> None:
    """Encode one trajectory of a scalar field as a VP9 video at `video_filename`.

    The video is first written to a temporary file located in the same
    directory, and only moved to `video_filename` once the encoding has
    succeeded. A failure therefore never leaves a truncated file at the final
    location, where `get_video` would pick it up as a valid cached video.

    Args:
        path: key of the HDF5 file in `files`.
        scalar_field: name of the field in the "t0_fields" group.
        trajectory: index of the trajectory (first dimension of the field).
        video_filename: destination file; its extension selects the container
            format. Its directory must already exist.
    """
    frames = _render_frames(path, scalar_field, trajectory)
    # The frames are transposed before being encoded (see below), so the video
    # width is the first spatial dimension and its height is the second one.
    width, height = frames.shape[1], frames.shape[2]

    # Keep the extension on the temporary file: the container format is
    # inferred from the file name.
    directory = os.path.dirname(video_filename) or "."
    extension = os.path.splitext(video_filename)[1]
    descriptor, partial_filename = mkstemp(
        prefix=".partial-", suffix=extension, dir=directory
    )
    os.close(descriptor)
    try:
        # The context manager closes the container (and finalizes the file)
        # even if the encoding raises.
        with av.open(partial_filename, "w") as output:
            stream = output.add_stream("libvpx-vp9", str(fps))
            stream.width = width
            stream.height = height
            stream.pix_fmt = "yuv444p"
            for img in frames:
                image = Image.fromarray(img)
                # I think it's the way to get the expected orientation
                image = image.transpose(method=Image.Transpose.TRANSPOSE)
                image = image.transpose(method=Image.Transpose.FLIP_TOP_BOTTOM)
                frame = av.VideoFrame.from_image(image)
                output.mux(stream.encode(frame))
            # Flush the encoder
            output.mux(stream.encode(None))
        os.replace(partial_filename, video_filename)
    finally:
        # After a successful os.replace the temporary file is already gone.
        with suppress(FileNotFoundError):
            os.remove(partial_filename)


# no limit on the size of the videos on the disk


def get_video(path: str, scalar_field: str, trajectory: int) -> str:
    """Return the filename of the video of a trajectory, creating it if needed.

    The videos are cached on the disk under `videos_temp_directory`, at
    <path>/<scalar_field>/<trajectory>.webm, and are never deleted.
    """
    video_filename = os.path.join(
        videos_temp_directory, *path.split("/"), scalar_field, f"{trajectory}.webm"
    )
    os.makedirs(os.path.dirname(video_filename), exist_ok=True)
    if not os.path.isfile(video_filename):
        create_video(path, scalar_field, trajectory, video_filename)
    return video_filename


with gr.Blocks() as demo:
    # Initial state: first file, first scalar field, first trajectory
    default_scalar_fields = get_scalar_fields(paths[0])
    default_trajectories = get_trajectories(paths[0], default_scalar_fields[0])
    default_video = get_video(
        paths[0], default_scalar_fields[0], default_trajectories[0]
    )
    gr.Markdown(
        f"# 💠 HDF5 Viewer for the [{repo_id}](https://huggingface.co/datasets/{repo_id}) Dataset 🌊"
    )
    gr.Markdown(f"Showing files at `{set_path}`")
    with gr.Row():
        files_dropdown = gr.Dropdown(
            choices=paths, value=paths[0], label="File", scale=4
        )
        scalar_fields_dropdown = gr.Dropdown(
            choices=default_scalar_fields,
            value=default_scalar_fields[0],
            label="Physical field",
        )
        trajectory_dropdown = gr.Dropdown(
            choices=default_trajectories,
            value=default_trajectories[0],
            label="Trajectory",
        )
    video = gr.Video(default_video, height=400, autoplay=True, loop=True)

    @files_dropdown.select(
        inputs=[files_dropdown],
        outputs=[scalar_fields_dropdown, trajectory_dropdown, video],
    )
    def _update_file(path: str):
        """Selecting a file resets the field and the trajectory to their first value."""
        scalar_fields = get_scalar_fields(path)
        trajectories = get_trajectories(path, scalar_fields[0])
        vid = get_video(path, scalar_fields[0], trajectories[0])
        return {
            scalar_fields_dropdown: gr.Dropdown(
                choices=scalar_fields, value=scalar_fields[0]
            ),
            trajectory_dropdown: gr.Dropdown(
                choices=trajectories, value=trajectories[0]
            ),
            video: gr.Video(vid),
        }

    @scalar_fields_dropdown.select(
        inputs=[files_dropdown, scalar_fields_dropdown],
        outputs=[trajectory_dropdown, video],
    )
    def _update_scalar_field(path: str, scalar_field: str):
        """Selecting a field resets the trajectory to the first one."""
        trajectories = get_trajectories(path, scalar_field)
        vid = get_video(path, scalar_field, trajectories[0])
        return {
            trajectory_dropdown: gr.Dropdown(
                choices=trajectories, value=trajectories[0]
            ),
            video: gr.Video(vid),
        }

    @trajectory_dropdown.select(
        inputs=[files_dropdown, scalar_fields_dropdown, trajectory_dropdown],
        outputs=[video],
    )
    def _update_trajectory(path: str, scalar_field: str, trajectory: int):
        """Selecting a trajectory only swaps the displayed video."""
        vid = get_video(path, scalar_field, trajectory)
        return {video: gr.Video(vid)}


demo.launch()