import gradio as gr import earthview as ev import utils import random import pandas as pd import os from itertools import islice # Configuration chunk_size = 100 # Size of the chunks to shuffle label_file = os.path.join(os.path.dirname(__file__), "labels.csv") # Save CSV in the same directory as the script # Load the Satellogic dataset (streaming) dataset = ev.load_dataset("satellogic", streaming=True) data_iter = iter(dataset) shuffled_chunk = [] # Initialize an empty list to hold the current chunk chunk_iter = None # Initialize the chunk iterator # Initialize or load labels DataFrame labels_df = None if os.path.exists(label_file): labels_df = pd.read_csv(label_file) else: labels_df = pd.DataFrame(columns=["image_id", "bounds", "rating", "google_maps_link"]) def get_next_image(): global data_iter, labels_df, shuffled_chunk, chunk_iter while True: # If we don't have a current chunk or it's exhausted, get a new one if not shuffled_chunk or chunk_iter is None: chunk = list(islice(data_iter, chunk_size)) if not chunk: # If the dataset is exhausted, reset the iterator print("Dataset exhausted, resetting iterator.") reset_dataset_iterator() # Use the reset function chunk = list(islice(data_iter, chunk_size)) if not chunk: print("Still no data after reset.") return None, "Dataset exhausted", None, None random.shuffle(chunk) shuffled_chunk = chunk chunk_iter = iter(shuffled_chunk) try: sample = next(chunk_iter) sample = ev.item_to_images("satellogic", sample) image = sample["rgb"][0] metadata = sample["metadata"] bounds = metadata["bounds"] google_maps_link = utils.get_google_map_link(sample, "satellogic") image_id = str(bounds) if labels_df is not None and image_id not in labels_df["image_id"].values: return image, image_id, bounds, google_maps_link elif labels_df is None: # Handle case where labels_df is not initialized yet return image, image_id, bounds, google_maps_link except StopIteration: # Current chunk is exhausted, reset chunk variables to get a new one in the next iteration shuffled_chunk = [] chunk_iter = None def rate_image(image_id, bounds, rating): global labels_df new_row = pd.DataFrame( { "image_id": [image_id], "bounds": [bounds], "rating": [rating], "google_maps_link": [""], # this isn't necessary to pass to the function since we aren't updating it here. } ) labels_df = pd.concat([labels_df, new_row], ignore_index=True) labels_df.to_csv(label_file, index=False) next_image, next_image_id, next_bounds, next_google_maps_link = get_next_image() return next_image, next_image_id, next_bounds, next_google_maps_link def save_labels_parquet(): global labels_df if labels_df is not None and not labels_df.empty: table = pa.Table.from_pandas(labels_df) pq.write_table(table, 'labeled_data.parquet') return 'labeled_data.parquet' else: return None def reset_dataset_iterator(): global data_iter, shuffled_chunk, chunk_iter data_iter = iter(ev.load_dataset("satellogic", streaming=True)) shuffled_chunk = [] chunk_iter = None def load_different_batch(): print("Loading a different batch of images...") reset_dataset_iterator() return get_next_image() # Return the first image from the new batch # Gradio interface with gr.Blocks() as iface: image_out = gr.Image(label="Satellite Image") image_id_out = gr.Textbox(label="Image ID", visible=False) bounds_out = gr.Textbox(label="Bounds", visible=False) google_maps_link_out = gr.Textbox(label="Google Maps Link", visible=True) rating_radio = gr.Radio(["Cool", "Not Cool"], label="Rating") with gr.Row(): submit_button = gr.Button("Submit Rating") different_batch_button = gr.Button("Load Different Batch") # New button download_button = gr.Button("Download Labels (Parquet)") download_output = gr.File(label="Download Labeled Data") submit_button.click( fn=rate_image, inputs=[image_id_out, bounds_out, rating_radio], outputs=[image_out, image_id_out, bounds_out, google_maps_link_out], ) download_button.click( fn=save_labels_parquet, inputs=[], outputs=[download_output], ) different_batch_button.click( fn=load_different_batch, inputs=[], outputs=[image_out, image_id_out, bounds_out, google_maps_link_out] ) # Load the first image initial_image, initial_image_id, initial_bounds, initial_google_maps_link = get_next_image() # Set initial values if initial_image: iface.load(lambda: (initial_image, initial_image_id, initial_bounds, initial_google_maps_link), inputs=None, outputs=[image_out, image_id_out, bounds_out, google_maps_link_out]) iface.launch(share=True)