Spaces:
Runtime error
Runtime error
import gradio as gr | |
from datasets import load_dataset | |
from PIL import Image | |
import io | |
import time | |
import os | |
from datetime import datetime, timedelta | |
import json | |
# Hugging Face access token, read from the environment (None when unset).
access_token = os.getenv("HUGGINGFACE_TOKEN")

# Module-level state shared by the loader and the UI callbacks.
dataset = None  # streaming dataset, assigned by load_and_prepare_dataset()
dataset_size = "Unknown"  # example count, or "Unknown" until metadata is read
last_refresh_time = None  # datetime of the most recent dataset load
REFRESH_INTERVAL = timedelta(days=1)  # minimum age before the dataset is reloaded
def load_and_prepare_dataset():
    """(Re)load the streaming dataset and refresh the module-level metadata.

    Opens the ``train`` split of ``taesiri/PhotoshopRequest-DailyDump`` in
    streaming mode, authenticating with ``access_token``, and updates three
    globals:

    * ``dataset`` -- the freshly opened streaming dataset.
    * ``dataset_size`` -- the number of training examples reported by the
      dataset metadata, or the string ``"Unknown"`` when the metadata does
      not describe a ``train`` split.
    * ``last_refresh_time`` -- ``datetime.now()`` at the end of this load
      (a naive, local-time timestamp).
    """
    global dataset, dataset_size, last_refresh_time
    dataset = load_dataset(
        "taesiri/PhotoshopRequest-DailyDump",
        split="train",
        streaming=True,
        token=access_token,
    )

    # Split metadata is optional: ``info.splits`` may be None, in which case
    # calling ``.get`` on it directly would raise AttributeError.
    splits = dataset.info.splits or {}
    train_split = splits.get("train")
    dataset_size = train_split.num_examples if train_split else "Unknown"

    last_refresh_time = datetime.now()
def check_and_refresh_dataset():
    """Reload the dataset if it was never loaded or the last load is stale.

    A load is considered stale once at least ``REFRESH_INTERVAL`` has elapsed
    since ``last_refresh_time``. Reloading is delegated to
    ``load_and_prepare_dataset()``, which also updates ``last_refresh_time``.
    """
    global last_refresh_time
    if last_refresh_time is not None:
        age = datetime.now() - last_refresh_time
        if age < REFRESH_INTERVAL:
            # Still fresh: nothing to do.
            return
    load_and_prepare_dataset()
# Initial dataset load. load_and_prepare_dataset() already assigns ``dataset``,
# ``dataset_size`` and ``last_refresh_time``, so the dataset is not opened a
# second time here.
load_and_prepare_dataset()

# Metadata of the dataset loaded above, kept available as a module-level name.
dataset_info = dataset.info

# Passed to ``dataset.shuffle(buffer_size=...)`` and also used as the number of
# samples served from one iterator before get_next_sample() reshuffles.
BUFFER_SIZE = 1
sample_iterator = None  # assigned by reshuffle_dataset()
sample_count = 0  # samples served from the current iterator
def reshuffle_dataset():
    """Start a new shuffled pass over the global ``dataset``.

    Shuffles with a seed taken from the current Unix time and a buffer of
    ``BUFFER_SIZE`` items, stores a fresh iterator over the result in
    ``sample_iterator`` and resets ``sample_count`` to zero.
    """
    global sample_iterator, sample_count
    # time.time() is a float; truncate to whole seconds for an integer seed.
    now_seconds = int(time.time())
    sample_iterator = iter(
        dataset.shuffle(seed=now_seconds, buffer_size=BUFFER_SIZE)
    )
    sample_count = 0
# Build the first iterator at import time: sample_iterator starts out as None
# and get_next_sample() only reshuffles once sample_count reaches BUFFER_SIZE,
# so the very first request relies on this call.
reshuffle_dataset()  # Initial shuffle
def get_next_sample():
    """Fetch the next sample and format it for display.

    Refreshes the dataset if it is stale, reshuffles once ``BUFFER_SIZE``
    samples have been served from the current iterator, then pulls one sample
    and increments ``sample_count``.

    Returns:
        A 3-tuple ``(markdown_text, source_image, edited_image)``:
        ``markdown_text`` holds the post title, its selftext (empty when not
        available) and a link to the Reddit post; the two images are the
        sample's ``"source_image"`` and ``"edited_image"`` fields.

    Raises:
        StopIteration: if the current iterator has no more samples.
        KeyError: if the sample lacks ``"post_id"``, ``"title"`` or one of
            the image fields.
    """
    global sample_count
    check_and_refresh_dataset()
    if sample_count >= BUFFER_SIZE:
        reshuffle_dataset()
    sample = next(sample_iterator)
    sample_count += 1
    print(sample)

    post_id = sample["post_id"]
    title = sample["title"]
    reddit_url = f"https://www.reddit.com/r/PhotoshopRequest/comments/{post_id}"

    # The selftext is optional. Only the failures that mean "not present /
    # not parseable" are swallowed: a missing key, a non-string or
    # wrongly-shaped payload, or invalid JSON (JSONDecodeError is a
    # ValueError). Anything else propagates instead of being hidden.
    try:
        # A JSON null would otherwise be rendered as the text "None".
        selftext = json.loads(sample["json_data"])["post"]["selftext"] or ""
    except (KeyError, TypeError, ValueError):
        print("No selftext found")
        selftext = ""

    markdown_text = f"# {title}\n\n{selftext}\n\n[View post on r/PhotoshopRequest]({reddit_url})"
    return (
        markdown_text,
        sample["source_image"],
        sample["edited_image"],
    )
# UI layout: a title and short description, the post text, the source/edited
# image pair, the sampling button, and a footer with dataset statistics.
with gr.Blocks() as demo:
    gr.Markdown("# PhotoshopRequest Dataset Sampler")
    # The text inside the triple-quoted strings below is kept flush-left,
    # exactly as it is emitted at runtime.
    gr.Markdown(
        """
This is a preview of the PhotoshopRequest dataset. Each sample represents a Photoshop editing request post.
Click the 'Sample New Item' button to retrieve a random sample from the dataset.
"""
    )
    post_info = gr.Markdown()
    with gr.Row():
        source_image = gr.Image(label="Source Image")
        edited_image = gr.Image(label="Edited Image")
    sample_button = gr.Button("Sample New Item")
    info_md = gr.Markdown()

    def update_info():
        """Return the HTML footer with the dataset size and last refresh time.

        The refresh time is converted to UTC before formatting so that the
        "UTC" suffix in the output is accurate; "Unknown" is shown when no
        load has been recorded yet.
        """
        # Local import: only this function needs ``timezone``.
        from datetime import timezone

        if last_refresh_time:
            # ``last_refresh_time`` comes from datetime.now() (naive, local
            # time). astimezone() interprets a naive value as local time, so
            # this yields the true UTC instant.
            refreshed = last_refresh_time.astimezone(timezone.utc).strftime(
                "%Y-%m-%d %H:%M:%S UTC"
            )
        else:
            refreshed = "Unknown"
        return f"""
<div style="text-align: center;">
<hr>
Dataset Size: {dataset_size} items<br>
Last Refreshed: {refreshed}
</div>
"""

    # Show a new sample first, then refresh the footer.
    sample_button.click(
        get_next_sample, outputs=[post_info, source_image, edited_image]
    ).then(update_info, outputs=[info_md])

if __name__ == "__main__":
    demo.launch()