import gradio as gr
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances
import cv2
from keras.models import load_model
from keras.models import Model
from datasets import load_dataset
from huggingface_hub import hf_hub_download
from PIL import Image

# Download the trained autoencoder and the pre-computed encodings of the dataset images.
model_path = hf_hub_download(repo_id="eybro/autoencoder", filename="autoencoder_model.keras", repo_type="model")
data_path = hf_hub_download(repo_id="eybro/encoded_images", filename="X_encoded_compressed.npy", repo_type="dataset")

autoencoder = load_model(model_path)
encoded_images = np.load(data_path)

# Load the image dataset and reproduce the 80/20 train/test split used when the encodings were created.
dataset = load_dataset("eybro/images")
split_dataset = dataset["train"].train_test_split(test_size=0.2, seed=42)
dataset["train"] = split_dataset["train"]
dataset["test"] = split_dataset["test"]

example_images = {
    "Example 1": "example_1.png",
    "Example 2": "example_2.png",
    "Example 3": "example_3.jpg",
}


def create_url_from_title(title: str, timestamp: int) -> str:
    """Look up the YouTube URL for a video title and append a start-time parameter."""
    video_urls = load_dataset("eybro/video_urls")
    df = video_urls["train"].to_pandas()
    filtered = df[df["title"] == title]
    base_url = filtered.iloc[0]["url"]
    return base_url + f"&t={timestamp}s"


def find_nearest_neighbors(encoded_images, input_image, top_n=5):
    """
    Find the closest neighbors to the input image in the encoded image space.

    Args:
        encoded_images (np.ndarray): Array of encoded images (shape: (n_samples, n_features)).
        input_image (np.ndarray): The encoded input image (shape: (1, n_features)).
        top_n (int): The number of nearest neighbors to return.

    Returns:
        List of tuples: (index, distance) of the top_n nearest neighbors.
    """
    # Compute distances between the input image and every encoded dataset image.
    distances = euclidean_distances(encoded_images, input_image.reshape(1, -1)).flatten()
    # Keep the indices of the top_n smallest distances.
    nearest_neighbors = np.argsort(distances)[:top_n]
    return [(index, distances[index]) for index in nearest_neighbors]


def get_image(index):
    """Return the dataset row at a global index spanning the train and test splits."""
    split = len(dataset["train"])
    if index < split:
        return dataset["train"][index]
    return dataset["test"][index - split]


def process_image(image):
    """Preprocess a PIL image and encode it with the encoder part of the autoencoder."""
    img = np.array(image)
    # Swap the channel order to match the preprocessing used for the stored encodings.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (64, 64))
    img = img.astype("float32") / 255.0
    img = np.expand_dims(img, axis=0)

    # Take the activations of the encoder's bottleneck layer and pool over channels.
    layer_model = Model(inputs=autoencoder.input, outputs=autoencoder.layers[4].output)
    encoded_array = layer_model.predict(img)
    pooled_array = encoded_array.max(axis=-1)
    return pooled_array  # Shape: (1, n_features)


def inference(user_image=None, selected_example=None):
    if user_image is not None and selected_example is not None:
        return "Please provide either an uploaded image or an example image, not both."
    elif user_image is not None:
        input_image = process_image(user_image)
    elif selected_example is not None:
        input_image = process_image(load_example(selected_example))
    else:
        return "Please upload an image or select an example image."
    nearest_neighbors = find_nearest_neighbors(encoded_images, input_image, top_n=5)
    top4 = [int(i[0]) for i in nearest_neighbors[:4]]

    # Debug output: log the labels and timestamps of the closest matches.
    for i in top4:
        im = get_image(i)
        print(im["label"], im["timestamp"])

    result_image = get_image(top4[0])
    url = create_url_from_title(result_image["label"], result_image["timestamp"])
    result = f"{result_image['label']} {result_image['timestamp']}\n\n{url}"
    return result


def load_example(example_name):
    """Open one of the bundled example screenshots, or return None if the name is unknown."""
    image_path = example_images.get(example_name)
    if image_path:
        return Image.open(image_path)
    return None


with gr.Blocks() as demo:
    gr.Markdown("""
    # Image to Video App
    Find your favorite Gordon Ramsay scene by uploading an image from the scene; the app will then find a corresponding YouTube video for that scene.
    Or try one of our examples (screenshots from YouTube videos).
    """)
    with gr.Row():
        with gr.Column():
            inp_image = gr.Image(label="Upload Image", type="pil")
            example_selection = gr.Radio(
                choices=list(example_images.keys()),
                label="Select Example Image",
                type="value",  # Return the selected label as a single string
            )
            example_display = gr.Image(label="Selected Example Image", type="pil")
        with gr.Column():
            output = gr.Markdown()

    # Show the chosen example image whenever the radio selection changes.
    example_selection.change(
        load_example,
        inputs=[example_selection],
        outputs=[example_display],
    )

    clear_button = gr.Button("Clear Example")
    clear_button.click(
        lambda: (None, None),
        inputs=[],
        outputs=[example_selection, example_display],
    )

    submit_button = gr.Button("Submit")
    submit_button.click(
        inference,
        inputs=[inp_image, example_selection],
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()
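
# Optional local sanity check (a minimal sketch, not part of the Gradio app):
# running the lines below instead of demo.launch() encodes one of the bundled
# example screenshots and prints its nearest neighbours in the encoded space.
# It assumes "example_1.png" sits next to this script, as listed in example_images.
#
#   query = process_image(Image.open("example_1.png"))
#   for idx, dist in find_nearest_neighbors(encoded_images, query, top_n=5):
#       row = get_image(int(idx))
#       print(row["label"], row["timestamp"], dist)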