import gradio as gr
import numpy as np
import cv2
from keras.models import load_model, Model
from sklearn.metrics.pairwise import euclidean_distances
from datasets import load_dataset
from huggingface_hub import hf_hub_download
from PIL import Image

# Download the trained autoencoder and the precomputed image encodings.
model_path = hf_hub_download(repo_id="eybro/autoencoder", filename="autoencoder_model.keras", repo_type="model")
data_path = hf_hub_download(repo_id="eybro/encoded_images", filename="X_encoded_compressed.npy", repo_type="dataset")

autoencoder = load_model(model_path)
encoded_images = np.load(data_path)
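
# Hedged sanity-check sketch (comment-only): the stored array is expected to
# hold one encoding per dataset image, in the same feature space produced by
# process_image() below, e.g.
#   print(encoded_images.shape)  # (n_samples, n_features)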

# Fixed seed so the train/test indices line up with the precomputed encodings.
dataset = load_dataset("eybro/images")
split_dataset = dataset["train"].train_test_split(test_size=0.2, seed=42)  # 80% train, 20% test
dataset["train"] = split_dataset["train"]
dataset["test"] = split_dataset["test"]

example_images = {
    "Example 1": "example_1.png",
    "Example 2": "example_2.png",
    "Example 3": "example_3.jpg",
}

def create_url_from_title(title: str, timestamp: int):
    """Look up the source video URL for a title and append a timestamp offset."""
    video_urls = load_dataset("eybro/video_urls")
    df = video_urls["train"].to_pandas()
    filtered = df[df["title"] == title]
    base_url = filtered.iloc[0]["url"]
    return base_url + f"&t={timestamp}s"
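
# Hedged usage sketch (illustrative values, not taken from the dataset):
#   create_url_from_title("Some Episode Title", 90)
#   # -> "<video url>&t=90s"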

def find_nearest_neighbors(encoded_images, input_image, top_n=5):
    """
    Find the closest neighbors to the input image in the encoded image space.

    Args:
        encoded_images (np.ndarray): Array of encoded images (shape: (n_samples, n_features)).
        input_image (np.ndarray): The encoded input image (shape: (1, n_features)).
        top_n (int): The number of nearest neighbors to return.

    Returns:
        List of tuples: (index, distance) of the top_n nearest neighbors.
    """
    # Compute the distance from the query to every stored encoding.
    distances = euclidean_distances(encoded_images, input_image.reshape(1, -1)).flatten()

    # Sort by distance and keep the top_n closest indices.
    nearest_neighbors = np.argsort(distances)[:top_n]
    return [(index, distances[index]) for index in nearest_neighbors]
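
# Hedged usage sketch (comment-only; `query_vec` is a hypothetical name):
#   query_vec = process_image(Image.open("example_1.png"))
#   neighbors = find_nearest_neighbors(encoded_images, query_vec, top_n=5)
#   best_index, best_distance = neighbors[0]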

def get_image(index):
    """Map a flat encoding index to the matching row of the train/test split."""
    split = len(dataset["train"])
    if index < split:
        return dataset["train"][index]
    return dataset["test"][index - split]

def process_image(image):
    """Encode a PIL image into the same feature space as the stored encodings."""
    img = np.array(image.convert("RGB"))  # drop any alpha channel (e.g. PNG uploads)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (64, 64))
    img = img.astype("float32") / 255.0
    img = np.expand_dims(img, axis=0)

    # Take the activations of an intermediate layer of the autoencoder as the embedding.
    layer_model = Model(inputs=autoencoder.input, outputs=autoencoder.layers[4].output)
    encoded_array = layer_model.predict(img)

    # Max-pool across channels to match the compressed stored encodings.
    pooled_array = encoded_array.max(axis=-1)
    return pooled_array  # Shape: (1, n_features)
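
# Hedged design note: layer_model is rebuilt on every call above. If that
# proves slow, the sub-model could be hoisted to module scope once, e.g.
#   encoder = Model(inputs=autoencoder.input, outputs=autoencoder.layers[4].output)
# and reused inside process_image.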

def inference(user_image=None, selected_example=None):
    if user_image is not None and selected_example is not None:
        return "Please provide either an uploaded image or an example image, not both."
    elif user_image is not None:
        input_image = process_image(user_image)
    elif selected_example is not None:
        input_image = load_example(selected_example)
        input_image = process_image(input_image)
    else:
        return "Please upload an image or select an example image."

    nearest_neighbors = find_nearest_neighbors(encoded_images, input_image, top_n=5)

    # Log the four closest matches for debugging.
    top4 = [int(i[0]) for i in nearest_neighbors[:4]]
    for i in top4:
        im = get_image(i)
        print(im["label"], im["timestamp"])

    # Build the result link from the single closest match.
    result_image = get_image(top4[0])
    url = create_url_from_title(result_image["label"], result_image["timestamp"])
    return f"{result_image['label']} {result_image['timestamp']} \n{url}"
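
# Hedged behaviour sketch (illustrative placeholders, not real output):
#   inference(selected_example="Example 1")
#   # -> "<label> <timestamp> \n<video url>&t=<timestamp>s"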

def load_example(example_name):
    image_path = example_images.get(example_name)
    if image_path:
        return Image.open(image_path)
    return None

with gr.Blocks() as demo:
    gr.Markdown("""
    # Image to Video App

    Find your favorite Gordon Ramsay scene by uploading an image from the scene; the app then finds the corresponding YouTube video for that scene.

    Or try one of the examples, which are screenshots from YouTube videos.
    """)
    with gr.Row():
        with gr.Column():
            inp_image = gr.Image(label="Upload Image", type="pil")
            example_selection = gr.Radio(
                choices=list(example_images.keys()),
                label="Select Example Image",
                type="value",  # ensure a single string is returned
            )
            example_display = gr.Image(label="Selected Example Image", type="pil")

        with gr.Column():
            output = gr.Markdown()

    # Preview the chosen example image.
    example_selection.change(
        lambda selected_example: load_example(selected_example),
        inputs=[example_selection],
        outputs=[example_display],
    )

    clear_button = gr.Button("Clear Example")
    clear_button.click(
        lambda: (None, None),
        inputs=[],
        outputs=[example_selection, example_display],
    )

    submit_button = gr.Button("Submit")
    submit_button.click(
        lambda user_image, selected_example: inference(user_image=user_image, selected_example=selected_example),
        inputs=[inp_image, example_selection],
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()