# Prompt_Squirrel / app.py
# Page metadata captured with this file: avatar "FoodDesert's picture",
# commit "Upload app.py" (cd4fb52, verified), links "raw" / "history blame",
# file size 2 kB.
from io import BytesIO

import gradio as gr
import h5py
import numpy as np
from joblib import load
from sklearn.metrics.pairwise import cosine_similarity
# Load the model and data once at startup so every request reuses them.
with h5py.File('artist_data.hdf5', 'r') as f:
    # The vectorizer is stored as a raw byte blob; wrap it in an in-memory
    # buffer so joblib can read it like a file.
    # NOTE(security): joblib.load unpickles its input, which can execute
    # arbitrary code. Only ship an 'artist_data.hdf5' from a trusted source.
    vectorizer_bytes = f['vectorizer'][()].tobytes()
    vectorizer_buffer = BytesIO(vectorizer_bytes)
    vectorizer = load(vectorizer_buffer)

    # Read the full artist matrix into memory; the file is closed when the
    # `with` block exits, so a lazy dataset handle would not be usable later.
    X_artist = f['X_artist'][:]

    # Depending on the h5py version and on how the dataset was written,
    # string entries come back as `bytes` or as `str`; accept both.
    artist_names = [
        name.decode() if isinstance(name, bytes) else str(name)
        for name in f['artist_names'][:]
    ]
def find_similar_artists(new_tags_string, *, top_n=20):
    """Report the artists most and least similar to a set of image tags.

    Args:
        new_tags_string: Comma-separated tags. Whitespace around each tag
            is stripped and empty entries (from blank input or stray
            commas) are ignored. ``None`` is treated as an empty string.
        top_n: Maximum number of artists listed at each end of the
            ranking. Defaults to 20.

    Returns:
        A text report with three sections: the tags missing from the
        vectorizer's vocabulary, the highest-scoring artists, and the
        lowest-scoring artists, each with its cosine similarity.

    Relies on the module-level ``vectorizer``, ``X_artist`` and
    ``artist_names`` loaded at startup.
    """
    raw_tags = (new_tags_string or "").split(",")
    # Drop empty entries so "" is never reported as an unseen tag.
    new_image_tags = [tag.strip() for tag in raw_tags if tag.strip()]

    # Sort so the report is deterministic rather than in set order.
    vocabulary = vectorizer.vocabulary_
    unseen_tags = sorted({tag for tag in new_image_tags if tag not in vocabulary})
    if unseen_tags:
        unseen_tags_str = f'Unseen Tags: {", ".join(unseen_tags)}'
    else:
        unseen_tags_str = 'No unseen tags.'

    # The vectorizer receives the tags re-joined as one document.
    X_new_image = vectorizer.transform([','.join(new_image_tags)])
    similarities = cosine_similarity(X_new_image, X_artist)[0]

    # Sort once (ascending) and slice both ends from the same ordering.
    ascending = np.argsort(similarities)
    top_artist_indices = ascending[::-1][:top_n]
    bottom_artist_indices = ascending[:top_n]

    def format_ranking(indices):
        # One "rank. name - similarity score" line per artist.
        return "\n".join(
            f"{rank}. {artist_names[i]} - similarity score: {similarities[i]:.4f}"
            for rank, i in enumerate(indices, start=1)
        )

    top_artists_str = format_ranking(top_artist_indices)
    bottom_artists_str = format_ranking(bottom_artist_indices)

    # Headings show the number of artists actually listed; the original
    # hard-coded "10" while listing 20.
    return (
        f"{unseen_tags_str}\n\n"
        f"Top {len(top_artist_indices)} artists:\n{top_artists_str}\n\n"
        f"Bottom {len(bottom_artist_indices)} artists:\n{bottom_artists_str}"
    )
# Expose the lookup as a single text-in / text-out Gradio interface.
iface = gr.Interface(
    title="Artist Similarity Finder",
    description="Enter image tags to find similar artists based on learned similarities.",
    fn=find_similar_artists,
    inputs="text",
    outputs="text",
)

# Start serving the interface.
iface.launch()