# Spaces: Sleeping
import gradio as gr | |
import requests | |
from PIL import Image | |
from io import BytesIO | |
from transformers import pipeline | |
from datasets import load_dataset | |
import torch | |
import soundfile as sf | |
# Identifiers of the checkpoints and dataset the app loads at start-up.
_CAPTION_MODEL = "Salesforce/blip-image-captioning-large"
_TTS_MODEL = "microsoft/speecht5_tts"
_XVECTOR_DATASET = "Matthijs/cmu-arctic-xvectors"
# Row of the x-vector dataset whose embedding is used as the speaking voice.
_SPEAKER_INDEX = 7306

# Both pipelines are created once at import time and reused by every request.
image_to_text = pipeline("image-to-text", model=_CAPTION_MODEL)
synthesiser = pipeline("text-to-speech", model=_TTS_MODEL)

# Speaker embedding passed to the text-to-speech pipeline on every call;
# unsqueeze(0) adds a leading dimension of size one in front of the x-vector.
embeddings_dataset = load_dataset(_XVECTOR_DATASET, split="validation")
_xvector = embeddings_dataset[_SPEAKER_INDEX]["xvector"]
speaker_embedding = torch.tensor(_xvector).unsqueeze(0)
# Build the list of cocktail names offered in the radio selector by running
# one search against TheCocktailDB at start-up.
url = "https://www.thecocktaildb.com/api/json/v1/1/search.php?s=margarita"
lista = []
try:
    # A timeout keeps start-up from hanging forever on an unresponsive server.
    response = requests.get(url, timeout=10)
except requests.RequestException as exc:
    # Start with an empty selector instead of crashing the whole app.
    response = None
    print(f"Error: {exc}")
else:
    if response.status_code == 200:
        datos = response.json()
        # `.get("drinks", [])` only covers a missing key; if the key is
        # present with a null value the result is None and iterating it
        # raises TypeError, so fall back to an empty list explicitly.
        drinks = datos.get("drinks") or []
        lista.extend(drink['strDrink'] for drink in drinks)
    else:
        print(f"Error: {response.status_code}")
def _speak_to_file(text, path):
    """Synthesise *text* with the shared speaker embedding and write it to *path*.

    Returns *path* so it can be handed directly to an audio output.
    """
    speech = synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})
    sf.write(path, speech["audio"], samplerate=speech["sampling_rate"])
    return path


def change_textbox(choice):
    """Look up the chosen cocktail and build every value shown in the UI.

    Args:
        choice: Cocktail name selected in the radio component.

    Returns:
        A 6-tuple ``(name, image, instructions_textbox, instructions_audio,
        image_description, description_audio)`` where the audio entries are
        paths to the written WAV files.

    Raises:
        gr.Error: If nothing is selected, the lookup or image download fails,
            or no drink with exactly that name (case-insensitive) is returned.
    """
    if not choice:
        raise gr.Error("Please choose a cocktail.")

    try:
        # Passing the name through `params` lets requests URL-encode it, so
        # names containing characters such as '&' or '#' do not break the query.
        cocktail = requests.get(
            "https://www.thecocktaildb.com/api/json/v1/1/search.php",
            params={"s": choice},
            timeout=10,
        )
        cocktail.raise_for_status()
    except requests.RequestException as exc:
        raise gr.Error(f"Could not fetch cocktail data: {exc}") from exc

    # "drinks" may be present but null; treat that the same as no results.
    drinks = cocktail.json().get("drinks") or []
    wanted = choice.lower()
    match = next((d for d in drinks if d['strDrink'].lower() == wanted), None)
    if match is None:
        # Previously this path fell through to unbound local variables.
        raise gr.Error(f"No cocktail named {choice!r} was found.")

    name = match['strDrink']
    instructions = match['strInstructions']
    image_url = match['strDrinkThumb']

    try:
        img_response = requests.get(image_url, timeout=10)
        img_response.raise_for_status()
    except requests.RequestException as exc:
        raise gr.Error(f"Could not download the cocktail image: {exc}") from exc
    image = Image.open(BytesIO(img_response.content)).convert("RGB")

    # Caption the thumbnail; the first result's text is used as description.
    result = image_to_text(image)
    descripcion = result[0]['generated_text']

    # NOTE(review): the output paths are fixed, so simultaneous sessions write
    # to the same files — confirm whether per-request temp files are needed.
    audio_path = _speak_to_file(instructions, "speech.wav")
    audio_path2 = _speak_to_file(descripcion, "speech2.wav")

    textInstructions = gr.Textbox(instructions)
    return name, image, textInstructions, audio_path, descripcion, audio_path2
# Page styling, injected into the layout through a gr.HTML component.
_PAGE_STYLE = """
<style>
/* Cambiar el fondo de toda la página */
body {
background-color: #000000;
color: #FFFFFF;
font-family: Arial, sans-serif;
margin: 0;
padding: 0;
text-align: center;
}
.gradio-container {
background-color: #000000;
padding: 20px;
border-radius: 10px;
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
}
.gradio-container .gradio-radio {
display: inline-block;
margin: 10px;
text-align: center;
}
h1 {
text-align: center;
color: #ffffff !important;
}
</style>
"""

# Settings shared by the three read-only output text boxes.
_READONLY_BOX = dict(lines=2, interactive=False, show_copy_button=True)

with gr.Blocks() as demo:
    gr.HTML(_PAGE_STYLE)
    gr.Markdown("""<h1>Cocktails Descriptions</h1>""")

    # Input: one radio option per cocktail name collected at start-up.
    radio = gr.Radio(lista, label="Choose your cocktail:")

    # Outputs, declared in the order they appear on the page.
    text = gr.Textbox(label="Cocktail Name", **_READONLY_BOX)
    imagen = gr.Image(label="Cocktail Image")
    text2 = gr.Textbox(label="Instructions", **_READONLY_BOX)
    audio = gr.Audio(label="Cocktail Instructions Audio")
    text3 = gr.Textbox(label="Image description", **_READONLY_BOX)
    audio2 = gr.Audio(label="Audio image description")

    # Order must match the tuple returned by change_textbox.
    _outputs = [text, imagen, text2, audio, text3, audio2]
    radio.change(fn=change_textbox, inputs=radio, outputs=_outputs)

demo.launch()