import gradio as gr
import requests
from PIL import Image
from io import BytesIO
from transformers import pipeline
import torch
import soundfile as sf
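# Assumed dependencies (not pinned in the original): gradio, requests, Pillow,
# transformers, torch, soundfile; the SpeechT5 pipeline likely also needs sentencepiece.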
# Transformers pipelines: BLIP for image captioning, SpeechT5 for text-to-speech
image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
synthesiser = pipeline("text-to-speech", model="microsoft/speecht5_tts")

# Random speaker embedding (for demo purposes; a pretrained x-vector would sound more natural)
speaker_embedding = torch.randn(1, 512)
# Fetch the initial list of cocktails from TheCocktailDB API
url = "https://www.thecocktaildb.com/api/json/v1/1/search.php?s=margarita"
response = requests.get(url)
lista = []
if response.status_code == 200:
    datos = response.json()
    drinks = datos.get("drinks", []) or []  # "drinks" can be null in the API response
    for drink in drinks:
        lista.append(drink['strDrink'])
else:
    print(f"Error: {response.status_code}")

# Main callback: fetches the selected cocktail and builds the text, image, and audio outputs
def change_textbox(choice):
    cocktail = requests.get(f"https://www.thecocktaildb.com/api/json/v1/1/search.php?s={choice}")
    data = cocktail.json()
    dataCocktail = data.get("drinks", []) or []
    name = ""
    instructions = ""
    image_url = ""
    for i in dataCocktail:
        if i['strDrink'].lower() == choice.lower():
            name = i['strDrink']
            instructions = i['strInstructions']
            image_url = i['strDrinkThumb']
            break
    # Load the cocktail image
    img_response = requests.get(image_url)
    image = Image.open(BytesIO(img_response.content)).convert("RGB")
    # Generate a caption for the image
    result = image_to_text(image)
    descripcion = result[0]['generated_text']
    # Synthesize audio for the preparation instructions
    speech = synthesiser(instructions, forward_params={"speaker_embeddings": speaker_embedding})
    sf.write("speech.wav", speech["audio"], samplerate=speech["sampling_rate"])
    audio_path = "speech.wav"
    # Synthesize audio for the image caption
    speech2 = synthesiser(descripcion, forward_params={"speaker_embeddings": speaker_embedding})
    sf.write("speech2.wav", speech2["audio"], samplerate=speech2["sampling_rate"])
    audio_path2 = "speech2.wav"
    return name, image, instructions, audio_path, descripcion, audio_path2

# Build the Gradio interface
with gr.Blocks() as demo:
    gr.HTML(
        """
        <style>
            body {
                background-color: #000000;
                color: #ffffff;
                font-family: Arial, sans-serif;
                margin: 0;
                padding: 0;
                text-align: center;
            }
            .gradio-container {
                background-color: #000000;
                padding: 20px;
                border-radius: 10px;
                display: flex;
                flex-direction: column;
                align-items: center;
                justify-content: center;
            }
            .gradio-container .gradio-radio {
                display: inline-block;
                margin: 10px;
                text-align: center;
            }
            h1 {
                text-align: center;
                color: #ffffff !important;
            }
        </style>
        """
    )
    gr.Markdown("<h1>Cocktails Descriptions</h1>")
    radio = gr.Radio(lista, label="Choose your cocktail:")
    text = gr.Textbox(lines=1, interactive=False, show_copy_button=True, label="Cocktail Name")
    imagen = gr.Image(label="Cocktail Image")
    text2 = gr.Textbox(lines=4, interactive=False, show_copy_button=True, label="Instructions")
    audio = gr.Audio(label="Cocktail Instructions Audio")
    text3 = gr.Textbox(lines=2, interactive=False, show_copy_button=True, label="Image description")
    audio2 = gr.Audio(label="Audio image description")
    radio.change(
        fn=change_textbox,
        inputs=radio,
        outputs=[text, imagen, text2, audio, text3, audio2]
    )

demo.launch(share=True)
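# Note: share=True also creates a temporary public Gradio link when the script is run locally.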