CommentImage / app.py
DGutierrez81's picture
Update app.py
6628dfa verified
import gradio as gr
import requests
from PIL import Image
from io import BytesIO
from transformers import pipeline
from datasets import load_dataset
import torch
import soundfile as sf
# Models and speaker data are loaded once, at import time, and reused by every
# request handled by the app.

# Image captioning pipeline backed by the BLIP large captioning checkpoint.
image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
# Text-to-speech pipeline backed by the SpeechT5 TTS checkpoint.
synthesiser = pipeline("text-to-speech", model="microsoft/speecht5_tts")
# Validation split of the CMU Arctic x-vector dataset (speaker embeddings).
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
# Use the x-vector stored at row 7306 as the voice; unsqueeze(0) adds a leading
# dimension (presumably the batch axis the TTS model expects - TODO confirm).
speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
# Build the list of radio-button choices from the names of every drink that the
# CocktailDB search endpoint returns for "margarita". On any failure the list
# stays empty and the problem is printed, so the app can still start.
url = "https://www.thecocktaildb.com/api/json/v1/1/search.php?s=margarita"
lista = []
try:
    # A timeout keeps a stalled API from blocking application start-up forever.
    response = requests.get(url, timeout=10)
except requests.RequestException as error:
    response = None
    print(f"Error: {error}")

if response is not None:
    if response.status_code == 200:
        try:
            datos = response.json()
        except ValueError as error:
            # The body was not valid JSON; treat it as "no drinks available".
            datos = {}
            print(f"Error: {error}")
        # `.get("drinks", [])` is not enough: when the key is present but null
        # the default is ignored and None comes back, so normalise explicitly.
        drinks = datos.get("drinks")
        if not isinstance(drinks, list):
            drinks = []
        lista = [drink['strDrink'] for drink in drinks]
    else:
        print(f"Error: {response.status_code}")
def _find_cocktail(choice):
    """Return the CocktailDB record whose name equals *choice*, ignoring case.

    Raises:
        gr.Error: if the lookup request fails, the reply is not JSON, or no
            returned drink has exactly that name.
    """
    try:
        # Passing the name through `params` lets requests URL-encode it, so
        # names containing characters such as '&' or '#' are sent intact.
        reply = requests.get(
            "https://www.thecocktaildb.com/api/json/v1/1/search.php",
            params={"s": choice},
            timeout=10,
        )
        reply.raise_for_status()
        payload = reply.json()
    except (requests.RequestException, ValueError) as exc:
        raise gr.Error(f"Could not look up '{choice}': {exc}") from exc

    # "drinks" can be present but null, in which case `.get` with a default
    # would still hand back None; normalise to a list before iterating.
    candidates = payload.get("drinks")
    if not isinstance(candidates, list):
        candidates = []

    wanted = choice.lower()
    for candidate in candidates:
        if candidate['strDrink'].lower() == wanted:
            return candidate
    raise gr.Error(f"No cocktail named '{choice}' was found.")


def _speak_to_file(text, path):
    """Synthesise *text* with the module-level TTS pipeline, write it to *path* and return *path*."""
    speech = synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})
    sf.write(path, speech["audio"], samplerate=speech["sampling_rate"])
    return path


def change_textbox(choice):
    """Gather everything the UI shows for the cocktail selected in the radio group.

    Args:
        choice: Name of the selected cocktail.

    Returns:
        A 6-tuple matching the event's outputs: cocktail name, thumbnail as an
        RGB PIL image, preparation instructions, path of the WAV file with the
        spoken instructions, generated caption of the thumbnail, and path of
        the WAV file with the spoken caption.

    Raises:
        gr.Error: if nothing is selected, the cocktail cannot be found, or one
            of the HTTP requests fails.
    """
    if not choice:
        raise gr.Error("Please choose a cocktail.")

    drink = _find_cocktail(choice)
    name = drink['strDrink']
    instructions = drink['strInstructions']
    image_url = drink['strDrinkThumb']

    try:
        img_response = requests.get(image_url, timeout=10)
        img_response.raise_for_status()
    except requests.RequestException as exc:
        raise gr.Error(f"Could not download the image for '{name}': {exc}") from exc
    image = Image.open(BytesIO(img_response.content)).convert("RGB")

    # Caption the thumbnail; the pipeline returns a list of dicts and the first
    # entry's 'generated_text' is used as the description.
    result = image_to_text(image)
    descripcion = result[0]['generated_text']

    # NOTE(review): these fixed file names are shared by every session, so
    # concurrent requests may overwrite each other's audio - confirm whether
    # per-request files are needed.
    audio_path = _speak_to_file(instructions, "speech.wav")
    audio_path2 = _speak_to_file(descripcion, "speech2.wav")

    # The instructions are returned as a plain string: that is enough to set
    # the value of the output Textbox, no new component instance is required.
    return name, image, instructions, audio_path, descripcion, audio_path2
# Single-page interface: an inline stylesheet, a heading, a radio group listing
# the cocktails in `lista`, and six output widgets that `change_textbox` fills
# whenever the selection changes.
with gr.Blocks() as demo:
    # Inline <style> element carrying the dark theme and centred layout rules.
    gr.HTML(
        value="""
<style>
/* Cambiar el fondo de toda la página */
body {
background-color: #000000;
color: #FFFFFF;
font-family: Arial, sans-serif;
margin: 0;
padding: 0;
text-align: center;
}
.gradio-container {
background-color: #000000;
padding: 20px;
border-radius: 10px;
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
}
.gradio-container .gradio-radio {
display: inline-block;
margin: 10px;
text-align: center;
}
h1 {
text-align: center;
color: #ffffff !important;
}
</style>
"""
    )
    gr.Markdown(value="""<h1>Cocktails Descriptions</h1>""")

    # Options shared by the three read-only text outputs.
    readonly_box = {"lines": 2, "interactive": False, "show_copy_button": True}

    # Widgets are created in display order, top to bottom.
    radio = gr.Radio(choices=lista, label="Choose your cocktail:")
    text = gr.Textbox(label="Cocktail Name", **readonly_box)
    imagen = gr.Image(label="Cocktail Image")
    text2 = gr.Textbox(label="Instructions", **readonly_box)
    audio = gr.Audio(label="Cocktail Instructions Audio")
    text3 = gr.Textbox(label="Image description", **readonly_box)
    audio2 = gr.Audio(label="Audio image description")

    # The order of `outputs` must match the tuple returned by change_textbox.
    radio.change(
        fn=change_textbox,
        inputs=radio,
        outputs=[text, imagen, text2, audio, text3, audio2],
    )

# Start the web server for the app.
demo.launch()