CommentImage / app.py
DGutierrez81's picture
Update app.py
b3465eb verified
raw
history blame
3.66 kB
import gradio as gr
import requests
from PIL import Image
from io import BytesIO
from transformers import pipeline
from datasets import load_dataset
import torch
import soundfile as sf
# Captioning pipeline: turns a PIL image into a short generated description.
image_to_text = pipeline(
    task="image-to-text",
    model="Salesforce/blip-image-captioning-large",
)

# Text-to-speech pipeline; it is driven with an explicit speaker embedding.
synthesiser = pipeline(
    task="text-to-speech",
    model="microsoft/speecht5_tts",
)

# Pick one fixed x-vector from the validation split so that every generated
# clip uses the same voice.
_SPEAKER_ROW = 7306
embeddings_dataset = load_dataset(
    "Matthijs/cmu-arctic-xvectors",
    split="validation",
)
_xvector = embeddings_dataset[_SPEAKER_ROW]["xvector"]
# unsqueeze(0) adds a leading dimension of size 1 in front of the vector.
speaker_embedding = torch.tensor(_xvector).unsqueeze(0)
# Build the list of selectable cocktails once at start-up by searching
# TheCocktailDB for "margarita". On any failure the list stays empty and the
# error is printed, so the app can still start.
url = "https://www.thecocktaildb.com/api/json/v1/1/search.php?s=margarita"
lista = []
response = None
try:
    # A timeout keeps a stalled connection from blocking start-up forever.
    response = requests.get(url, timeout=10)
except requests.RequestException as exc:
    print(f"Error: {exc}")
else:
    if response.status_code == 200:
        datos = response.json()
        # "drinks" may be present but null; .get("drinks", []) would then
        # return None and the iteration below would raise TypeError.
        drinks = datos.get("drinks") or []
        lista = [drink['strDrink'] for drink in drinks]
    else:
        print(f"Error: {response.status_code}")
def _find_drink(choice):
    """Return the search result whose name equals *choice*, or ``None``.

    The search may return several drinks, so only a case-insensitive exact
    match on ``strDrink`` is accepted.
    """
    cocktail = requests.get(
        "https://www.thecocktaildb.com/api/json/v1/1/search.php",
        # Let requests URL-encode the name instead of pasting it into the URL.
        params={"s": choice},
        timeout=10,
    )
    cocktail.raise_for_status()
    # "drinks" may be null in the payload; treat that as "no results".
    candidates = cocktail.json().get("drinks") or []
    wanted = choice.lower()
    return next(
        (drink for drink in candidates if drink['strDrink'].lower() == wanted),
        None,
    )


def _synthesise_to_wav(text, path):
    """Run the module-level TTS pipeline on *text*, write a WAV to *path*, return *path*."""
    speech = synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})
    sf.write(path, speech["audio"], samplerate=speech["sampling_rate"])
    return path


def change_textbox(choice):
    """Look up the chosen cocktail and produce everything the UI displays.

    Args:
        choice: Cocktail name selected in the radio group.

    Returns:
        A 6-tuple ``(name, image, instructions, instructions_audio_path,
        image_description, description_audio_path)``.

    Raises:
        gr.Error: If nothing is selected, the API or image cannot be fetched,
            or no drink with exactly that name is returned.
    """
    if not choice:
        raise gr.Error("Please choose a cocktail.")

    try:
        drink = _find_drink(choice)
        # Without this guard the variables below were never bound when the
        # search returned no exact match.
        if drink is None:
            raise gr.Error(f"No cocktail named {choice!r} was found.")
        img_response = requests.get(drink['strDrinkThumb'], timeout=10)
        img_response.raise_for_status()
    except requests.RequestException as exc:
        raise gr.Error(f"Could not fetch cocktail data: {exc}") from exc

    name = drink['strDrink']
    instructions = drink['strInstructions']
    image = Image.open(BytesIO(img_response.content)).convert("RGB")

    # Caption the thumbnail with the image-to-text pipeline.
    result = image_to_text(image)
    descripcion = result[0]['generated_text']

    # NOTE(review): the output file names are fixed, so overlapping requests
    # would overwrite each other's audio -- confirm whether that matters here.
    audio_path = _synthesise_to_wav(instructions, "speech.wav")
    audio_path2 = _synthesise_to_wav(descripcion, "speech2.wav")

    # Return the plain instructions string; the output Textbox shows it as its
    # value, so building a new gr.Textbox on every call is unnecessary.
    return name, image, instructions, audio_path, descripcion, audio_path2
# Page styling, injected into the page through an HTML component.
_PAGE_STYLE = """
<style>
/* Cambiar el fondo de toda la página */
body {
background-color: #000000;
color: #FFFFFF;
font-family: Arial, sans-serif;
margin: 0;
padding: 0;
text-align: center;
}
.gradio-container {
background-color: #000000;
padding: 20px;
border-radius: 10px;
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
}
.gradio-container .gradio-radio {
display: inline-block;
margin: 10px;
text-align: center;
}
</style>
"""

_PAGE_TITLE = """<h1 style="text-align: center">Cocktails Descriptions</h1>"""


def _readonly_textbox(label):
    """Create a two-line, non-editable textbox with a copy button."""
    return gr.Textbox(
        label=label,
        lines=2,
        interactive=False,
        show_copy_button=True,
    )


with gr.Blocks() as demo:
    gr.HTML(_PAGE_STYLE)
    gr.Markdown(_PAGE_TITLE)

    # Components are created in display order: selector first, then outputs.
    radio = gr.Radio(choices=lista, label="Choose your cocktail:")
    text = _readonly_textbox("Cocktail Name")
    imagen = gr.Image(label="Cocktail Image")
    text2 = _readonly_textbox("Instructions")
    audio = gr.Audio(label="Cocktail Instructions Audio")
    text3 = _readonly_textbox("Image description")
    audio2 = gr.Audio(label="Audio image description")

    # Order must match the tuple returned by change_textbox.
    output_components = [text, imagen, text2, audio, text3, audio2]
    radio.change(
        fn=change_textbox,
        inputs=radio,
        outputs=output_components,
    )

demo.launch(share=True)