import gradio as gr
import requests
from PIL import Image
from io import BytesIO
from transformers import pipeline
from datasets import load_dataset
import torch
import soundfile as sf
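
# Hugging Face pipelines: BLIP for image captioning and SpeechT5 for text-to-speech.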
image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
synthesiser = pipeline("text-to-speech", model="microsoft/speecht5_tts")
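
# SpeechT5 needs a speaker embedding; load a pre-computed x-vector from the CMU ARCTIC dataset.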
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
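
# Query TheCocktailDB once at startup to collect the drink names that populate the radio input.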
url = "https://www.thecocktaildb.com/api/json/v1/1/search.php?s=margarita"
response = requests.get(url)
lista = []
if response.status_code == 200:
    datos = response.json()
    drinks = datos.get("drinks", [])
    for drink in drinks:
        lista.append(drink['strDrink'])
else:
    print(f"Error: {response.status_code}")
def change_textbox(choice):
    # Find the selected drink among the API results.
    cocktail = requests.get(f"https://www.thecocktaildb.com/api/json/v1/1/search.php?s={choice}")
    data = cocktail.json()
    dataCocktail = data.get("drinks", [])
    for i in dataCocktail:
        if i['strDrink'].lower() == choice.lower():
            name = i['strDrink']
            instructions = i['strInstructions']
            image_url = i['strDrinkThumb']
            break
    textInstructions = gr.Textbox(instructions)
    # Download the thumbnail and generate a caption for it.
    img_response = requests.get(image_url)
    image = Image.open(BytesIO(img_response.content)).convert("RGB")
    result = image_to_text(image)
    descripcion = result[0]['generated_text']
    # Text-to-speech for the recipe instructions and for the image caption.
    speech = synthesiser(instructions, forward_params={"speaker_embeddings": speaker_embedding})
    sf.write("speech.wav", speech["audio"], samplerate=speech["sampling_rate"])
    audio_path = "speech.wav"
    speech2 = synthesiser(descripcion, forward_params={"speaker_embeddings": speaker_embedding})
    sf.write("speech2.wav", speech2["audio"], samplerate=speech2["sampling_rate"])
    audio_path2 = "speech2.wav"
    return name, image, textInstructions, audio_path, descripcion, audio_path2
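
# Gradio Blocks UI: dark custom styling, a radio of cocktail names, and outputs for the
# name, image, instructions, image caption, and the two generated audio clips.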
with gr.Blocks() as demo:
    gr.HTML(
        """
        <style>
        /* Change the background of the whole page */
        body {
            background-color: #000000;
            color: #FFFFFF;
            font-family: Arial, sans-serif;
            margin: 0;
            padding: 0;
            text-align: center;
        }
        .gradio-container {
            background-color: #000000;
            padding: 20px;
            border-radius: 10px;
            display: flex;
            flex-direction: column;
            align-items: center;
            justify-content: center;
        }
        .gradio-container .gradio-radio {
            display: inline-block;
            margin: 10px;
            text-align: center;
        }
        h1 {
            text-align: center;
            color: #ffffff !important;
        }
        </style>
        """
    )
    gr.Markdown(
        """<h1>Cocktails Descriptions</h1>"""
    )
    radio = gr.Radio(lista, label="Choose your cocktail:")
    text = gr.Textbox(lines=2, interactive=False, show_copy_button=True, label="Cocktail Name")
    imagen = gr.Image(label="Cocktail Image")
    text2 = gr.Textbox(lines=2, interactive=False, show_copy_button=True, label="Instructions")
    audio = gr.Audio(label="Cocktail Instructions Audio")
    text3 = gr.Textbox(lines=2, interactive=False, show_copy_button=True, label="Image description")
    audio2 = gr.Audio(label="Audio image description")
    radio.change(fn=change_textbox, inputs=radio, outputs=[text, imagen, text2, audio, text3, audio2])

demo.launch()