File size: 3,874 Bytes
59e1ddd
c6eb91a
59e1ddd
b5f36c2
c6eb91a
59e1ddd
c6eb91a
 
b5f36c2
59e1ddd
c6eb91a
59e1ddd
b5f36c2
 
59e1ddd
b5f36c2
c6eb91a
 
 
 
 
 
 
 
 
 
 
 
b5f36c2
c6eb91a
 
 
 
b5f36c2
 
 
 
 
c6eb91a
 
 
 
 
 
b5f36c2
 
c6eb91a
 
b5f36c2
 
59e1ddd
c6eb91a
b5f36c2
 
c6eb91a
59e1ddd
c6eb91a
b5f36c2
 
c6eb91a
 
 
 
b5f36c2
 
 
c6eb91a
 
 
 
 
b5f36c2
 
 
c6eb91a
 
b3465eb
c6eb91a
b3465eb
c6eb91a
b3465eb
c6eb91a
 
 
 
b3465eb
c6eb91a
 
b5f36c2
c6eb91a
 
 
 
 
6628dfa
 
 
 
c6eb91a
 
 
 
b5f36c2
c6eb91a
 
b5f36c2
c6eb91a
b5f36c2
c6eb91a
 
 
 
b5f36c2
 
 
 
 
59e1ddd
b5f36c2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import gradio as gr
import requests
from PIL import Image
from io import BytesIO
from transformers import pipeline
import torch
import soundfile as sf

# Hugging Face Transformers pipelines, built once when the module is loaded.
CAPTION_MODEL_ID = "Salesforce/blip-image-captioning-large"
TTS_MODEL_ID = "microsoft/speecht5_tts"

image_to_text = pipeline(task="image-to-text", model=CAPTION_MODEL_ID)
synthesiser = pipeline(task="text-to-speech", model=TTS_MODEL_ID)

# Random speaker embedding (demo only); it is handed to the text-to-speech
# pipeline as `speaker_embeddings` on every synthesis call.
speaker_embedding = torch.randn((1, 512))

# Fetch the list of cocktail names from the API (used to fill the radio choices).
url = "https://www.thecocktaildb.com/api/json/v1/1/search.php?s=margarita"
lista = []

try:
    # A timeout keeps start-up from hanging forever on a stalled connection.
    response = requests.get(url, timeout=10)
except requests.RequestException as exc:
    # Best effort: the app still starts, just with an empty list of choices.
    print(f"Error: {exc}")
else:
    if response.status_code == 200:
        datos = response.json()
        # The "drinks" key can be present with a null value (no matches), in
        # which case .get()'s default is not used; coerce None to an empty list.
        drinks = datos.get("drinks") or []
        lista.extend(drink["strDrink"] for drink in drinks)
    else:
        print(f"Error: {response.status_code}")

# Main callback that refreshes the interface for the selected cocktail
def _find_drink(choice):
    """Return the API record whose name equals *choice* (case-insensitive), or None."""
    # Passing the name through `params` lets requests URL-encode spaces, "&",
    # "#" and similar characters instead of pasting them raw into the URL.
    reply = requests.get(
        "https://www.thecocktaildb.com/api/json/v1/1/search.php",
        params={"s": choice},
        timeout=10,
    )
    reply.raise_for_status()
    # "drinks" can be null when nothing matches, so .get()'s default would not
    # apply; fall back to an empty list explicitly.
    candidates = reply.json().get("drinks") or []
    wanted = choice.lower()
    return next((d for d in candidates if d["strDrink"].lower() == wanted), None)


def _speak(text, path):
    """Synthesise *text* into a WAV file at *path*; return *path*, or None if *text* is empty."""
    if not text:
        return None
    speech = synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})
    sf.write(path, speech["audio"], samplerate=speech["sampling_rate"])
    return path


def change_textbox(choice):
    """Look up a cocktail and build every value shown in the interface.

    Args:
        choice: Cocktail name selected in the radio group. May be ``None`` or
            empty when nothing is selected.

    Returns:
        A 6-tuple ``(name, image, instructions, audio_path, descripcion,
        audio_path2)``: the cocktail name, its thumbnail as an RGB PIL image,
        the preparation instructions, the path of the WAV file with the spoken
        instructions, the generated image caption, and the path of the WAV file
        with the spoken caption. When nothing is selected or no cocktail
        matches, the strings are empty and the image/audio entries are ``None``.

    Raises:
        requests.RequestException: If an HTTP request fails, times out or
            returns an error status.
    """
    if not choice:
        return "", None, "", None, "", None

    drink = _find_drink(choice)
    if drink is None:
        # No exact match: return blanks instead of requesting an empty URL.
        return "", None, "", None, "", None

    name = drink["strDrink"]
    instructions = drink["strInstructions"] or ""
    image_url = drink["strDrinkThumb"]

    image = None
    descripcion = ""
    if image_url:
        # Download the thumbnail
        img_response = requests.get(image_url, timeout=10)
        img_response.raise_for_status()
        image = Image.open(BytesIO(img_response.content)).convert("RGB")

        # Generate a caption for the image
        descripcion = image_to_text(image)[0]["generated_text"]

    # Spoken versions of the instructions and of the image caption
    audio_path = _speak(instructions, "speech.wav")
    audio_path2 = _speak(descripcion, "speech2.wav")

    return name, image, instructions, audio_path, descripcion, audio_path2

# Build the Gradio interface
with gr.Blocks() as demo:
    # Page-wide dark styling, injected as a raw <style> element.
    gr.HTML(
        value="""
        <style>
            body {
                background-color: #000000;
                color: #ffffff; 
                font-family: Arial, sans-serif;
                margin: 0;
                padding: 0;
                text-align: center; 
            }
            
            .gradio-container {
                background-color: #000000; 
                padding: 20px;
                border-radius: 10px;
                display: flex;
                flex-direction: column;
                align-items: center; 
                justify-content: center;
            }
           
            .gradio-container .gradio-radio {
                display: inline-block;
                margin: 10px;
                text-align: center;
            }
            h1 {
                text-align: center;
                color: #ffffff !important; 
            }
        </style>
        """
    )

    gr.Markdown(value="<h1>Cocktails Descriptions</h1>")

    # Input: one radio option per cocktail name in `lista`.
    radio = gr.Radio(choices=lista, label="Choose your cocktail:")

    # Output components, created in the order they appear on the page.
    text = gr.Textbox(
        label="Cocktail Name",
        lines=1,
        interactive=False,
        show_copy_button=True,
    )
    imagen = gr.Image(label="Cocktail Image")
    text2 = gr.Textbox(
        label="Instructions",
        lines=4,
        interactive=False,
        show_copy_button=True,
    )
    audio = gr.Audio(label="Cocktail Instructions Audio")
    text3 = gr.Textbox(
        label="Image description",
        lines=2,
        interactive=False,
        show_copy_button=True,
    )
    audio2 = gr.Audio(label="Audio image description")

    # The list order must match the tuple returned by change_textbox.
    result_components = [text, imagen, text2, audio, text3, audio2]
    radio.change(fn=change_textbox, inputs=radio, outputs=result_components)

demo.launch(share=True)