import gradio as gr
import os
import subprocess
from gtts import gTTS
from pydub import AudioSegment
from transformers import GPT2LMHeadModel, GPT2Tokenizer, pipeline
from PIL import Image

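# Helper: open an image from disk and shrink it in place to fit within width x width pixels.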
def get_thumbnail(image_path, width):
    image = Image.open(image_path)
    image.thumbnail((width, width))
    return image

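# Load the text-generation pipeline and tokenizer from the local "checkpoints" directory.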
generator = pipeline('text-generation', model="checkpoints")
tokenizer = GPT2Tokenizer.from_pretrained('checkpoints')
os.environ["TOKENIZERS_PARALLELISM"] = "true"

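# One request end to end: generate horoscope text with GPT-2, voice it with
# gTTS, then lip-sync the audio onto the chosen face image with Wav2Lip.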
def generate_output(name, date_of_birth, image):
    # Prompt (Spanish): "Your astral chart for today, {name}, is:".
    # Note: date_of_birth is accepted but not currently used in the prompt.
    prompt = f"Tu carta astral de hoy {name} es:"
    input_tokens = tokenizer.encode(prompt, add_special_tokens=False)
    input_text = tokenizer.decode(input_tokens)
    gpt2_output = generator(input_text, max_length=120, do_sample=True, temperature=0.9)
    # Validate the pipeline output before indexing into it.
    if len(gpt2_output) == 0 or 'generated_text' not in gpt2_output[0]:
        return None, "No se pudo generar el texto."
    # Drop the echoed prompt so only the newly generated continuation is voiced.
    generated_text = gpt2_output[0]['generated_text'].replace(input_text, "").strip()

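    # Turn the generated Spanish text into speech: gTTS writes an MP3, which
    # pydub re-encodes to WAV before the lip-sync step.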
    try:
        tts = gTTS(generated_text, lang='es')
        temp_audio_path = "temp_audio.mp3"
        tts.save(temp_audio_path)
        audio_path = "audio.wav"
        audio = AudioSegment.from_mp3(temp_audio_path)
        audio.export(audio_path, format="wav")
        print("Archivo de audio generado:", audio_path)
    except Exception as e:
        return None, f"No se pudo generar el audio: {str(e)}"   

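    # Call the Wav2Lip inference script on the selected face image; the image
    # argument must be a path to a file on disk for the shell command to work.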
    command = f"python3 inference.py --checkpoint_path checkpoints/wav2lip_gan.pth --face {image} --audio audio.wav --outfile video.mp4 --nosmooth"
    process = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if process.returncode != 0:
        error_message = process.stderr.decode("utf-8")
        return None, f"No se pudo generar el video: {error_message}"

    output_video_path = "video.mp4"
    os.remove(temp_audio_path)

    if os.path.isfile(output_video_path):
        return output_video_path, None
    return None, "No se pudo generar el video"

def error_message_fn(error_message):
    # Gradio output handlers return plain values, not component instances,
    # so pass the error text through unchanged (or None when there is no error).
    if error_message is not None:
        return error_message
    return None

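# Build small PIL thumbnails of the preset face images (not referenced by the Interface below).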
def get_image_choices():
    image_paths = ["1.jpg", "2.jpg", "3.jpg", "4.jpg", "5.jpg", "6.jpg"]
    image_choices = []
    for image_path in image_paths:
        image = Image.open(image_path)
        image.thumbnail((50, 50))
        image_choices.append(image)
    return image_choices

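# Passthrough helper; the Interface below calls generate_output directly, so this is unused.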
def select_image(image):
    return image

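# Gradio UI: name, date of birth and a preset face image in; a lip-synced video
# (or an error message) out.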
iface = gr.Interface(
    fn=generate_output,
    inputs=[
        # Input order matches generate_output(name, date_of_birth, image).
        gr.inputs.Textbox(lines=1, label="Nombre", placeholder="Ingresa tu nombre"),
        gr.inputs.Textbox(lines=1, label="Fecha de Nacimiento", placeholder="DD/MM/AAAA"),
        # gr.inputs.Image has no `choices` parameter, so the preset faces are
        # offered as a Radio whose value is the selected image's file path.
        gr.inputs.Radio(choices=["1.jpg", "2.jpg", "3.jpg", "4.jpg", "5.jpg", "6.jpg"],
                        label="Selecciona una imagen:")
    ],
    outputs=[
        # Display-width styling via .style() is not supported on the legacy outputs API.
        gr.outputs.Video(label="Respuesta de Andrea (un minuto aproximadamente)"),
        gr.outputs.Textbox(label="Mensaje de error")
    ],
    title="Oráculo de Inteligencia Artificial v2.1",
    layout="vertical",
    description="Por favor, ingresa tu nombre y fecha de nacimiento."
)

iface.launch()