# oraculo / app.py
# salomonsky's picture
# Update app.py
# bb6e368
# raw
# history blame
# 2.65 kB
import gradio as gr
import os
import subprocess
from gtts import gTTS
from pydub import AudioSegment
from transformers import GPT2LMHeadModel, GPT2Tokenizer, pipeline
from PIL import Image
# Directory holding the fine-tuned GPT-2 weights and tokenizer files; both the
# generation pipeline and the tokenizer are loaded from the same location.
_CHECKPOINT_DIR = "checkpoints"

# Text-generation pipeline used by generate_output() to continue the greeting.
generator = pipeline(task='text-generation', model=_CHECKPOINT_DIR)

# Standalone tokenizer used to normalise the prompt before generation.
tokenizer = GPT2Tokenizer.from_pretrained(_CHECKPOINT_DIR)

# Set after the models are loaded, in the same order as the original script.
os.environ["TOKENIZERS_PARALLELISM"] = "true"
def generate_output(name, date_of_birth, progress=gr.Progress()):
    """Generate a spoken, lip-synced video greeting for ``name``.

    Steps, in order:

    1. The module-level ``generator`` pipeline continues a Spanish greeting
       prompt built from ``name``.
    2. gTTS converts the generated continuation to ``temp_audio.mp3`` and
       pydub re-exports it as ``audio.wav``.
    3. ``inference.py`` is run as a subprocess with the
       ``checkpoints/wav2lip_gan.pth`` checkpoint, ``oraculo.jpg`` and
       ``audio.wav`` to produce ``video.mp4``.

    Args:
        name: Visitor name interpolated into the prompt.
        date_of_birth: Accepted for interface compatibility; not used here.
        progress: Gradio progress tracker, called with ``(done, total)``
            tuples as the stages complete.

    Returns:
        A 2-tuple ``(video_path, error_message)``: ``("video.mp4", None)``
        on success, or ``(None, <message>)`` when any stage fails.

    NOTE(review): all intermediate and output files use fixed names in the
    working directory, so overlapping requests would overwrite each other's
    files -- confirm the app is only ever served one request at a time.
    """
    prompt = f"Bienvenido {name}:"

    # Round-trip the prompt through the tokenizer so that the prefix removed
    # from the generated text below is exactly the text fed to the generator.
    input_tokens = tokenizer.encode(prompt, add_special_tokens=True)
    input_text = tokenizer.decode(input_tokens)

    gpt2_output = generator(input_text, max_length=60, do_sample=True, temperature=0.6)

    # Validate the pipeline result BEFORE indexing into it; the original
    # performed this check only after the lookup had already happened.
    if not gpt2_output or 'generated_text' not in gpt2_output[0]:
        return None, "No se pudo generar el texto."

    # Keep only the continuation, dropping the echoed prompt.
    generated_text = gpt2_output[0]['generated_text'].replace(input_text, "").strip()

    temp_audio_path = "temp_audio.mp3"
    audio_path = "audio.wav"
    output_video_path = "video.mp4"

    progress((0, 1))

    try:
        # --- Text to speech -------------------------------------------------
        try:
            tts = gTTS(generated_text, lang='es')
            tts.save(temp_audio_path)
            audio = AudioSegment.from_mp3(temp_audio_path)
            audio.export(audio_path, format="wav")
            print("Archivo de audio generado:", audio_path)
            progress((1, 2))
        except Exception as e:
            # Boundary handler: any TTS/conversion failure is reported to the
            # caller as an error message instead of crashing the request.
            return None, f"No se pudo generar el audio: {str(e)}"

        # --- Lip-sync video rendering ---------------------------------------
        # An argv list (no shell) avoids shell parsing of the command line.
        command = [
            "python3", "inference.py",
            "--checkpoint_path", "checkpoints/wav2lip_gan.pth",
            "--face", "oraculo.jpg",
            "--audio", audio_path,
            "--outfile", output_video_path,
            "--nosmooth",
        ]
        process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if process.returncode != 0:
            # errors="replace" keeps the error path from raising on
            # non-UTF-8 bytes in the child's stderr.
            error_message = process.stderr.decode("utf-8", errors="replace")
            return None, f"No se pudo generar el video: {error_message}"
    finally:
        # Always discard the intermediate MP3, including on the failure
        # paths above (the original leaked it whenever a stage failed).
        if os.path.isfile(temp_audio_path):
            os.remove(temp_audio_path)

    if os.path.isfile(output_video_path):
        progress((2, 2))
        return output_video_path, None
    return None, "No se pudo generar el video"
# --- Gradio UI ---------------------------------------------------------------
# Components are created first, in the same order as they are wired below,
# and then handed to the Interface.
name_input = gr.inputs.Textbox(
    lines=1,
    label="Nombre",
    placeholder="Ingresa tu nombre",
)
birth_date_input = gr.inputs.Textbox(
    lines=1,
    label="Fecha de Nacimiento",
    placeholder="DD/MM/AAAA",
)
video_output = gr.outputs.Video(
    label="Respuesta de Andrea (un minuto aproximadamente)",
).style(width=256)

# NOTE(review): generate_output returns a (video, error_message) pair, but
# only the video component is registered as an output; a Textbox labelled
# "Mensaje de error" used to be listed here and is currently disabled.
iface = gr.Interface(
    fn=generate_output,
    inputs=[name_input, birth_date_input],
    outputs=[video_output],
    title="Oráculo de Inteligencia Artificial v2.1",
    description="Por favor, ingresa tu nombre y fecha de nacimiento.",
)

# Start the web server for the app.
iface.launch()