oraculo / app.py
salomonsky's picture
Update app.py
c361e08
raw
history blame
2.59 kB
import gradio as gr
import os
import subprocess
from gtts import gTTS
from pydub import AudioSegment
from transformers import GPT2LMHeadModel, GPT2Tokenizer, pipeline
from PIL import Image
def get_thumbnail(image_path, width):
    """Open an image file and shrink it to fit a square bounding box.

    Args:
        image_path: Path of the image file to open.
        width: Maximum size, in pixels, used for both dimensions of the box.

    Returns:
        The opened PIL image after ``thumbnail`` has been applied to it.
    """
    bounding_box = (width, width)
    thumb = Image.open(image_path)
    # ``thumbnail`` works on the image object itself; its return value is not
    # used, so the (now resized) image is what gets returned.
    thumb.thumbnail(bounding_box)
    return thumb
# Load the checkpoint once. The original code loaded it here and then let
# ``pipeline`` load the same checkpoint a second time from its name, leaving
# this ``model`` object unused while holding a duplicate set of weights.
model = GPT2LMHeadModel.from_pretrained("salomonsky/deepSP")
os.environ["TOKENIZERS_PARALLELISM"] = "true"
tokenizer = GPT2Tokenizer.from_pretrained('salomonsky/deepSP')
# Hand the already-loaded model and tokenizer to the pipeline so nothing is
# loaded twice; the tokenizer must be given explicitly when ``model`` is an
# object rather than a checkpoint name.
generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
def generate_output(name, date_of_birth, image):
    """Generate a personalised phrase, voice it, and lip-sync it onto an image.

    The phrase comes from the module-level ``generator`` pipeline, is turned
    into speech with gTTS, transcoded from MP3 to WAV with pydub, and passed
    to the ``inference.py`` script, which is asked to write ``video.mp4``.

    Args:
        name: Name inserted into the text prompt.
        date_of_birth: Accepted to match the interface inputs; not used here.
        image: Path of the face image passed to ``inference.py --face``.

    Returns:
        ``(video_path, None)`` on success, or ``(None, error_message)`` when
        no text was generated, the inference script exits with a non-zero
        status, or no video file exists afterwards.
    """
    prompt = f"Tu frase mágica de hoy {name} es:"
    # Round-trip the prompt through the tokenizer; the decoded form is what is
    # fed to the generator and what is stripped from its output below.
    input_tokens = tokenizer.encode(prompt, add_special_tokens=False)
    input_text = tokenizer.decode(input_tokens)
    gpt2_output = generator(input_text, max_length=120, do_sample=True, temperature=0.9)
    generated_text = gpt2_output[0]['generated_text']
    generated_text = generated_text.replace(input_text, "").strip()
    if not generated_text:
        # gTTS rejects empty text; report the problem through the error
        # output, like the other failure paths, instead of raising.
        return None, "No se pudo generar el texto"

    temp_audio_path = "temp_audio.mp3"
    audio_path = "audio.wav"
    output_video_path = "video.mp4"

    try:
        gTTS(generated_text, lang='es').save(temp_audio_path)
        AudioSegment.from_mp3(temp_audio_path).export(audio_path, format="wav")
    finally:
        # The MP3 is only an intermediate; remove it on every path so a
        # failure does not leave it behind.
        try:
            os.remove(temp_audio_path)
        except FileNotFoundError:
            # Nothing was written (e.g. the TTS request failed); nothing to clean.
            pass

    # Pass an argument list without a shell so that the image path is a single
    # argument and can never be interpreted as shell syntax.
    command = [
        "python3", "inference.py",
        "--checkpoint_path", "checkpoints/wav2lip_gan.pth",
        "--face", str(image),
        "--audio", audio_path,
        "--outfile", output_video_path,
        "--nosmooth",
    ]
    process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if process.returncode != 0:
        # errors="replace" keeps undecodable bytes in stderr from hiding the
        # real failure behind a UnicodeDecodeError.
        return None, process.stderr.decode("utf-8", errors="replace")

    if os.path.isfile(output_video_path):
        return output_video_path, None
    return None, "No se pudo generar el video"
# Image files offered in the radio selector; the chosen value is passed to
# ``generate_output`` as its ``image`` argument.
choices = ["1.jpg", "2.jpg", "3.jpg", "4.jpg", "5.jpg", "6.jpg"]
# NOTE(review): not referenced in the visible code; the thumbnails below are
# built with the literal 50 instead.
thumbnail_width = "50px"
iface = gr.Interface(
    fn=generate_output,
    inputs=[
        gr.inputs.Textbox(lines=1, label="Nombre", placeholder="Ingresa tu nombre"),
        gr.inputs.Textbox(lines=1, label="Fecha de Nacimiento", placeholder="DD/MM/AAAA"),
        # NOTE(review): ``thumbnails`` does not appear to be a documented Radio
        # parameter - confirm against the installed Gradio version.
        gr.inputs.Radio(choices, label="Selecciona una imagen:", thumbnails=[get_thumbnail(image, 50) for image in choices])
    ],
    # Two outputs, matching the (video_path, error_message) pair returned by
    # ``generate_output``.
    outputs=[
        gr.outputs.Video(label="Respuesta de Andrea (un minuto aproximadamente)").style(width=256),
        gr.outputs.Textbox(label="Mensaje de error", type="text")
    ],
    # Title typo fixed: "Artifical" -> "Artificial".
    title="Oraculo de Inteligencia Artificial v2.1",
    layout="vertical",
    description="Por favor, ingresa tu nombre y fecha de nacimiento."
)
iface.launch()