import gradio as gr
import os
import subprocess
from gtts import gTTS
from pydub import AudioSegment
from transformers import GPT2LMHeadModel, GPT2Tokenizer, pipeline
from PIL import Image
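# Helper: open an image from disk and shrink it in place to fit a width x width box.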
def get_thumbnail(image_path, width):
    image = Image.open(image_path)
    image.thumbnail((width, width))
    return image
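# Load the text-generation pipeline and tokenizer from the local "checkpoints"
# directory (assumed to hold a GPT-2 model fine-tuned for Spanish text).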
generator = pipeline('text-generation', model="checkpoints")
tokenizer = GPT2Tokenizer.from_pretrained('checkpoints')
os.environ["TOKENIZERS_PARALLELISM"] = "true"
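# Main pipeline: GPT-2 text generation -> gTTS speech synthesis -> Wav2Lip video.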
def generate_output(name, date_of_birth, image):
    # date_of_birth is collected by the UI but not used in the prompt.
    prompt = f"Tu carta astral de hoy {name} es:"
    input_tokens = tokenizer.encode(prompt, add_special_tokens=False)
    input_text = tokenizer.decode(input_tokens)
    gpt2_output = generator(input_text, max_length=120, do_sample=True, temperature=0.9)
    # Validate the generator output before indexing into it.
    if len(gpt2_output) == 0 or 'generated_text' not in gpt2_output[0]:
        return None, "No se pudo generar el texto."
    generated_text = gpt2_output[0]['generated_text']
    generated_text = generated_text.replace(input_text, "").strip()
    # Synthesize Spanish speech and convert the MP3 to WAV for Wav2Lip.
    try:
        tts = gTTS(generated_text, lang='es')
        temp_audio_path = "temp_audio.mp3"
        tts.save(temp_audio_path)
        audio_path = "audio.wav"
        audio = AudioSegment.from_mp3(temp_audio_path)
        audio.export(audio_path, format="wav")
        print("Archivo de audio generado:", audio_path)
    except Exception as e:
        return None, f"No se pudo generar el audio: {str(e)}"
    # Lip-sync the selected face image to the generated audio with Wav2Lip.
    command = f"python3 inference.py --checkpoint_path checkpoints/wav2lip_gan.pth --face {image} --audio audio.wav --outfile video.mp4 --nosmooth"
    process = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if process.returncode != 0:
        error_message = process.stderr.decode("utf-8")
        return None, f"No se pudo generar el video: {error_message}"
    output_video_path = "video.mp4"
    os.remove(temp_audio_path)
    if os.path.isfile(output_video_path):
        return output_video_path, None
    return None, "No se pudo generar el video"
def error_message_fn(error_message):
    # Pass the error text through unchanged so Gradio can display it in a Textbox.
    if error_message is not None:
        return error_message
    return None
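# Build 50x50 thumbnails of the six preset face images (1.jpg ... 6.jpg).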
def get_image_choices():
    image_paths = ["1.jpg", "2.jpg", "3.jpg", "4.jpg", "5.jpg", "6.jpg"]
    image_choices = []
    for image_path in image_paths:
        image = Image.open(image_path)
        image.thumbnail((50, 50))
        image_choices.append(image)
    return image_choices
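# Identity helper: returns the selected image unchanged.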
def select_image(image):
    return image
# Wire the interface to generate_output; the Wav2Lip command needs a file path,
# so the preset faces are offered as a Radio choice of file names.
iface = gr.Interface(
    fn=generate_output,
    inputs=[
        gr.inputs.Textbox(lines=1, label="Nombre", placeholder="Ingresa tu nombre"),
        gr.inputs.Textbox(lines=1, label="Fecha de Nacimiento", placeholder="DD/MM/AAAA"),
        gr.inputs.Radio(choices=["1.jpg", "2.jpg", "3.jpg", "4.jpg", "5.jpg", "6.jpg"],
                        label="Selecciona una imagen:")
    ],
    outputs=[
        gr.outputs.Video(label="Respuesta de Andrea (un minuto aproximadamente)").style(width=256),
        gr.outputs.Textbox(label="Mensaje de error", type="text")
    ],
    title="Oráculo de Inteligencia Artificial v2.1",
    layout="vertical",
    description="Por favor, ingresa tu nombre y fecha de nacimiento."
)
iface.launch()