import os
import random
import time
from datetime import datetime
from pathlib import Path

import gradio as gr
import spaces
import torch
from huggingface_hub import snapshot_download
from loguru import logger

from hyvideo.config import parse_args
from hyvideo.constants import NEGATIVE_PROMPT
from hyvideo.inference import HunyuanVideoSampler
from hyvideo.utils.file_utils import save_videos_grid
from hyvideo.utils.preprocess_text_encoder_tokenizer_utils import preprocess_text_encoder_tokenizer
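
# Model weights are only fetched when a GPU is present; on the CPU build of
# this Space the download and preprocessing steps below are skipped entirely
# and only the warning UI is rendered.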
if torch.cuda.device_count() > 0:
    snapshot_download(repo_id="tencent/HunyuanVideo", repo_type="model", local_dir="ckpts", force_download=True)
    snapshot_download(repo_id="xtuner/llava-llama-3-8b-v1_1-transformers", repo_type="model", local_dir="ckpts/llava-llama-3-8b-v1_1-transformers", force_download=True)

    # Minimal stand-in for the argparse namespace that
    # preprocess_text_encoder_tokenizer expects
    class Args:
        def __init__(self, input_dir, output_dir):
            self.input_dir = input_dir
            self.output_dir = output_dir

    # Extract the text encoder and tokenizer from the LLaVA checkpoint
    # into ckpts/text_encoder
    args = Args("ckpts/llava-llama-3-8b-v1_1-transformers", "ckpts/text_encoder")
    preprocess_text_encoder_tokenizer(args)

    snapshot_download(repo_id="openai/clip-vit-large-patch14", repo_type="model", local_dir="ckpts/text_encoder_2", force_download=True)
def initialize_model(model_path):
    print(f"initialize_model: {model_path}")
    if torch.cuda.device_count() == 0:
        return None
    args = parse_args()
    models_root_path = Path(model_path)
    if not models_root_path.exists():
        raise ValueError(f"`models_root` does not exist: {models_root_path}")
    print(f"`models_root` exists: {models_root_path}")
    hunyuan_video_sampler = HunyuanVideoSampler.from_pretrained(models_root_path, args=args)
    print(f"Model initialized: {model_path}")
    return hunyuan_video_sampler
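
# Build the sampler once at startup; on a CPU-only host this stays None and
# generate_video_gpu shows a gr.Warning instead of attempting inference.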
model = initialize_model("ckpts")
def generate_video(
    prompt,
    resolution,
    video_length,
    seed,
    num_inference_steps,
    guidance_scale,
    flow_shift,
    embedded_guidance_scale
):
    print(f"generate_video (prompt: {prompt})")
    return generate_video_gpu(
        model,
        prompt,
        resolution,
        video_length,
        seed,
        num_inference_steps,
        guidance_scale,
        flow_shift,
        embedded_guidance_scale
    )
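
# The otherwise-unused `spaces` import suggests this worker is meant to run
# under a ZeroGPU allocation via the @spaces.GPU decorator; the duration below
# is an assumed upper bound, not a value from the original file.
@spaces.GPU(duration=120)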
def generate_video_gpu(
    model,
    prompt,
    resolution,
    video_length,
    seed,
    num_inference_steps,
    guidance_scale,
    flow_shift,
    embedded_guidance_scale
):
    print(f"generate_video_gpu (prompt: {prompt})")
    if torch.cuda.device_count() == 0:
        gr.Warning("Set this space to GPU config to make it work.")
        return None
    # -1 means "let the sampler pick a random seed"
    seed = None if seed == -1 else seed
    width, height = resolution.split("x")
    width, height = int(width), int(height)
    negative_prompt = ""  # not applied during inference
    print("Predicting video...")
    outputs = model.predict(
        prompt=prompt,
        height=height,
        width=width,
        video_length=video_length,
        seed=seed,
        negative_prompt=negative_prompt,
        infer_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        num_videos_per_prompt=1,
        flow_shift=flow_shift,
        batch_size=1,
        embedded_guidance_scale=embedded_guidance_scale
    )
    print("Video predicted")
    samples = outputs["samples"]
    sample = samples[0].unsqueeze(0)
    save_path = "./gradio_outputs"
    os.makedirs(save_path, exist_ok=True)
    time_flag = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d-%H:%M:%S")
    video_path = f"{save_path}/{time_flag}_seed{outputs['seeds'][0]}_{outputs['prompts'][0][:100].replace('/', '')}.mp4"
    save_videos_grid(sample, video_path, fps=24)
    logger.info(f"Sample saved to: {video_path}")
    print("Return the video")
    return video_path
def create_demo(model_path):
    with gr.Blocks() as demo:
        if torch.cuda.device_count() == 0:
            with gr.Row():
                gr.HTML("""
                <p style="background-color: red;"><big><big><big><b>⚠️To use <i>Hunyuan Video</i>, <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/HunyuanVideo?duplicate=true">duplicate this space</a> and set a GPU with 80 GB VRAM.</b>
                You can't use <i>Hunyuan Video</i> directly here because this space runs on a CPU, which is not powerful enough for <i>Hunyuan Video</i>. Please provide <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/HunyuanVideo/discussions/new">feedback</a> if you run into issues.
                </big></big></big></p><br/>
                <p style="background-color: lightgreen;"><big>This space was successfully deployed on an A100 on 2025-01-23. Synchronize your space to fix the errors.</big></p>
                """)
        gr.Markdown("# Hunyuan Video Generation")
        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(label="Prompt", value="A cat walks on the grass, realistic style.")
                with gr.Row():
                    resolution = gr.Dropdown(
                        choices=[
                            # 720p
                            ("1280x720 (16:9, 720p)", "1280x720"),
                            ("720x1280 (9:16, 720p)", "720x1280"),
                            ("1104x832 (4:3, 720p)", "1104x832"),
                            ("832x1104 (3:4, 720p)", "832x1104"),
                            ("960x960 (1:1, 720p)", "960x960"),
                            # 540p
                            ("960x544 (16:9, 540p)", "960x544"),
                            ("544x960 (9:16, 540p)", "544x960"),
                            ("832x624 (4:3, 540p)", "832x624"),
                            ("624x832 (3:4, 540p)", "624x832"),
                            ("720x720 (1:1, 540p)", "720x720"),
                        ],
                        value="832x624",
                        label="Resolution"
                    )
                    video_length = gr.Dropdown(
                        label="Video Length",
                        choices=[
                            ("2s (65 frames)", 65),
                            ("5s (129 frames)", 129),
                        ],
                        value=65,
                    )
                num_inference_steps = gr.Slider(1, 100, value=5, step=1, label="Number of Inference Steps")
                with gr.Accordion("Advanced Options", open=False):
                    with gr.Column():
                        seed = gr.Slider(label="Seed (-1 for random)", value=-1, minimum=-1, maximum=2**63 - 1, step=1)
                        guidance_scale = gr.Slider(1.0, 20.0, value=1.0, step=0.5, label="Guidance Scale")
                        flow_shift = gr.Slider(0.0, 10.0, value=7.0, step=0.1, label="Flow Shift")
                        embedded_guidance_scale = gr.Slider(1.0, 20.0, value=6.0, step=0.5, label="Embedded Guidance Scale")
                generate_btn = gr.Button(value="🚀 Generate Video", variant="primary")
        with gr.Row():
            output = gr.Video(label="Generated Video", autoplay=True)
        gr.Markdown("""
        ## **Alternatives**
        If you can't use _Hunyuan Video_, you can use _[CogVideoX](https://huggingface.co/spaces/THUDM/CogVideoX-5B-Space)_ or _[LTX Video Playground](https://huggingface.co/spaces/Lightricks/LTX-Video-Playground)_ instead.
        """)
        generate_btn.click(
            fn=generate_video,
            inputs=[
                prompt,
                resolution,
                video_length,
                seed,
                num_inference_steps,
                guidance_scale,
                flow_shift,
                embedded_guidance_scale
            ],
            outputs=output
        )
    return demo
if __name__ == "__main__":
    os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
    demo = create_demo("ckpts")
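    # NOTE: the meaning of a positional argument to queue() depends on the
    # Gradio version (it was concurrency_count in 3.x); this assumes the
    # Gradio version pinned by the Space accepts it.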
    demo.queue(10).launch()