import gc
import os
import tempfile
from pathlib import Path

# NOTE(review): `spaces` was the first import in the original file; it is kept
# ahead of every torch-based import here. Confirm before reordering.
import spaces

import gradio as gr
import numpy as np
import torch
from diffusers import GGUFQuantizationConfig, HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
from diffusers.utils import export_to_video
from huggingface_hub import snapshot_download

# Start from a clean slate: collect unreachable Python objects and release any
# cached CUDA allocations before the models are loaded.
gc.collect()
torch.cuda.empty_cache()

# Nothing in the visible script computes gradients, so autograd is switched off.
torch.set_grad_enabled(False)

# Request deterministic cuDNN kernels and disable the cuDNN auto-tuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Hugging Face repository id of the base HunyuanVideo pipeline.
model_id = "hunyuanvideo-community/HunyuanVideo"

# Download a snapshot of the repository into a fixed local directory.
base_path = f"/home/user/app/{model_id}"
os.makedirs(base_path, exist_ok=True)
snapshot_download(repo_id=model_id, local_dir=base_path)

# Path object passed to `HunyuanVideoPipeline.from_pretrained` below.
ckp_path = Path(base_path)

# GGUF checkpoint of the text-to-video transformer (Q4_0, per its filename),
# addressed by its URL in the city96/HunyuanVideo-gguf repository.
gguf_filename = "hunyuan-video-t2v-720p-Q4_0.gguf"
transformer_path = f"https://huggingface.co/city96/HunyuanVideo-gguf/blob/main/{gguf_filename}"

# Load the quantized transformer with bfloat16 as its compute dtype, then move
# it to the GPU.
transformer = HunyuanVideoTransformer3DModel.from_single_file(
    transformer_path,
    quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
    torch_dtype=torch.bfloat16,
)
transformer = transformer.to("cuda")

# Build the pipeline from the local snapshot, substituting the GGUF transformer
# loaded above for the one referenced by the snapshot.
pipe = HunyuanVideoPipeline.from_pretrained(
    ckp_path,
    transformer=transformer,
    torch_dtype=torch.float16,
)

# Explicit None check: relying on the truthiness of a module object is fragile.
if pipe.text_encoder is not None:
    pipe.text_encoder = pipe.text_encoder.to("cuda")
    pipe.text_encoder.eval()

# Tiled and sliced VAE processing, then eval mode, before moving it to the GPU.
pipe.vae.enable_tiling()
pipe.vae.enable_slicing()
pipe.vae.eval()
pipe.vae = pipe.vae.to("cuda")

# Finally move the whole pipeline to the GPU.
pipe = pipe.to("cuda")

# Attach the "hunyuan flat color v2" LoRA under a named adapter and activate it
# with a weight of 1.2.
pipe.load_lora_weights(
    "sergidev/IllustrationTTV",
    weight_name="hunyuan_flat_color_v2.safetensors",
    adapter_name="hyvid_lora_adapter",
)
pipe.set_adapters("hyvid_lora_adapter", 1.2)

# Release whatever temporary memory model loading left behind.
gc.collect()
torch.cuda.empty_cache()

# Upper bound for the seed slider and for randomly drawn seeds (int32 maximum).
MAX_SEED = np.iinfo(np.int32).max
# Upper bound, in pixels, for the height and width sliders.
MAX_IMAGE_SIZE = 1024


@spaces.GPU(duration=120)
def generate(
    prompt,
    height,
    width,
    num_frames,
    num_inference_steps,
    seed_value,
    fps,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate a video for ``prompt`` and return the path of the MP4 file.

    Args:
        prompt: Text description of the video.
        height: Frame height in pixels.
        width: Frame width in pixels.
        num_frames: Number of frames requested from the pipeline.
        num_inference_steps: Number of denoising steps.
        seed_value: Seed for the CUDA generator; ``-1`` draws a random seed
            in ``[0, MAX_SEED)``.
        fps: Frame rate used when encoding the MP4.
        progress: Gradio progress tracker; with ``track_tqdm=True`` it mirrors
            tqdm progress bars in the UI.

    Returns:
        Path of a newly created ``.mp4`` file in the system temp directory.
    """
    # The UI sliders may deliver floats (num_frames is declared with float
    # bounds), while the seed, sizes and counts must be integers.
    height = int(height)
    width = int(width)
    num_frames = int(num_frames)
    num_inference_steps = int(num_inference_steps)
    seed_value = int(seed_value)
    fps = int(fps)

    try:
        with torch.cuda.device(0):
            if seed_value == -1:
                seed_value = torch.randint(0, MAX_SEED, (1,)).item()
            generator = torch.Generator("cuda").manual_seed(seed_value)

            # inference_mode already disables gradient tracking, so a separate
            # no_grad context is not needed.
            with torch.autocast("cuda", dtype=torch.bfloat16), torch.inference_mode():
                frames = pipe(
                    prompt=prompt,
                    height=height,
                    width=width,
                    num_frames=num_frames,
                    num_inference_steps=num_inference_steps,
                    generator=generator,
                ).frames[0]

        # A unique file per request: a single fixed name would let concurrent
        # requests overwrite each other's video.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
            output_path = tmp.name
        export_to_video(frames, output_path, fps=fps)
        return output_path
    finally:
        # Free GPU and Python memory even when generation or encoding fails.
        torch.cuda.empty_cache()
        gc.collect()


# Slider values per preset, ordered as the preset buttons' outputs:
# (height, width, num_frames, num_inference_steps, fps).
_PRESETS = {
    "Higher Resolution": (608, 448, 24, 29, 12),
    "More Frames": (512, 320, 42, 27, 14),
}


def apply_preset(preset_name, *current_values):
    """Return the slider values for ``preset_name``.

    Args:
        preset_name: ``"Higher Resolution"`` or ``"More Frames"``.
        *current_values: Values returned unchanged when the preset is unknown.

    Returns:
        A new list ``[height, width, num_frames, num_inference_steps, fps]``
        for a known preset; otherwise the ``current_values`` tuple as given.
    """
    preset = _PRESETS.get(preset_name)
    if preset is None:
        return current_values
    # Fresh list per call so callers cannot mutate the shared table.
    return list(preset)


# Custom stylesheet passed to gr.Blocks; selectors match the elem_id and
# elem_classes values used when the UI is built.
css = """
#col-container {
    margin: 0 auto;
    max-width: 850px;
}

.dark-theme {
    background-color: #1f1f1f;
    color: #ffffff;
}

.container {
    margin: 0 auto;
    padding: 20px;
    border-radius: 10px;
    background-color: #2d2d2d;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}

.title {
    text-align: center;
    margin-bottom: 1em;
    color: #ffffff;
}

.description {
    text-align: center;
    margin-bottom: 2em;
    color: #cccccc;
    font-size: 0.95em;
    line-height: 1.5;
}

.prompt-container {
    background-color: #363636;
    padding: 15px;
    border-radius: 8px;
    margin-bottom: 1em;
    width: 100%;
}

.prompt-textbox {
    min-height: 80px !important;
}

.preset-buttons {
    display: flex;
    gap: 10px;
    justify-content: center;
    margin-bottom: 1em;
}

.support-text {
    text-align: center;
    margin-top: 1em;
    color: #cccccc;
    font-size: 0.9em;
}

a {
    color: #00a7e1;
    text-decoration: none;
}

a:hover {
    text-decoration: underline;
}
"""


# Gradio front end: prompt box, generate button, two preset buttons, the video
# output and an accordion of advanced sliders.
# NOTE(review): the original indentation was lost, so the nesting below is a
# reconstruction; in particular, confirm that the fps slider belongs in the
# same row as the frame/step sliders.
# NOTE(review): "dark" does not look like a built-in Gradio theme name;
# confirm it resolves to the intended theme.
with gr.Blocks(css=css, theme="dark") as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# 🎬 Illustration TTV", elem_classes=["title"])
        gr.Markdown(
            "Transform your text descriptions into illustrative videos using HunyuanVideo for free!\n"
            "This space uses the 'hunyuan flat color v2' LORA by Motimalu to generate better 2d animated sequences. Prompt only handles 77 tokens.\n"
            "\n"
            "If you find this useful, please consider giving the space a ❤️ and supporting me on [Ko-Fi](https://ko-fi.com/sergidev)!",
            elem_classes=["description"],
        )

        with gr.Column(elem_classes=["prompt-container"]):
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Enter your prompt here (Include the terms 'flat color, no lineart, blending' for 2d illustration)",
                show_label=False,
                elem_classes=["prompt-textbox"],
                lines=3,
            )

        with gr.Row():
            run_button = gr.Button("🎨 Generate", variant="primary", size="lg")

        with gr.Row(elem_classes=["preset-buttons"]):
            preset_high_res = gr.Button("📺 Higher Resolution Preset")
            preset_more_frames = gr.Button("🎞️ More Frames Preset")

        with gr.Row():
            result = gr.Video(label="Generated Video")

        with gr.Accordion("⚙️ Advanced Settings", open=False):
            seed = gr.Slider(
                label="Seed (-1 for random)",
                minimum=-1,
                maximum=MAX_SEED,
                step=1,
                value=-1,
            )
            with gr.Row():
                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=16,
                    value=608,
                )
                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=16,
                    value=448,
                )
            with gr.Row():
                # Integer bounds, consistent with the other sliders.
                num_frames = gr.Slider(
                    label="Number of frames to generate",
                    minimum=1,
                    maximum=257,
                    step=1,
                    value=24,
                )
                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=29,
                )
                fps = gr.Slider(
                    label="Frames per second",
                    minimum=1,
                    maximum=60,
                    step=1,
                    value=12,
                )

    # Input order must match the positional parameters of generate().
    run_button.click(
        fn=generate,
        inputs=[prompt, height, width, num_frames, num_inference_steps, seed, fps],
        outputs=[result],
    )

    # Each preset button overwrites the five sliders listed in `outputs`, in
    # the order apply_preset() returns them.
    preset_high_res.click(
        fn=lambda: apply_preset("Higher Resolution"),
        outputs=[height, width, num_frames, num_inference_steps, fps],
    )

    preset_more_frames.click(
        fn=lambda: apply_preset("More Frames"),
        outputs=[height, width, num_frames, num_inference_steps, fps],
    )