svjack/hunyuan_video_pixel_early_lora

Installation

# System packages: ffmpeg and git-lfs.
sudo apt-get update && sudo apt-get install ffmpeg git-lfs 
# Python dependencies (moviepy is pinned to 1.0.3).
pip install torch torchvision diffusers transformers moviepy==1.0.3 peft safetensors
# Fetch this repository; the inference example below loads a .safetensors LoRA file from it.
git clone https://huggingface.co/svjack/hunyuan_video_pixel_early_lora

Inference

import torch
from diffusers import HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
from diffusers.utils import export_to_video
from safetensors.torch import load_file
import os

def infer_video(
    pretrained_model,
    prompt,
    height,
    width,
    num_frames,
    num_inference_steps,
    seed,
    output_dir,
    use_lora=False,
    lora_path=None,
    alpha=None,
    fps=15,
):
    """
    Generate a video with HunyuanVideo, optionally applying a LoRA adapter.

    The result is written to ``output_dir`` as ``output_lora.mp4`` when
    ``use_lora`` is true and as ``output_base.mp4`` otherwise.

    Args:
        pretrained_model (str): Model path or id passed to ``from_pretrained``.
        prompt (str): Text prompt describing the video to generate.
        height (int): Height of the generated video.
        width (int): Width of the generated video.
        num_frames (int): Number of frames to generate.
        num_inference_steps (int): Number of inference steps.
        seed (int): Seed for the CPU random generator.
        output_dir (str): Directory the video is written to; it is created
            if it does not exist.
        use_lora (bool): Whether to apply a LoRA adapter. Defaults to False.
        lora_path (str): Path to the LoRA ``.safetensors`` file. Required
            when ``use_lora`` is True, ignored otherwise.
        alpha (int | float): LoRA alpha. The adapter weight is
            ``alpha / rank``. Defaults to 1 when omitted. Only used when
            ``use_lora`` is True.
        fps (int): Frame rate of the exported video. Defaults to 15.

    Raises:
        ValueError: If ``use_lora`` is True and ``lora_path`` is None, or if
            the LoRA file contains no ``.lora_A.weight`` tensor from which a
            non-zero rank can be read.
    """
    # Validate the cheap arguments first so a bad call fails before any
    # model weights are loaded.
    if use_lora and lora_path is None:
        raise ValueError("lora_path must be provided when use_lora is True")

    # Create the destination up front: a missing directory would otherwise
    # only surface after the whole generation has finished.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    lora_sd = None
    lora_weight = None
    if use_lora:
        # Read the LoRA state dict and take the rank from the first dimension
        # of the lora_A matrices (the last matching key wins).
        lora_sd = load_file(lora_path)
        rank = 0
        for key, tensor in lora_sd.items():
            if ".lora_A.weight" in key:
                rank = tensor.shape[0]

        if rank == 0:
            # Without this guard alpha / rank raises a bare ZeroDivisionError.
            raise ValueError(
                f"could not determine the LoRA rank: no non-empty "
                f"'.lora_A.weight' tensor found in {lora_path}"
            )

        alpha = 1 if alpha is None else alpha
        lora_weight = alpha / rank

        print(f"lora rank = {rank}")
        print(f"alpha = {alpha}")
        print(f"lora weight = {lora_weight}")

    # Load the transformer separately in bfloat16.
    transformer = HunyuanVideoTransformer3DModel.from_pretrained(
        pretrained_model,
        subfolder="transformer",
        torch_dtype=torch.bfloat16,
    )

    if use_lora:
        # Attach the adapter and scale it by alpha / rank.
        transformer.load_lora_adapter(lora_sd, adapter_name="default_lora")
        transformer.set_adapters(adapter_names="default_lora", weights=lora_weight)

    # The transformer is handed to the pipeline here, so no separate
    # assignment to pipe.transformer is needed afterwards.
    pipe = HunyuanVideoPipeline.from_pretrained(
        pretrained_model,
        transformer=transformer,
        torch_dtype=torch.float16,
    )

    pipe.vae.enable_tiling(
        tile_sample_min_height=256,
        tile_sample_min_width=256,
        tile_sample_min_num_frames=64,
        tile_sample_stride_height=192,
        tile_sample_stride_width=192,
        tile_sample_stride_num_frames=16,
    )
    pipe.enable_sequential_cpu_offload()

    # Run inference; the generator is seeded on the CPU.
    output = pipe(
        prompt=prompt,
        height=height,
        width=width,
        num_frames=num_frames,
        num_inference_steps=num_inference_steps,
        generator=torch.Generator(device="cpu").manual_seed(seed),
    ).frames[0]

    # Export the video.
    output_filename = "output_lora.mp4" if use_lora else "output_base.mp4"
    export_to_video(
        output,
        os.path.join(output_dir, output_filename),
        fps=fps,
    )

# Generation settings shared by both demo runs below.
shared_settings = {
    "pretrained_model": "hunyuanvideo-community/HunyuanVideo",
    "height": 512,
    "width": 512,
    "num_frames": 33,
    "num_inference_steps": 20,
    "seed": 42,
    "output_dir": "./",
}

### Base
# Baseline run: the base model only, no adapter.
infer_video(
    prompt="The video showcases a pixel art animation featuring charming anime-style scene featuring a pink-haired girl with angel wings. She's seated at a desk, enjoying a donut while working on a laptop. The setting is a cozy, pastel-colored room with a pink chair, a milk carton, and a coffee cup. The girl's expression is one of delight as she savors her treat",
    use_lora=False,
    **shared_settings,
)

### With Lora
# Same settings with the LoRA checkpoint applied at alpha=16.
infer_video(
    prompt="The video showcases a pixel art animation featuring a serene and majestic snowy mountain landscape. The scene is dominated by towering peaks covered in pristine white snow, with a soft gradient of blue and purple hues in the sky. A small cabin with a smoking chimney sits at the base of the mountain, surrounded by pine trees dusted with snow. A winding path leads up the mountain, with footprints visible in the snow. The atmosphere is calm and peaceful, evoking a sense of solitude and wonder.",
    use_lora=True,
    lora_path="hunyuan_video_pixel_early_lora/hyv-lora-00000700.safetensors",
    alpha=16,
    **shared_settings,
)

Demo

Girl Demo

The video showcases a pixel art animation featuring charming anime-style scene featuring a pink-haired girl with angel wings. She's seated at a desk, enjoying a donut while working on a laptop. The setting is a cozy, pastel-colored room with a pink chair, a milk carton, and a coffee cup. The girl's expression is one of delight as she savors her treat.

Without LoRA

With LoRA 600 step

With LoRA 700 step

Snow Hill Demo

The video showcases a pixel art animation featuring a serene and majestic snowy mountain landscape. The scene is dominated by towering peaks covered in pristine white snow, with a soft gradient of blue and purple hues in the sky. A small cabin with a smoking chimney sits at the base of the mountain, surrounded by pine trees dusted with snow. A winding path leads up the mountain, with footprints visible in the snow. The atmosphere is calm and peaceful, evoking a sense of solitude and wonder.