Spaces:

Smart44
/

HunyuanVideo

Running

App Files Community

Fabrice-TIERCELIN committed on Jan 24

Commit

82766d1

verified ·

1 Parent(s): 8bbb26f

Fix errors

Browse files

Files changed (1) hide show

app.py +50 -10

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ import spaces
 import torch
 from hyvideo.utils.file_utils import save_videos_grid
 from hyvideo.config import parse_args
 from hyvideo.inference import HunyuanVideoSampler
 from hyvideo.constants import NEGATIVE_PROMPT
@@ -17,9 +18,20 @@ from huggingface_hub import snapshot_download
 if torch.cuda.device_count() > 0:
     snapshot_download(repo_id="tencent/HunyuanVideo", repo_type="model", local_dir="ckpts", force_download=True)
 def initialize_model(model_path):
-    print('initialize_model: ' + model_path)
     if torch.cuda.device_count() == 0:
         return None
@@ -30,11 +42,36 @@ def initialize_model(model_path):
     print(f"`models_root` exists: {models_root_path}")
     hunyuan_video_sampler = HunyuanVideoSampler.from_pretrained(models_root_path, args=args)
-    print('Model initialized: ' + model_path)
     return hunyuan_video_sampler
-@spaces.GPU(duration=120)
 def generate_video(
     model,
     prompt,
     resolution,
@@ -45,14 +82,16 @@ def generate_video(
     flow_shift,
     embedded_guidance_scale
 ):
     if torch.cuda.device_count() == 0:
-        gr.Warning('Set this space to GPU config to make it work.')
         return None
     seed = None if seed == -1 else seed
     width, height = resolution.split("x")
     width, height = int(width), int(height)
     negative_prompt = "" # not applicable in the inference
     outputs = model.predict(
         prompt=prompt,
@@ -69,7 +108,8 @@ def generate_video(
         embedded_guidance_scale=embedded_guidance_scale
     )
-    samples = outputs['samples']
     sample = samples[0].unsqueeze(0)
     save_path = "./gradio_outputs"
@@ -78,13 +118,12 @@ def generate_video(
     time_flag = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d-%H:%M:%S")
     video_path = f"{save_path}/{time_flag}_seed{outputs['seeds'][0]}_{outputs['prompts'][0][:100].replace('/','')}.mp4"
     save_videos_grid(sample, video_path, fps=24)
-    logger.info(f'Sample saved to: {video_path}')
     return video_path
 def create_demo(model_path):
-    model = initialize_model(model_path)
     with gr.Blocks() as demo:
         if torch.cuda.device_count() == 0:
             with gr.Row():
@@ -92,7 +131,8 @@ def create_demo(model_path):
                     <p style="background-color: red;"><big><big><big><b>⚠️To use <i>Hunyuan Video</i>, <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/HunyuanVideo?duplicate=true">duplicate this space</a> and set a GPU with 80 GB VRAM.</b>
                     You can't use <i>Hunyuan Video</i> directly here because this space runs on a CPU, which is not enough for <i>Hunyuan Video</i>. Please provide <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/HunyuanVideo/discussions/new">feedback</a> if you have issues.
-                    </big></big></big></p>
                     """)
         gr.Markdown("# Hunyuan Video Generation")
@@ -146,7 +186,7 @@ If you can't use _Hunyuan Video_, you can use _[CogVideoX](https://huggingface.c
                     """)
         generate_btn.click(
-            fn=lambda *inputs: generate_video(model, *inputs),
             inputs=[
                 prompt,
                 resolution,

 import torch
 from hyvideo.utils.file_utils import save_videos_grid
+from hyvideo.utils.preprocess_text_encoder_tokenizer_utils import preprocess_text_encoder_tokenizer
 from hyvideo.config import parse_args
 from hyvideo.inference import HunyuanVideoSampler
 from hyvideo.constants import NEGATIVE_PROMPT
 if torch.cuda.device_count() > 0:
     snapshot_download(repo_id="tencent/HunyuanVideo", repo_type="model", local_dir="ckpts", force_download=True)
+    snapshot_download(repo_id="xtuner/llava-llama-3-8b-v1_1-transformers", repo_type="model", local_dir="ckpts/llava-llama-3-8b-v1_1-transformers", force_download=True)
+    class Args:
+        def __init__(self, input_dir, output_dir):
+            self.input_dir = input_dir
+            self.output_dir = output_dir
+    # Create the object
+    args = Args("ckpts/llava-llama-3-8b-v1_1-transformers", "ckpts/text_encoder")
+    preprocess_text_encoder_tokenizer(args)
+    snapshot_download(repo_id="openai/clip-vit-large-patch14", repo_type="model", local_dir="ckpts/text_encoder_2", force_download=True)
 def initialize_model(model_path):
+    print("initialize_model: " + model_path)
     if torch.cuda.device_count() == 0:
         return None
     print(f"`models_root` exists: {models_root_path}")
     hunyuan_video_sampler = HunyuanVideoSampler.from_pretrained(models_root_path, args=args)
+    print("Model initialized: " + model_path)
     return hunyuan_video_sampler
+model = initialize_model("ckpts")
 def generate_video(
+    prompt,
+    resolution,
+    video_length,
+    seed,
+    num_inference_steps,
+    guidance_scale,
+    flow_shift,
+    embedded_guidance_scale
+):
+    print("generate_video (prompt: " + prompt + ")")
+    return generate_video_gpu(
+        model,
+        prompt,
+        resolution,
+        video_length,
+        seed,
+        num_inference_steps,
+        guidance_scale,
+        flow_shift,
+        embedded_guidance_scale
+    )
+@spaces.GPU(duration=120)
+def generate_video_gpu(
     model,
     prompt,
     resolution,
     flow_shift,
     embedded_guidance_scale
 ):
+    print("generate_video_gpu (prompt: " + prompt + ")")
     if torch.cuda.device_count() == 0:
+        gr.Warning("Set this space to GPU config to make it work.")
         return None
     seed = None if seed == -1 else seed
     width, height = resolution.split("x")
     width, height = int(width), int(height)
     negative_prompt = "" # not applicable in the inference
+    print("Predicting video...")
     outputs = model.predict(
         prompt=prompt,
         embedded_guidance_scale=embedded_guidance_scale
     )
+    print("Video predicted")
+    samples = outputs["samples"]
     sample = samples[0].unsqueeze(0)
     save_path = "./gradio_outputs"
     time_flag = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d-%H:%M:%S")
     video_path = f"{save_path}/{time_flag}_seed{outputs['seeds'][0]}_{outputs['prompts'][0][:100].replace('/','')}.mp4"
     save_videos_grid(sample, video_path, fps=24)
+    logger.info(f"Sample saved to: {video_path}")
+    print("Return the video")
     return video_path
 def create_demo(model_path):
     with gr.Blocks() as demo:
         if torch.cuda.device_count() == 0:
             with gr.Row():
                     <p style="background-color: red;"><big><big><big><b>⚠️To use <i>Hunyuan Video</i>, <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/HunyuanVideo?duplicate=true">duplicate this space</a> and set a GPU with 80 GB VRAM.</b>
                     You can't use <i>Hunyuan Video</i> directly here because this space runs on a CPU, which is not enough for <i>Hunyuan Video</i>. Please provide <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/HunyuanVideo/discussions/new">feedback</a> if you have issues.
+                    </big></big></big></p><br/>
+                    <p style="background-color: light-green;"><big>The space has been successfully deployed on A100 space on 2025-01-23. Synchronize your space to fix the errors.</big></p>
                     """)
         gr.Markdown("# Hunyuan Video Generation")
                     """)
         generate_btn.click(
+            fn=generate_video,
             inputs=[
                 prompt,
                 resolution,