Spaces:
Running
Running
Fabrice-TIERCELIN
committed on
Fix errors
Browse files
app.py
CHANGED
@@ -9,6 +9,7 @@ import spaces
|
|
9 |
import torch
|
10 |
|
11 |
from hyvideo.utils.file_utils import save_videos_grid
|
|
|
12 |
from hyvideo.config import parse_args
|
13 |
from hyvideo.inference import HunyuanVideoSampler
|
14 |
from hyvideo.constants import NEGATIVE_PROMPT
|
@@ -17,9 +18,20 @@ from huggingface_hub import snapshot_download
|
|
17 |
|
18 |
if torch.cuda.device_count() > 0:
|
19 |
snapshot_download(repo_id="tencent/HunyuanVideo", repo_type="model", local_dir="ckpts", force_download=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
def initialize_model(model_path):
|
22 |
-
print(
|
23 |
if torch.cuda.device_count() == 0:
|
24 |
return None
|
25 |
|
@@ -30,11 +42,36 @@ def initialize_model(model_path):
|
|
30 |
|
31 |
print(f"`models_root` exists: {models_root_path}")
|
32 |
hunyuan_video_sampler = HunyuanVideoSampler.from_pretrained(models_root_path, args=args)
|
33 |
-
print(
|
34 |
return hunyuan_video_sampler
|
35 |
|
36 |
-
|
|
|
37 |
def generate_video(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
model,
|
39 |
prompt,
|
40 |
resolution,
|
@@ -45,14 +82,16 @@ def generate_video(
|
|
45 |
flow_shift,
|
46 |
embedded_guidance_scale
|
47 |
):
|
|
|
48 |
if torch.cuda.device_count() == 0:
|
49 |
-
gr.Warning(
|
50 |
return None
|
51 |
|
52 |
seed = None if seed == -1 else seed
|
53 |
width, height = resolution.split("x")
|
54 |
width, height = int(width), int(height)
|
55 |
negative_prompt = "" # not applicable in the inference
|
|
|
56 |
|
57 |
outputs = model.predict(
|
58 |
prompt=prompt,
|
@@ -69,7 +108,8 @@ def generate_video(
|
|
69 |
embedded_guidance_scale=embedded_guidance_scale
|
70 |
)
|
71 |
|
72 |
-
|
|
|
73 |
sample = samples[0].unsqueeze(0)
|
74 |
|
75 |
save_path = "./gradio_outputs"
|
@@ -78,13 +118,12 @@ def generate_video(
|
|
78 |
time_flag = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d-%H:%M:%S")
|
79 |
video_path = f"{save_path}/{time_flag}_seed{outputs['seeds'][0]}_{outputs['prompts'][0][:100].replace('/','')}.mp4"
|
80 |
save_videos_grid(sample, video_path, fps=24)
|
81 |
-
logger.info(f
|
82 |
|
|
|
83 |
return video_path
|
84 |
|
85 |
def create_demo(model_path):
|
86 |
-
model = initialize_model(model_path)
|
87 |
-
|
88 |
with gr.Blocks() as demo:
|
89 |
if torch.cuda.device_count() == 0:
|
90 |
with gr.Row():
|
@@ -92,7 +131,8 @@ def create_demo(model_path):
|
|
92 |
<p style="background-color: red;"><big><big><big><b>⚠️To use <i>Hunyuan Video</i>, <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/HunyuanVideo?duplicate=true">duplicate this space</a> and set a GPU with 80 GB VRAM.</b>
|
93 |
|
94 |
You can't use <i>Hunyuan Video</i> directly here because this space runs on a CPU, which is not enough for <i>Hunyuan Video</i>. Please provide <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/HunyuanVideo/discussions/new">feedback</a> if you have issues.
|
95 |
-
</big></big></big></p
|
|
|
96 |
""")
|
97 |
gr.Markdown("# Hunyuan Video Generation")
|
98 |
|
@@ -146,7 +186,7 @@ If you can't use _Hunyuan Video_, you can use _[CogVideoX](https://huggingface.c
|
|
146 |
""")
|
147 |
|
148 |
generate_btn.click(
|
149 |
-
fn=
|
150 |
inputs=[
|
151 |
prompt,
|
152 |
resolution,
|
|
|
9 |
import torch
|
10 |
|
11 |
from hyvideo.utils.file_utils import save_videos_grid
|
12 |
+
from hyvideo.utils.preprocess_text_encoder_tokenizer_utils import preprocess_text_encoder_tokenizer
|
13 |
from hyvideo.config import parse_args
|
14 |
from hyvideo.inference import HunyuanVideoSampler
|
15 |
from hyvideo.constants import NEGATIVE_PROMPT
|
|
|
18 |
|
19 |
if torch.cuda.device_count() > 0:
|
20 |
snapshot_download(repo_id="tencent/HunyuanVideo", repo_type="model", local_dir="ckpts", force_download=True)
|
21 |
+
snapshot_download(repo_id="xtuner/llava-llama-3-8b-v1_1-transformers", repo_type="model", local_dir="ckpts/llava-llama-3-8b-v1_1-transformers", force_download=True)
|
22 |
+
|
23 |
+
class Args:
|
24 |
+
def __init__(self, input_dir, output_dir):
|
25 |
+
self.input_dir = input_dir
|
26 |
+
self.output_dir = output_dir
|
27 |
+
|
28 |
+
# Create the object
|
29 |
+
args = Args("ckpts/llava-llama-3-8b-v1_1-transformers", "ckpts/text_encoder")
|
30 |
+
preprocess_text_encoder_tokenizer(args)
|
31 |
+
snapshot_download(repo_id="openai/clip-vit-large-patch14", repo_type="model", local_dir="ckpts/text_encoder_2", force_download=True)
|
32 |
|
33 |
def initialize_model(model_path):
|
34 |
+
print("initialize_model: " + model_path)
|
35 |
if torch.cuda.device_count() == 0:
|
36 |
return None
|
37 |
|
|
|
42 |
|
43 |
print(f"`models_root` exists: {models_root_path}")
|
44 |
hunyuan_video_sampler = HunyuanVideoSampler.from_pretrained(models_root_path, args=args)
|
45 |
+
print("Model initialized: " + model_path)
|
46 |
return hunyuan_video_sampler
|
47 |
|
48 |
+
model = initialize_model("ckpts")
|
49 |
+
|
50 |
def generate_video(
|
51 |
+
prompt,
|
52 |
+
resolution,
|
53 |
+
video_length,
|
54 |
+
seed,
|
55 |
+
num_inference_steps,
|
56 |
+
guidance_scale,
|
57 |
+
flow_shift,
|
58 |
+
embedded_guidance_scale
|
59 |
+
):
|
60 |
+
print("generate_video (prompt: " + prompt + ")")
|
61 |
+
return generate_video_gpu(
|
62 |
+
model,
|
63 |
+
prompt,
|
64 |
+
resolution,
|
65 |
+
video_length,
|
66 |
+
seed,
|
67 |
+
num_inference_steps,
|
68 |
+
guidance_scale,
|
69 |
+
flow_shift,
|
70 |
+
embedded_guidance_scale
|
71 |
+
)
|
72 |
+
|
73 |
+
@spaces.GPU(duration=120)
|
74 |
+
def generate_video_gpu(
|
75 |
model,
|
76 |
prompt,
|
77 |
resolution,
|
|
|
82 |
flow_shift,
|
83 |
embedded_guidance_scale
|
84 |
):
|
85 |
+
print("generate_video_gpu (prompt: " + prompt + ")")
|
86 |
if torch.cuda.device_count() == 0:
|
87 |
+
gr.Warning("Set this space to GPU config to make it work.")
|
88 |
return None
|
89 |
|
90 |
seed = None if seed == -1 else seed
|
91 |
width, height = resolution.split("x")
|
92 |
width, height = int(width), int(height)
|
93 |
negative_prompt = "" # not applicable in the inference
|
94 |
+
print("Predicting video...")
|
95 |
|
96 |
outputs = model.predict(
|
97 |
prompt=prompt,
|
|
|
108 |
embedded_guidance_scale=embedded_guidance_scale
|
109 |
)
|
110 |
|
111 |
+
print("Video predicted")
|
112 |
+
samples = outputs["samples"]
|
113 |
sample = samples[0].unsqueeze(0)
|
114 |
|
115 |
save_path = "./gradio_outputs"
|
|
|
118 |
time_flag = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d-%H:%M:%S")
|
119 |
video_path = f"{save_path}/{time_flag}_seed{outputs['seeds'][0]}_{outputs['prompts'][0][:100].replace('/','')}.mp4"
|
120 |
save_videos_grid(sample, video_path, fps=24)
|
121 |
+
logger.info(f"Sample saved to: {video_path}")
|
122 |
|
123 |
+
print("Return the video")
|
124 |
return video_path
|
125 |
|
126 |
def create_demo(model_path):
|
|
|
|
|
127 |
with gr.Blocks() as demo:
|
128 |
if torch.cuda.device_count() == 0:
|
129 |
with gr.Row():
|
|
|
131 |
<p style="background-color: red;"><big><big><big><b>⚠️To use <i>Hunyuan Video</i>, <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/HunyuanVideo?duplicate=true">duplicate this space</a> and set a GPU with 80 GB VRAM.</b>
|
132 |
|
133 |
You can't use <i>Hunyuan Video</i> directly here because this space runs on a CPU, which is not enough for <i>Hunyuan Video</i>. Please provide <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/HunyuanVideo/discussions/new">feedback</a> if you have issues.
|
134 |
+
</big></big></big></p><br/>
|
135 |
+
<p style="background-color: light-green;"><big>The space has been successfully deployed on A100 space on 2025-01-23. Synchronize your space to fix the errors.</big></p>
|
136 |
""")
|
137 |
gr.Markdown("# Hunyuan Video Generation")
|
138 |
|
|
|
186 |
""")
|
187 |
|
188 |
generate_btn.click(
|
189 |
+
fn=generate_video,
|
190 |
inputs=[
|
191 |
prompt,
|
192 |
resolution,
|