Fabrice-TIERCELIN committed
Commit 82766d1 · verified · 1 Parent(s): 8bbb26f

Fix errors

Files changed (1)
1. app.py +50 -10
app.py CHANGED
@@ -9,6 +9,7 @@ import spaces
 import torch
 
 from hyvideo.utils.file_utils import save_videos_grid
+from hyvideo.utils.preprocess_text_encoder_tokenizer_utils import preprocess_text_encoder_tokenizer
 from hyvideo.config import parse_args
 from hyvideo.inference import HunyuanVideoSampler
 from hyvideo.constants import NEGATIVE_PROMPT
@@ -17,9 +18,20 @@ from huggingface_hub import snapshot_download
 
 if torch.cuda.device_count() > 0:
     snapshot_download(repo_id="tencent/HunyuanVideo", repo_type="model", local_dir="ckpts", force_download=True)
+    snapshot_download(repo_id="xtuner/llava-llama-3-8b-v1_1-transformers", repo_type="model", local_dir="ckpts/llava-llama-3-8b-v1_1-transformers", force_download=True)
+
+    class Args:
+        def __init__(self, input_dir, output_dir):
+            self.input_dir = input_dir
+            self.output_dir = output_dir
+
+    # Create the object
+    args = Args("ckpts/llava-llama-3-8b-v1_1-transformers", "ckpts/text_encoder")
+    preprocess_text_encoder_tokenizer(args)
+    snapshot_download(repo_id="openai/clip-vit-large-patch14", repo_type="model", local_dir="ckpts/text_encoder_2", force_download=True)
 
 def initialize_model(model_path):
-    print('initialize_model: ' + model_path)
+    print("initialize_model: " + model_path)
     if torch.cuda.device_count() == 0:
         return None
 
@@ -30,11 +42,36 @@ def initialize_model(model_path):
 
     print(f"`models_root` exists: {models_root_path}")
     hunyuan_video_sampler = HunyuanVideoSampler.from_pretrained(models_root_path, args=args)
-    print('Model initialized: ' + model_path)
+    print("Model initialized: " + model_path)
     return hunyuan_video_sampler
 
-@spaces.GPU(duration=120)
+model = initialize_model("ckpts")
+
 def generate_video(
+    prompt,
+    resolution,
+    video_length,
+    seed,
+    num_inference_steps,
+    guidance_scale,
+    flow_shift,
+    embedded_guidance_scale
+):
+    print("generate_video (prompt: " + prompt + ")")
+    return generate_video_gpu(
+        model,
+        prompt,
+        resolution,
+        video_length,
+        seed,
+        num_inference_steps,
+        guidance_scale,
+        flow_shift,
+        embedded_guidance_scale
+    )
+
+@spaces.GPU(duration=120)
+def generate_video_gpu(
     model,
     prompt,
     resolution,
@@ -45,14 +82,16 @@ def generate_video(
     flow_shift,
     embedded_guidance_scale
 ):
+    print("generate_video_gpu (prompt: " + prompt + ")")
     if torch.cuda.device_count() == 0:
-        gr.Warning('Set this space to GPU config to make it work.')
+        gr.Warning("Set this space to GPU config to make it work.")
         return None
 
     seed = None if seed == -1 else seed
     width, height = resolution.split("x")
     width, height = int(width), int(height)
     negative_prompt = "" # not applicable in the inference
+    print("Predicting video...")
 
     outputs = model.predict(
         prompt=prompt,
@@ -69,7 +108,8 @@ def generate_video(
         embedded_guidance_scale=embedded_guidance_scale
     )
 
-    samples = outputs['samples']
+    print("Video predicted")
+    samples = outputs["samples"]
     sample = samples[0].unsqueeze(0)
 
     save_path = "./gradio_outputs"
@@ -78,13 +118,12 @@
     time_flag = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d-%H:%M:%S")
     video_path = f"{save_path}/{time_flag}_seed{outputs['seeds'][0]}_{outputs['prompts'][0][:100].replace('/','')}.mp4"
     save_videos_grid(sample, video_path, fps=24)
-    logger.info(f'Sample saved to: {video_path}')
+    logger.info(f"Sample saved to: {video_path}")
 
+    print("Return the video")
     return video_path
 
 def create_demo(model_path):
-    model = initialize_model(model_path)
-
     with gr.Blocks() as demo:
         if torch.cuda.device_count() == 0:
             with gr.Row():
@@ -92,7 +131,8 @@ def create_demo(model_path):
                 <p style="background-color: red;"><big><big><big><b>⚠️To use <i>Hunyuan Video</i>, <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/HunyuanVideo?duplicate=true">duplicate this space</a> and set a GPU with 80 GB VRAM.</b>
 
                 You can't use <i>Hunyuan Video</i> directly here because this space runs on a CPU, which is not enough for <i>Hunyuan Video</i>. Please provide <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/HunyuanVideo/discussions/new">feedback</a> if you have issues.
-                </big></big></big></p>
+                </big></big></big></p><br/>
+                <p style="background-color: light-green;"><big>The space has been successfully deployed on A100 space on 2025-01-23. Synchronize your space to fix the errors.</big></p>
                 """)
         gr.Markdown("# Hunyuan Video Generation")
 
@@ -146,7 +186,7 @@ If you can't use _Hunyuan Video_, you can use _[CogVideoX](https://huggingface.c
         """)
 
         generate_btn.click(
-            fn=lambda *inputs: generate_video(model, *inputs),
+            fn=generate_video,
            inputs=[
                 prompt,
                 resolution,
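
The ad-hoc `Args` class added here exists only because `preprocess_text_encoder_tokenizer` reads argparse-style `input_dir` and `output_dir` attributes (both attribute names taken from the diff). The standard library's `types.SimpleNamespace` expresses the same object without a class definition; a possible simplification rather than what the commit actually does:

from types import SimpleNamespace

# Same two attributes the commit's Args class carries; paths from the diff.
args = SimpleNamespace(
    input_dir="ckpts/llava-llama-3-8b-v1_1-transformers",
    output_dir="ckpts/text_encoder",
)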
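
After the three `snapshot_download` calls and the tokenizer preprocessing, loading assumes the checkpoint tree below. A small sanity check, a sketch using only the directory names taken from the diff, would fail fast before `initialize_model` runs:

from pathlib import Path

# Directories this commit populates: HunyuanVideo weights, the
# LLaVA-derived text encoder, and the CLIP text encoder.
for d in ("ckpts", "ckpts/text_encoder", "ckpts/text_encoder_2"):
    if not Path(d).is_dir():
        raise FileNotFoundError(f"missing checkpoint directory: {d}")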
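
The substance of the fix is the ZeroGPU split: the former `generate_video` becomes a plain CPU-side wrapper that the button binds to directly (replacing the `fn=lambda *inputs: ...` closure over `model`), while the `@spaces.GPU`-decorated worker does the CUDA work, and the model is built once at module import. A minimal sketch of that pattern, with `handler`/`handler_gpu` as hypothetical stand-ins for `generate_video`/`generate_video_gpu`:

import spaces
import torch

def handler(prompt):
    # Runs in the CPU process; a Gradio event handler can bind to this
    # directly, and it only forwards to the GPU-decorated worker.
    return handler_gpu(prompt)

@spaces.GPU(duration=120)  # the Spaces runtime attaches a GPU for up to 120 s
def handler_gpu(prompt):
    # All CUDA work belongs inside this call.
    return f"{prompt} (cuda available: {torch.cuda.is_available()})"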