Spaces: Running on Zero
Update skyreelsinfer/pipelines/pipeline_skyreels_video.py
skyreelsinfer/pipelines/pipeline_skyreels_video.py CHANGED
@@ -162,8 +162,8 @@ class SkyreelsVideoPipeline(HunyuanVideoPipeline):
         self,
         prompt: str,
         negative_prompt: str = "Aerial view, aerial view, overexposed, low quality, deformation, a poor composition, bad hands, bad teeth, bad eyes, bad limbs, distortion",
-        height: int =
-        width: int =
+        height: int = 512,
+        width: int = 512,
         num_frames: int = 129,
         num_inference_steps: int = 50,
         sigmas: List[float] = None,
@@ -240,7 +240,7 @@ class SkyreelsVideoPipeline(HunyuanVideoPipeline):
             batch_size = len(prompt)
         else:
             batch_size = prompt_embeds.shape[0]
-
+        self.text_encoder.to("cuda")

         # 3. Encode input prompt
         (
@@ -338,8 +338,8 @@ class SkyreelsVideoPipeline(HunyuanVideoPipeline):

         if hasattr(self, "text_encoder_to_cpu"):
             self.text_encoder_to_cpu()
-
-
+        self.text_encoder.to("cpu")
+        self.vae.to("cpu")
         torch.cuda.empty_cache()

         with self.progress_bar(total=num_inference_steps) as progress_bar:
@@ -414,7 +414,7 @@ class SkyreelsVideoPipeline(HunyuanVideoPipeline):
                 progress_bar.update()

         if not output_type == "latent":
-
+            self.vae.to("cuda")
             latents = latents.to(self.vae.dtype) / self.vae.config.scaling_factor
             video = self.vae.decode(latents, return_dict=False)[0]
             video = self.video_processor.postprocess_video(video, output_type=output_type)
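
Taken together, the hunks implement manual CPU/GPU offloading: the text encoder is moved to the GPU only for prompt encoding, both the text encoder and VAE are evicted (and the CUDA cache flushed) before the denoising loop, and the VAE is moved back just before latent decoding. Below is a minimal sketch of that pattern outside the pipeline class, assuming a diffusers-style pipeline object whose text_encoder and vae are plain torch.nn.Module attributes; encode_prompt_somehow and denoise_somehow are hypothetical placeholders for the pipeline's real encoding and denoising steps, not actual APIs.

import torch

def low_vram_generate(pipe, prompt: str, num_inference_steps: int = 50):
    # Step 1 -- prompt encoding: only the text encoder needs the GPU here.
    pipe.text_encoder.to("cuda")
    prompt_embeds = encode_prompt_somehow(pipe, prompt)  # hypothetical helper

    # Step 2 -- denoising: evict everything the transformer does not need,
    # then release the allocator's cached blocks so they can be reused.
    pipe.text_encoder.to("cpu")
    pipe.vae.to("cpu")
    torch.cuda.empty_cache()
    latents = denoise_somehow(pipe, prompt_embeds, num_inference_steps)  # hypothetical helper

    # Step 3 -- decoding: bring the VAE back only when latents become frames.
    pipe.vae.to("cuda")
    latents = latents.to(pipe.vae.dtype) / pipe.vae.config.scaling_factor
    return pipe.vae.decode(latents, return_dict=False)[0]

diffusers also offers pipe.enable_model_cpu_offload(), which automates a similar round-trip via hooks; the explicit .to() calls in this commit instead give the Space direct control over exactly when each submodule occupies GPU memory, which matters on ZeroGPU where VRAM is limited and time-sliced. The smaller 512x512 defaults for height and width serve the same goal by shrinking the latent tensors themselves.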