Update pipeline.py
pipeline.py CHANGED (+8 -5)
@@ -1533,15 +1533,18 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
         output_batch_size = 2  # prevents out of memory errors with large videos
         num_digits = output_path.count('#')  # count the number of '#' characters
         frame_format = output_path.replace('#' * num_digits, '{:0' + str(num_digits) + 'd}')
+
+        # if we had more than one prompt, we need to offset the video frames back by the number of inference steps
+        if len(prompt_embeds_list) > 1:
+            # wrap the first n frames to the end of the video to fix the offsetting from the context scheduler
+            offset_frames = num_inference_steps
+            latents = torch.cat((latents[:, :, offset_frames:, :, :], latents[:, :, :offset_frames, :, :]), dim=2)
+
         for batch in range((num_frames + output_batch_size - 1) // output_batch_size):
             start_id = batch * output_batch_size
             end_id = min((batch + 1) * output_batch_size, num_frames)
             video_tensor = self.decode_latents(latents[:, :, start_id:end_id, :, :])
-
-            if len(prompt_embeds_list) > 1:
-                # wrap the first n frames to the end of the video to fix the offsetting from the context scheduler
-                offset = num_inference_steps * 2
-                video_tensor = torch.cat((video_tensor[:, :, offset:, :, :], video_tensor[:, :, :offset, :, :]), dim=2)
+
             video = tensor2vid(video_tensor, self.image_processor, output_type=output_type)
             for f_id, frame in enumerate(video[0]):
                 frame.save(frame_format.format(start_id + f_id))
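The wrap performed by the new branch is a circular shift along the frame axis (dim 2 of the (batch, channels, frames, height, width) latent tensor): the first offset_frames frames move to the end of the sequence. A minimal standalone sketch (the sizes and the dummy latents below are made up for illustration), showing that the torch.cat form used in the hunk is equivalent to a roll:

import torch

num_frames, offset_frames = 8, 3  # dummy sizes for illustration
# dummy latents shaped (batch, channels, frames, height, width)
latents = torch.arange(num_frames).reshape(1, 1, num_frames, 1, 1)

# move the first offset_frames frames to the end of the sequence
wrapped = torch.cat((latents[:, :, offset_frames:, :, :], latents[:, :, :offset_frames, :, :]), dim=2)

print(wrapped.flatten().tolist())  # [3, 4, 5, 6, 7, 0, 1, 2]
# the same circular shift expressed with torch.roll
assert torch.equal(wrapped, torch.roll(latents, shifts=-offset_frames, dims=2))

Note that the shift only affects the saved frames because its result is assigned back to latents before the batched decode loop slices it.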
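The '#' placeholder handling at the top of the hunk turns a path template into a zero-padded Python format string; it relies on the '#' characters being contiguous, since replace looks for a single run of them. A small sketch with a hypothetical output_path:

output_path = "out/frame_####.png"  # hypothetical template; '####' marks where the frame index goes

num_digits = output_path.count('#')  # 4
# build a zero-padded format string: 'out/frame_{:04d}.png'
frame_format = output_path.replace('#' * num_digits, '{:0' + str(num_digits) + 'd}')

print(frame_format.format(7))    # out/frame_0007.png
print(frame_format.format(123))  # out/frame_0123.png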