Update pipeline.py
pipeline.py CHANGED (+8 -5)
@@ -1533,15 +1533,18 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
         output_batch_size = 2  # prevents out of memory errors with large videos
         num_digits = output_path.count('#')  # count the number of '#' characters
         frame_format = output_path.replace('#' * num_digits, '{:0' + str(num_digits) + 'd}')
+
+        # if we had more than one prompt, we need to offset the video frames back by the number of inference steps
+        if len(prompt_embeds_list) > 1:
+            # wrap the first n frames to the end of the video to fix the offsetting from the context scheduler
+            offset_frames = num_inference_steps
+            latents = torch.cat((latents[:, :, offset_frames:, :, :], latents[:, :, :offset_frames, :, :]), dim=2)
+
         for batch in range((num_frames + output_batch_size - 1) // output_batch_size):
             start_id = batch * output_batch_size
             end_id = min((batch + 1) * output_batch_size, num_frames)
             video_tensor = self.decode_latents(latents[:, :, start_id:end_id, :, :])
-
-            if len(prompt_embeds_list) > 1:
-                # wrap the first n frames to the end of the video to fix the offsetting from the context scheduler
-                offset = num_inference_steps * 2
-                video_tensor = torch.cat((video_tensor[:, :, offset:, :, :], video_tensor[:, :, :offset, :, :]), dim=2)
+
             video = tensor2vid(video_tensor, self.image_processor, output_type=output_type)
             for f_id, frame in enumerate(video[0]):
                 frame.save(frame_format.format(start_id + f_id))
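The wrap performed by the new branch is a circular shift along the frame axis (dim 2 of the (batch, channels, frames, height, width) latent tensor): the first offset_frames frames move to the end of the sequence. A minimal standalone sketch (the sizes and the dummy latents below are made up for illustration), showing that the torch.cat form used in the hunk is equivalent to a roll:

import torch

num_frames, offset_frames = 8, 3  # dummy sizes for illustration
# dummy latents shaped (batch, channels, frames, height, width)
latents = torch.arange(num_frames).reshape(1, 1, num_frames, 1, 1)

# move the first offset_frames frames to the end of the sequence
wrapped = torch.cat((latents[:, :, offset_frames:, :, :], latents[:, :, :offset_frames, :, :]), dim=2)

print(wrapped.flatten().tolist())  # [3, 4, 5, 6, 7, 0, 1, 2]
# the same circular shift expressed with torch.roll
assert torch.equal(wrapped, torch.roll(latents, shifts=-offset_frames, dims=2))

Note that the shift only affects the saved frames because its result is assigned back to latents before the batched decode loop slices it.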
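The '#' placeholder handling at the top of the hunk turns a path template into a zero-padded Python format string; it relies on the '#' characters being contiguous, since replace looks for a single run of them. A small sketch with a hypothetical output_path:

output_path = "out/frame_####.png"  # hypothetical template; '####' marks where the frame index goes

num_digits = output_path.count('#')  # 4
# build a zero-padded format string: 'out/frame_{:04d}.png'
frame_format = output_path.replace('#' * num_digits, '{:0' + str(num_digits) + 'd}')

print(frame_format.format(7))    # out/frame_0007.png
print(frame_format.format(123))  # out/frame_0123.png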