jbilcke-hf (HF staff) committed
Commit e349e43 · verified · Parent: 41866de

Update handler.py

Files changed (1):
  handler.py  +24 -7
handler.py CHANGED
@@ -8,6 +8,11 @@ import tempfile
 import numpy as np
 from moviepy.editor import ImageSequenceClip
 import os
+import logging
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 
 class EndpointHandler:
     def __init__(self, path: str = ""):
@@ -34,22 +39,28 @@ class EndpointHandler:
         # Set default FPS
         self.fps = 24
 
-    def _create_video_file(self, images: torch.Tensor, fps: int = 24) -> bytes:
+    def _create_video_file(self, frames: torch.Tensor, fps: int = 24) -> bytes:
         """Convert frames to an MP4 video file.
 
         Args:
-            images (torch.Tensor): Generated frames tensor
+            frames (torch.Tensor): Generated frames tensor
            fps (int): Frames per second for the output video
 
         Returns:
            bytes: MP4 video file content
        """
-        # Convert tensor to numpy array
-        video_np = images.squeeze(0).permute(1, 2, 3, 0).cpu().float().numpy()
+        # Log frame information
+        num_frames = frames.shape[1]  # Shape should be [1, num_frames, channels, height, width]
+        duration = num_frames / fps
+        logger.info(f"Creating video with {num_frames} frames at {fps} FPS (duration: {duration:.2f} seconds)")
+
+        # Convert tensor to numpy array - remove batch dimension and rearrange to [num_frames, height, width, channels]
+        video_np = frames.squeeze(0).permute(0, 2, 3, 1).cpu().float().numpy()
         video_np = (video_np * 255).astype(np.uint8)
 
         # Get dimensions
-        height, width = video_np.shape[1:3]
+        _, height, width, _ = video_np.shape
+        logger.info(f"Video dimensions: {width}x{height}")
 
         # Create temporary file
         output_path = tempfile.mktemp(suffix=".mp4")
@@ -103,6 +114,9 @@ class EndpointHandler:
             guidance_scale = data.get("guidance_scale", 7.5)
             num_inference_steps = data.get("num_inference_steps", 50)
 
+            logger.info(f"Generating video with prompt: '{prompt}'")
+            logger.info(f"Parameters: num_frames={num_frames}, fps={fps}, guidance_scale={guidance_scale}, num_inference_steps={num_inference_steps}")
+
             # Check if image is provided for image-to-video generation
             image_data = data.get("image")
 
@@ -112,6 +126,7 @@ class EndpointHandler:
                 # Decode base64 image
                 image_bytes = base64.b64decode(image_data)
                 image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+                logger.info("Using image-to-video generation mode")
 
                 # Generate video from image
                 output = self.image_to_video(
@@ -121,8 +136,9 @@ class EndpointHandler:
                     guidance_scale=guidance_scale,
                     num_inference_steps=num_inference_steps,
                     output_type="pt"
-                ).frames[0]
+                ).frames  # Remove [0] to keep all frames
             else:
+                logger.info("Using text-to-video generation mode")
                 # Generate video from text only
                 output = self.text_to_video(
                     prompt=prompt,
@@ -130,7 +146,7 @@ class EndpointHandler:
                     guidance_scale=guidance_scale,
                     num_inference_steps=num_inference_steps,
                     output_type="pt"
-                ).frames[0]
+                ).frames  # Remove [0] to keep all frames
 
             # Convert frames to video file
             video_content = self._create_video_file(output, fps=fps)
@@ -144,4 +160,5 @@ class EndpointHandler:
             }
 
         except Exception as e:
+            logger.error(f"Error generating video: {str(e)}")
            raise RuntimeError(f"Error generating video: {str(e)}")
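
Note: the permute fix can be sanity-checked outside the endpoint. Below is a minimal standalone sketch that mirrors the new _create_video_file logic with a dummy tensor; the [1, num_frames, channels, height, width] layout is the one the patch assumes, and the libx264 codec choice is illustrative rather than taken from this file.

import tempfile

import numpy as np
import torch
from moviepy.editor import ImageSequenceClip

# Dummy frames in the layout the patch assumes: [1, num_frames, channels, height, width]
frames = torch.rand(1, 24, 3, 256, 384)

# squeeze(0) drops the batch dim -> [num_frames, channels, height, width];
# permute(0, 2, 3, 1) then yields [num_frames, height, width, channels],
# which is what ImageSequenceClip expects. The old permute(1, 2, 3, 0)
# only matched a [channels, num_frames, height, width] layout.
video_np = frames.squeeze(0).permute(0, 2, 3, 1).cpu().float().numpy()
video_np = (video_np * 255).astype(np.uint8)

output_path = tempfile.mktemp(suffix=".mp4")
clip = ImageSequenceClip(list(video_np), fps=24)
clip.write_videofile(output_path, codec="libx264", audio=False)

with open(output_path, "rb") as f:
    video_bytes = f.read()
print(f"Wrote {len(video_bytes)} bytes, {video_np.shape[0]} frames at {video_np.shape[2]}x{video_np.shape[1]}")

Dropping the [0] from .frames keeps the leading batch dimension on the pipeline output, which is what makes the squeeze(0) here line up with the handler's input.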
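
Note: the request keys touched in this diff imply a payload along the following lines. This is a hypothetical example: only the 7.5 and 50 defaults (and self.fps = 24) are confirmed by the code above; the prompt/num_frames/fps lookups and the EndpointHandler import follow the usual Hugging Face Inference Endpoints custom-handler convention and the unshown parts of __call__.

import base64

from handler import EndpointHandler  # standard custom-handler layout; import path assumed

# Key names match the data.get(...) calls visible in the diff; values are illustrative.
with open("input.jpg", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

data = {
    "prompt": "a red panda walking through snow",
    "image": image_b64,            # optional; presence switches to image-to-video mode
    "fps": 24,                     # handler default is self.fps = 24
    "num_frames": 48,              # illustrative; the default is not shown in this diff
    "guidance_scale": 7.5,         # default from data.get("guidance_scale", 7.5)
    "num_inference_steps": 50,     # default from data.get("num_inference_steps", 50)
}

handler = EndpointHandler(path=".")  # model path is illustrative
result = handler(data)               # returns the dict built at the end of __call__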