jbilcke-hf
/

LTX-Video-0.9.1-HFIE

@@ -171,6 +171,16 @@ class GenerationConfig:
     audio_prompt: str = ""  # Text prompt for audio generation
     audio_negative_prompt: str = "voices, voice, talking, speaking, speech" # Negative prompt for audio generation
     def validate_and_adjust(self) -> 'GenerationConfig':
         """Validate and adjust parameters to meet constraints"""
         # First check if it's one of our explicitly allowed resolutions
@@ -279,10 +289,7 @@ class EndpointHandler:
             )
             # Convert to data URI
-            video_uri = await result.write(
-                type="data-uri",
-                quality=17
-            )
             # Collect metadata
             metadata = {
@@ -323,6 +330,7 @@ class EndpointHandler:
                     - enable_audio (optional, bool): automatically generate an audio track
                     - audio_prompt (optional, str): prompt to use for the audio generation (concepts to add)
                     - audio_negative_prompt (optional, str): negative prompt to use for the audio generation (concepts to ignore)
         Returns:
             Dictionary containing:
                 - video: Base64 encoded MP4 data URI
@@ -369,6 +377,7 @@ class EndpointHandler:
             enable_audio=params.get("enable_audio", GenerationConfig.enable_audio),
             audio_prompt=params.get("audio_prompt", GenerationConfig.audio_prompt),
             audio_negative_prompt=params.get("audio_negative_prompt", GenerationConfig.audio_negative_prompt),
         ).validate_and_adjust()
         logger.info(f"Global request settings:")
@@ -396,7 +405,11 @@ class EndpointHandler:
                     # constants
                     "output_type": "pt",
-                    "generator": generator
                 }
                 #logger.info(f"Video model generation settings:")
                 #pprint.pprint(generation_kwargs)

     audio_prompt: str = ""  # Text prompt for audio generation
     audio_negative_prompt: str = "voices, voice, talking, speaking, speech" # Negative prompt for audio generation
+    # The range of the CRF scale is 0–51, where:
+    # 0 is lossless (for 8 bit only, for 10 bit use -qp 0)
+    # 23 is the default
+    # 51 is worst quality possible
+    # A lower value generally leads to higher quality, and a subjectively sane range is 17–28.
+    # Consider 17 or 18 to be visually lossless or nearly so;
+    # it should look the same or nearly the same as the input but it isn't technically lossless.
+    # The range is exponential, so increasing the CRF value +6 results in roughly half the bitrate / file size, while -6 leads to roughly twice the bitrate.
+    quality: int = 18
     def validate_and_adjust(self) -> 'GenerationConfig':
         """Validate and adjust parameters to meet constraints"""
         # First check if it's one of our explicitly allowed resolutions
             )
             # Convert to data URI
+            video_uri = await result.write(type="data-uri", quality=config.quality)
             # Collect metadata
             metadata = {
                     - enable_audio (optional, bool): automatically generate an audio track
                     - audio_prompt (optional, str): prompt to use for the audio generation (concepts to add)
                     - audio_negative_prompt (optional, str): negative prompt to use for the audio generation (concepts to ignore)
+                    - quality (optional, int, defaults to 18): CRF value passed to the video writer. The CRF scale ranges from 0 to 51, where 0 is lossless (for 8 bit only, for 10 bit use -qp 0), 23 is the encoder default, and 51 is the worst quality possible.
         Returns:
             Dictionary containing:
                 - video: Base64 encoded MP4 data URI
             enable_audio=params.get("enable_audio", GenerationConfig.enable_audio),
             audio_prompt=params.get("audio_prompt", GenerationConfig.audio_prompt),
             audio_negative_prompt=params.get("audio_negative_prompt", GenerationConfig.audio_negative_prompt),
+            quality=params.get("quality", GenerationConfig.quality),
         ).validate_and_adjust()
         logger.info(f"Global request settings:")
                     # constants
                     "output_type": "pt",
+                    "generator": generator,
+                    # VAE noise augmentation - not sure if we should expose those to the API
+                    "decode_timestep": 0.05, # Timestep for decoding noise
+                    "decode_noise_scale": 0.025, # Noise level for decoding noise
                 }
                 #logger.info(f"Video model generation settings:")
                 #pprint.pprint(generation_kwargs)