Update handler.py
Browse files- handler.py +18 -5
handler.py
CHANGED
@@ -171,6 +171,16 @@ class GenerationConfig:
|
|
171 |
audio_prompt: str = "" # Text prompt for audio generation
|
172 |
audio_negative_prompt: str = "voices, voice, talking, speaking, speech" # Negative prompt for audio generation
|
173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
def validate_and_adjust(self) -> 'GenerationConfig':
|
175 |
"""Validate and adjust parameters to meet constraints"""
|
176 |
# First check if it's one of our explicitly allowed resolutions
|
@@ -279,10 +289,7 @@ class EndpointHandler:
|
|
279 |
)
|
280 |
|
281 |
# Convert to data URI
|
282 |
-
video_uri = await result.write(
|
283 |
-
type="data-uri",
|
284 |
-
quality=17
|
285 |
-
)
|
286 |
|
287 |
# Collect metadata
|
288 |
metadata = {
|
@@ -323,6 +330,7 @@ class EndpointHandler:
|
|
323 |
- enable_audio (optional, bool): automatically generate an audio track
|
324 |
- audio_prompt (optional, str): prompt to use for the audio generation (concepts to add)
|
325 |
- audio_negative_prompt (optional, str): negative prompt to use for the audio generation (concepts to ignore)
|
|
|
326 |
Returns:
|
327 |
Dictionary containing:
|
328 |
- video: Base64 encoded MP4 data URI
|
@@ -369,6 +377,7 @@ class EndpointHandler:
|
|
369 |
enable_audio=params.get("enable_audio", GenerationConfig.enable_audio),
|
370 |
audio_prompt=params.get("audio_prompt", GenerationConfig.audio_prompt),
|
371 |
audio_negative_prompt=params.get("audio_negative_prompt", GenerationConfig.audio_negative_prompt),
|
|
|
372 |
).validate_and_adjust()
|
373 |
|
374 |
logger.info(f"Global request settings:")
|
@@ -396,7 +405,11 @@ class EndpointHandler:
|
|
396 |
|
397 |
# constants
|
398 |
"output_type": "pt",
|
399 |
-
"generator": generator
|
|
|
|
|
|
|
|
|
400 |
}
|
401 |
#logger.info(f"Video model generation settings:")
|
402 |
#pprint.pprint(generation_kwargs)
|
|
|
171 |
audio_prompt: str = "" # Text prompt for audio generation
|
172 |
audio_negative_prompt: str = "voices, voice, talking, speaking, speech" # Negative prompt for audio generation
|
173 |
|
174 |
+
# The range of the CRF scale is 0–51, where:
|
175 |
+
# 0 is lossless (for 8 bit only, for 10 bit use -qp 0)
|
176 |
+
# 23 is the default
|
177 |
+
# 51 is worst quality possible
|
178 |
+
# A lower value generally leads to higher quality, and a subjectively sane range is 17–28.
|
179 |
+
# Consider 17 or 18 to be visually lossless or nearly so;
|
180 |
+
# it should look the same or nearly the same as the input but it isn't technically lossless.
|
181 |
+
# The range is exponential, so increasing the CRF value +6 results in roughly half the bitrate / file size, while -6 leads to roughly twice the bitrate.
|
182 |
+
quality: int = 18
|
183 |
+
|
184 |
def validate_and_adjust(self) -> 'GenerationConfig':
|
185 |
"""Validate and adjust parameters to meet constraints"""
|
186 |
# First check if it's one of our explicitly allowed resolutions
|
|
|
289 |
)
|
290 |
|
291 |
# Convert to data URI
|
292 |
+
video_uri = await result.write(type="data-uri", quality=config.quality)
|
|
|
|
|
|
|
293 |
|
294 |
# Collect metadata
|
295 |
metadata = {
|
|
|
330 |
- enable_audio (optional, bool): automatically generate an audio track
|
331 |
- audio_prompt (optional, str): prompt to use for the audio generation (concepts to add)
|
332 |
- audio_negative_prompt (optional, str): negative prompt to use for the audio generation (concepts to ignore)
|
333 |
+
- quality (optional, int, defaults to 18): CRF value used when encoding the video. The range of the CRF scale is 0–51, where 0 is lossless (for 8 bit only, for 10 bit use -qp 0), 23 is the encoder's default, and 51 is worst quality possible.
|
334 |
Returns:
|
335 |
Dictionary containing:
|
336 |
- video: Base64 encoded MP4 data URI
|
|
|
377 |
enable_audio=params.get("enable_audio", GenerationConfig.enable_audio),
|
378 |
audio_prompt=params.get("audio_prompt", GenerationConfig.audio_prompt),
|
379 |
audio_negative_prompt=params.get("audio_negative_prompt", GenerationConfig.audio_negative_prompt),
|
380 |
+
quality=params.get("quality", GenerationConfig.quality),
|
381 |
).validate_and_adjust()
|
382 |
|
383 |
logger.info(f"Global request settings:")
|
|
|
405 |
|
406 |
# constants
|
407 |
"output_type": "pt",
|
408 |
+
"generator": generator,
|
409 |
+
|
410 |
+
# VAE noise augmentation - not sure if we should expose those to the API
|
411 |
+
"decode_timestep": 0.05, # Timestep for decoding noise
|
412 |
+
"decode_noise_scale": 0.025, # Noise level for decoding noise
|
413 |
}
|
414 |
#logger.info(f"Video model generation settings:")
|
415 |
#pprint.pprint(generation_kwargs)
|