jbilcke-hf HF staff committed on
Commit
d979b5a
·
verified ·
1 Parent(s): 003fd85

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +18 -5
handler.py CHANGED
@@ -171,6 +171,16 @@ class GenerationConfig:
171
  audio_prompt: str = "" # Text prompt for audio generation
172
  audio_negative_prompt: str = "voices, voice, talking, speaking, speech" # Negative prompt for audio generation
173
 
 
 
 
 
 
 
 
 
 
 
174
  def validate_and_adjust(self) -> 'GenerationConfig':
175
  """Validate and adjust parameters to meet constraints"""
176
  # First check if it's one of our explicitly allowed resolutions
@@ -279,10 +289,7 @@ class EndpointHandler:
279
  )
280
 
281
  # Convert to data URI
282
- video_uri = await result.write(
283
- type="data-uri",
284
- quality=17
285
- )
286
 
287
  # Collect metadata
288
  metadata = {
@@ -323,6 +330,7 @@ class EndpointHandler:
323
  - enable_audio (optional, bool): automatically generate an audio track
324
  - audio_prompt (optional, str): prompt to use for the audio generation (concepts to add)
325
  - audio_negative_prompt (optional, str): negative prompt to use for the audio generation (concepts to ignore)
 
326
  Returns:
327
  Dictionary containing:
328
  - video: Base64 encoded MP4 data URI
@@ -369,6 +377,7 @@ class EndpointHandler:
369
  enable_audio=params.get("enable_audio", GenerationConfig.enable_audio),
370
  audio_prompt=params.get("audio_prompt", GenerationConfig.audio_prompt),
371
  audio_negative_prompt=params.get("audio_negative_prompt", GenerationConfig.audio_negative_prompt),
 
372
  ).validate_and_adjust()
373
 
374
  logger.info(f"Global request settings:")
@@ -396,7 +405,11 @@ class EndpointHandler:
396
 
397
  # constants
398
  "output_type": "pt",
399
- "generator": generator
 
 
 
 
400
  }
401
  #logger.info(f"Video model generation settings:")
402
  #pprint.pprint(generation_kwargs)
 
171
  audio_prompt: str = "" # Text prompt for audio generation
172
  audio_negative_prompt: str = "voices, voice, talking, speaking, speech" # Negative prompt for audio generation
173
 
174
+ # The range of the CRF scale is 0–51, where:
175
+ # 0 is lossless (for 8 bit only, for 10 bit use -qp 0)
176
+ # 23 is the default
177
+ # 51 is worst quality possible
178
+ # A lower value generally leads to higher quality, and a subjectively sane range is 17–28.
179
+ # Consider 17 or 18 to be visually lossless or nearly so;
180
+ # it should look the same or nearly the same as the input but it isn't technically lossless.
181
+ # The range is exponential, so increasing the CRF value +6 results in roughly half the bitrate / file size, while -6 leads to roughly twice the bitrate.
182
+ quality: int = 18
183
+
184
  def validate_and_adjust(self) -> 'GenerationConfig':
185
  """Validate and adjust parameters to meet constraints"""
186
  # First check if it's one of our explicitly allowed resolutions
 
289
  )
290
 
291
  # Convert to data URI
292
+ video_uri = await result.write(type="data-uri", quality=config.quality)
 
 
 
293
 
294
  # Collect metadata
295
  metadata = {
 
330
  - enable_audio (optional, bool): automatically generate an audio track
331
  - audio_prompt (optional, str): prompt to use for the audio generation (concepts to add)
332
  - audio_negative_prompt (optional, str): negative prompt to use for the audio generation (concepts to ignore)
333
+ - quality (optional, int, defaults to 18): The range of the CRF scale is 0–51, where 0 is lossless (for 8 bit only, for 10 bit use -qp 0), 23 is the default, and 51 is worst quality possible.
334
  Returns:
335
  Dictionary containing:
336
  - video: Base64 encoded MP4 data URI
 
377
  enable_audio=params.get("enable_audio", GenerationConfig.enable_audio),
378
  audio_prompt=params.get("audio_prompt", GenerationConfig.audio_prompt),
379
  audio_negative_prompt=params.get("audio_negative_prompt", GenerationConfig.audio_negative_prompt),
380
+ quality=params.get("quality", GenerationConfig.quality),
381
  ).validate_and_adjust()
382
 
383
  logger.info(f"Global request settings:")
 
405
 
406
  # constants
407
  "output_type": "pt",
408
+ "generator": generator,
409
+
410
+ # VAE noise augmentation - not sure if we should expose those to the API
411
+ "decode_timestep": 0.05, # Timestep for decoding noise
412
+ "decode_noise_scale": 0.025, # Noise level for decoding noise
413
  }
414
  #logger.info(f"Video model generation settings:")
415
  #pprint.pprint(generation_kwargs)