Upload folder using huggingface_hub

Files changed (8) hide show

README.md CHANGED Viewed

@@ -6,7 +6,7 @@ tags:
 - image-to-video
 ---
-Unofficial Diffusers-format weights for https://huggingface.co/Lightricks/LTX-Video (version 0.9.0).
 Text-to-Video:
@@ -15,7 +15,7 @@ import torch
 from diffusers import LTXPipeline
 from diffusers.utils import export_to_video
-pipe = LTXPipeline.from_pretrained("a-r-r-o-w/LTX-Video-diffusers", torch_dtype=torch.bfloat16)
 pipe.to("cuda")
 prompt = "A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. The scene appears to be real-life footage"
@@ -28,6 +28,8 @@ video = pipe(
     height=480,
     num_frames=161,
     num_inference_steps=50,
 ).frames[0]
 export_to_video(video, "output.mp4", fps=24)
 ```
@@ -39,7 +41,7 @@ import torch
 from diffusers import LTXImageToVideoPipeline
 from diffusers.utils import export_to_video, load_image
-pipe = LTXImageToVideoPipeline.from_pretrained("a-r-r-o-w/LTX-Video-diffusers", torch_dtype=torch.bfloat16)
 pipe.to("cuda")
 image = load_image(
@@ -56,6 +58,8 @@ video = pipe(
     height=480,
     num_frames=161,
     num_inference_steps=50,
 ).frames[0]
 export_to_video(video, "output.mp4", fps=24)
 ```

 - image-to-video
 ---
+Unofficial Diffusers-format weights for https://huggingface.co/Lightricks/LTX-Video (version 0.9.1).
 Text-to-Video:
 from diffusers import LTXPipeline
 from diffusers.utils import export_to_video
+pipe = LTXPipeline.from_pretrained("a-r-r-o-w/LTX-Video-0.9.1-diffusers", torch_dtype=torch.bfloat16)
 pipe.to("cuda")
 prompt = "A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. The scene appears to be real-life footage"
     height=480,
     num_frames=161,
     num_inference_steps=50,
+    decode_timestep=0.03,
+    decode_noise_scale=0.025,
 ).frames[0]
 export_to_video(video, "output.mp4", fps=24)
 ```
 from diffusers import LTXImageToVideoPipeline
 from diffusers.utils import export_to_video, load_image
+pipe = LTXImageToVideoPipeline.from_pretrained("a-r-r-o-w/LTX-Video-0.9.1-diffusers", torch_dtype=torch.bfloat16)
 pipe.to("cuda")
 image = load_image(
     height=480,
     num_frames=161,
     num_inference_steps=50,
+    decode_timestep=0.03,
+    decode_noise_scale=0.025,
 ).frames[0]
 export_to_video(video, "output.mp4", fps=24)
 ```

model_index.json CHANGED Viewed

@@ -15,10 +15,10 @@
   ],
   "transformer": [
     "diffusers",
-    "LTXTransformer3DModel"
   ],
   "vae": [
     "diffusers",
-    "AutoencoderKLLTX"
   ]
 }

   ],
   "transformer": [
     "diffusers",
+    "LTXVideoTransformer3DModel"
   ],
   "vae": [
     "diffusers",
+    "AutoencoderKLLTXVideo"
   ]
 }

text_encoder/config.json CHANGED Viewed

@@ -26,7 +26,7 @@
   "relative_attention_num_buckets": 32,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.46.2",
   "use_cache": true,
   "vocab_size": 32128
 }

   "relative_attention_num_buckets": 32,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
+  "transformers_version": "4.48.0.dev0",
   "use_cache": true,
   "vocab_size": 32128
 }

tokenizer/tokenizer_config.json CHANGED Viewed

@@ -931,6 +931,7 @@
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
   "extra_ids": 100,
   "legacy": true,
   "model_max_length": 128,
   "pad_token": "<pad>",

   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
   "extra_ids": 100,
+  "extra_special_tokens": {},
   "legacy": true,
   "model_max_length": 128,
   "pad_token": "<pad>",

transformer/config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_class_name": "LTXTransformer3DModel",
   "_diffusers_version": "0.32.0.dev0",
   "activation_fn": "gelu-approximate",
   "attention_bias": true,

 {
+  "_class_name": "LTXVideoTransformer3DModel",
   "_diffusers_version": "0.32.0.dev0",
   "activation_fn": "gelu-approximate",
   "attention_bias": true,

transformer/diffusion_pytorch_model.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:75e214999dbee61d5b8dd3d865dd4927c501f71510ab4efe612db1b50f96b973
+size 3846852608

vae/config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_class_name": "AutoencoderKLLTX",
   "_diffusers_version": "0.32.0.dev0",
   "block_out_channels": [
     128,
@@ -7,7 +7,29 @@
     512,
     512
   ],
   "decoder_causal": false,
   "encoder_causal": true,
   "in_channels": 3,
   "latent_channels": 128,
@@ -28,5 +50,16 @@
     true,
     true,
     false
   ]
 }

 {
+  "_class_name": "AutoencoderKLLTXVideo",
   "_diffusers_version": "0.32.0.dev0",
   "block_out_channels": [
     128,
     512,
     512
   ],
+  "decoder_block_out_channels": [
+    256,
+    512,
+    1024
+  ],
   "decoder_causal": false,
+  "decoder_inject_noise": [
+    true,
+    true,
+    true,
+    false
+  ],
+  "decoder_layers_per_block": [
+    5,
+    6,
+    7,
+    8
+  ],
+  "decoder_spatio_temporal_scaling": [
+    true,
+    true,
+    true
+  ],
   "encoder_causal": true,
   "in_channels": 3,
   "latent_channels": 128,
     true,
     true,
     false
+  ],
+  "timestep_conditioning": true,
+  "upsample_factor": [
+    2,
+    2,
+    2
+  ],
+  "upsample_residual": [
+    true,
+    true,
+    true
   ]
 }

vae/diffusion_pytorch_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:265ca87cb5dff5e37f924286e957324e282fe7710a952a7dafc0df43883e2010
-size 1676798532

 version https://git-lfs.github.com/spec/v1
+oid sha256:3a70d8d49fb2cc3698ffe9ed0e09fba5cd65d6a7d83fe89320ebc1d6fcc94536
+size 1869989690