ProgramerSalar
/

L1-S

Model card Files Files and versions Community

ProgramerSalar committed on Feb 11

Commit

a995c69

·

verified ·

1 Parent(s): 524f2f8

Update the README file

Files changed (1) hide show

README.md +81 -3

README.md CHANGED Viewed

@@ -1,3 +1,81 @@
----
-license: mit
----

+---
+license: mit
+---
+```bash
+git clone https://huggingface.co/ProgramerSalar/L1-S
+cd L1-S
+pip install -r requirements.txt
+```
+```python
+import os
+import json
+import torch
+import numpy as np
+import PIL
+from PIL import Image
+from IPython.display import HTML
+from pyramid_dit import PyramidDiTForVideoGeneration
+from IPython.display import Image as ipython_image
+from diffusers.utils import load_image, export_to_video, export_to_gif
+variant='diffusion_transformer_768p'         # For high resolution
+# variant='diffusion_transformer_384p'       # For low resolution
+model_path = "Path"   # The downloaded checkpoint dir
+model_dtype = 'bf16'
+device_id = 0
+torch.cuda.set_device(device_id)
+model = PyramidDiTForVideoGeneration(
+    model_path,
+    model_dtype,
+    model_variant=variant,
+)
+model.vae.to("cuda")
+model.dit.to("cuda")
+model.text_encoder.to("cuda")
+model.vae.enable_tiling()
+if model_dtype == "bf16":
+    torch_dtype = torch.bfloat16
+elif model_dtype == "fp16":
+    torch_dtype = torch.float16
+else:
+    torch_dtype = torch.float32
+prompt = "A movie trailer featuring the adventures of the 30 year old space man wearing a red wool knitted motorcycle helmet, blue sky, salt desert, cinematic style, shot on 35mm film, vivid colors"
+# used for 384p model variant
+# width = 640
+# height = 384
+# used for 768p model variant
+width = 1280
+height = 768
+temp = 16   # temp in [1, 31] <=> frame in [1, 241] <=> duration in [0, 10s]
+with torch.no_grad(), torch.cuda.amp.autocast(enabled=True if model_dtype != 'fp32' else False, dtype=torch_dtype):
+    frames = model.generate(
+        prompt=prompt,
+        num_inference_steps=[20, 20, 20],
+        video_num_inference_steps=[10, 10, 10],
+        height=height,
+        width=width,
+        temp=temp,
+        guidance_scale=9.0,         # The guidance for the first frame, set it to 7 for 384p variant
+        video_guidance_scale=5.0,   # The guidance for the other video latent
+        output_type="pil",
+        save_memory=True,           # If you have enough GPU memory, set it to `False` to improve vae decoding speed
+    )
+export_to_video(frames, "./text_to_video_sample.mp4", fps=24)
+```