ProgramerSalar commited on
Commit
a995c69
·
verified ·
1 Parent(s): 524f2f8

Update the README file

Browse files
Files changed (1) hide show
  1. README.md +81 -3
README.md CHANGED
@@ -1,3 +1,81 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ ---
4
+
5
+
6
+ ```bash
7
+ git clone https://huggingface.co/ProgramerSalar/L1-S
8
+ cd L1-S
9
+ pip install -r requirements.txt
10
+ ```
11
+
12
+ ```python
13
+ import os
14
+ import json
15
+ import torch
16
+ import numpy as np
17
+ import PIL
18
+ from PIL import Image
19
+ from IPython.display import HTML
20
+ from pyramid_dit import PyramidDiTForVideoGeneration
21
+ from IPython.display import Image as ipython_image
22
+ from diffusers.utils import load_image, export_to_video, export_to_gif
23
+
24
+
25
+ variant='diffusion_transformer_768p' # For high resolution
26
+ # variant='diffusion_transformer_384p' # For low resolution
27
+
28
+ model_path = "Path" # The downloaded checkpoint dir
29
+ model_dtype = 'bf16'
30
+
31
+ device_id = 0
32
+ torch.cuda.set_device(device_id)
33
+
34
+ model = PyramidDiTForVideoGeneration(
35
+ model_path,
36
+ model_dtype,
37
+ model_variant=variant,
38
+ )
39
+
40
+ model.vae.to("cuda")
41
+ model.dit.to("cuda")
42
+ model.text_encoder.to("cuda")
43
+
44
+ model.vae.enable_tiling()
45
+
46
+ if model_dtype == "bf16":
47
+ torch_dtype = torch.bfloat16
48
+ elif model_dtype == "fp16":
49
+ torch_dtype = torch.float16
50
+ else:
51
+ torch_dtype = torch.float32
52
+
53
+ prompt = "A movie trailer featuring the adventures of the 30 year old space man wearing a red wool knitted motorcycle helmet, blue sky, salt desert, cinematic style, shot on 35mm film, vivid colors"
54
+
55
+ # used for 384p model variant
56
+ # width = 640
57
+ # height = 384
58
+
59
+ # used for 768p model variant
60
+ width = 1280
61
+ height = 768
62
+
63
+ temp = 16 # temp in [1, 31] <=> frame in [1, 241] <=> duration in [0, 10s]
64
+
65
+ with torch.no_grad(), torch.cuda.amp.autocast(enabled=True if model_dtype != 'fp32' else False, dtype=torch_dtype):
66
+ frames = model.generate(
67
+ prompt=prompt,
68
+ num_inference_steps=[20, 20, 20],
69
+ video_num_inference_steps=[10, 10, 10],
70
+ height=height,
71
+ width=width,
72
+ temp=temp,
73
+ guidance_scale=9.0, # The guidance for the first frame, set it to 7 for 384p variant
74
+ video_guidance_scale=5.0, # The guidance for the other video latent
75
+ output_type="pil",
76
+ save_memory=True, # If you have enough GPU memory, set it to `False` to improve vae decoding speed
77
+ )
78
+
79
+ export_to_video(frames, "./text_to_video_sample.mp4", fps=24)
80
+
81
+ ```