Spaces: Running on Zero
Upload 66 files
This view is limited to 50 files because the commit contains too many changes; see the raw diff for the complete list.
- .gitattributes +29 -0
- app_video_image_guidence.py +258 -0
- assets/0.mp4 +3 -0
- assets/0.txt +4 -0
- assets/0_edit.png +3 -0
- assets/1.mp4 +3 -0
- assets/1.txt +2 -0
- assets/1_edit.png +3 -0
- assets/2.mp4 +3 -0
- assets/2.txt +2 -0
- assets/2_edit.png +3 -0
- assets/3.mp4 +3 -0
- assets/3.txt +2 -0
- assets/3_edit.png +3 -0
- assets/4.mp4 +3 -0
- assets/4.txt +2 -0
- assets/4_edit.png +3 -0
- assets/5.mp4 +3 -0
- assets/5.txt +2 -0
- assets/5_edit.png +3 -0
- assets/6.mp4 +3 -0
- assets/6.txt +2 -0
- assets/6_edit.png +3 -0
- assets/7.mp4 +3 -0
- assets/7.txt +2 -0
- assets/7_edit.png +3 -0
- assets/8.mp4 +3 -0
- assets/8.txt +2 -0
- assets/8_edit.png +3 -0
- assets/9.mp4 +3 -0
- assets/9.txt +3 -0
- assets/9_edit.png +3 -0
- assets/outputvideo/output_0.mp4 +3 -0
- assets/outputvideo/output_1.mp4 +3 -0
- assets/outputvideo/output_2.mp4 +3 -0
- assets/outputvideo/output_3.mp4 +3 -0
- assets/outputvideo/output_4.mp4 +0 -0
- assets/outputvideo/output_5.mp4 +3 -0
- assets/outputvideo/output_6.mp4 +3 -0
- assets/outputvideo/output_7.mp4 +3 -0
- assets/outputvideo/output_8.mp4 +3 -0
- control_cogvideox/__pycache__/attention_processor.cpython-310.pyc +0 -0
- control_cogvideox/__pycache__/cogvideox_transformer_3d.cpython-310.pyc +0 -0
- control_cogvideox/__pycache__/cogvideox_transformer_3d.cpython-311.pyc +0 -0
- control_cogvideox/__pycache__/cogvideox_transformer_3d_ipadapter.cpython-310.pyc +0 -0
- control_cogvideox/__pycache__/cogvideox_transformer_3d_new_version.cpython-310.pyc +0 -0
- control_cogvideox/__pycache__/controlnet_cogvideox_transformer_3d.cpython-310.pyc +0 -0
- control_cogvideox/__pycache__/controlnet_cogvideox_transformer_3d.cpython-311.pyc +0 -0
- control_cogvideox/__pycache__/controlnet_cogvideox_transformer_3d_condition.cpython-310.pyc +0 -0
- control_cogvideox/__pycache__/embeddings.cpython-310.pyc +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,32 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+assets/0_edit.png filter=lfs diff=lfs merge=lfs -text
+assets/0.mp4 filter=lfs diff=lfs merge=lfs -text
+assets/1_edit.png filter=lfs diff=lfs merge=lfs -text
+assets/1.mp4 filter=lfs diff=lfs merge=lfs -text
+assets/2_edit.png filter=lfs diff=lfs merge=lfs -text
+assets/2.mp4 filter=lfs diff=lfs merge=lfs -text
+assets/3_edit.png filter=lfs diff=lfs merge=lfs -text
+assets/3.mp4 filter=lfs diff=lfs merge=lfs -text
+assets/4_edit.png filter=lfs diff=lfs merge=lfs -text
+assets/4.mp4 filter=lfs diff=lfs merge=lfs -text
+assets/5_edit.png filter=lfs diff=lfs merge=lfs -text
+assets/5.mp4 filter=lfs diff=lfs merge=lfs -text
+assets/6_edit.png filter=lfs diff=lfs merge=lfs -text
+assets/6.mp4 filter=lfs diff=lfs merge=lfs -text
+assets/7_edit.png filter=lfs diff=lfs merge=lfs -text
+assets/7.mp4 filter=lfs diff=lfs merge=lfs -text
+assets/8_edit.png filter=lfs diff=lfs merge=lfs -text
+assets/8.mp4 filter=lfs diff=lfs merge=lfs -text
+assets/9_edit.png filter=lfs diff=lfs merge=lfs -text
+assets/9.mp4 filter=lfs diff=lfs merge=lfs -text
+assets/outputvideo/output_0.mp4 filter=lfs diff=lfs merge=lfs -text
+assets/outputvideo/output_1.mp4 filter=lfs diff=lfs merge=lfs -text
+assets/outputvideo/output_2.mp4 filter=lfs diff=lfs merge=lfs -text
+assets/outputvideo/output_3.mp4 filter=lfs diff=lfs merge=lfs -text
+assets/outputvideo/output_5.mp4 filter=lfs diff=lfs merge=lfs -text
+assets/outputvideo/output_6.mp4 filter=lfs diff=lfs merge=lfs -text
+assets/outputvideo/output_7.mp4 filter=lfs diff=lfs merge=lfs -text
+assets/outputvideo/output_8.mp4 filter=lfs diff=lfs merge=lfs -text
+control_cogvideox/__pycache__/embeddings.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
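Each line added above is the rule that `git lfs track <pattern>` records for one large file. As a minimal illustrative sketch (a Python stand-in for the git-lfs CLI; the path list below is only a subset of this commit's entries):

# Sketch: append Git LFS rules like the ones above to .gitattributes.
lfs_rule = "{path} filter=lfs diff=lfs merge=lfs -text\n"
tracked = [f"assets/{i}.mp4" for i in range(10)] + \
          [f"assets/{i}_edit.png" for i in range(10)]
with open(".gitattributes", "a") as f:
    for path in tracked:
        f.write(lfs_rule.format(path=path))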
app_video_image_guidence.py
ADDED
import gradio as gr

import cv2
import torch
import numpy as np
import os
from control_cogvideox.cogvideox_transformer_3d import CogVideoXTransformer3DModel
from control_cogvideox.controlnet_cogvideox_transformer_3d import ControlCogVideoXTransformer3DModel
from pipeline_cogvideox_controlnet_5b_i2v_instruction2 import ControlCogVideoXPipeline
from diffusers.utils import export_to_video
from diffusers import AutoencoderKLCogVideoX
from transformers import T5EncoderModel, T5Tokenizer
from diffusers.schedulers import CogVideoXDDIMScheduler

from omegaconf import OmegaConf
from einops import rearrange
import decord
from typing import List
from tqdm import tqdm

import PIL
import torch.nn.functional as F
from torchvision import transforms

def get_prompt(file: str):
    # Each prompt file stores the positive prompt on line 0 and the
    # negative prompt on line 1.
    with open(file, 'r') as f:
        a = f.readlines()
    return a
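
def _example_get_prompt_usage():
    # Illustrative usage sketch (not part of the original upload): the prompt
    # files under assets/ follow the two-line layout that get_prompt() assumes;
    # this mirrors assets/0.txt with a temporary file.
    import tempfile
    with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as f:
        f.write("Swap dog for fox. prompt: the fox with two ears.\n")  # positive
        f.write("The fox with three ears.\n")                          # negative
    lines = get_prompt(f.name)
    positive, negative = lines[0].strip(), lines[1].strip()
    return positive, negative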

def init_pipe():
    def unwarp_model(state_dict):
        # Strip the "module." prefix that DistributedDataParallel adds to
        # every parameter name when a model is saved from a DDP wrapper.
        new_state_dict = {}
        for key in state_dict:
            new_state_dict[key.split('module.')[1]] = state_dict[key]
        return new_state_dict

    i2v = True
    root_path = "./cogvideox_instructions_lr_1e_5_bs_48_2_epoch_params_controlnet_first_frame_5b_mixed_precision_480_896/cogvideox-2025-01-15T12-42-19/"
    training_steps = 30001

    key = "i2v" if i2v else "t2v"

    noise_scheduler = CogVideoXDDIMScheduler(
        **OmegaConf.to_container(
            OmegaConf.load(f"./cogvideox-5b-{key}/scheduler/scheduler_config.json")
        )
    )

    text_encoder = T5EncoderModel.from_pretrained(f"./cogvideox-5b-{key}/", subfolder="text_encoder", torch_dtype=torch.float16)  # .to("cuda:0")
    vae = AutoencoderKLCogVideoX.from_pretrained(f"./cogvideox-5b-{key}/", subfolder="vae", torch_dtype=torch.float16).to("cuda:0")
    tokenizer = T5Tokenizer.from_pretrained(f"./cogvideox-5b-{key}/tokenizer", torch_dtype=torch.float16)

    # Main denoising transformer: 32 latent input channels for i2v
    # (16 video-latent channels + 16 first-frame-latent channels), 16 for t2v.
    config = OmegaConf.to_container(
        OmegaConf.load(f"./cogvideox-5b-{key}/transformer/config.json")
    )
    config["in_channels"] = 32 if i2v else 16
    transformer = CogVideoXTransformer3DModel(**config)

    # ControlNet branch: a 6-layer variant of the transformer that receives
    # the 16-channel source-video latents as its control signal.
    control_config = OmegaConf.to_container(
        OmegaConf.load(f"./cogvideox-5b-{key}/transformer/config.json")
    )
    control_config["in_channels"] = 32 if i2v else 16
    control_config['num_layers'] = 6
    control_config['control_in_channels'] = 16
    controlnet_transformer = ControlCogVideoXTransformer3DModel(**control_config)

    all_state_dicts = torch.load(f"{root_path}/checkpoints/checkpoint{training_steps}.ckpt", map_location="cpu", weights_only=True)
    transformer_state_dict = unwarp_model(all_state_dicts["transformer_state_dict"])
    controlnet_transformer_state_dict = unwarp_model(all_state_dicts["controlnet_transformer_state_dict"])

    transformer.load_state_dict(transformer_state_dict, strict=True)
    controlnet_transformer.load_state_dict(controlnet_transformer_state_dict, strict=True)

    transformer = transformer.half().to("cuda:0")
    controlnet_transformer = controlnet_transformer.half().to("cuda:0")

    vae = vae.eval()
    text_encoder = text_encoder.eval()
    transformer = transformer.eval()
    controlnet_transformer = controlnet_transformer.eval()

    pipe = ControlCogVideoXPipeline(tokenizer,
                                    text_encoder,
                                    vae,
                                    transformer,
                                    noise_scheduler,
                                    controlnet_transformer,
                                    )

    # Memory savers: sliced/tiled VAE decoding and CPU offload of idle modules.
    pipe.vae.enable_slicing()
    pipe.vae.enable_tiling()
    pipe.enable_model_cpu_offload()

    return pipe
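
def _example_unwarp_model_keys():
    # Illustrative sketch (not part of the original upload): the checkpoint
    # stores parameters under the "module." prefix that DistributedDataParallel
    # adds, which is why init_pipe() unwraps the keys before calling
    # load_state_dict(strict=True).
    ddp_style = {"module.proj.weight": torch.zeros(2, 2)}  # as saved from DDP
    unwrapped = {k.split("module.")[1]: v for k, v in ddp_style.items()}
    assert "proj.weight" in unwrapped
    return unwrapped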

def inference(source_images,
              target_images,
              text_prompt, negative_prompt,
              pipe, vae,
              step, guidance_scale,
              h, w, random_seed) -> List[PIL.Image.Image]:
    torch.manual_seed(random_seed)

    # Normalize uint8 frames from [0, 255] to [-1, 1].
    source_pixel_values = source_images / 127.5 - 1.0
    source_pixel_values = source_pixel_values.to(torch.float16).to("cuda:0")
    if target_images is not None:
        target_pixel_values = target_images / 127.5 - 1.0
        target_pixel_values = target_pixel_values.to(torch.float16).to("cuda:0")
    bsz, f, h, w, c = source_pixel_values.shape

    with torch.no_grad():
        # Encode the source video into VAE latents for the ControlNet branch.
        source_pixel_values = rearrange(source_pixel_values, "b f h w c -> b c f h w")
        source_latents = vae.encode(source_pixel_values).latent_dist.sample()
        source_latents = source_latents.to(torch.float16)
        source_latents = source_latents * vae.config.scaling_factor
        source_latents = rearrange(source_latents, "b c f h w -> b f c h w")

        if target_images is not None:
            # Encode only the first target frame, zero-pad it over time, and
            # concatenate it with the source latents along the channel axis.
            target_pixel_values = rearrange(target_pixel_values, "b f h w c -> b c f h w")
            images = target_pixel_values[:, :, :1, ...]
            image_latents = vae.encode(images).latent_dist.sample()
            image_latents = image_latents.to(torch.float16)
            image_latents = image_latents * vae.config.scaling_factor
            image_latents = rearrange(image_latents, "b c f h w -> b f c h w")
            image_latents = torch.cat([image_latents, torch.zeros_like(source_latents)[:, 1:]], dim=1)
            latents = torch.cat([image_latents, source_latents], dim=2)
        else:
            image_latents = None
            latents = source_latents

    video = pipe(
        prompt=text_prompt,
        negative_prompt=negative_prompt,
        video_condition=source_latents,   # input to the ControlNet branch
        video_condition2=image_latents,   # concatenated with the latents
        height=h,
        width=w,
        num_frames=f,
        num_inference_steps=50,
        interval=6,
        guidance_scale=guidance_scale,
        generator=torch.Generator(device="cuda:0").manual_seed(random_seed)
    ).frames[0]

    return video
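
def _example_latent_packing_shapes():
    # Illustrative shape walkthrough for inference() with dummy tensors
    # (not part of the original upload), assuming the standard CogVideoX VAE
    # compression: 4x in time plus the first frame (33 frames -> 9 latent
    # frames) and 8x in space (448/8 = 56, 768/8 = 96).
    b, f_lat, c, h_lat, w_lat = 1, 9, 16, 56, 96
    source_latents = torch.randn(b, f_lat, c, h_lat, w_lat)

    # Encode only the first target frame, zero-pad it across time...
    first_frame_latents = torch.randn(b, 1, c, h_lat, w_lat)
    image_latents = torch.cat(
        [first_frame_latents, torch.zeros_like(source_latents)[:, 1:]], dim=1
    )
    # ...then concatenate along the channel axis: 16 + 16 = 32 channels,
    # matching in_channels = 32 configured in init_pipe() for i2v.
    latents = torch.cat([image_latents, source_latents], dim=2)
    assert latents.shape == (b, f_lat, 32, h_lat, w_lat)
    return latents.shape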

def process_video(video_file, image_file, positive_prompt, negative_prompt, guidance, random_seed, choice, progress=gr.Progress(track_tqdm=True)) -> str:
    # 33 frames -> one shard; 65 frames -> two chained shards of 33 frames.
    video_shard = 1 if choice == 33 else 2

    pipe = PIPE

    h = 448
    w = 768
    step = 30001  # checkpoint step; forwarded to inference() but not used there
    frames_per_shard = 33

    # Load and resize the first-frame guidance image (BGR -> RGB).
    image = cv2.imread(image_file)
    resized_image = cv2.resize(image, (768, 448))
    resized_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB)
    image = torch.from_numpy(resized_image)

    # Load and resize the source video frames.
    vr = decord.VideoReader(video_file)
    frames = vr.get_batch(list(range(frames_per_shard))).asnumpy()
    _, src_h, src_w, _ = frames.shape
    resized_frames = [cv2.resize(frame, (768, 448)) for frame in frames]
    images = torch.from_numpy(np.array(resized_frames))

    target_path = "outputvideo"
    source_images = images[None, ...]
    target_images = image[None, None, ...]

    video: List[PIL.Image.Image] = []

    for i in progress.tqdm(range(video_shard)):
        if i > 0:  # first-frame guidance for the next shard
            first_frame = transforms.ToTensor()(video[-1])
            first_frame = first_frame * 255.0
            first_frame = rearrange(first_frame, "c h w -> h w c")
            # The source video still guides the ControlNet; only the
            # first-frame condition is replaced by the last generated frame.
            target_images = first_frame[None, None, ...]

        video += inference(source_images,
                           target_images, positive_prompt,
                           negative_prompt, pipe, pipe.vae,
                           step, guidance,
                           h, w, random_seed)

    # Restore the source aspect ratio at a height of 448.
    video = [frame.resize((int(src_w / src_h * 448), 448)) for frame in video]

    os.makedirs(f"./{target_path}", exist_ok=True)
    # Derive the output name from the example's single-digit file name,
    # e.g. "assets/0.mp4" -> "output_0.mp4".
    output_path: str = f"./{target_path}/output_{video_file[-5]}.mp4"
    export_to_video(video, output_path, fps=8)
    return output_path
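
def _example_shard_chaining(run_shard, num_shards, first_guidance):
    # Illustrative sketch of the loop in process_video() (not part of the
    # original upload): each shard is seeded with the last frame the previous
    # shard produced, while the source-video condition stays fixed throughout.
    frames, guidance = [], first_guidance
    for _ in range(num_shards):
        out = run_shard(guidance)   # stand-in for inference(...)
        frames += out
        guidance = out[-1]          # last generated frame seeds the next shard
    return frames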

PIPE = init_pipe()

with gr.Blocks() as demo:
    gr.Markdown("""
    # Señorita-2M: A High-Quality Instruction-based Dataset for General Video Editing by Video Specialists

    [Paper](https://arxiv.org/abs/2502.06734) | [Code](https://127.0.0.1:7860) | [Huggingface](https://127.0.0.1:7860)
    """)
    # gr.HTML(open("gradio_title.md", 'r').read())

    with gr.Row():
        video_input = gr.Video(label="Video input")
        image_input = gr.Image(type="filepath", label="First frame guidance")
    with gr.Row():
        with gr.Column():
            positive_prompt = gr.Textbox(label="Positive prompt", value="")
            negative_prompt = gr.Textbox(label="Negative prompt", value="")
            seed = gr.Slider(minimum=0, maximum=2147483647, step=1, value=0, label="Seed")
            guidance_slider = gr.Slider(minimum=1, maximum=10, value=4, label="Guidance")
            choice = gr.Radio(choices=[33, 65], label="Frame number", value=33)
        with gr.Column():
            video_output = gr.Video(label="Video output")

    with gr.Row():
        submit_button = gr.Button("Generate")
        submit_button.click(fn=process_video, inputs=[video_input, image_input, positive_prompt, negative_prompt, guidance_slider, seed, choice], outputs=video_output)
    with gr.Row():
        gr.Examples(
            [
                ["assets/0.mp4", "assets/0_edit.png", get_prompt("assets/0.txt")[0], get_prompt("assets/0.txt")[1], 4, 0, 33],
                ["assets/1.mp4", "assets/1_edit.png", get_prompt("assets/1.txt")[0], get_prompt("assets/1.txt")[1], 4, 0, 33],
                ["assets/2.mp4", "assets/2_edit.png", get_prompt("assets/2.txt")[0], get_prompt("assets/2.txt")[1], 4, 0, 33],
                ["assets/3.mp4", "assets/3_edit.png", get_prompt("assets/3.txt")[0], get_prompt("assets/3.txt")[1], 4, 0, 33],
                ["assets/4.mp4", "assets/4_edit.png", get_prompt("assets/4.txt")[0], get_prompt("assets/4.txt")[1], 4, 0, 33],
                ["assets/5.mp4", "assets/5_edit.png", get_prompt("assets/5.txt")[0], get_prompt("assets/5.txt")[1], 4, 0, 33],
                ["assets/6.mp4", "assets/6_edit.png", get_prompt("assets/6.txt")[0], get_prompt("assets/6.txt")[1], 4, 0, 33],
                ["assets/7.mp4", "assets/7_edit.png", get_prompt("assets/7.txt")[0], get_prompt("assets/7.txt")[1], 4, 0, 33],
                ["assets/8.mp4", "assets/8_edit.png", get_prompt("assets/8.txt")[0], get_prompt("assets/8.txt")[1], 4, 0, 33]
            ],
            inputs=[video_input, image_input, positive_prompt, negative_prompt, guidance_slider, seed, choice],
            outputs=video_output,
            fn=process_video,
            cache_examples=False
        )

if __name__ == "__main__":
    demo.queue().launch()
assets/0.mp4
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:6b9b4cbbc26fd2c76e5339fc9868b97cb3a9e6dfd394a99a6217f8f2070ad4af
size 1234363

assets/0.txt
ADDED
Swap dog for fox. prompt: the fox with two ears. the motion is clear. The background is strictly aligned.
The fox with three ears.

assets/0_edit.png
ADDED
Binary image (stored with Git LFS).

assets/1.mp4
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:4b5bc1baee8501d33e58ea3a914b90f8737b3dc692cd21c313a790b7c90d2ed1
size 583649

assets/1.txt
ADDED
Make it anime style. prompt: the flower is swaying in the wind. the video is captured by professional camera. The motion is stable. Best quality.
bad quality.

assets/1_edit.png
ADDED
Binary image (stored with Git LFS).

assets/2.mp4
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:b0bff917b61848877637c5e9e9c8f8a7ff246ecf02b91b6d5b496737f7ea9ddc
size 3359278

assets/2.txt
ADDED
Add a hat on girl's head.
Bad quality.

assets/2_edit.png
ADDED
Binary image (stored with Git LFS).

assets/3.mp4
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:ea5c36673ade253be717ee8a7b06599f7108a03cd75ef6a954c32f1a05fde812
size 1294568

assets/3.txt
ADDED
Make it oil painting style. The color is bright and beautiful. the video is captured by professional camera. The motion is stable. Best quality.
bad quality.

assets/3_edit.png
ADDED
Binary image (stored with Git LFS).

assets/4.mp4
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:a57e8216cdb4b2bc990a380d72844e2fe6594820ff31993330718c9315c0167a
size 460228

assets/4.txt
ADDED
Remove the girl. the video is captured by professional camera. The motion is stable. Best quality.
bad quality.

assets/4_edit.png
ADDED
Binary image (stored with Git LFS).

assets/5.mp4
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:92dbba09c23b2dc594a84505dc7575bf2111b5af3a2636723533282d349e6e86
size 513130

assets/5.txt
ADDED
Make it water color style. prompt: the flowers with green leaves. The color is bright and beautiful. the video is captured by professional camera. The motion is stable. Best quality.
bad quality.

assets/5_edit.png
ADDED
Binary image (stored with Git LFS).

assets/6.mp4
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:afb4aadab4339e508e66e1630225191f859dd34902c7f165284070241b853eff
size 119857

assets/6.txt
ADDED
Make it anime style. prompt: the butterfly in on the flower. The color is bright and beautiful. the video is captured by professional camera. The motion is stable. Best quality.
bad quality.

assets/6_edit.png
ADDED
Binary image (stored with Git LFS).

assets/7.mp4
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:3ade27a4aed77a41c809a82768c2006179718d2779b8391e5b74d8bf7a44aecf
size 403370

assets/7.txt
ADDED
Make it anime style. prompt: white swan, autumn. The color is bright and beautiful. the video is captured by professional camera. The motion is stable. Best quality.
bad quality.

assets/7_edit.png
ADDED
Binary image (stored with Git LFS).

assets/8.mp4
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:dfe7c713e94c8f85bb174326db147942d906973d49c4dd1a0412a81a7a7a1d93
size 690881

assets/8.txt
ADDED
Swap bird for squirrel. prompt: the squirrel is standing on the column. Squirrel are looking around. two ears.
Static Squirrel.

assets/8_edit.png
ADDED
Binary image (stored with Git LFS).

assets/9.mp4
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:0073470e8507542945226a48e3af1e72656cdb5df331449374955be07fef4871
size 539224

assets/9.txt
ADDED
Swap black dog for white pig. prompt: the pig is standing between two trees.
black dog.

assets/9_edit.png
ADDED
Binary image (stored with Git LFS).

assets/outputvideo/output_0.mp4
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:4ba1cf2d89579e0d9e0d12cf4ebc06c0756725f048c6c2b269d52a99f6185477
size 1282150

assets/outputvideo/output_1.mp4
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:cc843ae155c3c3198e147b882fe2460c18d05e0b387136925df828edd83587fc
size 266780

assets/outputvideo/output_2.mp4
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:ee9743f7d86ff59dc3b19cbfaf2677aad38340cc89c6856f353da1aa69a226fe
size 912515

assets/outputvideo/output_3.mp4
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:07928cc806c93453a99e6de1ece37d949d71fbf33bbd20ea2c7116ec988f1d17
size 1058274

assets/outputvideo/output_4.mp4
ADDED
Binary file (97.2 kB).

assets/outputvideo/output_5.mp4
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:a3e32b887b2084b9ce04bc77c94da9c068018432d1b15de7f93435dff9e65ee3
size 237088

assets/outputvideo/output_6.mp4
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:2fff03bc3efbb20752cae96fb8da35b5fee292596ad00d588a09fabb20c9aeef
size 141423

assets/outputvideo/output_7.mp4
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:0910239dd1f4b40a49602bdc27af42bd394c3d55724a3665d731471fe20989db
size 294492

assets/outputvideo/output_8.mp4
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:8abf76c7fffe1f58445a24bfe203073c094dbc44ec9934ccaff71c31e645a7f9
size 358596

control_cogvideox/__pycache__/attention_processor.cpython-310.pyc
ADDED
Binary file (88.4 kB).

control_cogvideox/__pycache__/cogvideox_transformer_3d.cpython-310.pyc
ADDED
Binary file (16.9 kB).

control_cogvideox/__pycache__/cogvideox_transformer_3d.cpython-311.pyc
ADDED
Binary file (24.1 kB).

control_cogvideox/__pycache__/cogvideox_transformer_3d_ipadapter.cpython-310.pyc
ADDED
Binary file (18 kB).

control_cogvideox/__pycache__/cogvideox_transformer_3d_new_version.cpython-310.pyc
ADDED
Binary file (16.9 kB).

control_cogvideox/__pycache__/controlnet_cogvideox_transformer_3d.cpython-310.pyc
ADDED
Binary file (16.8 kB).

control_cogvideox/__pycache__/controlnet_cogvideox_transformer_3d.cpython-311.pyc
ADDED
Binary file (23.8 kB).

control_cogvideox/__pycache__/controlnet_cogvideox_transformer_3d_condition.cpython-310.pyc
ADDED
Binary file (17.2 kB).

control_cogvideox/__pycache__/embeddings.cpython-310.pyc
ADDED
Binary file (54.9 kB).