Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -190,124 +190,125 @@ def generate_video(positive_prompt, num_frames, input_image):
|
|
190 |
print("Number of Frames:", num_frames)
|
191 |
print("Input Image:", input_image)
|
192 |
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
loadimage = NODE_CLASS_MAPPINGS["LoadImage"]()
|
208 |
-
loadimage_8 = loadimage.load_image(image=input_image)
|
209 |
-
|
210 |
-
cliploader = NODE_CLASS_MAPPINGS["CLIPLoader"]()
|
211 |
-
cliploader_20 = cliploader.load_clip(
|
212 |
-
clip_name="google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
|
213 |
-
type="sd3",
|
214 |
-
device="default",
|
215 |
-
)
|
216 |
-
|
217 |
-
emptylatentimage = NODE_CLASS_MAPPINGS["EmptyLatentImage"]()
|
218 |
-
emptylatentimage_161 = emptylatentimage.generate(
|
219 |
-
width=720, height=480, batch_size=1
|
220 |
-
)
|
221 |
-
|
222 |
-
cogvideotextencode = NODE_CLASS_MAPPINGS["CogVideoTextEncode"]()
|
223 |
-
cogvideosampler = NODE_CLASS_MAPPINGS["CogVideoSampler"]()
|
224 |
-
cogvideodecode = NODE_CLASS_MAPPINGS["CogVideoDecode"]()
|
225 |
-
reactorfaceswap = NODE_CLASS_MAPPINGS["ReActorFaceSwap"]()
|
226 |
-
cr_upscale_image = NODE_CLASS_MAPPINGS["CR Upscale Image"]()
|
227 |
-
vhs_videocombine = NODE_CLASS_MAPPINGS["VHS_VideoCombine"]()
|
228 |
-
|
229 |
-
for q in range(1):
|
230 |
-
cogvideotextencode_30 = cogvideotextencode.process(
|
231 |
-
prompt=positive_prompt,
|
232 |
-
strength=1,
|
233 |
-
force_offload=True,
|
234 |
-
clip=get_value_at_index(cliploader_20, 0),
|
235 |
-
)
|
236 |
-
|
237 |
-
cogvideotextencode_31 = cogvideotextencode.process(
|
238 |
-
prompt='',
|
239 |
-
strength=1,
|
240 |
-
force_offload=True,
|
241 |
-
clip=get_value_at_index(cogvideotextencode_30, 1),
|
242 |
-
)
|
243 |
-
|
244 |
-
cogvideosampler_155 = cogvideosampler.process(
|
245 |
-
num_frames=num_frames,
|
246 |
-
steps=50,
|
247 |
-
cfg=6,
|
248 |
-
seed=random.randint(1, 2**64),
|
249 |
-
scheduler="CogVideoXDDIM",
|
250 |
-
denoise_strength=1,
|
251 |
-
model=get_value_at_index(downloadandloadcogvideomodel_1, 0),
|
252 |
-
positive=get_value_at_index(cogvideotextencode_30, 0),
|
253 |
-
negative=get_value_at_index(cogvideotextencode_31, 0),
|
254 |
-
samples=get_value_at_index(emptylatentimage_161, 0),
|
255 |
-
)
|
256 |
-
|
257 |
-
cogvideodecode_11 = cogvideodecode.decode(
|
258 |
-
enable_vae_tiling=False,
|
259 |
-
tile_sample_min_height=240,
|
260 |
-
tile_sample_min_width=360,
|
261 |
-
tile_overlap_factor_height=0.2,
|
262 |
-
tile_overlap_factor_width=0.2,
|
263 |
-
auto_tile_size=True,
|
264 |
-
vae=get_value_at_index(downloadandloadcogvideomodel_1, 1),
|
265 |
-
samples=get_value_at_index(cogvideosampler_155, 0),
|
266 |
-
)
|
267 |
-
|
268 |
-
reactorfaceswap_3 = reactorfaceswap.execute(
|
269 |
-
enabled=True,
|
270 |
-
swap_model="inswapper_128.onnx",
|
271 |
-
facedetection="retinaface_resnet50",
|
272 |
-
face_restore_model="GFPGANv1.4.pth",
|
273 |
-
face_restore_visibility=1,
|
274 |
-
codeformer_weight=0.75,
|
275 |
-
detect_gender_input="no",
|
276 |
-
detect_gender_source="no",
|
277 |
-
input_faces_index="0",
|
278 |
-
source_faces_index="0",
|
279 |
-
console_log_level=1,
|
280 |
-
input_image=get_value_at_index(cogvideodecode_11, 0),
|
281 |
-
source_image=get_value_at_index(loadimage_8, 0),
|
282 |
)
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
image=get_value_at_index(reactorfaceswap_3, 0),
|
293 |
)
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
filename_prefix="AnimateDiff",
|
299 |
-
format="video/h264-mp4",
|
300 |
-
pix_fmt="yuv420p",
|
301 |
-
crf=19,
|
302 |
-
save_metadata=True,
|
303 |
-
trim_to_audio=False,
|
304 |
-
pingpong=True,
|
305 |
-
save_output=True,
|
306 |
-
images=get_value_at_index(cr_upscale_image_151, 0),
|
307 |
-
unique_id=7214086815220268849,
|
308 |
)
|
309 |
-
|
310 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
311 |
|
312 |
|
313 |
if __name__ == "__main__":
|
|
|
190 |
print("Number of Frames:", num_frames)
|
191 |
print("Input Image:", input_image)
|
192 |
|
193 |
+
with gr.Progress(track_tqdm=True):
|
194 |
+
import_custom_nodes()
|
195 |
+
with torch.inference_mode():
|
196 |
+
downloadandloadcogvideomodel = NODE_CLASS_MAPPINGS[
|
197 |
+
"DownloadAndLoadCogVideoModel"
|
198 |
+
]()
|
199 |
+
downloadandloadcogvideomodel_1 = downloadandloadcogvideomodel.loadmodel(
|
200 |
+
model="THUDM/CogVideoX-5b",
|
201 |
+
precision="bf16",
|
202 |
+
quantization="disabled",
|
203 |
+
enable_sequential_cpu_offload=True,
|
204 |
+
attention_mode="sdpa",
|
205 |
+
load_device="main_device",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
206 |
)
|
207 |
+
|
208 |
+
loadimage = NODE_CLASS_MAPPINGS["LoadImage"]()
|
209 |
+
loadimage_8 = loadimage.load_image(image=input_image)
|
210 |
+
|
211 |
+
cliploader = NODE_CLASS_MAPPINGS["CLIPLoader"]()
|
212 |
+
cliploader_20 = cliploader.load_clip(
|
213 |
+
clip_name="google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
|
214 |
+
type="sd3",
|
215 |
+
device="default",
|
|
|
216 |
)
|
217 |
+
|
218 |
+
emptylatentimage = NODE_CLASS_MAPPINGS["EmptyLatentImage"]()
|
219 |
+
emptylatentimage_161 = emptylatentimage.generate(
|
220 |
+
width=720, height=480, batch_size=1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
221 |
)
|
222 |
+
|
223 |
+
cogvideotextencode = NODE_CLASS_MAPPINGS["CogVideoTextEncode"]()
|
224 |
+
cogvideosampler = NODE_CLASS_MAPPINGS["CogVideoSampler"]()
|
225 |
+
cogvideodecode = NODE_CLASS_MAPPINGS["CogVideoDecode"]()
|
226 |
+
reactorfaceswap = NODE_CLASS_MAPPINGS["ReActorFaceSwap"]()
|
227 |
+
cr_upscale_image = NODE_CLASS_MAPPINGS["CR Upscale Image"]()
|
228 |
+
vhs_videocombine = NODE_CLASS_MAPPINGS["VHS_VideoCombine"]()
|
229 |
+
|
230 |
+
for q in range(1):
|
231 |
+
cogvideotextencode_30 = cogvideotextencode.process(
|
232 |
+
prompt=positive_prompt,
|
233 |
+
strength=1,
|
234 |
+
force_offload=True,
|
235 |
+
clip=get_value_at_index(cliploader_20, 0),
|
236 |
+
)
|
237 |
+
|
238 |
+
cogvideotextencode_31 = cogvideotextencode.process(
|
239 |
+
prompt='',
|
240 |
+
strength=1,
|
241 |
+
force_offload=True,
|
242 |
+
clip=get_value_at_index(cogvideotextencode_30, 1),
|
243 |
+
)
|
244 |
+
|
245 |
+
cogvideosampler_155 = cogvideosampler.process(
|
246 |
+
num_frames=num_frames,
|
247 |
+
steps=50,
|
248 |
+
cfg=6,
|
249 |
+
seed=random.randint(1, 2**64),
|
250 |
+
scheduler="CogVideoXDDIM",
|
251 |
+
denoise_strength=1,
|
252 |
+
model=get_value_at_index(downloadandloadcogvideomodel_1, 0),
|
253 |
+
positive=get_value_at_index(cogvideotextencode_30, 0),
|
254 |
+
negative=get_value_at_index(cogvideotextencode_31, 0),
|
255 |
+
samples=get_value_at_index(emptylatentimage_161, 0),
|
256 |
+
)
|
257 |
+
|
258 |
+
cogvideodecode_11 = cogvideodecode.decode(
|
259 |
+
enable_vae_tiling=False,
|
260 |
+
tile_sample_min_height=240,
|
261 |
+
tile_sample_min_width=360,
|
262 |
+
tile_overlap_factor_height=0.2,
|
263 |
+
tile_overlap_factor_width=0.2,
|
264 |
+
auto_tile_size=True,
|
265 |
+
vae=get_value_at_index(downloadandloadcogvideomodel_1, 1),
|
266 |
+
samples=get_value_at_index(cogvideosampler_155, 0),
|
267 |
+
)
|
268 |
+
|
269 |
+
reactorfaceswap_3 = reactorfaceswap.execute(
|
270 |
+
enabled=True,
|
271 |
+
swap_model="inswapper_128.onnx",
|
272 |
+
facedetection="retinaface_resnet50",
|
273 |
+
face_restore_model="GFPGANv1.4.pth",
|
274 |
+
face_restore_visibility=1,
|
275 |
+
codeformer_weight=0.75,
|
276 |
+
detect_gender_input="no",
|
277 |
+
detect_gender_source="no",
|
278 |
+
input_faces_index="0",
|
279 |
+
source_faces_index="0",
|
280 |
+
console_log_level=1,
|
281 |
+
input_image=get_value_at_index(cogvideodecode_11, 0),
|
282 |
+
source_image=get_value_at_index(loadimage_8, 0),
|
283 |
+
)
|
284 |
+
|
285 |
+
cr_upscale_image_151 = cr_upscale_image.upscale(
|
286 |
+
upscale_model="4x_NMKD-Superscale-SP_178000_G.pth",
|
287 |
+
mode="rescale",
|
288 |
+
rescale_factor=4,
|
289 |
+
resize_width=720,
|
290 |
+
resampling_method="lanczos",
|
291 |
+
supersample="true",
|
292 |
+
rounding_modulus=16,
|
293 |
+
image=get_value_at_index(reactorfaceswap_3, 0),
|
294 |
+
)
|
295 |
+
|
296 |
+
vhs_videocombine_154 = vhs_videocombine.combine_video(
|
297 |
+
frame_rate=8,
|
298 |
+
loop_count=0,
|
299 |
+
filename_prefix="AnimateDiff",
|
300 |
+
format="video/h264-mp4",
|
301 |
+
pix_fmt="yuv420p",
|
302 |
+
crf=19,
|
303 |
+
save_metadata=True,
|
304 |
+
trim_to_audio=False,
|
305 |
+
pingpong=True,
|
306 |
+
save_output=True,
|
307 |
+
images=get_value_at_index(cr_upscale_image_151, 0),
|
308 |
+
unique_id=7214086815220268849,
|
309 |
+
)
|
310 |
+
saved_path = f"output/{vhs_videocombine_154['ui']['images'][0]['filename']}"
|
311 |
+
return saved_path
|
312 |
|
313 |
|
314 |
if __name__ == "__main__":
|