yijin928 committed · verified
Commit e839e85 · 1 Parent(s): 16e3fea

Update app.py

Files changed (1)
  1. app.py +118 -118
app.py CHANGED
@@ -184,127 +184,127 @@ def generate_video(positive_prompt, num_frames, input_image):
     print("Number of Frames:", num_frames)
     print("Input Image:", input_image)

-    with gr.Progress(track_tqdm=True):
-        import_custom_nodes()
-        with torch.inference_mode():
-            downloadandloadcogvideomodel = NODE_CLASS_MAPPINGS[
-                "DownloadAndLoadCogVideoModel"
-            ]()
-            downloadandloadcogvideomodel_1 = downloadandloadcogvideomodel.loadmodel(
-                model="THUDM/CogVideoX-5b",
-                precision="bf16",
-                quantization="disabled",
-                enable_sequential_cpu_offload=True,
-                attention_mode="sdpa",
-                load_device="main_device",
-            )
-
-            loadimage = NODE_CLASS_MAPPINGS["LoadImage"]()
-            loadimage_8 = loadimage.load_image(image=input_image)
-
-            cliploader = NODE_CLASS_MAPPINGS["CLIPLoader"]()
-            cliploader_20 = cliploader.load_clip(
-                clip_name="google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
-                type="sd3",
-                device="default",
-            )
-
-            emptylatentimage = NODE_CLASS_MAPPINGS["EmptyLatentImage"]()
-            emptylatentimage_161 = emptylatentimage.generate(
-                width=360, #reduce this to avoid OOM error
-                height=240, #reduce this to avoid OOM error
-                batch_size=1 #reduce this to avoid OOM error
-            )
-
-            cogvideotextencode = NODE_CLASS_MAPPINGS["CogVideoTextEncode"]()
-            cogvideosampler = NODE_CLASS_MAPPINGS["CogVideoSampler"]()
-            cogvideodecode = NODE_CLASS_MAPPINGS["CogVideoDecode"]()
-            reactorfaceswap = NODE_CLASS_MAPPINGS["ReActorFaceSwap"]()
-            cr_upscale_image = NODE_CLASS_MAPPINGS["CR Upscale Image"]()
-            vhs_videocombine = NODE_CLASS_MAPPINGS["VHS_VideoCombine"]()
-
-            for q in range(1):
-                cogvideotextencode_30 = cogvideotextencode.process(
-                    prompt=positive_prompt,
-                    strength=1,
-                    force_offload=True,
-                    clip=get_value_at_index(cliploader_20, 0),
-                )
-
-                cogvideotextencode_31 = cogvideotextencode.process(
-                    prompt='',
-                    strength=1,
-                    force_offload=True,
-                    clip=get_value_at_index(cogvideotextencode_30, 1),
-                )
-
-                cogvideosampler_155 = cogvideosampler.process(
-                    num_frames=num_frames,
-                    steps=30, #reduce this to avoid OOM error
-                    cfg=6,
-                    seed=random.randint(1, 2**64),
-                    scheduler="CogVideoXDDIM",
-                    denoise_strength=1,
-                    model=get_value_at_index(downloadandloadcogvideomodel_1, 0),
-                    positive=get_value_at_index(cogvideotextencode_30, 0),
-                    negative=get_value_at_index(cogvideotextencode_31, 0),
-                    samples=get_value_at_index(emptylatentimage_161, 0),
-                )
-
-                cogvideodecode_11 = cogvideodecode.decode(
-                    enable_vae_tiling=False,
-                    tile_sample_min_height=240,#reduce this to avoid OOM error
-                    tile_sample_min_width=360,#reduce this to avoid OOM error
-                    tile_overlap_factor_height=0.2,
-                    tile_overlap_factor_width=0.2,
-                    auto_tile_size=True,
-                    vae=get_value_at_index(downloadandloadcogvideomodel_1, 1),
-                    samples=get_value_at_index(cogvideosampler_155, 0),
-                )
-
-                reactorfaceswap_3 = reactorfaceswap.execute(
-                    enabled=True,
-                    swap_model="inswapper_128.onnx",
-                    facedetection="retinaface_resnet50",
-                    face_restore_model="GFPGANv1.4.pth",
-                    face_restore_visibility=1,
-                    codeformer_weight=0.75,
-                    detect_gender_input="no",
-                    detect_gender_source="no",
-                    input_faces_index="0",
-                    source_faces_index="0",
-                    console_log_level=1,
-                    input_image=get_value_at_index(cogvideodecode_11, 0),
-                    source_image=get_value_at_index(loadimage_8, 0),
-                )
-
-                cr_upscale_image_151 = cr_upscale_image.upscale(
-                    upscale_model="4x_NMKD-Superscale-SP_178000_G.pth",
-                    mode="rescale",
-                    rescale_factor=4,
-                    resize_width=720,
-                    resampling_method="lanczos",
-                    supersample="true",
-                    rounding_modulus=16,
-                    image=get_value_at_index(reactorfaceswap_3, 0),
-                )
-
-                vhs_videocombine_154 = vhs_videocombine.combine_video(
-                    frame_rate=8,
-                    loop_count=0,
-                    filename_prefix="AnimateDiff",
-                    format="video/h264-mp4",
-                    pix_fmt="yuv420p",
-                    crf=19,
-                    save_metadata=True,
-                    trim_to_audio=False,
-                    pingpong=True,
-                    save_output=True,
-                    images=get_value_at_index(cr_upscale_image_151, 0),
-                    unique_id=7214086815220268849,
-                )
-                saved_path = f"output/{vhs_videocombine_154['ui']['images'][0]['filename']}"
-                return saved_path
+    progress = gr.Progress(track_tqdm=True)
+    import_custom_nodes()
+    with torch.inference_mode():
+        downloadandloadcogvideomodel = NODE_CLASS_MAPPINGS[
+            "DownloadAndLoadCogVideoModel"
+        ]()
+        downloadandloadcogvideomodel_1 = downloadandloadcogvideomodel.loadmodel(
+            model="THUDM/CogVideoX-5b",
+            precision="bf16",
+            quantization="disabled",
+            enable_sequential_cpu_offload=True,
+            attention_mode="sdpa",
+            load_device="main_device",
+        )
+
+        loadimage = NODE_CLASS_MAPPINGS["LoadImage"]()
+        loadimage_8 = loadimage.load_image(image=input_image)
+
+        cliploader = NODE_CLASS_MAPPINGS["CLIPLoader"]()
+        cliploader_20 = cliploader.load_clip(
+            clip_name="google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
+            type="sd3",
+            device="default",
+        )
+
+        emptylatentimage = NODE_CLASS_MAPPINGS["EmptyLatentImage"]()
+        emptylatentimage_161 = emptylatentimage.generate(
+            width=360, #reduce this to avoid OOM error
+            height=240, #reduce this to avoid OOM error
+            batch_size=1 #reduce this to avoid OOM error
+        )
+
+        cogvideotextencode = NODE_CLASS_MAPPINGS["CogVideoTextEncode"]()
+        cogvideosampler = NODE_CLASS_MAPPINGS["CogVideoSampler"]()
+        cogvideodecode = NODE_CLASS_MAPPINGS["CogVideoDecode"]()
+        reactorfaceswap = NODE_CLASS_MAPPINGS["ReActorFaceSwap"]()
+        cr_upscale_image = NODE_CLASS_MAPPINGS["CR Upscale Image"]()
+        vhs_videocombine = NODE_CLASS_MAPPINGS["VHS_VideoCombine"]()
+
+        for q in range(1):
+            cogvideotextencode_30 = cogvideotextencode.process(
+                prompt=positive_prompt,
+                strength=1,
+                force_offload=True,
+                clip=get_value_at_index(cliploader_20, 0),
+            )
+
+            cogvideotextencode_31 = cogvideotextencode.process(
+                prompt='',
+                strength=1,
+                force_offload=True,
+                clip=get_value_at_index(cogvideotextencode_30, 1),
+            )
+
+            cogvideosampler_155 = cogvideosampler.process(
+                num_frames=num_frames,
+                steps=30, #reduce this to avoid OOM error
+                cfg=6,
+                seed=random.randint(1, 2**64),
+                scheduler="CogVideoXDDIM",
+                denoise_strength=1,
+                model=get_value_at_index(downloadandloadcogvideomodel_1, 0),
+                positive=get_value_at_index(cogvideotextencode_30, 0),
+                negative=get_value_at_index(cogvideotextencode_31, 0),
+                samples=get_value_at_index(emptylatentimage_161, 0),
+            )
+
+            cogvideodecode_11 = cogvideodecode.decode(
+                enable_vae_tiling=False,
+                tile_sample_min_height=240,#reduce this to avoid OOM error
+                tile_sample_min_width=360,#reduce this to avoid OOM error
+                tile_overlap_factor_height=0.2,
+                tile_overlap_factor_width=0.2,
+                auto_tile_size=True,
+                vae=get_value_at_index(downloadandloadcogvideomodel_1, 1),
+                samples=get_value_at_index(cogvideosampler_155, 0),
+            )
+
+            reactorfaceswap_3 = reactorfaceswap.execute(
+                enabled=True,
+                swap_model="inswapper_128.onnx",
+                facedetection="retinaface_resnet50",
+                face_restore_model="GFPGANv1.4.pth",
+                face_restore_visibility=1,
+                codeformer_weight=0.75,
+                detect_gender_input="no",
+                detect_gender_source="no",
+                input_faces_index="0",
+                source_faces_index="0",
+                console_log_level=1,
+                input_image=get_value_at_index(cogvideodecode_11, 0),
+                source_image=get_value_at_index(loadimage_8, 0),
+            )
+
+            cr_upscale_image_151 = cr_upscale_image.upscale(
+                upscale_model="4x_NMKD-Superscale-SP_178000_G.pth",
+                mode="rescale",
+                rescale_factor=4,
+                resize_width=720,
+                resampling_method="lanczos",
+                supersample="true",
+                rounding_modulus=16,
+                image=get_value_at_index(reactorfaceswap_3, 0),
+            )
+
+            vhs_videocombine_154 = vhs_videocombine.combine_video(
+                frame_rate=8,
+                loop_count=0,
+                filename_prefix="AnimateDiff",
+                format="video/h264-mp4",
+                pix_fmt="yuv420p",
+                crf=19,
+                save_metadata=True,
+                trim_to_audio=False,
+                pingpong=True,
+                save_output=True,
+                images=get_value_at_index(cr_upscale_image_151, 0),
+                unique_id=7214086815220268849,
+            )
+            saved_path = f"output/{vhs_videocombine_154['ui']['images'][0]['filename']}"
+            return saved_path


 if __name__ == "__main__":
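
The only functional change in this hunk is its first line: the old code wrapped the whole pipeline in `with gr.Progress(track_tqdm=True):`, treating gr.Progress as a context manager (which it is not documented to be), while the new code assigns the tracker to a variable and dedents the body by one level. For reference, the pattern Gradio documents is to give the handler a gr.Progress instance so that tqdm loops inside it are mirrored in the UI; a minimal, self-contained sketch of that documented pattern follows (the fake_render function, demo app, and dummy loop are illustrative placeholders, not code from this Space):

import time

import gradio as gr


def fake_render(prompt, progress=gr.Progress(track_tqdm=True)):
    # With track_tqdm=True, tqdm-style loops inside this handler
    # (including progress.tqdm) are reflected in the Gradio progress bar.
    for _ in progress.tqdm(range(20), desc="sampling"):
        time.sleep(0.05)  # stand-in for real work
    return f"done: {prompt}"


demo = gr.Interface(fn=fake_render, inputs="text", outputs="text")

if __name__ == "__main__":
    demo.launch()

Whether a Progress instance created inside the handler body (as the new code does) is picked up by the UI depends on the Gradio version; declaring it as a keyword-argument default, as in the sketch above, is the documented form.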
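
Both sides of the diff drive ComfyUI nodes headlessly through NODE_CLASS_MAPPINGS and unpack their outputs with get_value_at_index; neither helper is visible in this hunk. As a reference, here is a minimal sketch of get_value_at_index in the shape it usually takes in ComfyUI-to-Python exports — an assumption about how this repo defines it, not code taken from the commit:

from typing import Any, Mapping, Sequence, Union


def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
    """Return the output at `index` from a node's result.

    ComfyUI nodes typically return a tuple of outputs, but some wrap
    them in a dict under a "result" key; this handles both shapes.
    (Assumed implementation, following the common ComfyUI-to-Python pattern.)
    """
    try:
        return obj[index]
    except KeyError:
        return obj["result"][index]

In the diff, for example, get_value_at_index(downloadandloadcogvideomodel_1, 0) selects the loader node's model output and get_value_at_index(downloadandloadcogvideomodel_1, 1) its VAE.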