Update app.py

app.py CHANGED
@@ -35,19 +35,14 @@ hf_hub_download(
 
 def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
     """Returns the value at the given index of a sequence or mapping.
-
     If the object is a sequence (like list or string), returns the value at the given index.
     If the object is a mapping (like a dictionary), returns the value at the index-th key.
-
    Some return a dictionary, in these cases, we look for the "results" key
-
     Args:
         obj (Union[Sequence, Mapping]): The object to retrieve the value from.
         index (int): The index of the value to retrieve.
-
     Returns:
         Any: The value at the given index.
-
     Raises:
         IndexError: If the index is out of bounds for the object and the object is not a mapping.
     """
@@ -119,7 +114,6 @@ add_extra_model_paths()
 
 def import_custom_nodes() -> None:
     """Find all custom nodes in the custom_nodes folder and add those node objects to NODE_CLASS_MAPPINGS
-
     This function sets up a new asyncio event loop, initializes the PromptServer,
     creates a PromptQueue, and initializes the custom nodes.
     """
@@ -140,11 +134,11 @@ def import_custom_nodes() -> None:
     init_extra_nodes()
 
 from nodes import NODE_CLASS_MAPPINGS
-
-#
-downloadandloadcogvideomodel = NODE_CLASS_MAPPINGS[
-    "DownloadAndLoadCogVideoModel"
-]()
+
+#To be added to "model_loaders" as it loads a model
+# downloadandloadcogvideomodel = NODE_CLASS_MAPPINGS[
+#     "DownloadAndLoadCogVideoModel"
+# ]()
 # downloadandloadcogvideomodel_1 = downloadandloadcogvideomodel.loadmodel(
 #     model="THUDM/CogVideoX-5b",
 #     precision="bf16",
@@ -153,35 +147,34 @@ downloadandloadcogvideomodel = NODE_CLASS_MAPPINGS[
 #     attention_mode="sdpa",
 #     load_device="main_device",
 # )
-loadimage = NODE_CLASS_MAPPINGS["LoadImage"]()
-cliploader = NODE_CLASS_MAPPINGS["CLIPLoader"]()
-cliploader_20 = cliploader.load_clip(
-    clip_name="t5/google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
-    type="sd3",
-    device="default",
-)
-emptylatentimage = NODE_CLASS_MAPPINGS["EmptyLatentImage"]()
+# loadimage = NODE_CLASS_MAPPINGS["LoadImage"]()
+# cliploader = NODE_CLASS_MAPPINGS["CLIPLoader"]()
+# cliploader_20 = cliploader.load_clip(
+#     clip_name="t5/google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
+#     type="sd3",
+#     device="default",
+# )
+# emptylatentimage = NODE_CLASS_MAPPINGS["EmptyLatentImage"]()
 
-cogvideotextencode = NODE_CLASS_MAPPINGS["CogVideoTextEncode"]()
-cogvideosampler = NODE_CLASS_MAPPINGS["CogVideoSampler"]()
-cogvideodecode = NODE_CLASS_MAPPINGS["CogVideoDecode"]()
-reactorfaceswap = NODE_CLASS_MAPPINGS["ReActorFaceSwap"]()
-cr_upscale_image = NODE_CLASS_MAPPINGS["CR Upscale Image"]()
-vhs_videocombine = NODE_CLASS_MAPPINGS["VHS_VideoCombine"]()
+# cogvideotextencode = NODE_CLASS_MAPPINGS["CogVideoTextEncode"]()
+# cogvideosampler = NODE_CLASS_MAPPINGS["CogVideoSampler"]()
+# cogvideodecode = NODE_CLASS_MAPPINGS["CogVideoDecode"]()
+# reactorfaceswap = NODE_CLASS_MAPPINGS["ReActorFaceSwap"]()
+# cr_upscale_image = NODE_CLASS_MAPPINGS["CR Upscale Image"]()
+# vhs_videocombine = NODE_CLASS_MAPPINGS["VHS_VideoCombine"]()
 
-#Add all the models that load a safetensors file
-model_loaders = [cliploader_20]
+# #Add all the models that load a safetensors file
 # model_loaders = [downloadandloadcogvideomodel_1, cliploader_20]
 
-# Check which models are valid and how to best load them
-valid_models = [
-    getattr(loader[0], 'patcher', loader[0])
-    for loader in model_loaders
-    if not isinstance(loader[0], dict) and not isinstance(getattr(loader[0], 'patcher', None), dict)
-]
+# # Check which models are valid and how to best load them
+# valid_models = [
+#     getattr(loader[0], 'patcher', loader[0])
+#     for loader in model_loaders
+#     if not isinstance(loader[0], dict) and not isinstance(getattr(loader[0], 'patcher', None), dict)
+# ]
 
-#Finally loads the models
-model_management.load_models_gpu(valid_models)
+# #Finally loads the models
+# model_management.load_models_gpu(valid_models)
 
 #Run ComfyUI Workflow
 @spaces.GPU(duration=60)
@@ -192,11 +185,11 @@ def generate_video(positive_prompt, num_frames, input_image):
     print("Input Image:", input_image)
 
     with gr.Progress(track_tqdm=True):
-
+        import_custom_nodes()
         with torch.inference_mode():
-
-
-
+            downloadandloadcogvideomodel = NODE_CLASS_MAPPINGS[
+                "DownloadAndLoadCogVideoModel"
+            ]()
             downloadandloadcogvideomodel_1 = downloadandloadcogvideomodel.loadmodel(
                 model="THUDM/CogVideoX-5b",
                 precision="bf16",
@@ -206,27 +199,29 @@ def generate_video(positive_prompt, num_frames, input_image):
                 load_device="main_device",
             )
 
-
+            loadimage = NODE_CLASS_MAPPINGS["LoadImage"]()
             loadimage_8 = loadimage.load_image(image=input_image)
 
-
+            cliploader = NODE_CLASS_MAPPINGS["CLIPLoader"]()
             cliploader_20 = cliploader.load_clip(
                 clip_name="google_t5-v1_1-xxl_encoderonly-fp8_e4m3fn.safetensors",
                 type="sd3",
                 device="default",
             )
 
-
+            emptylatentimage = NODE_CLASS_MAPPINGS["EmptyLatentImage"]()
             emptylatentimage_161 = emptylatentimage.generate(
-                width=
+                width=360, #reduce this to avoid OOM error
+                height=240, #reduce this to avoid OOM error
+                batch_size=1 #reduce this to avoid OOM error
             )
 
-
-
-
-
-
-
+            cogvideotextencode = NODE_CLASS_MAPPINGS["CogVideoTextEncode"]()
+            cogvideosampler = NODE_CLASS_MAPPINGS["CogVideoSampler"]()
+            cogvideodecode = NODE_CLASS_MAPPINGS["CogVideoDecode"]()
+            reactorfaceswap = NODE_CLASS_MAPPINGS["ReActorFaceSwap"]()
+            cr_upscale_image = NODE_CLASS_MAPPINGS["CR Upscale Image"]()
+            vhs_videocombine = NODE_CLASS_MAPPINGS["VHS_VideoCombine"]()
 
             for q in range(1):
                 cogvideotextencode_30 = cogvideotextencode.process(
@@ -245,7 +240,7 @@ def generate_video(positive_prompt, num_frames, input_image):
 
             cogvideosampler_155 = cogvideosampler.process(
                 num_frames=num_frames,
-                steps=
+                steps=30, #reduce this to avoid OOM error
                 cfg=6,
                 seed=random.randint(1, 2**64),
                 scheduler="CogVideoXDDIM",
@@ -258,8 +253,8 @@ def generate_video(positive_prompt, num_frames, input_image):
 
             cogvideodecode_11 = cogvideodecode.decode(
                 enable_vae_tiling=False,
-                tile_sample_min_height=240,
-                tile_sample_min_width=360,
+                tile_sample_min_height=240,#reduce this to avoid OOM error
+                tile_sample_min_width=360,#reduce this to avoid OOM error
                 tile_overlap_factor_height=0.2,
                 tile_overlap_factor_width=0.2,
                 auto_tile_size=True,
@@ -330,4 +325,4 @@ if __name__ == "__main__":
         outputs=[output_video]
     )
 
-    app.launch(share=True)
+    app.launch(share=True)
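The net effect of this commit is the standard ZeroGPU pattern: the module-level node instantiation and model pre-loading are commented out, and all model construction moves inside the @spaces.GPU-decorated generate_video, so nothing touches CUDA at import time and the GPU is attached only for the duration of a request. A minimal sketch of the same pattern, assuming only gradio, torch, and the spaces package available on Hugging Face Spaces; build_pipeline is a hypothetical stand-in, not a function in app.py:

import gradio as gr
import spaces  # Hugging Face ZeroGPU helper
import torch

# Module scope: keep this CPU-only (downloads, config parsing). On ZeroGPU
# there is no GPU at import time, so any CUDA call here would fail.

def build_pipeline():
    """Hypothetical stand-in for what app.py now does lazily:
    import_custom_nodes() + NODE_CLASS_MAPPINGS[...]() + .loadmodel(...)."""
    return lambda prompt: f"video for: {prompt}"

@spaces.GPU(duration=60)  # GPU is attached only while this function runs
def generate(prompt: str) -> str:
    with torch.inference_mode():
        pipe = build_pipeline()  # instantiate models inside the GPU window
        return pipe(prompt)

demo = gr.Interface(fn=generate, inputs="text", outputs="text")

if __name__ == "__main__":
    demo.launch()

The trade-off is visible in the diff itself: loading inside the handler keeps the Space bootable without a GPU, at the cost of rebuilding the models on every call unless something like the commented-out model_loaders pre-loading is restored.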