Spaces:

multimodalart
/

nano-banana

Running on CPU Upgrade

App Files Files Community

multimodalart HF Staff committed 13 days ago

Commit

aa0696d

verified ·

1 Parent(s): ae7dd23

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -8

app.py CHANGED Viewed

@@ -50,6 +50,7 @@ def _resize_image(image_path: str, target_size: Tuple[int, int]) -> str:
     with Image.open(image_path) as img:
         if img.size == target_size:
             return image_path
         resized_img = img.resize(target_size, Image.Resampling.LANCZOS)
         suffix = os.path.splitext(image_path)[1] or ".png"
         with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
@@ -58,6 +59,7 @@ def _resize_image(image_path: str, target_size: Tuple[int, int]) -> str:
 def _trim_first_frame_fast(video_path: str) -> str:
     """Removes exactly the first frame of a video without re-encoding."""
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_output_file:
         output_path = tmp_output_file.name
     try:
@@ -76,7 +78,7 @@ def _trim_first_frame_fast(video_path: str) -> str:
 def _combine_videos_simple(video1_path: str, video2_path: str) -> str:
     """Combines two videos using the fast concat demuxer."""
     with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix=".txt") as tmp_list_file:
         tmp_list_file.write(f"file '{os.path.abspath(video1_path)}'\n")
         tmp_list_file.write(f"file '{os.path.abspath(video2_path)}'\n")
@@ -99,6 +101,7 @@ def _combine_videos_simple(video1_path: str, video2_path: str) -> str:
 def _generate_video_segment(input_image_path: str, output_image_path: str, prompt: str, token: str) -> str:
     """Generates a single video segment using the external service."""
     video_client = Client("multimodalart/wan-2-2-first-last-frame", hf_token=token)
     result = video_client.predict(
         start_image_pil=handle_file(input_image_path),
@@ -107,7 +110,7 @@ def _generate_video_segment(input_image_path: str, output_image_path: str, promp
     )
     return result[0]["video"]
-def unified_image_generator(prompt: str, images: Optional[List[str]], previous_video_path: Optional[str], oauth_token: Optional[gr.OAuthToken]) -> tuple:
     if not verify_pro_status(oauth_token): raise gr.Error("Access Denied.")
     try:
         contents = [Image.open(image_path[0]) for image_path in images] if images else []
@@ -118,8 +121,14 @@ def unified_image_generator(prompt: str, images: Optional[List[str]], previous_v
         with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
             Image.open(BytesIO(image_data)).save(tmp.name)
             output_path = tmp.name
         can_create_video = bool(images and len(images) == 1)
-        can_extend_video = can_create_video and bool(previous_video_path)
         return (output_path, gr.update(visible=can_create_video), gr.update(visible=can_extend_video), gr.update(visible=False))
     except Exception as e:
         raise gr.Error(f"Image generation failed: {e}")
@@ -129,7 +138,7 @@ def create_new_video(input_image_gallery: List[str], prompt_input: str, output_i
     if not input_image_gallery or not output_image: raise gr.Error("Input/output images required.")
     try:
         new_segment_path = _generate_video_segment(input_image_gallery[0][0], output_image, prompt_input, oauth_token.token)
-        return new_segment_path, new_segment_path
     except Exception as e:
         raise gr.Error(f"Video creation failed: {e}")
@@ -144,7 +153,7 @@ def extend_existing_video(input_image_gallery: List[str], prompt_input: str, out
         new_segment_path = _generate_video_segment(resized_input_path, resized_output_path, prompt_input, oauth_token.token)
         trimmed_segment_path = _trim_first_frame_fast(new_segment_path)
         final_video_path = _combine_videos_simple(previous_video_path, trimmed_segment_path)
-        return final_video_path, final_video_path
     except Exception as e:
         raise gr.Error(f"Video extension failed: {e}")
@@ -171,7 +180,9 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
     gr.HTML("<h3 style='text-align:center'>Hugging Face PRO users can use Google's Nano Banana (Gemini 2.5 Flash Image Preview) on this Space. <a href='http://huggingface.co/subscribe/pro?source=nana_banana' target='_blank'>Subscribe to PRO</a></h3>", elem_id="sub_title")
     pro_message = gr.Markdown(visible=False)
     main_interface = gr.Column(visible=False)
     previous_video_state = gr.State(None)
     with main_interface:
         with gr.Row():
@@ -195,7 +206,7 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
     gr.on(
         triggers=[generate_button.click, prompt_input.submit],
         fn=unified_image_generator,
-        inputs=[prompt_input, image_input_gallery, previous_video_state],
         outputs=[output_image, create_video_button, extend_video_button, video_group]
     )
     use_image_button.click(
@@ -211,14 +222,14 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
     ).then(
         fn=create_new_video,
         inputs=[image_input_gallery, prompt_input, output_image],
-        outputs=[video_output, previous_video_state],
     )
     extend_video_button.click(
         fn=lambda: gr.update(visible=True), outputs=[video_group]
     ).then(
         fn=extend_existing_video,
         inputs=[image_input_gallery, prompt_input, output_image, previous_video_state],
-        outputs=[video_output, previous_video_state],
     )
     def control_access(profile: Optional[gr.OAuthProfile] = None, oauth_token: Optional[gr.OAuthToken] = None):

     with Image.open(image_path) as img:
         if img.size == target_size:
             return image_path
+        gr.Info(f"Resizing image to {target_size[0]}x{target_size[1]} to match previous video.")
         resized_img = img.resize(target_size, Image.Resampling.LANCZOS)
         suffix = os.path.splitext(image_path)[1] or ".png"
         with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
 def _trim_first_frame_fast(video_path: str) -> str:
     """Removes exactly the first frame of a video without re-encoding."""
+    gr.Info("Preparing video segment...")
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_output_file:
         output_path = tmp_output_file.name
     try:
 def _combine_videos_simple(video1_path: str, video2_path: str) -> str:
     """Combines two videos using the fast concat demuxer."""
+    gr.Info("Stitching videos...")
     with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix=".txt") as tmp_list_file:
         tmp_list_file.write(f"file '{os.path.abspath(video1_path)}'\n")
         tmp_list_file.write(f"file '{os.path.abspath(video2_path)}'\n")
 def _generate_video_segment(input_image_path: str, output_image_path: str, prompt: str, token: str) -> str:
     """Generates a single video segment using the external service."""
+    gr.Info("Generating new video segment...")
     video_client = Client("multimodalart/wan-2-2-first-last-frame", hf_token=token)
     result = video_client.predict(
         start_image_pil=handle_file(input_image_path),
     )
     return result[0]["video"]
+def unified_image_generator(prompt: str, images: Optional[List[str]], previous_video_path: Optional[str], last_frame_path: Optional[str], oauth_token: Optional[gr.OAuthToken]) -> tuple:
     if not verify_pro_status(oauth_token): raise gr.Error("Access Denied.")
     try:
         contents = [Image.open(image_path[0]) for image_path in images] if images else []
         with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
             Image.open(BytesIO(image_data)).save(tmp.name)
             output_path = tmp.name
         can_create_video = bool(images and len(images) == 1)
+        can_extend_video = False
+        if can_create_video and previous_video_path and last_frame_path:
+            # The crucial check for continuity
+            if images[0][0] == last_frame_path:
+                can_extend_video = True
         return (output_path, gr.update(visible=can_create_video), gr.update(visible=can_extend_video), gr.update(visible=False))
     except Exception as e:
         raise gr.Error(f"Image generation failed: {e}")
     if not input_image_gallery or not output_image: raise gr.Error("Input/output images required.")
     try:
         new_segment_path = _generate_video_segment(input_image_gallery[0][0], output_image, prompt_input, oauth_token.token)
+        return new_segment_path, new_segment_path, output_image
     except Exception as e:
         raise gr.Error(f"Video creation failed: {e}")
         new_segment_path = _generate_video_segment(resized_input_path, resized_output_path, prompt_input, oauth_token.token)
         trimmed_segment_path = _trim_first_frame_fast(new_segment_path)
         final_video_path = _combine_videos_simple(previous_video_path, trimmed_segment_path)
+        return final_video_path, final_video_path, output_image
     except Exception as e:
         raise gr.Error(f"Video extension failed: {e}")
     gr.HTML("<h3 style='text-align:center'>Hugging Face PRO users can use Google's Nano Banana (Gemini 2.5 Flash Image Preview) on this Space. <a href='http://huggingface.co/subscribe/pro?source=nana_banana' target='_blank'>Subscribe to PRO</a></h3>", elem_id="sub_title")
     pro_message = gr.Markdown(visible=False)
     main_interface = gr.Column(visible=False)
     previous_video_state = gr.State(None)
+    last_frame_of_video_state = gr.State(None)
     with main_interface:
         with gr.Row():
     gr.on(
         triggers=[generate_button.click, prompt_input.submit],
         fn=unified_image_generator,
+        inputs=[prompt_input, image_input_gallery, previous_video_state, last_frame_of_video_state],
         outputs=[output_image, create_video_button, extend_video_button, video_group]
     )
     use_image_button.click(
     ).then(
         fn=create_new_video,
         inputs=[image_input_gallery, prompt_input, output_image],
+        outputs=[video_output, previous_video_state, last_frame_of_video_state],
     )
     extend_video_button.click(
         fn=lambda: gr.update(visible=True), outputs=[video_group]
     ).then(
         fn=extend_existing_video,
         inputs=[image_input_gallery, prompt_input, output_image, previous_video_state],
+        outputs=[video_output, previous_video_state, last_frame_of_video_state],
     )
     def control_access(profile: Optional[gr.OAuthProfile] = None, oauth_token: Optional[gr.OAuthToken] = None):