Spaces:

HuggingFaceTB
/

SmolVLM2-HighlightGenerator

Running on A100

App Files Files Community

mfarre HF staff committed 15 days ago

Commit

f38285f

1 Parent(s): 4bc123c

.

Browse files

Files changed (1) hide show

app.py +155 -163

app.py CHANGED Viewed

@@ -32,47 +32,6 @@ def format_duration(seconds: int) -> str:
     return f"{minutes}:{secs:02d}"
-# @spaces.GPU
-# def process_video(
-#     video_path: str,
-#     progress = gr.Progress()
-# ) -> Tuple[str, str, str, str]:
-#     try:
-#         # duration = get_video_duration_seconds(video_path)
-#         # if duration > 1200:  # 20 minutes
-#         #     return None, None, None, "Video must be shorter than 20 minutes"
-#         progress(0.1, desc="Loading model...")
-#         model, processor = load_model()
-#         detector = BatchedVideoHighlightDetector(model, processor, batch_size=8)
-#         progress(0.2, desc="Analyzing video content...")
-#         video_description = detector.analyze_video_content(video_path)
-#         progress(0.3, desc="Determining highlight types...")
-#         highlight_types = detector.determine_highlights(video_description)
-#         progress(0.4, desc="Detecting and extracting highlights...")
-#         with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp_file:
-#             output_path = tmp_file.name
-#         detector.create_highlight_video(video_path, output_path)
-#         # progress(0.9, desc="Adding watermark...")
-#         # output_path = temp_output.replace('.mp4', '_watermark.mp4')
-#         # add_watermark(temp_output, output_path)
-#         os.unlink(output_path)
-#         progress(1.0, desc="Complete!")
-#         video_description = video_description[:500] + "..." if len(video_description) > 500 else video_description
-#         highlight_types = highlight_types[:500] + "..." if len(highlight_types) > 500 else highlight_types
-#         return output_path, video_description, highlight_types, None
-#     except Exception as e:
-#         return None, None, None, f"Error processing video: {str(e)}"
 def create_ui(examples_path: str):
     examples_data = load_examples(examples_path)
@@ -131,137 +90,149 @@ def create_ui(examples_path: str):
                 with analysis_accordion:
                     video_description = gr.Markdown("", elem_id="video_desc")
                     highlight_types = gr.Markdown("", elem_id="highlight_types")
-        # # Main interface section
-        # gr.Markdown("## Try It Yourself!")
-        # with gr.Row():
-        #     # Left column: Upload and Process
-        #     with gr.Column(scale=1):
-        #         input_video = gr.Video(
-        #             label="Upload your video (max 20 minutes)",
-        #             interactive=True
-        #         )
-        #         process_btn = gr.Button("Process Video", variant="primary")
-        #     # Right column: Progress and Analysis
-        #     with gr.Column(scale=1):
-        #         # Output video (initially hidden)
-        #         output_video = gr.Video(
-        #             label="Highlight Video",
-        #             visible=False,
-        #             interactive=False,
-        #         )
-        #         status = gr.Markdown()
-        #         with gr.Accordion("Model chain of thought details", open=True, visible=True) as analysis_accordion:
-        #             video_description = gr.Markdown("", elem_id="video_desc")
-        #             highlight_types = gr.Markdown("", elem_id="highlight_types")
         @spaces.GPU
-        def on_process(video):
             if not video:
-                return {
-                    status: "Please upload a video",
-                    video_description: "",
-                    highlight_types: "",
-                    output_video: gr.update(visible=False),
-                    analysis_accordion: gr.update(visible=False)
-                }
             try:
                 duration = get_video_duration_seconds(video)
                 if duration > 1200:  # 20 minutes
-                    return {
-                        status: "Video must be shorter than 20 minutes",
-                        video_description: "",
-                        highlight_types: "",
-                        output_video: gr.update(visible=False),
-                        analysis_accordion: gr.update(visible=False)
-                    }
-                # Make accordion visible as soon as processing starts
-                yield {
-                    status: "Loading model...",
-                    video_description: "",
-                    highlight_types: "",
-                    output_video: gr.update(visible=False),
-                    analysis_accordion: gr.update(visible=True)
-                }
                 model, processor = load_model()
                 detector = BatchedVideoHighlightDetector(model, processor, batch_size=8)
-                yield {
-                    status: "Analyzing video content...",
-                    video_description: "",
-                    highlight_types: "",
-                    output_video: gr.update(visible=False),
-                    analysis_accordion: gr.update(visible=True)
-                }
                 video_desc = detector.analyze_video_content(video)
                 formatted_desc = f"#Summary: {video_desc[:500] + '...' if len(video_desc) > 500 else video_desc}"
-                # Update description as soon as it's available
-                yield {
-                    status: "Determining highlight types...",
-                    video_description: formatted_desc,
-                    highlight_types: "",
-                    output_video: gr.update(visible=False),
-                    analysis_accordion: gr.update(visible=True)
-                }
                 highlights = detector.determine_highlights(video_desc)
                 formatted_highlights = f"#Highlights to search for: {highlights[:500] + '...' if len(highlights) > 500 else highlights}"
-                # Update highlights as soon as they're available
-                yield {
-                    status: "Detecting and extracting highlights...",
-                    video_description: formatted_desc,
-                    highlight_types: formatted_highlights,
-                    output_video: gr.update(visible=False),
-                    analysis_accordion: gr.update(visible=True)
-                }
                 with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp_file:
                     temp_output = tmp_file.name
                 detector.create_highlight_video(video, temp_output)
-                return {
-                    status: "Processing complete!",
-                    video_description: formatted_desc,
-                    highlight_types: formatted_highlights,
-                    output_video: gr.update(value=temp_output, visible=True),
-                    analysis_accordion: gr.update(visible=True)
-                }
             except Exception as e:
-                return {
-                    status: f"Error processing video: {str(e)}",
-                    video_description: "",
-                    highlight_types: "",
-                    output_video: gr.update(visible=False),
-                    analysis_accordion: gr.update(visible=False)
-                }
         process_btn.click(
-            on_process,
             inputs=[input_video],
-            outputs=[status, video_description, highlight_types, output_video, analysis_accordion]
         )
     return app
     #     @spaces.GPU
-    #     def on_process(video, progress=gr.Progress()):
     #         if not video:
     #             return {
     #                 status: "Please upload a video",
     #                 video_description: "",
     #                 highlight_types: "",
-    #                 output_video: gr.update(visible=False)
     #             }
     #         try:
@@ -271,45 +242,64 @@ def create_ui(examples_path: str):
     #                     status: "Video must be shorter than 20 minutes",
     #                     video_description: "",
     #                     highlight_types: "",
-    #                     output_video: gr.update(visible=False)
     #                 }
-    #             progress(0.1, desc="Loading model...")
-    #             status.value = "Loading model..."
     #             model, processor = load_model()
     #             detector = BatchedVideoHighlightDetector(model, processor, batch_size=8)
-    #             progress(0.2, desc="Analyzing video content...")
-    #             status.value = "Analyzing video content..."
     #             video_desc = detector.analyze_video_content(video)
-    #             # Update description in real-time
-    #             video_description.value = f"#Summary: {video_desc[:500] + '...' if len(video_desc) > 500 else video_desc}"
-    #             progress(0.3, desc="Determining highlight types...")
-    #             status.value = "Determining highlight types..."
     #             highlights = detector.determine_highlights(video_desc)
-    #             # Update highlights in real-time
-    #             highlight_types.value = f"#Highlights to search for: {highlights[:500] + '...' if len(highlights) > 500 else highlights}"
-    #             progress(0.4, desc="Detecting and extracting highlights...")
-    #             status.value = "Detecting and extracting highlights..."
     #             with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp_file:
     #                 temp_output = tmp_file.name
     #             detector.create_highlight_video(video, temp_output)
-    #             # progress(0.9, desc="Adding watermark...")
-    #             # status.value = "Adding watermark..."
-    #             # output_path = temp_output.replace('.mp4', '_watermark.mp4')
-    #             # add_watermark(temp_output, output_path)
-    #             # os.unlink(temp_output)
-    #             progress(1.0, desc="Complete!")
     #             return {
     #                 status: "Processing complete!",
-    #                 video_description: video_description.value,
-    #                 highlight_types: highlight_types.value,
-    #                 output_video: gr.update(value=temp_output, visible=True)
     #             }
     #         except Exception as e:
@@ -317,17 +307,19 @@ def create_ui(examples_path: str):
     #                 status: f"Error processing video: {str(e)}",
     #                 video_description: "",
     #                 highlight_types: "",
-    #                 output_video: gr.update(visible=False)
     #             }
     #     process_btn.click(
     #         on_process,
     #         inputs=[input_video],
-    #         outputs=[status, video_description, highlight_types, output_video]
     #     )
     # return app
 if __name__ == "__main__":
     # Initialize CUDA
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

     return f"{minutes}:{secs:02d}"
 def create_ui(examples_path: str):
     examples_data = load_examples(examples_path)
                 with analysis_accordion:
                     video_description = gr.Markdown("", elem_id="video_desc")
                     highlight_types = gr.Markdown("", elem_id="highlight_types")
         @spaces.GPU
+        def process_video(video):
             if not video:
+                return [
+                    "Please upload a video",
+                    "",
+                    "",
+                    None,
+                    False
+                ]
             try:
                 duration = get_video_duration_seconds(video)
                 if duration > 1200:  # 20 minutes
+                    return [
+                        "Video must be shorter than 20 minutes",
+                        "",
+                        "",
+                        None,
+                        False
+                    ]
+                # Load model
                 model, processor = load_model()
                 detector = BatchedVideoHighlightDetector(model, processor, batch_size=8)
+                # Analyze content
                 video_desc = detector.analyze_video_content(video)
                 formatted_desc = f"#Summary: {video_desc[:500] + '...' if len(video_desc) > 500 else video_desc}"
+                # Determine highlights
                 highlights = detector.determine_highlights(video_desc)
                 formatted_highlights = f"#Highlights to search for: {highlights[:500] + '...' if len(highlights) > 500 else highlights}"
+                # Create highlight video
                 with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp_file:
                     temp_output = tmp_file.name
                 detector.create_highlight_video(video, temp_output)
+                return [
+                    "Processing complete!",
+                    formatted_desc,
+                    formatted_highlights,
+                    temp_output,
+                    True
+                ]
             except Exception as e:
+                return [
+                    f"Error processing video: {str(e)}",
+                    "",
+                    "",
+                    None,
+                    False
+                ]
+        def process_with_updates(video):
+            # Initial state
+            yield [
+                "Loading model...",
+                "",
+                "",
+                None,
+                True  # Show accordion
+            ]
+            # Analyzing video
+            yield [
+                "Analyzing video content...",
+                "",
+                "",
+                None,
+                True
+            ]
+            # Get final results
+            results = process_video(video)
+            # If we're still processing, show an intermediate state
+            if results[0] != "Processing complete!":
+                yield [
+                    "Detecting and extracting highlights...",
+                    results[1],  # description
+                    results[2],  # highlights
+                    None,
+                    True
+                ]
+            # Return final state
+            yield results
         process_btn.click(
+            process_with_updates,
             inputs=[input_video],
+            outputs=[
+                status,
+                video_description,
+                highlight_types,
+                output_video,
+                analysis_accordion
+            ]
         )
     return app
+    #     gr.Markdown("## Try It Yourself!")
+    #     with gr.Row():
+    #         with gr.Column(scale=1):
+    #             input_video = gr.Video(
+    #                 label="Upload your video (max 20 minutes)",
+    #                 interactive=True
+    #             )
+    #             process_btn = gr.Button("Process Video", variant="primary")
+    #         with gr.Column(scale=1):
+    #             output_video = gr.Video(
+    #                 label="Highlight Video",
+    #                 visible=False,
+    #                 interactive=False,
+    #             )
+    #             status = gr.Markdown()
+    #             analysis_accordion = gr.Accordion(
+    #                 "Model chain of thought details",
+    #                 open=True,
+    #                 visible=False
+    #             )
+    #             with analysis_accordion:
+    #                 video_description = gr.Markdown("", elem_id="video_desc")
+    #                 highlight_types = gr.Markdown("", elem_id="highlight_types")
     #     @spaces.GPU
+    #     def on_process(video):
     #         if not video:
     #             return {
     #                 status: "Please upload a video",
     #                 video_description: "",
     #                 highlight_types: "",
+    #                 output_video: gr.update(visible=False),
+    #                 analysis_accordion: gr.update(visible=False)
     #             }
     #         try:
     #                     status: "Video must be shorter than 20 minutes",
     #                     video_description: "",
     #                     highlight_types: "",
+    #                     output_video: gr.update(visible=False),
+    #                     analysis_accordion: gr.update(visible=False)
     #                 }
+    #             # Make accordion visible as soon as processing starts
+    #             yield {
+    #                 status: "Loading model...",
+    #                 video_description: "",
+    #                 highlight_types: "",
+    #                 output_video: gr.update(visible=False),
+    #                 analysis_accordion: gr.update(visible=True)
+    #             }
     #             model, processor = load_model()
     #             detector = BatchedVideoHighlightDetector(model, processor, batch_size=8)
+    #             yield {
+    #                 status: "Analyzing video content...",
+    #                 video_description: "",
+    #                 highlight_types: "",
+    #                 output_video: gr.update(visible=False),
+    #                 analysis_accordion: gr.update(visible=True)
+    #             }
     #             video_desc = detector.analyze_video_content(video)
+    #             formatted_desc = f"#Summary: {video_desc[:500] + '...' if len(video_desc) > 500 else video_desc}"
+    #             # Update description as soon as it's available
+    #             yield {
+    #                 status: "Determining highlight types...",
+    #                 video_description: formatted_desc,
+    #                 highlight_types: "",
+    #                 output_video: gr.update(visible=False),
+    #                 analysis_accordion: gr.update(visible=True)
+    #             }
     #             highlights = detector.determine_highlights(video_desc)
+    #             formatted_highlights = f"#Highlights to search for: {highlights[:500] + '...' if len(highlights) > 500 else highlights}"
+    #             # Update highlights as soon as they're available
+    #             yield {
+    #                 status: "Detecting and extracting highlights...",
+    #                 video_description: formatted_desc,
+    #                 highlight_types: formatted_highlights,
+    #                 output_video: gr.update(visible=False),
+    #                 analysis_accordion: gr.update(visible=True)
+    #             }
     #             with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp_file:
     #                 temp_output = tmp_file.name
     #             detector.create_highlight_video(video, temp_output)
     #             return {
     #                 status: "Processing complete!",
+    #                 video_description: formatted_desc,
+    #                 highlight_types: formatted_highlights,
+    #                 output_video: gr.update(value=temp_output, visible=True),
+    #                 analysis_accordion: gr.update(visible=True)
     #             }
     #         except Exception as e:
     #                 status: f"Error processing video: {str(e)}",
     #                 video_description: "",
     #                 highlight_types: "",
+    #                 output_video: gr.update(visible=False),
+    #                 analysis_accordion: gr.update(visible=False)
     #             }
     #     process_btn.click(
     #         on_process,
     #         inputs=[input_video],
+    #         outputs=[status, video_description, highlight_types, output_video, analysis_accordion]
     #     )
     # return app
 if __name__ == "__main__":
     # Initialize CUDA
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')