Spaces:

HuggingFaceTB
/

SmolVLM2-HighlightGenerator

Running on A100

App Files Files Community

mfarre HF staff committed 15 days ago

Commit

be5d51f

1 Parent(s): 8323202

.

Browse files

Files changed (2) hide show

app.py +94 -75
video_highlight_detector.py +25 -5

app.py CHANGED Viewed

@@ -63,7 +63,6 @@ def create_ui(examples_path: str):
                         gr.Markdown(f"#Summary: {example['analysis']['video_description']}")
                         gr.Markdown(f"#Highlights to search for: {example['analysis']['highlight_types']}")
         gr.Markdown("## Try It Yourself!")
         with gr.Row():
             with gr.Column(scale=1):
@@ -92,15 +91,20 @@ def create_ui(examples_path: str):
                     video_description = gr.Markdown("", elem_id="video_desc")
                     highlight_types = gr.Markdown("", elem_id="highlight_types")
         @spaces.GPU
         def on_process(video):
             if not video:
                 yield [
-                    "Please upload a video",  # status
-                    "",  # video_description
-                    "",  # highlight_types
-                    gr.update(visible=False),  # output_video
-                    gr.update(visible=False)  # analysis_accordion
                 ]
                 return
@@ -126,7 +130,8 @@ def create_ui(examples_path: str):
                 ]
                 model, processor = load_model()
-                detector = BatchedVideoHighlightDetector(model, processor, batch_size=8)
                 yield [
                     "Analyzing video content...",
@@ -139,7 +144,6 @@ def create_ui(examples_path: str):
                 video_desc = detector.analyze_video_content(video)
                 formatted_desc = f"#Summary: {video_desc[:500] + '...' if len(video_desc) > 500 else video_desc}"
-                # Update description as soon as it's available
                 yield [
                     "Determining highlight types...",
                     formatted_desc,
@@ -151,14 +155,22 @@ def create_ui(examples_path: str):
                 highlights = detector.determine_highlights(video_desc)
                 formatted_highlights = f"#Highlights to search for: {highlights[:500] + '...' if len(highlights) > 500 else highlights}"
-                # Update highlights as soon as they're available
-                yield [
-                    "Detecting and extracting highlights...",
-                    formatted_desc,
-                    formatted_highlights,
-                    gr.update(visible=False),
-                    gr.update(visible=True)
-                ]
                 with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp_file:
                     temp_output = tmp_file.name
@@ -195,7 +207,6 @@ def create_ui(examples_path: str):
         )
     return app
     #     gr.Markdown("## Try It Yourself!")
     #     with gr.Row():
     #         with gr.Column(scale=1):
@@ -227,99 +238,107 @@ def create_ui(examples_path: str):
     #     @spaces.GPU
     #     def on_process(video):
     #         if not video:
-    #             return {
-    #                 status: "Please upload a video",
-    #                 video_description: "",
-    #                 highlight_types: "",
-    #                 output_video: gr.update(visible=False),
-    #                 analysis_accordion: gr.update(visible=False)
-    #             }
     #         try:
     #             duration = get_video_duration_seconds(video)
     #             if duration > 1200:  # 20 minutes
-    #                 return {
-    #                     status: "Video must be shorter than 20 minutes",
-    #                     video_description: "",
-    #                     highlight_types: "",
-    #                     output_video: gr.update(visible=False),
-    #                     analysis_accordion: gr.update(visible=False)
-    #                 }
     #             # Make accordion visible as soon as processing starts
-    #             yield {
-    #                 status: "Loading model...",
-    #                 video_description: "",
-    #                 highlight_types: "",
-    #                 output_video: gr.update(visible=False),
-    #                 analysis_accordion: gr.update(visible=True)
-    #             }
     #             model, processor = load_model()
     #             detector = BatchedVideoHighlightDetector(model, processor, batch_size=8)
-    #             yield {
-    #                 status: "Analyzing video content...",
-    #                 video_description: "",
-    #                 highlight_types: "",
-    #                 output_video: gr.update(visible=False),
-    #                 analysis_accordion: gr.update(visible=True)
-    #             }
     #             video_desc = detector.analyze_video_content(video)
     #             formatted_desc = f"#Summary: {video_desc[:500] + '...' if len(video_desc) > 500 else video_desc}"
     #             # Update description as soon as it's available
-    #             yield {
-    #                 status: "Determining highlight types...",
-    #                 video_description: formatted_desc,
-    #                 highlight_types: "",
-    #                 output_video: gr.update(visible=False),
-    #                 analysis_accordion: gr.update(visible=True)
-    #             }
     #             highlights = detector.determine_highlights(video_desc)
     #             formatted_highlights = f"#Highlights to search for: {highlights[:500] + '...' if len(highlights) > 500 else highlights}"
     #             # Update highlights as soon as they're available
-    #             yield {
-    #                 status: "Detecting and extracting highlights...",
-    #                 video_description: formatted_desc,
-    #                 highlight_types: formatted_highlights,
-    #                 output_video: gr.update(visible=False),
-    #                 analysis_accordion: gr.update(visible=True)
-    #             }
     #             with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp_file:
     #                 temp_output = tmp_file.name
     #             detector.create_highlight_video(video, temp_output)
-    #             return {
-    #                 status: "Processing complete!",
-    #                 video_description: formatted_desc,
-    #                 highlight_types: formatted_highlights,
-    #                 output_video: gr.update(value=temp_output, visible=True),
-    #                 analysis_accordion: gr.update(visible=True)
-    #             }
     #         except Exception as e:
-    #             return {
-    #                 status: f"Error processing video: {str(e)}",
-    #                 video_description: "",
-    #                 highlight_types: "",
-    #                 output_video: gr.update(visible=False),
-    #                 analysis_accordion: gr.update(visible=False)
-    #             }
     #     process_btn.click(
     #         on_process,
     #         inputs=[input_video],
-    #         outputs=[status, video_description, highlight_types, output_video, analysis_accordion]
     #     )
     # return app
 if __name__ == "__main__":
     # Initialize CUDA
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

                         gr.Markdown(f"#Summary: {example['analysis']['video_description']}")
                         gr.Markdown(f"#Highlights to search for: {example['analysis']['highlight_types']}")
         gr.Markdown("## Try It Yourself!")
         with gr.Row():
             with gr.Column(scale=1):
                     video_description = gr.Markdown("", elem_id="video_desc")
                     highlight_types = gr.Markdown("", elem_id="highlight_types")
+        def progress_callback(current, total):
+            """Callback to update progress percentage"""
+            percentage = int((current / total) * 100)
+            return f"Processing segments... {percentage}% complete"
         @spaces.GPU
         def on_process(video):
             if not video:
                 yield [
+                    "Please upload a video",
+                    "",
+                    "",
+                    gr.update(visible=False),
+                    gr.update(visible=False)
                 ]
                 return
                 ]
                 model, processor = load_model()
+                detector = BatchedVideoHighlightDetector(model, processor, batch_size=8, progress_callback=lambda current, total: print(f"Progress: {current}/{total}")
+)
                 yield [
                     "Analyzing video content...",
                 video_desc = detector.analyze_video_content(video)
                 formatted_desc = f"#Summary: {video_desc[:500] + '...' if len(video_desc) > 500 else video_desc}"
                 yield [
                     "Determining highlight types...",
                     formatted_desc,
                 highlights = detector.determine_highlights(video_desc)
                 formatted_highlights = f"#Highlights to search for: {highlights[:500] + '...' if len(highlights) > 500 else highlights}"
+                # Get total number of segments for progress tracking
+                segments = get_fixed_30s_segments(video)
+                total_segments = len(segments)
+                # Process segments in batches with progress updates
+                for i in range(0, total_segments, detector.batch_size):
+                    current_batch = i + detector.batch_size
+                    progress_msg = progress_callback(min(current_batch, total_segments), total_segments)
+                    yield [
+                        progress_msg,
+                        formatted_desc,
+                        formatted_highlights,
+                        gr.update(visible=False),
+                        gr.update(visible=True)
+                    ]
                 with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp_file:
                     temp_output = tmp_file.name
         )
     return app
     #     gr.Markdown("## Try It Yourself!")
     #     with gr.Row():
     #         with gr.Column(scale=1):
     #     @spaces.GPU
     #     def on_process(video):
     #         if not video:
+    #             yield [
+    #                 "Please upload a video",  # status
+    #                 "",  # video_description
+    #                 "",  # highlight_types
+    #                 gr.update(visible=False),  # output_video
+    #                 gr.update(visible=False)  # analysis_accordion
+    #             ]
+    #             return
     #         try:
     #             duration = get_video_duration_seconds(video)
     #             if duration > 1200:  # 20 minutes
+    #                 yield [
+    #                     "Video must be shorter than 20 minutes",
+    #                     "",
+    #                     "",
+    #                     gr.update(visible=False),
+    #                     gr.update(visible=False)
+    #                 ]
+    #                 return
     #             # Make accordion visible as soon as processing starts
+    #             yield [
+    #                 "Loading model...",
+    #                 "",
+    #                 "",
+    #                 gr.update(visible=False),
+    #                 gr.update(visible=True)
+    #             ]
     #             model, processor = load_model()
     #             detector = BatchedVideoHighlightDetector(model, processor, batch_size=8)
+    #             yield [
+    #                 "Analyzing video content...",
+    #                 "",
+    #                 "",
+    #                 gr.update(visible=False),
+    #                 gr.update(visible=True)
+    #             ]
     #             video_desc = detector.analyze_video_content(video)
     #             formatted_desc = f"#Summary: {video_desc[:500] + '...' if len(video_desc) > 500 else video_desc}"
     #             # Update description as soon as it's available
+    #             yield [
+    #                 "Determining highlight types...",
+    #                 formatted_desc,
+    #                 "",
+    #                 gr.update(visible=False),
+    #                 gr.update(visible=True)
+    #             ]
     #             highlights = detector.determine_highlights(video_desc)
     #             formatted_highlights = f"#Highlights to search for: {highlights[:500] + '...' if len(highlights) > 500 else highlights}"
     #             # Update highlights as soon as they're available
+    #             yield [
+    #                 "Detecting and extracting highlights...",
+    #                 formatted_desc,
+    #                 formatted_highlights,
+    #                 gr.update(visible=False),
+    #                 gr.update(visible=True)
+    #             ]
     #             with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp_file:
     #                 temp_output = tmp_file.name
     #             detector.create_highlight_video(video, temp_output)
+    #             yield [
+    #                 "Processing complete!",
+    #                 formatted_desc,
+    #                 formatted_highlights,
+    #                 gr.update(value=temp_output, visible=True),
+    #                 gr.update(visible=True)
+    #             ]
     #         except Exception as e:
+    #             yield [
+    #                 f"Error processing video: {str(e)}",
+    #                 "",
+    #                 "",
+    #                 gr.update(visible=False),
+    #                 gr.update(visible=False)
+    #             ]
     #     process_btn.click(
     #         on_process,
     #         inputs=[input_video],
+    #         outputs=[
+    #             status,
+    #             video_description,
+    #             highlight_types,
+    #             output_video,
+    #             analysis_accordion
+    #         ],
+    #         queue=True,
     #     )
     # return app
 if __name__ == "__main__":
     # Initialize CUDA
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

video_highlight_detector.py CHANGED Viewed

@@ -317,7 +317,8 @@ class BatchedVideoHighlightDetector:
         device="cuda",
         batch_size=8,
         max_frames_per_segment=32,
-        target_fps=1.0
     ):
         self.model = model
         self.processor = processor
@@ -325,6 +326,7 @@ class BatchedVideoHighlightDetector:
         self.batch_size = batch_size
         self.max_frames_per_segment = max_frames_per_segment
         self.target_fps = target_fps
     def _extract_frames_batch(
         self,
@@ -466,10 +468,13 @@ class BatchedVideoHighlightDetector:
         self,
         video_path: str,
         segments: List[Tuple[float, float]],
-        highlight_types: str
     ) -> List[bool]:
         """
         Process a batch of segments and return which ones contain highlights.
         """
         # Extract frames for all segments in batch
         frame_batches = self._extract_frames_batch(video_path, segments)
@@ -493,12 +498,17 @@ class BatchedVideoHighlightDetector:
             for output in outputs
         ]
         # Check for "yes" in responses
         return ["yes" in response for response in responses]
     def create_highlight_video(self, video_path: str, output_path: str) -> List[Tuple[float, float]]:
         """
         Main function that executes the batched highlight detection pipeline.
         """
         # Step 1: Analyze video content
         logger.info("Step 1: Analyzing video content...")
@@ -511,15 +521,25 @@ class BatchedVideoHighlightDetector:
         logger.info(f"Looking for highlights: {highlight_types}")
         # Step 3: Get all segments
-        segments = self._get_fixed_30s_segments(video_path)
         # Step 4: Process segments in batches
         logger.info("Step 3: Detecting highlight segments in batches...")
         kept_segments = []
-        for i in tqdm(range(0, len(segments), self.batch_size)):
             batch_segments = segments[i:i + self.batch_size]
-            keep_flags = self._process_segment_batch(video_path, batch_segments, highlight_types)
             for segment, keep in zip(batch_segments, keep_flags):
                 if keep:

         device="cuda",
         batch_size=8,
         max_frames_per_segment=32,
+        target_fps=1.0,
+        progress_callback=None
     ):
         self.model = model
         self.processor = processor
         self.batch_size = batch_size
         self.max_frames_per_segment = max_frames_per_segment
         self.target_fps = target_fps
+        self.progress_callback = progress_callback
     def _extract_frames_batch(
         self,
         self,
         video_path: str,
         segments: List[Tuple[float, float]],
+        highlight_types: str,
+        total_segments: int,
+        segments_processed: int
     ) -> List[bool]:
         """
         Process a batch of segments and return which ones contain highlights.
+        Now includes progress tracking.
         """
         # Extract frames for all segments in batch
         frame_batches = self._extract_frames_batch(video_path, segments)
             for output in outputs
         ]
+        # Update progress if callback is provided
+        if self.progress_callback:
+            self.progress_callback(segments_processed + len(segments), total_segments)
         # Check for "yes" in responses
         return ["yes" in response for response in responses]
     def create_highlight_video(self, video_path: str, output_path: str) -> List[Tuple[float, float]]:
         """
         Main function that executes the batched highlight detection pipeline.
+        Now includes progress tracking.
         """
         # Step 1: Analyze video content
         logger.info("Step 1: Analyzing video content...")
         logger.info(f"Looking for highlights: {highlight_types}")
         # Step 3: Get all segments
+        segments = get_fixed_30s_segments(video_path)
+        total_segments = len(segments)
+        segments_processed = 0
         # Step 4: Process segments in batches
         logger.info("Step 3: Detecting highlight segments in batches...")
         kept_segments = []
+        for i in range(0, len(segments), self.batch_size):
             batch_segments = segments[i:i + self.batch_size]
+            keep_flags = self._process_segment_batch(
+                video_path,
+                batch_segments,
+                highlight_types,
+                total_segments,
+                segments_processed
+            )
+            segments_processed += len(batch_segments)
             for segment, keep in zip(batch_segments, keep_flags):
                 if keep: