SpyC0der77 committed (verified)
Commit 29b5ebe · Parent(s): 7419c44

Update app.py

Files changed (1):
  1. app.py +90 -84
app.py CHANGED
@@ -7,14 +7,24 @@ import tempfile
 import os
 import gradio as gr
 import time
-import io
+import threading
 
-# Set up device for torch
+# Global status and result dictionaries.
+status = {
+    "logs": "",
+    "progress": 0,   # 0 to 100
+    "finished": False
+}
+result = {
+    "original_video": None,
+    "stabilized_video": None
+}
+
+# Set up device for torch.
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"[INFO] Using device: {device}")
 
-# Try to load the RAFT model from torch.hub.
-# If it fails, fall back to OpenCV's Farneback optical flow.
+# Try to load the RAFT model. If it fails, we fall back to Farneback.
 try:
     print("[INFO] Attempting to load RAFT model from torch.hub...")
     raft_model = torch.hub.load("princeton-vl/RAFT", "raft_small", pretrained=True, trust_repo=True)
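
Note: the new module-level status and result dicts are written by a worker thread and read from UI callbacks. Under CPython's GIL each single assignment here is safe, but a reader could still observe a half-finished multi-field update. A minimal lock-guarded variant (illustrative names, not part of the commit) would look like:

    import threading

    _lock = threading.Lock()
    _status = {"logs": "", "progress": 0, "finished": False}

    def update_status(msg=None, progress=None, finished=None):
        # Take the lock so readers never see a torn multi-field update.
        with _lock:
            if msg is not None:
                _status["logs"] += msg + "\n"
            if progress is not None:
                _status["progress"] = progress
            if finished is not None:
                _status["finished"] = finished

    def read_status():
        # Return a snapshot copy for the UI callback.
        with _lock:
            return dict(_status)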
@@ -26,64 +36,58 @@ except Exception as e:
     print("[INFO] Falling back to OpenCV Farneback optical flow.")
     raft_model = None
 
-def process_video_ai(video_file, zoom):
+def append_log(msg):
+    """Helper to append a log message to the global status."""
+    global status
+    status["logs"] += msg + "\n"
+    print(msg)
+
+def background_process(video_file, zoom):
     """
-    Generator function for Gradio:
-      - Generates motion data (CSV) from the input video using an AI model (RAFT if available, else Farneback).
-      - Stabilizes the video using the generated motion data.
-
-    Yields:
-      A tuple of (original_video, stabilized_video, logs, progress)
-      During processing, original_video and stabilized_video are None.
-      The final yield returns the video file paths with final logs and progress=100.
+    Runs the full processing: generates a motion CSV using RAFT (or Farneback)
+    and then stabilizes the video. Updates the global status and result.
     """
-    logs = []
-    def add_log(msg):
-        logs.append(msg)
-        return "\n".join(logs)
-
-    # Check and extract the file path.
-    if isinstance(video_file, dict):
-        video_file = video_file.get("name", None)
-    if video_file is None:
-        yield (None, None, "[ERROR] Please upload a video file.", 0)
-        return
-
-    add_log("[INFO] Starting AI-powered video processing...")
-    yield (None, None, add_log("Starting processing..."), 0)
-
+    global status, result
+
+    status["logs"] = ""
+    status["progress"] = 0
+    status["finished"] = False
+    result["original_video"] = None
+    result["stabilized_video"] = None
+
+    append_log("[INFO] Starting AI-powered video processing...")
     # === CSV Generation Phase ===
-    add_log("[INFO] Starting motion CSV generation...")
-    yield (None, None, add_log("Starting CSV generation..."), 0)
-
+    append_log("[INFO] Starting motion CSV generation...")
    cap = cv2.VideoCapture(video_file)
     if not cap.isOpened():
-        yield (None, None, add_log("[ERROR] Could not open video file for CSV generation."), 0)
+        append_log("[ERROR] Could not open video file for CSV generation.")
+        status["finished"] = True
         return
+
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    add_log(f"[INFO] Total frames in video: {total_frames}")
-
-    # Create temporary CSV file.
+    append_log(f"[INFO] Total frames in video: {total_frames}")
     csv_file = tempfile.NamedTemporaryFile(delete=False, suffix='.csv').name
     with open(csv_file, 'w', newline='') as csvfile:
         fieldnames = ['frame', 'mag', 'ang', 'zoom']
         writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
         writer.writeheader()
-
+
         ret, first_frame = cap.read()
         if not ret:
-            yield (None, None, add_log("[ERROR] Cannot read first frame from video."), 0)
+            append_log("[ERROR] Cannot read first frame from video.")
+            status["finished"] = True
+            cap.release()
             return
-
+
         if raft_model is not None:
             first_frame_rgb = cv2.cvtColor(first_frame, cv2.COLOR_BGR2RGB)
             prev_tensor = torch.from_numpy(first_frame_rgb).permute(2, 0, 1).float().unsqueeze(0) / 255.0
             prev_tensor = prev_tensor.to(device)
-            add_log("[INFO] Using RAFT model for optical flow computation.")
+            append_log("[INFO] Using RAFT model for optical flow computation.")
         else:
             prev_gray = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)
-            add_log("[INFO] Using Farneback optical flow for computation.")
-
+            append_log("[INFO] Using Farneback optical flow for computation.")
+
         frame_idx = 1
         while True:
             ret, frame = cap.read()
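
Note: both before and after this change, frames are fed to RAFT normalized to [0, 1]. The reference princeton-vl/RAFT demo instead feeds float tensors in the 0-255 range and pads height and width to multiples of 8 before inference, so the preprocessing is worth verifying against the checkpoint in use. A hedged sketch of that convention (the helper name is illustrative, not from the commit):

    import torch
    import torch.nn.functional as F

    def frame_to_raft_input(frame_bgr, device):
        # BGR (OpenCV) -> RGB, then HWC uint8 -> 1x3xHxW float in 0-255.
        rgb = frame_bgr[..., ::-1].copy()
        t = torch.from_numpy(rgb).permute(2, 0, 1).float().unsqueeze(0)
        # The reference RAFT implementation pads H and W to multiples of 8.
        h, w = t.shape[-2:]
        t = F.pad(t, (0, (8 - w % 8) % 8, 0, (8 - h % 8) % 8), mode="replicate")
        return t.to(device)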
@@ -95,7 +99,7 @@ def process_video_ai(video_file, zoom):
                 curr_tensor = torch.from_numpy(curr_frame_rgb).permute(2, 0, 1).float().unsqueeze(0) / 255.0
                 curr_tensor = curr_tensor.to(device)
                 with torch.no_grad():
-                    flow_low, flow_up = raft_model(prev_tensor, curr_tensor, iters=20, test_mode=True)
+                    _, flow_up = raft_model(prev_tensor, curr_tensor, iters=20, test_mode=True)
                 flow = flow_up[0].permute(1, 2, 0).cpu().numpy()
                 prev_tensor = curr_tensor.clone()
             else:
@@ -104,7 +108,7 @@ def process_video_ai(video_file, zoom):
                     pyr_scale=0.5, levels=3, winsize=15,
                     iterations=3, poly_n=5, poly_sigma=1.2, flags=0)
                 prev_gray = curr_gray
-
+
             # Compute median magnitude and angle.
             mag, ang = cv2.cartToPolar(flow[...,0], flow[...,1], angleInDegrees=True)
             median_mag = np.median(mag)
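
Note: cv2.cartToPolar converts the per-pixel (dx, dy) flow into magnitude and angle, and taking the medians gives a per-frame motion summary that is robust to outlier pixels. A quick self-contained check:

    import numpy as np
    import cv2

    # Uniform synthetic flow of (+3, +4) pixels per frame.
    flow = np.zeros((4, 4, 2), dtype=np.float32)
    flow[..., 0] = 3.0
    flow[..., 1] = 4.0

    mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1], angleInDegrees=True)
    print(np.median(mag))   # 5.0 (3-4-5 triangle)
    print(np.median(ang))   # ~53.13 degrees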
@@ -117,27 +121,24 @@ def process_video_ai(video_file, zoom):
             y_offset = y_coords - center_y
             dot = flow[..., 0] * x_offset + flow[..., 1] * y_offset
             zoom_factor = np.count_nonzero(dot > 0) / (w * h)
-
             writer.writerow({
                 'frame': frame_idx,
                 'mag': median_mag,
                 'ang': median_ang,
                 'zoom': zoom_factor
             })
-
+
             if frame_idx % 10 == 0 or frame_idx == total_frames:
                 progress_csv = (frame_idx / total_frames) * 50   # CSV phase: 0-50%
-                add_log(f"[INFO] CSV: Processed frame {frame_idx}/{total_frames}")
-                yield (None, None, add_log(""), progress_csv)
+                append_log(f"[INFO] CSV: Processed frame {frame_idx}/{total_frames}")
+                status["progress"] = progress_csv
             frame_idx += 1
     cap.release()
-    add_log("[INFO] CSV generation complete.")
-    yield (None, None, add_log(""), 50)
-
+    append_log("[INFO] CSV generation complete.")
+    status["progress"] = 50
+
     # === Stabilization Phase ===
-    add_log("[INFO] Starting video stabilization...")
-    yield (None, None, add_log("Starting stabilization..."), 51)
-
+    append_log("[INFO] Starting video stabilization...")
     # Read the CSV and compute cumulative motion data.
     motion_data = {}
     cumulative_dx = 0.0
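
Note: the 'zoom' column is the fraction of pixels whose flow points away from the frame center (positive dot product with the radial offset), so values near 1.0 indicate expansion (zooming in) and values near 0.0 indicate contraction. A synthetic sanity check:

    import numpy as np

    h, w = 64, 64
    ys, xs = np.indices((h, w)).astype(np.float32)
    cx, cy = w / 2.0, h / 2.0

    # Purely expanding flow: every pixel moves away from the center.
    flow = np.dstack([xs - cx, ys - cy])

    dot = flow[..., 0] * (xs - cx) + flow[..., 1] * (ys - cy)
    print(np.count_nonzero(dot > 0) / (w * h))   # ~1.0 for expansion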
@@ -154,9 +155,9 @@ def process_video_ai(video_file, zoom):
             cumulative_dx += dx
             cumulative_dy += dy
             motion_data[frame_num] = (-cumulative_dx, -cumulative_dy)
-    add_log("[INFO] Motion CSV read complete.")
-    yield (None, None, add_log(""), 55)
-
+    append_log("[INFO] Motion CSV read complete.")
+    status["progress"] = 55
+
     # Re-open video for stabilization.
     cap = cv2.VideoCapture(video_file)
     fps = cap.get(cv2.CAP_PROP_FPS)
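
Note: the context lines hidden by this hunk presumably convert each CSV row's median magnitude/angle back into a per-frame (dx, dy); the visible lines accumulate those and store the negated sums, so each frame is shifted opposite to the total measured drift. A sketch under that assumption:

    import math

    def row_to_offsets(mag, ang_degrees):
        # Assumed inverse of cartToPolar: recover (dx, dy) from magnitude/angle.
        rad = math.radians(ang_degrees)
        return mag * math.cos(rad), mag * math.sin(rad)

    cum_dx = cum_dy = 0.0
    for mag, ang in [(5.0, 53.13), (5.0, 53.13)]:   # two frames of (3, 4) motion
        dx, dy = row_to_offsets(mag, ang)
        cum_dx += dx
        cum_dy += dy
    print((-cum_dx, -cum_dy))   # ~(-6.0, -8.0): the stabilizing correction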
@@ -167,7 +168,7 @@ def process_video_ai(video_file, zoom):
     temp_file.close()
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     out = cv2.VideoWriter(output_file, fourcc, fps, (width, height))
-
+
     frame_idx = 1
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
     while True:
@@ -180,55 +181,60 @@ def process_video_ai(video_file, zoom):
         start_x = max((zoomed_w - width) // 2, 0)
         start_y = max((zoomed_h - height) // 2, 0)
         frame = zoomed_frame[start_y:start_y+height, start_x:start_x+width]
-
         dx, dy = motion_data.get(frame_idx, (0, 0))
         transform = np.array([[1, 0, dx],
                               [0, 1, dy]], dtype=np.float32)
         stabilized_frame = cv2.warpAffine(frame, transform, (width, height))
         out.write(stabilized_frame)
-
         if frame_idx % 10 == 0 or frame_idx == total_frames:
             progress_stab = 50 + (frame_idx / total_frames) * 50   # Stabilization phase: 50-100%
-            add_log(f"[INFO] Stabilization: Processed frame {frame_idx}/{total_frames}")
-            yield (None, None, add_log(""), progress_stab)
+            append_log(f"[INFO] Stabilization: Processed frame {frame_idx}/{total_frames}")
+            status["progress"] = progress_stab
         frame_idx += 1
     cap.release()
     out.release()
-    add_log("[INFO] Stabilization complete.")
-    yield (video_file, output_file, add_log(""), 100)
+    append_log("[INFO] Stabilization complete.")
+    status["progress"] = 100
+    status["finished"] = True
+    result["original_video"] = video_file
+    result["stabilized_video"] = output_file
+
+def start_processing(video_file, zoom):
+    """Starts background processing in a new thread."""
+    thread = threading.Thread(target=background_process, args=(video_file, zoom), daemon=True)
+    thread.start()
+    return "Processing started."
+
+def poll_status():
+    """
+    Returns the current processing status:
+      - original_video: path if finished (else None)
+      - stabilized_video: path if finished (else None)
+      - logs: current logs string
+      - progress: current progress value (0 to 100)
+    """
+    return result["original_video"], result["stabilized_video"], status["logs"], status["progress"]
 
 # Build the Gradio UI.
 with gr.Blocks() as demo:
     gr.Markdown("# AI-Powered Video Stabilization")
-    gr.Markdown("Upload a video and select a zoom factor. The system will generate motion data using an AI model (RAFT if available, else Farneback) and then stabilize the video. Logs and progress will update during processing.")
-
+    gr.Markdown("Upload a video and select a zoom factor. Click **Process Video** to start processing in the background. Then click **Refresh Status** to update the logs and progress (once processing finishes, the stabilized video will be shown).")
+
     with gr.Row():
         with gr.Column():
             video_input = gr.Video(label="Input Video")
             zoom_slider = gr.Slider(minimum=1.0, maximum=2.0, step=0.1, value=1.0, label="Zoom Factor")
-            process_button = gr.Button("Process Video")
+            start_button = gr.Button("Process Video")
         with gr.Column():
             original_video = gr.Video(label="Original Video")
             stabilized_video = gr.Video(label="Stabilized Video")
             logs_output = gr.Textbox(label="Logs", lines=15)
             progress_bar = gr.Slider(label="Progress", minimum=0, maximum=100, value=0, interactive=False)
-
-    demo.queue()   # enable queue for streaming
-
-    # Try using stream=True. If that raises a TypeError, fall back without it.
-    try:
-        process_button.click(
-            fn=process_video_ai,
-            inputs=[video_input, zoom_slider],
-            outputs=[original_video, stabilized_video, logs_output, progress_bar],
-            stream=True
-        )
-    except TypeError as e:
-        print("[WARNING] Streaming not supported in this version of Gradio. Disabling streaming.")
-        process_button.click(
-            fn=process_video_ai,
-            inputs=[video_input, zoom_slider],
-            outputs=[original_video, stabilized_video, logs_output, progress_bar]
-        )
+            refresh_button = gr.Button("Refresh Status")
+
+    # When "Process Video" is clicked, start processing.
+    start_button.click(fn=start_processing, inputs=[video_input, zoom_slider], outputs=[logs_output])
+    # When "Refresh Status" is clicked, update logs, progress, and videos.
+    refresh_button.click(fn=poll_status, inputs=[], outputs=[original_video, stabilized_video, logs_output, progress_bar])
 
 demo.launch()
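
Note: the heart of this commit is replacing the streaming generator (the removed stream=True keyword, which Button.click does not accept in current Gradio releases, hence the old try/except) with a background thread plus manual polling. Stripped of the video work, the pattern is just:

    import threading
    import time

    status = {"progress": 0, "finished": False}

    def worker():
        for step in range(1, 101):   # stand-in for the two processing phases
            time.sleep(0.05)
            status["progress"] = step
        status["finished"] = True

    threading.Thread(target=worker, daemon=True).start()

    while not status["finished"]:    # what each "Refresh Status" click samples
        time.sleep(0.5)
        print(f"progress: {status['progress']}%")

Depending on the Gradio version, a listener registered with every= (or the later gr.Timer) could drive poll_status automatically instead of a manual refresh button.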
 
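Note: as a quick usage check, the script needs only its imports installed; the RAFT torch.hub load requires network access on first run, and the app falls back to Farneback if it fails. Something like:

    pip install torch opencv-python gradio numpy
    python app.py

then open the printed local URL, upload a clip, click "Process Video", and click "Refresh Status" until the progress slider reaches 100.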