Kaushik066 committed on
Commit 38a6f4b · verified · 1 parent: d0910b0

Update app.py

Files changed (1): app.py (+31 −10)
app.py CHANGED
@@ -1,3 +1,4 @@
+# Base Framework
 import torch
 # For data transformation
 from torchvision import transforms
@@ -5,7 +6,6 @@ from torchvision import transforms
 import transformers
 from transformers import VivitImageProcessor, VivitConfig, VivitModel
 from transformers import set_seed
-
 # For Data Loaders
 import datasets
 from torch.utils.data import Dataset, DataLoader
@@ -25,6 +25,8 @@ import numpy as np
 from torch.nn import Linear, Softmax
 import gradio as gr
 import cv2
+import io
+import tempfile
 # Mediapipe Library
 import mediapipe as mp
 from mediapipe.tasks import python
@@ -236,23 +238,38 @@ def prod_function(model_pretrained, prod_ds):
     return prod_pred

 # Function to get landmarked video
-def tensor_to_video(video_tensor, fps=30, output_path="output.mp4"):
+def save_video_to_mp4(video_tensor, fps=20):
+    # Convert pytorch tensor to numpy ndarray
     video_numpy = video_tensor.permute(0, 2, 3, 1).cpu().numpy()
     # Normalize values to [0, 255] if necessary
     if video_numpy.max() <= 1.0:
         video_numpy = (video_numpy * 255).astype(np.uint8)

-    width = video_numpy.shape[2]
-    height = video_numpy.shape[3]
-    fourcc = cv2.VideoWriter_fourcc(*"mp4v") # MP4 Codec
-    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+    ## Create a temporary file to save the video
+    #temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
+    #output_path = temp_file.name
+    # Create an in-memory byte buffer to store the video
+    byte_buffer = io.BytesIO()
+
+    # Get video dimensions
+    height, width, channels = video_numpy[0].shape
+    fourcc = cv2.VideoWriter_fourcc(*'mp4v') # Codec for .mp4
+
+    # Create VideoWriter object
+    #out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+    out = cv2.VideoWriter(byte_buffer, fourcc, fps, (width, height), isColor=True)

+    # Write the frames to the output file
     for frame in video_numpy:
-        out.write(frame)
+        # Convert RGB back to BGR for OpenCV
+        frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+        out.write(frame_bgr)
+
     out.release()
+    # Return the byte buffer's content (the video as bytes)
+    byte_buffer.seek(0)
+    return byte_buffer.read() #output_path

-    return output_path
-
 # Function to list available videos dynamically
 def list_videos():
     if os.path.exists(data_path):
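Review note on the hunk above: cv2.VideoWriter only accepts a filename string, so passing the io.BytesIO buffer will fail at runtime (the OpenCV Python bindings reject non-string targets). The commented-out tempfile route is the one that actually works; it also keeps the fix for the old bug where height = video_numpy.shape[3] picked up the channel count (3) instead of the frame height. A minimal sketch of that route, assuming the same (T, H, W, C) uint8 RGB frame array and a hypothetical helper name:

    import tempfile
    import cv2

    def write_frames_to_mp4(video_numpy, fps=20):
        # cv2.VideoWriter cannot target an in-memory buffer, so write a temp .mp4
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
        output_path = temp_file.name
        temp_file.close()

        height, width, _ = video_numpy[0].shape
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        for frame in video_numpy:
            out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))  # OpenCV expects BGR
        out.release()
        return output_path

The caller can hand output_path straight to the UI and remove the file once it is no longer needed.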
@@ -262,12 +279,13 @@ def list_videos():
 # Function to return the selected video path
 def play_video(selected_video):
     return os.path.join(data_path, selected_video) if selected_video else None
+# Get Landmarked video

 # Main Function for tab - Gesture recognition
 def translate_sign_language(gesture):
     # Create Dataset
     prod_ds = dataset_prod_obj.create_dataset(gesture)
-    prod_video_path = tensor_to_video(prod_ds)
+    prod_video_path = save_video_to_mp4(prod_ds)
     #prod_video = np.random.randint(0, 255, (32, 225, 225, 3), dtype=np.uint8)

     # Run ML Model
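Review note: save_video_to_mp4 now returns encoded bytes, but the variable is still named prod_video_path and presumably ends up in the gr.Video output, which renders from a file path rather than raw bytes. A small bridging sketch, assuming a hypothetical helper that spills the bytes to disk:

    import tempfile

    def bytes_to_temp_mp4(video_bytes):
        # gr.Video plays files, not byte strings, so persist the encoded video first
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
        tmp.write(video_bytes)
        tmp.close()
        return tmp.name  # hand this path to the gr.Video output

    # possible usage inside translate_sign_language:
    # prod_video_path = bytes_to_temp_mp4(save_video_to_mp4(prod_ds))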
@@ -291,6 +309,7 @@ with gr.Blocks() as demo:

     # Gesture recognition Tab
     with gr.Tab("Gesture recognition"):
+        landmarked_video = gr.State([])
         with gr.Row(height=350, variant="panel"): # equal_height=False, show_progress=True
             with gr.Column(scale=1, variant="panel"):
                 # Add webcam input for sign language video capture
@@ -298,6 +317,7 @@ with gr.Blocks() as demo:
             with gr.Column(scale=1, variant="panel"):
                 # Display the landmarked video
                 video_output = gr.Video(interactive=False, autoplay=True, streaming=False, label="Landmarked Gesture")
+
         with gr.Row(variant="panel"): # equal_height=False, show_progress=True
             with gr.Column(scale=1, variant="panel"):
                 # Submit the Video
@@ -307,6 +327,7 @@ with gr.Blocks() as demo:
                 text_output = gr.Textbox(label="Translation in English")
             # Set up the interface
             video_button.click(translate_sign_language, inputs=video_input, outputs=[text_output, video_output])
+            landmarked_video.change(translate_sign_language, inputs=landmarked_video, outputs=[text_output, video_output])

     # Indian Sign Language gesture reference tab
     with gr.Tab("Indian Sign Language gesture reference"):