Update app.py
app.py CHANGED
@@ -76,19 +76,19 @@ holistic = mp_holistic.Holistic(
     min_tracking_confidence=0.5
 )
 
-
-class CustomDatasetProd(Dataset):
-    def __init__(self, pixel_values):
-        self.pixel_values = pixel_values.to('cpu')
-
-    def __len__(self):
-        return len(self.pixel_values)
-
-    def __getitem__(self, idx):
-        item = {
-            'pixel_values': self.pixel_values[idx]
-        }
-        return item
+## Creating Dataloader
+#class CustomDatasetProd(Dataset):
+#    def __init__(self, pixel_values):
+#        self.pixel_values = pixel_values.to('cpu')
+#
+#    def __len__(self):
+#        return len(self.pixel_values)
+#
+#    def __getitem__(self, idx):
+#        item = {
+#            'pixel_values': self.pixel_values[idx]
+#        }
+#        return item
 
 class CreateDatasetProd():
     def __init__(self
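The hunk above retires the custom Dataset wrapper by commenting it out; the rest of the commit passes preprocessed tensors straight to the model instead of iterating a DataLoader. For orientation, a minimal sketch of the map-style pattern being dropped (illustrative names and dummy shapes, not part of the commit):

import torch
from torch.utils.data import Dataset, DataLoader

class ClipDataset(Dataset):
    """Map-style dataset: wraps preprocessed frames so a DataLoader can batch them."""
    def __init__(self, pixel_values):
        self.pixel_values = pixel_values.to('cpu')

    def __len__(self):
        return len(self.pixel_values)

    def __getitem__(self, idx):
        return {'pixel_values': self.pixel_values[idx]}

pixel_values = torch.randn(2, 10, 3, 224, 224)       # (clips, frames, C, H, W), dummy data
loader = DataLoader(ClipDataset(pixel_values), batch_size=1)
batch = next(iter(loader))                            # {'pixel_values': tensor of shape (1, 10, 3, 224, 224)}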
@@ -132,7 +132,6 @@ class CreateDatasetProd():
     def add_landmarks(self, video):
         annotated_image = []
         for frame in video:
-
             #Convert pytorch Tensor to CV2 image
             image = frame.permute(1, 2, 0).numpy() # Convert to (H, W, C) format for mediapipe to work
 
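Only a blank line is dropped here, but the surrounding comment carries the real constraint: MediaPipe consumes (H, W, C) NumPy images, while the pipeline stores frames as (C, H, W) tensors. A small round-trip sketch of that conversion (dummy frame; the uint8 RGB assumption is mine, not stated in the diff):

import torch

frame = torch.randint(0, 256, (3, 224, 224), dtype=torch.uint8)  # (C, H, W) frame as stored in the video tensor

image = frame.permute(1, 2, 0).numpy()   # -> (H, W, C) NumPy array for MediaPipe
# ... run the holistic model / drawing utilities on `image` here ...
annotated = torch.from_numpy(image)      # back to a tensor, still (H, W, C)
restored = annotated.permute(2, 0, 1)    # -> (C, H, W) if downstream code expects channel-first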
@@ -169,22 +168,19 @@ class CreateDatasetProd():
         return torch.stack(annotated_image)
 
     def create_dataset(self, video_paths):
-        ... (14 removed lines not captured in this view)
-        pixel_values = torch.stack(pixel_values).to(device)
-        return CustomDatasetProd(pixel_values=pixel_values)
+        # Read and process Videos
+        video = self.read_video(path)
+        video = transforms.v2.functional.resize(video.permute(0, 3, 1, 2), size=(self.clip_size*2, self.clip_size*3)) # Auto converts to (F, C, H, W) format
+        video = self.add_landmarks(video)
+        # Data Preperation for ML Model without Augmentation
+        video = self.transform_prod(video.permute(0, 3, 1, 2))
+        pixel_values = video.to(device)
+
+        # Force garbage collection
+        del video
+        gc.collect()
+
+        return pixel_values #CustomDatasetProd(pixel_values=pixel_values)
 
 # Creating Dataloader object
 dataset_prod_obj = CreateDatasetProd(CLIP_LENGTH, CLIP_SIZE, FRAME_STEPS)
@@ -210,10 +206,10 @@ class SignClassificationModel(torch.nn.Module):
         return reduced_tensor
 
 # Load the model
-model_pretrained = torch.load(model_path, map_location=torch.device('cpu'))
+model_pretrained = torch.load(model_path, map_location=device, weights_only=False) #torch.device('cpu')
 
 # Evaluation Function
-def prod_function(model_pretrained, prod_dl):
+def prod_function(model_pretrained, prod_ds):
     # Initialize accelerator
     accelerator = Accelerator()
 
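The reload line is worth a note: PyTorch 2.6 changed the default of torch.load to weights_only=True, which refuses to unpickle a full nn.Module, so weights_only=False is needed to load this checkpoint (and should only be used for files you trust); map_location=device additionally lets the same code run on CPU or GPU. A minimal sketch (model_path as defined earlier in app.py):

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# model_path: path to the pickled model, defined earlier in app.py.
# Full-module checkpoint: weights_only=False is required on PyTorch >= 2.6,
# and is only safe for checkpoints from a trusted source.
model_pretrained = torch.load(model_path, map_location=device, weights_only=False)
model_pretrained.eval()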
@@ -228,38 +224,35 @@ def prod_function(model_pretrained, prod_dl):
     set_seed(SEED)
 
     # There is no specific order to remember, we just need to unpack the objects in the same order we gave them to the prepare method.
-    accelerated_model,
+    accelerated_model, acclerated_prod_ds = accelerator.prepare(model_pretrained, prod_ds)
 
-    # Evaluate at the end of the epoch
+    # Evaluate at the end of the epoch
     accelerated_model.eval()
 
-
+    videos = acclerated_prod_ds['pixel_values']
+    with torch.no_grad():
+        outputs = accelerated_model(videos)
 
-
-
-
-    outputs = accelerated_model(videos)
-
-    prod_logits = outputs.squeeze(1)
-    prod_pred = prod_logits.argmax(-1)
-    prod_preds.append(prod_pred)
-    return prod_preds
+    prod_logits = outputs.squeeze(1)
+    prod_pred = prod_logits.argmax(-1)
+    return prod_pred
 
 def translate_sign_language(gesture):
     # Create Dataset
     prod_ds = dataset_prod_obj.create_dataset(gesture)
-    prod_dl = DataLoader(prod_ds, batch_size=BATCH_SIZE)
+    #prod_dl = DataLoader(prod_ds, batch_size=BATCH_SIZE)
 
     # Run ML Model
-    predicted_prod_label = prod_function(model_pretrained,
+    predicted_prod_label = prod_function(model_pretrained, prod_ds)
 
     # Identify the hand gesture
-    predicted_prod_label =
-    predicted_prod_label = predicted_prod_label.squeeze(1)
+    predicted_prod_label = predicted_prod_label#.squeeze(1)
 
     idx_to_label = model_pretrained.config.id2label
-
-
+    gesture_translation = idx_to_label[np.array(predicted_prod_label)]
+
+    #for val in np.array(predicted_prod_label):
+    #    gesture_translation = idx_to_label[val]
 
     return gesture_translation
 
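After this commit prod_ds is the raw pixel_values tensor returned by create_dataset, so there is no DataLoader left to iterate: accelerator.prepare mainly readies the model (a plain tensor passes through unchanged, which is fine because create_dataset already moved it to device), and a single no_grad forward pass yields the class index. Two details the diff leaves implicit: a bare tensor has no ['pixel_values'] key, and config.id2label is a plain dict, so converting the prediction to an int before the lookup is the safer pattern. A condensed sketch of that path (names taken from the diff; the int conversion is my assumption, not part of the commit):

import torch
from accelerate import Accelerator

def prod_function(model_pretrained, prod_ds):
    accelerator = Accelerator()
    # prod_ds is already a device-resident tensor; prepare() only wraps the model.
    accelerated_model, videos = accelerator.prepare(model_pretrained, prod_ds)
    accelerated_model.eval()

    with torch.no_grad():
        outputs = accelerated_model(videos)

    prod_logits = outputs.squeeze(1)
    return prod_logits.argmax(-1)

# Label lookup: id2label maps integer class ids to strings, so use a plain int key.
# predicted_prod_label = prod_function(model_pretrained, prod_ds)
# gesture_translation = model_pretrained.config.id2label[int(predicted_prod_label.squeeze().item())]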