Kaushik066 committed (verified)
Commit 64ffec6 · 1 Parent(s): 6fb6f1a

Update app.py

Files changed (1)
  1. app.py +43 -50
app.py CHANGED
@@ -76,19 +76,19 @@ holistic = mp_holistic.Holistic(
     min_tracking_confidence=0.5
 )
 
-# Creating Dataloader
-class CustomDatasetProd(Dataset):
-    def __init__(self, pixel_values):
-        self.pixel_values = pixel_values.to('cpu')
-
-    def __len__(self):
-        return len(self.pixel_values)
-
-    def __getitem__(self, idx):
-        item = {
-            'pixel_values': self.pixel_values[idx]
-        }
-        return item
+## Creating Dataloader
+#class CustomDatasetProd(Dataset):
+#    def __init__(self, pixel_values):
+#        self.pixel_values = pixel_values.to('cpu')
+#
+#    def __len__(self):
+#        return len(self.pixel_values)
+#
+#    def __getitem__(self, idx):
+#        item = {
+#            'pixel_values': self.pixel_values[idx]
+#        }
+#        return item
 
 class CreateDatasetProd():
     def __init__(self
@@ -132,7 +132,6 @@ class CreateDatasetProd():
     def add_landmarks(self, video):
        annotated_image = []
        for frame in video:
-
            #Convert pytorch Tensor to CV2 image
            image = frame.permute(1, 2, 0).numpy() # Convert to (H, W, C) format for mediapipe to work
 
@@ -169,22 +168,19 @@ class CreateDatasetProd():
        return torch.stack(annotated_image)
 
    def create_dataset(self, video_paths):
-        pixel_values = []
-        for path in tqdm(video_paths):
-            #print('Video', path)
-            # Read and process Videos
-            video = self.read_video(path)
-            video = transforms.v2.functional.resize(video.permute(0, 3, 1, 2), size=(self.clip_size*2, self.clip_size*3)) # Auto converts to (F, C, H, W) format
-            video = self.add_landmarks(video)
-            # Data Preperation for ML Model without Augmentation
-            video = self.transform_prod(video.permute(0, 3, 1, 2))
-            pixel_values.append(video.to(device))
-            del video
-            # Force garbage collection
-            gc.collect()
-
-        pixel_values = torch.stack(pixel_values).to(device)
-        return CustomDatasetProd(pixel_values=pixel_values)
+        # Read and process Videos
+        video = self.read_video(path)
+        video = transforms.v2.functional.resize(video.permute(0, 3, 1, 2), size=(self.clip_size*2, self.clip_size*3)) # Auto converts to (F, C, H, W) format
+        video = self.add_landmarks(video)
+        # Data Preperation for ML Model without Augmentation
+        video = self.transform_prod(video.permute(0, 3, 1, 2))
+        pixel_values = video.to(device)
+
+        # Force garbage collection
+        del video
+        gc.collect()
+
+        return pixel_values #CustomDatasetProd(pixel_values=pixel_values)
 
 # Creating Dataloader object
 dataset_prod_obj = CreateDatasetProd(CLIP_LENGTH, CLIP_SIZE, FRAME_STEPS)
@@ -210,10 +206,10 @@ class SignClassificationModel(torch.nn.Module):
        return reduced_tensor
 
 # Load the model
-model_pretrained = torch.load(model_path, map_location=torch.device('cpu'), weights_only=False)
+model_pretrained = torch.load(model_path, map_location=device, weights_only=False) #torch.device('cpu')
 
 # Evaluation Function
-def prod_function(model_pretrained, prod_dl):
+def prod_function(model_pretrained, prod_ds):
    # Initialize accelerator
    accelerator = Accelerator()
 
@@ -228,38 +224,35 @@ def prod_function(model_pretrained, prod_dl):
    set_seed(SEED)
 
    # There is no specific order to remember, we just need to unpack the objects in the same order we gave them to the prepare method.
-    accelerated_model, acclerated_prod_dl = accelerator.prepare(model_pretrained, prod_dl)
+    accelerated_model, acclerated_prod_ds = accelerator.prepare(model_pretrained, prod_ds)
 
-    # Evaluate at the end of the epoch (distributed evaluation as we have 8 TPU cores)
+    # Evaluate at the end of the epoch
    accelerated_model.eval()
 
-    prod_preds = []
-
-    for batch in acclerated_prod_dl:
-        videos = batch['pixel_values']
-        with torch.no_grad():
-            outputs = accelerated_model(videos)
-
-        prod_logits = outputs.squeeze(1)
-        prod_pred = prod_logits.argmax(-1)
-        prod_preds.append(prod_pred)
-    return prod_preds
+    videos = acclerated_prod_ds['pixel_values']
+    with torch.no_grad():
+        outputs = accelerated_model(videos)
+
+    prod_logits = outputs.squeeze(1)
+    prod_pred = prod_logits.argmax(-1)
+    return prod_pred
 
 def translate_sign_language(gesture):
    # Create Dataset
    prod_ds = dataset_prod_obj.create_dataset(gesture)
-    prod_dl = DataLoader(prod_ds, batch_size=BATCH_SIZE)
+    #prod_dl = DataLoader(prod_ds, batch_size=BATCH_SIZE)
 
    # Run ML Model
-    predicted_prod_label = prod_function(model_pretrained, prod_dl)
+    predicted_prod_label = prod_function(model_pretrained, prod_ds)
 
    # Identify the hand gesture
-    predicted_prod_label = torch.stack(predicted_prod_label)
-    predicted_prod_label = predicted_prod_label.squeeze(1)
+    predicted_prod_label = predicted_prod_label#.squeeze(1)
 
    idx_to_label = model_pretrained.config.id2label
-    for val in np.array(predicted_prod_label):
-        gesture_translation = idx_to_label[val]
+    gesture_translation = idx_to_label[np.array(predicted_prod_label)]
+
+    #for val in np.array(predicted_prod_label):
+    #    gesture_translation = idx_to_label[val]
 
    return gesture_translation
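For orientation, a brief usage sketch of the call chain shown in this diff (not part of the commit): the clip path and the top-level call are assumptions, since the UI wiring that invokes translate_sign_language is outside this file excerpt.

# Usage sketch under assumptions: 'sample_gesture.mp4' is a hypothetical clip
# path, and the objects defined above in app.py (dataset_prod_obj,
# model_pretrained, translate_sign_language) are already in scope.
gesture_text = translate_sign_language('sample_gesture.mp4')  # create_dataset() -> prod_function() -> id2label lookup
print(gesture_text)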