emanuelaboros
/

lang-detect

Token Classification

language-identification

Model card Files Files and versions Community

emanuelaboros committed on Feb 27

Commit

f729b09

·

1 Parent(s): 52d99b3

testing the trick

Files changed (1) hide show

modeling_stacked.py +14 -9

modeling_stacked.py CHANGED Viewed

@@ -27,20 +27,20 @@ def get_info(label_map):
 #         return cls()
-class SafeFloretWrapper(nn.Module):
     """
     A safe wrapper for floret model that keeps it off-device to avoid segmentation faults.
     """
-    def __init__(self, floret_model):
-        super().__init__()
-        self.floret_model = floret_model
-    def forward(self, texts):
         # Floret expects strings, not tensors
-        _, predictions = self.floret_model.predict([texts], k=1)
-        # Convert predictions to tensors for Hugging Face compatibility
-        return torch.tensor(predictions)
 class ExtendedMultitaskModelForTokenClassification(PreTrainedModel):
@@ -53,7 +53,7 @@ class ExtendedMultitaskModelForTokenClassification(PreTrainedModel):
         # Load floret model
         self.dummy_param = nn.Parameter(torch.zeros(1))
-        self.model_floret = floret.load_model(self.config.filename)
         # self.model_floret = SafeFloretWrapper(model_floret)
         # input_ids = "this is a text"
@@ -72,6 +72,11 @@ class ExtendedMultitaskModelForTokenClassification(PreTrainedModel):
             texts = input_ids
         else:
             raise ValueError(f"Unexpected input type: {type(input_ids)}")
         # print(self.model_floret(input_ids))
         # if input_ids is not None:
         #     tokenizer = kwargs.get("tokenizer")

 #         return cls()
+class SafeFloretWrapper:
     """
     A safe wrapper for floret model that keeps it off-device to avoid segmentation faults.
+    This class is pure Python and never interacts with PyTorch tensors or devices.
     """
+    def __init__(self, model_path):
+        print(f"Loading floret model from {model_path}")
+        self.model_floret = floret.load_model(model_path)
+    def predict(self, texts, k=1):
         # Floret expects strings, not tensors
+        predictions, probabilities = self.model_floret.predict(texts, k=k)
+        return predictions, probabilities
 class ExtendedMultitaskModelForTokenClassification(PreTrainedModel):
         # Load floret model
         self.dummy_param = nn.Parameter(torch.zeros(1))
+        self.safe_floret = SafeFloretWrapper(self.config.filename)
         # self.model_floret = SafeFloretWrapper(model_floret)
         # input_ids = "this is a text"
             texts = input_ids
         else:
             raise ValueError(f"Unexpected input type: {type(input_ids)}")
+        # Use the SafeFloretWrapper to get predictions
+        predictions, probabilities = self.safe_floret.predict(texts)
+        print(f"Predictions: {predictions}")
+        print(f"Probabilities: {probabilities}")
         # print(self.model_floret(input_ids))
         # if input_ids is not None:
         #     tokenizer = kwargs.get("tokenizer")