Oriaz committed on
Commit
41b7068
·
verified ·
1 Parent(s): d77b584

Update tasks/text.py

Browse files
Files changed (1) hide show
  1. tasks/text.py +50 -16
tasks/text.py CHANGED
@@ -8,14 +8,21 @@ from .utils.evaluation import TextEvaluationRequest
8
  from .utils.emissions import tracker, clean_emissions_data, get_space_info
9
 
10
  ## add-on imports
 
 
 
11
  from sentence_transformers import SentenceTransformer
12
  from sklearn.preprocessing import MinMaxScaler
13
- import numpy as np
14
  import skops.io as sio
15
 
 
 
 
 
 
16
  router = APIRouter()
17
 
18
- DESCRIPTION = "Embedding + Logistic Regression"
19
  ROUTE = "/text"
20
 
21
  @router.post(ROUTE, tags=["Text Task"],
@@ -62,23 +69,50 @@ async def evaluate_text(request: TextEvaluationRequest):
62
  # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
63
  #--------------------------------------------------------------------------------------------
64
 
65
- ## Models loading
66
- # Embedding model
67
- query_prompt_name = "s2s_query"
68
- model = SentenceTransformer("dunzhang/stella_en_400M_v5",trust_remote_code=True).cuda()
69
-
70
- # Pre-trained Logistic Regression model
71
- trusted_types = ['sklearn.feature_selection._univariate_selection.f_classif']
72
- disp = sio.load('./tasks/logistic_regression_model.skops',trusted=trusted_types)
 
73
 
74
- ## Data prep
75
- embeddings = model.encode(list(test_dataset['quote']), prompt_name=query_prompt_name)
76
- scaler = MinMaxScaler()
77
- X_scaled = scaler.fit_transform(embeddings)
78
 
79
- ## Predictions
80
- predictions = disp.predict(X_scaled)
 
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  #--------------------------------------------------------------------------------------------
83
  # YOUR MODEL INFERENCE STOPS HERE
84
  #--------------------------------------------------------------------------------------------
 
8
  from .utils.emissions import tracker, clean_emissions_data, get_space_info
9
 
10
  ## add-on imports
11
+ import numpy as np
12
+
13
+ # Logistic regression requirements
14
  from sentence_transformers import SentenceTransformer
15
  from sklearn.preprocessing import MinMaxScaler
 
16
  import skops.io as sio
17
 
18
+ # BERT requirements
19
+ from transformers import AutoTokenizer,BertForSequenceClassification,AutoModelForSequenceClassification,Trainer, TrainingArguments,DataCollatorWithPadding
20
+ from datasets import Dataset
21
+ import torch
22
+
23
  router = APIRouter()
24
 
25
+ DESCRIPTION = "Simple BERT classif"
26
  ROUTE = "/text"
27
 
28
  @router.post(ROUTE, tags=["Text Task"],
 
69
  # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
70
  #--------------------------------------------------------------------------------------------
71
 
72
+ ######################## LOG REG ########################
73
+ # ## Models loading
74
+ # # Embedding model
75
+ # query_prompt_name = "s2s_query"
76
+ # model = SentenceTransformer("dunzhang/stella_en_400M_v5",trust_remote_code=True).cuda()
77
+
78
+ # # Pre-trained Logistic Regression model
79
+ # trusted_types = ['sklearn.feature_selection._univariate_selection.f_classif']
80
+ # disp = sio.load('./tasks/logistic_regression_model.skops',trusted=trusted_types)
81
 
82
+ # ## Data prep
83
+ # embeddings = model.encode(list(test_dataset['quote']), prompt_name=query_prompt_name)
84
+ # scaler = MinMaxScaler()
85
+ # X_scaled = scaler.fit_transform(embeddings)
86
 
87
+ # ## Predictions
88
+ # predictions = disp.predict(X_scaled)
89
+
90
 
91
+ ######################## BERT ########################
92
+ ## Model loading
93
+ model = BertForSequenceClassification.from_pretrained("Oriaz/climate_change_bert_classif")
94
+ tokenizer = AutoTokenizer.from_pretrained("Oriaz/climate_change_bert_classif")
95
+
96
+ ## Data prep
97
+ def preprocess_function(df):
98
+ return tokenizer(df["quote"], truncation=True)
99
+ tokenized_test = test_dataset.map(preprocess_function, batched=True)
100
+
101
+ ## Load the saved training arguments and disable evaluation (inference only)
102
+ training_args = torch.load("./tasks/utils/training_args.bin")
103
+ training_args.eval_strategy='no'
104
+
105
+ trainer = Trainer(
106
+ model=model,
107
+ args=training_args,
108
+ tokenizer=tokenizer
109
+ )
110
+
111
+ ## prediction
112
+ preds = trainer.predict(tokenized_test)
113
+ predictions = np.array([np.argmax(x) for x in preds[0]])
114
+
115
+
116
  #--------------------------------------------------------------------------------------------
117
  # YOUR MODEL INFERENCE STOPS HERE
118
  #--------------------------------------------------------------------------------------------