Rafik Matta committed on
Commit
4fa2adf
·
1 Parent(s): 9313b78

adding GPT2 as an option

Browse files
Files changed (1) hide show
  1. app.py +67 -21
app.py CHANGED
@@ -3,20 +3,23 @@ import json
3
  import time
4
 
5
  import gradio as gr
6
- from transformers import AutoTokenizer, AutoModel
7
- # pytorch library
8
  import torch
9
  import torch.nn.functional as f
10
 
11
-
12
  from roles_list import roles
13
- # Load the model from the specified directory
14
- embed_store = {}
15
- model = 'sentence-transformers/all-MiniLM-L12-v2'
16
- sbert_model = AutoModel.from_pretrained(model)
17
- sbert_tokenizer = AutoTokenizer.from_pretrained(model)
18
 
 
 
 
 
 
 
 
 
 
19
 
 
20
  for role in roles:
21
  encoding = sbert_tokenizer(role, # the texts to be tokenized
22
  max_length=10,
@@ -28,16 +31,16 @@ for role in roles:
28
  embed = sbert_model(**encoding)
29
  embed = embed.pooler_output
30
  embed_store[role] = f.normalize(embed, p=2, dim=1)
31
- print("Model is ready for inference")
32
 
33
 
34
  def get_role_from_sbert(title):
35
  start_time = time.time()
36
  encoding = sbert_tokenizer(title,
37
- max_length=10,
38
- padding="max_length",
39
- return_tensors='pt'
40
- )
41
  # Run the model prediction on the input data
42
  with torch.no_grad():
43
  # get the model embeddings
@@ -49,19 +52,62 @@ def get_role_from_sbert(title):
49
  store_cos[role] = round(cos_sim.item(), 3)
50
  # Get the top 3 items with the highest cosine similarity
51
  top_3_keys_values = sorted(store_cos.items(), key=lambda item: item[1], reverse=True)
52
- job_scores_str = '\n'.join([f"{job}: {score}" for job, score in top_3_keys_values])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  end_time = time.time()
55
  execution_time = end_time - start_time
56
- # Convert to dictionary if needed or keep as list of tuples
57
- return job_scores_str + f" \nExecution time: {str(execution_time)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
 
60
- demo = gr.Interface(fn=get_role_from_sbert,
61
- inputs=gr.Textbox(label="Job Title"),
62
- outputs=gr.Textbox(label="Role"),
63
- title="HackerRank Role Classifier")
 
 
 
 
64
 
 
65
 
66
  gr.close_all()
67
- demo.launch()
 
3
  import time
4
 
5
  import gradio as gr
6
+ from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
 
7
  import torch
8
  import torch.nn.functional as f
9
 
 
10
  from roles_list import roles
 
 
 
 
 
11
 
12
+ # Load the SBERT model and tokenizer
13
+ sbert_model_name = 'sentence-transformers/all-MiniLM-L12-v2'
14
+ sbert_model = AutoModel.from_pretrained(sbert_model_name)
15
+ sbert_tokenizer = AutoTokenizer.from_pretrained(sbert_model_name)
16
+
17
+ # Load the LLM model and tokenizer
18
+ llm_model_name = 'bert-base-uncased' # Using BERT for sequence classification
19
+ llm_model = AutoModelForSequenceClassification.from_pretrained(llm_model_name)
20
+ llm_tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
21
 
22
+ embed_store = {}
23
  for role in roles:
24
  encoding = sbert_tokenizer(role, # the texts to be tokenized
25
  max_length=10,
 
31
  embed = sbert_model(**encoding)
32
  embed = embed.pooler_output
33
  embed_store[role] = f.normalize(embed, p=2, dim=1)
34
+ print("SBERT model is ready for inference")
35
 
36
 
37
  def get_role_from_sbert(title):
38
  start_time = time.time()
39
  encoding = sbert_tokenizer(title,
40
+ max_length=10,
41
+ padding="max_length",
42
+ return_tensors='pt'
43
+ )
44
  # Run the model prediction on the input data
45
  with torch.no_grad():
46
  # get the model embeddings
 
52
  store_cos[role] = round(cos_sim.item(), 3)
53
  # Get the top 3 items with the highest cosine similarity
54
  top_3_keys_values = sorted(store_cos.items(), key=lambda item: item[1], reverse=True)
55
+ job_scores = [{"Role": job, "SBERT Score": score} for job, score in top_3_keys_values]
56
+
57
+ end_time = time.time()
58
+ execution_time = end_time - start_time
59
+
60
+ return job_scores, execution_time
61
+
62
+
63
+ def get_role_from_llm(title):
64
+ start_time = time.time()
65
+
66
+ llm_scores = []
67
+ for role in roles:
68
+ inputs = llm_tokenizer.encode_plus(title, role, return_tensors='pt', max_length=512, truncation=True)
69
+ with torch.no_grad():
70
+ outputs = llm_model(**inputs)
71
+ score = torch.softmax(outputs.logits, dim=1)[0][1].item()
72
+ llm_scores.append({"Role": role, "LLM Score": round(score, 3)})
73
 
74
  end_time = time.time()
75
  execution_time = end_time - start_time
76
+
77
+ return llm_scores, execution_time
78
+
79
+
80
+ def classify_role(title):
81
+ sbert_scores, sbert_execution_time = get_role_from_sbert(title)
82
+ llm_scores, llm_execution_time = get_role_from_llm(title)
83
+
84
+ # Merge results into a single table
85
+ role_dict = {item["Role"]: item for item in sbert_scores}
86
+ for item in llm_scores:
87
+ if item["Role"] in role_dict:
88
+ role_dict[item["Role"]]["LLM Score"] = item["LLM Score"]
89
+ else:
90
+ role_dict[item["Role"]] = {"Role": item["Role"], "SBERT Score": "", "LLM Score": item["LLM Score"]}
91
+
92
+ results = []
93
+ for role, scores in role_dict.items():
94
+ results.append([role, scores.get("SBERT Score", ""), scores.get("LLM Score", "")])
95
+
96
+ execution_time_info = f"SBERT Execution Time: {sbert_execution_time:.4f} seconds, LLM Execution Time: {llm_execution_time:.4f} seconds"
97
+
98
+ return results, execution_time_info
99
 
100
 
101
+ # Gradio Blocks interface
102
+ with gr.Blocks() as demo:
103
+ gr.Markdown("# HackerRank Role Classifier")
104
+ with gr.Column():
105
+ input_text = gr.Textbox(label="Job Title")
106
+ classify_button = gr.Button("Classify")
107
+ output_table = gr.Dataframe(headers=["Role", "SBERT Score", "LLM Score"], label="Role Scores")
108
+ execution_time_text = gr.Textbox(label="Execution Time", interactive=False)
109
 
110
+ classify_button.click(fn=classify_role, inputs=input_text, outputs=[output_table, execution_time_text])
111
 
112
  gr.close_all()
113
+ demo.launch()