Spaces:

hackerrank
/

screen-HR

Sleeping

App Files Files Community

Rafik Matta committed on Jun 28, 2024

Commit

4fa2adf

1 Parent(s): 9313b78

adding GPT2 as an option

Browse files

Files changed (1) hide show

app.py +67 -21

app.py CHANGED Viewed

@@ -3,20 +3,23 @@ import json
 import time
 import gradio as gr
-from transformers import AutoTokenizer, AutoModel
-# pytorch library
 import torch
 import torch.nn.functional as f
 from roles_list import roles
-# Load the model from the specified directory
-embed_store = {}
-model = 'sentence-transformers/all-MiniLM-L12-v2'
-sbert_model = AutoModel.from_pretrained(model)
-sbert_tokenizer = AutoTokenizer.from_pretrained(model)
 for role in roles:
     encoding = sbert_tokenizer(role,  # the texts to be tokenized
                                max_length=10,
@@ -28,16 +31,16 @@ for role in roles:
         embed = sbert_model(**encoding)
         embed = embed.pooler_output
     embed_store[role] = f.normalize(embed, p=2, dim=1)
-print("Model is ready for inference")
 def get_role_from_sbert(title):
     start_time = time.time()
     encoding = sbert_tokenizer(title,
-                         max_length=10,
-                         padding="max_length",
-                         return_tensors='pt'
-                         )
     # Run the model prediction on the input data
     with torch.no_grad():
         # get the model embeddings
@@ -49,19 +52,62 @@ def get_role_from_sbert(title):
         store_cos[role] = round(cos_sim.item(), 3)
     # Get the top 3 items with the highest cosine similarity
     top_3_keys_values = sorted(store_cos.items(), key=lambda item: item[1], reverse=True)
-    job_scores_str = '\n'.join([f"{job}: {score}" for job, score in top_3_keys_values])
     end_time = time.time()
     execution_time = end_time - start_time
-    # Convert to dictionary if needed or keep as list of tuples
-    return job_scores_str + f" \nExecution time: {str(execution_time)}"
-demo = gr.Interface(fn=get_role_from_sbert,
-                    inputs=gr.Textbox(label="Job Title"),
-                    outputs=gr.Textbox(label="Role"),
-                    title="HackerRank Role Classifier")
 gr.close_all()
-demo.launch()

 import time
 import gradio as gr
+from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
 import torch
 import torch.nn.functional as f
 from roles_list import roles
+# Load the SBERT model and tokenizer
+sbert_model_name = 'sentence-transformers/all-MiniLM-L12-v2'
+sbert_model = AutoModel.from_pretrained(sbert_model_name)
+sbert_tokenizer = AutoTokenizer.from_pretrained(sbert_model_name)
+# Load the LLM model and tokenizer
+llm_model_name = 'bert-base-uncased'  # Using BERT for sequence classification
+llm_model = AutoModelForSequenceClassification.from_pretrained(llm_model_name)
+llm_tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
+embed_store = {}
 for role in roles:
     encoding = sbert_tokenizer(role,  # the texts to be tokenized
                                max_length=10,
         embed = sbert_model(**encoding)
         embed = embed.pooler_output
     embed_store[role] = f.normalize(embed, p=2, dim=1)
+print("SBERT model is ready for inference")
 def get_role_from_sbert(title):
     start_time = time.time()
     encoding = sbert_tokenizer(title,
+                               max_length=10,
+                               padding="max_length",
+                               return_tensors='pt'
+                               )
     # Run the model prediction on the input data
     with torch.no_grad():
         # get the model embeddings
         store_cos[role] = round(cos_sim.item(), 3)
     # Get the top 3 items with the highest cosine similarity
     top_3_keys_values = sorted(store_cos.items(), key=lambda item: item[1], reverse=True)
+    job_scores = [{"Role": job, "SBERT Score": score} for job, score in top_3_keys_values]
+    end_time = time.time()
+    execution_time = end_time - start_time
+    return job_scores, execution_time
+def get_role_from_llm(title):
+    """Score every entry of ``roles`` against *title* with ``llm_model``.
+
+    Each (title, role) pair is tokenized as a two-segment input and passed
+    through the sequence-classification model; the softmax probability at
+    class index 1 is used as that role's score, rounded to 3 decimals.
+
+    Returns:
+        A tuple ``(llm_scores, execution_time)`` where ``llm_scores`` is a
+        list of ``{"Role": ..., "LLM Score": ...}`` dicts in ``roles`` order
+        and ``execution_time`` is the elapsed wall-clock time in seconds.
+    """
+    # NOTE(review): llm_model is loaded from the base 'bert-base-uncased'
+    # checkpoint at module level; presumably its classification head is not
+    # fine-tuned for this task, so these scores may not be meaningful - confirm.
+    start_time = time.time()
+    llm_scores = []
+    # Inference only: disable gradient tracking once for the whole loop.
+    with torch.no_grad():
+        for role in roles:
+            # Call the tokenizer directly (as the SBERT path does) instead of
+            # the deprecated encode_plus; the resulting encoding is the same.
+            inputs = llm_tokenizer(title, role, return_tensors='pt', max_length=512, truncation=True)
+            outputs = llm_model(**inputs)
+            score = torch.softmax(outputs.logits, dim=1)[0][1].item()
+            llm_scores.append({"Role": role, "LLM Score": round(score, 3)})
     end_time = time.time()
     execution_time = end_time - start_time
+    return llm_scores, execution_time
+def classify_role(title):
+    """Run both scorers on *title* and merge their output into table rows.
+
+    Returns:
+        A tuple ``(results, execution_time_info)``. ``results`` is a list of
+        ``[role, sbert_score, llm_score]`` rows, with ``""`` standing in for
+        a score that one of the scorers did not produce. Rows follow the
+        order of the SBERT results, then any roles only the LLM scorer
+        returned. ``execution_time_info`` is a one-line summary of both
+        execution times.
+    """
+    sbert_scores, sbert_execution_time = get_role_from_sbert(title)
+    llm_scores, llm_execution_time = get_role_from_llm(title)
+    # Index the SBERT entries by role, then fold the LLM scores into them.
+    merged = {entry["Role"]: entry for entry in sbert_scores}
+    for entry in llm_scores:
+        name = entry["Role"]
+        existing = merged.get(name)
+        if existing is None:
+            # Role only known to the LLM scorer: leave the SBERT column blank.
+            merged[name] = {"Role": name, "SBERT Score": "", "LLM Score": entry["LLM Score"]}
+        else:
+            existing["LLM Score"] = entry["LLM Score"]
+    results = [
+        [name, row.get("SBERT Score", ""), row.get("LLM Score", "")]
+        for name, row in merged.items()
+    ]
+    execution_time_info = (
+        f"SBERT Execution Time: {sbert_execution_time:.4f} seconds, "
+        f"LLM Execution Time: {llm_execution_time:.4f} seconds"
+    )
+    return results, execution_time_info
+# Gradio Blocks interface
+# One column holding a job-title input, a trigger button, a scores table and
+# a read-only execution-time field.
+with gr.Blocks() as demo:
+    gr.Markdown("# HackerRank Role Classifier")
+    with gr.Column():
+        input_text = gr.Textbox(label="Job Title")
+        classify_button = gr.Button("Classify")
+        # Headers mirror the [role, SBERT score, LLM score] rows returned by classify_role.
+        output_table = gr.Dataframe(headers=["Role", "SBERT Score", "LLM Score"], label="Role Scores")
+        execution_time_text = gr.Textbox(label="Execution Time", interactive=False)
+    # On click, pass the textbox value to classify_role and route its two
+    # return values to the table and the execution-time field, in that order.
+    classify_button.click(fn=classify_role, inputs=input_text, outputs=[output_table, execution_time_text])
 gr.close_all()
+demo.launch()