saifeddinemk committed on
Commit 9f26a6c
1 Parent(s): 08ba70e
Files changed (2)
  1. app.py +57 -41
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,49 +1,65 @@
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from peft import PeftModel, PeftConfig
 import torch
 
 # Initialize FastAPI app
 app = FastAPI()
 
-# Load model and tokenizer in optimized mode at startup
-base_model_name = "akjindal53244/Llama-3.1-Storm-8B"
-peft_model_id = "LlamaFactoryAI/cv-job-description-matching"
-
-# Load the model with FP16 precision on CUDA if available
-device = "cuda" if torch.cuda.is_available() else "cpu"
-base_model = AutoModelForCausalLM.from_pretrained(
-    base_model_name,
-    torch_dtype=torch.float16 if device == "cuda" else torch.float16,
-    device_map="auto" if device == "cuda" else None
 )
-tokenizer = AutoTokenizer.from_pretrained(base_model_name)
-model = PeftModel.from_pretrained(base_model, peft_model_id, torch_dtype=torch.float16 if device == "cuda" else torch.float16)
-
-# Define request model
-class AnalysisRequest(BaseModel):
-    cv: str
-    job_description: str
-
-@app.post("/analyze")
-async def analyze(request: AnalysisRequest):
-    try:
-        # Prepare input text with formatted message
-        system_prompt = """
-        You are an advanced AI model designed to analyze the compatibility between a CV and a job description...
-        """
-        user_input = f"<CV> {request.cv} </CV>\n<job_description> {request.job_description} </job_description>"
-        input_text = system_prompt + user_input
-
-        # Tokenize and generate response with memory optimizations
-        inputs = tokenizer(input_text, return_tensors="pt").to(device)
-
-        with torch.no_grad():  # Disable gradients for inference
-            outputs = model.generate(**inputs, max_new_tokens=32)  # Limit generated tokens
-            generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-        return {"analysis": generated_text}
 
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
 import torch
+from fastapi import FastAPI, File, UploadFile, Form
+from transformers import GPT2Tokenizer, GPT2Model
+from sklearn.metrics.pairwise import cosine_similarity
+from typing import List
+from fastapi.middleware.cors import CORSMiddleware
 
 # Initialize FastAPI app
 app = FastAPI()
 
+# Add CORS middleware to allow frontend access
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
 )
 
+# Load GPT-2 model and tokenizer
+tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+model = GPT2Model.from_pretrained("gpt2")
+
+# Helper function to get GPT-2 embeddings
+def get_gpt2_embedding(text):
+    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
+    outputs = model(**inputs)
+    embeddings = torch.mean(outputs.last_hidden_state, dim=1).detach().numpy()
+    return embeddings
+
+# Helper function to calculate cosine similarity
+def calculate_similarity(embedding1, embedding2):
+    similarity = cosine_similarity(embedding1, embedding2)
+    return similarity[0][0]
+
+# Endpoint to upload CV file and store CV text
+@app.post("/upload-cv/")
+async def upload_cv(file: UploadFile = File(...)):
+    content = await file.read()
+    cv_text = content.decode("utf-8")
+    return {"cv_text": cv_text}
+
+# Endpoint to compare job descriptions with the CV text
+@app.post("/compare/")
+async def compare_job_cv(job_descriptions: str = Form(...), cv_text: str = Form(...)):
+    # Generate embedding for the CV text
+    cv_embedding = get_gpt2_embedding(cv_text)
+
+    # Split job descriptions by line and calculate scores
+    descriptions = job_descriptions.strip().split("\n")
+    results = []
+
+    for description in descriptions:
+        job_embedding = get_gpt2_embedding(description)
+        similarity_score = calculate_similarity(cv_embedding, job_embedding)
+
+        # Append results
+        results.append({
+            "Job Description": description,
+            "Match Score": round(similarity_score, 2)
+        })
+
+    return {"results": results}
+
+# Run the app with `uvicorn <filename>:app --reload`
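For reference, the two new endpoints can be exercised with a small client once the server is running. The sketch below is not part of the commit; it assumes the app is started with `uvicorn app:app --reload` on the default port 8000 and uses the `requests` package, with `cv.txt` and the job-description strings as placeholder inputs.

import requests

BASE_URL = "http://127.0.0.1:8000"  # assumed local uvicorn default

# Upload a plain-text CV; the endpoint echoes back the decoded text
with open("cv.txt", "rb") as f:  # cv.txt is a placeholder file name
    upload = requests.post(f"{BASE_URL}/upload-cv/", files={"file": f})
cv_text = upload.json()["cv_text"]

# Compare the CV against newline-separated job descriptions
jobs = "Senior Python backend engineer\nData analyst with SQL experience"
response = requests.post(
    f"{BASE_URL}/compare/",
    data={"job_descriptions": jobs, "cv_text": cv_text},
)
print(response.json())  # {"results": [{"Job Description": ..., "Match Score": ...}, ...]}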
 
requirements.txt CHANGED
@@ -3,3 +3,4 @@ uvicorn
 torch
 transformers
 scikit-learn
+peft