Update app.py
app.py CHANGED
@@ -12,6 +12,14 @@ from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnablePassthrough, RunnableLambda
 import spacy
 
+# Function to check and download spaCy model
+def ensure_spacy_model(model_name="en_core_web_sm"):
+    try:
+        spacy.load(model_name)
+    except OSError:
+        subprocess.run(["python", "-m", "spacy", "download", model_name])
+        spacy.load(model_name)
+
 # Function to extract text from PDF
 def extract_text_from_pdf(uploaded_file):
     text = ""
@@ -21,20 +29,19 @@ def extract_text_from_pdf(uploaded_file):
     return text
 
 # Function to extract text from Word document
-def
-
-
-
-
-    return
-
+def extract_text_from_word(uploaded_file):
+    text = ""
+    doc = Document(uploaded_file)
+    for paragraph in doc.paragraphs:
+        text += paragraph.text + "\n"
+    return text
 
 # Function to summarize text
 def summarize_text(text, max_length=1000, min_length=30):
     max_length = min(max_length, 1000)  # Ensure max_length doesn't exceed 1000
 
     try:
-        # Initialize the summarizer pipeline
+        # Initialize the summarizer pipeline
         summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
         summary = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)
         if isinstance(summary, list) and len(summary) > 0:
@@ -44,26 +51,16 @@ def summarize_text(text, max_length=1000, min_length=30):
     except Exception as e:
         return f"Error in summarization: {e}"
 
-
-
-def load_spacy_model():
-    try:
-        return spacy.load("en_core_web_sm")
-    except OSError:
-        # If model is not found, attempt to download it
-        from spacy.cli import download
-        download("en_core_web_sm")
-        return spacy.load("en_core_web_sm")
-
-
 # Function to extract keywords using spaCy and PyTextRank
 def extract_keywords(text, top_n=10):
+    ensure_spacy_model("en_core_web_sm")
     nlp = spacy.load("en_core_web_sm")
     nlp.add_pipe("textrank", last=True)
    doc = nlp(text)
     keywords = [phrase.text for phrase in doc._.phrases[:top_n]]
     return keywords
 
+
 # Initialize Google Generative AI chat model
 def initialize_chat_model():
     with open("key.txt", "r") as f: