Bhanuprasadchouki committed on
Commit
d4b14dd
·
verified ·
1 Parent(s): 8733a0e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -20
app.py CHANGED
@@ -12,6 +12,14 @@ from langchain_core.output_parsers import StrOutputParser
12
  from langchain_core.runnables import RunnablePassthrough, RunnableLambda
13
  import spacy
14
 
 
 
 
 
 
 
 
 
15
  # Function to extract text from PDF
16
  def extract_text_from_pdf(uploaded_file):
17
  text = ""
@@ -21,20 +29,19 @@ def extract_text_from_pdf(uploaded_file):
21
  return text
22
 
23
  # Function to extract text from Word document
24
def extract_keywords(text, top_n=10):
    """Return the top_n highest-ranked key phrases in *text*.

    Builds a spaCy pipeline via load_spacy_model(), attaches the
    PyTextRank "textrank" component, and reads the ranked phrases
    it exposes on the document.
    """
    pipeline = load_spacy_model()
    pipeline.add_pipe("textrank", last=True)
    analyzed = pipeline(text)
    top_phrases = analyzed._.phrases[:top_n]
    return [phrase.text for phrase in top_phrases]
30
-
31
 
32
  # Function to summarize text
33
  def summarize_text(text, max_length=1000, min_length=30):
34
  max_length = min(max_length, 1000) # Ensure max_length doesn't exceed 1000
35
 
36
  try:
37
- # Initialize the summarizer pipeline with a robust model
38
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
39
  summary = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)
40
  if isinstance(summary, list) and len(summary) > 0:
@@ -44,26 +51,16 @@ def summarize_text(text, max_length=1000, min_length=30):
44
  except Exception as e:
45
  return f"Error in summarization: {e}"
46
 
47
-
48
-
49
def load_spacy_model():
    """Return the "en_core_web_sm" spaCy pipeline, downloading it on first use.

    Tries a plain load first; if the model package is absent (spaCy raises
    OSError), downloads it via spacy.cli and loads again.
    """
    model_name = "en_core_web_sm"
    try:
        return spacy.load(model_name)
    except OSError:
        # Model package not installed — fetch it, then retry the load.
        from spacy.cli import download
        download(model_name)
        return spacy.load(model_name)
57
-
58
-
59
  # Function to extract keywords using spaCy and PyTextRank
60
def extract_keywords(text, top_n=10):
    """Extract the top_n key phrases from *text* with spaCy + PyTextRank.

    Loads "en_core_web_sm", adds the "textrank" pipe, runs the text
    through the pipeline, and returns the ranked phrase strings.
    """
    language = spacy.load("en_core_web_sm")
    language.add_pipe("textrank", last=True)
    processed = language(text)
    return [phrase.text for phrase in processed._.phrases[:top_n]]
66
 
 
67
  # Initialize Google Generative AI chat model
68
  def initialize_chat_model():
69
  with open("key.txt", "r") as f:
 
12
  from langchain_core.runnables import RunnablePassthrough, RunnableLambda
13
  import spacy
14
 
15
# Function to check and download spaCy model
def ensure_spacy_model(model_name="en_core_web_sm"):
    """Ensure the given spaCy model is installed, downloading it if missing.

    Args:
        model_name: Name of the spaCy model package to check/install.

    Returns:
        The loaded spaCy Language pipeline (callers that ignored the
        previous None return are unaffected).

    Raises:
        subprocess.CalledProcessError: if the model download fails.
    """
    # Local imports keep this block self-contained; the visible top-of-file
    # imports do not include subprocess/sys.
    import subprocess
    import sys

    try:
        return spacy.load(model_name)
    except OSError:
        # Model not installed. Download with the interpreter running this app:
        # a bare "python" could resolve to a different environment. check=True
        # makes a failed download raise instead of silently retrying the load.
        subprocess.run(
            [sys.executable, "-m", "spacy", "download", model_name],
            check=True,
        )
        return spacy.load(model_name)
23
  # Function to extract text from PDF
24
  def extract_text_from_pdf(uploaded_file):
25
  text = ""
 
29
  return text
30
 
31
  # Function to extract text from Word document
32
def extract_text_from_word(uploaded_file):
    """Return the text of a Word (.docx) file, one paragraph per line.

    Each paragraph's text is followed by a newline, matching a manual
    `text += paragraph.text + "\n"` accumulation; an empty document
    yields an empty string.
    """
    document = Document(uploaded_file)
    return "".join(paragraph.text + "\n" for paragraph in document.paragraphs)
 
38
 
39
  # Function to summarize text
40
  def summarize_text(text, max_length=1000, min_length=30):
41
  max_length = min(max_length, 1000) # Ensure max_length doesn't exceed 1000
42
 
43
  try:
44
+ # Initialize the summarizer pipeline
45
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
46
  summary = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)
47
  if isinstance(summary, list) and len(summary) > 0:
 
51
  except Exception as e:
52
  return f"Error in summarization: {e}"
53
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  # Function to extract keywords using spaCy and PyTextRank
55
def extract_keywords(text, top_n=10):
    """Extract the top_n key phrases from *text* using spaCy + PyTextRank.

    Args:
        text: Input text to analyze.
        top_n: Maximum number of phrases to return (default 10).

    Returns:
        List of phrase strings, highest-ranked first.
    """
    # Building a spaCy pipeline (model load + textrank pipe) is expensive;
    # do it once and cache it on the function instead of on every call.
    nlp = getattr(extract_keywords, "_nlp", None)
    if nlp is None:
        ensure_spacy_model("en_core_web_sm")
        nlp = spacy.load("en_core_web_sm")
        # NOTE(review): the "textrank" factory is registered by pytextrank,
        # which is not imported in the visible code — confirm it is imported
        # elsewhere in app.py or this add_pipe call will raise.
        nlp.add_pipe("textrank", last=True)
        extract_keywords._nlp = nlp
    doc = nlp(text)
    return [phrase.text for phrase in doc._.phrases[:top_n]]
62
 
63
+
64
  # Initialize Google Generative AI chat model
65
  def initialize_chat_model():
66
  with open("key.txt", "r") as f: