Update app.py
app.py CHANGED
@@ -12,6 +12,14 @@ from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnablePassthrough, RunnableLambda
 import spacy
 
+# Function to check and download spaCy model
+def ensure_spacy_model(model_name="en_core_web_sm"):
+    try:
+        spacy.load(model_name)
+    except OSError:
+        subprocess.run(["python", "-m", "spacy", "download", model_name])
+        spacy.load(model_name)
+
 # Function to extract text from PDF
 def extract_text_from_pdf(uploaded_file):
     text = ""
@@ -21,20 +29,19 @@ def extract_text_from_pdf(uploaded_file):
     return text
 
 # Function to extract text from Word document
-def
-
-
-
-
-    return
-
+def extract_text_from_word(uploaded_file):
+    text = ""
+    doc = Document(uploaded_file)
+    for paragraph in doc.paragraphs:
+        text += paragraph.text + "\n"
+    return text
 
 # Function to summarize text
 def summarize_text(text, max_length=1000, min_length=30):
     max_length = min(max_length, 1000)  # Ensure max_length doesn't exceed 1000
 
     try:
-        # Initialize the summarizer pipeline
+        # Initialize the summarizer pipeline
         summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
         summary = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)
         if isinstance(summary, list) and len(summary) > 0:
@@ -44,26 +51,16 @@ def summarize_text(text, max_length=1000, min_length=30):
     except Exception as e:
         return f"Error in summarization: {e}"
 
-
-
-def load_spacy_model():
-    try:
-        return spacy.load("en_core_web_sm")
-    except OSError:
-        # If model is not found, attempt to download it
-        from spacy.cli import download
-        download("en_core_web_sm")
-        return spacy.load("en_core_web_sm")
-
-
 # Function to extract keywords using spaCy and PyTextRank
 def extract_keywords(text, top_n=10):
+    ensure_spacy_model("en_core_web_sm")
     nlp = spacy.load("en_core_web_sm")
     nlp.add_pipe("textrank", last=True)
    doc = nlp(text)
     keywords = [phrase.text for phrase in doc._.phrases[:top_n]]
     return keywords
 
+
 # Initialize Google Generative AI chat model
 def initialize_chat_model():
     with open("key.txt", "r") as f: