HarnithaS committed
Commit bdac891 · 1 Parent(s): 122bcbd

initial commit

Files changed (2)
  1. app.py +105 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,105 @@
+ import streamlit as st
+ # import langchain
+ import PyPDF2
+ import os
+ from transformers import BartTokenizer, BartForConditionalGeneration
+
+ tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
+ model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
+
+
+ # Save the uploaded file into a local temp directory and return its path
+ def save_uploaded_file(uploaded_file):
+     temp_dir = "temp_files"
+     os.makedirs(temp_dir, exist_ok=True)
+     file_path = os.path.join(temp_dir, uploaded_file.name)
+     with open(file_path, "wb") as f:
+         f.write(uploaded_file.getbuffer())
+     return file_path
+
+ # Function to extract text from PDF
+ def extract_text_from_pdf(pdf_file):
+     text = ""
+     RP_file = save_uploaded_file(pdf_file)
+     with open(RP_file, "rb") as file:
+         pdf_reader = PyPDF2.PdfReader(file)
+         for page in pdf_reader.pages:
+             # extract_text() can return None for image-only pages
+             text += page.extract_text() or ""
+     return text
+
+ def generate_summary(text: str):
+     # Tokenize the text; inputs beyond 1024 tokens are truncated,
+     # since that is BART's maximum input length
+     tokens = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)
+     summary_ids = model.generate(tokens.input_ids, num_beams=4, max_length=200, early_stopping=True)
+     return summary_ids
+
+ # Function to summarize text
+ def summarize_text(text: str) -> str:
+     summary_ids = generate_summary(text)
+     summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
+     return summary
+
+ # Function to extract key information from the paper
+ def extract_paper_info(text):
+     # Logic to extract key information from the paper (e.g., using regex, NLP techniques)
+     # This part can be expanded based on the specific requirements
+     pass
+
+
+ # Function to build and fine-tune the chatbot
+ def build_chatbot():
+     # Fine-tuning a language model for the chatbot using Langchain
+     lang_model = ''
+
+     # Additional fine-tuning steps can be added here
+
+     return lang_model
+
+
+ # Main function to run the Streamlit app
+ def main():
+     st.title("Research Paper Understanding Chatbot")
+     st.write("As of now, only summarization is supported.")
+
+     # Upload PDF file
+     uploaded_file = st.file_uploader("Upload a research paper (PDF)", type="pdf")
+
+     if uploaded_file is not None:
+         st.write("Paper uploaded successfully!")
+
+         # Extract text from PDF
+         text = extract_text_from_pdf(uploaded_file)
+
+         # Display summary of the paper
+         st.subheader("Summary of the Paper")
+         with st.spinner("Brewing a potion for your paper's essence..."):
+             summary = summarize_text(text)
+         st.write(summary)
+
+         # # Extract key information from the paper
+         # st.subheader("Key Information")
+         # paper_info = extract_paper_info(text)
+         # st.write(paper_info)
+
+         # # Build chatbot
+         # st.subheader("Chatbot")
+         # chatbot = build_chatbot()
+
+         # # Chat interface
+         # user_input = st.text_input("You: ")
+         # if user_input:
+         #     response = chatbot.generate_response(user_input)
+         #     st.write("Chatbot:", response)
+
+     else:
+         st.write("Please upload a PDF file.")
+
+
+ if __name__ == "__main__":
+     main()
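One limitation worth noting: generate_summary truncates its input at 1024 tokens (BART's maximum), so for a full-length paper only the opening pages actually reach the model. Below is a minimal sketch of one workaround, splitting the token stream into windows and summarizing each piece. It assumes the tokenizer and model objects defined in app.py; summarize_long_text and window are illustrative names, not part of the committed code.

# Sketch: chunked summarization for texts longer than BART's 1024-token window.
# Assumes `tokenizer` and `model` are the BART objects loaded in app.py.
def summarize_long_text(text: str, window: int = 1024) -> str:
    # Encode once, without truncation, to get the full token stream
    ids = tokenizer(text, return_tensors="pt", truncation=False).input_ids[0]
    chunk_summaries = []
    for start in range(0, len(ids), window):
        # Take one window of tokens and restore the batch dimension
        chunk = ids[start:start + window].unsqueeze(0)
        summary_ids = model.generate(chunk, num_beams=4, max_length=200, early_stopping=True)
        chunk_summaries.append(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
    return " ".join(chunk_summaries)

The per-chunk summaries are simply concatenated here; running summarize_text once more over the joined string would be one way to compress them into a single abstract.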
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ streamlit
+ transformers
+ langchain
+ PyPDF2
+ tensorflow
+ tf-keras
+ torch
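A note on the dependency list: app.py imports only streamlit, PyPDF2, os, and the PyTorch BART classes from transformers; the langchain import is commented out, and nothing references tensorflow or tf-keras. Assuming no other file in the Space needs them, the list could likely be trimmed to:

streamlit
transformers
PyPDF2
torch

Pinning versions would also make the Space's builds reproducible.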