Spaces:
Build error
Build error
initial commit
Browse files- app.py +105 -0
- requirements.txt +7 -0
app.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
# import langchain
|
| 3 |
+
import PyPDF2
|
| 4 |
+
import os
|
| 5 |
+
from transformers import BartTokenizer , BartForConditionalGeneration
|
| 6 |
+
|
| 7 |
+
# Load the pretrained BART-large-CNN summarization model and its tokenizer
# once at import time so every request reuses the same weights.
# NOTE(review): this downloads the checkpoint on first run — confirm the
# deployment environment has network access / a model cache.
tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def save_uploaded_file(uploaded_file):
    """Persist a Streamlit upload to a local temp directory.

    Args:
        uploaded_file: object exposing ``.name`` and ``.getbuffer()``
            (the Streamlit ``UploadedFile`` interface).

    Returns:
        str: path of the file written under ``temp_files/``.
    """
    temp_dir = "temp_files"
    os.makedirs(temp_dir, exist_ok=True)
    # BUG FIX: use only the basename of the client-supplied filename so a
    # crafted name like "../../x" cannot escape the temp directory
    # (path-traversal hardening for untrusted upload metadata).
    file_path = os.path.join(temp_dir, os.path.basename(uploaded_file.name))
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path
| 18 |
+
|
| 19 |
+
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Save the uploaded PDF to disk and return the concatenated text of
    all its pages.

    Args:
        pdf_file: Streamlit ``UploadedFile`` for the PDF.

    Returns:
        str: extracted text; image-only pages contribute "".
    """
    saved_path = save_uploaded_file(pdf_file)
    parts = []
    with open(saved_path, "rb") as file:
        pdf_reader = PyPDF2.PdfReader(file)
        # Iterate pages directly instead of indexing with range(len(...)).
        for page in pdf_reader.pages:
            # BUG FIX: extract_text() can return None (e.g. scanned /
            # image-only pages); "or ''" avoids a TypeError on concatenation.
            parts.append(page.extract_text() or "")
    # join() builds the result in one pass instead of quadratic "+=".
    return "".join(parts)
| 30 |
+
|
| 31 |
+
def generate_summary(text: str):
    """Run BART beam-search generation over *text*.

    Args:
        text: raw document text to summarize.

    Returns:
        torch.Tensor: generated summary token ids, shape (1, seq_len).
    """
    # Tokenize; BART's encoder takes at most 1024 tokens, so truncate.
    inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)
    # IMPROVEMENT: pass the attention mask explicitly so non-content
    # positions are ignored during generation — transformers warns when it
    # is omitted and output can degrade if pad tokens are present.
    summary_ids = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        num_beams=4,
        max_length=200,
        early_stopping=True,
    )
    return summary_ids
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# Function to summarize text
def summarize_text(text: str) -> str:
    """Summarize *text* with the module-level BART model.

    Args:
        text: raw document text.

    Returns:
        str: decoded summary.
    """
    summary_ids = generate_summary(text)
    # BUG FIX: the keyword was misspelled "clean_ip_tokenization_spaces",
    # so it was silently swallowed by **kwargs and had no effect; the
    # correct transformers keyword is "clean_up_tokenization_spaces".
    summary = tokenizer.decode(
        summary_ids[0],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return summary
|
| 47 |
+
|
| 48 |
+
# Function to extract key information from the paper
def extract_paper_info(text):
    """Placeholder for pulling key metadata (title, authors, sections)
    out of the paper text; not implemented yet."""
    # Intended future work: regex / NLP-based extraction of key fields.
    return None
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
# Function to build and fine-tune the chatbot
def build_chatbot():
    """Placeholder chatbot builder.

    Returns:
        str: currently an empty string standing in for the future
        Langchain-backed, fine-tuned language model.
    """
    chatbot_model = ''
    # Additional fine-tuning steps can be added here.
    return chatbot_model
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
# Main function to run the Streamlit app
def main():
    """Entry point for the Streamlit UI: upload a PDF, show its summary."""
    st.title("Research Paper Understanding Chatbot")
    st.write("As of now supports only summarization.")

    # Upload widget for the research paper.
    uploaded_file = st.file_uploader("Upload a research paper (PDF)", type="pdf")

    # Guard clause: nothing to do until a file arrives.
    if uploaded_file is None:
        st.write("Please upload a PDF file")
        return

    st.write("Paper uploaded successfully!")

    # Pull the raw text out of the PDF, then summarize it.
    text = extract_text_from_pdf(uploaded_file)

    st.subheader("Summary of the Paper")
    with st.spinner("Brewing a potion for your paper's essence..."):
        summary = summarize_text(text)
    st.write(summary)

    # TODO: key-information extraction (extract_paper_info) and a chat
    # interface (build_chatbot) were sketched here but are not wired up yet.
| 103 |
+
|
| 104 |
+
# Standard script guard: launch the Streamlit app when run directly
# (streamlit itself imports this module, so side effects stay minimal).
if __name__ == "__main__":
    main()
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
transformers
|
| 3 |
+
langchain
|
| 4 |
+
PyPDF2
|
| 5 |
+
tensorflow
|
| 6 |
+
tf-keras
|
| 7 |
+
torch
|