Spaces:

MahatirTusher
/

PaperQuest-Research_Finder_and_Text_Companion

Sleeping

App Files Files Community

MahatirTusher committed on Apr 2

Commit

9ff0fe4

verified ·

1 Parent(s): c5e8d1d

Upload app.py

Browse files

Files changed (1) hide show

app.py +1033 -0

app.py ADDED Viewed

	@@ -0,0 +1,1033 @@

import base64
import functools
import io
import json
import os
import re
import time
import xml.etree.ElementTree as ET

import streamlit as st
import requests
import pandas as pd
import transformers
from transformers import pipeline
import tensorflow
# Browser-tab title/icon plus the overall layout of the Streamlit page.
_PAGE_CONFIG = {
    "page_title": "PaperQuest: Research Finder",
    "page_icon": "📚",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_CONFIG)
# Stylesheet injected into the page: theme variables, buttons, cards, the
# hero banner, tables, feature icons and the footer.
_CUSTOM_CSS = """
<style>
    /* Main theme colors */
    :root {
        --primary-color: #4361ee;
        --secondary-color: #3a0ca3;
        --accent-color: #4cc9f0;
        --background-color: #f8f9fa;
        --text-color: #212529;
    }
    /* Overall page styling */
    .main {
        background-color: var(--background-color);
        color: var(--text-color);
    }
    /* Header styling */
    h1, h2, h3 {
        color: var(--primary-color);
        font-family: 'Helvetica Neue', sans-serif;
    }
    /* Custom button styling */
    .stButton > button {
        background-color: var(--primary-color);
        color: white;
        border-radius: 6px;
        border: none;
        padding: 0.5rem 1rem;
        font-weight: 600;
        transition: all 0.3s;
    }
    .stButton > button:hover {
        background-color: var(--secondary-color);
        transform: translateY(-2px);
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    }
    /* Custom sidebar styling */
    .css-1d391kg {
        background-color: #f1f3f8;
    }
    /* Card-like containers */
    .card {
        background-color: white;
        border-radius: 10px;
        padding: 20px;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
        margin-bottom: 20px;
    }
    /* Hero section */
    .hero {
        background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
        color: white;
        padding: 2rem;
        border-radius: 10px;
        margin-bottom: 2rem;
        text-align: center;
    }
    /* Tables */
    .dataframe {
        width: 100%;
        border-collapse: collapse;
    }
    .dataframe th {
        background-color: var(--primary-color);
        color: white;
        text-align: left;
        padding: 12px;
    }
    .dataframe td {
        padding: 8px 12px;
        border-bottom: 1px solid #ddd;
    }
    .dataframe tr:nth-child(even) {
        background-color: #f9f9f9;
    }
    /* Feature icons */
    .feature-icon {
        font-size: 2.5rem;
        color: var(--primary-color);
        margin-bottom: 1rem;
        text-align: center;
    }
    /* Footer */
    .footer {
        text-align: center;
        padding: 20px;
        background-color: #f1f3f8;
        margin-top: 40px;
        border-radius: 10px;
    }
</style>
"""
# NOTE(review): `.css-1d391kg` looks like an auto-generated class name and may
# not be stable across Streamlit versions - confirm it still targets the sidebar.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
+import requests
+import xml.etree.ElementTree as ET
+import pandas as pd
+import streamlit as st
+import re
def search_crossref(query, rows=10, timeout=15):
    """Search the CrossRef works API for journal articles matching ``query``.

    Args:
        query: Free-text search string.
        rows: Maximum number of records to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would freeze the
            page on a stalled connection.

    Returns:
        The decoded JSON response, or ``None`` when the request failed (the
        failure is reported to the user through ``st.error``).
    """
    url = "https://api.crossref.org/works"
    params = {
        "query": query,
        "rows": rows,
        "filter": "type:journal-article",
    }
    try:
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as e:
        st.error(f"HTTP error occurred: {e}")
        return None
    except Exception as e:
        # UI boundary: timeouts, connection errors and malformed JSON are all
        # shown to the user instead of crashing the page.
        st.error(f"An error occurred: {e}")
        return None
def search_semantic_scholar(query, limit=10, timeout=15):
    """Search the Semantic Scholar Graph API for papers matching ``query``.

    Args:
        query: Free-text search string.
        limit: Maximum number of papers to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely.

    Returns:
        The decoded JSON response, or ``None`` when the request failed (the
        failure is reported to the user through ``st.error``).
    """
    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {
        "query": query,
        "limit": limit,
        "fields": "title,authors,venue,year,abstract,url,externalIds",
    }
    headers = {
        "Accept": "application/json"
        # Add your API key if you have one: "x-api-key": "YOUR_API_KEY"
    }
    try:
        response = requests.get(url, params=params, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as e:
        st.error(f"Semantic Scholar HTTP error: {e}")
        return None
    except Exception as e:
        # UI boundary: timeouts, connection errors and malformed JSON are all
        # shown to the user instead of crashing the page.
        st.error(f"Semantic Scholar error: {e}")
        return None
# XML namespaces used by the arXiv Atom feed.
_ARXIV_NAMESPACES = {
    "atom": "http://www.w3.org/2005/Atom",
    "arxiv": "http://arxiv.org/schemas/atom",
}


def _parse_arxiv_entry(entry):
    """Flatten one Atom ``<entry>`` element into a dict of paper fields."""

    def text_of(path, parent=entry):
        # A missing or empty element yields "" instead of raising, so a
        # single odd entry cannot discard the whole result set.
        found = parent.findtext(path, default="", namespaces=_ARXIV_NAMESPACES)
        return (found or "").strip()

    author_names = [
        text_of("atom:name", author)
        for author in entry.findall("atom:author", _ARXIV_NAMESPACES)
    ]
    # The PDF location is the <link> element carrying title="pdf".
    pdf_url = next(
        (
            link.get("href", "")
            for link in entry.findall("atom:link", _ARXIV_NAMESPACES)
            if link.get("title") == "pdf"
        ),
        "",
    )
    return {
        # Titles are wrapped across lines in the feed; collapse the whitespace.
        "title": " ".join(text_of("atom:title").split()),
        "authors": ', '.join(name for name in author_names if name),
        "abstract": text_of("atom:summary"),
        "url": pdf_url,
        # Keep only the date part of the ISO timestamp.
        "published": text_of("atom:published").split('T')[0],
        "arxiv_id": text_of("atom:id").split('/abs/')[-1],
        # Present only when the authors registered a DOI for the paper.
        "doi": text_of("arxiv:doi"),
    }


def search_arxiv(query, max_results=10, timeout=15):
    """Search arXiv for papers matching ``query``.

    Args:
        query: Free-text search string, matched against all fields.
        max_results: Maximum number of entries to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely.

    Returns:
        ``{"entries": [...]}`` where each entry is a dict with the keys
        ``title``, ``authors``, ``abstract``, ``url``, ``published``,
        ``arxiv_id`` and ``doi``; or ``None`` when the request or the XML
        parsing failed (the failure is reported through ``st.error``).
    """
    base_url = "http://export.arxiv.org/api/query"
    params = {
        "search_query": f"all:{query}",
        "max_results": max_results,
        "sortBy": "relevance",
        "sortOrder": "descending",
    }
    try:
        response = requests.get(base_url, params=params, timeout=timeout)
        response.raise_for_status()
        root = ET.fromstring(response.content)
        entries = [
            _parse_arxiv_entry(entry)
            for entry in root.findall("atom:entry", _ARXIV_NAMESPACES)
        ]
        return {"entries": entries}
    except requests.exceptions.HTTPError as e:
        st.error(f"arXiv HTTP error: {e}")
        return None
    except Exception as e:
        # UI boundary: timeouts, connection errors and XML parse errors are
        # shown to the user instead of crashing the page.
        st.error(f"arXiv error: {e}")
        return None
def fetch_pubmed_abstract(doi, timeout=10):
    """Look up a paper's abstract on PubMed by DOI (best effort).

    The DOI is first resolved to a PubMed ID with ``esearch``; the record is
    then downloaded with ``efetch`` and its ``AbstractText`` sections joined.

    Args:
        doi: DOI of the paper.
        timeout: Seconds to wait for each of the two HTTP requests.

    Returns:
        The abstract text, or ``""`` when the paper is not in PubMed, has no
        abstract, or any step of the lookup fails.
    """
    eutils_base = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
    try:
        # Passing the term through ``params`` lets requests URL-encode the
        # DOI; DOIs may contain characters that are not valid in a raw query.
        search_response = requests.get(
            f"{eutils_base}/esearch.fcgi",
            params={"db": "pubmed", "term": f"{doi}[doi]", "retmode": "json"},
            timeout=timeout,
        )
        search_response.raise_for_status()
        id_list = search_response.json().get('esearchresult', {}).get('idlist', [])
        if not id_list:
            return ""

        fetch_response = requests.get(
            f"{eutils_base}/efetch.fcgi",
            params={"db": "pubmed", "id": id_list[0], "retmode": "xml"},
            timeout=timeout,
        )
        fetch_response.raise_for_status()
        root = ET.fromstring(fetch_response.content)

        # itertext() also collects text nested inside inline markup
        # (<i>, <sup>, ...); ``elem.text`` alone stops at the first child tag.
        sections = (
            "".join(elem.itertext()).strip()
            for elem in root.findall(".//AbstractText")
        )
        return " ".join(section for section in sections if section)
    except Exception:
        # Deliberately best-effort: the caller falls back to other abstract
        # sources, so a failed lookup must never break the search.
        return ""
def display_crossref_results(data):
    """Convert a CrossRef API response into a list of paper dicts.

    For every item with a DOI the abstract is first looked up on PubMed; if
    that yields nothing, the abstract shipped by CrossRef (with its markup
    tags stripped) is used. A progress bar is shown while items are processed.

    Args:
        data: Decoded CrossRef JSON response, or ``None``.

    Returns:
        A list of dicts with the keys ``Source``, ``Title``, ``Author(s)``,
        ``Journal``, ``Abstract``, ``DOI``, ``Link`` and ``Published``; or
        ``None`` (after a warning) when there is nothing to show.
    """
    if not data:
        st.warning("No CrossRef data to display.")
        return None
    items = data.get('message', {}).get('items', [])
    if not items:
        st.warning("No CrossRef results found.")
        return None

    def first_or_blank(values):
        # List fields may be missing or empty; indexing [0] blindly raises.
        return values[0] if values else ''

    def author_label(author):
        # Organisation authors carry `name` instead of `family`.
        return author.get('family') or author.get('name') or ''

    paper_list = []
    total = len(items)
    progress_bar = st.progress(0)
    status_text = st.empty()
    for position, item in enumerate(items, start=1):
        status_text.text(f"Processing CrossRef paper {position}/{total}...")
        doi = item.get('DOI', '')

        abstract = fetch_pubmed_abstract(doi) if doi else ""
        if not abstract and item.get('abstract'):
            abstract = re.sub(r'<[^>]+>', '', item['abstract'])

        # `issued.date-parts` is a nested list ([[year, month, day]]) that can
        # be missing, empty, or hold a null year.
        date_parts = (item.get('issued') or {}).get('date-parts') or [[]]
        first_date = date_parts[0] or [None]
        year = first_date[0]

        paper_list.append({
            "Source": "CrossRef",
            "Title": first_or_blank(item.get('title')),
            "Author(s)": ', '.join(
                label
                for label in (author_label(a) for a in item.get('author') or [])
                if label
            ),
            "Journal": first_or_blank(item.get('container-title')),
            "Abstract": abstract,
            "DOI": doi,
            "Link": item.get('URL', ''),
            "Published": year if year is not None else "N/A",
        })
        progress_bar.progress(position / total)

    # Clear progress indicators
    progress_bar.empty()
    status_text.empty()
    return paper_list
def display_semantic_scholar_results(data):
    """Convert a Semantic Scholar search response into a list of paper dicts.

    The API sends JSON ``null`` for unknown fields, so a key can be present
    with the value ``None``. Such values are normalised to empty strings (or
    ``"N/A"`` for the year) instead of leaking into the result rows.

    Args:
        data: Decoded Semantic Scholar JSON response, or ``None``.

    Returns:
        A list of dicts with the keys ``Source``, ``Title``, ``Author(s)``,
        ``Journal``, ``Abstract``, ``DOI``, ``Link`` and ``Published``; or
        ``None`` (after a warning) when there is nothing to show.
    """
    if not data:
        st.warning("No Semantic Scholar data to display.")
        return None
    items = data.get('data') or []
    if not items:
        st.warning("No Semantic Scholar results found.")
        return None

    paper_list = []
    for item in items:
        author_names = ', '.join(
            name
            for name in ((author.get('name') or '') for author in item.get('authors') or [])
            if name
        )
        # `externalIds` may be null; `.get(..., {})` does not protect against
        # that because the key exists.
        external_ids = item.get('externalIds') or {}
        paper_list.append({
            "Source": "Semantic Scholar",
            "Title": item.get('title') or '',
            "Author(s)": author_names,
            "Journal": item.get('venue') or '',
            "Abstract": item.get('abstract') or '',
            "DOI": external_ids.get('DOI') or '',
            "Link": item.get('url') or '',
            "Published": item.get('year') or 'N/A',
        })
    return paper_list
def display_arxiv_results(data):
    """Convert the dict produced by ``search_arxiv`` into a list of paper dicts.

    Args:
        data: ``{"entries": [...]}`` or ``None``.

    Returns:
        A list of dicts with the keys ``Source``, ``Title``, ``Author(s)``,
        ``Journal``, ``Abstract``, ``DOI``, ``Link``, ``Published`` and
        ``arXiv ID``; or ``None`` (after a warning) when there is nothing to
        show.
    """
    if not data:
        st.warning("No arXiv data to display.")
        return None

    found = data.get('entries', [])
    if not found:
        st.warning("No arXiv results found.")
        return None

    # Maps each output column to (key in the parsed entry, fallback value).
    column_sources = (
        ("Title", 'title', ''),
        ("Author(s)", 'authors', ''),
        ("Abstract", 'abstract', ''),
        ("DOI", 'doi', ''),
        ("Link", 'url', ''),
        ("Published", 'published', 'N/A'),
        ("arXiv ID", 'arxiv_id', ''),
    )

    rows = []
    for record in found:
        values = {column: record.get(key, fallback) for column, key, fallback in column_sources}
        rows.append({
            "Source": "arXiv",
            "Title": values["Title"],
            "Author(s)": values["Author(s)"],
            "Journal": "arXiv preprint",
            "Abstract": values["Abstract"],
            "DOI": values["DOI"],
            "Link": values["Link"],
            "Published": values["Published"],
            "arXiv ID": values["arXiv ID"],
        })
    return rows
def run_comprehensive_search(query, max_results=10, sources=None):
    """Search several academic databases and merge the results.

    Args:
        query: Free-text search string.
        max_results: Maximum number of results to request from each database.
        sources: Optional iterable of database names to query, chosen from
            ``"CrossRef"``, ``"Semantic Scholar"`` and ``"arXiv"``. ``None``
            (the default) or a selection containing ``"All"`` queries every
            database.

    Returns:
        A ``pandas.DataFrame`` with one row per paper (rows keep the order
        CrossRef, Semantic Scholar, arXiv), or ``None`` (after a warning) when
        no database returned anything.
    """
    # (label, callable returning a list of paper dicts or None)
    backends = [
        ("CrossRef",
         lambda: display_crossref_results(search_crossref(query, rows=max_results))),
        ("Semantic Scholar",
         lambda: display_semantic_scholar_results(search_semantic_scholar(query, limit=max_results))),
        ("arXiv",
         lambda: display_arxiv_results(search_arxiv(query, max_results=max_results))),
    ]
    if sources is not None and "All" not in sources:
        wanted = set(sources)
        backends = [backend for backend in backends if backend[0] in wanted]

    all_results = []
    if backends:
        with st.spinner("Searching multiple academic databases..."):
            # One status column per database.
            status_columns = st.columns(len(backends))
            for column, (label, fetch) in zip(status_columns, backends):
                with column:
                    with st.spinner(f"Searching {label}..."):
                        results = fetch()
                        if results:
                            st.success(f"Found {len(results)} results in {label}")
                            all_results.extend(results)
                        else:
                            st.info(f"No results from {label}")

    if all_results:
        return pd.DataFrame(all_results)
    st.warning("No results found across any of the academic databases.")
    return None
def display_results(data):
    """Render a raw CrossRef response as a table and return it.

    Unlike ``display_crossref_results`` this only uses the abstract contained
    in the CrossRef record itself (markup tags stripped).

    Args:
        data: Decoded CrossRef JSON response, or ``None``.

    Returns:
        The ``pandas.DataFrame`` that was displayed, or ``None`` (after a
        warning) when there is nothing to show.
    """
    if not data:
        st.warning("No data to display.")
        return None
    items = data.get('message', {}).get('items', [])
    if not items:
        st.warning("No results found for the query.")
        return None

    def first_or_blank(values):
        # List fields may be missing or empty; indexing [0] blindly raises.
        return values[0] if values else ''

    paper_list = []
    for item in items:
        raw_abstract = item.get('abstract') or ''
        # `issued.date-parts` is a nested list ([[year, month, day]]) that can
        # be missing, empty, or hold a null year.
        date_parts = (item.get('issued') or {}).get('date-parts') or [[]]
        first_date = date_parts[0] or [None]
        year = first_date[0]
        paper_list.append({
            "Title": first_or_blank(item.get('title')),
            "Author(s)": ', '.join(
                author.get('family', '') for author in item.get('author') or []
            ),
            "Journal": first_or_blank(item.get('container-title')),
            # Clean up the abstract text - remove markup tags if present
            "Abstract": re.sub(r'<[^>]+>', '', raw_abstract),
            "DOI": item.get('DOI', ''),
            "Link": item.get('URL', ''),
            "Published": year if year is not None else "N/A",
        })

    df = pd.DataFrame(paper_list)
    st.write(df)
    return df
def generate_literature_survey(papers, api_key=None):
    """Generate a themed literature survey from paper abstracts via the Groq API.

    Args:
        papers: List of paper dicts; the keys ``Title``, ``Author(s)``,
            ``Published`` and ``Abstract`` are used.
        api_key: Groq API key. When omitted, the ``GROQ_API_KEY`` environment
            variable is used. A key must never be hard-coded in the source:
            anything committed to the repository is readable by everyone.

    Returns:
        The survey as markdown text, or a human-readable message explaining
        why no survey could be produced.
    """
    if not papers:
        return "No papers found to generate a literature survey."

    # Only abstracts with real content are useful. The isinstance check also
    # rejects NaN, which appears when rows come from a DataFrame with gaps.
    papers_with_abstracts = [
        paper for paper in papers
        if isinstance(paper.get("Abstract"), str) and len(paper["Abstract"]) > 50
    ]
    if not papers_with_abstracts:
        return "Cannot generate a literature survey because none of the papers have substantial abstracts."

    api_key = api_key or os.environ.get("GROQ_API_KEY")
    if not api_key:
        reason = "no Groq API key configured (set the GROQ_API_KEY environment variable)"
        st.error(f"Error generating literature survey: {reason}")
        return f"Failed to generate literature survey due to an error: {reason}"

    # Construct the prompt for the LLM
    paper_info = [
        f"Paper {number}:\nTitle: {paper.get('Title', 'Unknown')}\nAuthors: {paper.get('Author(s)', 'Unknown')}\nYear: {paper.get('Published', 'Unknown')}\nAbstract: {paper.get('Abstract', 'No abstract available')}\n"
        for number, paper in enumerate(papers_with_abstracts[:10], start=1)  # Limit to 10 papers to avoid token limits
    ]
    papers_text = "\n".join(paper_info)
    prompt = f"""You are an expert academic researcher. Based on the following papers and their abstracts,
write a concise literature survey that:
1. Identifies the main themes and research directions
2. Highlights methodological approaches
3. Summarizes key findings
4. Points out research gaps if evident
5. Suggests potential future research directions
Here are the papers:
{papers_text}
Please organize the survey by themes rather than by individual papers, creating connections between studies.
Format your response with markdown headings for better readability.
"""
    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        # NOTE(review): this looks like the Hugging Face repository id; Groq's
        # own model ids are lower-case (e.g. "llama-3.3-70b-versatile") -
        # confirm against the Groq model list.
        "model": "meta-llama/Llama-3.3-70B-Instruct",
        "messages": [
            {"role": "system", "content": "You are an academic research assistant that creates comprehensive literature surveys."},
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.3,
        "max_tokens": 2000,
    }
    try:
        # Generation can be slow, but it must not block the page forever.
        response = requests.post(url, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        st.error(f"Error generating literature survey: {e}")
        return f"Failed to generate literature survey due to an error: {str(e)}"
def add_literature_survey_button(search_results_df):
    """Offer a "Generate Literature Survey" button for a set of search results.

    When the button is pressed, the rows of ``search_results_df`` are handed
    to ``generate_literature_survey`` and the resulting markdown is rendered
    together with a download button. If there are no results, an info message
    is shown instead.

    Parameters:
    search_results_df (pandas.DataFrame): DataFrame containing search results
    """
    nothing_to_survey = search_results_df is None or search_results_df.empty
    if nothing_to_survey:
        st.info("Run a search first to generate a literature survey.")
        return

    # Nudge the user towards arXiv when none of the rows came from it.
    includes_arxiv = (
        "Source" in search_results_df.columns
        and "arXiv" in search_results_df["Source"].values
    )
    if not includes_arxiv:
        st.warning("For best literature survey results, include arXiv in your search sources. arXiv papers typically have more comprehensive abstracts.")

    if not st.button("Generate Literature Survey"):
        return

    with st.spinner("Generating literature survey using AI... This may take a minute."):
        paper_records = search_results_df.to_dict('records')
        survey_markdown = generate_literature_survey(paper_records)

        st.markdown("## Literature Survey")
        st.markdown(survey_markdown)

        download_options = {
            "label": "Download Survey as Text",
            "data": survey_markdown,
            "file_name": "literature_survey.md",
            "mime": "text/markdown",
        }
        st.download_button(**download_options)
def literature_survey_page():
    """Render the literature-survey page.

    Uses the results of the last search stored in
    ``st.session_state.search_results_df``; when there are none, the user is
    pointed to the search page instead.
    """
    st.markdown('<div class="card">', unsafe_allow_html=True)
    st.title("Literature Survey Generator")
    st.write("Generate comprehensive literature surveys from your search results.")

    last_results = st.session_state.search_results_df
    have_results = last_results is not None and not last_results.empty
    if not have_results:
        st.info("Please perform a search first to gather papers for your literature survey.")
        go_to_search = st.button("Go to Search Page")
        if go_to_search:
            st.session_state.page = "search"
    else:
        paper_count = len(last_results)
        st.write(f"Using {paper_count} papers from your last search.")
        add_literature_survey_button(last_results)

    st.markdown('</div>', unsafe_allow_html=True)
@functools.lru_cache(maxsize=4)
def _load_text2text_pipeline(model_name):
    """Build the PyTorch-backed text2text pipeline for ``model_name`` once.

    Constructing a pipeline loads the model weights, which is far too slow to
    repeat on every call. A failed load raises and is therefore not cached,
    so the next call retries.
    """
    return pipeline("text2text-generation", model=model_name, framework="pt")


def summarize_text(text):
    """Summarise ``text`` with the ``flan-t5-large-website-summarizer`` model.

    Returns:
        The generated summary, or ``"Summary could not be generated."`` when
        the input is blank or the model fails (a failure is also reported via
        ``st.error``).
    """
    # Nothing to summarise; forcing the model to emit min_length tokens from
    # empty input would only produce noise.
    if not text or not text.strip():
        return "Summary could not be generated."
    try:
        summarizer = _load_text2text_pipeline("spacemanidol/flan-t5-large-website-summarizer")
        summary = summarizer(text, max_length=150, min_length=50, do_sample=False)
        return summary[0]['generated_text']
    except Exception as e:
        st.error(f"An error occurred during summarization: {e}")
        return "Summary could not be generated."


def generate_text(text):
    """Generate text from ``text`` with the ``JorgeSarry/est5-summarize`` model.

    Returns:
        The generated text, or ``"Generated text could not be created."``
        when the input is blank or the model fails (a failure is also
        reported via ``st.error``).
    """
    if not text or not text.strip():
        return "Generated text could not be created."
    try:
        text_generator = _load_text2text_pipeline("JorgeSarry/est5-summarize")
        generated_text = text_generator(text, max_length=150, min_length=50, do_sample=False)
        return generated_text[0]['generated_text']
    except Exception as e:
        st.error(f"An error occurred during text generation: {e}")
        return "Generated text could not be created."
def convert_df_to_csv(df):
    """Serialise ``df`` (without its index) to UTF-8 encoded CSV bytes."""
    csv_text = df.to_csv(index=False)
    return csv_text.encode('utf-8')
def convert_df_to_txt(df):
    """Render ``df`` as a tab-separated plain-text report.

    The output is a title line, a blank line, the tab-joined column names, a
    dashed rule of the same length, and then one line per row.

    Whitespace inside a cell (tabs, line breaks, runs of spaces) is collapsed
    to single spaces: abstracts routinely contain line breaks, and a raw tab
    or newline in a cell would break the one-row-per-line, tab-separated
    layout.

    Args:
        df: DataFrame to export.

    Returns:
        The report as a single string ending with a newline.
    """
    def clean(value):
        return " ".join(str(value).split())

    header_line = "\t".join(str(column) for column in df.columns)
    lines = [
        "PaperQuest Research Results",
        "",
        header_line,
        "-" * len(header_line),
    ]
    lines.extend(
        "\t".join(clean(value) for value in row)
        for row in df.itertuples(index=False, name=None)
    )
    return "\n".join(lines) + "\n"
def get_download_button(df, file_type="csv", button_text="Download as CSV"):
    """Build an HTML download link that embeds ``df`` as a base64 data URI.

    ``file_type="csv"`` embeds the CSV export; any other value embeds the
    plain-text export. ``file_type`` is also used as the extension of the
    suggested file name.

    Returns:
        An ``<a>`` element as a string, meant to be rendered with
        ``st.markdown(..., unsafe_allow_html=True)``.
    """
    wants_csv = file_type == "csv"
    if wants_csv:
        payload = convert_df_to_csv(df)
        mime_type = "text/csv"
    else:
        payload = convert_df_to_txt(df).encode()
        mime_type = "text/plain"

    encoded = base64.b64encode(payload).decode()
    data_uri = f'data:{mime_type};base64,{encoded}'
    return f'<a href="{data_uri}" download="research_results.{file_type}" class="download-button">{button_text}</a>'
# Navigation functions
def _show_home_results(results_df):
    """Show the result count, the result table and the two download links."""
    st.subheader(f"Found {len(results_df)} papers")
    st.write(results_df)
    csv_col, txt_col = st.columns(2)
    with csv_col:
        st.markdown(get_download_button(results_df, "csv", "📊 Download as CSV"), unsafe_allow_html=True)
    with txt_col:
        st.markdown(get_download_button(results_df, "txt", "📝 Download as Text"), unsafe_allow_html=True)


def _run_home_search(query, num_papers, search_sources):
    """Run the search for the chosen sources, remember the result, and show it."""
    single_source_search = {
        "CrossRef": lambda: display_crossref_results(search_crossref(query, rows=num_papers)),
        "Semantic Scholar": lambda: display_semantic_scholar_results(search_semantic_scholar(query, limit=num_papers)),
        "arXiv": lambda: display_arxiv_results(search_arxiv(query, max_results=num_papers)),
    }

    if "All" in search_sources or len(search_sources) > 1:
        results_df = run_comprehensive_search(query, max_results=num_papers)
        # Called this way the comprehensive search queries every database;
        # keep only the rows from the sources the user actually selected.
        if results_df is not None and "All" not in search_sources:
            results_df = results_df[results_df["Source"].isin(search_sources)].reset_index(drop=True)
            if results_df.empty:
                st.warning("No results found in the selected sources.")
                results_df = None
    else:
        with st.spinner('Searching for papers...'):
            paper_list = single_source_search[search_sources[0]]()
        results_df = pd.DataFrame(paper_list) if paper_list else None

    # Always overwrite the stored results, even with None, so a failed search
    # never re-displays the papers of an earlier query.
    st.session_state.search_results_df = results_df
    if results_df is not None:
        _show_home_results(results_df)


def home_page():
    """Render the landing page: hero banner, quick paper search, feature cards.

    A successful search stores its DataFrame in
    ``st.session_state.search_results_df`` (``None`` when nothing was found)
    so that other pages can reuse it.
    """
    # Hero section
    st.markdown('<div class="hero">', unsafe_allow_html=True)
    st.title("PaperQuest: Research Finder and Text Companion")
    st.markdown("Discover academic insights and enhance your research journey with our powerful tools")
    st.markdown('</div>', unsafe_allow_html=True)

    # Search bar directly on the home page
    st.markdown('<div class="card">', unsafe_allow_html=True)
    st.subheader("📚 Find Research Papers")
    query_col, count_col = st.columns([3, 1])
    with query_col:
        query = st.text_input("Enter your research topic or keywords", value="machine learning optimization")
    with count_col:
        num_papers = st.slider("Results per source", min_value=5, max_value=50, value=10)
    search_sources = st.multiselect(
        "Select sources",
        options=["CrossRef", "Semantic Scholar", "arXiv", "All"],
        default=["CrossRef"]
    )
    if st.button("Search Papers", key="search_home"):
        if not query or not query.strip():
            st.warning("Please enter a search query.")
        elif not search_sources:
            # The multiselect can be emptied by the user; say so instead of
            # silently doing nothing.
            st.warning("Please select at least one source.")
        else:
            _run_home_search(query, num_papers, search_sources)
    st.markdown('</div>', unsafe_allow_html=True)

    # Features section
    st.markdown("<h2 style='text-align: center; margin-top: 40px;'>Features</h2>", unsafe_allow_html=True)
    feature_cards = [
        ("🔍", "Comprehensive Search", "Access thousands of academic papers from CrossRef, Semantic Scholar, and arXiv"),
        ("📝", "Text Summarization", "Extract key insights from complex research papers"),
        ("✨", "Smart Text Generation", "Get assistance with creating coherent research content"),
    ]
    for column, (icon, title, blurb) in zip(st.columns(3), feature_cards):
        with column:
            st.markdown('<div class="card">', unsafe_allow_html=True)
            st.markdown(f'<div class="feature-icon">{icon}</div>', unsafe_allow_html=True)
            st.markdown(f"<h3 style='text-align: center;'>{title}</h3>", unsafe_allow_html=True)
            st.markdown(f"<p style='text-align: center;'>{blurb}</p>", unsafe_allow_html=True)
            st.markdown('</div>', unsafe_allow_html=True)
+def search_page():
+    st.markdown('<div class="card">', unsafe_allow_html=True)
+    st.title("Research Paper Search")
+    st.write("Find and explore academic papers across various disciplines.")
+    query = st.text_input("Enter your research topic or keywords", value="machine learning optimization")
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        num_papers = st.slider("Results per source", min_value=5, max_value=50, value=10)
+    with col2:
+        search_sources = st.multiselect(
+            "Select sources",
+            options=["CrossRef", "Semantic Scholar", "arXiv", "All"],
+            default=["CrossRef"]
+        )
+    with col3:
+        st.write(" ")  # Spacer
+        st.write(" ")  # Spacer
+        search_clicked = st.button("Search")
+    if search_clicked:
+        if query:
+            if "All" in search_sources or len(search_sources) > 1:
+                # Use comprehensive search function
+                results_df = run_comprehensive_search(query, max_results=num_papers)
+                if results_df is not None:
+                    st.subheader(f"Found {len(results_df)} papers across all selected sources")
+                    # Add filters
+                    st.subheader("Filter Results")
+                    selected_sources = st.multiselect(
+                        "Filter by sources",
+                        options=results_df["Source"].unique(),
+                        default=results_df["Source"].unique()
+                    )
+                    # Convert Published column to string to handle potential numeric values
+                    results_df["Published"] = results_df["Published"].astype(str)
+                    # Extract year from Published column where possible
+                    def get_year(published_str):
+                        try:
+                            if isinstance(published_str, str):
+                                return int(published_str.split('-')[0]) if '-' in published_str else int(published_str)
+                            return int(published_str) if published_str else None
+                        except:
+                            return None
+                    results_df["Year"] = results_df["Published"].apply(get_year)
+                    # Filter out None values for the slider
+                    valid_years = [year for year in results_df["Year"] if year is not None]
+                    if valid_years:
+                        min_year = min(valid_years)
+                        max_year = max(valid_years)
+                        year_range = st.slider(
+                            "Publication year range",
+                            min_value=min_year,
+                            max_value=max_year,
+                            value=(min_year, max_year)
+                        )
+                        # Apply filters
+                        filtered_df = results_df[
+                            (results_df["Source"].isin(selected_sources)) &
+                            ((results_df["Year"] >= year_range[0]) & (results_df["Year"] <= year_range[1]) | (results_df["Year"].isna()))
+                        ]
+                    else:
+                        # Just apply source filter if no valid years
+                        filtered_df = results_df[results_df["Source"].isin(selected_sources)]
+                    # Display filtered results
+                    st.subheader(f"Showing {len(filtered_df)} filtered results")
+                    # Display results with expandable rows
+                    for i, row in filtered_df.iterrows():
+                        with st.expander(f"{row['Title']} ({row['Source']}, {row['Published']})"):
+                            st.write(f"**Authors:** {row['Author(s)']}")
+                            st.write(f"**Journal/Venue:** {row['Journal']}")
+                            st.write(f"**Abstract:**")
+                            st.write(row['Abstract'] if row['Abstract'] and row['Abstract'].strip() else "No abstract available")
+                            # Display links
+                            if row['DOI']:
+                                st.write(f"**DOI:** https://doi.org/{row['DOI']}")
+                            if row['Link']:
+                                st.write(f"**Link:** {row['Link']}")
+                            if 'arXiv ID' in row and row['arXiv ID']:
+                                st.write(f"**arXiv ID:** {row['arXiv ID']}")
+                    st.session_state.search_results_df = filtered_df
+                    # Display download buttons
+                    col1, col2 = st.columns(2)
+                    with col1:
+                        st.markdown(get_download_button(filtered_df, "csv", "📊 Download as CSV"), unsafe_allow_html=True)
+                    with col2:
+                        st.markdown(get_download_button(filtered_df, "txt", "📝 Download as Text"), unsafe_allow_html=True)
+            else:
+                # Original single-source search
+                with st.spinner('Searching for papers...'):
+                    if "CrossRef" in search_sources:
+                        response_data = search_crossref(query, rows=num_papers)
+                        paper_list = display_crossref_results(response_data)
+                        if paper_list:
+                            st.session_state.search_results_df = pd.DataFrame(paper_list)
+                    elif "Semantic Scholar" in search_sources:
+                        response_data = search_semantic_scholar(query, limit=num_papers)
+                        paper_list = display_semantic_scholar_results(response_data)
+                        if paper_list:
+                            st.session_state.search_results_df = pd.DataFrame(paper_list)
+                    elif "arXiv" in search_sources:
+                        response_data = search_arxiv(query, max_results=num_papers)
+                        paper_list = display_arxiv_results(response_data)
+                        if paper_list:
+                            st.session_state.search_results_df = pd.DataFrame(paper_list)
+                    if st.session_state.search_results_df is not None:
+                        st.write(st.session_state.search_results_df)
+                        # Display download buttons
+                        col1, col2 = st.columns(2)
+                        with col1:
+                            st.markdown(get_download_button(st.session_state.search_results_df, "csv", "📊 Download as CSV"), unsafe_allow_html=True)
+                        with col2:
+                            st.markdown(get_download_button(st.session_state.search_results_df, "txt", "📝 Download as Text"), unsafe_allow_html=True)
+        else:
+            st.warning("Please enter a search query.")
+    st.markdown('</div>', unsafe_allow_html=True)
+def summarize_page():
+    st.markdown('<div class="card">', unsafe_allow_html=True)
+    st.title("Text Summarization")
+    st.write("Generate concise summaries from lengthy academic text.")
+    user_text = st.text_area("Enter text to summarize", height=200)
+    if st.button("Summarize"):
+        if user_text:
+            with st.spinner('Summarizing text...'):
+                summary = summarize_text(user_text)
+                st.success("Summary:")
+                st.write(summary)
+        else:
+            st.warning("Please enter text to summarize.")
+    st.markdown('</div>', unsafe_allow_html=True)
+def generate_page():
+    st.markdown('<div class="card">', unsafe_allow_html=True)
+    st.title("Text Generation")
+    st.write("Generate text based on your input to assist with research writing.")
+    user_text = st.text_area("Enter text prompt", height=200)
+    if st.button("Generate Text"):
+        if user_text:
+            with st.spinner('Generating text...'):
+                generated = generate_text(user_text)
+                st.success("Generated Text:")
+                st.write(generated)
+        else:
+            st.warning("Please enter text to generate from.")
+    st.markdown('</div>', unsafe_allow_html=True)
+def about_page():
+    st.markdown('<div class="card">', unsafe_allow_html=True)
+    st.title("About PaperQuest")
+    st.write("""
+    ## Our Mission
+    PaperQuest is dedicated to empowering researchers, students, and academics with powerful tools to streamline their research process. Our platform combines comprehensive paper search capabilities with advanced text summarization and generation tools to help you work more efficiently.
+    ## Our Technology
+    PaperQuest leverages state-of-the-art natural language processing models to deliver high-quality text summarization and generation. Our search functionality connects to CrossRef's extensive database, providing access to millions of academic papers across disciplines.
+    ## The Team
+    Our team consists of researchers and developers passionate about improving the academic research process through technology.
+    """)
+    st.markdown('</div>', unsafe_allow_html=True)
+def how_to_use_page():
+    st.markdown('<div class="card">', unsafe_allow_html=True)
+    st.title("How to Use PaperQuest")
+    st.write("""
+    ## Quick Start Guide
+    ### Finding Research Papers
+    1. Navigate to the Home or Search page
+    2. Enter your research topic or keywords in the search bar
+    3. Adjust the number of results using the slider
+    4. Click "Search" to retrieve papers
+    5. Download your results in CSV or TXT format
+    ### Summarizing Text
+    1. Navigate to the Summarize page
+    2. Paste the text you want to summarize
+    3. Click "Summarize" to get a concise version
+    ### Generating Text
+    1. Navigate to the Generate page
+    2. Enter a prompt or starting text
+    3. Click "Generate Text" to get AI-assisted content
+    ## Tips for Better Results
+    - Use specific keywords for more targeted search results
+    - For summarization, provide complete paragraphs for better context
+    - When generating text, provide clear prompts that describe what you need
+    """)
+    st.markdown('</div>', unsafe_allow_html=True)
+# Main function
+def main():
+    # Initialize session state for page navigation
+    if 'page' not in st.session_state:
+        st.session_state.page = 'home'
+    if 'search_results_df' not in st.session_state:
+        st.session_state.search_results_df = None
+    # Sidebar navigation
+    st.sidebar.title("Navigation")
+    pages = {
+        "home": "🏠 Home",
+        "search": "🔍 Search Papers",
+        "summarize": "📝 Summarize Text",
+        "generate": "✨ Generate Text",
+        "literature": "📚 Literature Survey"
+        "about": "ℹ️ About Us",
+        "how_to_use": "❓ How to Use"
+    }
+    for page_id, page_name in pages.items():
+        if st.sidebar.button(page_name, key=page_id):
+            st.session_state.page = page_id
+    # App logo and branding in sidebar
+    st.sidebar.markdown("---")
+    st.sidebar.markdown("<div style='text-align: center;'><h3>PaperQuest</h3><p>Research Finder & Text Companion</p></div>", unsafe_allow_html=True)
+    # Display the selected page
+    if st.session_state.page == 'home':
+        home_page()
+    elif st.session_state.page == 'search':
+        search_page()
+    elif st.session_state.page == 'summarize':
+        summarize_page()
+    elif st.session_state.page == 'generate':
+        generate_page()
+    elif st.session_state.page == 'about':
+        about_page()
+    elif st.session_state.page == 'how_to_use':
+        how_to_use_page()
+    elif st.session_state.page == 'literature':
+        literature_survey_page()
+    # Footer
+    st.markdown('<div class="footer">', unsafe_allow_html=True)
+    st.markdown("© 2025 PaperQuest | Research Finder and Text Companion", unsafe_allow_html=True)
+    st.markdown('</div>', unsafe_allow_html=True)
+# Run the app when this file is executed as a script rather than imported.
+if __name__ == "__main__":
+    main()