|
import streamlit as st |
|
import requests |
|
import pandas as pd |
|
import transformers |
|
from transformers import pipeline |
|
import tensorflow |
|
import io |
|
import base64 |
|
import xml.etree.ElementTree as ET |
|
import json |
|
import time |
|
|
|
|
|
# Browser-tab title/icon plus the default layout and sidebar state for the app.
_page_settings = {
    "page_title": "PaperQuest: Research Finder",
    "page_icon": "π",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_page_settings)
|
|
|
|
|
# Global stylesheet for the app, injected as raw HTML.
# - The sidebar is styled through its data-testid attribute as well as the
#   generated class name, because generated class names are build-specific.
# - .download-button styles the anchor produced by get_download_button().
st.markdown("""
<style>
    /* Main theme colors */
    :root {
        --primary-color: #4361ee;
        --secondary-color: #3a0ca3;
        --accent-color: #4cc9f0;
        --background-color: #f8f9fa;
        --text-color: #212529;
    }

    /* Overall page styling */
    .main {
        background-color: var(--background-color);
        color: var(--text-color);
    }

    /* Header styling */
    h1, h2, h3 {
        color: var(--primary-color);
        font-family: 'Helvetica Neue', sans-serif;
    }

    /* Custom button styling */
    .stButton > button {
        background-color: var(--primary-color);
        color: white;
        border-radius: 6px;
        border: none;
        padding: 0.5rem 1rem;
        font-weight: 600;
        transition: all 0.3s;
    }

    .stButton > button:hover {
        background-color: var(--secondary-color);
        transform: translateY(-2px);
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    }

    /* Custom sidebar styling */
    .css-1d391kg,
    [data-testid="stSidebar"] {
        background-color: #f1f3f8;
    }

    /* Card-like containers */
    .card {
        background-color: white;
        border-radius: 10px;
        padding: 20px;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
        margin-bottom: 20px;
    }

    /* Hero section */
    .hero {
        background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
        color: white;
        padding: 2rem;
        border-radius: 10px;
        margin-bottom: 2rem;
        text-align: center;
    }

    /* Tables */
    .dataframe {
        width: 100%;
        border-collapse: collapse;
    }

    .dataframe th {
        background-color: var(--primary-color);
        color: white;
        text-align: left;
        padding: 12px;
    }

    .dataframe td {
        padding: 8px 12px;
        border-bottom: 1px solid #ddd;
    }

    .dataframe tr:nth-child(even) {
        background-color: #f9f9f9;
    }

    /* Feature icons */
    .feature-icon {
        font-size: 2.5rem;
        color: var(--primary-color);
        margin-bottom: 1rem;
        text-align: center;
    }

    /* Download links rendered as buttons */
    .download-button {
        display: inline-block;
        background-color: var(--primary-color);
        color: white !important;
        border-radius: 6px;
        padding: 0.5rem 1rem;
        font-weight: 600;
        text-decoration: none;
    }

    .download-button:hover {
        background-color: var(--secondary-color);
    }

    /* Footer */
    .footer {
        text-align: center;
        padding: 20px;
        background-color: #f1f3f8;
        margin-top: 40px;
        border-radius: 10px;
    }
</style>
""", unsafe_allow_html=True)
|
|
|
import requests |
|
import xml.etree.ElementTree as ET |
|
import pandas as pd |
|
import streamlit as st |
|
import re |
|
|
|
|
|
def search_crossref(query, rows=10):
    """
    Query the CrossRef works endpoint for journal articles matching a search string.

    Parameters:
        query (str): Free-text search string sent as the ``query`` parameter.
        rows (int): Maximum number of records to request.

    Returns:
        dict | None: The decoded JSON response, or None when the request fails
        (the failure is shown to the user through ``st.error``).
    """
    url = "https://api.crossref.org/works"
    params = {
        "query": query,
        "rows": rows,
        "filter": "type:journal-article",
    }

    try:
        # requests never times out by default; without this a stalled
        # connection would block the whole script run.
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as e:
        st.error(f"HTTP error occurred: {e}")
        return None
    except Exception as e:
        # Covers timeouts, connection failures and undecodable bodies alike.
        st.error(f"An error occurred: {e}")
        return None
|
|
|
|
|
def search_semantic_scholar(query, limit=10):
    """
    Query the Semantic Scholar paper-search endpoint.

    Parameters:
        query (str): Free-text search string.
        limit (int): Maximum number of papers to request.

    Returns:
        dict | None: The decoded JSON response, or None when the request fails
        (the failure is shown to the user through ``st.error``).
    """
    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {
        "query": query,
        "limit": limit,
        "fields": "title,authors,venue,year,abstract,url,externalIds",
    }
    headers = {"Accept": "application/json"}

    try:
        # requests never times out by default; without this a stalled
        # connection would block the whole script run.
        response = requests.get(url, params=params, headers=headers, timeout=30)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as e:
        st.error(f"Semantic Scholar HTTP error: {e}")
        return None
    except Exception as e:
        # Covers timeouts, connection failures and undecodable bodies alike.
        st.error(f"Semantic Scholar error: {e}")
        return None
|
|
|
|
|
def search_arxiv(query, max_results=10):
    """
    Search arXiv through its Atom query API and flatten the feed into dicts.

    Parameters:
        query (str): Search terms, sent as ``all:<query>``.
        max_results (int): Maximum number of entries to request.

    Returns:
        dict | None: ``{"entries": [...]}`` where each entry has the keys
        ``title``, ``authors`` (comma-separated), ``abstract``, ``url`` (the
        PDF link, or "" when the entry has none), ``published`` (date part
        only), ``arxiv_id`` and ``doi``. None when the request or XML parsing
        fails (the failure is shown through ``st.error``).
    """
    base_url = "http://export.arxiv.org/api/query"
    atom = "{http://www.w3.org/2005/Atom}"
    arxiv_ns = "{http://arxiv.org/schemas/atom}"

    params = {
        "search_query": f"all:{query}",
        "max_results": max_results,
        "sortBy": "relevance",
        "sortOrder": "descending",
    }

    def text_of(parent, tag):
        # A missing or empty element yields "" instead of raising, so one odd
        # entry cannot abort the whole result set. Runs of whitespace
        # (including line breaks inside the text) collapse to single spaces.
        return " ".join((parent.findtext(tag) or "").split())

    try:
        response = requests.get(base_url, params=params, timeout=30)
        response.raise_for_status()
        root = ET.fromstring(response.content)

        entries = []
        for entry in root.findall(f"{atom}entry"):
            author_names = [
                text_of(author, f"{atom}name")
                for author in entry.findall(f"{atom}author")
            ]
            pdf_url = next(
                (
                    link.get("href")
                    for link in entry.findall(f"{atom}link")
                    if link.get("title") == "pdf"
                ),
                "",
            )

            entries.append({
                "title": text_of(entry, f"{atom}title"),
                "authors": ", ".join(name for name in author_names if name),
                "abstract": text_of(entry, f"{atom}summary"),
                "url": pdf_url or "",
                "published": text_of(entry, f"{atom}published").split("T")[0],
                "arxiv_id": text_of(entry, f"{atom}id").split("/abs/")[-1],
                # Present only when the authors registered a DOI with arXiv.
                "doi": text_of(entry, f"{arxiv_ns}doi"),
            })

        return {"entries": entries}
    except requests.exceptions.HTTPError as e:
        st.error(f"arXiv HTTP error: {e}")
        return None
    except Exception as e:
        # Covers timeouts, connection failures and malformed XML.
        st.error(f"arXiv error: {e}")
        return None
|
|
|
|
|
def fetch_pubmed_abstract(doi):
    """
    Look up a DOI in PubMed (esearch, then efetch) and return its abstract text.

    This is a best-effort enrichment step: any failure (network, HTTP status,
    malformed JSON/XML, no match) yields an empty string rather than an error.

    Parameters:
        doi (str): DOI to look up.

    Returns:
        str: The text of all ``AbstractText`` elements joined by spaces, or ""
        when nothing could be retrieved.
    """
    eutils = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"

    try:
        # Passing the DOI through ``params`` URL-encodes it; DOIs may contain
        # characters such as '#', '<' or ';' that would corrupt a hand-built URL.
        search_response = requests.get(
            f"{eutils}/esearch.fcgi",
            params={"db": "pubmed", "term": f"{doi}[doi]", "retmode": "json"},
            timeout=15,
        )
        search_response.raise_for_status()
        search_data = search_response.json()

        id_list = search_data.get('esearchresult', {}).get('idlist', [])
        if not id_list:
            return ""

        fetch_response = requests.get(
            f"{eutils}/efetch.fcgi",
            params={"db": "pubmed", "id": id_list[0], "retmode": "xml"},
            timeout=15,
        )
        fetch_response.raise_for_status()
        root = ET.fromstring(fetch_response.content)

        # itertext() also collects text that follows nested inline tags,
        # which ``elem.text`` alone would drop.
        sections = (
            "".join(elem.itertext()).strip()
            for elem in root.findall(".//AbstractText")
        )
        return " ".join(section for section in sections if section)
    except Exception:
        # Deliberately broad: callers invoke this per search result and treat
        # "" as "no abstract available".
        return ""
|
|
|
|
|
def display_crossref_results(data):
    """
    Convert a CrossRef API response into a list of normalized paper records.

    For every item with a DOI the abstract is first looked up through
    ``fetch_pubmed_abstract``; if that yields nothing, the item's own
    ``abstract`` field is used with its markup tags stripped. A progress bar
    and status line are shown while items are processed.

    Parameters:
        data (dict | None): Decoded CrossRef response.

    Returns:
        list[dict] | None: One record per item with the keys ``Source``,
        ``Title``, ``Author(s)``, ``Journal``, ``Abstract``, ``DOI``, ``Link``
        and ``Published``; None (after a warning) when there is nothing to show.
    """
    if not data:
        st.warning("No CrossRef data to display.")
        return None

    items = data.get('message', {}).get('items', [])
    if not items:
        st.warning("No CrossRef results found.")
        return None

    def first_or_blank(values):
        # The field may be missing or an empty list; indexing [0] directly
        # would raise IndexError on the latter.
        return values[0] if values else ""

    def issued_year(item):
        if 'issued' not in item:
            return "N/A"
        date_parts = (item.get('issued') or {}).get('date-parts') or [[None]]
        year = date_parts[0][0] if date_parts[0] else None
        return year if year is not None else "N/A"

    paper_list = []
    progress_bar = st.progress(0)
    status_text = st.empty()
    total = len(items)

    for position, item in enumerate(items, start=1):
        status_text.text(f"Processing CrossRef paper {position}/{total}...")

        doi = item.get('DOI', '')
        abstract = fetch_pubmed_abstract(doi) if doi else ""
        if not abstract and item.get('abstract'):
            abstract = re.sub(r'<[^>]+>', '', item['abstract'])

        # Fall back to ``name`` for authors that have no ``family`` key, and
        # drop empty names so the joined string has no ", , " gaps.
        author_names = [
            author.get('family') or author.get('name') or ''
            for author in item.get('author', [])
        ]

        paper_list.append({
            "Source": "CrossRef",
            "Title": first_or_blank(item.get('title')),
            "Author(s)": ', '.join(name for name in author_names if name),
            "Journal": first_or_blank(item.get('container-title')),
            "Abstract": abstract,
            "DOI": doi,
            "Link": item.get('URL', ''),
            "Published": issued_year(item),
        })

        progress_bar.progress(position / total)

    # Remove the transient progress widgets once processing is finished.
    progress_bar.empty()
    status_text.empty()

    return paper_list
|
|
|
|
|
def display_semantic_scholar_results(data):
    """
    Convert a Semantic Scholar search response into normalized paper records.

    Fields that the response carries as an explicit null (``externalIds``,
    ``abstract``, ``venue``, ``authors``, ...) are normalized to empty values
    so records never contain None and a null ``externalIds`` cannot raise.

    Parameters:
        data (dict | None): Decoded response; papers are read from ``data["data"]``.

    Returns:
        list[dict] | None: One record per paper with the keys ``Source``,
        ``Title``, ``Author(s)``, ``Journal``, ``Abstract``, ``DOI``, ``Link``
        and ``Published``; None (after a warning) when there is nothing to show.
    """
    if not data:
        st.warning("No Semantic Scholar data to display.")
        return None

    items = data.get('data', [])
    if not items:
        st.warning("No Semantic Scholar results found.")
        return None

    paper_list = []
    for item in items:
        # ``dict.get(key, default)`` only applies the default when the key is
        # absent, so explicit nulls are handled with ``or``.
        authors = item.get('authors') or []
        author_names = ', '.join(
            name for name in (author.get('name') or '' for author in authors) if name
        )
        external_ids = item.get('externalIds') or {}
        year = item.get('year')

        paper_list.append({
            "Source": "Semantic Scholar",
            "Title": item.get('title') or '',
            "Author(s)": author_names,
            "Journal": item.get('venue') or '',
            "Abstract": item.get('abstract') or '',
            "DOI": external_ids.get('DOI') or '',
            "Link": item.get('url') or '',
            "Published": year if year is not None else 'N/A',
        })

    return paper_list
|
|
|
|
|
def display_arxiv_results(data):
    """
    Map the entries produced by ``search_arxiv`` onto the app's record layout.

    Parameters:
        data (dict | None): Mapping with an ``entries`` list of entry dicts.

    Returns:
        list[dict] | None: One record per entry (``Source``, ``Title``,
        ``Author(s)``, ``Journal``, ``Abstract``, ``DOI``, ``Link``,
        ``Published``, ``arXiv ID``); None (after a warning) when ``data`` is
        falsy or holds no entries.
    """
    if not data:
        st.warning("No arXiv data to display.")
        return None

    records = data.get('entries', [])
    if not records:
        st.warning("No arXiv results found.")
        return None

    # Every arXiv record shares the same source label and venue placeholder.
    return [
        {
            "Source": "arXiv",
            "Title": record.get('title', ''),
            "Author(s)": record.get('authors', ''),
            "Journal": "arXiv preprint",
            "Abstract": record.get('abstract', ''),
            "DOI": record.get('doi', ''),
            "Link": record.get('url', ''),
            "Published": record.get('published', 'N/A'),
            "arXiv ID": record.get('arxiv_id', ''),
        }
        for record in records
    ]
|
|
|
|
|
def run_comprehensive_search(query, max_results=10, sources=None):
    """
    Search several academic databases and merge the results into one DataFrame.

    Each provider is queried in its own column, which shows a spinner while
    the request runs and a success/info message afterwards.

    Parameters:
        query (str): Free-text search string.
        max_results (int): Maximum number of results requested per provider.
        sources (iterable[str] | None): Provider names to query, chosen from
            "CrossRef", "Semantic Scholar" and "arXiv". None, or a selection
            containing "All", queries every provider (the previous behavior).

    Returns:
        pandas.DataFrame | None: Combined records from all queried providers,
        or None (after a warning) when no provider returned anything.
    """
    providers = [
        ("CrossRef",
         lambda: display_crossref_results(search_crossref(query, rows=max_results))),
        ("Semantic Scholar",
         lambda: display_semantic_scholar_results(search_semantic_scholar(query, limit=max_results))),
        ("arXiv",
         lambda: display_arxiv_results(search_arxiv(query, max_results=max_results))),
    ]

    if sources is not None:
        wanted = set(sources)
        if "All" not in wanted:
            providers = [provider for provider in providers if provider[0] in wanted]

    if not providers:
        st.warning("No results found across any of the academic databases.")
        return None

    all_results = []
    with st.spinner("Searching multiple academic databases..."):
        columns = st.columns(len(providers))
        for column, (label, fetch) in zip(columns, providers):
            with column:
                with st.spinner(f"Searching {label}..."):
                    results = fetch()
                if results:
                    st.success(f"Found {len(results)} results in {label}")
                    all_results.extend(results)
                else:
                    st.info(f"No results from {label}")

    if not all_results:
        st.warning("No results found across any of the academic databases.")
        return None

    df = pd.DataFrame(all_results)
    # Only arXiv records carry "arXiv ID"; pandas fills the gap with NaN for
    # the other rows, and NaN is truthy. Blank it so ``if row['arXiv ID']``
    # checks downstream behave as intended.
    if "arXiv ID" in df.columns:
        df["arXiv ID"] = df["arXiv ID"].fillna("")
    return df
|
|
|
|
|
def display_results(data):
    """
    Render a CrossRef response as a table and return it as a DataFrame.

    Unlike ``display_crossref_results`` this uses only the abstract embedded
    in the CrossRef item (markup tags stripped) and does not query PubMed.

    Parameters:
        data (dict | None): Decoded CrossRef response.

    Returns:
        pandas.DataFrame | None: One row per item with the columns ``Title``,
        ``Author(s)``, ``Journal``, ``Abstract``, ``DOI``, ``Link`` and
        ``Published``; None (after a warning) when there is nothing to show.
    """
    if not data:
        st.warning("No data to display.")
        return None

    items = data.get('message', {}).get('items', [])
    if not items:
        st.warning("No results found for the query.")
        return None

    def first_or_blank(values):
        # The field may be missing or an empty list; indexing [0] directly
        # would raise IndexError on the latter.
        return values[0] if values else ""

    def issued_year(item):
        if 'issued' not in item:
            return "N/A"
        date_parts = (item.get('issued') or {}).get('date-parts') or [[None]]
        return date_parts[0][0] if date_parts[0] else None

    paper_list = []
    for item in items:
        abstract = ""
        if item.get('abstract'):
            abstract = re.sub(r'<[^>]+>', '', item['abstract'])

        paper_list.append({
            "Title": first_or_blank(item.get('title')),
            "Author(s)": ', '.join([author.get('family', '') for author in item.get('author', [])]),
            "Journal": first_or_blank(item.get('container-title')),
            "Abstract": abstract,
            "DOI": item.get('DOI', ''),
            "Link": item.get('URL', ''),
            "Published": issued_year(item),
        })

    df = pd.DataFrame(paper_list)
    st.write(df)
    return df
|
|
|
def generate_literature_survey(papers, api_key=None, model="llama-3.3-70b-instruct"):
    """
    Generate a literature survey from paper abstracts using the Groq chat-completions API.

    Only papers whose ``Abstract`` is a string longer than 50 characters are
    used, and at most the first ten of those are sent to the model.

    Parameters:
        papers (list): List of paper dicts (keys ``Title``, ``Author(s)``,
            ``Published``, ``Abstract``).
        api_key (str | None): Groq API key. When omitted, the key is read from
            the ``GROQ_API_KEY`` environment variable; credentials must not be
            embedded in source code.
        model (str): Model identifier sent in the request.

    Returns:
        str: The generated survey, or a human-readable message explaining why
        no survey could be produced. This function never raises for API errors.
    """
    import os  # local import: only this function needs it

    if not papers:
        return "No papers found to generate a literature survey."

    # Guard with isinstance: records built from a DataFrame may hold None or
    # NaN instead of text, and len() on those would raise.
    papers_with_abstracts = [
        p for p in papers
        if isinstance(p.get("Abstract"), str) and len(p["Abstract"]) > 50
    ]
    if not papers_with_abstracts:
        return "Cannot generate a literature survey because none of the papers have substantial abstracts."

    api_key = api_key or os.environ.get("GROQ_API_KEY")
    if not api_key:
        return ("Failed to generate literature survey: no Groq API key configured. "
                "Set the GROQ_API_KEY environment variable.")

    paper_info = [
        f"Paper {i}:\nTitle: {paper.get('Title', 'Unknown')}\n"
        f"Authors: {paper.get('Author(s)', 'Unknown')}\n"
        f"Year: {paper.get('Published', 'Unknown')}\n"
        f"Abstract: {paper.get('Abstract', 'No abstract available')}\n"
        for i, paper in enumerate(papers_with_abstracts[:10], start=1)
    ]
    papers_text = "\n".join(paper_info)

    prompt = "\n".join([
        "You are an expert academic researcher. Based on the following papers and their abstracts,",
        "write a concise literature survey that:",
        "1. Identifies the main themes and research directions",
        "2. Highlights methodological approaches",
        "3. Summarizes key findings",
        "4. Points out research gaps if evident",
        "5. Suggests potential future research directions",
        "Here are the papers:",
        papers_text,
        "Please organize the survey by themes rather than by individual papers, creating connections between studies.",
        "Format your response with markdown headings for better readability.",
        "",
    ])

    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        # NOTE(review): confirm this identifier against Groq's published model
        # list; an unknown ID makes the request fail with an HTTP error.
        "model": model,
        "messages": [
            {"role": "system", "content": "You are an academic research assistant that creates comprehensive literature surveys."},
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.3,
        "max_tokens": 2000,
    }

    try:
        # Generation can be slow, but the call must not hang forever.
        response = requests.post(url, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        result = response.json()
        return result["choices"][0]["message"]["content"]
    except requests.exceptions.HTTPError as e:
        return f"Failed to generate literature survey due to an HTTP error: {str(e)}"
    except Exception as e:
        # Timeouts, connection errors, or a response missing the expected keys.
        return f"Failed to generate literature survey due to an error: {str(e)}"
|
|
|
|
|
def _demo_literature_survey():
    """
    Manual smoke test for ``generate_literature_survey``.

    This used to run at module level, which issued a real API request (with a
    key embedded in the source) every time the script executed. It is now an
    uncalled helper; invoke it explicitly when a manual check is wanted.

    Returns:
        str: Whatever ``generate_literature_survey`` returned for the sample paper.
    """
    sample_papers = [
        {
            "Title": "Sample Paper",
            "Author(s)": "Mahatir Ahmed Tusher",
            "Published": "2023",
            "Abstract": "This is a sample abstract with more than 50 characters to test the function.",
        }
    ]
    survey = generate_literature_survey(sample_papers)
    print(survey)
    return survey
|
|
|
|
|
def add_literature_survey_button(search_results_df):
    """
    Add a button to generate a literature survey based on search results

    The generated survey is kept in ``st.session_state`` so that it stays on
    screen after later reruns (for example after pressing the download
    button); ``st.button`` only returns True on the run triggered by its own
    click.

    Parameters:
        search_results_df (pandas.DataFrame): DataFrame containing search results
    """
    if search_results_df is None or search_results_df.empty:
        st.info("Run a search first to generate a literature survey.")
        return

    has_arxiv = "Source" in search_results_df.columns and "arXiv" in search_results_df["Source"].values
    if not has_arxiv:
        st.warning("For best literature survey results, include arXiv in your search sources. arXiv papers typically have more comprehensive abstracts.")

    # Identifies the result set a stored survey belongs to, so a survey from
    # an earlier search is not shown for different results.
    if "Title" in search_results_df.columns:
        fingerprint = tuple(str(title) for title in search_results_df["Title"])
    else:
        fingerprint = (len(search_results_df),)

    if st.button("Generate Literature Survey"):
        with st.spinner("Generating literature survey using AI... This may take a minute."):
            papers = search_results_df.to_dict('records')
            st.session_state["literature_survey_cache"] = {
                "fingerprint": fingerprint,
                "text": generate_literature_survey(papers),
            }

    cached = st.session_state.get("literature_survey_cache")
    if cached and cached.get("fingerprint") == fingerprint:
        survey = cached["text"]
        st.markdown("## Literature Survey")
        st.markdown(survey)

        st.download_button(
            label="Download Survey as Text",
            data=survey,
            file_name="literature_survey.md",
            mime="text/markdown",
        )
|
|
|
def literature_survey_page():
    """
    Render the Literature Survey page.

    Shows the survey controls when ``st.session_state.search_results_df``
    holds results; otherwise prompts the user to search first and offers a
    button that switches to the search page.
    """
    def go_to_search():
        # Runs as a button callback, i.e. before the script reruns, so the
        # page router sees the new value on that same rerun. Assigning inside
        # ``if st.button(...)`` would be too late: routing for the current
        # run has already happened by the time this page is being drawn.
        st.session_state.page = "search"

    st.markdown('<div class="card">', unsafe_allow_html=True)
    st.title("Literature Survey Generator")
    st.write("Generate comprehensive literature surveys from your search results.")

    results_df = st.session_state.search_results_df
    if results_df is not None and not results_df.empty:
        st.write(f"Using {len(results_df)} papers from your last search.")
        add_literature_survey_button(results_df)
    else:
        st.info("Please perform a search first to gather papers for your literature survey.")
        st.button("Go to Search Page", on_click=go_to_search)
    st.markdown('</div>', unsafe_allow_html=True)
|
|
|
|
|
import functools


@functools.lru_cache(maxsize=1)
def _load_summarizer():
    """Build the summarization pipeline once per process and reuse it afterwards."""
    return pipeline(
        "text2text-generation",
        model="spacemanidol/flan-t5-large-website-summarizer",
        framework="pt",
    )


def summarize_text(text):
    """
    Summarize ``text`` with the ``spacemanidol/flan-t5-large-website-summarizer`` model.

    The pipeline is created on first use and cached, instead of being rebuilt
    on every call.

    Parameters:
        text (str): Text to summarize.

    Returns:
        str: The generated summary, or "Summary could not be generated." when
        loading the model or running it fails (the error is shown via ``st.error``).
    """
    try:
        summarizer = _load_summarizer()
        summary = summarizer(text, max_length=150, min_length=50, do_sample=False)
        return summary[0]['generated_text']
    except Exception as e:
        st.error(f"An error occurred during summarization: {e}")
        return "Summary could not be generated."
|
|
|
|
|
import functools


@functools.lru_cache(maxsize=1)
def _load_text_generator():
    """Build the text-generation pipeline once per process and reuse it afterwards."""
    return pipeline(
        "text2text-generation",
        model="JorgeSarry/est5-summarize",
        framework="pt",
    )


def generate_text(text):
    """
    Generate text from ``text`` with the ``JorgeSarry/est5-summarize`` model.

    The pipeline is created on first use and cached, instead of being rebuilt
    on every call.

    Parameters:
        text (str): Prompt text.

    Returns:
        str: The generated text, or "Generated text could not be created."
        when loading the model or running it fails (the error is shown via
        ``st.error``).
    """
    try:
        text_generator = _load_text_generator()
        generated_text = text_generator(text, max_length=150, min_length=50, do_sample=False)
        return generated_text[0]['generated_text']
    except Exception as e:
        st.error(f"An error occurred during text generation: {e}")
        return "Generated text could not be created."
|
|
|
|
|
def convert_df_to_csv(df):
    """Serialize ``df`` to CSV text without the index column and return it as UTF-8 bytes."""
    csv_text = df.to_csv(index=False)
    return csv_text.encode('utf-8')
|
|
|
|
|
def convert_df_to_txt(df):
    """
    Render ``df`` as a tab-separated plain-text report.

    Layout: a title line, a blank line, the tab-joined column names, a dashed
    rule as long as that header line, then one tab-joined line per row. Every
    line, including the last, ends with a newline.

    Parameters:
        df (pandas.DataFrame): Table to export; column names must be strings.

    Returns:
        str: The report text.
    """
    header_line = "\t".join(df.columns)
    # The title element ends with "\n" so that joining yields the blank line
    # that separates it from the header.
    lines = ["PaperQuest Research Results\n", header_line, "-" * len(header_line)]
    lines.extend(
        "\t".join(str(cell) for cell in record.values)
        for _, record in df.iterrows()
    )
    return "\n".join(lines) + "\n"
|
|
|
|
|
def get_download_button(df, file_type="csv", button_text="Download as CSV"):
    """
    Build an HTML download link that embeds ``df`` as a base64 data URI.

    Parameters:
        df (pandas.DataFrame): Table to export.
        file_type (str): "csv" exports CSV; any other value exports the
            tab-separated text report and is used as the file extension.
        button_text (str): Visible link text.

    Returns:
        str: An ``<a>`` element with class ``download-button``.
    """
    if file_type == "csv":
        payload, mime = convert_df_to_csv(df), "text/csv"
    else:
        payload, mime = convert_df_to_txt(df).encode(), "text/plain"

    encoded = base64.b64encode(payload).decode()
    data_uri = f'data:{mime};base64,{encoded}'
    return f'<a href="{data_uri}" download="research_results.{file_type}" class="download-button">{button_text}</a>'
|
|
|
|
|
def home_page():
    """
    Render the landing page: hero banner, a quick search form and feature cards.

    A search stores its results in ``st.session_state.search_results_df``.
    That value is cleared at the start of every search so an unsuccessful
    search never shows (or leaves behind) the results of a previous one.
    """
    st.markdown('<div class="hero">', unsafe_allow_html=True)
    st.title("PaperQuest: Research Finder and Text Companion")
    st.markdown("Discover academic insights and enhance your research journey with our powerful tools")
    st.markdown('</div>', unsafe_allow_html=True)

    st.markdown('<div class="card">', unsafe_allow_html=True)
    # NOTE(review): the leading "π" in several labels below looks like a
    # mis-decoded emoji; the intended character cannot be recovered from here.
    st.subheader("π Find Research Papers")

    col1, col2 = st.columns([3, 1])
    with col1:
        query = st.text_input("Enter your research topic or keywords", value="machine learning optimization")
    with col2:
        num_papers = st.slider("Results per source", min_value=5, max_value=50, value=10)

    search_sources = st.multiselect(
        "Select sources",
        options=["CrossRef", "Semantic Scholar", "arXiv", "All"],
        default=["CrossRef"]
    )

    search_clicked = st.button("Search Papers", key="search_home")

    if search_clicked:
        if not query:
            st.warning("Please enter a search query.")
        elif not search_sources:
            st.warning("Please select at least one source.")
        else:
            st.session_state.search_results_df = None  # drop stale results

            if "All" in search_sources or len(search_sources) > 1:
                results_df = run_comprehensive_search(query, max_results=num_papers)
                # Keep only the sources the user actually picked.
                if results_df is not None and "All" not in search_sources:
                    results_df = results_df[results_df["Source"].isin(search_sources)]
                    if results_df.empty:
                        st.warning("No results from the selected sources.")
                        results_df = None
                st.session_state.search_results_df = results_df

                if results_df is not None:
                    st.subheader(f"Found {len(results_df)} papers")
            else:
                source = search_sources[0]
                with st.spinner('Searching for papers...'):
                    if source == "CrossRef":
                        paper_list = display_crossref_results(search_crossref(query, rows=num_papers))
                    elif source == "Semantic Scholar":
                        paper_list = display_semantic_scholar_results(search_semantic_scholar(query, limit=num_papers))
                    else:
                        paper_list = display_arxiv_results(search_arxiv(query, max_results=num_papers))
                if paper_list:
                    st.session_state.search_results_df = pd.DataFrame(paper_list)
                    st.write(st.session_state.search_results_df)

            if st.session_state.search_results_df is not None:
                col1, col2 = st.columns(2)
                with col1:
                    st.markdown(get_download_button(st.session_state.search_results_df, "csv", "π Download as CSV"), unsafe_allow_html=True)
                with col2:
                    st.markdown(get_download_button(st.session_state.search_results_df, "txt", "π Download as Text"), unsafe_allow_html=True)
    st.markdown('</div>', unsafe_allow_html=True)

    st.markdown("<h2 style='text-align: center; margin-top: 40px;'>Features</h2>", unsafe_allow_html=True)

    features = [
        ("π", "Comprehensive Search",
         "Access thousands of academic papers from CrossRef, Semantic Scholar, and arXiv"),
        ("π", "Text Summarization",
         "Extract key insights from complex research papers"),
        ("✨", "Smart Text Generation",
         "Get assistance with creating coherent research content"),
    ]
    for column, (icon, heading, blurb) in zip(st.columns(3), features):
        with column:
            st.markdown('<div class="card">', unsafe_allow_html=True)
            st.markdown(f'<div class="feature-icon">{icon}</div>', unsafe_allow_html=True)
            st.markdown(f"<h3 style='text-align: center;'>{heading}</h3>", unsafe_allow_html=True)
            st.markdown(f"<p style='text-align: center;'>{blurb}</p>", unsafe_allow_html=True)
            st.markdown('</div>', unsafe_allow_html=True)
|
|
|
def _publication_year(published):
    """Return the leading year of a ``Published`` value as an int, or None if it has none."""
    head = str(published).strip().split('-')[0]
    try:
        return int(head)
    except ValueError:
        return None


def _render_multi_source_results(results_df):
    """Show source/year filters and the matching papers; publish the filtered table to session state."""
    st.subheader(f"Found {len(results_df)} papers across all selected sources")

    st.subheader("Filter Results")
    source_options = list(results_df["Source"].unique())
    selected_sources = st.multiselect(
        "Filter by sources",
        options=source_options,
        default=source_options
    )

    results_df = results_df.copy()  # do not mutate the stored search results
    results_df["Published"] = results_df["Published"].astype(str)

    # Float series with NaN for unparseable values; used for the comparisons.
    years = pd.to_numeric(results_df["Published"].apply(_publication_year), errors="coerce")
    # Nullable integer column so exported years read 2021, not 2021.0.
    results_df["Year"] = years.astype("Int64")

    keep = results_df["Source"].isin(selected_sources)
    known_years = years.dropna()
    if not known_years.empty:
        min_year, max_year = int(known_years.min()), int(known_years.max())
        if min_year < max_year:
            year_range = st.slider(
                "Publication year range",
                min_value=min_year,
                max_value=max_year,
                value=(min_year, max_year)
            )
        else:
            # All papers share one year, so a range slider has nothing to select.
            year_range = (min_year, max_year)
        # Papers with an unknown year are always kept.
        keep &= years.between(year_range[0], year_range[1]) | years.isna()

    filtered_df = results_df[keep]

    st.subheader(f"Showing {len(filtered_df)} filtered results")

    def has_text(value):
        # Missing cells may be NaN (which is truthy), so check the type too.
        return isinstance(value, str) and value.strip() != ""

    for _, row in filtered_df.iterrows():
        with st.expander(f"{row['Title']} ({row['Source']}, {row['Published']})"):
            st.write(f"**Authors:** {row['Author(s)']}")
            st.write(f"**Journal/Venue:** {row['Journal']}")
            st.write("**Abstract:**")
            st.write(row['Abstract'] if has_text(row['Abstract']) else "No abstract available")

            if has_text(row['DOI']):
                st.write(f"**DOI:** https://doi.org/{row['DOI']}")
            if has_text(row['Link']):
                st.write(f"**Link:** {row['Link']}")
            if 'arXiv ID' in row and has_text(row['arXiv ID']):
                st.write(f"**arXiv ID:** {row['arXiv ID']}")

    st.session_state.search_results_df = filtered_df

    col1, col2 = st.columns(2)
    with col1:
        st.markdown(get_download_button(filtered_df, "csv", "π Download as CSV"), unsafe_allow_html=True)
    with col2:
        st.markdown(get_download_button(filtered_df, "txt", "π Download as Text"), unsafe_allow_html=True)


def search_page():
    """
    Render the Search page: query form, source selection and result display.

    The outcome of the last search is kept in
    ``st.session_state["search_page_results"]`` and drawn on every run, not
    only on the run triggered by the Search button. This keeps the results
    (and the filter widgets of a multi-source search) on screen when a filter
    is changed, because changing any widget reruns the script with the button
    reporting False.

    ``st.session_state.search_results_df`` is updated with the table the user
    currently sees, for use by the other pages.
    """
    st.markdown('<div class="card">', unsafe_allow_html=True)
    st.title("Research Paper Search")
    st.write("Find and explore academic papers across various disciplines.")

    query = st.text_input("Enter your research topic or keywords", value="machine learning optimization")

    col1, col2, col3 = st.columns(3)
    with col1:
        num_papers = st.slider("Results per source", min_value=5, max_value=50, value=10)
    with col2:
        search_sources = st.multiselect(
            "Select sources",
            options=["CrossRef", "Semantic Scholar", "arXiv", "All"],
            default=["CrossRef"]
        )
    with col3:
        st.write(" ")
        st.write(" ")
        search_clicked = st.button("Search")

    if search_clicked:
        if not query:
            st.warning("Please enter a search query.")
        elif not search_sources:
            st.warning("Please select at least one source.")
        else:
            multi_source = "All" in search_sources or len(search_sources) > 1
            if multi_source:
                found_df = run_comprehensive_search(query, max_results=num_papers)
                # Keep only the sources the user actually picked.
                if found_df is not None and "All" not in search_sources:
                    found_df = found_df[found_df["Source"].isin(search_sources)]
                    if found_df.empty:
                        st.warning("No results from the selected sources.")
                        found_df = None
            else:
                source = search_sources[0]
                with st.spinner('Searching for papers...'):
                    if source == "CrossRef":
                        paper_list = display_crossref_results(search_crossref(query, rows=num_papers))
                    elif source == "Semantic Scholar":
                        paper_list = display_semantic_scholar_results(search_semantic_scholar(query, limit=num_papers))
                    else:
                        paper_list = display_arxiv_results(search_arxiv(query, max_results=num_papers))
                found_df = pd.DataFrame(paper_list) if paper_list else None

            # Also overwrites results of an earlier search, so a search that
            # finds nothing never leaves stale papers behind.
            st.session_state.search_results_df = found_df
            st.session_state["search_page_results"] = {"df": found_df, "multi": multi_source}

    stored = st.session_state.get("search_page_results")
    if stored and stored["df"] is not None:
        if stored["multi"]:
            _render_multi_source_results(stored["df"])
        else:
            single_df = stored["df"]
            st.session_state.search_results_df = single_df
            st.write(single_df)

            col1, col2 = st.columns(2)
            with col1:
                st.markdown(get_download_button(single_df, "csv", "π Download as CSV"), unsafe_allow_html=True)
            with col2:
                st.markdown(get_download_button(single_df, "txt", "π Download as Text"), unsafe_allow_html=True)
    st.markdown('</div>', unsafe_allow_html=True)
|
|
|
def summarize_page():
    """Render the Text Summarization page: a text box, a button, and the resulting summary."""
    st.markdown('<div class="card">', unsafe_allow_html=True)
    st.title("Text Summarization")
    st.write("Generate concise summaries from lengthy academic text.")

    source_text = st.text_area("Enter text to summarize", height=200)
    wants_summary = st.button("Summarize")

    if wants_summary and not source_text:
        st.warning("Please enter text to summarize.")
    elif wants_summary:
        # The spinner stays visible while the model runs.
        with st.spinner('Summarizing text...'):
            result = summarize_text(source_text)
            st.success("Summary:")
            st.write(result)

    st.markdown('</div>', unsafe_allow_html=True)
|
|
|
def generate_page():
    """Render the Text Generation page: a prompt box, a button, and the generated text."""
    st.markdown('<div class="card">', unsafe_allow_html=True)
    st.title("Text Generation")
    st.write("Generate text based on your input to assist with research writing.")

    prompt_text = st.text_area("Enter text prompt", height=200)
    wants_generation = st.button("Generate Text")

    if wants_generation and not prompt_text:
        st.warning("Please enter text to generate from.")
    elif wants_generation:
        # The spinner stays visible while the model runs.
        with st.spinner('Generating text...'):
            result = generate_text(prompt_text)
            st.success("Generated Text:")
            st.write(result)

    st.markdown('</div>', unsafe_allow_html=True)
|
|
|
def about_page():
    """Render the static About page (mission, technology and team sections)."""
    about_text = """
    ## Our Mission

    PaperQuest is dedicated to empowering researchers, students, and academics with powerful tools to streamline their research process. Our platform combines comprehensive paper search capabilities with advanced text summarization and generation tools to help you work more efficiently.

    ## Our Technology

    PaperQuest leverages state-of-the-art natural language processing models to deliver high-quality text summarization and generation. Our search functionality connects to CrossRef's extensive database, providing access to millions of academic papers across disciplines.

    ## The Team

    Our team consists of researchers and developers passionate about improving the academic research process through technology.
    """

    st.markdown('<div class="card">', unsafe_allow_html=True)
    st.title("About PaperQuest")
    st.write(about_text)
    st.markdown('</div>', unsafe_allow_html=True)
|
|
|
def how_to_use_page():
    """Render the static How to Use page (quick-start steps and usage tips)."""
    guide_text = """
    ## Quick Start Guide

    ### Finding Research Papers
    1. Navigate to the Home or Search page
    2. Enter your research topic or keywords in the search bar
    3. Adjust the number of results using the slider
    4. Click "Search" to retrieve papers
    5. Download your results in CSV or TXT format

    ### Summarizing Text
    1. Navigate to the Summarize page
    2. Paste the text you want to summarize
    3. Click "Summarize" to get a concise version

    ### Generating Text
    1. Navigate to the Generate page
    2. Enter a prompt or starting text
    3. Click "Generate Text" to get AI-assisted content

    ## Tips for Better Results

    - Use specific keywords for more targeted search results
    - For summarization, provide complete paragraphs for better context
    - When generating text, provide clear prompts that describe what you need
    """

    st.markdown('<div class="card">', unsafe_allow_html=True)
    st.title("How to Use PaperQuest")
    st.write(guide_text)
    st.markdown('</div>', unsafe_allow_html=True)
|
|
|
|
|
def main():
    """
    Application entry point: initialize session state, draw the sidebar
    navigation, render the selected page and the footer.

    Session keys:
        page: identifier of the page to render (defaults to "home").
        search_results_df: results of the most recent search, or None.
    """
    if 'page' not in st.session_state:
        st.session_state.page = 'home'

    if 'search_results_df' not in st.session_state:
        st.session_state.search_results_df = None

    st.sidebar.title("Navigation")
    # NOTE(review): labels starting with "π" contain a mis-decoded emoji whose
    # original character cannot be recovered from this file; restore them
    # from the original source if it is available. The other garbled icons
    # (sparkles, information, question mark, copyright sign) were repaired.
    pages = {
        "home": "π Home",
        "search": "π Search Papers",
        "summarize": "π Summarize Text",
        "generate": "✨ Generate Text",
        "literature": "π Literature Survey",
        "about": "ℹ️ About Us",
        "how_to_use": "❓ How to Use"
    }

    # The buttons are evaluated before the page is rendered below, so a click
    # takes effect within the same run.
    for page_id, page_name in pages.items():
        if st.sidebar.button(page_name, key=page_id):
            st.session_state.page = page_id

    st.sidebar.markdown("---")
    st.sidebar.markdown("<div style='text-align: center;'><h3>PaperQuest</h3><p>Research Finder & Text Companion</p></div>", unsafe_allow_html=True)

    renderers = {
        'home': home_page,
        'search': search_page,
        'summarize': summarize_page,
        'generate': generate_page,
        'about': about_page,
        'how_to_use': how_to_use_page,
        'literature': literature_survey_page,
    }
    render = renderers.get(st.session_state.page)
    if render is not None:  # unknown ids render no page body, as before
        render()

    st.markdown('<div class="footer">', unsafe_allow_html=True)
    st.markdown("© 2025 PaperQuest | Research Finder and Text Companion", unsafe_allow_html=True)
    st.markdown('</div>', unsafe_allow_html=True)


if __name__ == "__main__":
    main()