import streamlit as st import requests import pandas as pd import transformers from transformers import pipeline import tensorflow import io import base64 import xml.etree.ElementTree as ET import json import time # Set page configuration and styling st.set_page_config( page_title="PaperQuest: Research Finder", page_icon="đ", layout="wide", initial_sidebar_state="expanded" ) # Custom CSS to make the UI more professional st.markdown(""" """, unsafe_allow_html=True) import requests import xml.etree.ElementTree as ET import pandas as pd import streamlit as st import re # Function to search CrossRef using the user's query def search_crossref(query, rows=10): url = "https://api.crossref.org/works" params = { "query": query, "rows": rows, "filter": "type:journal-article" } try: response = requests.get(url, params=params) response.raise_for_status() return response.json() except requests.exceptions.HTTPError as e: st.error(f"HTTP error occurred: {e}") return None except Exception as e: st.error(f"An error occurred: {e}") return None # Function to search Semantic Scholar using the user's query def search_semantic_scholar(query, limit=10): url = "https://api.semanticscholar.org/graph/v1/paper/search" params = { "query": query, "limit": limit, "fields": "title,authors,venue,year,abstract,url,externalIds" } headers = { "Accept": "application/json" # Add your API key if you have one: "x-api-key": "YOUR_API_KEY" } try: response = requests.get(url, params=params, headers=headers) response.raise_for_status() return response.json() except requests.exceptions.HTTPError as e: st.error(f"Semantic Scholar HTTP error: {e}") return None except Exception as e: st.error(f"Semantic Scholar error: {e}") return None # Function to search arXiv using the user's query def search_arxiv(query, max_results=10): base_url = "http://export.arxiv.org/api/query" params = { "search_query": f"all:{query}", "max_results": max_results, "sortBy": "relevance", "sortOrder": "descending" } try: response = requests.get(base_url, params=params) response.raise_for_status() # Parse the XML response root = ET.fromstring(response.content) # Initialize list to store entries entries = [] # Extract data from each entry for entry in root.findall('{http://www.w3.org/2005/Atom}entry'): title = entry.find('{http://www.w3.org/2005/Atom}title').text.strip() # Get authors authors = [] for author in entry.findall('{http://www.w3.org/2005/Atom}author'): name = author.find('{http://www.w3.org/2005/Atom}name').text authors.append(name) # Get abstract abstract = entry.find('{http://www.w3.org/2005/Atom}summary').text.strip() # Get URL url = "" for link in entry.findall('{http://www.w3.org/2005/Atom}link'): if link.get('title') == 'pdf': url = link.get('href') break # Get published date published = entry.find('{http://www.w3.org/2005/Atom}published').text.split('T')[0] # Get DOI if available doi = "" arxiv_id = entry.find('{http://www.w3.org/2005/Atom}id').text.split('/abs/')[-1] entries.append({ "title": title, "authors": ', '.join(authors), "abstract": abstract, "url": url, "published": published, "arxiv_id": arxiv_id, "doi": doi }) return {"entries": entries} except requests.exceptions.HTTPError as e: st.error(f"arXiv HTTP error: {e}") return None except Exception as e: st.error(f"arXiv error: {e}") return None # Function to fetch abstract from PubMed using DOI def fetch_pubmed_abstract(doi): try: # First, search for the PubMed ID using the DOI search_url = f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term={doi}[doi]&retmode=json" search_response = requests.get(search_url) search_data = search_response.json() # Check if we found a PubMed ID id_list = search_data.get('esearchresult', {}).get('idlist', []) if not id_list: return "" pubmed_id = id_list[0] # Now fetch the abstract using the PubMed ID fetch_url = f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id={pubmed_id}&retmode=xml" fetch_response = requests.get(fetch_url) # Parse the XML response root = ET.fromstring(fetch_response.content) # Look for AbstractText in the XML abstract_elements = root.findall(".//AbstractText") if abstract_elements: # Combine all abstract sections if there are multiple abstract = " ".join([elem.text for elem in abstract_elements if elem.text]) return abstract return "" except Exception as e: # If anything goes wrong, return empty string return "" # Function to display CrossRef results with enhanced abstract fetching def display_crossref_results(data): if data: items = data.get('message', {}).get('items', []) if not items: st.warning("No CrossRef results found.") return None paper_list = [] # Use a progress bar for abstract fetching progress_bar = st.progress(0) status_text = st.empty() for i, item in enumerate(items): status_text.text(f"Processing CrossRef paper {i+1}/{len(items)}...") doi = item.get('DOI', '') # Try to get abstract from PubMed for papers with DOI abstract = "" if doi: abstract = fetch_pubmed_abstract(doi) # If we couldn't get an abstract from PubMed, try using CrossRef's abstract if available if not abstract and 'abstract' in item: abstract = re.sub(r'<[^>]+>', '', item['abstract']) paper = { "Source": "CrossRef", "Title": item.get('title', [''])[0], "Author(s)": ', '.join([author.get('family', '') for author in item.get('author', [])]), "Journal": item.get('container-title', [''])[0], "Abstract": abstract, "DOI": doi, "Link": item.get('URL', ''), "Published": item.get('issued', {}).get('date-parts', [[None]])[0][0] if 'issued' in item else "N/A" } paper_list.append(paper) # Update progress bar progress_bar.progress((i+1)/len(items)) # Clear progress indicators progress_bar.empty() status_text.empty() return paper_list else: st.warning("No CrossRef data to display.") return None # Function to display Semantic Scholar results def display_semantic_scholar_results(data): if data: items = data.get('data', []) if not items: st.warning("No Semantic Scholar results found.") return None paper_list = [] for item in items: authors = item.get('authors', []) author_names = ', '.join([author.get('name', '') for author in authors]) doi = item.get('externalIds', {}).get('DOI', '') paper = { "Source": "Semantic Scholar", "Title": item.get('title', ''), "Author(s)": author_names, "Journal": item.get('venue', ''), "Abstract": item.get('abstract', ''), "DOI": doi, "Link": item.get('url', ''), "Published": item.get('year', 'N/A') } paper_list.append(paper) return paper_list else: st.warning("No Semantic Scholar data to display.") return None # Function to display arXiv results def display_arxiv_results(data): if data: entries = data.get('entries', []) if not entries: st.warning("No arXiv results found.") return None paper_list = [] for entry in entries: paper = { "Source": "arXiv", "Title": entry.get('title', ''), "Author(s)": entry.get('authors', ''), "Journal": "arXiv preprint", "Abstract": entry.get('abstract', ''), "DOI": entry.get('doi', ''), "Link": entry.get('url', ''), "Published": entry.get('published', 'N/A'), "arXiv ID": entry.get('arxiv_id', '') } paper_list.append(paper) return paper_list else: st.warning("No arXiv data to display.") return None # Function to run a comprehensive search across all APIs def run_comprehensive_search(query, max_results=10): with st.spinner("Searching multiple academic databases..."): # Create columns for status indicators col1, col2, col3 = st.columns(3) # Search CrossRef with col1: with st.spinner("Searching CrossRef..."): crossref_data = search_crossref(query, rows=max_results) crossref_results = display_crossref_results(crossref_data) if crossref_results: st.success(f"Found {len(crossref_results)} results in CrossRef") else: st.info("No results from CrossRef") # Search Semantic Scholar with col2: with st.spinner("Searching Semantic Scholar..."): semantic_data = search_semantic_scholar(query, limit=max_results) semantic_results = display_semantic_scholar_results(semantic_data) if semantic_results: st.success(f"Found {len(semantic_results)} results in Semantic Scholar") else: st.info("No results from Semantic Scholar") # Search arXiv with col3: with st.spinner("Searching arXiv..."): arxiv_data = search_arxiv(query, max_results=max_results) arxiv_results = display_arxiv_results(arxiv_data) if arxiv_results: st.success(f"Found {len(arxiv_results)} results in arXiv") else: st.info("No results from arXiv") # Combine results all_results = [] if crossref_results: all_results.extend(crossref_results) if semantic_results: all_results.extend(semantic_results) if arxiv_results: all_results.extend(arxiv_results) if all_results: df = pd.DataFrame(all_results) return df else: st.warning("No results found across any of the academic databases.") return None # Function to display the results in a table format def display_results(data): if data: items = data.get('message', {}).get('items', []) if not items: st.warning("No results found for the query.") return None paper_list = [] for item in items: # Extract abstract if available abstract = "" if 'abstract' in item: # Clean up the abstract text - remove HTML tags if present abstract = re.sub(r'<[^>]+>', '', item['abstract']) paper = { "Title": item.get('title', [''])[0], "Author(s)": ', '.join([author.get('family', '') for author in item.get('author', [])]), "Journal": item.get('container-title', [''])[0], "Abstract": abstract, "DOI": item.get('DOI', ''), "Link": item.get('URL', ''), "Published": item.get('issued', {}).get('date-parts', [[None]])[0][0] if 'issued' in item else "N/A" } paper_list.append(paper) df = pd.DataFrame(paper_list) # Display the dataframe with a scrollable container for long abstracts st.write(df) return df else: st.warning("No data to display.") return None # Add the generate_literature_survey function below your other function definitions def generate_literature_survey(papers, api_key="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"): """ Generate a literature survey based on paper abstracts using Groq API with Llama-3.3-70B-Instruct Parameters: papers (list): List of papers with abstracts api_key (str): Groq API key Returns: str: Generated literature survey """ # Check if we have papers with abstracts if not papers or len(papers) == 0: return "No papers found to generate a literature survey." # Filter papers that have abstracts papers_with_abstracts = [p for p in papers if p.get("Abstract") and len(p.get("Abstract")) > 50] if len(papers_with_abstracts) == 0: return "Cannot generate a literature survey because none of the papers have substantial abstracts." # Construct the prompt for the LLM paper_info = [] for i, paper in enumerate(papers_with_abstracts[:10]): # Limit to 10 papers to avoid token limits paper_info.append(f"Paper {i+1}:\nTitle: {paper.get('Title', 'Unknown')}\nAuthors: {paper.get('Author(s)', 'Unknown')}\nYear: {paper.get('Published', 'Unknown')}\nAbstract: {paper.get('Abstract', 'No abstract available')}\n") papers_text = "\n".join(paper_info) prompt = f"""You are an expert academic researcher. Based on the following papers and their abstracts, write a concise literature survey that: 1. Identifies the main themes and research directions 2. Highlights methodological approaches 3. Summarizes key findings 4. Points out research gaps if evident 5. Suggests potential future research directions Here are the papers: {papers_text} Please organize the survey by themes rather than by individual papers, creating connections between studies. Format your response with markdown headings for better readability. """ # Make the API request to Groq url = "https://api.groq.com/openai/v1/chat/completions" # Ensure this is the correct endpoint headers = { "Authorization": f"Bearer {api_key}", "Content-Type": "application/json" } data = { "model": "llama-3.3-70b-instruct", # Updated model name here "messages": [ {"role": "system", "content": "You are an academic research assistant that creates comprehensive literature surveys."}, {"role": "user", "content": prompt} ], "temperature": 0.3, "max_tokens": 2000 } try: response = requests.post(url, headers=headers, data=json.dumps(data)) print(f"Response Status Code: {response.status_code}") # Log status code print(f"Response Body: {response.text}") # Log full response body response.raise_for_status() # Raise an exception for HTTP errors result = response.json() survey_text = result["choices"][0]["message"]["content"] return survey_text except requests.exceptions.HTTPError as e: print(f"HTTP Error: {e}") return f"Failed to generate literature survey due to an HTTP error: {str(e)}" except Exception as e: print(f"Unexpected Error: {e}") return f"Failed to generate literature survey due to an error: {str(e)}" # Example usage papers = [ {"Title": "Sample Paper", "Author(s)": "Mahatir Ahmed Tusher", "Published": "2023", "Abstract": "This is a sample abstract with more than 50 characters to test the function."} ] survey = generate_literature_survey(papers, api_key="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") print(survey) # Add the add_literature_survey_button function def add_literature_survey_button(search_results_df): """ Add a button to generate a literature survey based on search results Parameters: search_results_df (pandas.DataFrame): DataFrame containing search results """ if search_results_df is not None and not search_results_df.empty: # Check if arXiv results are included has_arxiv = "Source" in search_results_df.columns and "arXiv" in search_results_df["Source"].values if not has_arxiv: st.warning("For best literature survey results, include arXiv in your search sources. arXiv papers typically have more comprehensive abstracts.") if st.button("Generate Literature Survey"): with st.spinner("Generating literature survey using AI... This may take a minute."): # Convert DataFrame to list of dictionaries papers = search_results_df.to_dict('records') # Generate the survey survey = generate_literature_survey(papers) # Display the survey with proper markdown rendering st.markdown("## Literature Survey") st.markdown(survey) # Add a download button for the survey st.download_button( label="Download Survey as Text", data=survey, file_name="literature_survey.md", mime="text/markdown" ) else: st.info("Run a search first to generate a literature survey.") def literature_survey_page(): st.markdown('
Access thousands of academic papers from CrossRef, Semantic Scholar, and arXiv
", unsafe_allow_html=True) st.markdown('Extract key insights from complex research papers
", unsafe_allow_html=True) st.markdown('Get assistance with creating coherent research content
", unsafe_allow_html=True) st.markdown('Research Finder & Text Companion