MahatirTusher committed on
Commit
9ff0fe4
·
verified ·
1 Parent(s): c5e8d1d

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +1033 -0
app.py ADDED
@@ -0,0 +1,1033 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ import pandas as pd
4
+ import transformers
5
+ from transformers import pipeline
6
+ import tensorflow
7
+ import io
8
+ import base64
9
+ import xml.etree.ElementTree as ET
10
+ import json
11
+ import time
12
+
13
# Set page configuration and styling
# NOTE(review): the page_icon glyph looks mis-encoded in this copy of the
# file; it is kept byte-for-byte here -- confirm against the original source.
st.set_page_config(
    page_title="PaperQuest: Research Finder",
    page_icon="πŸ“š",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS to make the UI more professional.
# The sidebar rule keeps the legacy generated class name (.css-1d391kg) and
# adds the data-testid selector as well, because generated class names differ
# from one Streamlit build to the next while the test id does not.
st.markdown("""
<style>
    /* Main theme colors */
    :root {
        --primary-color: #4361ee;
        --secondary-color: #3a0ca3;
        --accent-color: #4cc9f0;
        --background-color: #f8f9fa;
        --text-color: #212529;
    }

    /* Overall page styling */
    .main {
        background-color: var(--background-color);
        color: var(--text-color);
    }

    /* Header styling */
    h1, h2, h3 {
        color: var(--primary-color);
        font-family: 'Helvetica Neue', sans-serif;
    }

    /* Custom button styling */
    .stButton > button {
        background-color: var(--primary-color);
        color: white;
        border-radius: 6px;
        border: none;
        padding: 0.5rem 1rem;
        font-weight: 600;
        transition: all 0.3s;
    }

    .stButton > button:hover {
        background-color: var(--secondary-color);
        transform: translateY(-2px);
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    }

    /* Custom sidebar styling */
    .css-1d391kg,
    section[data-testid="stSidebar"] {
        background-color: #f1f3f8;
    }

    /* Card-like containers */
    .card {
        background-color: white;
        border-radius: 10px;
        padding: 20px;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
        margin-bottom: 20px;
    }

    /* Hero section */
    .hero {
        background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
        color: white;
        padding: 2rem;
        border-radius: 10px;
        margin-bottom: 2rem;
        text-align: center;
    }

    /* Tables */
    .dataframe {
        width: 100%;
        border-collapse: collapse;
    }

    .dataframe th {
        background-color: var(--primary-color);
        color: white;
        text-align: left;
        padding: 12px;
    }

    .dataframe td {
        padding: 8px 12px;
        border-bottom: 1px solid #ddd;
    }

    .dataframe tr:nth-child(even) {
        background-color: #f9f9f9;
    }

    /* Feature icons */
    .feature-icon {
        font-size: 2.5rem;
        color: var(--primary-color);
        margin-bottom: 1rem;
        text-align: center;
    }

    /* Footer */
    .footer {
        text-align: center;
        padding: 20px;
        background-color: #f1f3f8;
        margin-top: 40px;
        border-radius: 10px;
    }
</style>
""", unsafe_allow_html=True)
126
+
127
+ import requests
128
+ import xml.etree.ElementTree as ET
129
+ import pandas as pd
130
+ import streamlit as st
131
+ import re
132
+
133
+ # Function to search CrossRef using the user's query
134
def search_crossref(query, rows=10, timeout=15):
    """
    Query the CrossRef works API for journal articles matching a search string.

    Parameters:
    query (str): Free-text search terms
    rows (int): Maximum number of records to request
    timeout (float): Seconds to wait for the server before giving up

    Returns:
    dict or None: Decoded JSON response, or None when the request failed
                  (the failure is reported to the user through st.error)
    """
    url = "https://api.crossref.org/works"

    params = {
        "query": query,
        "rows": rows,
        "filter": "type:journal-article",
    }

    try:
        # Without a timeout a stalled connection would block the app forever.
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as e:
        st.error(f"HTTP error occurred: {e}")
        return None
    except Exception as e:
        # Best-effort boundary: connection errors, timeouts and malformed
        # JSON all end up here and are shown to the user instead of crashing.
        st.error(f"An error occurred: {e}")
        return None
153
+
154
+ # Function to search Semantic Scholar using the user's query
155
def search_semantic_scholar(query, limit=10, timeout=15):
    """
    Query the Semantic Scholar paper-search endpoint.

    Parameters:
    query (str): Free-text search terms
    limit (int): Maximum number of papers to request
    timeout (float): Seconds to wait for the server before giving up

    Returns:
    dict or None: Decoded JSON response, or None when the request failed
                  (the failure is reported to the user through st.error)
    """
    url = "https://api.semanticscholar.org/graph/v1/paper/search"

    params = {
        "query": query,
        "limit": limit,
        "fields": "title,authors,venue,year,abstract,url,externalIds",
    }

    headers = {
        "Accept": "application/json"
        # Add your API key if you have one: "x-api-key": "YOUR_API_KEY"
    }

    try:
        # Without a timeout a stalled connection would block the app forever.
        response = requests.get(url, params=params, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as e:
        st.error(f"Semantic Scholar HTTP error: {e}")
        return None
    except Exception as e:
        # Best-effort boundary: connection errors, timeouts and malformed
        # JSON all end up here and are shown to the user instead of crashing.
        st.error(f"Semantic Scholar error: {e}")
        return None
179
+
180
+ # Function to search arXiv using the user's query
181
def _atom_child_text(element, tag, namespace="{http://www.w3.org/2005/Atom}"):
    """Return the stripped text of child *tag*, or "" when it is missing or empty."""
    child = element.find(f"{namespace}{tag}")
    if child is None or child.text is None:
        return ""
    return child.text.strip()


def _parse_arxiv_entry(entry):
    """Turn one Atom <entry> element into the plain dict used by the app."""
    atom = "{http://www.w3.org/2005/Atom}"

    # The <id> element holds the abstract-page URL (".../abs/<arxiv id>").
    abs_url = _atom_child_text(entry, "id")

    author_names = (
        _atom_child_text(author, "name")
        for author in entry.findall(f"{atom}author")
    )

    # Prefer the link tagged as the PDF; fall back to the abstract page so the
    # record never ends up without a usable link.
    pdf_url = next(
        (
            link.get('href') or ""
            for link in entry.findall(f"{atom}link")
            if link.get('title') == 'pdf'
        ),
        "",
    )

    return {
        # Collapse runs of whitespace/newlines in the title into single spaces.
        "title": " ".join(_atom_child_text(entry, "title").split()),
        "authors": ', '.join(name for name in author_names if name),
        "abstract": _atom_child_text(entry, "summary"),
        "url": pdf_url or abs_url,
        "published": _atom_child_text(entry, "published").split('T')[0],
        "arxiv_id": abs_url.split('/abs/')[-1],
        # The DOI, when the authors supplied one, lives in the arXiv
        # extension namespace rather than the Atom namespace.
        "doi": _atom_child_text(entry, "doi", namespace="{http://arxiv.org/schemas/atom}"),
    }


def search_arxiv(query, max_results=10, timeout=15):
    """
    Search arXiv through its Atom query API.

    Parameters:
    query (str): Free-text search terms (searched across all fields)
    max_results (int): Maximum number of entries to request
    timeout (float): Seconds to wait for the server before giving up

    Returns:
    dict or None: {"entries": [...]} where each entry has the keys title,
                  authors, abstract, url, published, arxiv_id and doi;
                  None when the request or the XML parsing failed
                  (the failure is reported to the user through st.error)
    """
    base_url = "https://export.arxiv.org/api/query"

    params = {
        "search_query": f"all:{query}",
        "max_results": max_results,
        "sortBy": "relevance",
        "sortOrder": "descending",
    }

    try:
        response = requests.get(base_url, params=params, timeout=timeout)
        response.raise_for_status()

        # Parse the XML response
        root = ET.fromstring(response.content)
        entries = [
            _parse_arxiv_entry(entry)
            for entry in root.findall('{http://www.w3.org/2005/Atom}entry')
        ]
        return {"entries": entries}
    except requests.exceptions.HTTPError as e:
        st.error(f"arXiv HTTP error: {e}")
        return None
    except Exception as e:
        # Covers connection problems, timeouts and malformed XML.
        st.error(f"arXiv error: {e}")
        return None
245
+
246
+ # Function to fetch abstract from PubMed using DOI
247
def fetch_pubmed_abstract(doi, timeout=10):
    """
    Look up a paper's abstract on PubMed by DOI.

    The lookup is best-effort: any failure (network error, bad status,
    unparsable response, no match) yields an empty string so that callers
    can fall back to another abstract source.

    Parameters:
    doi (str): DOI of the paper
    timeout (float): Seconds to wait for each of the two HTTP requests

    Returns:
    str: The abstract text (sections joined by a space), or "" if unavailable
    """
    if not doi:
        return ""

    eutils = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"

    try:
        # First, search for the PubMed ID using the DOI. Passing the term via
        # `params` lets requests URL-encode it; DOIs may contain characters
        # that are not safe to paste into a query string verbatim.
        search_response = requests.get(
            f"{eutils}/esearch.fcgi",
            params={"db": "pubmed", "term": f"{doi}[doi]", "retmode": "json"},
            timeout=timeout,
        )
        search_response.raise_for_status()
        id_list = search_response.json().get('esearchresult', {}).get('idlist', [])
        if not id_list:
            return ""

        # Now fetch the record for the first matching PubMed ID
        fetch_response = requests.get(
            f"{eutils}/efetch.fcgi",
            params={"db": "pubmed", "id": id_list[0], "retmode": "xml"},
            timeout=timeout,
        )
        fetch_response.raise_for_status()

        root = ET.fromstring(fetch_response.content)

        # itertext() also collects text nested inside inline markup, which a
        # plain `.text` would cut off at the first child element.
        sections = (
            "".join(elem.itertext()).strip()
            for elem in root.findall(".//AbstractText")
        )
        return " ".join(section for section in sections if section)
    except Exception:
        # Deliberate best-effort: the abstract is optional enrichment.
        return ""
279
+
280
+ # Function to display CrossRef results with enhanced abstract fetching
281
def _first_item(values, default=""):
    """Return the first element of *values*, or *default* when it is empty or missing."""
    return values[0] if values else default


def _crossref_year(item):
    """Return the publication year of a CrossRef item, or "N/A" when absent."""
    date_parts = (item.get('issued') or {}).get('date-parts') or [[None]]
    year = _first_item(_first_item(date_parts, []), None)
    return year if year is not None else "N/A"


def _crossref_authors(item):
    """Join the author names of a CrossRef item, skipping blank entries."""
    # Some author entries have no 'family' key; 'name' is tried as a fallback
    # (presumably organisational authors -- verify against the CrossRef schema).
    names = (
        author.get('family') or author.get('name') or ''
        for author in item.get('author') or []
    )
    return ', '.join(name for name in names if name)


def display_crossref_results(data):
    """
    Convert a CrossRef response into the app's list-of-dicts paper format.

    For every item with a DOI the abstract is first looked up on PubMed; if
    that yields nothing, the abstract embedded in the CrossRef record (with
    markup tags stripped) is used. A progress bar is shown while the items
    are processed and is always removed afterwards.

    Parameters:
    data (dict or None): Decoded JSON returned by search_crossref

    Returns:
    list or None: One dict per paper with the keys Source, Title, Author(s),
                  Journal, Abstract, DOI, Link and Published; None when there
                  is nothing to show (a warning is displayed in that case)
    """
    if not data:
        st.warning("No CrossRef data to display.")
        return None

    items = (data.get('message') or {}).get('items') or []
    if not items:
        st.warning("No CrossRef results found.")
        return None

    paper_list = []

    # Use a progress bar for abstract fetching
    progress_bar = st.progress(0)
    status_text = st.empty()

    try:
        for i, item in enumerate(items):
            status_text.text(f"Processing CrossRef paper {i+1}/{len(items)}...")

            doi = item.get('DOI', '')

            # Try to get abstract from PubMed for papers with DOI
            abstract = fetch_pubmed_abstract(doi) if doi else ""

            # If PubMed had nothing, fall back to CrossRef's own abstract,
            # which may be wrapped in markup tags.
            if not abstract and item.get('abstract'):
                abstract = re.sub(r'<[^>]+>', '', item['abstract'])

            paper_list.append({
                "Source": "CrossRef",
                # 'title' / 'container-title' are lists and may be empty,
                # so they are never indexed blindly.
                "Title": _first_item(item.get('title')),
                "Author(s)": _crossref_authors(item),
                "Journal": _first_item(item.get('container-title')),
                "Abstract": abstract,
                "DOI": doi,
                "Link": item.get('URL', ''),
                "Published": _crossref_year(item),
            })

            # Update progress bar
            progress_bar.progress((i+1)/len(items))
    finally:
        # Clear progress indicators even if a row failed part-way through
        progress_bar.empty()
        status_text.empty()

    return paper_list
331
+
332
+ # Function to display Semantic Scholar results
333
def display_semantic_scholar_results(data):
    """
    Convert a Semantic Scholar response into the app's list-of-dicts format.

    Fields that are present in the response but set to null are treated the
    same as missing fields, so every text column is always a string.

    Parameters:
    data (dict or None): Decoded JSON returned by search_semantic_scholar

    Returns:
    list or None: One dict per paper with the keys Source, Title, Author(s),
                  Journal, Abstract, DOI, Link and Published; None when there
                  is nothing to show (a warning is displayed in that case)
    """
    if not data:
        st.warning("No Semantic Scholar data to display.")
        return None

    items = data.get('data') or []
    if not items:
        st.warning("No Semantic Scholar results found.")
        return None

    paper_list = []

    for item in items:
        # `.get(key, default)` only covers a missing key; `or` also covers
        # an explicit null value, which would otherwise break `.get` / join.
        authors = item.get('authors') or []
        author_names = ', '.join(
            name for name in (author.get('name') or '' for author in authors) if name
        )

        doi = (item.get('externalIds') or {}).get('DOI') or ''
        year = item.get('year')

        paper_list.append({
            "Source": "Semantic Scholar",
            "Title": item.get('title') or '',
            "Author(s)": author_names,
            "Journal": item.get('venue') or '',
            "Abstract": item.get('abstract') or '',
            "DOI": doi,
            "Link": item.get('url') or '',
            "Published": year if year is not None else 'N/A',
        })

    return paper_list
364
+
365
+ # Function to display arXiv results
366
def display_arxiv_results(data):
    """
    Map the parsed arXiv entries onto the app's common paper-record layout.

    Parameters:
    data (dict or None): Result of search_arxiv, i.e. {"entries": [...]}

    Returns:
    list or None: One dict per entry with the keys Source, Title, Author(s),
                  Journal, Abstract, DOI, Link, Published and arXiv ID;
                  None when there is nothing to show (a warning is displayed)
    """
    if not data:
        st.warning("No arXiv data to display.")
        return None

    records = data.get('entries', [])
    if not records:
        st.warning("No arXiv results found.")
        return None

    # Every arXiv record is labelled as a preprint in the Journal column.
    return [
        {
            "Source": "arXiv",
            "Title": record.get('title', ''),
            "Author(s)": record.get('authors', ''),
            "Journal": "arXiv preprint",
            "Abstract": record.get('abstract', ''),
            "DOI": record.get('doi', ''),
            "Link": record.get('url', ''),
            "Published": record.get('published', 'N/A'),
            "arXiv ID": record.get('arxiv_id', ''),
        }
        for record in records
    ]
393
+
394
+ # Function to run a comprehensive search across all APIs
395
def run_comprehensive_search(query, max_results=10):
    """
    Search CrossRef, Semantic Scholar and arXiv in turn and merge the results.

    Each source reports its own status (success or info message) in one of
    three side-by-side columns while the search is running.

    Parameters:
    query (str): Free-text search terms
    max_results (int): Maximum number of results requested from each source

    Returns:
    pandas.DataFrame or None: All papers from all sources, in the order
                              CrossRef, Semantic Scholar, arXiv; None when
                              no source returned anything (a warning is shown)
    """
    # One (label, fetch-and-convert) pair per source; each source's search
    # function takes its result limit under a different keyword.
    backends = (
        ("CrossRef",
         lambda: display_crossref_results(search_crossref(query, rows=max_results))),
        ("Semantic Scholar",
         lambda: display_semantic_scholar_results(search_semantic_scholar(query, limit=max_results))),
        ("arXiv",
         lambda: display_arxiv_results(search_arxiv(query, max_results=max_results))),
    )

    merged = []
    with st.spinner("Searching multiple academic databases..."):
        # Create columns for status indicators, one per source
        for column, (label, fetch) in zip(st.columns(3), backends):
            with column:
                with st.spinner(f"Searching {label}..."):
                    found = fetch()
                    if found:
                        st.success(f"Found {len(found)} results in {label}")
                        merged.extend(found)
                    else:
                        st.info(f"No results from {label}")

        if not merged:
            st.warning("No results found across any of the academic databases.")
            return None

        return pd.DataFrame(merged)
445
+
446
+ # Function to display the results in a table format
447
def display_results(data):
    """
    Render a raw CrossRef response as a table and return it as a DataFrame.

    Parameters:
    data (dict or None): Decoded JSON of a CrossRef works query

    Returns:
    pandas.DataFrame or None: One row per paper with the columns Title,
                              Author(s), Journal, Abstract, DOI, Link and
                              Published; None when there is nothing to show
                              (a warning is displayed in that case)
    """
    if not data:
        st.warning("No data to display.")
        return None

    items = (data.get('message') or {}).get('items') or []
    if not items:
        st.warning("No results found for the query.")
        return None

    def first(values, default=""):
        # CrossRef list fields may be empty, so they are never indexed blindly.
        return values[0] if values else default

    paper_list = []
    for item in items:
        # Clean up the abstract text - remove markup tags if present
        abstract = re.sub(r'<[^>]+>', '', item.get('abstract') or '')

        date_parts = (item.get('issued') or {}).get('date-parts') or [[None]]
        year = first(first(date_parts, []), None)

        author_names = (
            author.get('family') or author.get('name') or ''
            for author in item.get('author') or []
        )

        paper_list.append({
            "Title": first(item.get('title')),
            "Author(s)": ', '.join(name for name in author_names if name),
            "Journal": first(item.get('container-title')),
            "Abstract": abstract,
            "DOI": item.get('DOI', ''),
            "Link": item.get('URL', ''),
            "Published": year if year is not None else "N/A",
        })

    df = pd.DataFrame(paper_list)

    # Display the dataframe
    st.write(df)

    return df
482
+ # Add the generate_literature_survey function below your other function definitions
483
def generate_literature_survey(papers, api_key=None,
                               model="meta-llama/Llama-3.3-70B-Instruct",
                               timeout=60):
    """
    Generate a literature survey from paper abstracts using the Groq chat API.

    SECURITY: an API key used to be hard-coded as the default value of
    `api_key`. A key committed to source control must be considered leaked
    and should be revoked. The key is now taken from the `api_key` argument
    or, if that is not given, from the GROQ_API_KEY environment variable.

    Parameters:
    papers (list): List of paper dicts (keys Title, Author(s), Published, Abstract)
    api_key (str or None): Groq API key; falls back to $GROQ_API_KEY
    model (str): Model identifier sent to the API
    timeout (float): Seconds to wait for the API response

    Returns:
    str: Generated literature survey, or a human-readable message explaining
         why no survey could be produced
    """
    import os  # local import: `os` is not part of the file's import header

    # Check if we have papers at all
    if not papers:
        return "No papers found to generate a literature survey."

    # Keep only papers with a substantial abstract. The isinstance check
    # protects against non-string placeholders (None, NaN) in the column.
    papers_with_abstracts = [
        p for p in papers
        if isinstance(p.get("Abstract"), str) and len(p["Abstract"]) > 50
    ]

    if not papers_with_abstracts:
        return "Cannot generate a literature survey because none of the papers have substantial abstracts."

    api_key = api_key or os.environ.get("GROQ_API_KEY")
    if not api_key:
        return (
            "Failed to generate literature survey: no Groq API key was provided. "
            "Set the GROQ_API_KEY environment variable or pass api_key explicitly."
        )

    # Construct the prompt for the LLM
    paper_info = [
        f"Paper {i+1}:\n"
        f"Title: {paper.get('Title', 'Unknown')}\n"
        f"Authors: {paper.get('Author(s)', 'Unknown')}\n"
        f"Year: {paper.get('Published', 'Unknown')}\n"
        f"Abstract: {paper.get('Abstract', 'No abstract available')}\n"
        for i, paper in enumerate(papers_with_abstracts[:10])  # Limit to 10 papers to avoid token limits
    ]
    papers_text = "\n".join(paper_info)

    prompt = (
        "You are an expert academic researcher. Based on the following papers and their abstracts,\n"
        "write a concise literature survey that:\n"
        "1. Identifies the main themes and research directions\n"
        "2. Highlights methodological approaches\n"
        "3. Summarizes key findings\n"
        "4. Points out research gaps if evident\n"
        "5. Suggests potential future research directions\n"
        "\n"
        "Here are the papers:\n"
        "\n"
        f"{papers_text}\n"
        "\n"
        "Please organize the survey by themes rather than by individual papers, creating connections between studies.\n"
        "Format your response with markdown headings for better readability.\n"
    )

    # Make the API request to Groq
    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    # NOTE(review): the default model id looks like a Hugging Face repository
    # name -- confirm that it matches an id from Groq's model list.
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": "You are an academic research assistant that creates comprehensive literature surveys."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.3,
        "max_tokens": 2000
    }

    try:
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()
        result = response.json()
        return result["choices"][0]["message"]["content"]
    except Exception as e:
        # Boundary handler: network, HTTP and response-shape errors are all
        # reported to the user and returned as text instead of crashing.
        st.error(f"Error generating literature survey: {e}")
        return f"Failed to generate literature survey due to an error: {str(e)}"
553
+
554
+ # Add the add_literature_survey_button function
555
def add_literature_survey_button(search_results_df):
    """
    Offer a "Generate Literature Survey" button for a set of search results.

    When the button is pressed, the rows of the DataFrame are handed to
    generate_literature_survey and the returned text is rendered as markdown
    together with a download button. If no arXiv rows are present, a hint to
    include arXiv is shown first.

    Parameters:
    search_results_df (pandas.DataFrame): DataFrame containing search results
    """
    if search_results_df is None or search_results_df.empty:
        st.info("Run a search first to generate a literature survey.")
        return

    # Check if arXiv results are included
    includes_arxiv = (
        "Source" in search_results_df.columns
        and "arXiv" in search_results_df["Source"].values
    )
    if not includes_arxiv:
        st.warning("For best literature survey results, include arXiv in your search sources. arXiv papers typically have more comprehensive abstracts.")

    if not st.button("Generate Literature Survey"):
        return

    with st.spinner("Generating literature survey using AI... This may take a minute."):
        # One dict per row, keyed by column name
        records = search_results_df.to_dict('records')
        survey = generate_literature_survey(records)

        # Display the survey with proper markdown rendering
        st.markdown("## Literature Survey")
        st.markdown(survey)

        # Let the user save the generated text as a markdown file
        st.download_button(
            label="Download Survey as Text",
            data=survey,
            file_name="literature_survey.md",
            mime="text/markdown"
        )
590
+
591
def literature_survey_page():
    """
    Render the "Literature Survey Generator" page.

    Uses the DataFrame stored in st.session_state.search_results_df. When it
    holds rows, the survey button is shown; otherwise the user is pointed to
    the search page, and pressing the button sets st.session_state.page to
    "search".
    """
    st.markdown('<div class="card">', unsafe_allow_html=True)
    st.title("Literature Survey Generator")
    st.write("Generate comprehensive literature surveys from your search results.")

    stored_results = st.session_state.search_results_df
    has_results = stored_results is not None and not stored_results.empty

    if not has_results:
        st.info("Please perform a search first to gather papers for your literature survey.")
        if st.button("Go to Search Page"):
            st.session_state.page = "search"
    else:
        st.write(f"Using {len(stored_results)} papers from your last search.")
        add_literature_survey_button(stored_results)

    st.markdown('</div>', unsafe_allow_html=True)
604
+
605
+ # Function to summarize text using the specified model
606
def summarize_text(text):
    """
    Summarize a piece of text with a text2text-generation pipeline.

    A new pipeline is created on every call. Any failure is reported with
    st.error and replaced by a fixed fallback sentence.

    Parameters:
    text (str): Text to summarize

    Returns:
    str: The generated summary, or "Summary could not be generated."
    """
    model_name = "spacemanidol/flan-t5-large-website-summarizer"
    try:
        # Initialize the summarization model with PyTorch
        summarizer = pipeline("text2text-generation", model=model_name, framework="pt")
        outputs = summarizer(text, max_length=150, min_length=50, do_sample=False)
        first_output = outputs[0]
        return first_output['generated_text']
    except Exception as exc:
        st.error(f"An error occurred during summarization: {exc}")
        return "Summary could not be generated."
615
+
616
+ # Function to generate text
617
def generate_text(text):
    """
    Generate text from a prompt with a text2text-generation pipeline.

    A new pipeline is created on every call. Any failure is reported with
    st.error and replaced by a fixed fallback sentence.

    Parameters:
    text (str): Input text passed to the model

    Returns:
    str: The generated text, or "Generated text could not be created."
    """
    model_name = "JorgeSarry/est5-summarize"
    try:
        # Initialize the text generation model with PyTorch
        generator = pipeline("text2text-generation", model=model_name, framework="pt")
        outputs = generator(text, max_length=150, min_length=50, do_sample=False)
        first_output = outputs[0]
        return first_output['generated_text']
    except Exception as exc:
        st.error(f"An error occurred during text generation: {exc}")
        return "Generated text could not be created."
626
+
627
+ # Function to convert DataFrame to CSV
628
def convert_df_to_csv(df):
    """
    Serialize a DataFrame as CSV without the index column.

    Parameters:
    df (pandas.DataFrame): Table to serialize

    Returns:
    bytes: UTF-8 encoded CSV text
    """
    csv_text = df.to_csv(index=False)
    return csv_text.encode('utf-8')
630
+
631
+ # Function to convert DataFrame to text
632
def convert_df_to_txt(df):
    """
    Render a DataFrame as tab-separated plain text.

    The output starts with a title line and a blank line, followed by the
    column names, a dashed rule as long as the column-name line, and one
    tab-separated line per row. Every line ends with a newline.

    Parameters:
    df (pandas.DataFrame): Table to render (column names must be strings)

    Returns:
    str: The formatted text
    """
    column_line = "\t".join(df.columns)

    lines = [
        "PaperQuest Research Results",
        "",
        column_line,
        "-" * len(column_line),
    ]

    # One line per row, each cell converted with str()
    lines.extend(
        "\t".join(str(cell) for cell in record.values)
        for _, record in df.iterrows()
    )

    return "\n".join(lines) + "\n"
648
+
649
+ # Function to create download button
650
def get_download_button(df, file_type="csv", button_text="Download as CSV"):
    """
    Build an HTML download link that embeds the DataFrame as a data URI.

    Parameters:
    df (pandas.DataFrame): Table to offer for download
    file_type (str): "csv" for CSV output; any other value produces the
                     plain-text rendering and is used as the file extension
    button_text (str): Visible text of the link

    Returns:
    str: An <a> element with class "download-button"
    """
    if file_type == "csv":
        mime_type = "text/csv"
        payload = convert_df_to_csv(df)
    else:  # text
        mime_type = "text/plain"
        payload = convert_df_to_txt(df).encode()

    # The whole file travels inside the link itself as base64.
    encoded = base64.b64encode(payload).decode()
    href = f'data:{mime_type};base64,{encoded}'

    return f'<a href="{href}" download="research_results.{file_type}" class="download-button">{button_text}</a>'
661
+
662
+ # Navigation functions
663
def _render_home_results(results_df):
    """Show the result table followed by the CSV and text download links."""
    st.write(results_df)

    # NOTE(review): the icon glyphs in these labels look mis-encoded in this
    # copy of the file; they are kept byte-for-byte -- confirm against the
    # original source.
    col1, col2 = st.columns(2)
    with col1:
        st.markdown(get_download_button(results_df, "csv", "πŸ“Š Download as CSV"), unsafe_allow_html=True)
    with col2:
        st.markdown(get_download_button(results_df, "txt", "πŸ“ Download as Text"), unsafe_allow_html=True)


def home_page():
    """
    Render the landing page: hero banner, quick search form and feature cards.

    A search stores its result in st.session_state.search_results_df (a
    DataFrame, or None when nothing was found). The stored value is reset at
    the start of each search, so results of an earlier search are never
    shown for a new query. When several specific sources are selected, the
    combined search is limited to those sources.
    """
    # Hero section
    st.markdown('<div class="hero">', unsafe_allow_html=True)
    st.title("PaperQuest: Research Finder and Text Companion")
    st.markdown("Discover academic insights and enhance your research journey with our powerful tools")
    st.markdown('</div>', unsafe_allow_html=True)

    # Search bar directly on the home page
    st.markdown('<div class="card">', unsafe_allow_html=True)
    st.subheader("πŸ“š Find Research Papers")

    col1, col2 = st.columns([3, 1])
    with col1:
        query = st.text_input("Enter your research topic or keywords", value="machine learning optimization")
    with col2:
        num_papers = st.slider("Results per source", min_value=5, max_value=50, value=10)

    search_sources = st.multiselect(
        "Select sources",
        options=["CrossRef", "Semantic Scholar", "arXiv", "All"],
        default=["CrossRef"]
    )

    search_clicked = st.button("Search Papers", key="search_home")

    if search_clicked:
        if not query:
            st.warning("Please enter a search query.")
        elif not search_sources:
            st.warning("Please select at least one source.")
        else:
            # Forget the previous search so that a search without hits does
            # not re-display stale results.
            st.session_state.search_results_df = None

            if "All" in search_sources or len(search_sources) > 1:
                # Use comprehensive search function
                results_df = run_comprehensive_search(query, max_results=num_papers)

                # The comprehensive search always queries every source; keep
                # only the sources the user actually asked for.
                if results_df is not None and "All" not in search_sources:
                    results_df = results_df[results_df["Source"].isin(search_sources)].reset_index(drop=True)
                    if results_df.empty:
                        st.warning("No results found in the selected sources.")
                        results_df = None

                st.session_state.search_results_df = results_df

                if results_df is not None:
                    st.subheader(f"Found {len(results_df)} papers")
                    _render_home_results(results_df)
            else:
                # Exactly one specific source is selected
                source = search_sources[0]
                with st.spinner('Searching for papers...'):
                    paper_list = None
                    if source == "CrossRef":
                        response_data = search_crossref(query, rows=num_papers)
                        paper_list = display_crossref_results(response_data)
                    elif source == "Semantic Scholar":
                        response_data = search_semantic_scholar(query, limit=num_papers)
                        paper_list = display_semantic_scholar_results(response_data)
                    elif source == "arXiv":
                        response_data = search_arxiv(query, max_results=num_papers)
                        paper_list = display_arxiv_results(response_data)

                    if paper_list:
                        st.session_state.search_results_df = pd.DataFrame(paper_list)

                if st.session_state.search_results_df is not None:
                    _render_home_results(st.session_state.search_results_df)
    st.markdown('</div>', unsafe_allow_html=True)

    # Features section
    st.markdown("<h2 style='text-align: center; margin-top: 40px;'>Features</h2>", unsafe_allow_html=True)

    # (icon, heading, description) for each of the three feature cards
    features = (
        ("πŸ”", "Comprehensive Search",
         "Access thousands of academic papers from CrossRef, Semantic Scholar, and arXiv"),
        ("πŸ“", "Text Summarization",
         "Extract key insights from complex research papers"),
        ("✨", "Smart Text Generation",
         "Get assistance with creating coherent research content"),
    )

    for column, (icon, heading, description) in zip(st.columns(3), features):
        with column:
            st.markdown('<div class="card">', unsafe_allow_html=True)
            st.markdown(f'<div class="feature-icon">{icon}</div>', unsafe_allow_html=True)
            st.markdown(f"<h3 style='text-align: center;'>{heading}</h3>", unsafe_allow_html=True)
            st.markdown(f"<p style='text-align: center;'>{description}</p>", unsafe_allow_html=True)
            st.markdown('</div>', unsafe_allow_html=True)
762
+
763
+ def search_page():
764
+ st.markdown('<div class="card">', unsafe_allow_html=True)
765
+ st.title("Research Paper Search")
766
+ st.write("Find and explore academic papers across various disciplines.")
767
+
768
+ query = st.text_input("Enter your research topic or keywords", value="machine learning optimization")
769
+
770
+ col1, col2, col3 = st.columns(3)
771
+ with col1:
772
+ num_papers = st.slider("Results per source", min_value=5, max_value=50, value=10)
773
+ with col2:
774
+ search_sources = st.multiselect(
775
+ "Select sources",
776
+ options=["CrossRef", "Semantic Scholar", "arXiv", "All"],
777
+ default=["CrossRef"]
778
+ )
779
+ with col3:
780
+ st.write(" ") # Spacer
781
+ st.write(" ") # Spacer
782
+ search_clicked = st.button("Search")
783
+
784
+ if search_clicked:
785
+ if query:
786
+ if "All" in search_sources or len(search_sources) > 1:
787
+ # Use comprehensive search function
788
+ results_df = run_comprehensive_search(query, max_results=num_papers)
789
+
790
+ if results_df is not None:
791
+ st.subheader(f"Found {len(results_df)} papers across all selected sources")
792
+
793
+ # Add filters
794
+ st.subheader("Filter Results")
795
+ selected_sources = st.multiselect(
796
+ "Filter by sources",
797
+ options=results_df["Source"].unique(),
798
+ default=results_df["Source"].unique()
799
+ )
800
+
801
+ # Convert Published column to string to handle potential numeric values
802
+ results_df["Published"] = results_df["Published"].astype(str)
803
+
804
+ # Extract year from Published column where possible
805
+ def get_year(published_str):
806
+ try:
807
+ if isinstance(published_str, str):
808
+ return int(published_str.split('-')[0]) if '-' in published_str else int(published_str)
809
+ return int(published_str) if published_str else None
810
+ except:
811
+ return None
812
+
813
+ results_df["Year"] = results_df["Published"].apply(get_year)
814
+
815
+ # Filter out None values for the slider
816
+ valid_years = [year for year in results_df["Year"] if year is not None]
817
+ if valid_years:
818
+ min_year = min(valid_years)
819
+ max_year = max(valid_years)
820
+ year_range = st.slider(
821
+ "Publication year range",
822
+ min_value=min_year,
823
+ max_value=max_year,
824
+ value=(min_year, max_year)
825
+ )
826
+
827
+ # Apply filters
828
+ filtered_df = results_df[
829
+ (results_df["Source"].isin(selected_sources)) &
830
+ ((results_df["Year"] >= year_range[0]) & (results_df["Year"] <= year_range[1]) | (results_df["Year"].isna()))
831
+ ]
832
+ else:
833
+ # Just apply source filter if no valid years
834
+ filtered_df = results_df[results_df["Source"].isin(selected_sources)]
835
+
836
+ # Display filtered results
837
+ st.subheader(f"Showing {len(filtered_df)} filtered results")
838
+
839
+ # Display results with expandable rows
840
+ for i, row in filtered_df.iterrows():
841
+ with st.expander(f"{row['Title']} ({row['Source']}, {row['Published']})"):
842
+ st.write(f"**Authors:** {row['Author(s)']}")
843
+ st.write(f"**Journal/Venue:** {row['Journal']}")
844
+ st.write(f"**Abstract:**")
845
+ st.write(row['Abstract'] if row['Abstract'] and row['Abstract'].strip() else "No abstract available")
846
+
847
+ # Display links
848
+ if row['DOI']:
849
+ st.write(f"**DOI:** https://doi.org/{row['DOI']}")
850
+ if row['Link']:
851
+ st.write(f"**Link:** {row['Link']}")
852
+ if 'arXiv ID' in row and row['arXiv ID']:
853
+ st.write(f"**arXiv ID:** {row['arXiv ID']}")
854
+
855
+ st.session_state.search_results_df = filtered_df
856
+
857
+ # Display download buttons
858
+ col1, col2 = st.columns(2)
859
+ with col1:
860
+ st.markdown(get_download_button(filtered_df, "csv", "πŸ“Š Download as CSV"), unsafe_allow_html=True)
861
+ with col2:
862
+ st.markdown(get_download_button(filtered_df, "txt", "πŸ“ Download as Text"), unsafe_allow_html=True)
863
+
864
+ else:
865
+ # Original single-source search
866
+ with st.spinner('Searching for papers...'):
867
+ if "CrossRef" in search_sources:
868
+ response_data = search_crossref(query, rows=num_papers)
869
+ paper_list = display_crossref_results(response_data)
870
+ if paper_list:
871
+ st.session_state.search_results_df = pd.DataFrame(paper_list)
872
+ elif "Semantic Scholar" in search_sources:
873
+ response_data = search_semantic_scholar(query, limit=num_papers)
874
+ paper_list = display_semantic_scholar_results(response_data)
875
+ if paper_list:
876
+ st.session_state.search_results_df = pd.DataFrame(paper_list)
877
+ elif "arXiv" in search_sources:
878
+ response_data = search_arxiv(query, max_results=num_papers)
879
+ paper_list = display_arxiv_results(response_data)
880
+ if paper_list:
881
+ st.session_state.search_results_df = pd.DataFrame(paper_list)
882
+
883
+ if st.session_state.search_results_df is not None:
884
+ st.write(st.session_state.search_results_df)
885
+
886
+ # Display download buttons
887
+ col1, col2 = st.columns(2)
888
+ with col1:
889
+ st.markdown(get_download_button(st.session_state.search_results_df, "csv", "πŸ“Š Download as CSV"), unsafe_allow_html=True)
890
+ with col2:
891
+ st.markdown(get_download_button(st.session_state.search_results_df, "txt", "πŸ“ Download as Text"), unsafe_allow_html=True)
892
+ else:
893
+ st.warning("Please enter a search query.")
894
+ st.markdown('</div>', unsafe_allow_html=True)
895
+
896
def summarize_page():
    """Render the "Text Summarization" page.

    Shows a text area and a "Summarize" button. When the button is pressed,
    the entered text is passed to ``summarize_text`` and the result is
    displayed; if the text area is empty a warning is shown instead.
    """
    # The card wrapper opened here is closed by the final markdown call.
    st.markdown('<div class="card">', unsafe_allow_html=True)
    st.title("Text Summarization")
    st.write("Generate concise summaries from lengthy academic text.")

    source_text = st.text_area("Enter text to summarize", height=200)
    clicked = st.button("Summarize")

    if clicked and not source_text:
        st.warning("Please enter text to summarize.")
    elif clicked:
        with st.spinner('Summarizing text...'):
            result = summarize_text(source_text)
            st.success("Summary:")
            st.write(result)

    st.markdown('</div>', unsafe_allow_html=True)
912
+
913
def generate_page():
    """Render the "Text Generation" page.

    Shows a prompt text area and a "Generate Text" button. When the button is
    pressed, the prompt is passed to ``generate_text`` and the output is
    displayed; if the prompt is empty a warning is shown instead.
    """
    # The card wrapper opened here is closed by the final markdown call.
    st.markdown('<div class="card">', unsafe_allow_html=True)
    st.title("Text Generation")
    st.write("Generate text based on your input to assist with research writing.")

    prompt = st.text_area("Enter text prompt", height=200)
    clicked = st.button("Generate Text")

    if clicked and not prompt:
        st.warning("Please enter text to generate from.")
    elif clicked:
        with st.spinner('Generating text...'):
            output = generate_text(prompt)
            st.success("Generated Text:")
            st.write(output)

    st.markdown('</div>', unsafe_allow_html=True)
929
+
930
def about_page():
    """Render the static "About PaperQuest" page inside a card container."""
    # Static markdown copy for the page, kept apart from the layout calls.
    about_markdown = """
    ## Our Mission

    PaperQuest is dedicated to empowering researchers, students, and academics with powerful tools to streamline their research process. Our platform combines comprehensive paper search capabilities with advanced text summarization and generation tools to help you work more efficiently.

    ## Our Technology

    PaperQuest leverages state-of-the-art natural language processing models to deliver high-quality text summarization and generation. Our search functionality connects to CrossRef's extensive database, providing access to millions of academic papers across disciplines.

    ## The Team

    Our team consists of researchers and developers passionate about improving the academic research process through technology.
    """

    st.markdown('<div class="card">', unsafe_allow_html=True)
    st.title("About PaperQuest")
    st.write(about_markdown)
    st.markdown('</div>', unsafe_allow_html=True)
948
+
949
def how_to_use_page():
    """Render the static "How to Use PaperQuest" guide inside a card container."""
    # Static markdown copy for the page, kept apart from the layout calls.
    guide_markdown = """
    ## Quick Start Guide

    ### Finding Research Papers
    1. Navigate to the Home or Search page
    2. Enter your research topic or keywords in the search bar
    3. Adjust the number of results using the slider
    4. Click "Search" to retrieve papers
    5. Download your results in CSV or TXT format

    ### Summarizing Text
    1. Navigate to the Summarize page
    2. Paste the text you want to summarize
    3. Click "Summarize" to get a concise version

    ### Generating Text
    1. Navigate to the Generate page
    2. Enter a prompt or starting text
    3. Click "Generate Text" to get AI-assisted content

    ## Tips for Better Results

    - Use specific keywords for more targeted search results
    - For summarization, provide complete paragraphs for better context
    - When generating text, provide clear prompts that describe what you need
    """

    st.markdown('<div class="card">', unsafe_allow_html=True)
    st.title("How to Use PaperQuest")
    st.write(guide_markdown)
    st.markdown('</div>', unsafe_allow_html=True)
980
+
981
+ # Main function
982
def main():
    """Application entry point: session state, sidebar navigation and routing.

    Ensures the ``page`` and ``search_results_df`` session keys exist, draws
    one sidebar button per page, renders the page currently selected in
    ``st.session_state.page`` and finally the footer.
    """
    # Initialize session state for page navigation
    if 'page' not in st.session_state:
        st.session_state.page = 'home'

    if 'search_results_df' not in st.session_state:
        st.session_state.search_results_df = None

    # Sidebar navigation: page id -> button label.
    st.sidebar.title("Navigation")
    pages = {
        "home": "🏠 Home",
        "search": "🔍 Search Papers",
        "summarize": "📝 Summarize Text",
        "generate": "✨ Generate Text",
        # This entry previously lacked its trailing comma, which made the
        # whole dict literal a SyntaxError.
        "literature": "📚 Literature Survey",
        "about": "ℹ️ About Us",
        "how_to_use": "❓ How to Use",
    }

    # The page id doubles as the widget key so every button is unique.
    for page_id, page_name in pages.items():
        if st.sidebar.button(page_name, key=page_id):
            st.session_state.page = page_id

    # App logo and branding in sidebar
    st.sidebar.markdown("---")
    st.sidebar.markdown("<div style='text-align: center;'><h3>PaperQuest</h3><p>Research Finder & Text Companion</p></div>", unsafe_allow_html=True)

    # Display the selected page
    if st.session_state.page == 'home':
        home_page()
    elif st.session_state.page == 'search':
        search_page()
    elif st.session_state.page == 'summarize':
        summarize_page()
    elif st.session_state.page == 'generate':
        generate_page()
    elif st.session_state.page == 'about':
        about_page()
    elif st.session_state.page == 'how_to_use':
        how_to_use_page()
    elif st.session_state.page == 'literature':
        literature_survey_page()

    # Footer
    st.markdown('<div class="footer">', unsafe_allow_html=True)
    st.markdown("© 2025 PaperQuest | Research Finder and Text Companion", unsafe_allow_html=True)
    st.markdown('</div>', unsafe_allow_html=True)
1030
+
1031
# Run the app only when this file is executed as a script; importing it as a
# module defines the page functions without calling main().
if __name__ == "__main__":
    main()