MahatirTusher committed on
Commit
c5e8d1d
·
verified ·
1 Parent(s): 4edf23f

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -1125
app.py DELETED
@@ -1,1125 +0,0 @@
1
- import streamlit as st
2
- import requests
3
- import pandas as pd
4
- import transformers
5
- from transformers import pipeline
6
- import tensorflow
7
- import io
8
- import base64
9
- import xml.etree.ElementTree as ET
10
- import json
11
- import time
12
- from transformers import pipeline
13
-
14
- # Set page configuration and styling
15
# Page-level configuration; Streamlit requires this to be the first st.* call of the script.
st.set_page_config(
    page_title="PaperQuest: Research Finder",
    page_icon="📚",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS to make the UI more professional.
# The sidebar rule keeps the original auto-generated class name and additionally
# targets the sidebar through its data-testid attribute, because generated
# ".css-*" class names change between Streamlit builds.
st.markdown("""
<style>
/* Main theme colors */
:root {
    --primary-color: #4361ee;
    --secondary-color: #3a0ca3;
    --accent-color: #4cc9f0;
    --background-color: #f8f9fa;
    --text-color: #212529;
}

/* Overall page styling */
.main {
    background-color: var(--background-color);
    color: var(--text-color);
}

/* Header styling */
h1, h2, h3 {
    color: var(--primary-color);
    font-family: 'Helvetica Neue', sans-serif;
}

/* Custom button styling */
.stButton > button {
    background-color: var(--primary-color);
    color: white;
    border-radius: 6px;
    border: none;
    padding: 0.5rem 1rem;
    font-weight: 600;
    transition: all 0.3s;
}

.stButton > button:hover {
    background-color: var(--secondary-color);
    transform: translateY(-2px);
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}

/* Custom sidebar styling */
.css-1d391kg,
[data-testid="stSidebar"] {
    background-color: #f1f3f8;
}

/* Card-like containers */
.card {
    background-color: white;
    border-radius: 10px;
    padding: 20px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    margin-bottom: 20px;
}

/* Hero section */
.hero {
    background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
    color: white;
    padding: 2rem;
    border-radius: 10px;
    margin-bottom: 2rem;
    text-align: center;
}

/* Tables */
.dataframe {
    width: 100%;
    border-collapse: collapse;
}

.dataframe th {
    background-color: var(--primary-color);
    color: white;
    text-align: left;
    padding: 12px;
}

.dataframe td {
    padding: 8px 12px;
    border-bottom: 1px solid #ddd;
}

.dataframe tr:nth-child(even) {
    background-color: #f9f9f9;
}

/* Feature icons */
.feature-icon {
    font-size: 2.5rem;
    color: var(--primary-color);
    margin-bottom: 1rem;
    text-align: center;
}

/* Footer */
.footer {
    text-align: center;
    padding: 20px;
    background-color: #f1f3f8;
    margin-top: 40px;
    border-radius: 10px;
}
</style>
""", unsafe_allow_html=True)
127
-
128
- import requests
129
- import xml.etree.ElementTree as ET
130
- import pandas as pd
131
- import streamlit as st
132
- import re
133
-
134
- # Function to search CrossRef using the user's query
135
def search_crossref(query, rows=10, timeout=15):
    """
    Query the CrossRef works API for journal articles matching the query.

    Parameters:
    query (str): Free-text search terms
    rows (int): Maximum number of records to request
    timeout (float): Seconds to wait for CrossRef before giving up

    Returns:
    dict or None: Decoded JSON response, or None after the error has been
    reported through st.error
    """
    url = "https://api.crossref.org/works"

    params = {
        "query": query,
        "rows": rows,
        "filter": "type:journal-article",
    }

    try:
        # Without a timeout a stalled connection blocks the script run forever.
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as e:
        st.error(f"HTTP error occurred: {e}")
        return None
    except Exception as e:
        # Covers timeouts, connection failures and invalid JSON bodies.
        st.error(f"An error occurred: {e}")
        return None
154
-
155
- # Function to search Semantic Scholar using the user's query
156
def search_semantic_scholar(query, limit=10, timeout=15):
    """
    Query the Semantic Scholar Graph API paper-search endpoint.

    Parameters:
    query (str): Free-text search terms
    limit (int): Maximum number of papers to request
    timeout (float): Seconds to wait for the API before giving up

    Returns:
    dict or None: Decoded JSON response, or None after the error has been
    reported through st.error
    """
    url = "https://api.semanticscholar.org/graph/v1/paper/search"

    params = {
        "query": query,
        "limit": limit,
        "fields": "title,authors,venue,year,abstract,url,externalIds",
    }

    headers = {
        "Accept": "application/json",
        # Add your API key if you have one: "x-api-key": "YOUR_API_KEY"
    }

    try:
        # Without a timeout a stalled connection blocks the script run forever.
        response = requests.get(url, params=params, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as e:
        st.error(f"Semantic Scholar HTTP error: {e}")
        return None
    except Exception as e:
        # Covers timeouts, connection failures and invalid JSON bodies.
        st.error(f"Semantic Scholar error: {e}")
        return None
180
-
181
- # Function to search arXiv using the user's query
182
# XML namespaces used by the arXiv Atom feed.
_ARXIV_ATOM_NS = "{http://www.w3.org/2005/Atom}"
_ARXIV_EXT_NS = "{http://arxiv.org/schemas/atom}"


def _arxiv_child_text(element, tag, namespace=_ARXIV_ATOM_NS):
    """Return the stripped text of child *tag* of *element*, or "" if it is absent or empty."""
    node = element.find(f"{namespace}{tag}")
    if node is None or node.text is None:
        return ""
    return node.text.strip()


def _parse_arxiv_entry(entry):
    """Convert one Atom <entry> element of the arXiv feed into a plain dict."""
    author_names = [
        _arxiv_child_text(author, "name")
        for author in entry.findall(f"{_ARXIV_ATOM_NS}author")
    ]

    # The PDF location is the <link> element whose title attribute is "pdf".
    pdf_url = ""
    for link in entry.findall(f"{_ARXIV_ATOM_NS}link"):
        if link.get('title') == 'pdf':
            pdf_url = link.get('href', "")
            break

    return {
        "title": _arxiv_child_text(entry, "title"),
        "authors": ', '.join(name for name in author_names if name),
        "abstract": _arxiv_child_text(entry, "summary"),
        "url": pdf_url,
        # Keep only the date part of the ISO timestamp.
        "published": _arxiv_child_text(entry, "published").split('T')[0],
        "arxiv_id": _arxiv_child_text(entry, "id").split('/abs/')[-1],
        # The DOI, when the authors supplied one, lives in the arXiv extension namespace.
        "doi": _arxiv_child_text(entry, "doi", _ARXIV_EXT_NS),
    }


def search_arxiv(query, max_results=10, timeout=15):
    """
    Search arXiv through its Atom query API and parse the feed.

    Parameters:
    query (str): Search terms, matched against all fields
    max_results (int): Maximum number of entries to request
    timeout (float): Seconds to wait for arXiv before giving up

    Returns:
    dict or None: {"entries": [...]} where each entry has the keys title,
    authors, abstract, url, published, arxiv_id and doi; None after the error
    has been reported through st.error
    """
    base_url = "https://export.arxiv.org/api/query"

    params = {
        "search_query": f"all:{query}",
        "max_results": max_results,
        "sortBy": "relevance",
        "sortOrder": "descending",
    }

    try:
        response = requests.get(base_url, params=params, timeout=timeout)
        response.raise_for_status()

        # Parse the XML response
        root = ET.fromstring(response.content)
        entries = [
            _parse_arxiv_entry(entry)
            for entry in root.findall(f"{_ARXIV_ATOM_NS}entry")
        ]
        return {"entries": entries}
    except requests.exceptions.HTTPError as e:
        st.error(f"arXiv HTTP error: {e}")
        return None
    except Exception as e:
        # Covers timeouts, connection failures and malformed XML.
        st.error(f"arXiv error: {e}")
        return None
246
-
247
- # Function to fetch abstract from PubMed using DOI
248
def fetch_pubmed_abstract(doi, timeout=10):
    """
    Look up a paper on PubMed by DOI and return its abstract.

    This is a best-effort helper: any failure (network, HTTP status, parsing)
    yields an empty string so callers can fall back to another source.

    Parameters:
    doi (str): DOI of the paper
    timeout (float): Seconds to wait for each E-utilities request

    Returns:
    str: Abstract text with multiple sections joined by spaces, or "" when
    nothing could be retrieved
    """
    if not doi:
        return ""

    eutils_base = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"

    try:
        # First, search for the PubMed ID using the DOI. Passing the term via
        # params lets requests URL-encode DOIs containing reserved characters.
        search_response = requests.get(
            f"{eutils_base}/esearch.fcgi",
            params={"db": "pubmed", "term": f"{doi}[doi]", "retmode": "json"},
            timeout=timeout,
        )
        search_response.raise_for_status()
        search_data = search_response.json()

        # Check if we found a PubMed ID
        id_list = search_data.get('esearchresult', {}).get('idlist', [])
        if not id_list:
            return ""

        # Now fetch the record using the PubMed ID
        fetch_response = requests.get(
            f"{eutils_base}/efetch.fcgi",
            params={"db": "pubmed", "id": id_list[0], "retmode": "xml"},
            timeout=timeout,
        )
        fetch_response.raise_for_status()

        root = ET.fromstring(fetch_response.content)

        # itertext() also collects text nested in inline markup (<i>, <sub>, ...);
        # reading .text alone would stop at the first child element.
        sections = [
            "".join(elem.itertext()).strip()
            for elem in root.findall(".//AbstractText")
        ]
        return " ".join(section for section in sections if section)
    except Exception:
        # If anything goes wrong, return empty string
        return ""
280
-
281
- # Function to display CrossRef results with enhanced abstract fetching
282
def _crossref_first(values):
    """Return the first element of a list-valued CrossRef field, or "" when missing or empty."""
    return values[0] if values else ""


def _crossref_author_names(item):
    """Join the authors' family names; entries without one fall back to 'name' and blanks are skipped."""
    labels = []
    for author in item.get('author') or []:
        label = author.get('family') or author.get('name') or ''
        if label:
            labels.append(label)
    return ', '.join(labels)


def _crossref_published_year(item):
    """Return the first component of issued/date-parts, or "N/A" when it is not available."""
    date_parts = (item.get('issued') or {}).get('date-parts') or []
    if date_parts and date_parts[0] and date_parts[0][0] is not None:
        return date_parts[0][0]
    return "N/A"


def display_crossref_results(data):
    """
    Convert a CrossRef API response into the app's common paper records.

    For every item with a DOI the abstract is first requested from PubMed;
    if that yields nothing, the abstract embedded in the CrossRef record is
    used with its markup tags removed. A progress bar is shown while items
    are processed.

    Parameters:
    data (dict): Decoded CrossRef response, or None

    Returns:
    list or None: One dict per paper with the keys Source, Title, Author(s),
    Journal, Abstract, DOI, Link and Published; None when there is nothing
    to show (a warning is displayed in that case)
    """
    if not data:
        st.warning("No CrossRef data to display.")
        return None

    items = data.get('message', {}).get('items', [])
    if not items:
        st.warning("No CrossRef results found.")
        return None

    paper_list = []

    # Use a progress bar for abstract fetching
    progress_bar = st.progress(0)
    status_text = st.empty()
    total = len(items)

    for position, item in enumerate(items, start=1):
        status_text.text(f"Processing CrossRef paper {position}/{total}...")

        doi = item.get('DOI', '')

        # Try to get abstract from PubMed for papers with DOI
        abstract = fetch_pubmed_abstract(doi) if doi else ""

        # Otherwise fall back to CrossRef's own abstract, stripping its markup tags
        if not abstract and item.get('abstract'):
            abstract = re.sub(r'<[^>]+>', '', item['abstract'])

        paper_list.append({
            "Source": "CrossRef",
            # 'title' and 'container-title' are lists and may be present but empty.
            "Title": _crossref_first(item.get('title')),
            "Author(s)": _crossref_author_names(item),
            "Journal": _crossref_first(item.get('container-title')),
            "Abstract": abstract,
            "DOI": doi,
            "Link": item.get('URL', ''),
            "Published": _crossref_published_year(item),
        })

        # Update progress bar
        progress_bar.progress(position / total)

    # Clear progress indicators
    progress_bar.empty()
    status_text.empty()

    return paper_list
332
-
333
- # Function to display Semantic Scholar results
334
def display_semantic_scholar_results(data):
    """
    Convert a Semantic Scholar search response into the app's common paper records.

    Fields that the response carries as explicit nulls (for example
    externalIds, authors or abstract) are normalised to empty values so that
    later string handling of the records does not fail.

    Parameters:
    data (dict): Decoded Semantic Scholar response, or None

    Returns:
    list or None: One dict per paper with the keys Source, Title, Author(s),
    Journal, Abstract, DOI, Link and Published; None when there is nothing
    to show (a warning is displayed in that case)
    """
    if not data:
        st.warning("No Semantic Scholar data to display.")
        return None

    items = data.get('data') or []
    if not items:
        st.warning("No Semantic Scholar results found.")
        return None

    paper_list = []
    for item in items:
        # dict.get(key, default) only covers missing keys, not keys set to None,
        # hence the "or" fallbacks below.
        authors = item.get('authors') or []
        external_ids = item.get('externalIds') or {}
        year = item.get('year')

        author_names = ', '.join(
            name for name in (author.get('name') for author in authors) if name
        )

        paper_list.append({
            "Source": "Semantic Scholar",
            "Title": item.get('title') or '',
            "Author(s)": author_names,
            "Journal": item.get('venue') or '',
            "Abstract": item.get('abstract') or '',
            "DOI": external_ids.get('DOI') or '',
            "Link": item.get('url') or '',
            "Published": year if year is not None else 'N/A',
        })

    return paper_list
365
-
366
- # Function to display arXiv results
367
def display_arxiv_results(data):
    """
    Map parsed arXiv entries onto the app's common paper records.

    Parameters:
    data (dict): Mapping with an 'entries' list, or None

    Returns:
    list or None: One dict per entry with the keys Source, Title, Author(s),
    Journal, Abstract, DOI, Link, Published and arXiv ID; None when there is
    nothing to show (a warning is displayed in that case)
    """
    if not data:
        st.warning("No arXiv data to display.")
        return None

    records = data.get('entries', [])
    if not records:
        st.warning("No arXiv results found.")
        return None

    return [
        {
            "Source": "arXiv",
            "Title": record.get('title', ''),
            "Author(s)": record.get('authors', ''),
            "Journal": "arXiv preprint",
            "Abstract": record.get('abstract', ''),
            "DOI": record.get('doi', ''),
            "Link": record.get('url', ''),
            "Published": record.get('published', 'N/A'),
            "arXiv ID": record.get('arxiv_id', ''),
        }
        for record in records
    ]
394
-
395
- # Function to run a comprehensive search across all APIs
396
def run_comprehensive_search(query, max_results=10):
    """
    Search CrossRef, Semantic Scholar and arXiv and merge the results.

    Each source reports its own status (spinner, then success or info
    message) in one of three side-by-side columns.

    Parameters:
    query (str): Search terms forwarded to every source
    max_results (int): Maximum number of results requested per source

    Returns:
    pandas.DataFrame or None: All papers in the order CrossRef, Semantic
    Scholar, arXiv; None when no source returned anything (a warning is
    displayed in that case)
    """
    # (label, fetch-and-format callable) for every source, in display order.
    backends = (
        ("CrossRef",
         lambda: display_crossref_results(search_crossref(query, rows=max_results))),
        ("Semantic Scholar",
         lambda: display_semantic_scholar_results(search_semantic_scholar(query, limit=max_results))),
        ("arXiv",
         lambda: display_arxiv_results(search_arxiv(query, max_results=max_results))),
    )

    with st.spinner("Searching multiple academic databases..."):
        # Create columns for status indicators
        status_columns = st.columns(3)

        combined = []
        for column, (label, collect) in zip(status_columns, backends):
            with column:
                with st.spinner(f"Searching {label}..."):
                    papers = collect()
                    if papers:
                        st.success(f"Found {len(papers)} results in {label}")
                        combined.extend(papers)
                    else:
                        st.info(f"No results from {label}")

        if not combined:
            st.warning("No results found across any of the academic databases.")
            return None

        return pd.DataFrame(combined)
446
-
447
- # Function to display the results in a table format
448
def display_results(data):
    """
    Show a CrossRef response as a table and return it as a DataFrame.

    Parameters:
    data (dict): Decoded CrossRef response, or None

    Returns:
    pandas.DataFrame or None: One row per paper with the columns Title,
    Author(s), Journal, Abstract, DOI, Link and Published; None when there
    is nothing to show (a warning is displayed in that case)
    """
    if not data:
        st.warning("No data to display.")
        return None

    items = data.get('message', {}).get('items', [])
    if not items:
        st.warning("No results found for the query.")
        return None

    def first_or_blank(values):
        # List-valued fields may be present but empty; [0] would raise IndexError.
        return values[0] if values else ""

    def published_year(item):
        date_parts = (item.get('issued') or {}).get('date-parts') or []
        if date_parts and date_parts[0] and date_parts[0][0] is not None:
            return date_parts[0][0]
        return "N/A"

    paper_list = []
    for item in items:
        # Clean up the abstract text - remove markup tags if present
        raw_abstract = item.get('abstract') or ""
        abstract = re.sub(r'<[^>]+>', '', raw_abstract)

        family_names = [
            author.get('family', '') for author in item.get('author') or []
        ]

        paper_list.append({
            "Title": first_or_blank(item.get('title')),
            "Author(s)": ', '.join(name for name in family_names if name),
            "Journal": first_or_blank(item.get('container-title')),
            "Abstract": abstract,
            "DOI": item.get('DOI', ''),
            "Link": item.get('URL', ''),
            "Published": published_year(item),
        })

    df = pd.DataFrame(paper_list)

    # Display the dataframe
    st.write(df)

    return df
483
- # Add the generate_literature_survey function below your other function definitions
484
def generate_literature_survey(papers, api_key=None):
    """
    Generate a literature survey based on paper abstracts using the Groq chat completions API

    SECURITY: an API key used to be hard-coded as the default value of
    api_key. It has been removed from the code; because it was committed it
    must be treated as compromised and revoked. The key is now taken from the
    api_key argument or the GROQ_API_KEY environment variable.

    NOTE(review): another function with this same name is defined further
    down in this module and rebinds the name, so this version is only
    reachable if that later definition is renamed or removed.

    Parameters:
    papers (list): List of paper dicts (keys Title, Author(s), Published, Abstract)
    api_key (str): Groq API key; defaults to the GROQ_API_KEY environment variable

    Returns:
    str: Generated literature survey, or a human-readable message explaining
    why none could be produced
    """
    import os  # local import: only needed for the API-key lookup below

    # Check if we have papers at all
    if not papers:
        return "No papers found to generate a literature survey."

    # Keep papers with a substantial abstract. The isinstance check matters
    # because records loaded from CSV carry NaN (a float) for missing cells.
    papers_with_abstracts = [
        p for p in papers
        if isinstance(p.get("Abstract"), str) and len(p["Abstract"]) > 50
    ]

    if not papers_with_abstracts:
        return "Cannot generate a literature survey because none of the papers have substantial abstracts."

    api_key = api_key or os.environ.get("GROQ_API_KEY")
    if not api_key:
        st.error("No Groq API key found. Set the GROQ_API_KEY environment variable or pass api_key.")
        return "Error: Missing API key."

    # Construct the prompt for the LLM
    paper_info = [
        f"Paper {i}:\n"
        f"Title: {paper.get('Title', 'Unknown')}\n"
        f"Authors: {paper.get('Author(s)', 'Unknown')}\n"
        f"Year: {paper.get('Published', 'Unknown')}\n"
        f"Abstract: {paper.get('Abstract', 'No abstract available')}\n"
        # Limit to 10 papers to avoid token limits
        for i, paper in enumerate(papers_with_abstracts[:10], start=1)
    ]
    papers_text = "\n".join(paper_info)

    prompt = (
        "You are an expert academic researcher. Based on the following papers and their abstracts,\n"
        "write a concise literature survey that:\n"
        "1. Identifies the main themes and research directions\n"
        "2. Highlights methodological approaches\n"
        "3. Summarizes key findings\n"
        "4. Points out research gaps if evident\n"
        "5. Suggests potential future research directions\n"
        "\n"
        "Here are the papers:\n"
        "\n"
        f"{papers_text}\n"
        "\n"
        "Please organize the survey by themes rather than by individual papers, creating connections between studies.\n"
        "Format your response with markdown headings for better readability.\n"
    )

    # Make the API request to Groq
    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    payload = {
        # NOTE(review): confirm this identifier against Groq's list of model ids.
        "model": "meta-llama/Llama-3.3-70B-Instruct",
        "messages": [
            {"role": "system", "content": "You are an academic research assistant that creates comprehensive literature surveys."},
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.3,
        "max_tokens": 2000,
    }

    try:
        # Generation can be slow, but an unbounded wait would hang the app.
        response = requests.post(url, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        result = response.json()
        return result["choices"][0]["message"]["content"]
    except Exception as e:
        st.error(f"Error generating literature survey: {e}")
        return f"Failed to generate literature survey due to an error: {str(e)}"
554
-
555
- # Add the add_literature_survey_button function
556
-
557
- # Function to generate a literature survey using Hugging Face API
558
def generate_literature_survey(papers, api_key=None):
    """
    Generate a literature survey based on paper abstracts using HuggingFace API

    The hosted Mistral-7B-Instruct model is tried first. If that request
    fails, each abstract is summarised locally with a transformers
    summarization pipeline as a last resort.

    Parameters:
    papers (list): List of paper dicts (keys Title, Author(s), Published, Abstract)
    api_key (str): Optional HuggingFace API key; defaults to the "hf_api_key" secret

    Returns:
    str: Generated literature survey, or a human-readable message explaining
    why none could be produced
    """
    # Retrieve the API key from Hugging Face Spaces Secrets or local secrets
    api_key = api_key or st.secrets.get("hf_api_key")

    if not api_key:
        st.error("No Hugging Face API key found. Please add it to your Space Secrets or .streamlit/secrets.toml.")
        return "Error: Missing API key."

    # Check if we have papers at all
    if not papers:
        return "No papers found to generate a literature survey."

    # Keep papers with a substantial abstract. The isinstance check matters
    # because records loaded from CSV carry NaN (a float) for missing cells,
    # and len() of a float raises TypeError.
    papers_with_abstracts = [
        p for p in papers
        if isinstance(p.get("Abstract"), str) and len(p["Abstract"]) > 50
    ]

    if not papers_with_abstracts:
        return "Cannot generate a literature survey because none of the papers have substantial abstracts."

    # Construct the prompt for the LLM
    paper_info = [
        f"Paper {i}:\n"
        f"Title: {paper.get('Title', 'Unknown')}\n"
        f"Authors: {paper.get('Author(s)', 'Unknown')}\n"
        f"Year: {paper.get('Published', 'Unknown')}\n"
        f"Abstract: {paper.get('Abstract', 'No abstract available')}\n"
        # Limit to 10 papers to avoid token limits
        for i, paper in enumerate(papers_with_abstracts[:10], start=1)
    ]
    papers_text = "\n".join(paper_info)

    prompt = (
        "You are an expert academic researcher. Based on the following papers and their abstracts,\n"
        "write a concise literature survey that:\n"
        "1. Identifies the main themes and research directions\n"
        "2. Highlights methodological approaches\n"
        "3. Summarizes key findings\n"
        "4. Points out research gaps if evident\n"
        "5. Suggests potential future research directions\n"
        "\n"
        "Here are the papers:\n"
        "\n"
        f"{papers_text}\n"
        "\n"
        "Please organize the survey by themes rather than by individual papers, creating connections between studies.\n"
        "Format your response with markdown headings for better readability.\n"
    )

    headers = {"Authorization": f"Bearer {api_key}"}

    # Use HuggingFace's Inference API
    try:
        API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
        response = requests.post(
            API_URL,
            headers=headers,
            json={
                "inputs": prompt,
                "parameters": {
                    "max_new_tokens": 2000,
                    "temperature": 0.3,
                    # Return only the completion, not the prompt followed by it.
                    "return_full_text": False,
                },
            },
            # Generation can be slow, but an unbounded wait would hang the app.
            timeout=120,
        )
        response.raise_for_status()

        # Parse the response; the expected shape is [{"generated_text": ...}].
        result = response.json()
        if isinstance(result, list) and len(result) > 0:
            first = result[0]
            if isinstance(first, dict) and "generated_text" in first:
                return first["generated_text"]
            return str(first)
        return str(result)
    except Exception as e:
        st.error(f"Error generating literature survey: {e}")
        # Fallback to local summarization as a last resort
        try:
            summarizer = pipeline("summarization")
            sections = ["# Literature Survey\n\n"]
            for i, paper in enumerate(papers_with_abstracts, start=1):
                chunk_summary = summarizer(
                    paper["Abstract"], max_length=150, min_length=50, do_sample=False
                )[0]['summary_text']
                sections.append(
                    f"## Paper {i}: {paper.get('Title', 'Unknown')}\n\n{chunk_summary}\n\n"
                )
            return "".join(sections)
        except Exception as fallback_error:
            return f"Failed to generate literature survey due to an error: {str(fallback_error)}"
650
-
651
- # Generate Literature Survey Page
652
def generate_literature_survey_page():
    """
    Render the "Generate Literature Survey" page.

    The user uploads a CSV with the columns Title, Author(s), Abstract and
    Published; on button press the rows are passed to
    generate_literature_survey and the returned markdown is displayed.
    """
    st.title("📚 Generate Literature Survey")
    st.markdown(
        """
        Upload a list of research papers (with abstracts) and generate a comprehensive literature survey.
        The survey will identify themes, methodologies, key findings, and future research directions.
        """
    )

    # Input: Papers Data
    st.subheader("Upload Papers Data")
    uploaded_file = st.file_uploader("Upload a CSV file containing papers (with columns: Title, Author(s), Abstract, Published)", type=["csv"])

    if uploaded_file is None:
        st.info("Please upload a CSV file to proceed.")
        return

    # A tuple (not a set) keeps the order of the columns in the error message stable.
    required_columns = ("Title", "Author(s)", "Abstract", "Published")

    try:
        # Load the uploaded CSV file
        papers_df = pd.read_csv(uploaded_file)

        # Validate required columns
        if not set(required_columns).issubset(papers_df.columns):
            st.error(f"CSV file must contain the following columns: {', '.join(required_columns)}")
            return

        # Empty Abstract cells are read as NaN (a float); turn them into empty
        # strings so the length check on abstracts downstream does not raise.
        papers_df["Abstract"] = papers_df["Abstract"].fillna("").astype(str)

        # Convert DataFrame to list of dictionaries
        papers = papers_df.to_dict(orient="records")
        st.success(f"Successfully loaded {len(papers)} papers.")
    except Exception as e:
        st.error(f"Error loading CSV file: {e}")
        return

    # Button to Generate Literature Survey
    if st.button("Generate Literature Survey"):
        with st.spinner("Generating literature survey..."):
            survey = generate_literature_survey(papers)
            st.success("Literature Survey Generated!")
            st.markdown(survey)
692
-
693
- # Add this page to the multi-page navigation
694
# NOTE(review): this guard sits in the middle of the module, so when the file is
# executed as a script the survey page is rendered at this point, before the
# functions defined further down exist. Confirm that this is intended and not a
# leftover from developing the page on its own.
if __name__ == "__main__":
    generate_literature_survey_page()
696
-
697
- # Function to summarize text using the specified model
698
def summarize_text(text):
    """
    Summarize text with the flan-t5-large website summarizer model.

    Parameters:
    text (str): Text to summarize

    Returns:
    str: Generated summary, or a fixed fallback message if anything fails
    (the error is also reported through st.error)
    """
    try:
        # Build the text2text pipeline on the PyTorch backend
        model_pipeline = pipeline(
            "text2text-generation",
            model="spacemanidol/flan-t5-large-website-summarizer",
            framework="pt",
        )
        outputs = model_pipeline(text, max_length=150, min_length=50, do_sample=False)
        first_output = outputs[0]
        return first_output['generated_text']
    except Exception as err:
        st.error(f"An error occurred during summarization: {err}")
        return "Summary could not be generated."
707
-
708
- # Function to generate text
709
def generate_text(text):
    """
    Run text through the est5-summarize text2text model.

    Parameters:
    text (str): Input text

    Returns:
    str: Model output, or a fixed fallback message if anything fails
    (the error is also reported through st.error)
    """
    try:
        # Build the text2text pipeline on the PyTorch backend
        model_pipeline = pipeline(
            "text2text-generation",
            model="JorgeSarry/est5-summarize",
            framework="pt",
        )
        outputs = model_pipeline(text, max_length=150, min_length=50, do_sample=False)
        first_output = outputs[0]
        return first_output['generated_text']
    except Exception as err:
        st.error(f"An error occurred during text generation: {err}")
        return "Generated text could not be created."
718
-
719
- # Function to convert DataFrame to CSV
720
def convert_df_to_csv(df):
    """
    Serialize a DataFrame to CSV bytes.

    Parameters:
    df (pandas.DataFrame): Table to export

    Returns:
    bytes: UTF-8 encoded CSV without the index column
    """
    csv_text = df.to_csv(index=False)
    return csv_text.encode('utf-8')
722
-
723
- # Function to convert DataFrame to text
724
def convert_df_to_txt(df):
    """
    Render a DataFrame as tab-separated plain text.

    The output starts with a title line and a blank line, followed by the
    column names, a dashed rule as long as the header line, and one line per
    row. Every line ends with a newline.

    Parameters:
    df (pandas.DataFrame): Table to export

    Returns:
    str: The formatted text
    """
    header_line = "\t".join(df.columns)

    lines = [
        "PaperQuest Research Results",
        "",
        header_line,
        "-" * len(header_line),
    ]
    lines.extend(
        "\t".join(str(cell) for cell in record.values)
        for _, record in df.iterrows()
    )

    return "\n".join(lines) + "\n"
740
-
741
- # Function to create download button
742
def get_download_button(df, file_type="csv", button_text="Download as CSV"):
    """
    Build an HTML download link that embeds the DataFrame as a data URI.

    Parameters:
    df (pandas.DataFrame): Table to export
    file_type (str): "csv" for CSV output; any other value exports plain
    text and is used as the file extension
    button_text (str): Link label

    Returns:
    str: An <a> element whose href is a base64 data URI
    """
    if file_type == "csv":
        mime_type = "text/csv"
        payload = convert_df_to_csv(df)
    else:  # text
        mime_type = "text/plain"
        payload = convert_df_to_txt(df).encode()

    encoded = base64.b64encode(payload).decode()
    href = f'data:{mime_type};base64,{encoded}'

    return f'<a href="{href}" download="research_results.{file_type}" class="download-button">{button_text}</a>'
753
-
754
- # Navigation functions
755
def _render_home_download_links(results_df):
    """Render the CSV and plain-text download links for results_df side by side."""
    csv_col, txt_col = st.columns(2)
    with csv_col:
        st.markdown(get_download_button(results_df, "csv", "📊 Download as CSV"), unsafe_allow_html=True)
    with txt_col:
        st.markdown(get_download_button(results_df, "txt", "📝 Download as Text"), unsafe_allow_html=True)


def _render_home_feature_card(icon, title, description):
    """Render one card of the home page's feature overview."""
    st.markdown('<div class="card">', unsafe_allow_html=True)
    st.markdown(f'<div class="feature-icon">{icon}</div>', unsafe_allow_html=True)
    st.markdown(f"<h3 style='text-align: center;'>{title}</h3>", unsafe_allow_html=True)
    st.markdown(f"<p style='text-align: center;'>{description}</p>", unsafe_allow_html=True)
    st.markdown('</div>', unsafe_allow_html=True)


def home_page():
    """
    Render the landing page: hero banner, quick search card and feature overview.

    The outcome of every search is written to
    st.session_state.search_results_df (a DataFrame, or None when the search
    produced nothing), so results of an earlier query are never shown for a
    new one.
    """
    # Hero section
    st.markdown('<div class="hero">', unsafe_allow_html=True)
    st.title("PaperQuest: Research Finder and Text Companion")
    st.markdown("Discover academic insights and enhance your research journey with our powerful tools")
    st.markdown('</div>', unsafe_allow_html=True)

    # Search bar directly on the home page
    st.markdown('<div class="card">', unsafe_allow_html=True)
    st.subheader("📚 Find Research Papers")

    query_col, count_col = st.columns([3, 1])
    with query_col:
        query = st.text_input("Enter your research topic or keywords", value="machine learning optimization")
    with count_col:
        num_papers = st.slider("Results per source", min_value=5, max_value=50, value=10)

    search_sources = st.multiselect(
        "Select sources",
        options=["CrossRef", "Semantic Scholar", "arXiv", "All"],
        default=["CrossRef"]
    )

    search_clicked = st.button("Search Papers", key="search_home")

    if search_clicked:
        if not query:
            st.warning("Please enter a search query.")
        elif not search_sources:
            # Previously an empty selection did nothing without telling the user.
            st.warning("Please select at least one source.")
        elif "All" in search_sources or len(search_sources) > 1:
            # Use comprehensive search function
            results_df = run_comprehensive_search(query, max_results=num_papers)
            st.session_state.search_results_df = results_df

            if results_df is not None:
                st.subheader(f"Found {len(results_df)} papers")
                _render_home_download_links(results_df)
        else:
            # Exactly one concrete source is selected here.
            single_source_search = {
                "CrossRef": lambda: display_crossref_results(search_crossref(query, rows=num_papers)),
                "Semantic Scholar": lambda: display_semantic_scholar_results(search_semantic_scholar(query, limit=num_papers)),
                "arXiv": lambda: display_arxiv_results(search_arxiv(query, max_results=num_papers)),
            }
            with st.spinner('Searching for papers...'):
                paper_list = single_source_search[search_sources[0]]()

            # Always assign, so the attribute exists and stale results are cleared.
            results_df = pd.DataFrame(paper_list) if paper_list else None
            st.session_state.search_results_df = results_df

            if results_df is not None:
                st.write(results_df)
                _render_home_download_links(results_df)
    st.markdown('</div>', unsafe_allow_html=True)

    # Features section
    st.markdown("<h2 style='text-align: center; margin-top: 40px;'>Features</h2>", unsafe_allow_html=True)

    features = (
        ("🔍", "Comprehensive Search",
         "Access thousands of academic papers from CrossRef, Semantic Scholar, and arXiv"),
        ("📝", "Text Summarization",
         "Extract key insights from complex research papers"),
        ("✨", "Smart Text Generation",
         "Get assistance with creating coherent research content"),
    )
    for column, (icon, title, description) in zip(st.columns(3), features):
        with column:
            _render_home_feature_card(icon, title, description)
854
-
855
def _parse_publication_year(published):
    """Return the publication year encoded in *published*, or ``None``.

    Only the text before the first ``-`` is considered, so ``"2021"``,
    ``"2021-05-17"`` and ``"2021.0"`` all give ``2021``. Values that do not
    parse as a finite number (``None``, ``""``, ``"None"``, ``"nan"``) give
    ``None``.
    """
    if published is None:
        return None
    leading = str(published).strip().split("-", 1)[0]
    try:
        # float() first so a stringified numeric column ("2021.0") still parses;
        # int() rejects NaN (ValueError) and infinities (OverflowError).
        return int(float(leading))
    except (ValueError, OverflowError):
        return None


def _has_value(value):
    """Tell whether a result cell holds something worth displaying.

    ``None``, NaN and blank/whitespace-only text count as missing.
    """
    if value is None:
        return False
    if isinstance(value, float) and value != value:  # NaN never equals itself
        return False
    return bool(str(value).strip())


def _render_download_buttons(df):
    """Show the CSV and text download links for *df* side by side."""
    csv_col, txt_col = st.columns(2)
    with csv_col:
        st.markdown(get_download_button(df, "csv", "📊 Download as CSV"), unsafe_allow_html=True)
    with txt_col:
        st.markdown(get_download_button(df, "txt", "📝 Download as Text"), unsafe_allow_html=True)


def _render_comprehensive_results(results_df):
    """Render the source/year filters and the filtered multi-source results.

    Adds a ``Year`` column to *results_df* in place, stores the filtered frame
    in ``st.session_state.search_results_df`` and shows the download links.
    """
    st.subheader(f"Found {len(results_df)} papers across all selected sources")

    # Add filters
    st.subheader("Filter Results")
    selected_sources = st.multiselect(
        "Filter by sources",
        options=results_df["Source"].unique(),
        default=results_df["Source"].unique()
    )

    # Convert Published column to string to handle potential numeric values
    results_df["Published"] = results_df["Published"].astype(str)
    results_df["Year"] = results_df["Published"].apply(_parse_publication_year)

    # pandas may store the missing years as NaN rather than None, so work on a
    # numeric view and drop the gaps explicitly before computing the bounds.
    year_values = pd.to_numeric(results_df["Year"], errors="coerce")
    known_years = year_values.dropna()
    source_mask = results_df["Source"].isin(selected_sources)

    if known_years.empty:
        # Just apply source filter if no valid years
        filtered_df = results_df[source_mask]
    else:
        min_year = int(known_years.min())
        max_year = int(known_years.max())
        if min_year < max_year:
            year_range = st.slider(
                "Publication year range",
                min_value=min_year,
                max_value=max_year,
                value=(min_year, max_year)
            )
        else:
            # A range slider needs two distinct bounds; with a single year
            # there is nothing to choose, so skip the widget.
            year_range = (min_year, max_year)

        # Papers without a parseable year are kept rather than hidden.
        year_mask = year_values.between(year_range[0], year_range[1]) | year_values.isna()
        filtered_df = results_df[source_mask & year_mask]

    # Display filtered results
    st.subheader(f"Showing {len(filtered_df)} filtered results")

    # Display results with expandable rows
    for _, row in filtered_df.iterrows():
        with st.expander(f"{row['Title']} ({row['Source']}, {row['Published']})"):
            st.write(f"**Authors:** {row['Author(s)']}")
            st.write(f"**Journal/Venue:** {row['Journal']}")
            st.write("**Abstract:**")
            abstract = row['Abstract']
            st.write(abstract if _has_value(abstract) else "No abstract available")

            # Display links
            if _has_value(row['DOI']):
                st.write(f"**DOI:** https://doi.org/{row['DOI']}")
            if _has_value(row['Link']):
                st.write(f"**Link:** {row['Link']}")
            if _has_value(row.get('arXiv ID')):
                st.write(f"**arXiv ID:** {row['arXiv ID']}")

    st.session_state.search_results_df = filtered_df

    # Display download buttons
    _render_download_buttons(filtered_df)


def _search_single_source(search_sources, query, num_papers):
    """Search the first supported source named in *search_sources*.

    Returns whatever the matching ``display_*_results`` helper returns, or
    ``None`` when no supported source is selected.
    """
    if "CrossRef" in search_sources:
        return display_crossref_results(search_crossref(query, rows=num_papers))
    if "Semantic Scholar" in search_sources:
        return display_semantic_scholar_results(search_semantic_scholar(query, limit=num_papers))
    if "arXiv" in search_sources:
        return display_arxiv_results(search_arxiv(query, max_results=num_papers))
    return None


def search_page():
    """Render the paper-search page: query form, results, filters and downloads.

    Selecting "All" or more than one source runs ``run_comprehensive_search``
    and shows filterable, expandable results; a single source is searched
    directly and shown as a table. Results are stored in
    ``st.session_state.search_results_df``.
    """
    st.markdown('<div class="card">', unsafe_allow_html=True)
    st.title("Research Paper Search")
    st.write("Find and explore academic papers across various disciplines.")

    query = st.text_input("Enter your research topic or keywords", value="machine learning optimization")

    col1, col2, col3 = st.columns(3)
    with col1:
        num_papers = st.slider("Results per source", min_value=5, max_value=50, value=10)
    with col2:
        search_sources = st.multiselect(
            "Select sources",
            options=["CrossRef", "Semantic Scholar", "arXiv", "All"],
            default=["CrossRef"]
        )
    with col3:
        st.write(" ")  # Spacer
        st.write(" ")  # Spacer
        search_clicked = st.button("Search")

    # NOTE(review): st.button is only True on the rerun triggered by the click,
    # so changing a filter widget below reruns the script with search_clicked
    # False and the results disappear. Persisting the raw results in
    # st.session_state would fix that; left out here to keep this change focused.
    if search_clicked:
        if not query:
            st.warning("Please enter a search query.")
        elif "All" in search_sources or len(search_sources) > 1:
            # Use comprehensive search function
            # NOTE(review): search_sources is not passed on, so presumably every
            # source is queried regardless of the selection — verify.
            results_df = run_comprehensive_search(query, max_results=num_papers)
            if results_df is not None:
                _render_comprehensive_results(results_df)
        elif not search_sources:
            st.warning("Please select at least one source.")
        else:
            # Original single-source search
            with st.spinner('Searching for papers...'):
                paper_list = _search_single_source(search_sources, query, num_papers)

            # Only show what this search produced; falling back to the frame
            # already in session state would present stale results as current.
            if paper_list:
                st.session_state.search_results_df = pd.DataFrame(paper_list)
                st.write(st.session_state.search_results_df)

                # Display download buttons
                _render_download_buttons(st.session_state.search_results_df)
    st.markdown('</div>', unsafe_allow_html=True)
987
-
988
def summarize_page():
    """Render the summarization page.

    Shows a text area and a "Summarize" button. On click, non-empty input is
    passed to ``summarize_text`` and the result is displayed; empty input
    produces a warning instead.
    """
    st.markdown("<div class=\"card\">", unsafe_allow_html=True)
    st.title("Text Summarization")
    st.write("Generate concise summaries from lengthy academic text.")

    source_text = st.text_area("Enter text to summarize", height=200)
    summarize_requested = st.button("Summarize")

    if summarize_requested:
        if not source_text:
            st.warning("Please enter text to summarize.")
        else:
            with st.spinner("Summarizing text..."):
                result = summarize_text(source_text)
                st.success("Summary:")
                st.write(result)

    st.markdown("</div>", unsafe_allow_html=True)
1004
-
1005
def generate_page():
    """Render the text-generation page.

    Shows a prompt text area and a "Generate Text" button. On click, a
    non-empty prompt is passed to ``generate_text`` and the output is
    displayed; an empty prompt produces a warning instead.
    """
    st.markdown("<div class=\"card\">", unsafe_allow_html=True)
    st.title("Text Generation")
    st.write("Generate text based on your input to assist with research writing.")

    prompt = st.text_area("Enter text prompt", height=200)
    generate_requested = st.button("Generate Text")

    if generate_requested:
        if not prompt:
            st.warning("Please enter text to generate from.")
        else:
            with st.spinner("Generating text..."):
                output = generate_text(prompt)
                st.success("Generated Text:")
                st.write(output)

    st.markdown("</div>", unsafe_allow_html=True)
1021
-
1022
def about_page():
    """Render the static "About PaperQuest" page (mission, technology, team)."""
    st.markdown('<div class="card">', unsafe_allow_html=True)
    st.title("About PaperQuest")

    # The technology paragraph lists every source offered on the search page
    # (CrossRef, Semantic Scholar, arXiv) rather than CrossRef alone.
    st.write("""
    ## Our Mission

    PaperQuest is dedicated to empowering researchers, students, and academics with powerful tools to streamline their research process. Our platform combines comprehensive paper search capabilities with advanced text summarization and generation tools to help you work more efficiently.

    ## Our Technology

    PaperQuest leverages state-of-the-art natural language processing models to deliver high-quality text summarization and generation. Our search functionality connects to CrossRef, Semantic Scholar, and arXiv, providing access to millions of academic papers across disciplines.

    ## The Team

    Our team consists of researchers and developers passionate about improving the academic research process through technology.
    """)
    st.markdown('</div>', unsafe_allow_html=True)
1040
-
1041
def how_to_use_page():
    """Render the static "How to Use PaperQuest" quick-start guide."""
    st.markdown('<div class="card">', unsafe_allow_html=True)
    st.title("How to Use PaperQuest")

    # The search steps mirror the controls on the search page: keyword box,
    # "Results per source" slider, and the source selector.
    st.write("""
    ## Quick Start Guide

    ### Finding Research Papers
    1. Navigate to the Home or Search page
    2. Enter your research topic or keywords in the search bar
    3. Adjust the number of results per source using the slider
    4. Select the sources to search (CrossRef, Semantic Scholar, arXiv, or All)
    5. Click "Search" to retrieve papers
    6. Download your results in CSV or TXT format

    ### Summarizing Text
    1. Navigate to the Summarize page
    2. Paste the text you want to summarize
    3. Click "Summarize" to get a concise version

    ### Generating Text
    1. Navigate to the Generate page
    2. Enter a prompt or starting text
    3. Click "Generate Text" to get AI-assisted content

    ## Tips for Better Results

    - Use specific keywords for more targeted search results
    - For summarization, provide complete paragraphs for better context
    - When generating text, provide clear prompts that describe what you need
    """)
    st.markdown('</div>', unsafe_allow_html=True)
1072
-
1073
- # Main function
1074
def main():
    """Application entry point: sidebar navigation, page dispatch and footer.

    Seeds ``st.session_state`` with the current page id and an empty search
    result slot, draws one sidebar button per page, renders the page whose id
    is stored in ``st.session_state.page``, then emits the footer.
    """
    # Seed session state once; later reruns keep whatever is already stored.
    session_defaults = {"page": "home", "search_results_df": None}
    for state_key, initial in session_defaults.items():
        if state_key not in st.session_state:
            st.session_state[state_key] = initial

    # Sidebar navigation (dict order is the button order)
    st.sidebar.title("Navigation")
    nav_labels = {
        "home": "🏠 Home",
        "search": "🔍 Search Papers",
        "summarize": "📝 Summarize Text",
        "generate": "✨ Generate Text",
        "literature": "📚 Literature Survey",
        "about": "ℹ️ About Us",
        "how_to_use": "❓ How to Use",
    }
    for page_id, label in nav_labels.items():
        was_clicked = st.sidebar.button(label, key=page_id)
        if was_clicked:
            st.session_state["page"] = page_id

    # App logo and branding in sidebar
    st.sidebar.markdown("---")
    st.sidebar.markdown(
        "<div style='text-align: center;'><h3>PaperQuest</h3><p>Research Finder & Text Companion</p></div>",
        unsafe_allow_html=True,
    )

    # Display the selected page. The lambdas defer each name lookup until that
    # page is actually chosen, exactly as an if/elif chain would.
    renderers = {
        "home": lambda: home_page(),
        "search": lambda: search_page(),
        "summarize": lambda: summarize_page(),
        "generate": lambda: generate_page(),
        "about": lambda: about_page(),
        "how_to_use": lambda: how_to_use_page(),
        "literature": lambda: literature_survey_page(),
    }
    render = renderers.get(st.session_state["page"])
    if render is not None:
        render()

    # Footer
    st.markdown("<div class=\"footer\">", unsafe_allow_html=True)
    st.markdown("© 2025 PaperQuest | Research Finder and Text Companion", unsafe_allow_html=True)
    st.markdown("</div>", unsafe_allow_html=True)
1122
-
1123
# Run the app: main() is called only when this file is executed as the main
# module, not when it is imported.
if __name__ == "__main__":
    main()