awacke1 committed on
Commit
ff16fff
·
verified ·
1 Parent(s): b027e97

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -0
app.py CHANGED
@@ -443,6 +443,37 @@ def extract_terms(markdown_text):
443
  terms.append(line)
444
  return terms
445
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
446
  def display_terms_with_links(terms):
447
  """Display terms with various search links."""
448
  search_urls = {
 
443
  terms.append(line)
444
  return terms
445
 
446
def extract_urls(text):
    """Summarise arXiv entries found in a markdown listing.

    The input is scanned for headings of the form
    ``### DD Mon YYYY | [Title](https://arxiv.org/abs/ID) ... [⬇️](https://arxiv.org/pdf/ID)``.
    Each heading starts a new entry that runs until the next heading (or the
    end of the text). The title, abstract link and PDF link are looked up
    inside that entry only, so an entry that lacks one of them cannot shift
    the fields of the entries that follow it.

    Args:
        text: Markdown text to scan.

    Returns:
        A markdown string with one ``Date / Title / Abstract Link / PDF Link``
        section per entry, each terminated by a ``---`` rule. Fields that are
        absent from an entry are left out of its section. Returns ``''`` when
        no heading is found or when ``text`` is not a string.
    """
    date_pattern = re.compile(r'### (\d{2} \w{3} \d{4})')
    abs_link_pattern = re.compile(r'\[(.*?)\]\((https://arxiv\.org/abs/\d+\.\d+)\)')
    pdf_link_pattern = re.compile(r'\[⬇️\]\((https://arxiv\.org/pdf/\d+\.\d+)\)')
    title_pattern = re.compile(r'### \d{2} \w{3} \d{4} \| \[(.*?)\]')

    try:
        headings = list(date_pattern.finditer(text))
    except TypeError:
        # Non-string input (e.g. None): keep the best-effort contract of
        # writing a placeholder to the page and returning an empty result.
        st.write('.')
        return ''

    sections = []
    for index, heading in enumerate(headings):
        # An entry spans from its own heading up to the next heading.
        if index + 1 < len(headings):
            section_end = headings[index + 1].start()
        else:
            section_end = len(text)
        section = text[heading.start():section_end]

        parts = [f"**Date:** {heading.group(1)}\n\n"]

        # The section starts exactly at the heading, so anchor the title there.
        title_match = title_pattern.match(section)
        if title_match:
            parts.append(f"**Title:** {title_match.group(1)}\n\n")

        abs_match = abs_link_pattern.search(section)
        if abs_match:
            abs_link = abs_match.group(2)
            parts.append(f"**Abstract Link:** [{abs_link}]({abs_link})\n\n")

        pdf_match = pdf_link_pattern.search(section)
        if pdf_match:
            pdf_link = pdf_match.group(1)
            parts.append(f"**PDF Link:** [{pdf_link}]({pdf_link})\n\n")

        parts.append("---\n\n")
        sections.append("".join(parts))

    return "".join(sections)
474
+
475
+
476
+
477
  def display_terms_with_links(terms):
478
  """Display terms with various search links."""
479
  search_urls = {