Spaces:

awacke1
/

AIKnowledgeTreeBuilder

Running

awacke1 committed on Nov 14, 2024

Commit

ff16fff

verified ·

1 Parent(s): b027e97

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -443,6 +443,37 @@ def extract_terms(markdown_text):
             terms.append(line)
     return terms
 def display_terms_with_links(terms):
     """Display terms with various search links."""
     search_urls = {

             terms.append(line)
     return terms
def extract_urls(text):
    """Render the arXiv entries found in ``text`` as a Markdown summary.

    An entry starts at a heading of the form ``### DD Mon YYYY`` and runs up
    to the next such heading (or to the end of ``text``).  Inside each entry
    the function looks for:

    * the title, written as ``### DD Mon YYYY | [Title]``;
    * the first ``[...](https://arxiv.org/abs/<id>)`` link;
    * the first ``[⬇️](https://arxiv.org/pdf/<id>)`` link.

    Fields are paired per entry rather than by their position in the whole
    document, so a malformed entry or a stray link cannot shift the fields
    of other entries or discard the whole result.  Entries that lack any of
    the three fields are skipped.

    Args:
        text: Markdown text to scan.

    Returns:
        A Markdown string holding one Date / Title / Abstract Link /
        PDF Link section per complete entry, each followed by a ``---``
        rule.  Returns ``''`` when no complete entry is found, or when
        ``text`` is not a string (in which case ``'.'`` is also written
        with ``st.write``).
    """
    if not isinstance(text, str):
        # Preserve the best-effort contract for unusable input: emit the
        # placeholder dot and hand back an empty string instead of raising.
        st.write('.')
        return ''

    date_pattern = re.compile(r'### (\d{2} \w{3} \d{4})')
    abs_link_pattern = re.compile(r'\[(.*?)\]\((https://arxiv\.org/abs/\d+\.\d+)\)')
    pdf_link_pattern = re.compile(r'\[⬇️\]\((https://arxiv\.org/pdf/\d+\.\d+)\)')
    title_pattern = re.compile(r'### \d{2} \w{3} \d{4} \| \[(.*?)\]')

    headings = list(date_pattern.finditer(text))
    sections = []
    for index, heading in enumerate(headings):
        # The entry ends where the next dated heading begins.
        if index + 1 < len(headings):
            entry_end = headings[index + 1].start()
        else:
            entry_end = len(text)
        entry = text[heading.start():entry_end]

        # The entry slice begins at the heading, so the title is anchored
        # with match(); the links may appear anywhere inside the entry.
        title_match = title_pattern.match(entry)
        abs_match = abs_link_pattern.search(entry)
        pdf_match = pdf_link_pattern.search(entry)
        if not (title_match and abs_match and pdf_match):
            continue

        date = heading.group(1)
        title = title_match.group(1)
        abs_link = abs_match.group(2)
        pdf_link = pdf_match.group(1)
        sections.append(
            f"**Date:** {date}\n\n"
            f"**Title:** {title}\n\n"
            f"**Abstract Link:** [{abs_link}]({abs_link})\n\n"
            f"**PDF Link:** [{pdf_link}]({pdf_link})\n\n"
            "---\n\n"
        )
    return "".join(sections)
 def display_terms_with_links(terms):
     """Display terms with various search links."""
     search_urls = {