Spaces:

awacke1
/

RescuerOfStolenBikes

Running

awacke1 committed on Nov 25

Commit

8e7b22f

•

1 Parent(s): 1fc49dd

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -340,17 +340,36 @@ def process_video_with_gpt(video_input, user_prompt):
     return response.choices[0].message.content
-# ArXiv Search Functions
-def search_arxiv_old(query):
-    """Search ArXiv papers using Hugging Face client."""
-    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
-    response = client.predict(
-        query,
-        "mistralai/Mixtral-8x7B-Instruct-v0.1",
-        True,
-        api_name="/ask_llm"
-    )
-    return response
 def search_arxiv(query):

     return response.choices[0].message.content
def extract_urls(text):
    """Reformat arXiv search-result markdown into a per-paper summary.

    The input is scanned for entry headers of the form
    ``### DD Mon YYYY | [Title](https://arxiv.org/abs/<id>) | [⬇️](https://arxiv.org/pdf/<id>)``.
    Each header opens a section that runs until the next header (or the end
    of the text). The date, title, abstract link and PDF link are all taken
    from within that one section, so a stray arXiv link in an abstract body
    or an entry missing its PDF link cannot shift the fields of later
    entries.

    Args:
        text: Markdown text to scan.

    Returns:
        A markdown string with one ``Date / Title / Abstract Link / PDF Link``
        group per complete entry, each followed by a ``---`` rule. Entries
        missing any of the four fields are skipped. Returns ``''`` when no
        complete entry is found or when ``text`` is not a string.
    """
    if not isinstance(text, str):
        # Preserve the original best-effort contract for unusable input:
        # emit the '.' marker to the Streamlit page and return nothing,
        # rather than raising into the caller.
        st.write('.')
        return ''

    header_pattern = re.compile(r'### (\d{2} \w{3} \d{4})')
    # Anchored with .match() right after the date, so it only accepts a
    # title that belongs to this header.
    title_pattern = re.compile(r' \| \[(.*?)\]')
    abs_link_pattern = re.compile(r'\[.*?\]\((https://arxiv\.org/abs/\d+\.\d+)\)')
    pdf_link_pattern = re.compile(r'\[⬇️\]\((https://arxiv\.org/pdf/\d+\.\d+)\)')

    headers = list(header_pattern.finditer(text))
    entries = []
    for index, header in enumerate(headers):
        # A section spans from this header up to the start of the next one.
        if index + 1 < len(headers):
            section_end = headers[index + 1].start()
        else:
            section_end = len(text)

        title_match = title_pattern.match(text, header.end(), section_end)
        abs_match = abs_link_pattern.search(text, header.start(), section_end)
        pdf_match = pdf_link_pattern.search(text, header.start(), section_end)
        if not (title_match and abs_match and pdf_match):
            # Incomplete entry: drop just this one instead of discarding
            # every result.
            continue

        date = header.group(1)
        title = title_match.group(1)
        abs_link = abs_match.group(1)
        pdf_link = pdf_match.group(1)
        entries.append(
            f"**Date:** {date}\n\n"
            f"**Title:** {title}\n\n"
            f"**Abstract Link:** [{abs_link}]({abs_link})\n\n"
            f"**PDF Link:** [{pdf_link}]({pdf_link})\n\n"
            "---\n\n"
        )
    return "".join(entries)
 def search_arxiv(query):