awacke1 committed on
Commit
8e7b22f
1 Parent(s): 1fc49dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -11
app.py CHANGED
@@ -340,17 +340,36 @@ def process_video_with_gpt(video_input, user_prompt):
340
 
341
  return response.choices[0].message.content
342
 
343
- # ArXiv Search Functions
344
def search_arxiv_old(query):
    """Search ArXiv papers using Hugging Face client.

    Sends ``query`` to the ``/ask_llm`` endpoint of the
    ``awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern`` client and returns
    whatever that call yields, unmodified.
    """
    # Positional arguments after the query are passed through exactly as the
    # endpoint receives them: a model identifier and a boolean flag.
    return Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern").predict(
        query,
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        True,
        api_name="/ask_llm",
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
354
 
355
  def search_arxiv(query):
356
 
 
340
 
341
  return response.choices[0].message.content
342
 
343
+
344
def extract_urls(text):
    """Reformat ArXiv search-result markdown into a per-paper summary.

    Each paper in ``text`` is expected to start with a header of the form
    ``### DD Mon YYYY | [Title](https://arxiv.org/abs/<id>)`` and to contain a
    ``[⬇️](https://arxiv.org/pdf/<id>)`` download link.  Every paper is parsed
    from its own section (its header up to the next header), so the date,
    title and links of one paper can never be paired with another paper's,
    and a single malformed entry no longer discards the whole result.

    Args:
        text: Markdown string to scan.

    Returns:
        A markdown string with ``Date``, ``Title``, ``Abstract Link`` and
        ``PDF Link`` lines for each complete paper entry, entries separated
        by ``---``.  Returns ``''`` when no complete entry is found or when
        ``text`` is not a string.
    """
    if not isinstance(text, str):
        # Keep the original best-effort contract for unusable input: emit the
        # '.' placeholder through st.write and return an empty result instead
        # of raising into the caller.
        st.write('.')
        return ''

    header_pattern = re.compile(r'### (\d{2} \w{3} \d{4})')
    title_pattern = re.compile(r'### \d{2} \w{3} \d{4} \| \[(.*?)\]')
    abs_link_pattern = re.compile(r'\[(.*?)\]\((https://arxiv\.org/abs/\d+\.\d+)\)')
    pdf_link_pattern = re.compile(r'\[⬇️\]\((https://arxiv\.org/pdf/\d+\.\d+)\)')

    headers = list(header_pattern.finditer(text))
    # A section runs from one dated header to the start of the next one
    # (or to the end of the text for the last header).
    section_ends = [match.start() for match in headers[1:]] + [len(text)]

    entries = []
    for header, section_end in zip(headers, section_ends):
        section = text[header.start():section_end]
        # The section begins at the header, so the title is anchored there.
        title_match = title_pattern.match(section)
        abs_match = abs_link_pattern.search(section)
        pdf_match = pdf_link_pattern.search(section)
        if not (title_match and abs_match and pdf_match):
            # Incomplete entry: skip it rather than mis-pair or fail entirely.
            continue

        date = header.group(1)
        title = title_match.group(1)
        abs_link = abs_match.group(2)
        pdf_link = pdf_match.group(1)
        entries.append(
            f"**Date:** {date}\n\n"
            f"**Title:** {title}\n\n"
            f"**Abstract Link:** [{abs_link}]({abs_link})\n\n"
            f"**PDF Link:** [{pdf_link}]({pdf_link})\n\n"
            "---\n\n"
        )

    return "".join(entries)
372
+
373
 
374
  def search_arxiv(query):
375