DeepResearchEvaluator

Running

App Files Community

awacke1 committed on Dec 31, 2024

Commit

ad5d2a3

verified ·

1 Parent(s): 81a7060

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -88

app.py CHANGED Viewed

@@ -309,92 +309,80 @@ def parse_arxiv_refs(ref_text: str):
     Returns list of dicts with paper details, limited to 20 papers.
     Returns empty list if parsing fails.
     """
-    if not ref_text:
-        return []
-    # Split on the paper header pattern
-    papers = re.split(r'\*\*.*?\|\s*.*?\|\s*.*?\*\*', ref_text)
-    headers = re.findall(r'\*\*.*?\|\s*.*?\|\s*.*?\*\*', ref_text)
-    results = []
-    for i, (header, content) in enumerate(zip(headers, papers[1:])):
-        if i >= 20:  # Limit to 20 papers
-            break
-        # Parse header parts
-        header_parts = [p.strip() for p in header.strip('*').split('|')]
-        if len(header_parts) >= 2:
-            date_str = header_parts[0].strip()
-            title = header_parts[1].strip()
-            # Parse content into authors and summary
-            content_parts = content.strip().split('\n', 1)
-            authors = content_parts[0].strip('*') if content_parts else ""
-            summary = content_parts[1].strip() if len(content_parts) > 1 else ""
-            # Extract year from date
-            year_match = re.search(r'20\d{2}', date_str)
-            year = int(year_match.group(0)) if year_match else None
-            results.append({
-                'title': title,
-                'summary': summary,
-                'authors': authors,
-                'year': year,
-                'date': date_str
-            })
 def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
                       titles_summary=True, full_audio=False):
     """Perform Arxiv search and generate audio summaries."""
     start = time.time()
-    # 🎯 1) Query the HF RAG pipeline
     client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
     refs = client.predict(q,20,"Semantic Search","mistralai/Mixtral-8x7B-Instruct-v0.1",api_name="/update_with_rag_md")[0]
     r2 = client.predict(q,"mistralai/Mixtral-8x7B-Instruct-v0.1",True,api_name="/ask_llm")
-    # 🎯 2) Combine for final text output
     result = f"### 🔎 {q}\n\n{r2}\n\n{refs}"
     st.markdown(result)
-    # 🎯 3) Generate "all at once" audio if requested
-    if full_audio:
-        complete_text = f"Complete response for query: {q}. {clean_for_speech(r2)} {clean_for_speech(refs)}"
-        audio_file_full = speak_with_edge_tts(complete_text)
-        st.write("### 📚 Full Audio")
-        play_and_download_audio(audio_file_full)
-    if vocal_summary:
-        main_text = clean_for_speech(r2)
-        audio_file_main = speak_with_edge_tts(main_text)
-        st.write("### 🎙 Short Audio")
-        play_and_download_audio(audio_file_main)
-    if extended_refs:
-        summaries_text = "Extended references: " + refs.replace('"','')
-        summaries_text = clean_for_speech(summaries_text)
-        audio_file_refs = speak_with_edge_tts(summaries_text)
-        st.write("### 📜 Long Refs")
-        play_and_download_audio(audio_file_refs)
-    # --------------------------------------
-    # NEW: Parse references, show sorted list
-    # --------------------------------------
     parsed_refs = parse_arxiv_refs(refs)
-    # Sort by year descending (put None at bottom)
-    # If you want to skip older than 2022, you can filter them:
-    # parsed_refs = [r for r in parsed_refs if (r["year"] is not None and r["year"] >= 2022)]
-    parsed_refs.sort(key=lambda x: x["year"] if x["year"] else 0, reverse=True)
-    st.write("## Individual Papers (Most Recent First)")
     for idx, paper in enumerate(parsed_refs):
-        year_str = paper["year"] if paper["year"] else "Unknown Year"
-        st.markdown(f"**{idx+1}. {paper['title']}**  \n*Year:* {year_str}")
-        st.markdown(f"*Summary:* {paper['summary']}")
-        # Two new TTS buttons: Title only or Title+Summary
         colA, colB = st.columns(2)
         with colA:
             if st.button(f"🔊 Title", key=f"title_{idx}"):
@@ -403,34 +391,17 @@ def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
                 play_and_download_audio(audio_file_title)
         with colB:
-            if st.button(f"🔊 Title+Summary", key=f"summary_{idx}"):
-                text_tts = clean_for_speech(paper['title'] + ". " + paper['summary'])
                 audio_file_title_summary = speak_with_edge_tts(text_tts)
                 play_and_download_audio(audio_file_title_summary)
         st.write("---")
-    # Keep your original block for "Titles Only" if you want:
-    if titles_summary:
-        # This is your existing code block
-        titles = []
-        for line in refs.split('\n'):
-            m = re.search(r"\[([^\]]+)\]", line)
-            if m:
-                titles.append(m.group(1))
-        if titles:
-            titles_text = "Titles: " + ", ".join(titles)
-            titles_text = clean_for_speech(titles_text)
-            audio_file_titles = speak_with_edge_tts(titles_text)
-            st.write("### 🔖 Titles (All-In-One)")
-            play_and_download_audio(audio_file_titles)
     elapsed = time.time()-start
     st.write(f"**Total Elapsed:** {elapsed:.2f} s")
-    # Always create a file with the result
     create_file(q, result, "md")
     return result
 def process_with_gpt(text):

     Returns list of dicts with paper details, limited to 20 papers.
     Returns empty list if parsing fails.
     """
+    try:
+        if not ref_text:
+            return []
+        # Split on the paper header pattern
+        papers = re.split(r'\*\*.*?\|\s*.*?\|\s*.*?\*\*', ref_text)
+        headers = re.findall(r'\*\*.*?\|\s*.*?\|\s*.*?\*\*', ref_text)
+        results = []
+        for i, (header, content) in enumerate(zip(headers, papers[1:])):
+            if i >= 20:  # Limit to 20 papers
+                break
+            try:
+                # Parse header parts
+                header_parts = [p.strip() for p in header.strip('*').split('|')]
+                if len(header_parts) >= 2:
+                    date_str = header_parts[0].strip()
+                    title = header_parts[1].strip()
+                    # Parse content into authors and summary
+                    content_parts = content.strip().split('\n', 1)
+                    authors = content_parts[0].strip('*') if content_parts else ""
+                    summary = content_parts[1].strip() if len(content_parts) > 1 else ""
+                    # Extract year from date
+                    year_match = re.search(r'20\d{2}', date_str)
+                    year = int(year_match.group(0)) if year_match else None
+                    results.append({
+                        'title': title,
+                        'summary': summary,
+                        'authors': authors,
+                        'year': year,
+                        'date': date_str
+                    })
+            except Exception as e:
+                st.warning(f"Error parsing paper {i+1}: {str(e)}")
+                continue
+        return results
+    except Exception as e:
+        st.error(f"Error parsing papers: {str(e)}")
+        return []
 def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
                       titles_summary=True, full_audio=False):
     """Perform Arxiv search and generate audio summaries."""
     start = time.time()
+    # Query the HF RAG pipeline
     client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
     refs = client.predict(q,20,"Semantic Search","mistralai/Mixtral-8x7B-Instruct-v0.1",api_name="/update_with_rag_md")[0]
     r2 = client.predict(q,"mistralai/Mixtral-8x7B-Instruct-v0.1",True,api_name="/ask_llm")
+    # Combine for final text output
     result = f"### 🔎 {q}\n\n{r2}\n\n{refs}"
     st.markdown(result)
+    # Parse references
     parsed_refs = parse_arxiv_refs(refs)
+    # Sort only if we have results
+    if parsed_refs:
+        parsed_refs.sort(key=lambda x: x.get("year", 0) if x.get("year") else 0, reverse=True)
+    # Display papers
+    st.write("## Research Papers")
     for idx, paper in enumerate(parsed_refs):
+        st.markdown(f"**{paper['date']} | {paper['title']} | ⬇️**")
+        st.markdown(f"*{paper['authors']}*")
+        st.markdown(paper['summary'])
+        # Audio controls
         colA, colB = st.columns(2)
         with colA:
             if st.button(f"🔊 Title", key=f"title_{idx}"):
                 play_and_download_audio(audio_file_title)
         with colB:
+            if st.button(f"🔊 Full Details", key=f"summary_{idx}"):
+                text_tts = clean_for_speech(f"{paper['title']} by {paper['authors']}. {paper['summary']}")
                 audio_file_title_summary = speak_with_edge_tts(text_tts)
                 play_and_download_audio(audio_file_title_summary)
         st.write("---")
+    # Rest of your existing function...
     elapsed = time.time()-start
     st.write(f"**Total Elapsed:** {elapsed:.2f} s")
     create_file(q, result, "md")
     return result
 def process_with_gpt(text):