awacke1 committed on
Commit
b4bdbf1
·
verified ·
1 Parent(s): 5c13f31

Create backup21.app.py

Browse files
Files changed (1) hide show
  1. backup21.app.py +947 -0
backup21.app.py ADDED
@@ -0,0 +1,947 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, textract, time, zipfile
3
+ import plotly.graph_objects as go
4
+ import streamlit.components.v1 as components
5
+ from datetime import datetime
6
+ from audio_recorder_streamlit import audio_recorder
7
+ from bs4 import BeautifulSoup
8
+ from collections import defaultdict, deque, Counter
9
+ from dotenv import load_dotenv
10
+ from gradio_client import Client
11
+ from huggingface_hub import InferenceClient
12
+ from io import BytesIO
13
+ from PIL import Image
14
+ from PyPDF2 import PdfReader
15
+ from urllib.parse import quote
16
+ from xml.etree import ElementTree as ET
17
+ from openai import OpenAI
18
+ import extra_streamlit_components as stx
19
+ from streamlit.runtime.scriptrunner import get_script_run_ctx
20
+ import asyncio
21
+ import edge_tts
22
+ from streamlit_marquee import streamlit_marquee
23
+
24
+ # ─────────────────────────────────────────────────────────
25
+ # 1. CORE CONFIGURATION & SETUP
26
+ # ─────────────────────────────────────────────────────────
27
# Page-level Streamlit configuration for the app.
st.set_page_config(
    page_title="🚲TalkingAIResearcherπŸ†",
    page_icon="πŸš²πŸ†",
    layout="wide",
    initial_sidebar_state="auto",
    menu_items={
        'Get Help': 'https://huggingface.co/awacke1',
        'Report a bug': 'https://huggingface.co/spaces/awacke1',
        'About': "🚲TalkingAIResearcherπŸ†"
    }
)
load_dotenv()

# English voices offered in the TTS voice selector.
EDGE_TTS_VOICES = [
    "en-US-AriaNeural",
    "en-US-GuyNeural",
    "en-US-JennyNeural",
    "en-GB-SoniaNeural",
    "en-GB-RyanNeural",
    "en-AU-NatashaNeural",
    "en-AU-WilliamNeural",
    "en-CA-ClaraNeural",
    "en-CA-LiamNeural"
]

# Session-state defaults: each key is written only when it is not already
# present, so values set during earlier script runs are left untouched.
_SESSION_DEFAULTS = {
    'marquee_settings': {
        "background": "#1E1E1E",
        "color": "#FFFFFF",
        "font-size": "14px",
        "animationDuration": "20s",
        "width": "100%",
        "lineHeight": "35px"
    },
    'tts_voice': EDGE_TTS_VOICES[0],
    'audio_format': 'mp3',
    'transcript_history': [],
    'chat_history': [],
    'openai_model': "gpt-4o-2024-05-13",
    'messages': [],
    'last_voice_input': "",
    'editing_file': None,
    'edit_new_name': "",
    'edit_new_content': "",
    'viewing_prefix': None,
    'should_rerun': False,
    'old_val': None,
    'last_query': "",
    'marquee_content': "πŸš€ Welcome to TalkingAIResearcher | πŸ€– Your Research Assistant",
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# API keys: environment variables first, then Streamlit secrets override them
# when the corresponding secret is defined.
openai_api_key = os.getenv('OPENAI_API_KEY', "")
anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
xai_key = os.getenv('xai', "")
if 'OPENAI_API_KEY' in st.secrets:
    openai_api_key = st.secrets['OPENAI_API_KEY']
if 'ANTHROPIC_API_KEY' in st.secrets:
    anthropic_key = st.secrets["ANTHROPIC_API_KEY"]

openai.api_key = openai_api_key
openai_client = OpenAI(api_key=openai.api_key, organization=os.getenv('OPENAI_ORG_ID'))
HF_KEY = os.getenv('HF_KEY')
API_URL = os.getenv('API_URL')

# Icon shown next to each file extension in the file-history sidebar.
FILE_EMOJIS = {
    "md": "πŸ“",
    "mp3": "🎡",
    "wav": "πŸ”Š"
}
129
+
130
+ # ─────────────────────────────────────────────────────────
131
+ # 2. HELPER FUNCTIONS
132
+ # ─────────────────────────────────────────────────────────
133
+
134
def get_central_time():
    """Return the current timezone-aware datetime in the 'US/Central' zone."""
    return datetime.now(pytz.timezone('US/Central'))
138
+
139
def format_timestamp_prefix():
    """Return the current Central time formatted as MM_DD_YY_HH_MM_AM/PM.

    The result has six underscore-separated fields and uses a 12-hour clock.
    """
    timestamp_pattern = "%m_%d_%y_%I_%M_%p"
    return get_central_time().strftime(timestamp_pattern)
143
+
144
def initialize_marquee_settings():
    """Store the default marquee style in session state if none is present."""
    if 'marquee_settings' in st.session_state:
        return
    default_style = {
        "background": "#1E1E1E",
        "color": "#FFFFFF",
        "font-size": "14px",
        "animationDuration": "20s",
        "width": "100%",
        "lineHeight": "35px"
    }
    st.session_state['marquee_settings'] = default_style
154
+
155
def get_marquee_settings():
    """Return the marquee settings dict, creating the defaults first if needed."""
    initialize_marquee_settings()
    current_settings = st.session_state['marquee_settings']
    return current_settings
158
+
159
def update_marquee_settings_ui():
    """Render the sidebar marquee controls and copy their values into session state.

    Two colour pickers (background, text) go in the left sidebar column and two
    sliders (font size, animation duration) in the right one. The chosen values
    are merged into ``st.session_state['marquee_settings']``.
    """
    st.sidebar.markdown("### 🎯 Marquee Settings")
    settings = st.session_state['marquee_settings']
    left_col, right_col = st.sidebar.columns(2)

    with left_col:
        bg_color = st.color_picker(
            "🎨 Background", settings["background"], key="bg_color_picker"
        )
        text_color = st.color_picker(
            "✍️ Text", settings["color"], key="text_color_picker"
        )

    with right_col:
        font_size = st.slider("πŸ“ Size", 10, 24, 14, key="font_size_slider")
        duration = st.slider("⏱️ Speed", 1, 20, 20, key="duration_slider")

    new_values = {
        "background": bg_color,
        "color": text_color,
        "font-size": f"{font_size}px",
        "animationDuration": f"{duration}s"
    }
    settings.update(new_values)
180
+
181
def display_marquee(text, settings, key_suffix=""):
    """Render ``text`` as a scrolling marquee styled by ``settings``.

    Text longer than 280 characters is cut to 280 and suffixed with "...".
    ``key_suffix`` is appended to "marquee_" to form the component key.
    """
    char_limit = 280
    if len(text) > char_limit:
        shown_text = text[:char_limit] + "..."
    else:
        shown_text = text
    streamlit_marquee(content=shown_text, **settings, key=f"marquee_{key_suffix}")
    # Empty write, kept from the original, emitted right after the marquee.
    st.write("")
190
+
191
def get_high_info_terms(text: str, top_n=10) -> list:
    """Return the ``top_n`` most frequent words and bigrams, excluding stop words.

    Words are lower-cased and may contain internal hyphens. A bigram is kept
    only when neither of its two words is a stop word; previously bigrams such
    as "of the" slipped through because only whole terms were compared against
    the stop-word list.

    Args:
        text: Source text to analyse.
        top_n: Maximum number of terms to return.

    Returns:
        Terms ordered by descending frequency. Ties keep first-seen order,
        with single words listed before bigrams.
    """
    stop_words = frozenset((
        'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
        'of', 'with',
    ))
    words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
    unigrams = [word for word in words if word not in stop_words]
    # Bigrams are built from adjacent words of the original sequence, then
    # dropped if either side is a stop word.
    bigrams = [
        f"{first} {second}"
        for first, second in zip(words, words[1:])
        if first not in stop_words and second not in stop_words
    ]
    counts = Counter(unigrams + bigrams)
    return [term for term, _ in counts.most_common(top_n)]
200
+
201
def clean_text_for_filename(text: str) -> str:
    """Turn free text into an underscore-joined filename fragment.

    The text is lower-cased, characters other than word characters, whitespace
    and hyphens are removed, words of three characters or fewer and a few
    filler words are dropped, and the result is capped at 200 characters.
    """
    filler_words = {'the', 'and', 'for', 'with', 'this', 'that', 'ai', 'library'}
    sanitized = re.sub(r'[^\w\s-]', '', text.lower())
    kept_words = (
        word
        for word in sanitized.split()
        if not (len(word) <= 3 or word in filler_words)
    )
    return '_'.join(kept_words)[:200]
210
+
211
def generate_filename(prompt, response, file_type="md", max_length=200):
    """Build a timestamp-prefixed filename from a prompt/response pair.

    The name is composed of the timestamp prefix, up to five high-information
    terms taken from the first 200 characters of prompt + response, and a
    cleaned snippet of the first 40 characters of each. Duplicate parts are
    removed (first occurrence wins), and the middle portion is truncated so
    the prefix, the name, the dot and the extension fit in ``max_length``.
    """
    prefix = format_timestamp_prefix() + "_"
    info_terms = get_high_info_terms((prompt + " " + response)[:200], top_n=5)
    snippet_cleaned = clean_text_for_filename(
        (prompt[:40] + " " + response[:40]).strip()
    )

    # dict.fromkeys de-duplicates while keeping first-seen order.
    unique_parts = list(dict.fromkeys(info_terms + [snippet_cleaned]))

    full_name = '_'.join(unique_parts).strip('_')
    name_budget = max_length - len(prefix) - len(file_type) - 1
    if len(full_name) > name_budget:
        full_name = full_name[:name_budget]

    return f"{prefix}{full_name}.{file_type}"
240
+
241
def create_file(prompt, response, file_type="md"):
    """Write prompt and response to a new UTF-8 text file and return its name.

    The filename is derived from the stripped prompt and response. The file
    body is the unstripped prompt, a blank line, then the response.
    """
    filename = generate_filename(prompt.strip(), response.strip(), file_type)
    body = prompt + "\n\n" + response
    with open(filename, 'w', encoding='utf-8') as out_file:
        out_file.write(body)
    return filename
247
+
248
def get_download_link(file, file_type="zip"):
    """Return an HTML ``<a>`` tag embedding ``file`` as a base64 data URI.

    ``file_type`` selects the MIME type and icon for "zip", "mp3", "wav" and
    "md". Any other value yields application/octet-stream with no icon.
    """
    with open(file, "rb") as handle:
        b64 = base64.b64encode(handle.read()).decode()

    base_name = os.path.basename(file)
    # file_type -> (MIME type, label prefix shown before "Download")
    known_types = {
        "zip": ("application/zip", "πŸ“‚ "),
        "mp3": ("audio/mpeg", "🎡 "),
        "wav": ("audio/wav", "πŸ”Š "),
        "md": ("text/markdown", "πŸ“ "),
    }
    mime, label_prefix = known_types.get(file_type, ("application/octet-stream", ""))
    return (
        f'<a href="data:{mime};base64,{b64}" download="{base_name}">'
        f'{label_prefix}Download {base_name}</a>'
    )
264
+
265
def clean_for_speech(text: str) -> str:
    """Prepare markdown-like text for text-to-speech.

    Newlines and "</s>" markers become spaces, "#" characters are removed,
    parenthesised http(s) URLs are dropped, and whitespace runs are collapsed
    to single spaces with the ends trimmed.
    """
    # Applied in this exact order, matching the original sequence of replaces.
    literal_swaps = (("\n", " "), ("</s>", " "), ("#", ""))
    for needle, substitute in literal_swaps:
        text = text.replace(needle, substitute)
    without_links = re.sub(r"\(https?:\/\/[^\)]+\)", "", text)
    return re.sub(r"\s+", " ", without_links).strip()
273
+
274
async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
    """Synthesize ``text`` with edge-tts and save it to a generated filename.

    Returns the output filename, or ``None`` when the cleaned text is empty.
    ``rate`` and ``pitch`` are integers rendered as signed "%"/"Hz" strings.
    """
    spoken_text = clean_for_speech(text)
    if not spoken_text.strip():
        return None

    communicate = edge_tts.Communicate(
        spoken_text,
        voice,
        rate=f"{rate:+d}%",
        pitch=f"{pitch:+d}Hz",
    )
    # The cleaned text is passed as both "prompt" and "response" of the name.
    out_fn = generate_filename(spoken_text, spoken_text, file_type=file_format)
    await communicate.save(out_fn)
    return out_fn
285
+
286
def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
    """Run the async TTS generator to completion and return its result.

    The result is the audio filename, or ``None`` when there is nothing to say.
    """
    tts_coroutine = edge_tts_generate_audio(text, voice, rate, pitch, file_format)
    return asyncio.run(tts_coroutine)
289
+
290
def play_and_download_audio(file_path, file_type="mp3"):
    """Show an audio player and a download link for ``file_path``.

    Does nothing when the path is empty or the file does not exist.
    """
    if not file_path or not os.path.exists(file_path):
        return
    st.audio(file_path)
    link_html = get_download_link(file_path, file_type=file_type)
    st.markdown(link_html, unsafe_allow_html=True)
296
+
297
def save_qa_with_audio(question, answer, voice=None):
    """Save a question/answer pair as a markdown file and as spoken audio.

    Args:
        question: The user's question text.
        answer: The generated answer text.
        voice: Edge TTS voice name. Falls back to the voice stored in
            ``st.session_state['tts_voice']`` when falsy.

    Returns:
        ``(md_file, audio_file)``. ``md_file`` is the path returned by
        ``create_file`` and ``audio_file`` the value returned by
        ``speak_with_edge_tts`` for the configured audio format.
    """
    if not voice:
        voice = st.session_state['tts_voice']

    # The unused "# Question / # Answer" markdown string that the original
    # built here was never written anywhere, so it has been dropped.
    md_file = create_file(question, answer, "md")
    audio_file = speak_with_edge_tts(
        f"{question}\n\nAnswer: {answer}",
        voice=voice,
        file_format=st.session_state['audio_format']
    )
    return md_file, audio_file
311
+
312
+ # ─────────────────────────────────────────────────────────
313
+ # 3. PAPER PARSING & DISPLAY
314
+ # ─────────────────────────────────────────────────────────
315
+
316
def parse_arxiv_refs(ref_text: str, max_papers: int = 20):
    """Parse a markdown listing of arXiv references into paper dicts.

    A line containing exactly two ``|`` characters starts a new paper: the
    first field is the date and the second the title. The first non-blank
    line after a header becomes ``authors``; later non-blank lines are joined
    with single spaces into ``summary``.

    Args:
        ref_text: Multi-line markdown text; falsy input yields ``[]``.
        max_papers: Maximum number of papers to return (default 20).

    Returns:
        A list of dicts with keys ``date``, ``title``, ``url``, ``authors``,
        ``summary``, ``full_audio`` and ``download_base64``. ``url`` is the
        first arXiv link on the header line, or ``paper_<index>`` when the
        header has none.
    """
    if not ref_text:
        return []

    limit = max(max_papers, 0)
    results = []
    current_paper = None

    for line in ref_text.split('\n'):
        if line.count('|') == 2:
            # A header line closes the paper being collected, if any.
            if current_paper:
                results.append(current_paper)
                # Reset so the paper cannot be appended again after a break.
                current_paper = None
            if len(results) >= limit:
                break

            # Exactly two pipes always split into three parts, so the
            # indexing below cannot fail and needs no exception handler.
            header_parts = line.strip('* ').split('|')
            # Stop at whitespace or closing markdown punctuation so a link
            # written as [title](https://arxiv.org/abs/...) does not drag the
            # trailing ")" into the URL.
            url_match = re.search(r'https://arxiv\.org/[^\s)\]>]+', line)
            current_paper = {
                'date': header_parts[0].strip(),
                'title': header_parts[1].strip(),
                'url': url_match.group(0) if url_match else f"paper_{len(results)}",
                'authors': '',
                'summary': '',
                'full_audio': None,
                'download_base64': '',
            }

        elif current_paper:
            # Blank lines carry no content; skipping them avoids padding the
            # summary with stray spaces.
            if not line.strip():
                continue
            if not current_paper['authors']:
                current_paper['authors'] = line.strip('* ')
            elif current_paper['summary']:
                current_paper['summary'] += ' ' + line.strip()
            else:
                current_paper['summary'] = line.strip()

    if current_paper:
        results.append(current_paper)

    return results[:limit]
370
+
371
def create_paper_links_md(papers):
    """Return markdown with a heading and one numbered arXiv link per paper."""
    numbered_entries = [
        f"{position}. **{paper['title']}** β€” [Arxiv]({paper['url']})"
        for position, paper in enumerate(papers, start=1)
    ]
    return "\n".join(["# Paper Links\n", *numbered_entries])
377
+
378
def create_paper_audio_files(papers, input_question):
    """Generate a TTS summary for each paper and attach it to the paper dict.

    On success ``paper['full_audio']`` holds the audio file path and
    ``paper['download_base64']`` an HTML download link with the audio embedded
    as base64. On any error a warning is shown and both fields are reset.
    ``input_question`` is accepted but not used by this function.
    """
    for paper in papers:
        try:
            narration = clean_for_speech(
                f"{paper['title']} by {paper['authors']}. {paper['summary']}"
            )
            file_format = st.session_state['audio_format']
            audio_file = speak_with_edge_tts(
                narration,
                voice=st.session_state['tts_voice'],
                file_format=file_format
            )
            paper['full_audio'] = audio_file

            if not audio_file:
                continue

            with open(audio_file, "rb") as audio_handle:
                b64_data = base64.b64encode(audio_handle.read()).decode()
            download_filename = os.path.basename(audio_file)
            # Anything other than mp3 is labelled audio/wav.
            mime_type = "wav" if file_format != "mp3" else "mpeg"
            paper['download_base64'] = (
                f'<a href="data:audio/{mime_type};base64,{b64_data}" '
                f'download="{download_filename}">🎡 Download {download_filename}</a>'
            )

        except Exception as e:
            st.warning(f"Error processing paper {paper['title']}: {str(e)}")
            paper['full_audio'] = None
            paper['download_base64'] = ''
409
+
410
+
411
def display_file_history_in_sidebar():
    """Show generated files in the sidebar, grouped by timestamp prefix.

    Local ``*.md``, ``*.mp3`` and ``*.wav`` files are grouped by the first six
    underscore-separated fields of their name (the timestamp prefix). Each
    group gets an expander with a short preview. Full markdown content is
    shown only after its "View Full Content" button has been clicked.

    NOTE(review): a later definition with the same name exists in this file
    and replaces this one at import time — confirm which variant is intended.
    """
    st.sidebar.markdown("---")
    st.sidebar.markdown("### πŸ“‚ File History")

    # Gather all files
    md_files = glob.glob("*.md")
    mp3_files = glob.glob("*.mp3")
    wav_files = glob.glob("*.wav")
    all_files = md_files + mp3_files + wav_files

    if not all_files:
        st.sidebar.write("No files found.")
        return

    # Group files by their timestamp prefix.
    grouped_files = {}
    for f in all_files:
        fname = os.path.basename(f)
        prefix = '_'.join(fname.split('_')[:6])
        group = grouped_files.setdefault(prefix, {'md': [], 'audio': []})

        ext = os.path.splitext(fname)[1].lower()
        if ext == '.md':
            group['md'].append(f)
        elif ext in ('.mp3', '.wav'):
            group['audio'].append(f)

    # Sidebar "delete all" and "zip all" actions.
    col1, col4 = st.sidebar.columns(2)
    with col1:
        if st.button("πŸ—‘ Delete All"):
            for f in all_files:
                os.remove(f)
            st.session_state.should_rerun = True
            # The files are gone; stop here rather than trying to open them
            # for previews below, which raised FileNotFoundError.
            return
    with col4:
        if st.button("⬇️ Zip All"):
            zip_name = create_zip_of_files(md_files, mp3_files, wav_files,
                                           st.session_state.get('last_query', ''))
            if zip_name:
                st.sidebar.markdown(get_download_link(zip_name, "zip"),
                                    unsafe_allow_html=True)

    # Newest group first. The MM_DD_YY_hh_mm_AM/PM prefix does not sort
    # chronologically as a string, so order by modification time instead.
    def _group_mtime(item):
        members = item[1]['md'] + item[1]['audio']
        return max((os.path.getmtime(p) for p in members), default=0.0)

    sorted_groups = sorted(grouped_files.items(), key=_group_mtime, reverse=True)

    # Display grouped files
    for prefix, files in sorted_groups:
        # Preview: first 200 characters of the group's first markdown file.
        preview = ""
        if files['md']:
            with open(files['md'][0], "r", encoding="utf-8") as f:
                preview = f.read(200).replace("\n", " ")
            # read(200) never returns more than 200 characters, so a full
            # read is the signal that the file may continue.
            if len(preview) >= 200:
                preview += "..."

        # Per-group flag remembering whether full content was requested.
        group_key = f"group_{prefix}"
        if group_key not in st.session_state:
            st.session_state[group_key] = False

        with st.sidebar.expander(f"πŸ“‘ Query Group: {prefix}"):
            st.write("**Preview:**")
            st.write(preview)

            if st.button("πŸ“– View Full Content", key=f"btn_{prefix}"):
                st.session_state[group_key] = True

            # Only show full content and audio if the button was clicked.
            if st.session_state[group_key]:
                for md_file in files['md']:
                    with open(md_file, "r", encoding="utf-8") as f:
                        content = f.read()
                    st.markdown("**Full Content:**")
                    st.markdown(content)
                    st.markdown(get_download_link(md_file, file_type="md"),
                                unsafe_allow_html=True)

                # Audio playback in the sidebar is switched off by this flag.
                usePlaySidebar = False
                if usePlaySidebar:
                    for audio_file in files['audio']:
                        ext = os.path.splitext(audio_file)[1].replace('.', '')
                        st.audio(audio_file)
                        st.markdown(get_download_link(audio_file, file_type=ext),
                                    unsafe_allow_html=True)
503
+
504
def display_papers(papers, marquee_settings):
    """Render each paper in the main area: marquee, links, authors, summary, audio.

    The PDF link is derived from the paper URL by replacing '/abs/' with
    '/pdf/'. Audio and its download link are shown only when the paper has a
    'full_audio' value.
    """
    st.write("## Research Papers")
    for i, paper in enumerate(papers, start=1):
        title = paper['title']
        authors = paper['authors']
        display_marquee(
            f"πŸ“„ {title} | πŸ‘€ {authors[:120]}",
            marquee_settings,
            key_suffix=f"paper_{i}",
        )

        with st.expander(f"{i}. πŸ“„ {title}", expanded=True):
            abs_url = paper['url']
            pdf_url = abs_url.replace('/abs/', '/pdf/')
            st.markdown(f"""
**{paper['date']} | {title}**
πŸ“„ [Abstract]({abs_url}) | πŸ“‘ [PDF]({pdf_url})
""")
            st.markdown(f"*Authors:* {authors}")
            st.markdown(paper['summary'])

            if not paper.get('full_audio'):
                continue
            st.write("πŸ“š Paper Audio")
            st.audio(paper['full_audio'])
            if paper['download_base64']:
                st.markdown(paper['download_base64'], unsafe_allow_html=True)
525
+
526
def display_papers_in_sidebar(papers):
    """List the papers in the sidebar; audio appears only after it is requested.

    A per-paper session-state flag, keyed by the paper URL, records whether
    the "Load Audio" button has been pressed for that paper.
    """
    st.sidebar.title("🎢 Papers & Audio")
    for i, paper in enumerate(papers, start=1):
        paper_key = f"paper_{paper['url']}"
        if paper_key not in st.session_state:
            st.session_state[paper_key] = False

        with st.sidebar.expander(f"{i}. {paper['title']}"):
            abs_url = paper['url']
            pdf_url = abs_url.replace('/abs/', '/pdf/')
            st.markdown(f"πŸ“„ [Abstract]({abs_url}) | πŸ“‘ [PDF]({pdf_url})")

            # Short previews; the ellipsis is appended unconditionally.
            st.markdown(f"**Authors:** {paper['authors'][:100]}...")
            if paper['summary']:
                st.markdown(f"**Summary:** {paper['summary'][:200]}...")

            # The button is only rendered for papers that have audio.
            if paper['full_audio'] and st.button("🎡 Load Audio",
                                                 key=f"btn_{paper_key}"):
                st.session_state[paper_key] = True

            audio_requested = st.session_state[paper_key]
            if audio_requested and paper['full_audio']:
                st.audio(paper['full_audio'])
                if paper['download_base64']:
                    st.markdown(paper['download_base64'], unsafe_allow_html=True)
554
+
555
+ # ─────────────────────────────────────────────────────────
556
+ # 4. ZIP FUNCTION
557
+ # ─────────────────────────────────────────────────────────
558
+
559
def create_zip_of_files(md_files, mp3_files, wav_files, input_question):
    """Zip the given files and return the archive name, or ``None`` if none remain.

    ``readme.md`` (any case) is excluded. The archive name is the timestamp
    prefix plus hyphen-joined high-information terms, cut to 20 characters
    before the ".zip" extension. The terms are drawn from the markdown
    contents, the audio file names and ``input_question``.
    """
    md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
    all_files = md_files + mp3_files + wav_files
    if not all_files:
        return None

    # Text used only to derive descriptive terms for the archive name.
    text_pieces = []
    for path in all_files:
        if path.endswith('.md'):
            with open(path, 'r', encoding='utf-8') as handle:
                text_pieces.append(handle.read())
        elif path.endswith(('.mp3', '.wav')):
            stem = os.path.splitext(os.path.basename(path))[0]
            text_pieces.append(stem.replace('_', ' '))
    text_pieces.append(input_question)

    info_terms = get_high_info_terms(" ".join(text_pieces), top_n=10)
    timestamp = format_timestamp_prefix()
    name_text = '-'.join(info_terms[:5])
    short_zip_name = (timestamp + "_" + name_text)[:20] + ".zip"

    with zipfile.ZipFile(short_zip_name, 'w') as archive:
        for path in all_files:
            archive.write(path)
    return short_zip_name
591
+
592
+ # ─────────────────────────────────────────────────────────
593
+ # 5. MAIN LOGIC: AI LOOKUP & VOICE INPUT
594
+ # ─────────────────────────────────────────────────────────
595
+
596
def _ask_claude(prompt, model="claude-3-sonnet-20240229", max_tokens=1000):
    """Send one user message to Claude and return the text of the first reply block."""
    claude = anthropic.Anthropic(api_key=anthropic_key)
    response = claude.messages.create(
        model=model,
        max_tokens=max_tokens,
        messages=[
            {"role": "user", "content": prompt}
        ])
    return response.content[0].text


def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
                      titles_summary=True, full_audio=False):
    """Answer ``q`` with Claude plus an arXiv RAG search, rendering results as it goes.

    Steps:
      1. Ask Claude the raw question; show, save and speak the reply.
      2. Query the Gradio arXiv RAG space for reference papers.
      3. Ask Claude again with the paper list appended; show, save and speak
         the combined result.
      4. Parse the references into papers, write a links page, generate
         per-paper audio and display the papers in the page and the sidebar.

    Args:
        q: The user query.
        vocal_summary, extended_refs, titles_summary, full_audio: Accepted for
            caller compatibility; this function does not read them.

    Returns:
        The combined text of the second Claude reply followed by the raw
        reference markdown.
    """
    start = time.time()

    # --- 1) Claude API
    first_reply = _ask_claude(q)
    st.write("Claude's reply 🧠:")
    st.markdown(first_reply)

    # save_qa_with_audio writes the markdown file itself, so no separate
    # create_file call is needed here.
    _, audio_file = save_qa_with_audio(q, first_reply)
    st.subheader("πŸ“ Main Response Audio")
    play_and_download_audio(audio_file, st.session_state['audio_format'])

    # --- 2) Arxiv RAG
    st.write('Running Arxiv RAG with Claude inputs.')
    rag_client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    refs = rag_client.predict(
        q,
        10,
        "Semantic Search",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        api_name="/update_with_rag_md"
    )[0]

    # --- 3) Claude API with arxiv list of papers to app.py
    # The instruction refers to "the paper list below", so the references must
    # actually follow it; previously the prompt ended right after the
    # instruction and Claude never saw the papers.
    user_input = (
        q + '\n\n'
        + 'Use the paper list below to answer the question thinking through step by step how to create a streamlit app.py and requirements.txt for the solution that answers the questions with a working app to demonstrate.'
        + '\n\n'
        + (str(refs) if refs else '')
    )
    r2 = _ask_claude(user_input)
    st.write("Claude's reply 🧠:")
    st.markdown(r2)

    result = f"πŸ”Ž {r2}\n\n{refs}"
    _, audio_file = save_qa_with_audio(q, result)
    st.subheader("πŸ“ Main Response Audio")
    play_and_download_audio(audio_file, st.session_state['audio_format'])

    # --- 4) Parse + handle papers
    papers = parse_arxiv_refs(refs)
    if papers:
        # Create minimal links page first
        paper_links = create_paper_links_md(papers)
        create_file(q, paper_links, "md")
        st.markdown(paper_links)

        # Then create audio for each paper
        create_paper_audio_files(papers, input_question=q)
        display_papers(papers, get_marquee_settings())
        display_papers_in_sidebar(papers)
    else:
        st.warning("No papers found in the response.")

    elapsed = time.time() - start
    st.write(f"**Total Elapsed:** {elapsed:.2f} s")
    return result
679
+
680
def process_voice_input(text):
    """Run the AI lookup for a voice query, then save and play the result.

    Empty input is ignored. The lookup result is stored as markdown and audio,
    and both generated file names are displayed.
    """
    if not text:
        return

    st.subheader("πŸ” Search Results")
    lookup_options = {
        "vocal_summary": True,
        "extended_refs": False,
        "titles_summary": True,
        "full_audio": True,
    }
    result = perform_ai_lookup(text, **lookup_options)

    md_file, audio_file = save_qa_with_audio(text, result)
    st.subheader("πŸ“ Generated Files")
    for label, generated_path in (("Markdown", md_file), ("Audio", audio_file)):
        st.write(f"{label}: {generated_path}")
    play_and_download_audio(audio_file, st.session_state['audio_format'])
697
+
698
+ # ─────────────────────────────────────────────────────────
699
+ # 6. FILE HISTORY SIDEBAR
700
+ # ─────────────────────────────────────────────────────────
701
+
702
def display_file_history_in_sidebar():
    """List local .md, .mp3 and .wav files in the sidebar, newest first.

    Each file gets an expander showing its modification time plus either a
    200-character text snippet (markdown) or an audio player, along with a
    download link. Two buttons at the top delete every listed file or bundle
    them into a zip archive.

    NOTE(review): "Delete All" removes every ``*.md`` in the working
    directory, including any README.md — confirm that is intended.
    """
    st.sidebar.markdown("---")
    st.sidebar.markdown("### πŸ“‚ File History")

    # Gather all files
    md_files = glob.glob("*.md")
    mp3_files = glob.glob("*.mp3")
    wav_files = glob.glob("*.wav")
    all_files = md_files + mp3_files + wav_files

    if not all_files:
        st.sidebar.write("No files found.")
        return

    # Sidebar "delete all" and "zip all" actions.
    col1, col4 = st.sidebar.columns(2)
    with col1:
        if st.button("πŸ—‘ Delete All"):
            # The original looped over all_md / all_mp3 / all_wav, names that
            # are never defined, so the button raised NameError.
            for f in all_files:
                os.remove(f)
            st.session_state.should_rerun = True
            # Nothing is left to list; continuing would call getmtime on the
            # files that were just removed.
            return
    with col4:
        if st.button("⬇️ Zip All"):
            zip_name = create_zip_of_files(md_files, mp3_files, wav_files,
                                           st.session_state.get('last_query', ''))
            if zip_name:
                st.sidebar.markdown(get_download_link(zip_name, "zip"),
                                    unsafe_allow_html=True)

    # Sort newest first
    all_files = sorted(all_files, key=os.path.getmtime, reverse=True)

    for f in all_files:
        fname = os.path.basename(f)
        ext = os.path.splitext(fname)[1].lower().strip('.')
        emoji = FILE_EMOJIS.get(ext, 'πŸ“¦')
        time_str = datetime.fromtimestamp(os.path.getmtime(f)).strftime("%Y-%m-%d %H:%M:%S")

        with st.sidebar.expander(f"{emoji} {fname}"):
            st.write(f"**Modified:** {time_str}")
            if ext == "md":
                with open(f, "r", encoding="utf-8") as file_in:
                    snippet = file_in.read(200).replace("\n", " ")
                # A full 200-character read means the file may continue.
                if len(snippet) == 200:
                    snippet += "..."
                st.write(snippet)
                st.markdown(get_download_link(f, file_type="md"), unsafe_allow_html=True)
            elif ext in ["mp3", "wav"]:
                st.audio(f)
                st.markdown(get_download_link(f, file_type=ext), unsafe_allow_html=True)
            else:
                st.markdown(get_download_link(f), unsafe_allow_html=True)
760
+
761
+ # ─────────────────────────────────────────────────────────
762
+ # 7. MAIN APP
763
+ # ─────────────────────────────────────────────────────────
764
+
765
def _render_component_input():
    """Render the custom-component input panel and trigger a lookup.

    Declares and mounts the ``mycomponent`` custom component. When it yields
    a truthy value, an editable copy of that value is shown together with
    the AutoRun / FullAudio options, and ``perform_ai_lookup`` is invoked
    either automatically (AutoRun ticked and the component value differs
    from ``st.session_state.old_val``) or when the Run button is pressed.
    """
    mycomponent = components.declare_component("mycomponent", path="mycomponent")
    val = mycomponent(my_input_value="Hello")
    if not val:
        return

    # Replaces the two-character sequence backslash + "n" (not a real
    # newline) -- presumably the component delivers escaped newlines;
    # TODO confirm against the component's frontend.
    val_stripped = val.replace('\\n', ' ')
    edited_input = st.text_area("✏️ Edit Input:", value=val_stripped, height=100)

    # The selector is rendered for the UI, but its value is not consumed
    # anywhere in this function (only one option exists).
    st.selectbox("Model:", ["Arxiv"])

    col1, col2 = st.columns(2)
    with col1:
        autorun = st.checkbox("βš™ AutoRun", value=True)
    with col2:
        full_audio = st.checkbox("πŸ“šFullAudio", value=False)

    input_changed = (val != st.session_state.old_val)

    # Short-circuit `or`: the Run button is only rendered when AutoRun does
    # not fire, exactly as in the former if/else layout.
    if (autorun and input_changed) or st.button("β–Ά Run"):
        st.session_state.old_val = val
        st.session_state.last_query = edited_input
        perform_ai_lookup(edited_input,
                          vocal_summary=True,
                          extended_refs=False,
                          titles_summary=True,
                          full_audio=full_audio)


def _render_arxiv_tab():
    """Render the ArXiv query form and run the lookup on demand."""
    st.subheader("πŸ” Query ArXiv")
    q = st.text_input("πŸ” Query:", key="arxiv_query")

    st.markdown("### πŸŽ› Options")
    vocal_summary = st.checkbox("πŸŽ™ShortAudio", value=True, key="option_vocal_summary")
    extended_refs = st.checkbox("πŸ“œLongRefs", value=False, key="option_extended_refs")
    titles_summary = st.checkbox("πŸ”–TitlesOnly", value=True, key="option_titles_summary")
    full_audio = st.checkbox("πŸ“šFullAudio", value=False, key="option_full_audio")
    full_transcript = st.checkbox("🧾FullTranscript", value=False, key="option_full_transcript")

    # The Run button is only shown once a non-empty query has been typed.
    if q and st.button("πŸ”Run"):
        st.session_state.last_query = q
        result = perform_ai_lookup(q, vocal_summary=vocal_summary, extended_refs=extended_refs,
                                   titles_summary=titles_summary, full_audio=full_audio)
        if full_transcript:
            create_file(q, result, "md")


def _render_voice_tab():
    """Render TTS voice/format settings, the message box and chat history."""
    st.subheader("🎀 Voice Input")

    st.markdown("### 🎀 Voice Settings")
    current_voice = st.session_state['tts_voice']
    # Fall back to the first voice instead of raising ValueError when the
    # stored voice is not (or no longer) one of the offered options.
    voice_index = EDGE_TTS_VOICES.index(current_voice) if current_voice in EDGE_TTS_VOICES else 0
    selected_voice = st.selectbox(
        "Select TTS Voice:",
        options=EDGE_TTS_VOICES,
        index=voice_index
    )

    st.markdown("### πŸ”Š Audio Format")
    format_options = ["MP3", "WAV"]
    # Seed the radio from the stored preference (as the voice selector does)
    # so that re-creating the widget does not silently reset it to MP3.
    current_format = str(st.session_state['audio_format']).upper()
    format_index = format_options.index(current_format) if current_format in format_options else 0
    selected_format = st.radio(
        "Choose Audio Format:",
        options=format_options,
        index=format_index
    )

    # Persist changed settings and restart the script so they take effect.
    if selected_voice != st.session_state['tts_voice']:
        st.session_state['tts_voice'] = selected_voice
        st.rerun()
    if selected_format.lower() != st.session_state['audio_format']:
        st.session_state['audio_format'] = selected_format.lower()
        st.rerun()

    # Input text, flattened to a single line
    user_text = st.text_area("πŸ’¬ Message:", height=100)
    user_text = user_text.strip().replace('\n', ' ')

    if st.button("πŸ“¨ Send"):
        process_voice_input(user_text)

    st.subheader("πŸ“œ Chat History")
    for c in st.session_state.chat_history:
        st.write("**You:**", c["user"])
        st.write("**Response:**", c["claude"])


def _render_media_tab():
    """Render the audio / image / video galleries for the working directory."""
    st.header("πŸ“Έ Media Gallery")

    # Audio is listed first, so it is the sub-tab shown by default
    tabs = st.tabs(["🎡 Audio", "πŸ–Ό Images", "πŸŽ₯ Video"])

    # AUDIO sub-tab
    with tabs[0]:
        st.subheader("🎡 Audio Files")
        audio_files = glob.glob("*.mp3") + glob.glob("*.wav")
        if audio_files:
            for a in audio_files:
                with st.expander(os.path.basename(a)):
                    st.audio(a)
                    ext = os.path.splitext(a)[1].replace('.', '')
                    dl_link = get_download_link(a, file_type=ext)
                    st.markdown(dl_link, unsafe_allow_html=True)
        else:
            st.write("No audio files found.")

    # IMAGES sub-tab
    with tabs[1]:
        st.subheader("πŸ–Ό Image Files")
        imgs = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg")
        if imgs:
            c = st.slider("Cols", 1, 5, 3, key="cols_images")
            cols = st.columns(c)
            # Distribute the images round-robin over the chosen column count
            for i, f in enumerate(imgs):
                with cols[i % c]:
                    st.image(Image.open(f), use_container_width=True)
        else:
            st.write("No images found.")

    # VIDEO sub-tab
    with tabs[2]:
        st.subheader("πŸŽ₯ Video Files")
        vids = glob.glob("*.mp4") + glob.glob("*.mov") + glob.glob("*.avi")
        if vids:
            for v in vids:
                with st.expander(os.path.basename(v)):
                    st.video(v)
        else:
            st.write("No videos found.")


def main():
    """Build the whole Streamlit page for one script run.

    Order of rendering: sidebar marquee settings, welcome marquee, the
    action selector, the custom-component input panel, the panel for the
    selected action, the sidebar file history, and finally page CSS. If
    ``st.session_state.should_rerun`` is set, the flag is cleared and the
    script is restarted.
    """
    # NOTE(review): the emoji prefixes in the UI labels below (and in the
    # helpers above) look mis-encoded (UTF-8 bytes rendered through a
    # single-byte code page). They are kept verbatim here; restore them
    # from the upstream source rather than guessing.
    voice_tab = "🎀 Voice"
    media_tab = "πŸ“Έ Media"
    arxiv_tab = "πŸ” ArXiv"
    editor_tab = "πŸ“ Editor"

    # 1) Setup marquee UI in the sidebar
    update_marquee_settings_ui()
    marquee_settings = get_marquee_settings()

    # 2) Display the marquee welcome
    display_marquee(st.session_state['marquee_content'],
                    {**marquee_settings, "font-size": "28px", "lineHeight": "50px"},
                    key_suffix="welcome")

    # 3) Main action selector
    tab_main = st.radio("Action:", [voice_tab, media_tab, arxiv_tab, editor_tab],
                        horizontal=True)

    # 4) Custom component input (shown regardless of the selected action)
    _render_component_input()

    # 5) Panel for the selected action
    if tab_main == arxiv_tab:
        _render_arxiv_tab()
    elif tab_main == voice_tab:
        _render_voice_tab()
    elif tab_main == media_tab:
        _render_media_tab()
    elif tab_main == editor_tab:
        st.write("Select or create a file to edit. (Currently minimal demo)")

    # 6) Sidebar: file history
    display_file_history_in_sidebar()

    # 7) Some light CSS styling
    st.markdown("""
    <style>
    .main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
    .stMarkdown { font-family: 'Helvetica Neue', sans-serif; }
    .stButton>button { margin-right: 0.5rem; }
    </style>
    """, unsafe_allow_html=True)

    # 8) Rerun if some earlier step requested it
    if st.session_state.should_rerun:
        st.session_state.should_rerun = False
        st.rerun()
945
+
946
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()