awacke1 committed
Commit 10717ba · verified · 1 Parent(s): bd477c5

Update app.py

Files changed (1)
  1. app.py +74 -644
app.py CHANGED
@@ -5,7 +5,7 @@ import streamlit.components.v1 as components
5
  from datetime import datetime
6
  from audio_recorder_streamlit import audio_recorder
7
  from bs4 import BeautifulSoup
8
- from collections import defaultdict, deque
9
  from dotenv import load_dotenv
10
  from gradio_client import Client
11
  from huggingface_hub import InferenceClient
@@ -20,128 +20,54 @@ from streamlit.runtime.scriptrunner import get_script_run_ctx
20
  import asyncio
21
  import edge_tts
22
 
23
- # 🎯 1. Core Configuration & Setup
24
  st.set_page_config(
25
- page_title="🚲BikeAI🏆 Claude/GPT Research",
26
  page_icon="🚲🏆",
27
  layout="wide",
28
  initial_sidebar_state="auto",
29
  menu_items={
30
  'Get Help': 'https://huggingface.co/awacke1',
31
  'Report a bug': 'https://huggingface.co/spaces/awacke1',
32
- 'About': "🚲BikeAI🏆 Claude/GPT Research AI"
33
  }
34
  )
35
  load_dotenv()
36
 
37
- # 🧠 2. Text Cleaning Functionality
38
- class TextCleaner:
39
- """Helper class for text cleaning operations"""
40
- def __init__(self):
41
- self.replacements = {
42
- "\\n": " ", # Replace escaped newlines
43
- "</s>": "", # Remove end tags
44
- "<s>": "", # Remove start tags
45
- "\n": " ", # Replace actual newlines
46
- "\r": " ", # Replace carriage returns
47
- "\t": " ", # Replace tabs
48
- }
49
-
50
- self.preserve_replacements = {
51
- "\\n": "\n", # Convert escaped to actual newlines
52
- "</s>": "", # Remove end tags
53
- "<s>": "", # Remove start tags
54
- "\r": "\n", # Convert returns to newlines
55
- "\t": " " # Convert tabs to spaces
56
- }
57
-
58
- def clean_text(self, text: str, preserve_format: bool = False) -> str:
59
- """
60
- Clean text removing problematic characters and normalizing whitespace.
61
- Args:
62
- text: Text to clean
63
- preserve_format: Whether to preserve some formatting (newlines etc)
64
- Returns:
65
- Cleaned text string
66
- """
67
- if not text or not isinstance(text, str):
68
- return ""
69
-
70
- replacements = (self.preserve_replacements if preserve_format
71
- else self.replacements)
72
-
73
- cleaned = text
74
- for old, new in replacements.items():
75
- cleaned = cleaned.replace(old, new)
76
-
77
- # Normalize whitespace while preserving paragraphs if needed
78
- if preserve_format:
79
- cleaned = re.sub(r'\n{3,}', '\n\n', cleaned)
80
- else:
81
- cleaned = re.sub(r'\s+', ' ', cleaned)
82
-
83
- return cleaned.strip()
84
-
85
- def clean_dict(self, data: dict, fields: list) -> dict:
86
- """Clean specified fields in a dictionary"""
87
- if not data or not isinstance(data, dict):
88
- return {}
89
-
90
- cleaned = data.copy()
91
- for field in fields:
92
- if field in cleaned:
93
- cleaned[field] = self.clean_text(cleaned[field])
94
- return cleaned
95
-
96
- def clean_list(self, items: list, fields: list) -> list:
97
- """Clean specified fields in a list of dictionaries"""
98
- if not isinstance(items, list):
99
- return []
100
- return [self.clean_dict(item, fields) for item in items]
101
-
102
- # Initialize cleaner
103
- cleaner = TextCleaner()
104
-
105
- # 🔑 3. API Setup & Clients
106
- openai_api_key = os.getenv('OPENAI_API_KEY', "")
107
- anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
108
- xai_key = os.getenv('xai',"")
109
- if 'OPENAI_API_KEY' in st.secrets:
110
- openai_api_key = st.secrets['OPENAI_API_KEY']
111
- if 'ANTHROPIC_API_KEY' in st.secrets:
112
- anthropic_key = st.secrets["ANTHROPIC_API_KEY"]
113
 
114
- openai.api_key = openai_api_key
115
- claude_client = anthropic.Anthropic(api_key=anthropic_key)
116
- openai_client = OpenAI(api_key=openai.api_key, organization=os.getenv('OPENAI_ORG_ID'))
117
- HF_KEY = os.getenv('HF_KEY')
118
- API_URL = os.getenv('API_URL')
 
119
 
120
- # 📝 4. Session State Management
121
- if 'transcript_history' not in st.session_state:
122
- st.session_state['transcript_history'] = []
123
- if 'chat_history' not in st.session_state:
124
- st.session_state['chat_history'] = []
125
- if 'openai_model' not in st.session_state:
126
- st.session_state['openai_model'] = "gpt-4-1106-preview"
127
- if 'messages' not in st.session_state:
128
- st.session_state['messages'] = []
129
- if 'last_voice_input' not in st.session_state:
130
- st.session_state['last_voice_input'] = ""
131
- if 'editing_file' not in st.session_state:
132
- st.session_state['editing_file'] = None
133
- if 'edit_new_name' not in st.session_state:
134
- st.session_state['edit_new_name'] = ""
135
- if 'edit_new_content' not in st.session_state:
136
- st.session_state['edit_new_content'] = ""
137
- if 'viewing_prefix' not in st.session_state:
138
- st.session_state['viewing_prefix'] = None
139
- if 'should_rerun' not in st.session_state:
140
- st.session_state['should_rerun'] = False
141
- if 'old_val' not in st.session_state:
142
- st.session_state['old_val'] = None
143
 
144
- # 🎨 5. Custom CSS
145
  st.markdown("""
146
  <style>
147
  .main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
@@ -150,565 +76,69 @@ st.markdown("""
150
  </style>
151
  """, unsafe_allow_html=True)
152
 
153
- FILE_EMOJIS = {
154
- "md": "📝",
155
- "mp3": "🎵",
156
- }
157
-
158
- # 🧠 6. High-Information Content Extraction
159
  def get_high_info_terms(text: str) -> list:
160
- """Extract high-information terms from text, including key phrases."""
161
- text = cleaner.clean_text(text)
162
-
163
- # ... rest of function remains the same ...
164
- [Your existing get_high_info_terms implementation]
 
165
 
166
- def clean_text_for_filename(text: str) -> str:
167
- """Remove punctuation and short filler words, return a compact string."""
168
- text = cleaner.clean_text(text)
169
-
170
- # ... rest of function remains the same ...
171
- [Your existing clean_text_for_filename implementation]
172
 
173
- # 📁 7. File Operations
174
  def generate_filename(prompt, response, file_type="md"):
175
- """Generate filename with meaningful terms."""
176
- cleaned_prompt = cleaner.clean_text(prompt)
177
- cleaned_response = cleaner.clean_text(response)
178
-
179
- prefix = datetime.now().strftime("%y%m_%H%M") + "_"
180
- combined = (cleaned_prompt + " " + cleaned_response).strip()
181
- info_terms = get_high_info_terms(combined)
182
-
183
- snippet = (cleaned_prompt[:100] + " " + cleaned_response[:100]).strip()
184
- snippet_cleaned = clean_text_for_filename(snippet)
185
-
186
- name_parts = info_terms + [snippet_cleaned]
187
- full_name = '_'.join(name_parts)
188
 
189
- if len(full_name) > 150:
190
- full_name = full_name[:150]
191
-
192
- filename = f"{prefix}{full_name}.{file_type}"
193
- return filename
194
 
195
  def create_file(prompt, response, file_type="md"):
196
- """Create file with intelligent naming"""
197
- filename = generate_filename(prompt.strip(), response.strip(), file_type)
198
-
199
- cleaned_prompt = cleaner.clean_text(prompt)
200
- cleaned_response = cleaner.clean_text(response, preserve_format=True)
201
-
202
  with open(filename, 'w', encoding='utf-8') as f:
203
- f.write(cleaned_prompt + "\n\n" + cleaned_response)
204
  return filename
205
 
206
- def get_download_link(file):
207
- """Generate download link for file"""
208
- with open(file, "rb") as f:
209
- b64 = base64.b64encode(f.read()).decode()
210
- return f'<a href="data:file/zip;base64,{b64}" download="{os.path.basename(file)}">📂 Download {os.path.basename(file)}</a>'
211
-
212
- # 🔊 8. Audio Processing
213
- def clean_for_speech(text: str) -> str:
214
- """Clean text for speech synthesis"""
215
- text = cleaner.clean_text(text)
216
- text = re.sub(r"\(https?:\/\/[^\)]+\)", "", text)
217
- return text
218
-
219
- @st.cache_resource
220
- def speech_synthesis_html(result):
221
- """Create HTML for speech synthesis"""
222
- cleaned_result = clean_for_speech(result)
223
- html_code = f"""
224
- <html><body>
225
- <script>
226
- var msg = new SpeechSynthesisUtterance("{cleaned_result.replace('"', '')}");
227
- window.speechSynthesis.speak(msg);
228
- </script>
229
- </body></html>
230
- """
231
- components.html(html_code, height=0)
232
-
233
- async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0):
234
- """Generate audio using Edge TTS"""
235
- text = clean_for_speech(text)
236
- if not text.strip():
237
- return None
238
- rate_str = f"{rate:+d}%"
239
- pitch_str = f"{pitch:+d}Hz"
240
- communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
241
- out_fn = generate_filename(text, text, "mp3")
242
- await communicate.save(out_fn)
243
- return out_fn
244
-
245
- def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0):
246
- """Wrapper for edge TTS generation"""
247
- return asyncio.run(edge_tts_generate_audio(text, voice, rate, pitch))
248
 
249
  def play_and_download_audio(file_path):
250
- """Play and provide download link for audio"""
251
  if file_path and os.path.exists(file_path):
252
  st.audio(file_path)
253
- dl_link = f'<a href="data:audio/mpeg;base64,{base64.b64encode(open(file_path,"rb").read()).decode()}" download="{os.path.basename(file_path)}">Download {os.path.basename(file_path)}</a>'
254
- st.markdown(dl_link, unsafe_allow_html=True)
255
-
256
- # 🎬 9. Media Processing
257
- def process_image(image_path, user_prompt):
258
- """Process image with GPT-4V"""
259
- with open(image_path, "rb") as imgf:
260
- image_data = imgf.read()
261
- b64img = base64.b64encode(image_data).decode("utf-8")
262
-
263
- cleaned_prompt = cleaner.clean_text(user_prompt)
264
-
265
- resp = openai_client.chat.completions.create(
266
- model=st.session_state["openai_model"],
267
- messages=[
268
- {"role": "system", "content": "You are a helpful assistant."},
269
- {"role": "user", "content": [
270
- {"type": "text", "text": cleaned_prompt},
271
- {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64img}"}}
272
- ]}
273
- ],
274
- temperature=0.0,
275
- )
276
- return cleaner.clean_text(resp.choices[0].message.content, preserve_format=True)
277
-
278
- def process_audio(audio_path):
279
- """Process audio with Whisper"""
280
- with open(audio_path, "rb") as f:
281
- transcription = openai_client.audio.transcriptions.create(model="whisper-1", file=f)
282
-
283
- cleaned_text = cleaner.clean_text(transcription.text)
284
- st.session_state.messages.append({
285
- "role": "user",
286
- "content": cleaned_text
287
- })
288
- return cleaned_text
289
 
290
- def process_video(video_path, seconds_per_frame=1):
291
- """Extract frames from video"""
292
- # ... function remains the same as it handles binary data ...
293
- [Your existing process_video implementation]
294
 
295
- def process_video_with_gpt(video_path, prompt):
296
- """Analyze video frames with GPT-4V"""
297
- frames = process_video(video_path)
298
- cleaned_prompt = cleaner.clean_text(prompt)
299
-
300
- resp = openai_client.chat.completions.create(
301
- model=st.session_state["openai_model"],
302
- messages=[
303
- {"role":"system","content":"Analyze video frames."},
304
- {"role":"user","content":[
305
- {"type":"text","text":cleaned_prompt},
306
- *[{"type":"image_url","image_url":{"url":f"data:image/jpeg;base64,{fr}"}}
307
- for fr in frames]
308
- ]}
309
- ]
310
- )
311
- return cleaner.clean_text(resp.choices[0].message.content, preserve_format=True)
312
-
313
- # 🤖 10. AI Model Integration
314
- def process_with_claude(text):
315
- """Process text with Claude"""
316
- if not text: return
317
-
318
- cleaned_input = cleaner.clean_text(text)
319
- with st.chat_message("user"):
320
- st.markdown(cleaned_input)
321
-
322
- with st.chat_message("assistant"):
323
- r = claude_client.messages.create(
324
- model="claude-3-sonnet-20240229",
325
- max_tokens=1000,
326
- messages=[{"role":"user","content":cleaned_input}]
327
- )
328
- raw_response = r.content[0].text
329
- cleaned_response = cleaner.clean_text(raw_response, preserve_format=True)
330
-
331
- st.write("Claude-3.5: " + cleaned_response)
332
- create_file(cleaned_input, cleaned_response, "md")
333
- st.session_state.chat_history.append({
334
- "user": cleaned_input,
335
- "claude": cleaned_response
336
- })
337
- return cleaned_response
338
 
339
- def process_with_gpt(text):
340
- """Process text with GPT-4"""
341
- if not text: return
342
-
343
- cleaned_input = cleaner.clean_text(text)
344
- st.session_state.messages.append({
345
- "role": "user",
346
- "content": cleaned_input
347
- })
348
-
349
- with st.chat_message("user"):
350
- st.markdown(cleaned_input)
351
-
352
- with st.chat_message("assistant"):
353
- c = openai_client.chat.completions.create(
354
- model=st.session_state["openai_model"],
355
- messages=st.session_state.messages,
356
- stream=False
357
- )
358
- raw_response = c.choices[0].message.content
359
- cleaned_response = cleaner.clean_text(raw_response, preserve_format=True)
360
-
361
- st.write("GPT-4o: " + cleaned_response)
362
- create_file(cleaned_input, cleaned_response, "md")
363
- st.session_state.messages.append({
364
- "role": "assistant",
365
- "content": cleaned_response
366
- })
367
- return cleaned_response
368
 
369
- def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=False):
370
- """Perform Arxiv search and generate audio summaries"""
371
- cleaned_query = cleaner.clean_text(q)
372
- start = time.time()
373
-
374
  client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
375
- refs = client.predict(cleaned_query, 20, "Semantic Search",
376
- "mistralai/Mixtral-8x7B-Instruct-v0.1",
377
- api_name="/update_with_rag_md")[0]
378
- r2 = client.predict(cleaned_query, "mistralai/Mixtral-8x7B-Instruct-v0.1",
379
- True, api_name="/ask_llm")
380
-
381
- # Clean responses
382
- cleaned_r2 = cleaner.clean_text(r2, preserve_format=True)
383
- cleaned_refs = cleaner.clean_text(refs, preserve_format=True)
384
-
385
- result = f"### 🔎 {cleaned_query}\n\n{cleaned_r2}\n\n{cleaned_refs}"
386
- st.markdown(result)
387
-
388
  if full_audio:
389
- complete_text = f"Complete response for query: {cleaned_query}. {clean_for_speech(cleaned_r2)} {clean_for_speech(cleaned_refs)}"
390
- audio_file_full = speak_with_edge_tts(complete_text)
391
- st.write("### 📚 Full Audio")
392
- play_and_download_audio(audio_file_full)
393
-
394
- if vocal_summary:
395
- main_text = clean_for_speech(cleaned_r2)
396
- audio_file_main = speak_with_edge_tts(main_text)
397
- st.write("### 🎙 Short Audio")
398
- play_and_download_audio(audio_file_main)
399
-
400
- if extended_refs:
401
- summaries_text = "Extended references: " + cleaned_refs.replace('"','')
402
- summaries_text = clean_for_speech(summaries_text)
403
- audio_file_refs = speak_with_edge_tts(summaries_text)
404
- st.write("### 📜 Long Refs")
405
- play_and_download_audio(audio_file_refs)
406
-
407
- if titles_summary:
408
- titles = []
409
- for line in cleaned_refs.split('\n'):
410
- m = re.search(r"\[([^\]]+)\]", line)
411
- if m:
412
- titles.append(m.group(1))
413
- if titles:
414
- titles_text = "Titles: " + ", ".join(titles)
415
- titles_text = clean_for_speech(titles_text)
416
- audio_file_titles = speak_with_edge_tts(titles_text)
417
- st.write("### 🔖 Titles")
418
- play_and_download_audio(audio_file_titles)
419
-
420
- elapsed = time.time() - start
421
- st.write(f"**Total Elapsed:** {elapsed:.2f} s")
422
 
423
- create_file(cleaned_query, result, "md")
424
- return result
425
 
426
- def save_full_transcript(query, text):
427
- """Save full transcript of results as a file."""
428
- cleaned_query = cleaner.clean_text(query)
429
- cleaned_text = cleaner.clean_text(text, preserve_format=True)
430
- create_file(cleaned_query, cleaned_text, "md")
431
-
432
- # 📂 11. File Management
433
- def create_zip_of_files(md_files, mp3_files):
434
- """Create zip with intelligent naming"""
435
- md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
436
- all_files = md_files + mp3_files
437
- if not all_files:
438
- return None
439
-
440
- all_content = []
441
- for f in all_files:
442
- if f.endswith('.md'):
443
- with open(f, 'r', encoding='utf-8') as file:
444
- content = file.read()
445
- cleaned_content = cleaner.clean_text(content)
446
- all_content.append(cleaned_content)
447
- elif f.endswith('.mp3'):
448
- all_content.append(os.path.basename(f))
449
-
450
- combined_content = " ".join(all_content)
451
- info_terms = get_high_info_terms(combined_content)
452
-
453
- timestamp = datetime.now().strftime("%y%m_%H%M")
454
- name_text = '_'.join(term.replace(' ', '-') for term in info_terms[:3])
455
- zip_name = f"{timestamp}_{name_text}.zip"
456
-
457
- with zipfile.ZipFile(zip_name, 'w') as z:
458
- for f in all_files:
459
- z.write(f)
460
-
461
- return zip_name
462
-
463
- def load_files_for_sidebar():
464
- """Load and group files for sidebar display"""
465
- md_files = glob.glob("*.md")
466
- mp3_files = glob.glob("*.mp3")
467
-
468
- md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
469
- all_files = md_files + mp3_files
470
-
471
- groups = defaultdict(list)
472
- for f in all_files:
473
- fname = os.path.basename(f)
474
- prefix = fname[:10]
475
- groups[prefix].append(f)
476
-
477
- for prefix in groups:
478
- groups[prefix].sort(key=lambda x: os.path.getmtime(x), reverse=True)
479
-
480
- sorted_prefixes = sorted(groups.keys(),
481
- key=lambda pre: max(os.path.getmtime(x) for x in groups[pre]),
482
- reverse=True)
483
- return groups, sorted_prefixes
484
-
485
- def extract_keywords_from_md(files):
486
- """Extract keywords from markdown files"""
487
- text = ""
488
- for f in files:
489
- if f.endswith(".md"):
490
- with open(f, 'r', encoding='utf-8') as file:
491
- content = file.read()
492
- cleaned_content = cleaner.clean_text(content)
493
- text += " " + cleaned_content
494
- return get_high_info_terms(text)
495
-
496
- def display_file_manager_sidebar(groups, sorted_prefixes):
497
- """Display file manager in sidebar"""
498
- st.sidebar.title("🎵 Audio & Docs Manager")
499
-
500
- all_md = []
501
- all_mp3 = []
502
- for prefix in groups:
503
- for f in groups[prefix]:
504
- if f.endswith(".md"):
505
- all_md.append(f)
506
- elif f.endswith(".mp3"):
507
- all_mp3.append(f)
508
-
509
- top_bar = st.sidebar.columns(3)
510
- with top_bar[0]:
511
- if st.button("🗑 DelAllMD"):
512
- for f in all_md:
513
- os.remove(f)
514
- st.session_state.should_rerun = True
515
- with top_bar[1]:
516
- if st.button("🗑 DelAllMP3"):
517
- for f in all_mp3:
518
- os.remove(f)
519
- st.session_state.should_rerun = True
520
- with top_bar[2]:
521
- if st.button("⬇️ ZipAll"):
522
- z = create_zip_of_files(all_md, all_mp3)
523
- if z:
524
- st.sidebar.markdown(get_download_link(z), unsafe_allow_html=True)
525
-
526
- for prefix in sorted_prefixes:
527
- files = groups[prefix]
528
- kw = extract_keywords_from_md(files)
529
- keywords_str = " ".join(kw) if kw else "No Keywords"
530
- with st.sidebar.expander(f"{prefix} Files ({len(files)}) - KW: {keywords_str}", expanded=True):
531
- c1, c2 = st.columns(2)
532
- with c1:
533
- if st.button("👀ViewGrp", key="view_group_"+prefix):
534
- st.session_state.viewing_prefix = prefix
535
- with c2:
536
- if st.button("🗑DelGrp", key="del_group_"+prefix):
537
- for f in files:
538
- os.remove(f)
539
- st.success(f"Deleted group {prefix}!")
540
- st.session_state.should_rerun = True
541
-
542
- for f in files:
543
- fname = os.path.basename(f)
544
- ctime = datetime.fromtimestamp(os.path.getmtime(f)).strftime("%Y-%m-%d %H:%M:%S")
545
- st.write(f"**{fname}** - {ctime}")
546
-
547
- # 🎯 12. Main Application
548
  def main():
549
- st.sidebar.markdown("### 🚲BikeAI🏆 Multi-Agent Research")
550
- tab_main = st.radio("Action:", ["🎤 Voice", "📸 Media", "🔍 ArXiv", "📝 Editor"], horizontal=True)
551
-
552
- mycomponent = components.declare_component("mycomponent", path="mycomponent")
553
- val = mycomponent(my_input_value="Hello")
554
-
555
- # Show input in a text box for editing if detected
556
- if val:
557
- cleaned_val = cleaner.clean_text(val)
558
- edited_input = st.text_area("✏️ Edit Input:", value=cleaned_val, height=100)
559
- run_option = st.selectbox("Model:", ["Arxiv", "GPT-4o", "Claude-3.5"])
560
- col1, col2 = st.columns(2)
561
- with col1:
562
- autorun = st.checkbox("⚙ AutoRun", value=True)
563
- with col2:
564
- full_audio = st.checkbox("📚FullAudio", value=False,
565
- help="Generate full audio response")
566
-
567
- input_changed = (val != st.session_state.old_val)
568
-
569
- if autorun and input_changed:
570
- st.session_state.old_val = val
571
- if run_option == "Arxiv":
572
- perform_ai_lookup(edited_input, vocal_summary=True, extended_refs=False,
573
- titles_summary=True, full_audio=full_audio)
574
- else:
575
- if run_option == "GPT-4o":
576
- process_with_gpt(edited_input)
577
- elif run_option == "Claude-3.5":
578
- process_with_claude(edited_input)
579
- else:
580
- if st.button("▶ Run"):
581
- st.session_state.old_val = val
582
- if run_option == "Arxiv":
583
- perform_ai_lookup(edited_input, vocal_summary=True, extended_refs=False,
584
- titles_summary=True, full_audio=full_audio)
585
- else:
586
- if run_option == "GPT-4o":
587
- process_with_gpt(edited_input)
588
- elif run_option == "Claude-3.5":
589
- process_with_claude(edited_input)
590
-
591
- if tab_main == "🔍 ArXiv":
592
- st.subheader("🔍 Query ArXiv")
593
- q = st.text_input("🔍 Query:")
594
- q = cleaner.clean_text(q)
595
-
596
- st.markdown("### 🎛 Options")
597
- vocal_summary = st.checkbox("🎙ShortAudio", value=True)
598
- extended_refs = st.checkbox("📜LongRefs", value=False)
599
- titles_summary = st.checkbox("🔖TitlesOnly", value=True)
600
- full_audio = st.checkbox("📚FullAudio", value=False,
601
- help="Generate full audio response")
602
- full_transcript = st.checkbox("🧾FullTranscript", value=False,
603
- help="Generate a full transcript file")
604
-
605
- if q and st.button("🔍Run"):
606
- result = perform_ai_lookup(q, vocal_summary=vocal_summary,
607
- extended_refs=extended_refs,
608
- titles_summary=titles_summary,
609
- full_audio=full_audio)
610
- if full_transcript:
611
- save_full_transcript(q, result)
612
-
613
- st.markdown("### Change Prompt & Re-Run")
614
- q_new = st.text_input("🔄 Modify Query:")
615
- q_new = cleaner.clean_text(q_new)
616
- if q_new and st.button("🔄 Re-Run with Modified Query"):
617
- result = perform_ai_lookup(q_new, vocal_summary=vocal_summary,
618
- extended_refs=extended_refs,
619
- titles_summary=titles_summary,
620
- full_audio=full_audio)
621
- if full_transcript:
622
- save_full_transcript(q_new, result)
623
-
624
- elif tab_main == "🎤 Voice":
625
- st.subheader("🎤 Voice Input")
626
- user_text = st.text_area("💬 Message:", height=100)
627
- user_text = cleaner.clean_text(user_text)
628
- if st.button("📨 Send"):
629
- process_with_gpt(user_text)
630
- st.subheader("📜 Chat History")
631
- t1, t2 = st.tabs(["Claude History", "GPT-4o History"])
632
- with t1:
633
- for c in st.session_state.chat_history:
634
- st.write("**You:**", cleaner.clean_text(c["user"]))
635
- st.write("**Claude:**", cleaner.clean_text(c["claude"], preserve_format=True))
636
- with t2:
637
- for m in st.session_state.messages:
638
- with st.chat_message(m["role"]):
639
- if m["role"] == "user":
640
- st.markdown(cleaner.clean_text(m["content"]))
641
- else:
642
- st.markdown(cleaner.clean_text(m["content"], preserve_format=True))
643
-
644
- elif tab_main == "📸 Media":
645
- st.header("📸 Images & 🎥 Videos")
646
- tabs = st.tabs(["🖼 Images", "🎥 Video"])
647
- with tabs[0]:
648
- imgs = glob.glob("*.png") + glob.glob("*.jpg")
649
- if imgs:
650
- c = st.slider("Cols", 1, 5, 3)
651
- cols = st.columns(c)
652
- for i, f in enumerate(imgs):
653
- with cols[i%c]:
654
- st.image(Image.open(f), use_container_width=True)
655
- if st.button(f"👀 Analyze {os.path.basename(f)}", key=f"analyze_{f}"):
656
- a = process_image(f, "Describe this image.")
657
- st.markdown(cleaner.clean_text(a, preserve_format=True))
658
- else:
659
- st.write("No images found.")
660
- with tabs[1]:
661
- vids = glob.glob("*.mp4")
662
- if vids:
663
- for v in vids:
664
- with st.expander(f"🎥 {os.path.basename(v)}"):
665
- st.video(v)
666
- if st.button(f"Analyze {os.path.basename(v)}", key=f"analyze_{v}"):
667
- a = process_video_with_gpt(v, "Describe video.")
668
- st.markdown(cleaner.clean_text(a, preserve_format=True))
669
- else:
670
- st.write("No videos found.")
671
-
672
- elif tab_main == "📝 Editor":
673
- if getattr(st.session_state, 'current_file', None):
674
- st.subheader(f"Editing: {st.session_state.current_file}")
675
- with open(st.session_state.current_file, 'r', encoding='utf-8') as f:
676
- content = f.read()
677
- content = cleaner.clean_text(content, preserve_format=True)
678
- new_text = st.text_area("✏️ Content:", content, height=300)
679
- if st.button("💾 Save"):
680
- cleaned_content = cleaner.clean_text(new_text, preserve_format=True)
681
- with open(st.session_state.current_file, 'w', encoding='utf-8') as f:
682
- f.write(cleaned_content)
683
- st.success("Updated!")
684
- st.session_state.should_rerun = True
685
- else:
686
- st.write("Select a file from the sidebar to edit.")
687
-
688
- groups, sorted_prefixes = load_files_for_sidebar()
689
- display_file_manager_sidebar(groups, sorted_prefixes)
690
 
691
- if st.session_state.viewing_prefix and st.session_state.viewing_prefix in groups:
692
- st.write("---")
693
- st.write(f"**Viewing Group:** {st.session_state.viewing_prefix}")
694
- for f in groups[st.session_state.viewing_prefix]:
695
- fname = os.path.basename(f)
696
- ext = os.path.splitext(fname)[1].lower().strip('.')
697
- st.write(f"### {fname}")
698
- if ext == "md":
699
- with open(f, 'r', encoding='utf-8') as file:
700
- content = file.read()
701
- st.markdown(cleaner.clean_text(content, preserve_format=True))
702
- elif ext == "mp3":
703
- st.audio(f)
704
- else:
705
- st.markdown(get_download_link(f), unsafe_allow_html=True)
706
- if st.button("❌ Close"):
707
- st.session_state.viewing_prefix = None
708
 
709
- if st.session_state.should_rerun:
710
- st.session_state.should_rerun = False
711
- st.rerun()
712
 
713
  if __name__ == "__main__":
714
- main()
 
5
  from datetime import datetime
6
  from audio_recorder_streamlit import audio_recorder
7
  from bs4 import BeautifulSoup
8
+ from collections import defaultdict
9
  from dotenv import load_dotenv
10
  from gradio_client import Client
11
  from huggingface_hub import InferenceClient
 
20
  import asyncio
21
  import edge_tts
22
 
23
+ # --- Configuration & Setup ---
24
  st.set_page_config(
25
+ page_title="BikeAI Claude/GPT Research",
26
  page_icon="🚲🏆",
27
  layout="wide",
28
  initial_sidebar_state="auto",
29
  menu_items={
30
  'Get Help': 'https://huggingface.co/awacke1',
31
  'Report a bug': 'https://huggingface.co/spaces/awacke1',
32
+ 'About': "BikeAI Claude/GPT Research AI"
33
  }
34
  )
35
  load_dotenv()
36
 
37
+ # --- API Setup & Clients ---
38
+ def init_api_clients():
39
+ api_keys = {k: os.getenv(k, "") for k in ('OPENAI_API_KEY', 'ANTHROPIC_API_KEY_3', 'xai', 'HF_KEY', 'API_URL')}
40
+ api_keys.update({k: v for k, v in st.secrets.items() if k in api_keys})
41
+ openai.api_key = api_keys['OPENAI_API_KEY']
42
+ return {
43
+ 'claude': anthropic.Anthropic(api_key=api_keys['ANTHROPIC_API_KEY_3']),
44
+ 'openai': OpenAI(api_key=openai.api_key)
45
+ }
 
46
 
47
+ api_clients = init_api_clients()
48
+
49
+ # --- Session State Management ---
50
+ def initialize_session_state():
51
+ defaults = {
52
+ 'transcript_history': [],
53
+ 'chat_history': [],
54
+ 'openai_model': "gpt-4o-2024-05-13",
55
+ 'messages': [],
56
+ 'last_voice_input': "",
57
+ 'editing_file': None,
58
+ 'edit_new_name': "",
59
+ 'edit_new_content': "",
60
+ 'viewing_prefix': None,
61
+ 'should_rerun': False,
62
+ 'old_val': None
63
+ }
64
+ for k, v in defaults.items():
65
+ if k not in st.session_state:
66
+ st.session_state[k] = v
67
 
68
+ initialize_session_state()
 
69
 
70
+ # --- Custom CSS ---
71
  st.markdown("""
72
  <style>
73
  .main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
 
76
  </style>
77
  """, unsafe_allow_html=True)
78
 
79
+ # --- Helper Functions ---
 
80
  def get_high_info_terms(text: str) -> list:
81
+ stop_words = set(['the', 'a', 'an', 'and', 'or', 'in', 'on', 'at', 'to', 'for', 'with'])
82
+ key_phrases = ['artificial intelligence', 'machine learning', 'neural network']
83
+ preserved = [p for p in key_phrases if p in text.lower()]
84
+ words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
85
+ high_info_words = [w for w in words if w not in stop_words and len(w) > 3]
86
+ return list(dict.fromkeys(preserved + high_info_words))[:5]
87
 
88
 
 
89
  def generate_filename(prompt, response, file_type="md"):
90
+ prefix = datetime.now().strftime("%y%m_%H%M")
91
+ info_terms = get_high_info_terms(prompt + response)
92
+ snippet = '_'.join(info_terms)
93
+ return f"{prefix}_{snippet[:150]}.{file_type}"
 
 
94
 
95
 
96
  def create_file(prompt, response, file_type="md"):
97
+ filename = generate_filename(prompt, response, file_type)
 
98
  with open(filename, 'w', encoding='utf-8') as f:
99
+ f.write(f"{prompt}\n\n{response}")
100
  return filename
101
 
102
 
103
  def play_and_download_audio(file_path):
 
104
  if file_path and os.path.exists(file_path):
105
  st.audio(file_path)
106
+ b64 = base64.b64encode(open(file_path, "rb").read()).decode()
107
+ st.markdown(f'<a href="data:audio/mpeg;base64,{b64}" download="{file_path}">Download {file_path}</a>', unsafe_allow_html=True)
 
108
 
109
 
110
+ async def edge_tts_generate_audio(text, voice="en-US-AriaNeural"):
111
+ out_fn = generate_filename(text, text, "mp3")
112
+ communicate = edge_tts.Communicate(text, voice)
113
+ await communicate.save(out_fn)
114
+ return out_fn
 
115
 
116
 
117
+ # --- ArXiv Lookup ---
118
+ def perform_ai_lookup(query, full_audio=False):
 
 
 
119
  client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
120
+ result = client.predict(query, api_name="/ask_llm")
121
+ st.markdown(f"### 🔎 {query}\n{result}")
 
122
  if full_audio:
123
+ audio_file = asyncio.run(edge_tts_generate_audio(result))
124
+ play_and_download_audio(audio_file)
125
+ create_file(query, result)
 
126
 
 
 
127
 
128
+ # --- Main App ---
 
129
  def main():
130
+ st.sidebar.title("📂 File Manager")
131
+ action = st.radio("Action:", ["🎤 Voice", "🔍 ArXiv"])
132
+ if action == "🔍 ArXiv":
133
+ query = st.text_input("Query")
134
+ if st.button("Run"):
135
+ perform_ai_lookup(query)
 
136
 
137
+ elif action == "🎤 Voice":
138
+ text = st.text_area("Message")
139
+ if st.button("Send"):
140
+ process_with_gpt(text)
 
141
 
 
 
 
142
 
143
  if __name__ == "__main__":
144
+ main()