awacke1 committed
Commit 5f1ea92 • 1 Parent(s): a2dcff5

Update backup19-processInputButton-app.py

Files changed (1)
  1. backup19-processInputButton-app.py +176 -89
backup19-processInputButton-app.py CHANGED
@@ -20,7 +20,7 @@ from streamlit.runtime.scriptrunner import get_script_run_ctx
 import asyncio
 import edge_tts
 
-# 🔧 Config & Setup
+# 🎯 1. Core Configuration & Setup
 st.set_page_config(
     page_title="🚲BikeAI🏆 Claude/GPT Research",
     page_icon="🚲🏆",
@@ -34,6 +34,7 @@ st.set_page_config(
 )
 load_dotenv()
 
+# 🔑 2. API Setup & Clients
 openai_api_key = os.getenv('OPENAI_API_KEY', "")
 anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
 if 'OPENAI_API_KEY' in st.secrets:
@@ -47,6 +48,7 @@ openai_client = OpenAI(api_key=openai.api_key, organization=os.getenv('OPENAI_OR
 HF_KEY = os.getenv('HF_KEY')
 API_URL = os.getenv('API_URL')
 
+# 📝 3. Session State Management
 if 'transcript_history' not in st.session_state:
     st.session_state['transcript_history'] = []
 if 'chat_history' not in st.session_state:
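
Reviewer note: each key is read from the environment first, and the `in st.secrets` check presumably overrides it when the Space defines a secret. A minimal sketch of that fallback pattern (the override body is hidden by the hunk boundary, so the assignment below is an assumption):

    import os
    import streamlit as st

    # Hypothetical reconstruction of the override not shown in this hunk
    openai_api_key = os.getenv('OPENAI_API_KEY', "")
    if 'OPENAI_API_KEY' in st.secrets:
        openai_api_key = st.secrets['OPENAI_API_KEY']
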
@@ -70,7 +72,7 @@ if 'should_rerun' not in st.session_state:
 if 'old_val' not in st.session_state:
     st.session_state['old_val'] = None
 
-# 🎨 Minimal Custom CSS
+# 🎨 4. Custom CSS
 st.markdown("""
 <style>
 .main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
@@ -86,35 +88,99 @@ FILE_EMOJIS = {
     "mp3": "🎵",
 }
 
-def clean_for_speech(text: str) -> str:
-    text = text.replace("\n", " ")
-    text = text.replace("</s>", " ")
-    text = text.replace("#", "")
-    # Remove links
-    text = re.sub(r"\(https?:\/\/[^\)]+\)", "", text)
-    text = re.sub(r"\s+", " ", text).strip()
-    return text
-
+# 🧠 5. High-Information Content Extraction
+def get_high_info_terms(text: str) -> list:
+    """Extract high-information terms from text, including key phrases"""
+    stop_words = set([
+        'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
+        'by', 'from', 'up', 'about', 'into', 'over', 'after', 'is', 'are', 'was', 'were',
+        'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would',
+        'should', 'could', 'might', 'must', 'shall', 'can', 'may', 'this', 'that', 'these',
+        'those', 'i', 'you', 'he', 'she', 'it', 'we', 'they', 'what', 'which', 'who',
+        'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most',
+        'other', 'some', 'such', 'than', 'too', 'very', 'just', 'there'
+    ])
+
+    key_phrases = [
+        'artificial intelligence', 'machine learning', 'deep learning', 'neural network',
+        'personal assistant', 'natural language', 'computer vision', 'data science',
+        'reinforcement learning', 'knowledge graph', 'semantic search', 'time series',
+        'large language model', 'transformer model', 'attention mechanism',
+        'autonomous system', 'edge computing', 'quantum computing', 'blockchain technology',
+        'cognitive science', 'human computer', 'decision making', 'arxiv search',
+        'research paper', 'scientific study', 'empirical analysis'
+    ]
+
+    # First identify key phrases
+    preserved_phrases = []
+    lower_text = text.lower()
+    for phrase in key_phrases:
+        if phrase in lower_text:
+            preserved_phrases.append(phrase)
+            text = text.replace(phrase, '')
+
+    # Then extract individual high-info words
+    words = re.findall(r'\b\w+(?:-\w+)*\b', text)
+    high_info_words = [
+        word.lower() for word in words
+        if len(word) > 3
+        and word.lower() not in stop_words
+        and not word.isdigit()
+        and any(c.isalpha() for c in word)
+    ]
+
+    # Combine and deduplicate while preserving order
+    all_terms = preserved_phrases + high_info_words
+    seen = set()
+    unique_terms = []
+    for term in all_terms:
+        if term not in seen:
+            seen.add(term)
+            unique_terms.append(term)
+
+    max_terms = 5
+    return unique_terms[:max_terms]
+
+# 📁 6. File Operations
 def generate_filename(content, file_type="md"):
+    """Generate filename with meaningful terms"""
     prefix = datetime.now().strftime("%y%m_%H%M") + "_"
-    words = re.findall(r"\w+", content)
-    name_text = '_'.join(words[:3]) if words else 'file'
+    info_terms = get_high_info_terms(content)
+    name_text = '_'.join(term.replace(' ', '-') for term in info_terms) if info_terms else 'file'
+
+    max_length = 100
+    if len(name_text) > max_length:
+        name_text = name_text[:max_length]
+
     filename = f"{prefix}{name_text}.{file_type}"
     return filename
 
 def create_file(prompt, response, file_type="md"):
+    """Create file with intelligent naming"""
     filename = generate_filename(response.strip() if response.strip() else prompt.strip(), file_type)
     with open(filename, 'w', encoding='utf-8') as f:
         f.write(prompt + "\n\n" + response)
     return filename
 
 def get_download_link(file):
+    """Generate download link for file"""
     with open(file, "rb") as f:
         b64 = base64.b64encode(f.read()).decode()
     return f'<a href="data:file/zip;base64,{b64}" download="{os.path.basename(file)}">📂 Download {os.path.basename(file)}</a>'
 
+# 🔊 7. Audio Processing
+def clean_for_speech(text: str) -> str:
+    """Clean text for speech synthesis"""
+    text = text.replace("\n", " ")
+    text = text.replace("</s>", " ")
+    text = text.replace("#", "")
+    text = re.sub(r"\(https?:\/\/[^\)]+\)", "", text)
+    text = re.sub(r"\s+", " ", text).strip()
+    return text
+
 @st.cache_resource
 def speech_synthesis_html(result):
+    """Create HTML for speech synthesis"""
     html_code = f"""
     <html><body>
     <script>
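
Reviewer note: the new naming pipeline keeps known key phrases first, then adds non-stopword tokens longer than three characters, deduplicates in order, and caps the result at five terms; generate_filename joins those terms (spaces turned into hyphens) under a timestamp prefix. A quick sketch of the expected behavior, assuming the file is importable as a module named app (module name and sample text are illustrative):

    from app import get_high_info_terms, generate_filename

    sample = "A survey of machine learning and semantic search for research paper retrieval"
    print(get_high_info_terms(sample))
    # ['machine learning', 'semantic search', 'research paper', 'survey', 'retrieval']
    print(generate_filename(sample))
    # e.g. '2412_0930_machine-learning_semantic-search_research-paper_survey_retrieval.md'
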
@@ -126,74 +192,31 @@ def speech_synthesis_html(result):
     components.html(html_code, height=0)
 
 async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0):
+    """Generate audio using Edge TTS"""
     text = clean_for_speech(text)
     if not text.strip():
         return None
     rate_str = f"{rate:+d}%"
     pitch_str = f"{pitch:+d}Hz"
     communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
-    out_fn = generate_filename(text,"mp3")
+    out_fn = generate_filename(text, "mp3")
     await communicate.save(out_fn)
     return out_fn
 
 def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0):
+    """Wrapper for edge TTS generation"""
     return asyncio.run(edge_tts_generate_audio(text, voice, rate, pitch))
 
 def play_and_download_audio(file_path):
+    """Play and provide download link for audio"""
     if file_path and os.path.exists(file_path):
         st.audio(file_path)
         dl_link = f'<a href="data:audio/mpeg;base64,{base64.b64encode(open(file_path,"rb").read()).decode()}" download="{os.path.basename(file_path)}">Download {os.path.basename(file_path)}</a>'
         st.markdown(dl_link, unsafe_allow_html=True)
 
-def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=False):
-    start = time.time()
-    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
-    r = client.predict(q,20,"Semantic Search","mistralai/Mixtral-8x7B-Instruct-v0.1",api_name="/update_with_rag_md")
-    refs = r[0]
-    r2 = client.predict(q,"mistralai/Mixtral-8x7B-Instruct-v0.1",True,api_name="/ask_llm")
-    result = f"### 🔎 {q}\n\n{r2}\n\n{refs}"
-
-    st.markdown(result)
-
-    # Generate full audio version if requested
-    if full_audio:
-        complete_text = f"Complete response for query: {q}. {clean_for_speech(r2)} {clean_for_speech(refs)}"
-        audio_file_full = speak_with_edge_tts(complete_text)
-        st.write("### 📚 Complete Audio Response")
-        play_and_download_audio(audio_file_full)
-
-    if vocal_summary:
-        main_text = clean_for_speech(r2)
-        audio_file_main = speak_with_edge_tts(main_text)
-        st.write("### 🎙️ Vocal Summary (Short Answer)")
-        play_and_download_audio(audio_file_main)
-
-    if extended_refs:
-        summaries_text = "Here are the summaries from the references: " + refs.replace('"','')
-        summaries_text = clean_for_speech(summaries_text)
-        audio_file_refs = speak_with_edge_tts(summaries_text)
-        st.write("### 📜 Extended References & Summaries")
-        play_and_download_audio(audio_file_refs)
-
-    if titles_summary:
-        titles = []
-        for line in refs.split('\n'):
-            m = re.search(r"\[([^\]]+)\]", line)
-            if m:
-                titles.append(m.group(1))
-        if titles:
-            titles_text = "Here are the titles of the papers: " + ", ".join(titles)
-            titles_text = clean_for_speech(titles_text)
-            audio_file_titles = speak_with_edge_tts(titles_text)
-            st.write("### 🔖 Paper Titles")
-            play_and_download_audio(audio_file_titles)
-
-    elapsed = time.time()-start
-    st.write(f"**Total Elapsed:** {elapsed:.2f} s")
-    create_file(q, result, "md")
-    return result
-
+# 🎬 8. Media Processing
 def process_image(image_path, user_prompt):
+    """Process image with GPT-4V"""
     with open(image_path, "rb") as imgf:
         image_data = imgf.read()
     b64img = base64.b64encode(image_data).decode("utf-8")
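
Reviewer note: rate and pitch are formatted as signed strings before reaching edge_tts.Communicate, which is what the library expects. A standalone sketch of the same call pattern (assumes `pip install edge-tts`; text, voice, and output name are illustrative):

    import asyncio
    import edge_tts

    async def demo():
        # f"{10:+d}%" -> "+10%" and f"{0:+d}Hz" -> "+0Hz", as in the code above
        tts = edge_tts.Communicate("Hello from BikeAI", "en-US-AriaNeural",
                                   rate="+10%", pitch="+0Hz")
        await tts.save("demo.mp3")

    asyncio.run(demo())
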
@@ -211,12 +234,14 @@ def process_image(image_path, user_prompt):
     return resp.choices[0].message.content
 
 def process_audio(audio_path):
+    """Process audio with Whisper"""
     with open(audio_path, "rb") as f:
         transcription = openai_client.audio.transcriptions.create(model="whisper-1", file=f)
     st.session_state.messages.append({"role": "user", "content": transcription.text})
     return transcription.text
 
 def process_video(video_path, seconds_per_frame=1):
+    """Extract frames from video"""
     vid = cv2.VideoCapture(video_path)
     total = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
     fps = vid.get(cv2.CAP_PROP_FPS)
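
Reviewer note: process_video's body is largely untouched by this commit, so only its opening lines appear here. For context, a generic sketch of this kind of per-second frame sampling (not necessarily the app's exact body):

    import base64
    import cv2

    def sample_frames(video_path, seconds_per_frame=1):
        vid = cv2.VideoCapture(video_path)
        fps = vid.get(cv2.CAP_PROP_FPS) or 30
        step = max(int(fps * seconds_per_frame), 1)  # keep one frame every `step` frames
        frames_b64, idx = [], 0
        while True:
            ok, frame = vid.read()
            if not ok:
                break
            if idx % step == 0:
                ok2, buf = cv2.imencode(".jpg", frame)
                if ok2:
                    frames_b64.append(base64.b64encode(buf).decode("utf-8"))
            idx += 1
        vid.release()
        return frames_b64
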
@@ -232,6 +257,7 @@ def process_video(video_path, seconds_per_frame=1):
     return frames_b64
 
 def process_video_with_gpt(video_path, prompt):
+    """Analyze video frames with GPT-4V"""
     frames = process_video(video_path)
     resp = openai_client.chat.completions.create(
         model=st.session_state["openai_model"],
@@ -245,7 +271,58 @@ def process_video_with_gpt(video_path, prompt):
     )
     return resp.choices[0].message.content
 
+# 🤖 9. AI Model Integration
+def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=False):
+    """Perform Arxiv search and generate audio summaries"""
+    start = time.time()
+    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
+    r = client.predict(q,20,"Semantic Search","mistralai/Mixtral-8x7B-Instruct-v0.1",api_name="/update_with_rag_md")
+    refs = r[0]
+    r2 = client.predict(q,"mistralai/Mixtral-8x7B-Instruct-v0.1",True,api_name="/ask_llm")
+    result = f"### 🔎 {q}\n\n{r2}\n\n{refs}"
+
+    st.markdown(result)
+
+    # Generate full audio version if requested
+    if full_audio:
+        complete_text = f"Complete response for query: {q}. {clean_for_speech(r2)} {clean_for_speech(refs)}"
+        audio_file_full = speak_with_edge_tts(complete_text)
+        st.write("### 📚 Complete Audio Response")
+        play_and_download_audio(audio_file_full)
+
+    if vocal_summary:
+        main_text = clean_for_speech(r2)
+        audio_file_main = speak_with_edge_tts(main_text)
+        st.write("### 🎙️ Vocal Summary (Short Answer)")
+        play_and_download_audio(audio_file_main)
+
+    if extended_refs:
+        summaries_text = "Here are the summaries from the references: " + refs.replace('"','')
+        summaries_text = clean_for_speech(summaries_text)
+        audio_file_refs = speak_with_edge_tts(summaries_text)
+        st.write("### 📜 Extended References & Summaries")
+        play_and_download_audio(audio_file_refs)
+
+    if titles_summary:
+        titles = []
+        for line in refs.split('\n'):
+            m = re.search(r"\[([^\]]+)\]", line)
+            if m:
+                titles.append(m.group(1))
+        if titles:
+            titles_text = "Here are the titles of the papers: " + ", ".join(titles)
+            titles_text = clean_for_speech(titles_text)
+            audio_file_titles = speak_with_edge_tts(titles_text)
+            st.write("### 🔖 Paper Titles")
+            play_and_download_audio(audio_file_titles)
+
+    elapsed = time.time()-start
+    st.write(f"**Total Elapsed:** {elapsed:.2f} s")
+    create_file(q, result, "md")
+    return result
+
 def process_with_gpt(text):
+    """Process text with GPT-4"""
     if not text: return
     st.session_state.messages.append({"role":"user","content":text})
     with st.chat_message("user"):
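
Reviewer note: perform_ai_lookup moves here unchanged apart from the section header and docstring; it drives two endpoints of one Hugging Face Space through gradio_client. Stripped of the Streamlit UI, the calls look like this (assumes `pip install gradio_client` and that the Space is reachable; the query is illustrative):

    from gradio_client import Client

    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    # Same arguments as in the hunk above: query, top-k, mode, model
    refs = client.predict("semantic search", 20, "Semantic Search",
                          "mistralai/Mixtral-8x7B-Instruct-v0.1",
                          api_name="/update_with_rag_md")[0]
    answer = client.predict("semantic search",
                            "mistralai/Mixtral-8x7B-Instruct-v0.1", True,
                            api_name="/ask_llm")
    print(answer)
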
@@ -263,6 +340,7 @@ def process_with_gpt(text):
     return ans
 
 def process_with_claude(text):
+    """Process text with Claude"""
     if not text: return
     with st.chat_message("user"):
         st.markdown(text)
@@ -278,27 +356,42 @@ def process_with_claude(text):
     st.session_state.chat_history.append({"user":text,"claude":ans})
     return ans
 
+# 📂 10. File Management
 def create_zip_of_files(md_files, mp3_files):
+    """Create zip with intelligent naming"""
     md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
     all_files = md_files + mp3_files
     if not all_files:
         return None
-    stems = [os.path.splitext(os.path.basename(f))[0] for f in all_files]
-    joined = "_".join(stems)
-    if len(joined) > 50:
-        joined = joined[:50] + "_etc"
-    zip_name = f"{joined}.zip"
+
+    # Collect content for high-info term extraction
+    all_content = []
+    for f in all_files:
+        if f.endswith('.md'):
+            with open(f, 'r', encoding='utf-8') as file:
+                all_content.append(file.read())
+        elif f.endswith('.mp3'):
+            all_content.append(os.path.basename(f))
+
+    combined_content = " ".join(all_content)
+    info_terms = get_high_info_terms(combined_content)
+
+    timestamp = datetime.now().strftime("%y%m_%H%M")
+    name_text = '_'.join(term.replace(' ', '-') for term in info_terms[:3])
+    zip_name = f"{timestamp}_{name_text}.zip"
+
     with zipfile.ZipFile(zip_name,'w') as z:
         for f in all_files:
             z.write(f)
+
     return zip_name
 
 def load_files_for_sidebar():
+    """Load and group files for sidebar display"""
     md_files = glob.glob("*.md")
     mp3_files = glob.glob("*.mp3")
 
     md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
-
     all_files = md_files + mp3_files
 
     groups = defaultdict(list)
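
Reviewer note: zip naming now reuses get_high_info_terms, pooling markdown contents and mp3 basenames, keeping the top three terms, and prefixing a timestamp, instead of the old truncated join of file stems. A usage sketch (assumes the app module is importable as app; the printed name is illustrative):

    import glob
    from app import create_zip_of_files

    zip_name = create_zip_of_files(glob.glob("*.md"), glob.glob("*.mp3"))
    print(zip_name)  # e.g. '2412_0930_machine-learning_semantic-search_arxiv-search.zip'
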
@@ -310,25 +403,22 @@ def load_files_for_sidebar():
     for prefix in groups:
         groups[prefix].sort(key=lambda x: os.path.getmtime(x), reverse=True)
 
-    sorted_prefixes = sorted(groups.keys(), key=lambda pre: max(os.path.getmtime(x) for x in groups[pre]), reverse=True)
+    sorted_prefixes = sorted(groups.keys(),
+                             key=lambda pre: max(os.path.getmtime(x) for x in groups[pre]),
+                             reverse=True)
     return groups, sorted_prefixes
 
 def extract_keywords_from_md(files):
+    """Extract keywords from markdown files"""
     text = ""
     for f in files:
         if f.endswith(".md"):
             c = open(f,'r',encoding='utf-8').read()
             text += " " + c
-    words = re.findall(r"\w+", text.lower())
-    unique_words = []
-    for w in words:
-        if w not in unique_words:
-            unique_words.append(w)
-            if len(unique_words) == 5:
-                break
-    return unique_words
+    return get_high_info_terms(text)
 
 def display_file_manager_sidebar(groups, sorted_prefixes):
+    """Display file manager in sidebar"""
     st.sidebar.title("🎵 Audio & Document Manager")
 
     all_md = []
@@ -378,16 +468,7 @@ def display_file_manager_sidebar(groups, sorted_prefixes):
         ctime = datetime.fromtimestamp(os.path.getmtime(f)).strftime("%Y-%m-%d %H:%M:%S")
         st.write(f"**{fname}** - {ctime}")
 
-def run_selected_model(option, user_input):
-    user_input = user_input.strip()
-    if option == "Arxiv":
-        st.subheader("Arxiv Only Results:")
-        perform_ai_lookup(user_input, vocal_summary=True, extended_refs=False, titles_summary=True)
-    elif option == "GPT-4o":
-        process_with_gpt(user_input)
-    elif option == "Claude-3.5":
-        process_with_claude(user_input)
-
+# 🎯 11. Main Application
 def main():
     st.sidebar.markdown("### 🚲BikeAI🏆 Multi-Agent Research AI")
     tab_main = st.radio("Action:",["🎤 Voice Input","📸 Media Gallery","🔍 Search ArXiv","📝 File Editor"],horizontal=True)
@@ -415,7 +496,10 @@ def main():
             perform_ai_lookup(edited_input, vocal_summary=True, extended_refs=False,
                               titles_summary=True, full_audio=full_audio)
         else:
-            run_selected_model(run_option, edited_input)
+            if run_option == "GPT-4o":
+                process_with_gpt(edited_input)
+            elif run_option == "Claude-3.5":
+                process_with_claude(edited_input)
     else:
         if st.button("Process Input"):
             st.session_state.old_val = val
@@ -423,7 +507,10 @@ def main():
                 perform_ai_lookup(edited_input, vocal_summary=True, extended_refs=False,
                                   titles_summary=True, full_audio=full_audio)
             else:
-                run_selected_model(run_option, edited_input)
+                if run_option == "GPT-4o":
+                    process_with_gpt(edited_input)
+                elif run_option == "Claude-3.5":
+                    process_with_claude(edited_input)
 
     if tab_main == "🔍 Search ArXiv":
         st.subheader("🔍 Search ArXiv")