awacke1 commited on
Commit
cd34468
Β·
verified Β·
1 Parent(s): 59077d5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +752 -0
app.py ADDED
@@ -0,0 +1,752 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, textract, time, zipfile
3
+ import plotly.graph_objects as go
4
+ import streamlit.components.v1 as components
5
+ from datetime import datetime
6
+ from audio_recorder_streamlit import audio_recorder
7
+ from bs4 import BeautifulSoup
8
+ from collections import defaultdict, deque
9
+ from dotenv import load_dotenv
10
+ from gradio_client import Client
11
+ from huggingface_hub import InferenceClient
12
+ from io import BytesIO
13
+ from PIL import Image
14
+ from PyPDF2 import PdfReader
15
+ from urllib.parse import quote
16
+ from xml.etree import ElementTree as ET
17
+ from openai import OpenAI
18
+ import extra_streamlit_components as stx
19
+ from streamlit.runtime.scriptrunner import get_script_run_ctx
20
+ import asyncio
21
+ import edge_tts
22
+ import io
23
+ import sys
24
+ import subprocess
25
+
# 1. Core Configuration & Setup
# st.set_page_config must be the first Streamlit call in the script.
st.set_page_config(
    page_title="🚲BikeAI🏆 Claude/GPT Research",
    page_icon="🚲🏆",
    layout="wide",
    initial_sidebar_state="auto",
    menu_items={
        'Get Help': 'https://huggingface.co/awacke1',
        'Report a bug': 'https://huggingface.co/spaces/awacke1',
        'About': "🚲BikeAI🏆 Claude/GPT Research AI"
    }
)
load_dotenv()  # read a local .env file into the process environment

# 2. API Setup & Clients
# Environment variables provide defaults; Streamlit secrets override them.
openai_api_key = os.getenv('OPENAI_API_KEY', "")
anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
if 'OPENAI_API_KEY' in st.secrets:
    openai_api_key = st.secrets['OPENAI_API_KEY']
if 'ANTHROPIC_API_KEY' in st.secrets:
    anthropic_key = st.secrets["ANTHROPIC_API_KEY"]

openai.api_key = openai_api_key
claude_client = anthropic.Anthropic(api_key=anthropic_key)
openai_client = OpenAI(api_key=openai.api_key, organization=os.getenv('OPENAI_ORG_ID'))
# Hugging Face / inference endpoint configuration read from the environment.
HF_KEY = os.getenv('HF_KEY')
API_URL = os.getenv('API_URL')

# 3. Session State Management
# Initialise every session_state key the app reads so later code can
# assume presence without repeated guards.
if 'transcript_history' not in st.session_state:
    st.session_state['transcript_history'] = []
if 'chat_history' not in st.session_state:
    st.session_state['chat_history'] = []         # Claude exchanges
if 'openai_model' not in st.session_state:
    st.session_state['openai_model'] = "gpt-4o-2024-05-13"
if 'messages' not in st.session_state:
    st.session_state['messages'] = []             # GPT chat transcript
if 'last_voice_input' not in st.session_state:
    st.session_state['last_voice_input'] = ""
if 'editing_file' not in st.session_state:
    st.session_state['editing_file'] = None
if 'edit_new_name' not in st.session_state:
    st.session_state['edit_new_name'] = ""
if 'edit_new_content' not in st.session_state:
    st.session_state['edit_new_content'] = ""
if 'viewing_prefix' not in st.session_state:
    st.session_state['viewing_prefix'] = None     # file-group being viewed
if 'should_rerun' not in st.session_state:
    st.session_state['should_rerun'] = False      # deferred st.rerun() flag
if 'old_val' not in st.session_state:
    st.session_state['old_val'] = None            # last processed voice input

# 4. Custom CSS
st.markdown("""
<style>
.main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
.stMarkdown { font-family: 'Helvetica Neue', sans-serif; }
.stButton>button {
margin-right: 0.5rem;
}
</style>
""", unsafe_allow_html=True)

# Emoji markers per file extension.
FILE_EMOJIS = {
    "md": "📝",
    "mp3": "🎵",
}
# 5. High-Information Content Extraction
def get_high_info_terms(text: str) -> list:
    """Pick out up to five informative terms from *text*.

    Known multi-word key phrases are detected first (case-insensitively)
    and kept whole; the remaining words are filtered against a stop-word
    list, digit-only tokens are dropped, and first-seen order is kept.
    """
    stop_words = set([
        'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
        'by', 'from', 'up', 'about', 'into', 'over', 'after', 'is', 'are', 'was', 'were',
        'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would',
        'should', 'could', 'might', 'must', 'shall', 'can', 'may', 'this', 'that', 'these',
        'those', 'i', 'you', 'he', 'she', 'it', 'we', 'they', 'what', 'which', 'who',
        'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most',
        'other', 'some', 'such', 'than', 'too', 'very', 'just', 'there'
    ])

    key_phrases = [
        'artificial intelligence', 'machine learning', 'deep learning', 'neural network',
        'personal assistant', 'natural language', 'computer vision', 'data science',
        'reinforcement learning', 'knowledge graph', 'semantic search', 'time series',
        'large language model', 'transformer model', 'attention mechanism',
        'autonomous system', 'edge computing', 'quantum computing', 'blockchain technology',
        'cognitive science', 'human computer', 'decision making', 'arxiv search',
        'research paper', 'scientific study', 'empirical analysis'
    ]

    lowered = text.lower()
    preserved_phrases = []
    for phrase in key_phrases:
        if phrase in lowered:
            preserved_phrases.append(phrase)
            # Remove the phrase so its words are not double-counted below.
            text = text.replace(phrase, '')

    tokens = re.findall(r'\b\w+(?:-\w+)*\b', text)
    high_info_words = [
        tok.lower()
        for tok in tokens
        if len(tok) > 3
        and tok.lower() not in stop_words
        and not tok.isdigit()
        and any(ch.isalpha() for ch in tok)
    ]

    # De-duplicate preserving first occurrence, then cap at five terms.
    unique_terms = list(dict.fromkeys(preserved_phrases + high_info_words))
    return unique_terms[:5]
# 6. Filename Generation
def generate_filename(content, file_type="md"):
    """Build a filename like ``2412_1530_term1_term2.md`` from *content*.

    The stem combines a yyMM_HHmm timestamp with up to five
    high-information terms (spaces become hyphens), truncated to 100
    characters; falls back to 'file' when no terms are found.
    """
    stamp = datetime.now().strftime("%y%m_%H%M") + "_"
    terms = get_high_info_terms(content)
    if terms:
        stem = '_'.join(t.replace(' ', '-') for t in terms)
    else:
        stem = 'file'
    # Keep the descriptive part of the name bounded.
    stem = stem[:100]
    return f"{stamp}{stem}.{file_type}"
# 7. Audio Processing
def clean_for_speech(text: str) -> str:
    """Normalise *text* for TTS: drop newlines, '</s>' markers and '#'
    characters, strip parenthesised URLs, and collapse whitespace."""
    for target, replacement in (("\n", " "), ("</s>", " "), ("#", "")):
        text = text.replace(target, replacement)
    text = re.sub(r"\(https?:\/\/[^\)]+\)", "", text)
    return re.sub(r"\s+", " ", text).strip()
@st.cache_resource
def speech_synthesis_html(result):
    """Render an invisible HTML component that speaks *result* via the
    browser's SpeechSynthesis API.

    Fix: the text is embedded with json.dumps() so that backslashes,
    quotes and newlines cannot break out of the generated JavaScript
    string literal (previously only double quotes were stripped, so a
    newline in *result* produced invalid JS and no speech).
    """
    # json.dumps yields a valid JS string literal for arbitrary text.
    payload = json.dumps(result)
    html_code = f"""
    <html><body>
    <script>
    var msg = new SpeechSynthesisUtterance({payload});
    window.speechSynthesis.speak(msg);
    </script>
    </body></html>
    """
    components.html(html_code, height=0)
async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0):
    """Synthesize *text* to an mp3 file with Edge TTS.

    Returns the generated filename, or None when the cleaned text is
    empty. *rate* is a percent offset and *pitch* a Hz offset, both
    formatted with an explicit sign as edge-tts expects.
    """
    text = clean_for_speech(text)
    if not text.strip():
        return None
    communicate = edge_tts.Communicate(
        text, voice, rate=f"{rate:+d}%", pitch=f"{pitch:+d}Hz"
    )
    out_fn = generate_filename(text, "mp3")
    await communicate.save(out_fn)
    return out_fn
def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0):
    """Synchronous wrapper around edge_tts_generate_audio()."""
    coro = edge_tts_generate_audio(text, voice, rate, pitch)
    return asyncio.run(coro)
def play_and_download_audio(file_path):
    """Show an audio player and a base64 download link for *file_path*.

    No-op when *file_path* is falsy or the file does not exist.

    Fix: the file is read inside a context manager so its handle is
    closed deterministically (previously it was opened inline in the
    f-string and left for the garbage collector).
    """
    if not (file_path and os.path.exists(file_path)):
        return
    st.audio(file_path)
    with open(file_path, "rb") as audio_f:
        b64 = base64.b64encode(audio_f.read()).decode()
    name = os.path.basename(file_path)
    dl_link = f'<a href="data:audio/mpeg;base64,{b64}" download="{name}">Download {name}</a>'
    st.markdown(dl_link, unsafe_allow_html=True)
# 8. Media Processing
def process_image(image_path, user_prompt):
    """Ask the configured OpenAI vision model about the image at
    *image_path* using *user_prompt*; return the model's text answer."""
    with open(image_path, "rb") as imgf:
        b64img = base64.b64encode(imgf.read()).decode("utf-8")
    # The image is sent inline as a data URL alongside the text prompt.
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {
            "role": "user",
            "content": [
                {"type": "text", "text": user_prompt},
                {"type": "image_url",
                 "image_url": {"url": f"data:image/png;base64,{b64img}"}},
            ],
        },
    ]
    resp = openai_client.chat.completions.create(
        model=st.session_state["openai_model"],
        messages=messages,
        temperature=0.0,
    )
    return resp.choices[0].message.content
def process_audio(audio_path):
    """Transcribe *audio_path* with Whisper, append the text to the GPT
    chat transcript in session state, and return it."""
    with open(audio_path, "rb") as audio_f:
        transcription = openai_client.audio.transcriptions.create(
            model="whisper-1", file=audio_f
        )
    st.session_state.messages.append({"role": "user", "content": transcription.text})
    return transcription.text
def process_video(video_path, seconds_per_frame=1):
    """Sample frames from *video_path* roughly every *seconds_per_frame*
    seconds and return them as base64-encoded JPEG strings.

    Fixes:
    - guard against an FPS of 0 (OpenCV returns 0 for files with broken
      metadata), which previously made ``skip`` zero and crashed
      ``range()`` with "arg 3 must not be zero";
    - release the capture in ``finally`` so the handle is not leaked if
      reading or encoding raises.
    """
    vid = cv2.VideoCapture(video_path)
    try:
        total = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = vid.get(cv2.CAP_PROP_FPS)
        # Step of at least 1 so range() never receives a zero step.
        skip = max(1, int(fps * seconds_per_frame))
        frames_b64 = []
        for i in range(0, total, skip):
            vid.set(cv2.CAP_PROP_POS_FRAMES, i)
            ret, frame = vid.read()
            if not ret:
                break
            _, buf = cv2.imencode(".jpg", frame)
            frames_b64.append(base64.b64encode(buf).decode("utf-8"))
    finally:
        vid.release()
    return frames_b64
def process_video_with_gpt(video_path, prompt):
    """Sample frames from *video_path* and ask the configured OpenAI
    model to analyze them with *prompt*; return the model's answer."""
    frames = process_video(video_path)
    # Each sampled frame is attached as an inline JPEG data URL.
    frame_parts = [
        {"type": "image_url",
         "image_url": {"url": f"data:image/jpeg;base64,{fr}"}}
        for fr in frames
    ]
    resp = openai_client.chat.completions.create(
        model=st.session_state["openai_model"],
        messages=[
            {"role": "system", "content": "Analyze video frames."},
            {"role": "user",
             "content": [{"type": "text", "text": prompt}] + frame_parts},
        ]
    )
    return resp.choices[0].message.content
# Shared namespace handed to exec() in create_file(); state set by one
# executed code block persists for later blocks in the same session.
context = {}
# 9. create_file: persist a prompt/response pair as markdown, executing
#    any Python code blocks found in the response.
def create_file(filename, prompt, response, should_save=True):
    """Write *prompt* and *response* to ``<filename stem>.md`` and show a
    download link in the UI.

    Any triple-backtick block in *response* whose text contains "python"
    is executed with exec() in the shared ``context`` namespace and its
    captured stdout appended to the document; other fenced blocks are
    embedded verbatim as resources. Returns None; no-op when
    *should_save* is falsy.

    Fixes:
    - sys.stdout is restored in a ``finally`` clause (a non-Exception
      error previously left stdout redirected);
    - the download link now names the actual file instead of a
      placeholder;
    - the markdown file is written with explicit UTF-8 encoding.

    SECURITY NOTE: exec() on model-generated code runs with full
    interpreter privileges; do not feed it untrusted responses.
    """
    if not should_save:
        return

    base_filename, ext = os.path.splitext(filename)

    combined_content = ""
    combined_content += "# Prompt 📝\n" + prompt + "\n\n"
    combined_content += "# Response 💬\n" + response + "\n\n"

    resources = re.findall(r"```([\s\S]*?)```", response)
    for resource in resources:
        if "python" in resource.lower():
            # Strip the leading "python" language tag from the fence body.
            cleaned_code = re.sub(r'^\s*python', '', resource,
                                  flags=re.IGNORECASE | re.MULTILINE)

            combined_content += "# Code Results 🚀\n"

            original_stdout = sys.stdout
            sys.stdout = io.StringIO()
            try:
                exec(cleaned_code, context)  # deliberate, but unsafe on untrusted input
                code_output = sys.stdout.getvalue()
                combined_content += f"```\n{code_output}\n```\n\n"
                realtime_eval = "# Code Results 🚀\n" + "```" + code_output + "```\n\n"
                st.code(realtime_eval)
            except Exception as e:
                combined_content += f"```python\nError executing Python code: {e}\n```\n\n"
            finally:
                # Always undo the redirection, even on unexpected errors.
                sys.stdout = original_stdout
        else:
            combined_content += "# Resource 🛠️\n" + "```" + resource + "```\n\n"

    if should_save:
        with open(f"{base_filename}.md", 'w', encoding='utf-8') as file:
            file.write(combined_content)
        st.code(combined_content)

        with open(f"{base_filename}.md", 'rb') as file:
            encoded_file = base64.b64encode(file.read()).decode()
        # Name the download after the real file, not a placeholder.
        href = (f'<a href="data:file/markdown;base64,{encoded_file}" '
                f'download="{base_filename}.md">Download File 📄</a>')
        st.markdown(href, unsafe_allow_html=True)
def generate_code_from_paper(title, summary, instructions):
    """Ask the OpenAI chat model for a minimal Python snippet implementing
    *instructions* in the context of the paper (*title*, *summary*).

    Returns the raw model text, or "" on failure (the error is shown via
    st.error).
    """
    code_prompt = f"""
You are a coding assistant.
Given the paper titled: "{title}"
Summary: "{summary}"

The user wants to implement the following steps in Python code. Provide a minimal, self-contained Python code snippet that:

1. Uses only standard libraries if possible. If a library is required, include a code snippet that uses subprocess to install it (like `subprocess.run(['pip','install','somepackage'])`).
2. Implement the requested functionality as simple functions and variables, minimal code.
3. Include error handling: if a file is missing, print an error message. Wrap code in a `try/except` block.
4. Output should be minimal, just the code block (no extra explanations), enclosed in triple backticks.

User instructions: "{instructions}"
"""

    try:
        completion = openai_client.chat.completions.create(
            model=st.session_state["openai_model"],
            messages=[
                {"role": "system", "content": "You are a helpful coding assistant."},
                {"role": "user", "content": code_prompt},
            ],
            temperature=0.0,
        )
        return completion.choices[0].message.content
    except Exception as e:
        st.error(f"Error generating code: {e}")
        return ""
# 10. AI Model Integration
def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=False):
    """Run an ArXiv RAG query, render and optionally speak the results,
    save them as markdown, and offer per-paper code generation.

    Args:
        q: research query string sent to the remote Gradio space.
        vocal_summary: speak the short LLM answer.
        extended_refs: speak the reference summaries.
        titles_summary: speak just the extracted paper titles.
        full_audio: speak the complete answer plus references.

    Returns:
        The combined markdown result string.
    """
    start = time.time()
    # The hosted Gradio space performs the retrieval-augmented generation.
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    r = client.predict(q,20,"Semantic Search","mistralai/Mixtral-8x7B-Instruct-v0.1",api_name="/update_with_rag_md")
    refs = r[0]  # markdown block of references
    r2 = client.predict(q,"mistralai/Mixtral-8x7B-Instruct-v0.1",True,api_name="/ask_llm")
    result = f"### 🔎 {q}\n\n{r2}\n\n{refs}"

    st.markdown(result)

    # Optional audio renderings of different slices of the answer.
    if full_audio:
        complete_text = f"Complete response for query: {q}. {clean_for_speech(r2)} {clean_for_speech(refs)}"
        audio_file_full = speak_with_edge_tts(complete_text)
        st.write("### 📚 Complete Audio Response")
        play_and_download_audio(audio_file_full)

    if vocal_summary:
        main_text = clean_for_speech(r2)
        audio_file_main = speak_with_edge_tts(main_text)
        st.write("### 🎙️ Vocal Summary (Short Answer)")
        play_and_download_audio(audio_file_main)

    if extended_refs:
        summaries_text = "Here are the summaries from the references: " + refs.replace('"','')
        summaries_text = clean_for_speech(summaries_text)
        audio_file_refs = speak_with_edge_tts(summaries_text)
        st.write("### 📜 Extended References & Summaries")
        play_and_download_audio(audio_file_refs)

    if titles_summary:
        # Titles are assumed to appear as [bracketed] markdown links in
        # refs — TODO confirm against the space's output format.
        titles = []
        for line in refs.split('\n'):
            m = re.search(r"\[([^\]]+)\]", line)
            if m:
                titles.append(m.group(1))
        if titles:
            titles_text = "Here are the titles of the papers: " + ", ".join(titles)
            titles_text = clean_for_speech(titles_text)
            audio_file_titles = speak_with_edge_tts(titles_text)
            st.write("### 🔖 Paper Titles")
            play_and_download_audio(audio_file_titles)

    elapsed = time.time()-start
    st.write(f"**Total Elapsed:** {elapsed:.2f} s")

    # Persist the full result as a markdown file.
    filename = generate_filename(result, "md")
    create_file(filename, q, result, should_save=True)

    # Parse out papers: entries are separated by the literal "[Title]".
    papers_raw = refs.strip().split("[Title]")
    papers = []
    for p in papers_raw:
        p = p.strip()
        if not p:
            continue
        lines = p.split("\n")
        title_line = lines[0].strip() if lines else ""
        summary_line = ""
        link_line = ""
        pdf_line = ""
        for line in lines[1:]:
            line = line.strip()
            if line.startswith("Summary:"):
                summary_line = line.replace("Summary:", "").strip()
            elif line.startswith("Link:"):
                link_line = line.replace("Link:", "").strip()
            elif line.startswith("PDF:"):
                pdf_line = line.replace("PDF:", "").strip()

        # Only keep entries with at least a title and a summary.
        if title_line and summary_line:
            papers.append({
                "title": title_line,
                "summary": summary_line,
                "link": link_line,
                "pdf": pdf_line
            })

    st.write("## Code Interpreter Options for Each Paper")
    for i, paper in enumerate(papers):
        st.write(f"**Paper {i+1}:** {paper['title']}")
        st.write(f"**Summary:** {paper['summary']}")
        if paper['link']:
            st.write(f"[Arxiv Link]({paper['link']})")
        if paper['pdf']:
            st.write(f"[PDF]({paper['pdf']})")

        # UI for generating code steps; widget keys are indexed by paper.
        with st.expander("Generate Python Code Steps"):
            instructions = st.text_area(
                f"Enter instructions for Python code implementation for this paper:",
                height=100, key=f"code_task_{i}"
            )
            if st.button(f"Generate Python Code Steps for Paper {i+1}", key=f"gen_code_{i}"):
                if instructions.strip():
                    generated_code = generate_code_from_paper(paper['title'], paper['summary'], instructions)
                    if generated_code.strip():
                        st.write("### Generated Code")
                        st.code(generated_code, language="python")

                        # Attempt to run the generated code.
                        if '```' in generated_code:
                            # Extract every fenced code block.
                            code_blocks = re.findall(r"```([\s\S]*?)```", generated_code)
                            for cb in code_blocks:
                                # Capture stdout while exec-ing the block.
                                # NOTE(review): exec of model output is unsafe.
                                original_stdout = sys.stdout
                                sys.stdout = io.StringIO()
                                try:
                                    exec(cb, {})
                                    exec_output = sys.stdout.getvalue()
                                    if exec_output.strip():
                                        st.write("### Code Output")
                                        st.write(exec_output)
                                        # TTS on code output
                                        audio_file = speak_with_edge_tts(exec_output)
                                        if audio_file:
                                            play_and_download_audio(audio_file)
                                except Exception as e:
                                    st.error(f"Error executing code: {e}")
                                finally:
                                    sys.stdout = original_stdout
                    else:
                        st.error("No code was generated.")
                else:
                    st.warning("Please provide instructions before generating code.")

    return result
def process_with_gpt(text):
    """Send *text* to the configured GPT model, render the exchange in
    the chat UI, persist it via create_file(), and return the answer.

    Returns None without side effects when *text* is empty.
    """
    if not text:
        return
    st.session_state.messages.append({"role": "user", "content": text})
    with st.chat_message("user"):
        st.markdown(text)
    with st.chat_message("assistant"):
        try:
            completion = openai_client.chat.completions.create(
                model=st.session_state["openai_model"],
                messages=st.session_state.messages,
                stream=False,
            )
            ans = completion.choices[0].message.content
        except Exception as e:
            # Surface API failures as the assistant's answer text.
            ans = f"Error calling GPT-4 API: {e}"

        st.write("GPT-4o: " + ans)
        save_name = generate_filename(ans.strip() if ans.strip() else text.strip(), "md")
        create_file(save_name, text, ans, should_save=True)
        st.session_state.messages.append({"role": "assistant", "content": ans})
    return ans
def process_with_claude(text):
    """Send *text* to Claude, render the exchange in the chat UI, persist
    it via create_file(), and return the answer.

    Returns None without side effects when *text* is empty.
    """
    if not text:
        return
    with st.chat_message("user"):
        st.markdown(text)
    with st.chat_message("assistant"):
        try:
            reply = claude_client.messages.create(
                model="claude-3-sonnet-20240229",
                max_tokens=1000,
                messages=[{"role": "user", "content": text}],
            )
            ans = reply.content[0].text
        except Exception as e:
            # Surface API failures as the assistant's answer text.
            ans = f"Error calling Claude API: {e}"

        st.write("Claude-3.5: " + ans)
        save_name = generate_filename(ans.strip() if ans.strip() else text.strip(), "md")
        create_file(save_name, text, ans, should_save=True)
        st.session_state.chat_history.append({"user": text, "claude": ans})
    return ans
# 11. File Management
def create_zip_of_files(md_files, mp3_files):
    """Bundle the given markdown and mp3 files into a zip archive.

    README.md is always excluded. The archive name combines a yyMM_HHmm
    timestamp with up to three keywords derived from the content.
    Returns the zip filename, or None when there is nothing to archive.
    """
    md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
    all_files = md_files + mp3_files
    if not all_files:
        return None

    # Gather text for keyword extraction: full content for markdown,
    # just the filename for audio.
    all_content = []
    for path in all_files:
        if path.endswith('.md'):
            with open(path, 'r', encoding='utf-8') as fh:
                all_content.append(fh.read())
        elif path.endswith('.mp3'):
            all_content.append(os.path.basename(path))

    info_terms = get_high_info_terms(" ".join(all_content))

    stamp = datetime.now().strftime("%y%m_%H%M")
    name_text = '_'.join(t.replace(' ', '-') for t in info_terms[:3])
    zip_name = f"{stamp}_{name_text}.zip"

    with zipfile.ZipFile(zip_name, 'w') as archive:
        for path in all_files:
            archive.write(path)

    return zip_name
def load_files_for_sidebar():
    """Collect *.md (minus README.md) and *.mp3 files from the current
    directory, grouped by their 10-character filename prefix.

    Returns:
        (groups, sorted_prefixes): a defaultdict mapping prefix -> file
        list (newest first), and the prefixes ordered by each group's
        newest file, newest first.
    """
    md_files = [f for f in glob.glob("*.md")
                if os.path.basename(f).lower() != 'readme.md']
    mp3_files = glob.glob("*.mp3")

    groups = defaultdict(list)
    for path in md_files + mp3_files:
        # The first 10 chars are the timestamp prefix from generate_filename.
        groups[os.path.basename(path)[:10]].append(path)

    # Newest-first inside each group.
    for prefix in groups:
        groups[prefix].sort(key=os.path.getmtime, reverse=True)

    # Order the groups themselves by their most recent file.
    sorted_prefixes = sorted(
        groups,
        key=lambda pre: max(os.path.getmtime(x) for x in groups[pre]),
        reverse=True,
    )
    return groups, sorted_prefixes
def extract_keywords_from_md(files):
    """Concatenate the text of every .md file in *files* and return its
    high-information terms (non-.md entries are ignored).

    Fix: files are read via a context manager so handles are closed
    deterministically (previously ``open(...).read()`` left them to the
    garbage collector).
    """
    text = ""
    for f in files:
        if f.endswith(".md"):
            with open(f, 'r', encoding='utf-8') as fh:
                text += " " + fh.read()
    return get_high_info_terms(text)
def display_file_manager_sidebar(groups, sorted_prefixes):
    """Render the sidebar file manager: bulk delete/zip actions plus one
    expander per prefix group with view/delete controls.

    Args:
        groups: mapping of 10-char filename prefix -> list of file paths.
        sorted_prefixes: prefixes ordered newest-first
            (see load_files_for_sidebar).
    """
    st.sidebar.title("🎵 Audio & Document Manager")

    # Split into markdown vs audio lists for the bulk actions below.
    all_md = []
    all_mp3 = []
    for prefix in groups:
        for f in groups[prefix]:
            if f.endswith(".md"):
                all_md.append(f)
            elif f.endswith(".mp3"):
                all_mp3.append(f)

    # Bulk actions: delete-all per type, and zip everything.
    top_bar = st.sidebar.columns(3)
    with top_bar[0]:
        if st.button("🗑 Del All MD"):
            for f in all_md:
                os.remove(f)
            st.session_state.should_rerun = True  # refresh UI on next pass
    with top_bar[1]:
        if st.button("🗑 Del All MP3"):
            for f in all_mp3:
                os.remove(f)
            st.session_state.should_rerun = True
    with top_bar[2]:
        if st.button("⬇️ Zip All"):
            z = create_zip_of_files(all_md, all_mp3)
            if z:
                with open(z, "rb") as f:
                    b64 = base64.b64encode(f.read()).decode()
                dl_link = f'<a href="data:file/zip;base64,{b64}" download="{os.path.basename(z)}">📂 Download {os.path.basename(z)}</a>'
                st.sidebar.markdown(dl_link,unsafe_allow_html=True)

    # One expander per prefix group; button keys embed the prefix so
    # Streamlit widgets stay unique across groups.
    for prefix in sorted_prefixes:
        files = groups[prefix]
        kw = extract_keywords_from_md(files)
        keywords_str = " ".join(kw) if kw else "No Keywords"
        with st.sidebar.expander(f"{prefix} Files ({len(files)}) - Keywords: {keywords_str}", expanded=True):
            c1,c2 = st.columns(2)
            with c1:
                if st.button("👀View Group", key="view_group_"+prefix):
                    st.session_state.viewing_prefix = prefix
            with c2:
                if st.button("🗑Del Group", key="del_group_"+prefix):
                    for f in files:
                        os.remove(f)
                    st.success(f"Deleted all files in group {prefix} successfully!")
                    st.session_state.should_rerun = True

            # List each file with its modification time.
            for f in files:
                fname = os.path.basename(f)
                ctime = datetime.fromtimestamp(os.path.getmtime(f)).strftime("%Y-%m-%d %H:%M:%S")
                st.write(f"**{fname}** - {ctime}")
# 12. Main Application
def main():
    """Top-level Streamlit page: voice/media/ArXiv/editor tabs plus the
    sidebar file manager and the deferred-rerun handshake."""
    st.sidebar.markdown("### 🚲BikeAI🏆 Multi-Agent Research AI")
    tab_main = st.radio("Action:",["🎤 Voice Input","📸 Media Gallery","🔍 Search ArXiv","📝 File Editor"],horizontal=True)

    # Custom component (local ./mycomponent) that supplies detected input.
    mycomponent = components.declare_component("mycomponent", path="mycomponent")
    val = mycomponent(my_input_value="Hello")

    # Show input in a text box for editing if the component produced any.
    if val:
        val_stripped = val.replace('\n', ' ')
        edited_input = st.text_area("Edit your detected input:", value=val_stripped, height=100)
        run_option = st.selectbox("Select AI Model:", ["Arxiv", "GPT-4o", "Claude-3.5"])
        col1, col2 = st.columns(2)
        with col1:
            autorun = st.checkbox("AutoRun on input change", value=False)
        with col2:
            full_audio = st.checkbox("Generate Complete Audio", value=False,
                                     help="Generate audio for the complete response including all papers and summaries")

        # old_val tracks the last processed component value so autorun
        # only fires when the input actually changed.
        input_changed = (val != st.session_state.old_val)

        if autorun and input_changed:
            st.session_state.old_val = val
            if run_option == "Arxiv":
                perform_ai_lookup(edited_input, vocal_summary=True, extended_refs=False,
                                  titles_summary=True, full_audio=full_audio)
            else:
                if run_option == "GPT-4o":
                    process_with_gpt(edited_input)
                elif run_option == "Claude-3.5":
                    process_with_claude(edited_input)
        else:
            # Manual trigger path when autorun is off or input unchanged.
            if st.button("Process Input"):
                st.session_state.old_val = val
                if run_option == "Arxiv":
                    perform_ai_lookup(edited_input, vocal_summary=True, extended_refs=False,
                                      titles_summary=True, full_audio=full_audio)
                else:
                    if run_option == "GPT-4o":
                        process_with_gpt(edited_input)
                    elif run_option == "Claude-3.5":
                        process_with_claude(edited_input)

    if tab_main == "🔍 Search ArXiv":
        st.subheader("🔍 Search ArXiv")
        q = st.text_input("Research query:")

        # Audio options control which spoken variants perform_ai_lookup makes.
        st.markdown("### 🎛️ Audio Generation Options")
        vocal_summary = st.checkbox("🎙️ Vocal Summary (Short Answer)", value=True)
        extended_refs = st.checkbox("📜 Extended References & Summaries (Long)", value=False)
        titles_summary = st.checkbox("🔖 Paper Titles Only", value=True)
        full_audio = st.checkbox("📚 Generate Complete Audio Response", value=False,
                                 help="Generate audio for the complete response including all papers and summaries")

        if q and st.button("Run ArXiv Query"):
            perform_ai_lookup(q, vocal_summary=vocal_summary, extended_refs=extended_refs,
                              titles_summary=titles_summary, full_audio=full_audio)

    elif tab_main == "🎤 Voice Input":
        st.subheader("🎤 Voice Recognition")
        user_text = st.text_area("Message:", height=100)
        user_text = user_text.strip().replace('\n', ' ')
        if st.button("Send 📨"):
            process_with_gpt(user_text)
        # Two transcripts: Claude pairs and the raw GPT message list.
        st.subheader("📜 Chat History")
        t1,t2=st.tabs(["Claude History","GPT-4o History"])
        with t1:
            for c in st.session_state.chat_history:
                st.write("**You:**", c["user"])
                st.write("**Claude:**", c["claude"])
        with t2:
            for m in st.session_state.messages:
                with st.chat_message(m["role"]):
                    st.markdown(m["content"])

    elif tab_main == "📸 Media Gallery":
        st.header("🎬 Media Gallery - Images and Videos")
        tabs = st.tabs(["🖼️ Images", "🎥 Video"])
        with tabs[0]:
            imgs = glob.glob("*.png")+glob.glob("*.jpg")
            if imgs:
                c = st.slider("Cols",1,5,3)  # user-selected column count
                cols = st.columns(c)
                for i,f in enumerate(imgs):
                    with cols[i%c]:
                        st.image(Image.open(f),use_container_width=True)
                        if st.button(f"👀 Analyze {os.path.basename(f)}", key=f"analyze_{f}"):
                            a = process_image(f,"Describe this image.")
                            st.markdown(a)
            else:
                st.write("No images found.")
        with tabs[1]:
            vids = glob.glob("*.mp4")
            if vids:
                for v in vids:
                    with st.expander(f"🎥 {os.path.basename(v)}"):
                        st.video(v)
                        if st.button(f"Analyze {os.path.basename(v)}", key=f"analyze_{v}"):
                            a = process_video_with_gpt(v,"Describe video.")
                            st.markdown(a)
            else:
                st.write("No videos found.")

    elif tab_main == "📝 File Editor":
        # current_file/file_content are set elsewhere; edit only when present.
        if getattr(st.session_state,'current_file',None):
            st.subheader(f"Editing: {st.session_state.current_file}")
            new_text = st.text_area("Content:", st.session_state.file_content, height=300)
            if st.button("Save"):
                with open(st.session_state.current_file,'w',encoding='utf-8') as f:
                    f.write(new_text)
                st.success("Updated!")
                st.session_state.should_rerun = True
        else:
            st.write("Select a file from the sidebar to edit.")

    # Sidebar file manager is always rendered.
    groups, sorted_prefixes = load_files_for_sidebar()
    display_file_manager_sidebar(groups, sorted_prefixes)

    # Inline viewer for the file group selected in the sidebar.
    if st.session_state.viewing_prefix and st.session_state.viewing_prefix in groups:
        st.write("---")
        st.write(f"**Viewing Group:** {st.session_state.viewing_prefix}")
        for f in groups[st.session_state.viewing_prefix]:
            fname = os.path.basename(f)
            ext = os.path.splitext(fname)[1].lower().strip('.')
            st.write(f"### {fname}")
            if ext == "md":
                content = open(f,'r',encoding='utf-8').read()
                st.markdown(content)
            elif ext == "mp3":
                st.audio(f)
            else:
                # Unknown types get a raw base64 download link.
                with open(f, "rb") as file:
                    b64 = base64.b64encode(file.read()).decode()
                dl_link = f'<a href="data:file/{ext};base64,{b64}" download="{fname}">Download {fname}</a>'
                st.markdown(dl_link, unsafe_allow_html=True)
        if st.button("Close Group View"):
            st.session_state.viewing_prefix = None

    # Deferred rerun: flag set anywhere above triggers a single rerun here.
    if st.session_state.should_rerun:
        st.session_state.should_rerun = False
        st.rerun()
# Script entry point.
if __name__=="__main__":
    main()