awacke1 committed on
Commit
ac9fced
·
verified ·
1 Parent(s): a4b1e84

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +192 -117
app.py CHANGED
@@ -1,51 +1,28 @@
1
- # requirements.txt additions:
2
- """
3
- streamlit-marquee
4
- """
5
-
6
- # app.py
7
  import streamlit as st
8
  import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, time, zipfile
9
- import plotly.graph_objects as go
10
- import streamlit.components.v1 as components
11
  from datetime import datetime
12
  from audio_recorder_streamlit import audio_recorder
13
- from collections import defaultdict, deque, Counter
14
  from dotenv import load_dotenv
15
  from gradio_client import Client
16
  from huggingface_hub import InferenceClient
17
- from io import BytesIO
18
  from PIL import Image
19
- from PyPDF2 import PdfReader
20
- from urllib.parse import quote
21
- from xml.etree import ElementTree as ET
22
  from openai import OpenAI
23
- import extra_streamlit_components as stx
24
  import asyncio
25
  import edge_tts
26
- from streamlit_marquee import st_marquee
27
 
28
- # Core setup
29
  st.set_page_config(
30
  page_title="๐ŸšฒTalkingAIResearcher๐Ÿ†",
31
  page_icon="๐Ÿšฒ๐Ÿ†",
32
- layout="wide",
33
- initial_sidebar_state="auto",
34
  )
35
 
36
- # Initialize session state
37
- if 'tts_voice' not in st.session_state:
38
- st.session_state['tts_voice'] = "en-US-AriaNeural"
39
- if 'audio_format' not in st.session_state:
40
- st.session_state['audio_format'] = 'mp3'
41
- if 'scroll_text' not in st.session_state:
42
- st.session_state['scroll_text'] = ''
43
-
44
  EDGE_TTS_VOICES = [
45
  "en-US-AriaNeural",
46
- "en-US-GuyNeural",
47
  "en-US-JennyNeural",
48
- "en-GB-SoniaNeural",
49
  ]
50
 
51
  FILE_EMOJIS = {
@@ -53,81 +30,113 @@ FILE_EMOJIS = {
53
  "mp3": "๐ŸŽต",
54
  "wav": "๐Ÿ”Š",
55
  "txt": "๐Ÿ“„",
56
- "pdf": "๐Ÿ“‘",
57
- "json": "๐Ÿ“Š",
58
- "csv": "๐Ÿ“ˆ",
59
- "zip": "๐Ÿ“ฆ"
60
  }
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  @st.cache_resource
63
  def get_cached_audio_b64(file_path):
64
- """Cache audio file as base64"""
65
  with open(file_path, "rb") as f:
66
  return base64.b64encode(f.read()).decode()
67
 
68
  def beautify_filename(filename):
69
- """Make filename more readable"""
70
  name = os.path.splitext(filename)[0]
71
- name = name.replace('_', ' ').replace('.', ' ')
72
- return name
73
-
74
- def load_files_for_sidebar():
75
- """Load and group files by timestamp prefix"""
76
- md_files = glob.glob("*.md")
77
- mp3_files = glob.glob("*.mp3")
78
- wav_files = glob.glob("*.wav")
79
 
80
- md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
81
  all_files = md_files + mp3_files + wav_files
 
 
 
 
 
 
 
 
82
 
 
 
 
 
 
 
 
 
 
83
  groups = defaultdict(list)
84
- for f in all_files:
85
  basename = os.path.basename(f)
86
  group_name = basename[:9] if len(basename) >= 9 else 'Other'
87
  groups[group_name].append(f)
88
-
89
  return sorted(groups.items(),
90
  key=lambda x: max(os.path.getmtime(f) for f in x[1]),
91
  reverse=True)
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  def display_file_manager_sidebar(groups_sorted):
94
- """Enhanced sidebar with audio players and beautified names"""
95
  st.sidebar.title("๐Ÿ“š File Manager")
96
-
97
- all_md, all_mp3, all_wav = [], [], []
98
  for _, files in groups_sorted:
99
  for f in files:
100
- if f.endswith(".md"): all_md.append(f)
101
- elif f.endswith(".mp3"): all_mp3.append(f)
102
- elif f.endswith(".wav"): all_wav.append(f)
103
 
104
- # File management buttons
105
  cols = st.sidebar.columns(4)
106
- with cols[0]:
107
- if st.button("๐Ÿ—‘๏ธ MD"):
108
- [os.remove(f) for f in all_md]
109
- st.session_state.should_rerun = True
110
- with cols[1]:
111
- if st.button("๐Ÿ—‘๏ธ MP3"):
112
- [os.remove(f) for f in all_mp3]
113
- st.session_state.should_rerun = True
114
- with cols[2]:
115
- if st.button("๐Ÿ—‘๏ธ WAV"):
116
- [os.remove(f) for f in all_wav]
117
- st.session_state.should_rerun = True
118
- with cols[3]:
119
- if st.button("๐Ÿ“ฆ Zip"):
120
- zip_name = create_zip_of_files(all_md, all_mp3, all_wav,
121
- st.session_state.get('last_query', ''))
122
- if zip_name:
123
- st.sidebar.markdown(get_download_link(zip_name), unsafe_allow_html=True)
124
-
125
- # Display file groups
126
  for group_name, files in groups_sorted:
127
- timestamp_dt = datetime.strptime(group_name, "%y%m_%H%M") if len(group_name) == 9 else None
128
- group_label = timestamp_dt.strftime("%Y-%m-%d %H:%M") if timestamp_dt else group_name
129
 
130
- with st.sidebar.expander(f"๐Ÿ“ {group_label} ({len(files)})", expanded=True):
131
  c1, c2 = st.columns(2)
132
  with c1:
133
  if st.button("๐Ÿ‘€", key=f"view_{group_name}"):
@@ -144,53 +153,119 @@ def display_file_manager_sidebar(groups_sorted):
144
  st.write(f"{emoji} **{pretty_name}**")
145
 
146
  if ext in ['mp3', 'wav']:
147
- audio_b64 = get_cached_audio_b64(f)
148
  st.audio(f)
149
- cols = st.columns([3,1])
150
- with cols[1]:
151
- if st.button("๐Ÿ”„", key=f"loop_{f}"):
152
- components.html(
153
- f'''
154
- <audio id="player_{f}" loop>
155
- <source src="data:audio/{ext};base64,{audio_b64}">
156
- </audio>
157
- <script>
158
- document.getElementById("player_{f}").play();
159
- </script>
160
- ''',
161
- height=0
162
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
 
164
  def main():
165
- # Add scrolling banner
166
- st_marquee(
167
- text=" | ".join(st.session_state.get('scroll_text', '๐Ÿš€ Welcome to TalkingAIResearcher').split('\n')),
168
- font_size=20,
 
 
169
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
- # Rest of the main UI code...
172
- # (Keep existing main() implementation but with beautified filenames)
173
 
174
- # Compressed sidebar markdown
175
- sidebar_md = """
176
- # ๐Ÿง  AGI Levels
177
  L0 โŒ No AI
178
- L1 ๐ŸŒฑ Emerging (ChatGPT, Bard)
179
- L2 ๐Ÿ’ช Competent (Watson)
180
- L3 ๐ŸŽฏ Expert (DALLยทE)
181
- L4 ๐Ÿ† Virtuoso (AlphaGo)
182
- L5 ๐Ÿš€ Superhuman (AlphaFold)
183
-
184
- # ๐Ÿงฌ AlphaFold2
185
- 1. ๐Ÿงฌ Input Seq
186
- 2. ๐Ÿ” DB Search
187
- 3. ๐Ÿงฉ MSA
188
- 4. ๐Ÿ“‘ Templates
189
- 5. ๐Ÿ”„ Evoformer
190
- 6. ๐Ÿงฑ Structure
191
- 7. ๐ŸŽฏ 3D Predict
192
- 8. โ™ป๏ธ Recycle x3
193
- """
194
-
195
- if __name__=="__main__":
196
  main()
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, time, zipfile
 
 
3
  from datetime import datetime
4
  from audio_recorder_streamlit import audio_recorder
5
+ from collections import defaultdict, Counter
6
  from dotenv import load_dotenv
7
  from gradio_client import Client
8
  from huggingface_hub import InferenceClient
 
9
  from PIL import Image
 
 
 
10
  from openai import OpenAI
 
11
  import asyncio
12
  import edge_tts
13
+ from streamlit_marquee import streamlit_marquee
14
 
 
15
# Browser-tab title/icon and page layout for the app.
st.set_page_config(
    page_title="๐ŸšฒTalkingAIResearcher๐Ÿ†",
    page_icon="๐Ÿšฒ๐Ÿ†",
    layout="wide",
)

# Voice identifiers offered for edge-tts synthesis.
EDGE_TTS_VOICES = [
    "en-US-AriaNeural",
    "en-US-GuyNeural",
    "en-US-JennyNeural",
    "en-GB-SoniaNeural",
]
27
 
28
  FILE_EMOJIS = {
 
30
  "mp3": "๐ŸŽต",
31
  "wav": "๐Ÿ”Š",
32
  "txt": "๐Ÿ“„",
33
+ "pdf": "๐Ÿ“‘"
 
 
 
34
  }
35
 
36
# Initialize session states.
# Only keys that are missing are written, so values already stored in
# st.session_state are never overwritten by this block.
_SESSION_DEFAULTS = {
    'tts_voice': EDGE_TTS_VOICES[0],
    'audio_format': 'mp3',
    'messages': [],
    'chat_history': [],
    'viewing_prefix': None,
    'should_rerun': False,
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# API Setup
# os.getenv returns None when a variable is unset, in which case None is
# what gets passed as api_key to the client constructors below.
openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
claude_client = anthropic.Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
53
+
54
@st.cache_resource
def get_cached_audio_b64(file_path):
    """Return the bytes of *file_path* encoded as a base64 text string.

    Args:
        file_path: Path of the file to read (opened in binary mode).

    Returns:
        The file contents, base64-encoded and decoded to ``str``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # NOTE(review): st.cache_resource presumably keys the cache on the path
    # argument only, so a file rewritten under the same name may keep
    # returning the old payload -- confirm against the Streamlit caching docs.
    with open(file_path, "rb") as audio_file:
        raw_bytes = audio_file.read()
    return base64.b64encode(raw_bytes).decode()
58
 
59
def beautify_filename(filename: str) -> str:
    """Turn a file name into a display label.

    The final extension is dropped, then every underscore and every
    remaining dot is replaced by a single space.

    Args:
        filename: File name (or path) to prettify.

    Returns:
        The label text; an empty string for an empty input.
    """
    stem, _extension = os.path.splitext(filename)
    # One pass over the string instead of two chained .replace() calls.
    return stem.translate(str.maketrans("_.", "  "))
 
 
 
 
 
 
 
62
 
63
def create_zip_of_files(md_files, mp3_files, wav_files, query=''):
    """Bundle the given files into a timestamped zip in the working directory.

    Args:
        md_files: Paths of markdown files to include.
        mp3_files: Paths of MP3 files to include.
        wav_files: Paths of WAV files to include.
        query: Unused by this function; kept so existing callers that pass
            it keep working.

    Returns:
        The archive file name (``<yymm_HHMM>_archive.zip``), or ``None`` when
        none of the given paths refers to an existing file.
    """
    # Paths that no longer exist are dropped up front: one stale entry should
    # not abort the export and leave a half-written archive behind.
    existing = [
        path
        for path in (*md_files, *mp3_files, *wav_files)
        if os.path.isfile(path)
    ]
    if not existing:
        return None

    timestamp = datetime.now().strftime("%y%m_%H%M")
    zip_name = f"{timestamp}_archive.zip"
    # ZipFile defaults to ZIP_STORED (no compression); request deflate so
    # text-heavy exports actually shrink.
    with zipfile.ZipFile(zip_name, 'w', compression=zipfile.ZIP_DEFLATED) as archive:
        for path in existing:
            archive.write(path)
    return zip_name
73
 
74
def get_download_link(file_path, file_type="zip"):
    """Build an HTML ``<a>`` tag that downloads *file_path* via a data URI.

    Args:
        file_path: Path of the file whose bytes are embedded in the link.
        file_type: Short type tag (``zip``, ``mp3``, ``wav`` or ``md``); it
            selects the media type of the data URI and the label prefix.

    Returns:
        The anchor markup as a string, with the whole file base64-encoded
        inside the ``href``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import html  # local import: not part of the file's top-level imports

    with open(file_path, "rb") as payload:
        b64 = base64.b64encode(payload.read()).decode()

    # "application/<type>" is only a registered media type for zip; the other
    # supported kinds need their real types.
    mime_map = {
        'zip': 'application/zip',
        'mp3': 'audio/mpeg',
        'wav': 'audio/wav',
        'md': 'text/markdown',
    }
    mime = mime_map.get(file_type, 'application/octet-stream')

    ext_map = {'zip': '๐Ÿ“ฆ', 'mp3': '๐ŸŽต', 'wav': '๐Ÿ”Š', 'md': '๐Ÿ“'}
    emoji = ext_map.get(file_type, '')

    # The name lands in both an attribute value and the element text, so it
    # must be escaped for quotes, '&' and angle brackets.
    safe_name = html.escape(os.path.basename(file_path), quote=True)
    return f'<a href="data:{mime};base64,{b64}" download="{safe_name}">{emoji} Download {safe_name}</a>'
80
+
81
def load_files_for_sidebar():
    """Group files in the working directory by the first 9 characters of their name.

    Every regular file matching ``*.*`` is considered, except a file named
    exactly ``readme.md`` (case-insensitive). Names shorter than nine
    characters go into the ``'Other'`` group.

    Returns:
        A list of ``(group_name, [paths])`` tuples, ordered so the group
        holding the most recently modified file comes first. Empty when no
        file qualifies.
    """
    groups = defaultdict(list)
    # sorted() gives a stable order inside each group; glob's own order is
    # unspecified.
    for path in sorted(glob.glob("*.*")):
        # "*.*" also matches directories with a dot in their name.
        if not os.path.isfile(path):
            continue
        basename = os.path.basename(path)
        # Compare the whole name: a suffix test would also hide files such
        # as "myreadme.md".
        if basename.lower() == 'readme.md':
            continue
        group_name = basename[:9] if len(basename) >= 9 else 'Other'
        groups[group_name].append(path)

    return sorted(
        groups.items(),
        key=lambda item: max(os.path.getmtime(p) for p in item[1]),
        reverse=True,
    )
91
 
92
def display_marquee_controls():
    """Render the marquee styling widgets in the sidebar and return the choices.

    Two colour pickers go in the first sidebar column and two sliders in
    the second.

    Returns:
        A dict of style settings (``background``, ``color``, ``font-size``,
        ``animationDuration``, ``width``, ``lineHeight``) built from the
        current widget values; ``width`` and ``lineHeight`` are fixed.
    """
    st.sidebar.markdown("### ๐ŸŽฏ Marquee Settings")
    colour_col, size_col = st.sidebar.columns(2)

    with colour_col:
        bg_color = st.color_picker("๐ŸŽจ Background", "#1E1E1E")
        text_color = st.color_picker("โœ๏ธ Text", "#FFFFFF")
    with size_col:
        font_size = st.slider("๐Ÿ“ Size", 10, 24, 14)
        # The slider labelled "Speed" feeds the animation *duration* in seconds.
        duration = st.slider("โฑ๏ธ Speed", 1, 20, 10)

    # The widgets above carry no explicit key.
    # NOTE(review): calling this function twice in one script run would
    # presumably create identical widgets, which Streamlit rejects -- confirm
    # before adding a second call site.
    return {
        "background": bg_color,
        "color": text_color,
        "font-size": f"{font_size}px",
        "animationDuration": f"{duration}s",
        "width": "100%",
        "lineHeight": "35px",
    }
110
+
111
  def display_file_manager_sidebar(groups_sorted):
 
112
  st.sidebar.title("๐Ÿ“š File Manager")
113
+ all_files = {'md': [], 'mp3': [], 'wav': []}
114
+
115
  for _, files in groups_sorted:
116
  for f in files:
117
+ ext = os.path.splitext(f)[1].lower().strip('.')
118
+ if ext in all_files:
119
+ all_files[ext].append(f)
120
 
 
121
  cols = st.sidebar.columns(4)
122
+ for i, (ext, files) in enumerate(all_files.items()):
123
+ with cols[i]:
124
+ if st.button(f"๐Ÿ—‘๏ธ {ext.upper()}"):
125
+ [os.remove(f) for f in files]
126
+ st.session_state.should_rerun = True
127
+
128
+ if st.sidebar.button("๐Ÿ“ฆ Zip All"):
129
+ zip_name = create_zip_of_files(
130
+ all_files['md'], all_files['mp3'], all_files['wav']
131
+ )
132
+ if zip_name:
133
+ st.sidebar.markdown(get_download_link(zip_name), unsafe_allow_html=True)
134
+
 
 
 
 
 
 
 
135
  for group_name, files in groups_sorted:
136
+ timestamp = (datetime.strptime(group_name, "%y%m_%H%M").strftime("%Y-%m-%d %H:%M")
137
+ if len(group_name) == 9 else group_name)
138
 
139
+ with st.sidebar.expander(f"๐Ÿ“ {timestamp} ({len(files)})", expanded=True):
140
  c1, c2 = st.columns(2)
141
  with c1:
142
  if st.button("๐Ÿ‘€", key=f"view_{group_name}"):
 
153
  st.write(f"{emoji} **{pretty_name}**")
154
 
155
  if ext in ['mp3', 'wav']:
 
156
  st.audio(f)
157
+ if st.button("๐Ÿ”„", key=f"loop_{f}"):
158
+ audio_b64 = get_cached_audio_b64(f)
159
+ st.components.v1.html(
160
+ f'''
161
+ <audio id="player_{f}" loop>
162
+ <source src="data:audio/{ext};base64,{audio_b64}">
163
+ </audio>
164
+ <script>
165
+ document.getElementById("player_{f}").play();
166
+ </script>
167
+ ''',
168
+ height=0
169
+ )
170
+
171
async def edge_tts_generate(text, voice, file_format="mp3"):
    """Synthesize *text* with edge-tts and save the audio in the working directory.

    Args:
        text: Text to speak. Runs of whitespace are collapsed to one space.
        voice: Voice identifier passed to ``edge_tts.Communicate``.
        file_format: Used only as the extension of the output file name.
            NOTE(review): nothing here tells edge-tts which encoding to
            produce -- confirm the saved bytes match this extension.

    Returns:
        The output file name (``<yymm_HHMM>_<voice>.<file_format>``), or
        ``None`` when *text* is ``None``, empty or whitespace only.
    """
    # None/empty is checked before the regex: re.sub raises TypeError on None.
    if not text:
        return None
    text = re.sub(r'\s+', ' ', text).strip()
    if not text:
        return None

    communicate = edge_tts.Communicate(text, voice)
    # The name has minute resolution, so a second call for the same voice
    # within the same minute writes to the same file name.
    filename = f"{datetime.now().strftime('%y%m_%H%M')}_{voice}.{file_format}"
    await communicate.save(filename)
    return filename
178
+
179
# Matches a four-digit, dot, five-digit identifier such as "2401.12345".
_ARXIV_ID_RE = re.compile(r'(\d{4}\.\d{5})')


def parse_arxiv_refs(text):
    """Parse a line-oriented paper listing into a list of paper dicts.

    A line containing ``|`` starts a new paper: the text before the first
    ``|`` is the date and the text between the first and second ``|`` (or
    to the end of the line) is the title. The first non-blank line after a
    header is taken as the authors; every later non-blank line is appended
    to the summary. Lines before the first header are ignored.

    Args:
        text: The listing to parse; ``None`` or an empty string yields ``[]``.

    Returns:
        A list of dicts with keys ``date``, ``title``, ``authors``,
        ``summary`` and ``id``. ``id`` is the first identifier found on the
        header line, or ``''`` when there is none. ``summary`` lines are
        joined with single spaces, without leading or trailing whitespace.
    """
    if not text:
        return []

    papers = []
    current = None
    summary_lines = []

    def _finish():
        # Close the paper being built and store its joined summary.
        if current is not None:
            current['summary'] = ' '.join(summary_lines)
            papers.append(current)

    # splitlines() also handles "\r\n", which split('\n') would leave behind.
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line:
            continue  # blank lines carry no author or summary text

        if '|' in line:
            _finish()
            summary_lines = []
            parts = line.strip('* ').split('|')
            id_match = _ARXIV_ID_RE.search(line)
            current = {
                'date': parts[0].strip(),
                'title': parts[1].strip(),
                'authors': '',
                'summary': '',
                'id': id_match.group(1) if id_match else '',
            }
        elif current is not None:
            if not current['authors']:
                current['authors'] = line.strip('* ')
            else:
                summary_lines.append(line)

    _finish()
    return papers
204
+
205
def perform_ai_lookup(query, marquee_settings=None):
    """Run an ArXiv search through the Gradio space and show one marquee per paper.

    Args:
        query: Search text sent to the remote ``/update_with_rag_md`` endpoint.
        marquee_settings: Style dict for the marquees. When omitted, the
            sidebar controls are rendered here to obtain one; callers that
            have already rendered those controls in the current run should
            pass their result instead, so the widgets are not created twice.

    Returns:
        The list of paper dicts parsed from the first element of the
        endpoint's response.
    """
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    response = client.predict(
        query, 20, "Semantic Search",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        api_name="/update_with_rag_md"
    )

    papers = parse_arxiv_refs(response[0])
    if marquee_settings is None:
        marquee_settings = display_marquee_controls()

    for position, paper in enumerate(papers):
        content = f"๐Ÿ“„ {paper['title']} | ๐Ÿ‘ค {paper['authors']} | ๐Ÿ“ {paper['summary']}"
        streamlit_marquee(
            content=content,
            **marquee_settings,
            # The list position keeps keys unique and stable across reruns,
            # even when two results share an id or have none.
            key=f"paper_{position}_{paper['id']}"
        )
        st.write("")  # Spacing

    return papers


def main():
    """Render the page: welcome marquee, action selector, search and file sidebar."""
    marquee_settings = display_marquee_controls()

    streamlit_marquee(
        content="๐Ÿš€ Welcome to TalkingAIResearcher | ๐Ÿค– Your Research Assistant",
        **marquee_settings,
        key="welcome"
    )

    # Named once so the option list and the comparison below cannot drift.
    arxiv_action = "๐Ÿ” ArXiv"
    tab = st.radio("Action:", ["๐ŸŽค Voice", arxiv_action, "๐Ÿ“ Editor"], horizontal=True)

    # Only the ArXiv action has behaviour here; the other two render nothing.
    if tab == arxiv_action:
        query = st.text_input("๐Ÿ” Search:")
        if query:
            # Reuse the settings gathered above instead of rendering the
            # sidebar controls a second time in the same run.
            papers = perform_ai_lookup(query, marquee_settings=marquee_settings)
            st.write(f"Found {len(papers)} papers")

    groups = load_files_for_sidebar()
    display_file_manager_sidebar(groups)

    # The sidebar's delete buttons set this flag; clear it before rerunning
    # so the rerun does not loop.
    if st.session_state.should_rerun:
        st.session_state.should_rerun = False
        st.rerun()
250
 
251
+ # Condensed sidebar markdown
252
+ sidebar_md = """# ๐Ÿ“š Research Papers
253
 
254
+ ## ๐Ÿง  AGI Levels
 
 
255
  L0 โŒ No AI
256
+ L1 ๐ŸŒฑ ChatGPT/Bard [2303.08774v1](https://arxiv.org/abs/2303.08774) [PDF](https://arxiv.org/pdf/2303.08774.pdf)
257
+ L2 ๐Ÿ’ช Watson [2201.11903v1](https://arxiv.org/abs/2201.11903) [PDF](https://arxiv.org/pdf/2201.11903.pdf)
258
+ L3 ๐ŸŽฏ DALLยทE [2204.06125v1](https://arxiv.org/abs/2204.06125) [PDF](https://arxiv.org/pdf/2204.06125.pdf)
259
+ L4 ๐Ÿ† AlphaGo [1712.01815v1](https://arxiv.org/abs/1712.01815) [PDF](https://arxiv.org/pdf/1712.01815.pdf)
260
+ L5 ๐Ÿš€ AlphaFold [2203.15556v1](https://arxiv.org/abs/2203.15556) [PDF](https://arxiv.org/pdf/2203.15556.pdf)
261
+
262
+ ## ๐Ÿงฌ AlphaFold2
263
+ [2203.15556v1](https://arxiv.org/abs/2203.15556) [PDF](https://arxiv.org/pdf/2203.15556.pdf)
264
+ 1. ๐Ÿงฌ Input Seq โ†’ 2. ๐Ÿ” DB Search โ†’ 3. ๐Ÿงฉ MSA
265
+ 4. ๐Ÿ“‘ Templates โ†’ 5. ๐Ÿ”„ Evoformer โ†’ 6. ๐Ÿงฑ Structure
266
+ 7. ๐ŸŽฏ 3D Predict โ†’ 8. โ™ป๏ธ Recycle x3"""
267
+
268
+ st.sidebar.markdown(sidebar_md)
269
+
270
+ if __name__ == "__main__":
 
 
 
271
  main()