Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Update app.py
Browse files
app.py
CHANGED
@@ -1,51 +1,28 @@
|
|
1 |
-
# requirements.txt additions:
|
2 |
-
"""
|
3 |
-
streamlit-marquee
|
4 |
-
"""
|
5 |
-
|
6 |
-
# app.py
|
7 |
import streamlit as st
|
8 |
import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, time, zipfile
|
9 |
-
import plotly.graph_objects as go
|
10 |
-
import streamlit.components.v1 as components
|
11 |
from datetime import datetime
|
12 |
from audio_recorder_streamlit import audio_recorder
|
13 |
-
from collections import defaultdict,
|
14 |
from dotenv import load_dotenv
|
15 |
from gradio_client import Client
|
16 |
from huggingface_hub import InferenceClient
|
17 |
-
from io import BytesIO
|
18 |
from PIL import Image
|
19 |
-
from PyPDF2 import PdfReader
|
20 |
-
from urllib.parse import quote
|
21 |
-
from xml.etree import ElementTree as ET
|
22 |
from openai import OpenAI
|
23 |
-
import extra_streamlit_components as stx
|
24 |
import asyncio
|
25 |
import edge_tts
|
26 |
-
from streamlit_marquee import
|
27 |
|
28 |
-
# Core setup
|
29 |
st.set_page_config(
|
30 |
page_title="๐ฒTalkingAIResearcher๐",
|
31 |
page_icon="๐ฒ๐",
|
32 |
-
layout="wide"
|
33 |
-
initial_sidebar_state="auto",
|
34 |
)
|
35 |
|
36 |
-
# Initialize session state
|
37 |
-
if 'tts_voice' not in st.session_state:
|
38 |
-
st.session_state['tts_voice'] = "en-US-AriaNeural"
|
39 |
-
if 'audio_format' not in st.session_state:
|
40 |
-
st.session_state['audio_format'] = 'mp3'
|
41 |
-
if 'scroll_text' not in st.session_state:
|
42 |
-
st.session_state['scroll_text'] = ''
|
43 |
-
|
44 |
EDGE_TTS_VOICES = [
|
45 |
"en-US-AriaNeural",
|
46 |
-
"en-US-GuyNeural",
|
47 |
"en-US-JennyNeural",
|
48 |
-
"en-GB-SoniaNeural"
|
49 |
]
|
50 |
|
51 |
FILE_EMOJIS = {
|
@@ -53,81 +30,113 @@ FILE_EMOJIS = {
|
|
53 |
"mp3": "๐ต",
|
54 |
"wav": "๐",
|
55 |
"txt": "๐",
|
56 |
-
"pdf": "๐"
|
57 |
-
"json": "๐",
|
58 |
-
"csv": "๐",
|
59 |
-
"zip": "๐ฆ"
|
60 |
}
|
61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
@st.cache_resource
|
63 |
def get_cached_audio_b64(file_path):
|
64 |
-
"""Cache audio file as base64"""
|
65 |
with open(file_path, "rb") as f:
|
66 |
return base64.b64encode(f.read()).decode()
|
67 |
|
68 |
def beautify_filename(filename):
|
69 |
-
"""Make filename more readable"""
|
70 |
name = os.path.splitext(filename)[0]
|
71 |
-
|
72 |
-
return name
|
73 |
-
|
74 |
-
def load_files_for_sidebar():
|
75 |
-
"""Load and group files by timestamp prefix"""
|
76 |
-
md_files = glob.glob("*.md")
|
77 |
-
mp3_files = glob.glob("*.mp3")
|
78 |
-
wav_files = glob.glob("*.wav")
|
79 |
|
80 |
-
|
81 |
all_files = md_files + mp3_files + wav_files
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
groups = defaultdict(list)
|
84 |
-
for f in
|
85 |
basename = os.path.basename(f)
|
86 |
group_name = basename[:9] if len(basename) >= 9 else 'Other'
|
87 |
groups[group_name].append(f)
|
88 |
-
|
89 |
return sorted(groups.items(),
|
90 |
key=lambda x: max(os.path.getmtime(f) for f in x[1]),
|
91 |
reverse=True)
|
92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
def display_file_manager_sidebar(groups_sorted):
|
94 |
-
"""Enhanced sidebar with audio players and beautified names"""
|
95 |
st.sidebar.title("๐ File Manager")
|
96 |
-
|
97 |
-
|
98 |
for _, files in groups_sorted:
|
99 |
for f in files:
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
|
104 |
-
# File management buttons
|
105 |
cols = st.sidebar.columns(4)
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
st.
|
118 |
-
|
119 |
-
if st.button("๐ฆ Zip"):
|
120 |
-
zip_name = create_zip_of_files(all_md, all_mp3, all_wav,
|
121 |
-
st.session_state.get('last_query', ''))
|
122 |
-
if zip_name:
|
123 |
-
st.sidebar.markdown(get_download_link(zip_name), unsafe_allow_html=True)
|
124 |
-
|
125 |
-
# Display file groups
|
126 |
for group_name, files in groups_sorted:
|
127 |
-
|
128 |
-
|
129 |
|
130 |
-
with st.sidebar.expander(f"๐ {
|
131 |
c1, c2 = st.columns(2)
|
132 |
with c1:
|
133 |
if st.button("๐", key=f"view_{group_name}"):
|
@@ -144,53 +153,119 @@ def display_file_manager_sidebar(groups_sorted):
|
|
144 |
st.write(f"{emoji} **{pretty_name}**")
|
145 |
|
146 |
if ext in ['mp3', 'wav']:
|
147 |
-
audio_b64 = get_cached_audio_b64(f)
|
148 |
st.audio(f)
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
<
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
|
164 |
def main():
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
|
|
|
|
169 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
|
171 |
-
|
172 |
-
|
173 |
|
174 |
-
|
175 |
-
sidebar_md = """
|
176 |
-
# ๐ง AGI Levels
|
177 |
L0 โ No AI
|
178 |
-
L1 ๐ฑ
|
179 |
-
L2 ๐ช
|
180 |
-
L3 ๐ฏ
|
181 |
-
L4 ๐
|
182 |
-
L5 ๐
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
2. ๐ DB Search
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
"""
|
194 |
-
|
195 |
-
if __name__=="__main__":
|
196 |
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, time, zipfile
|
|
|
|
|
3 |
from datetime import datetime
|
4 |
from audio_recorder_streamlit import audio_recorder
|
5 |
+
from collections import defaultdict, Counter
|
6 |
from dotenv import load_dotenv
|
7 |
from gradio_client import Client
|
8 |
from huggingface_hub import InferenceClient
|
|
|
9 |
from PIL import Image
|
|
|
|
|
|
|
10 |
from openai import OpenAI
|
|
|
11 |
import asyncio
|
12 |
import edge_tts
|
13 |
+
from streamlit_marquee import streamlit_marquee
|
14 |
|
|
|
15 |
# Page-level Streamlit configuration (browser tab title/icon and wide layout).
# NOTE(review): Streamlit documents set_page_config as a call that should run
# before other Streamlit commands — keep it at the top of the script.
st.set_page_config(
    page_title="๐ฒTalkingAIResearcher๐",
    page_icon="๐ฒ๐",
    layout="wide"
)

# Voice identifiers offered for edge-tts speech synthesis. The first entry is
# the one used to seed the 'tts_voice' session-state default further down.
EDGE_TTS_VOICES = [
    "en-US-AriaNeural",
    "en-US-GuyNeural",
    "en-US-JennyNeural",
    "en-GB-SoniaNeural"
]
|
27 |
|
28 |
FILE_EMOJIS = {
|
|
|
30 |
"mp3": "๐ต",
|
31 |
"wav": "๐",
|
32 |
"txt": "๐",
|
33 |
+
"pdf": "๐"
|
|
|
|
|
|
|
34 |
}
|
35 |
|
36 |
+
# Load variables from a local .env file (a no-op when none exists) before the
# os.getenv() lookups below. load_dotenv is imported at the top of the file
# but was never called, so keys kept in .env were silently ignored.
load_dotenv()

# Initialize session states: each key is seeded only when it is absent, so
# values already stored in st.session_state are left untouched.
_SESSION_DEFAULTS = {
    'tts_voice': EDGE_TTS_VOICES[0],
    'audio_format': 'mp3',
    'messages': [],
    'chat_history': [],
    'viewing_prefix': None,
    'should_rerun': False,
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# API Setup: both clients read their key from the process environment.
openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
claude_client = anthropic.Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
|
53 |
+
|
54 |
@st.cache_resource
def get_cached_audio_b64(file_path):
    """Read ``file_path`` as bytes and return its base64 encoding as text.

    The function is wrapped in ``st.cache_resource``, so Streamlit memoises
    the result for a given ``file_path`` argument.
    """
    with open(file_path, "rb") as audio_file:
        raw_bytes = audio_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode()
|
58 |
|
59 |
def beautify_filename(filename):
    """Return ``filename`` minus its extension, with '_' and '.' shown as spaces."""
    stem, _extension = os.path.splitext(filename)
    # One translation pass maps both separator characters to a space.
    return stem.translate(str.maketrans('_.', '  '))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
+
def create_zip_of_files(md_files, mp3_files, wav_files, query=''):
    """Bundle the given files into a timestamped zip archive in the working directory.

    Args:
        md_files: Iterable of markdown file paths.
        mp3_files: Iterable of mp3 file paths.
        wav_files: Iterable of wav file paths.
        query: Accepted for backward compatibility; it does not influence the
            archive name or contents.

    Returns:
        The archive file name (``"<yymm_HHMM>_archive.zip"``), or ``None`` when
        there is nothing to archive.
    """
    candidates = [*md_files, *mp3_files, *wav_files]
    # A listed file may have been deleted since the listing was built; skip it
    # instead of failing halfway through and leaving a partial archive behind.
    present = [path for path in candidates if os.path.isfile(path)]
    if not present:
        return None

    timestamp = datetime.now().strftime("%y%m_%H%M")
    zip_name = f"{timestamp}_archive.zip"
    # ZIP_DEFLATED compresses the members; the default (ZIP_STORED) only
    # concatenates them uncompressed.
    with zipfile.ZipFile(zip_name, 'w', compression=zipfile.ZIP_DEFLATED) as archive:
        for path in present:
            archive.write(path)
    return zip_name
|
73 |
|
74 |
+
def get_download_link(file_path, file_type="zip"):
    """Build an HTML ``<a>`` tag that downloads ``file_path`` via a base64 data URI.

    Args:
        file_path: Path of the file whose bytes are embedded in the link.
        file_type: Short type tag ('zip', 'mp3', 'wav', 'md', ...) used to pick
            the emoji prefix and the MIME type of the data URI.

    Returns:
        The anchor element as a string; the file's base name is used both for
        the ``download`` attribute and for the visible link text.
    """
    from html import escape  # local import: only needed by this helper

    with open(file_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    ext_map = {'zip': '๐ฆ', 'mp3': '๐ต', 'wav': '๐', 'md': '๐'}
    emoji = ext_map.get(file_type, '')

    # "application/<ext>" is only a real MIME type for zip; audio and markdown
    # have their own registered types. Unknown tags keep the previous fallback.
    mime_map = {
        'zip': 'application/zip',
        'mp3': 'audio/mpeg',
        'wav': 'audio/wav',
        'md': 'text/markdown',
    }
    mime = escape(mime_map.get(file_type, f"application/{file_type}"), quote=True)

    # The caller renders this markup with unsafe_allow_html=True, so the file
    # name must be escaped before it is placed in an attribute and in text.
    safe_name = escape(os.path.basename(file_path), quote=True)
    return f'<a href="data:{mime};base64,{b64}" download="{safe_name}">{emoji} Download {safe_name}</a>'
|
80 |
+
|
81 |
+
def load_files_for_sidebar():
    """Group files in the working directory by the first 9 characters of their name.

    Every ``*.*`` match is considered except names ending in 'readme.md'
    (case-insensitive). Names shorter than 9 characters go to the 'Other'
    group.

    Returns:
        A list of ``(group_name, [paths])`` tuples, ordered so the group that
        holds the most recently modified file comes first.
    """
    grouped = defaultdict(list)
    for path in glob.glob("*.*"):
        if path.lower().endswith('readme.md'):
            continue
        leaf = os.path.basename(path)
        bucket = 'Other' if len(leaf) < 9 else leaf[:9]
        grouped[bucket].append(path)

    def newest_mtime(entry):
        # entry is (group_name, paths); rank a group by its freshest member.
        return max(os.path.getmtime(member) for member in entry[1])

    ordered = list(grouped.items())
    ordered.sort(key=newest_mtime, reverse=True)
    return ordered
|
91 |
|
92 |
+
def display_marquee_controls():
    """Render the marquee styling widgets in the sidebar and return the chosen style.

    Returns:
        A dict of style settings (background, color, font-size,
        animationDuration, width, lineHeight) built from the widget values.
    """
    st.sidebar.markdown("### ๐ฏ Marquee Settings")
    colour_col, sizing_col = st.sidebar.columns(2)

    with colour_col:
        background = st.color_picker("๐จ Background", "#1E1E1E")
        foreground = st.color_picker("โ๏ธ Text", "#FFFFFF")

    with sizing_col:
        size_px = st.slider("๐ Size", 10, 24, 14)
        seconds = st.slider("โฑ๏ธ Speed", 1, 20, 10)

    # Width and line height are fixed; the rest follows the widgets above.
    settings = {
        "background": background,
        "color": foreground,
        "font-size": f"{size_px}px",
        "animationDuration": f"{seconds}s",
        "width": "100%",
        "lineHeight": "35px"
    }
    return settings
|
110 |
+
|
111 |
def display_file_manager_sidebar(groups_sorted):
|
|
|
112 |
st.sidebar.title("๐ File Manager")
|
113 |
+
all_files = {'md': [], 'mp3': [], 'wav': []}
|
114 |
+
|
115 |
for _, files in groups_sorted:
|
116 |
for f in files:
|
117 |
+
ext = os.path.splitext(f)[1].lower().strip('.')
|
118 |
+
if ext in all_files:
|
119 |
+
all_files[ext].append(f)
|
120 |
|
|
|
121 |
cols = st.sidebar.columns(4)
|
122 |
+
for i, (ext, files) in enumerate(all_files.items()):
|
123 |
+
with cols[i]:
|
124 |
+
if st.button(f"๐๏ธ {ext.upper()}"):
|
125 |
+
[os.remove(f) for f in files]
|
126 |
+
st.session_state.should_rerun = True
|
127 |
+
|
128 |
+
if st.sidebar.button("๐ฆ Zip All"):
|
129 |
+
zip_name = create_zip_of_files(
|
130 |
+
all_files['md'], all_files['mp3'], all_files['wav']
|
131 |
+
)
|
132 |
+
if zip_name:
|
133 |
+
st.sidebar.markdown(get_download_link(zip_name), unsafe_allow_html=True)
|
134 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
for group_name, files in groups_sorted:
|
136 |
+
timestamp = (datetime.strptime(group_name, "%y%m_%H%M").strftime("%Y-%m-%d %H:%M")
|
137 |
+
if len(group_name) == 9 else group_name)
|
138 |
|
139 |
+
with st.sidebar.expander(f"๐ {timestamp} ({len(files)})", expanded=True):
|
140 |
c1, c2 = st.columns(2)
|
141 |
with c1:
|
142 |
if st.button("๐", key=f"view_{group_name}"):
|
|
|
153 |
st.write(f"{emoji} **{pretty_name}**")
|
154 |
|
155 |
if ext in ['mp3', 'wav']:
|
|
|
156 |
st.audio(f)
|
157 |
+
if st.button("๐", key=f"loop_{f}"):
|
158 |
+
audio_b64 = get_cached_audio_b64(f)
|
159 |
+
st.components.v1.html(
|
160 |
+
f'''
|
161 |
+
<audio id="player_{f}" loop>
|
162 |
+
<source src="data:audio/{ext};base64,{audio_b64}">
|
163 |
+
</audio>
|
164 |
+
<script>
|
165 |
+
document.getElementById("player_{f}").play();
|
166 |
+
</script>
|
167 |
+
''',
|
168 |
+
height=0
|
169 |
+
)
|
170 |
+
|
171 |
+
async def edge_tts_generate(text, voice, file_format="mp3"):
    """Synthesize ``text`` with edge-tts and save it to a timestamped file.

    Runs of whitespace in ``text`` are collapsed to single spaces first.

    Returns:
        The name of the written file
        (``"<yymm_HHMM>_<voice>.<file_format>"``), or ``None`` when the text
        is empty after whitespace normalisation.
    """
    cleaned = re.sub(r'\s+', ' ', text).strip()
    if cleaned == "":
        return None

    speaker = edge_tts.Communicate(cleaned, voice)
    stamp = datetime.now().strftime('%y%m_%H%M')
    out_name = f"{stamp}_{voice}.{file_format}"
    await speaker.save(out_name)
    return out_name
|
178 |
+
|
179 |
+
def parse_arxiv_refs(text):
    """Parse a markdown-style reference listing into a list of paper dicts.

    A line containing '|' starts a new paper: the first two '|'-separated
    fields (after stripping leading/trailing '*' and spaces) become ``date``
    and ``title``. The first non-empty following line becomes ``authors``;
    later lines are joined with single spaces into ``summary``.

    Args:
        text: The listing to parse. Empty or ``None`` yields an empty list.

    Returns:
        A list of dicts with keys 'date', 'title', 'authors', 'summary' and
        'id' ('' when no arXiv identifier is found on the header line).
    """
    if not text:
        return []

    # arXiv identifiers are YYMM.NNNN (older papers) or YYMM.NNNNN; accept both.
    id_pattern = re.compile(r'(\d{4}\.\d{4,5})')

    papers = []
    current_paper = None

    for line in text.split('\n'):
        if '|' in line:
            if current_paper:
                papers.append(current_paper)
            # '|' is present, so the split always yields at least two parts.
            parts = line.strip('* ').split('|')
            id_match = id_pattern.search(line)
            current_paper = {
                'date': parts[0].strip(),
                'title': parts[1].strip(),
                'authors': '',
                'summary': '',
                'id': id_match.group(1) if id_match else ''
            }
        elif current_paper:
            if not current_paper['authors']:
                current_paper['authors'] = line.strip('* ')
            else:
                fragment = line.strip()
                # Skip blank lines and avoid a stray leading separator.
                if fragment:
                    current_paper['summary'] = (
                        f"{current_paper['summary']} {fragment}".strip()
                    )

    if current_paper:
        papers.append(current_paper)
    return papers
|
204 |
+
|
205 |
+
def perform_ai_lookup(query, marquee_settings=None):
    """Query the hosted arXiv RAG space and show each returned paper as a marquee.

    Args:
        query: Search text forwarded to the remote ``/update_with_rag_md`` endpoint.
        marquee_settings: Optional style dict for the marquees. When omitted,
            the sidebar controls are rendered via ``display_marquee_controls()``
            and their values are used (the previous behaviour).

    Returns:
        The list of paper dicts produced by ``parse_arxiv_refs`` from the
        first element of the endpoint's response.
    """
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    response = client.predict(
        query, 20, "Semantic Search",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        api_name="/update_with_rag_md"
    )

    papers = parse_arxiv_refs(response[0])
    if marquee_settings is None:
        # NOTE(review): main() also calls display_marquee_controls() in the same
        # script run; rendering the same un-keyed widgets twice looks likely to
        # trigger Streamlit's duplicate-widget error — callers that already hold
        # the settings should pass them in. TODO confirm.
        marquee_settings = display_marquee_controls()

    for index, paper in enumerate(papers):
        content = f"๐ {paper['title']} | ๐ค {paper['authors']} | ๐ {paper['summary']}"
        streamlit_marquee(
            content=content,
            **marquee_settings,
            # The position makes the key unique and stable across reruns; the
            # former random fallback could collide and changed on every rerun.
            key=f"paper_{index}_{paper['id']}"
        )
        st.write("")  # Spacing

    return papers
|
226 |
|
227 |
def main():
    """Render the page: welcome marquee, action selector, ArXiv search and file sidebar."""
    welcome_style = display_marquee_controls()
    streamlit_marquee(
        content="๐ Welcome to TalkingAIResearcher | ๐ค Your Research Assistant",
        **welcome_style,
        key="welcome"
    )

    chosen_action = st.radio(
        "Action:",
        ["๐ค Voice", "๐ ArXiv", "๐ Editor"],
        horizontal=True,
    )

    # Only the ArXiv action has behaviour attached in this function.
    if chosen_action == "๐ ArXiv":
        search_text = st.text_input("๐ Search:")
        if search_text:
            found = perform_ai_lookup(search_text)
            st.write(f"Found {len(found)} papers")

    display_file_manager_sidebar(load_files_for_sidebar())

    # The sidebar sets should_rerun after deleting files; clear the flag
    # before rerunning so the rerun does not loop.
    if not st.session_state.should_rerun:
        return
    st.session_state.should_rerun = False
    st.rerun()
|
250 |
|
251 |
+
# Condensed sidebar markdown
# Static reference text (AGI level list and AlphaFold2 pipeline steps with
# arXiv links). It is rendered at module level, i.e. before main() is invoked
# by the entry-point guard below.
sidebar_md = """# ๐ Research Papers

## ๐ง AGI Levels
L0 โ No AI
L1 ๐ฑ ChatGPT/Bard [2303.08774v1](https://arxiv.org/abs/2303.08774) [PDF](https://arxiv.org/pdf/2303.08774.pdf)
L2 ๐ช Watson [2201.11903v1](https://arxiv.org/abs/2201.11903) [PDF](https://arxiv.org/pdf/2201.11903.pdf)
L3 ๐ฏ DALLยทE [2204.06125v1](https://arxiv.org/abs/2204.06125) [PDF](https://arxiv.org/pdf/2204.06125.pdf)
L4 ๐ AlphaGo [1712.01815v1](https://arxiv.org/abs/1712.01815) [PDF](https://arxiv.org/pdf/1712.01815.pdf)
L5 ๐ AlphaFold [2203.15556v1](https://arxiv.org/abs/2203.15556) [PDF](https://arxiv.org/pdf/2203.15556.pdf)

## ๐งฌ AlphaFold2
[2203.15556v1](https://arxiv.org/abs/2203.15556) [PDF](https://arxiv.org/pdf/2203.15556.pdf)
1. ๐งฌ Input Seq โ 2. ๐ DB Search โ 3. ๐งฉ MSA
4. ๐ Templates โ 5. ๐ Evoformer โ 6. ๐งฑ Structure
7. ๐ฏ 3D Predict โ 8. โป๏ธ Recycle x3"""

st.sidebar.markdown(sidebar_md)

# Script entry point: run the interactive page.
if __name__ == "__main__":
    main()
|