awacke1 committed
Commit 4be1d74 • Parent(s): 82cc38a

Update app.py

Files changed (1):
  1. app.py +58 -14
app.py CHANGED
@@ -17,6 +17,8 @@ from xml.etree import ElementTree as ET
 from openai import OpenAI
 import extra_streamlit_components as stx
 from streamlit.runtime.scriptrunner import get_script_run_ctx
+import asyncio
+import edge_tts  # ensure this is installed (pip install edge-tts)
 
 # 🔧 Config & Setup
 st.set_page_config(
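
Note: the two new imports back the EdgeTTS helpers added further down; edge_tts is the only new third-party dependency. A minimal, hedged smoke test for that dependency, runnable outside Streamlit (the voice name and output filename here are illustrative, not from this commit):

    # Verify edge-tts can synthesize and save an MP3, independent of the app.
    import asyncio
    import edge_tts

    async def smoke_test():
        # Communicate(text, voice) builds the request; save() writes the audio.
        communicate = edge_tts.Communicate("Hello from Edge TTS", "en-US-AriaNeural")
        await communicate.save("smoke_test.mp3")

    asyncio.run(smoke_test())
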
@@ -71,6 +73,7 @@ def get_download_link(file):
 
 @st.cache_resource
 def speech_synthesis_html(result):
+    # This old function can remain as a fallback, but we won't use it after integrating EdgeTTS.
     html_code = f"""
     <html><body>
     <script>
@@ -81,6 +84,37 @@ def speech_synthesis_html(result):
     """
     components.html(html_code, height=0)
 
+#------------add EdgeTTS
+# --- NEW FUNCTIONS FOR EDGE TTS ---
+async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0):
+    """
+    Generate audio from text using Edge TTS and return the path to the MP3 file.
+    """
+    if not text.strip():
+        return None
+    rate_str = f"{rate:+d}%"
+    pitch_str = f"{pitch:+d}Hz"
+    communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
+    out_fn = generate_filename(text,"mp3")
+    await communicate.save(out_fn)
+    return out_fn
+
+def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0):
+    """
+    Synchronous wrapper to call the async TTS generation and return the file path.
+    """
+    return asyncio.run(edge_tts_generate_audio(text, voice, rate, pitch))
+
+def play_and_download_audio(file_path):
+    """
+    Display an audio player and a download link for the generated MP3 file.
+    """
+    if file_path and os.path.exists(file_path):
+        st.audio(file_path)
+        st.markdown(get_download_link(file_path), unsafe_allow_html=True)
+#---------------------------
+
+
 def process_image(image_path, user_prompt):
     with open(image_path, "rb") as imgf:
         image_data = imgf.read()
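
One caveat on the synchronous wrapper: asyncio.run raises RuntimeError when the calling thread already has a running event loop, which can happen in some Streamlit hosting setups. A hedged variant, not part of this commit (speak_with_edge_tts_safe is a hypothetical name; it reuses edge_tts_generate_audio from the hunk above), that falls back to a worker thread:

    import asyncio
    from concurrent.futures import ThreadPoolExecutor

    def speak_with_edge_tts_safe(text, voice="en-US-AriaNeural", rate=0, pitch=0):
        try:
            return asyncio.run(edge_tts_generate_audio(text, voice, rate, pitch))
        except RuntimeError:
            # asyncio.run() cannot be called from a running event loop;
            # a fresh worker thread has no loop, so asyncio.run works there.
            with ThreadPoolExecutor(max_workers=1) as pool:
                return pool.submit(
                    asyncio.run, edge_tts_generate_audio(text, voice, rate, pitch)
                ).result()
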
@@ -153,16 +187,27 @@ def perform_ai_lookup(q):
     # Ask model for answer
     r2 = client.predict(q,"mistralai/Mixtral-8x7B-Instruct-v0.1",True,api_name="/ask_llm")
     result = f"### 🔎 {q}\n\n{r2}\n\n{refs}"
+
+    #---------------------------------------------------------------
     # Speak results
-    speech_synthesis_html(r2)
+    #speech_synthesis_html(r2)
+
+    # Instead of speech_synthesis_html, use EdgeTTS now:
+    st.markdown(result)
 
-    # Attempt to speak summaries and titles from refs
-    # Assuming refs contain a set of references in Markdown with possible titles.
-    # We'll just re-speak refs as "summaries".
+    # Speak main result
+    audio_file_main = speak_with_edge_tts(r2, voice="en-US-AriaNeural", rate=0, pitch=0)
+    st.write("### Audio Output for Main Result")
+    play_and_download_audio(audio_file_main)
+
+
+    # Speak references summaries
     summaries_text = "Here are the summaries from the references: " + refs.replace('"','')
-    speech_synthesis_html(summaries_text)
+    audio_file_refs = speak_with_edge_tts(summaries_text, voice="en-US-AriaNeural", rate=0, pitch=0)
+    st.write("### Audio Output for References Summaries")
+    play_and_download_audio(audio_file_refs)
 
-    # Extract titles from refs (looking for markdown links [Title](URL))
+    # Extract titles from refs and speak them
     titles = []
     for line in refs.split('\n'):
         m = re.search(r"\[([^\]]+)\]", line)
@@ -170,7 +215,11 @@ def perform_ai_lookup(q):
             titles.append(m.group(1))
     if titles:
         titles_text = "Here are the titles of the papers: " + ", ".join(titles)
-        speech_synthesis_html(titles_text)
+        audio_file_titles = speak_with_edge_tts(titles_text, voice="en-US-AriaNeural", rate=0, pitch=0)
+        st.write("### Audio Output for Paper Titles")
+        play_and_download_audio(audio_file_titles)
+    # --------------------------------------------
+
 
     st.markdown(result)
     elapsed = time.time()-start
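
For reference, the title extraction hinges on the regex matching the [Title](URL) markdown pattern in refs. A small illustration with a made-up reference line:

    import re
    line = "1. [Attention Is All You Need](https://arxiv.org/abs/1706.03762)"
    m = re.search(r"\[([^\]]+)\]", line)
    print(m.group(1))  # -> Attention Is All You Need
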
@@ -282,15 +331,13 @@ def display_file_manager():
         if st.button("🗑",key="d"+f):
             os.remove(f)
             st.experimental_rerun()
-
 def main():
     st.sidebar.markdown("### 🚲BikeAI🏆 Multi-Agent Research AI")
     tab_main = st.radio("Action:",["🎤 Voice Input","📸 Media Gallery","🔍 Search ArXiv","📝 File Editor"],horizontal=True)
 
-    # Changed model order and default:
     model_choice = st.sidebar.radio("AI Model:", ["Arxiv","GPT-4o","Claude-3","GPT+Claude+Arxiv"], index=0)
 
-    # Speech-to-Text component placeholder (example)
+    # A simple component placeholder
     mycomponent = components.declare_component("mycomponent", path="mycomponent")
     val = mycomponent(my_input_value="Hello")
     if val:
@@ -300,11 +347,9 @@ def main():
        elif model_choice == "Claude-3":
            process_with_claude(user_input)
        elif model_choice == "Arxiv":
-           # Just Arxiv on its own, full column, speak results
            st.subheader("Arxiv Only Results:")
            perform_ai_lookup(user_input)
        else:
-           # GPT+Claude+Arxiv
            col1,col2,col3=st.columns(3)
            with col1:
                st.subheader("GPT-4o Omni:")
@@ -335,7 +380,6 @@ def main():
            st.subheader("Arxiv Only Results:")
            perform_ai_lookup(user_text)
        else:
-           # GPT+Claude+Arxiv
            col1,col2,col3=st.columns(3)
            with col1:
                st.subheader("GPT-4o Omni:")
@@ -379,4 +423,4 @@ def main():
     display_file_manager()
 
 if __name__=="__main__":
-    main()
+    main()
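
Taken together, the new pipeline is synthesize-then-render: speak_with_edge_tts produces an MP3 path and play_and_download_audio displays it. A hedged usage sketch, assuming it runs inside the app where the helpers are defined (the sample text is illustrative; generate_filename and get_download_link live elsewhere in app.py):

    # Somewhere in a Streamlit callback, after the EdgeTTS helpers are defined:
    text = "Here are the titles of the papers: Attention Is All You Need"
    mp3_path = speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0)
    play_and_download_audio(mp3_path)  # renders st.audio plus a download link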