awacke1 committed on
Commit
24d6aa2
•
1 Parent(s): 460cd22

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +174 -108
app.py CHANGED
@@ -519,6 +519,169 @@ def display_file_manager():
519
  os.remove(file)
520
  st.rerun()
521
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
522
  # Helper Functions
523
  def generate_filename(prompt, file_type):
524
  central = pytz.timezone('US/Central')
@@ -652,6 +815,7 @@ def set_transcript(text):
652
  def main():
653
  st.sidebar.markdown("### ๐ŸšฒBikeAI๐Ÿ† Claude and GPT Multi-Agent Research AI")
654
 
 
655
  tab_main = st.radio("Choose Action:",
656
  ["๐ŸŽค Voice Input", "๐Ÿ“ธ Media Gallery", "๐Ÿ” Search ArXiv", "๐Ÿ“ File Editor"],
657
  horizontal=True)
@@ -659,119 +823,21 @@ def main():
659
  if tab_main == "๐ŸŽค Voice Input":
660
  st.subheader("Voice Recognition")
661
 
662
- # Initialize session state for transcript
663
  if 'voice_transcript' not in st.session_state:
664
  st.session_state.voice_transcript = ""
665
-
666
- # Add a container to display the transcript
667
- transcript_container = st.empty()
668
-
669
- # Modify the JavaScript to use Streamlit's component communication
670
- # Create a simpler HTML component that just updates a value we can read
671
- speech_recognition_html = """
672
- <!DOCTYPE html>
673
- <html>
674
- <head>
675
- <title>Speech Recognition</title>
676
- </head>
677
- <body>
678
- <div>
679
- <button id="startButton">Start</button>
680
- <button id="stopButton" disabled>Stop</button>
681
- <div id="status">Click Start to begin</div>
682
- <div id="output"></div>
683
- </div>
684
-
685
- <script>
686
- let recognition;
687
- let transcript = '';
688
-
689
- function initSpeechRecognition() {
690
- if ('webkitSpeechRecognition' in window) {
691
- recognition = new webkitSpeechRecognition();
692
- recognition.continuous = true;
693
- recognition.interimResults = true;
694
-
695
- recognition.onresult = (event) => {
696
- let interimTranscript = '';
697
- let finalTranscript = '';
698
-
699
- for (let i = event.resultIndex; i < event.results.length; i++) {
700
- const result = event.results[i][0].transcript;
701
- if (event.results[i].isFinal) {
702
- finalTranscript += result;
703
- } else {
704
- interimTranscript += result;
705
- }
706
- }
707
-
708
- if (finalTranscript) {
709
- transcript += finalTranscript + ' ';
710
- document.getElementById('output').innerText = transcript;
711
- // Set this as the return value for Streamlit
712
- document.getElementById('streamlit-data').value = transcript;
713
- }
714
- };
715
-
716
- recognition.onend = () => {
717
- if (!document.getElementById('stopButton').disabled) {
718
- recognition.start();
719
- }
720
- };
721
- }
722
- }
723
-
724
- document.getElementById('startButton').onclick = () => {
725
- recognition.start();
726
- document.getElementById('startButton').disabled = true;
727
- document.getElementById('stopButton').disabled = false;
728
- document.getElementById('status').innerText = 'Listening...';
729
- };
730
-
731
- document.getElementById('stopButton').onclick = () => {
732
- recognition.stop();
733
- document.getElementById('startButton').disabled = false;
734
- document.getElementById('stopButton').disabled = true;
735
- document.getElementById('status').innerText = 'Stopped';
736
- };
737
-
738
- // Initialize on load
739
- window.onload = () => {
740
- initSpeechRecognition();
741
- };
742
- </script>
743
 
744
- <!-- Hidden input for Streamlit to read -->
745
- <input type="hidden" id="streamlit-data" value="">
746
- </body>
747
- </html>
748
- """
749
 
750
-
751
- # Display the component
752
- components.html(speech_recognition_html, height=400)
753
-
754
- # Add a placeholder for the transcript
755
- transcript_placeholder = st.empty()
756
-
757
- # Add a refresh button
758
- if st.button("Update Transcript"):
759
- st.rerun()
760
-
761
- # Display the current transcript from session state
762
- if 'voice_transcript' in st.session_state:
763
- transcript_placeholder.text_area(
764
- "Transcript:",
765
- value=st.session_state.voice_transcript,
766
- height=150
767
- )
768
-
769
-
770
-
771
-
772
-
773
-
774
 
 
 
 
 
775
  # Model Selection
776
  model_choice = st.sidebar.radio(
777
  "Choose AI Model:",
 
519
  os.remove(file)
520
  st.rerun()
521
 
522
+
523
+ # Speech Recognition HTML Component
524
+ speech_recognition_html = """
525
+ <!DOCTYPE html>
526
+ <html>
527
+ <head>
528
+ <title>Continuous Speech Demo</title>
529
+ <style>
530
+ body {
531
+ font-family: sans-serif;
532
+ padding: 20px;
533
+ max-width: 800px;
534
+ margin: 0 auto;
535
+ }
536
+ button {
537
+ padding: 10px 20px;
538
+ margin: 10px 5px;
539
+ font-size: 16px;
540
+ }
541
+ #status {
542
+ margin: 10px 0;
543
+ padding: 10px;
544
+ background: #e8f5e9;
545
+ border-radius: 4px;
546
+ }
547
+ #output {
548
+ white-space: pre-wrap;
549
+ padding: 15px;
550
+ background: #f5f5f5;
551
+ border-radius: 4px;
552
+ margin: 10px 0;
553
+ min-height: 100px;
554
+ max-height: 400px;
555
+ overflow-y: auto;
556
+ }
557
+ .controls {
558
+ margin: 10px 0;
559
+ }
560
+ </style>
561
+ </head>
562
+ <body>
563
+ <div class="controls">
564
+ <button id="start">Start Listening</button>
565
+ <button id="stop" disabled>Stop Listening</button>
566
+ <button id="clear">Clear Text</button>
567
+ </div>
568
+ <div id="status">Ready</div>
569
+ <div id="output"></div>
570
+
571
+ <!-- Add the hidden input here -->
572
+ <input type="hidden" id="streamlit-data" value="">
573
+
574
+ <script>
575
+ if (!('webkitSpeechRecognition' in window)) {
576
+ alert('Speech recognition not supported');
577
+ } else {
578
+ const recognition = new webkitSpeechRecognition();
579
+ const startButton = document.getElementById('start');
580
+ const stopButton = document.getElementById('stop');
581
+ const clearButton = document.getElementById('clear');
582
+ const status = document.getElementById('status');
583
+ const output = document.getElementById('output');
584
+ let fullTranscript = '';
585
+ let lastUpdateTime = Date.now();
586
+
587
+ // Configure recognition
588
+ recognition.continuous = true;
589
+ recognition.interimResults = true;
590
+
591
+ // Function to start recognition
592
+ const startRecognition = () => {
593
+ try {
594
+ recognition.start();
595
+ status.textContent = 'Listening...';
596
+ startButton.disabled = true;
597
+ stopButton.disabled = false;
598
+ } catch (e) {
599
+ console.error(e);
600
+ status.textContent = 'Error: ' + e.message;
601
+ }
602
+ };
603
+
604
+ // Auto-start on load
605
+ window.addEventListener('load', () => {
606
+ setTimeout(startRecognition, 1000);
607
+ });
608
+
609
+ startButton.onclick = startRecognition;
610
+
611
+ stopButton.onclick = () => {
612
+ recognition.stop();
613
+ status.textContent = 'Stopped';
614
+ startButton.disabled = false;
615
+ stopButton.disabled = true;
616
+ };
617
+
618
+ clearButton.onclick = () => {
619
+ fullTranscript = '';
620
+ output.textContent = '';
621
+ window.parent.postMessage({
622
+ type: 'clear_transcript',
623
+ }, '*');
624
+ };
625
+
626
+ recognition.onresult = (event) => {
627
+ let interimTranscript = '';
628
+ let finalTranscript = '';
629
+
630
+ for (let i = event.resultIndex; i < event.results.length; i++) {
631
+ const transcript = event.results[i][0].transcript;
632
+ if (event.results[i].isFinal) {
633
+ finalTranscript += transcript + '\\n';
634
+ } else {
635
+ interimTranscript += transcript;
636
+ }
637
+ }
638
+
639
+ if (finalTranscript || (Date.now() - lastUpdateTime > 5000)) {
640
+ if (finalTranscript) {
641
+ fullTranscript += finalTranscript;
642
+
643
+ // Update the hidden input value
644
+ document.getElementById('streamlit-data').value = fullTranscript;
645
+ }
646
+ lastUpdateTime = Date.now();
647
+ }
648
+
649
+ output.textContent = fullTranscript + (interimTranscript ? '... ' + interimTranscript : '');
650
+ output.scrollTop = output.scrollHeight;
651
+
652
+ document.getElementById('streamlit-data').value = fullTranscript;
653
+
654
+ };
655
+
656
+ recognition.onend = () => {
657
+ if (!stopButton.disabled) {
658
+ try {
659
+ recognition.start();
660
+ console.log('Restarted recognition');
661
+ } catch (e) {
662
+ console.error('Failed to restart recognition:', e);
663
+ status.textContent = 'Error restarting: ' + e.message;
664
+ startButton.disabled = false;
665
+ stopButton.disabled = true;
666
+ }
667
+ }
668
+ };
669
+
670
+ recognition.onerror = (event) => {
671
+ console.error('Recognition error:', event.error);
672
+ status.textContent = 'Error: ' + event.error;
673
+
674
+ if (event.error === 'not-allowed' || event.error === 'service-not-allowed') {
675
+ startButton.disabled = false;
676
+ stopButton.disabled = true;
677
+ }
678
+ };
679
+ }
680
+ </script>
681
+ </body>
682
+ </html>
683
+ """
684
+
685
  # Helper Functions
686
  def generate_filename(prompt, file_type):
687
  central = pytz.timezone('US/Central')
 
815
  def main():
816
  st.sidebar.markdown("### ๐ŸšฒBikeAI๐Ÿ† Claude and GPT Multi-Agent Research AI")
817
 
818
+ # Main navigation
819
  tab_main = st.radio("Choose Action:",
820
  ["๐ŸŽค Voice Input", "๐Ÿ“ธ Media Gallery", "๐Ÿ” Search ArXiv", "๐Ÿ“ File Editor"],
821
  horizontal=True)
 
823
  if tab_main == "๐ŸŽค Voice Input":
824
  st.subheader("Voice Recognition")
825
 
826
+ # Initialize session state for the transcript
827
  if 'voice_transcript' not in st.session_state:
828
  st.session_state.voice_transcript = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
829
 
830
+ # Display speech recognition component and capture returned value
831
+ transcript = st.components.v1.html(speech_recognition_html, height=400)
 
 
 
832
 
833
+ # Update session state if there's new data
834
+ if transcript is not None and transcript != "":
835
+ st.session_state.voice_transcript = transcript
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
836
 
837
+ # Display the transcript in a Streamlit text area
838
+ # st.markdown("### Processed Voice Input:")
839
+ # st.text_area("Voice Transcript", st.session_state.voice_transcript, height=100)
840
+
841
  # Model Selection
842
  model_choice = st.sidebar.radio(
843
  "Choose AI Model:",