GogetaBlueMUI committed on
Commit
347c33b
·
verified ·
1 Parent(s): e21229e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -5
app.py CHANGED
@@ -9,6 +9,7 @@ import ffmpeg
9
  import time
10
  import json
11
  import psutil
 
12
 
13
  st.set_page_config(layout="wide")
14
 
@@ -589,14 +590,21 @@ def main():
589
  st.session_state['show_timeframe'] = st.checkbox("Show timeframe in transcript", value=st.session_state['show_timeframe'])
590
  st.markdown("### Search Subtitles")
591
  search_query = st.text_input("Search subtitles...", value=st.session_state['search_query'], key="search_input")
592
- st.session_state['search_query'] = search_query.lower()
593
 
594
- # Primary Transcript
595
  st.markdown(f"### {st.session_state['language']} Transcript")
596
  primary_matches = 0
597
  for text, start, end in st.session_state['primary_transcript']:
598
  display_text = text.lower()
599
- if not search_query or search_query in display_text:
 
 
 
 
 
 
 
600
  primary_matches += 1
601
  label = f"[{format_time(start)} - {format_time(end)}] {text}" if st.session_state['show_timeframe'] else text
602
  if st.button(label, key=f"primary_{start}"):
@@ -605,13 +613,20 @@ def main():
605
  if primary_matches == 0 and search_query:
606
  st.info("No matches found in primary transcript for the search query.")
607
 
608
- # English Transcript
609
  if st.session_state['english_transcript']:
610
  st.markdown("### English Translation")
611
  english_matches = 0
612
  for text, start, end in st.session_state['english_transcript']:
613
  display_text = text.lower()
614
- if not search_query or search_query in display_text:
 
 
 
 
 
 
 
615
  english_matches += 1
616
  label = f"[{format_time(start)} - {format_time(end)}] {text}" if st.session_state['show_timeframe'] else text
617
  if st.button(label, key=f"english_{start}"):
 
9
  import time
10
  import json
11
  import psutil
12
+ from fuzzywuzzy import fuzz # Added for fuzzy matching
13
 
14
  st.set_page_config(layout="wide")
15
 
 
590
  st.session_state['show_timeframe'] = st.checkbox("Show timeframe in transcript", value=st.session_state['show_timeframe'])
591
  st.markdown("### Search Subtitles")
592
  search_query = st.text_input("Search subtitles...", value=st.session_state['search_query'], key="search_input")
593
+ st.session_state['search_query'] = search_query.lower().strip()
594
 
595
+ # Primary Transcript with Fuzzy Matching
596
  st.markdown(f"### {st.session_state['language']} Transcript")
597
  primary_matches = 0
598
  for text, start, end in st.session_state['primary_transcript']:
599
  display_text = text.lower()
600
+ # Use fuzzy matching for partial and robust search
601
+ if not search_query:
602
+ match = True
603
+ elif len(search_query) < 3: # Exact match for very short queries
604
+ match = search_query in display_text
605
+ else: # Fuzzy matching for longer queries
606
+ match = fuzz.partial_ratio(search_query, display_text) >= 70
607
+ if match:
608
  primary_matches += 1
609
  label = f"[{format_time(start)} - {format_time(end)}] {text}" if st.session_state['show_timeframe'] else text
610
  if st.button(label, key=f"primary_{start}"):
 
613
  if primary_matches == 0 and search_query:
614
  st.info("No matches found in primary transcript for the search query.")
615
 
616
+ # English Transcript with Fuzzy Matching
617
  if st.session_state['english_transcript']:
618
  st.markdown("### English Translation")
619
  english_matches = 0
620
  for text, start, end in st.session_state['english_transcript']:
621
  display_text = text.lower()
622
+ # Use fuzzy matching for partial and robust search
623
+ if not search_query:
624
+ match = True
625
+ elif len(search_query) < 3: # Exact match for very short queries
626
+ match = search_query in display_text
627
+ else: # Fuzzy matching for longer queries
628
+ match = fuzz.partial_ratio(search_query, display_text) >= 70
629
+ if match:
630
  english_matches += 1
631
  label = f"[{format_time(start)} - {format_time(end)}] {text}" if st.session_state['show_timeframe'] else text
632
  if st.button(label, key=f"english_{start}"):