Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -9,6 +9,7 @@ import ffmpeg
|
|
9 |
import time
|
10 |
import json
|
11 |
import psutil
|
|
|
12 |
|
13 |
st.set_page_config(layout="wide")
|
14 |
|
@@ -589,14 +590,21 @@ def main():
|
|
589 |
st.session_state['show_timeframe'] = st.checkbox("Show timeframe in transcript", value=st.session_state['show_timeframe'])
|
590 |
st.markdown("### Search Subtitles")
|
591 |
search_query = st.text_input("Search subtitles...", value=st.session_state['search_query'], key="search_input")
|
592 |
-
st.session_state['search_query'] = search_query.lower()
|
593 |
|
594 |
-
# Primary Transcript
|
595 |
st.markdown(f"### {st.session_state['language']} Transcript")
|
596 |
primary_matches = 0
|
597 |
for text, start, end in st.session_state['primary_transcript']:
|
598 |
display_text = text.lower()
|
599 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
600 |
primary_matches += 1
|
601 |
label = f"[{format_time(start)} - {format_time(end)}] {text}" if st.session_state['show_timeframe'] else text
|
602 |
if st.button(label, key=f"primary_{start}"):
|
@@ -605,13 +613,20 @@ def main():
|
|
605 |
if primary_matches == 0 and search_query:
|
606 |
st.info("No matches found in primary transcript for the search query.")
|
607 |
|
608 |
-
# English Transcript
|
609 |
if st.session_state['english_transcript']:
|
610 |
st.markdown("### English Translation")
|
611 |
english_matches = 0
|
612 |
for text, start, end in st.session_state['english_transcript']:
|
613 |
display_text = text.lower()
|
614 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
615 |
english_matches += 1
|
616 |
label = f"[{format_time(start)} - {format_time(end)}] {text}" if st.session_state['show_timeframe'] else text
|
617 |
if st.button(label, key=f"english_{start}"):
|
|
|
9 |
import time
|
10 |
import json
|
11 |
import psutil
|
12 |
+
from fuzzywuzzy import fuzz # Added for fuzzy matching
|
13 |
|
14 |
st.set_page_config(layout="wide")
|
15 |
|
|
|
590 |
st.session_state['show_timeframe'] = st.checkbox("Show timeframe in transcript", value=st.session_state['show_timeframe'])
|
591 |
st.markdown("### Search Subtitles")
|
592 |
search_query = st.text_input("Search subtitles...", value=st.session_state['search_query'], key="search_input")
|
593 |
+
st.session_state['search_query'] = search_query.lower().strip()
|
594 |
|
595 |
+
# Primary Transcript with Fuzzy Matching
|
596 |
st.markdown(f"### {st.session_state['language']} Transcript")
|
597 |
primary_matches = 0
|
598 |
for text, start, end in st.session_state['primary_transcript']:
|
599 |
display_text = text.lower()
|
600 |
+
# Use fuzzy matching for partial and robust search
|
601 |
+
if not search_query:
|
602 |
+
match = True
|
603 |
+
elif len(search_query) < 3: # Plain substring match (no fuzzy scoring) for very short queries
|
604 |
+
match = search_query in display_text
|
605 |
+
else: # Fuzzy matching for longer queries
|
606 |
+
match = fuzz.partial_ratio(search_query, display_text) >= 70
|
607 |
+
if match:
|
608 |
primary_matches += 1
|
609 |
label = f"[{format_time(start)} - {format_time(end)}] {text}" if st.session_state['show_timeframe'] else text
|
610 |
if st.button(label, key=f"primary_{start}"):
|
|
|
613 |
if primary_matches == 0 and search_query:
|
614 |
st.info("No matches found in primary transcript for the search query.")
|
615 |
|
616 |
+
# English Transcript with Fuzzy Matching
|
617 |
if st.session_state['english_transcript']:
|
618 |
st.markdown("### English Translation")
|
619 |
english_matches = 0
|
620 |
for text, start, end in st.session_state['english_transcript']:
|
621 |
display_text = text.lower()
|
622 |
+
# Use fuzzy matching for partial and robust search
|
623 |
+
if not search_query:
|
624 |
+
match = True
|
625 |
+
elif len(search_query) < 3: # Plain substring match (no fuzzy scoring) for very short queries
|
626 |
+
match = search_query in display_text
|
627 |
+
else: # Fuzzy matching for longer queries
|
628 |
+
match = fuzz.partial_ratio(search_query, display_text) >= 70
|
629 |
+
if match:
|
630 |
english_matches += 1
|
631 |
label = f"[{format_time(start)} - {format_time(end)}] {text}" if st.session_state['show_timeframe'] else text
|
632 |
if st.button(label, key=f"english_{start}"):
|