VidEp – Revolutionizing Video Subtitle Editing with AI
Upload, transcribe, edit subtitles, and summarize videos effortlessly.
""", unsafe_allow_html=True)
# Initialize session state.
# Each key the app reads is given a starting value here; a key that is
# already present in st.session_state is left untouched.
initial_state = {
    'app_state': 'upload',
    'video_path': None,
    'primary_transcript': None,
    'english_transcript': None,
    'english_summary': None,
    'language': None,
    'language_code': None,
    'translate_to_english': False,
    'summarizer_type': None,
    'summary_generated': False,
    'current_time': 0,
    'edited_video_path': None,
    'search_query': "",
    'show_timeframe': True,
}
for state_key, state_default in initial_state.items():
    if state_key not in st.session_state:
        st.session_state[state_key] = state_default
# Upload step: rendered while app_state is 'upload'.
if st.session_state['app_state'] == 'upload':
st.markdown("", unsafe_allow_html=True)
st.markdown("
Upload Your Video
", unsafe_allow_html=True)
# The uploader and its submit button live in one form; only .mp4 files are accepted.
with st.form(key="upload_form"):
uploaded_file = st.file_uploader("Choose a video file", type=["mp4"], label_visibility="collapsed")
# Continue only when the form was submitted AND a file was actually chosen.
if st.form_submit_button("Upload") and uploaded_file:
video_path = save_uploaded_file(uploaded_file)
# A falsy path keeps the app on the upload step.
# NOTE(review): presumably save_uploaded_file reports its own failure - confirm.
if video_path:
st.session_state['video_path'] = video_path
st.session_state['app_state'] = 'processing'
# NOTE(review): st.rerun() follows immediately, so this message is probably never visible - confirm.
st.write(f"Uploaded file: {uploaded_file.name}")
st.rerun()
if st.session_state['app_state'] == 'processing':
with st.form(key="processing_form"):
language = st.selectbox("Select language", ["English", "Urdu"], key="language_select")
language_code = "en" if language == "English" else "ur"
st.session_state['language'] = language
st.session_state['language_code'] = language_code
chunk_duration = st.number_input("Duration per chunk (seconds):", min_value=1.0, step=0.1, value=10.0)
if language_code == "ur":
translate_to_english = st.checkbox("Generate English translation", key="translate_checkbox")
st.session_state['translate_to_english'] = translate_to_english
else:
st.session_state['translate_to_english'] = False
if st.form_submit_button("Process"):
with st.spinner("Processing video..."):
start_time = time.time()
try:
st.write("Extracting audio...")
audio_path = "processed_audio.wav"
ffmpeg.input(st.session_state['video_path']).output(audio_path, ac=1, ar=16000).run(overwrite_output=True, quiet=True)
audio, sr = librosa.load(audio_path, sr=16000)
audio = np.nan_to_num(audio, nan=0.0, posinf=0.0, neginf=0.0)
audio_duration = len(audio) / sr
st.write(f"Audio duration: {audio_duration:.2f} seconds")
if audio_duration < 5:
st.error("Audio too short (< 5s). Upload a longer video.")
return
summarizer_type = 'bart' if audio_duration <= 300 else 'led'
st.write(f"Using summarizer: {summarizer_type}")
st.session_state['summarizer_type'] = summarizer_type
st.write("Loading models...")
processor, model, sum_tokenizer, sum_model, device = load_model(language_code, summarizer_type)
st.write("Splitting audio into chunks...")
chunks = split_audio_into_chunks(audio, sr, chunk_duration)
st.write(f"Number of chunks: {len(chunks)}")
st.write("Transcribing audio...")
primary_transcript = process_chunks(chunks, sr, processor, model, device, language_code, chunk_duration, task="transcribe", transcript_file="temp_primary_transcript.json")
english_transcript = None
if st.session_state['translate_to_english'] and language_code == "ur":
st.write("Translating to English...")
processor, model, _, _, device = load_model('en', summarizer_type)
english_transcript = process_chunks(chunks, sr, processor, model, device, 'ur', chunk_duration, task="translate", transcript_file="temp_english_transcript.json")
st.session_state.update({
'primary_transcript': primary_transcript,
'english_transcript': english_transcript,
'summary_generated': False,
'app_state': 'results'
})
st.write("Processing completed successfully!")
st.rerun()
except Exception as e:
st.error(f"Processing failed: {str(e)}")
finally:
if os.path.exists(audio_path):
os.remove(audio_path)
for temp_file in ["temp_primary_transcript.json", "temp_english_transcript.json"]:
if os.path.exists(temp_file):
os.remove(temp_file)
# Results step: rendered while app_state is 'results'.
if st.session_state['app_state'] == 'results':
st.markdown('
', unsafe_allow_html=True)
# The checkbox's current value is written back to session state and also seeds it on the next run.
st.session_state['show_timeframe'] = st.checkbox("Show timeframe in transcript", value=st.session_state['show_timeframe'])
st.markdown("### Search Subtitles")
# Callback: copy the text box's value (widget key 'search_input') into
# 'search_query', lower-cased and stripped of surrounding whitespace.
def update_search_query():
st.session_state['search_query'] = st.session_state.get('search_input', '').lower().strip()
# Text input pre-filled with the stored query; update_search_query is registered as its on_change callback.
st.text_input("Search subtitles...", value=st.session_state['search_query'], key="search_input", on_change=update_search_query)
# Transcript listings (primary, then English when one exists).
def _render_transcript_section(heading, segments, key_prefix, no_match_message):
    """Render one transcript as a list of buttons filtered by the search query.

    Args:
        heading: Markdown heading shown above the list.
        segments: Iterable of ``(text, start, end)`` entries.
        key_prefix: Prefix for each button's widget key (``f"{key_prefix}_{start}"``).
        no_match_message: Info text shown when a query is set and nothing matches.

    Clicking a button stores that entry's start value in
    ``st.session_state['current_time']`` and reruns the script.
    """
    st.markdown(heading)
    needle = st.session_state['search_query']
    shown = 0
    for caption, begin, finish in segments:
        lowered = caption.lower()  # Case-insensitive comparison
        if needle and needle not in lowered:
            continue
        shown += 1
        if st.session_state['show_timeframe']:
            label = f"[{format_time(begin)} - {format_time(finish)}] {caption}"
        else:
            label = caption
        if st.button(label, key=f"{key_prefix}_{begin}"):
            st.session_state['current_time'] = begin
            st.rerun()
    if shown == 0 and needle:
        st.info(no_match_message)

# Primary Transcript
_render_transcript_section(
    f"### {st.session_state['language']} Transcript",
    st.session_state['primary_transcript'],
    "primary",
    "No matches found in primary transcript for the search query.",
)
# English Transcript
if st.session_state['english_transcript']:
    _render_transcript_section(
        "### English Translation",
        st.session_state['english_transcript'],
        "english",
        "No matches found in English transcript for the search query.",
    )
# Summary Generation
# The button is offered only when the transcript is English (natively or via
# translation) and no summary has been generated yet.
summary_allowed = st.session_state['language_code'] == 'en' or st.session_state['translate_to_english']
if summary_allowed and not st.session_state['summary_generated'] and st.button("Generate Summary"):
    with st.spinner("Generating summary..."):
        try:
            _processor, _model, sum_tokenizer, sum_model, device = load_model(
                st.session_state['language_code'],
                st.session_state['summarizer_type'],
            )
            # Prefer the English translation; fall back to the primary transcript.
            summary_source = st.session_state['english_transcript'] or st.session_state['primary_transcript']
            full_text = " ".join(caption for caption, _begin, _finish in summary_source)
            st.session_state['english_summary'] = summarize_text(
                full_text, sum_tokenizer, sum_model, device, st.session_state['summarizer_type']
            )
            st.session_state['summary_generated'] = True
        except Exception as e:
            st.error(f"Summary generation failed: {str(e)}")
# Show the stored summary once one exists and is flagged as generated.
stored_summary = st.session_state['english_summary']
if stored_summary and st.session_state['summary_generated']:
    st.markdown("### Summary")
    st.write(stored_summary)
# Download Subtitles
st.markdown("### Download Subtitles")
include_timeframe = st.checkbox("Include timeframe in subtitles", value=True)
# The primary transcript is preferred; the English one is the fallback.
transcript_to_download = st.session_state['primary_transcript'] or st.session_state['english_transcript']
if transcript_to_download:
    st.download_button(
        label="Download Subtitles (SRT)",
        data=generate_srt(transcript_to_download, include_timeframe),
        file_name="subtitles.srt",
        mime="text/plain",
    )
# Edit Subtitles
st.markdown("### Edit Subtitles")
transcript_to_edit = st.session_state['primary_transcript'] or st.session_state['english_transcript']
# The button is only rendered when there is a transcript to edit.
if transcript_to_edit:
    if st.button("Delete Subtitles"):
        st.session_state['app_state'] = 'editing'
        st.rerun()
# Editing step: list every subtitle with its index, let the user name the
# indices to remove, and build an edited video from the entries that remain.
if st.session_state['app_state'] == 'editing':
    st.markdown("### Delete Subtitles")
    # Fall back to an empty list so the listing below cannot fail on None.
    transcript_to_edit = st.session_state['primary_transcript'] or st.session_state['english_transcript'] or []
    for i, (text, start, end) in enumerate(transcript_to_edit):
        st.write(f"{i}: [{format_time(start)} - {format_time(end)}] {text}")
    indices_input = st.text_input("Enter the indices of subtitles to delete (comma-separated, e.g., 0,1,3):")
    if st.button("Confirm Deletion"):
        # Only the parsing is covered by the ValueError handler, so a
        # ValueError raised while editing the video is not misreported
        # as bad user input.
        try:
            requested_indices = [int(token.strip()) for token in indices_input.split(',') if token.strip()]
        except ValueError:
            st.error("Invalid input. Please enter comma-separated integers.")
        else:
            subtitle_count = len(transcript_to_edit)
            # Out-of-range indices are dropped; a set gives O(1) membership below.
            delete_indices = {idx for idx in requested_indices if 0 <= idx < subtitle_count}
            keep_indices = [i for i in range(subtitle_count) if i not in delete_indices]
            if not delete_indices:
                # Nothing valid was requested: do not re-encode an unchanged video.
                st.warning("No valid subtitle indices were entered. Nothing to delete.")
            elif not keep_indices:
                st.error("All subtitles are deleted. No video to generate.")
            else:
                try:
                    edited_video_path = create_edited_video(st.session_state['video_path'], transcript_to_edit, keep_indices)
                except Exception as e:
                    st.error(f"Error during video editing: {str(e)}")
                else:
                    # A falsy path leaves the app on the editing step.
                    if edited_video_path:
                        st.session_state['edited_video_path'] = edited_video_path
                        st.session_state['app_state'] = 'results'
                        st.rerun()
    if st.button("Cancel Deletion"):
        st.session_state['app_state'] = 'results'
        st.rerun()
# Edited-video section: rendered on the results step once an edited video path has been stored.
if st.session_state['app_state'] == 'results' and st.session_state['edited_video_path']:
st.markdown("### Edited Video")
st.markdown('
', unsafe_allow_html=True)
# The open file object is handed to the download button as its data.
# NOTE(review): open() raises FileNotFoundError if the stored path no longer
# exists on disk - consider guarding with os.path.exists.
with open(st.session_state['edited_video_path'], "rb") as file:
st.download_button(label="Download Edited Video", data=file, file_name="edited_video.mp4", mime="video/mp4")
# Reset: offered only while a video path is stored. Deletes the uploaded and
# edited video files that still exist, wipes the session state and reruns.
if st.session_state.get('video_path') and st.button("Reset"):
    for path_key in ('video_path', 'edited_video_path'):
        stale_path = st.session_state[path_key]
        if stale_path and os.path.exists(stale_path):
            os.remove(stale_path)
    st.session_state.clear()
    st.rerun()
# Static "features" section; the literal is passed to st.markdown with unsafe_allow_html=True.
st.markdown("""
Why VidEp Stands Out
Cloud Upload
Smart Search
Easy Editing
AI Summary
""", unsafe_allow_html=True)
# Static "about" section (mission, what the product does, reasons to choose it).
st.markdown("""
About VidEp
Our Mission
VidEp aims to revolutionize how creators and professionals work with video content by providing state-of-the-art AI-powered tools for transcription, translation, and summarization.
What We Do
Our platform combines the latest advancements in speech recognition and natural language processing to automatically transcribe videos in multiple languages, generate accurate translations, and create concise summaries of content.
Why Choose Us
Advanced AI models for superior accuracy
Multi-language support including English and Urdu
Easy-to-use interface for editing and managing subtitles
Smart search functionality to quickly find content
Seamless video editing based on transcripts
""", unsafe_allow_html=True)
# Static "contact" section.
st.markdown("""
Contact Us
""", unsafe_allow_html=True)
# Static pricing section listing the Free, Premium and Business plans.
st.markdown("""
Choose Your Plan
Free
$0 / month
Basic video transcription
English only
Max 5 minutes video
No summarization
Premium
$19 / month
Advanced transcription
Multiple languages
Max 30 minutes video
AI summarization
Business
$49 / month
Enterprise-grade transcription
All languages
Unlimited video length
""", unsafe_allow_html=True)
# Final markdown call; its literal contains only a newline in this view.
st.markdown("""
""", unsafe_allow_html=True)
# Script entry point: call main() only when this file is executed directly.
if __name__ == "__main__":
main()