Update app.py

app.py (CHANGED)
@@ -9,10 +9,18 @@ import ffmpeg
 import time
 import json
 import psutil
+import sys
+import glob
+from pathlib import Path
+
+# Workaround for torch.classes and Streamlit compatibility
+st._is_running_with_streamlit = True
+if 'torch' in sys.modules and hasattr(sys.modules['torch'], '__path__'):
+    sys.modules['torch'].__path__ = []
 
 st.set_page_config(layout="wide")
 
-#
+# CSS for styling
 st.markdown("""
 <style>
 @import url('https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;600;700&display=swap');
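
Note on the torch workaround above: some Streamlit file watchers crash while introspecting `torch.classes`, and the patch hides torch's module paths from the watcher. A standalone sketch of the same guard (the helper name is ours, and whether the patch is needed depends on the installed torch/Streamlit versions):

    import sys

    def patch_torch_for_streamlit():
        # Hypothetical helper mirroring the diff's guard: blank out
        # torch.__path__ so Streamlit's watcher does not walk torch.classes.
        torch_mod = sys.modules.get('torch')
        if torch_mod is not None and hasattr(torch_mod, '__path__'):
            torch_mod.__path__ = []
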
@@ -190,7 +198,7 @@ st.markdown("""
     font-family: 'Poppins', sans-serif;
 }
 
-/* Video player styling
+/* Video player styling */
 video {
     display: block;
     width: 350px !important;
@@ -292,21 +300,25 @@ class TranscriptionProgress:
 @st.cache_resource
 def load_model(language='en', summarizer_type='bart'):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    if language == 'ur':
-        processor = AutoProcessor.from_pretrained("GogetaBlueMUI/whisper-medium-ur-fleurs")
-        model = AutoModelForSpeechSeq2Seq.from_pretrained("GogetaBlueMUI/whisper-medium-ur-fleurs").to(device)
-    else:
-        processor = AutoProcessor.from_pretrained("openai/whisper-small")
-        model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small").to(device)
-    if device.type == "cuda":
-        model = model.half()
-    if summarizer_type == 'bart':
-        sum_tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
-        sum_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn").to(device)
-    else:
-        sum_tokenizer = AutoTokenizer.from_pretrained("pszemraj/led-large-book-summary")
-        sum_model = AutoModelForSeq2SeqLM.from_pretrained("pszemraj/led-large-book-summary").to(device)
-    return processor, model, sum_tokenizer, sum_model, device
+    try:
+        if language == 'ur':
+            processor = AutoProcessor.from_pretrained("GogetaBlueMUI/whisper-medium-ur-fleurs")
+            model = AutoModelForSpeechSeq2Seq.from_pretrained("GogetaBlueMUI/whisper-medium-ur-fleurs").to(device)
+        else:
+            processor = AutoProcessor.from_pretrained("openai/whisper-small")
+            model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small").to(device)
+        if device.type == "cuda":
+            model = model.half()
+        if summarizer_type == 'bart':
+            sum_tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
+            sum_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn").to(device)
+        else:
+            sum_tokenizer = AutoTokenizer.from_pretrained("pszemraj/led-large-book-summary")
+            sum_model = AutoModelForSeq2SeqLM.from_pretrained("pszemraj/led-large-book-summary").to(device)
+        return processor, model, sum_tokenizer, sum_model, device
+    except Exception as e:
+        st.error(f"Error loading models: {str(e)}")
+        return None, None, None, None, None
 
 def split_audio_into_chunks(audio, sr, chunk_duration):
     chunk_samples = int(chunk_duration * sr)
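
For context, `split_audio_into_chunks` is only partially visible in this diff (its first line appears above as context). A plausible completion, assuming fixed-size slicing by sample count:

    import numpy as np

    def split_audio_into_chunks(audio: np.ndarray, sr: int, chunk_duration: float):
        # Slice the waveform into fixed-length windows; the last chunk may be shorter.
        chunk_samples = int(chunk_duration * sr)  # this line is shown in the diff
        return [audio[i:i + chunk_samples] for i in range(0, len(audio), chunk_samples)]
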
@@ -314,17 +326,23 @@ def split_audio_into_chunks(audio, sr, chunk_duration):
     return chunks
 
 def transcribe_audio(audio, sr, processor, model, device, start_time, language, task="transcribe"):
-    inputs = processor(audio, sampling_rate=sr, return_tensors="pt")
-    input_features = inputs.input_features.to(device)
-    if model.dtype == torch.float16:
-        input_features = input_features.half()
-    generate_kwargs = {
-        "task": task,
-        "language": "urdu" if language == "ur" else language,
-        "max_new_tokens": 128,
-        "return_timestamps": True
-    }
     try:
+        inputs = processor(audio, sampling_rate=sr, return_tensors="pt")
+        input_features = inputs.input_features.to(device)
+        attention_mask = inputs.get("attention_mask", None)
+        if attention_mask is not None:
+            attention_mask = attention_mask.to(device)
+        if model.dtype == torch.float16:
+            input_features = input_features.half()
+        generate_kwargs = {
+            "task": task,
+            "language": "urdu" if language == "ur" else language,
+            "max_new_tokens": 128,
+            "return_timestamps": True,
+            "do_sample": False
+        }
+        if attention_mask is not None:
+            generate_kwargs["attention_mask"] = attention_mask
         with torch.no_grad():
             outputs = model.generate(input_features, **generate_kwargs)
             text = processor.decode(outputs[0], skip_special_tokens=True)
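
The `start_time` parameter implies each chunk's timestamps are shifted to absolute positions after decoding; the diff does not show that parsing. A sketch of the implied offset step, assuming `(text, start, end)` segment tuples (the format `generate_srt` later consumes):

    def offset_segments(segments, chunk_start):
        # Hypothetical helper: timestamps decoded from a chunk are relative to
        # the chunk, so shift them by the chunk's absolute start time.
        return [(text, start + chunk_start, end + chunk_start)
                for text, start, end in segments]

    print(offset_segments([("hello", 0.0, 1.5)], 30.0))  # [('hello', 30.0, 31.5)]
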
@@ -337,26 +355,29 @@ def process_chunks(chunks, sr, processor, model, device, language, chunk_duratio
     transcript = []
     chunk_start = 0
     total_chunks = len(chunks)
-    progress_bar = st.progress(0)
-
+    progress = TranscriptionProgress()
+    progress.init_progress()
     if os.path.exists(transcript_file):
-        os.remove(transcript_file)
+        try:
+            os.remove(transcript_file)
+            st.info(f"Removed temporary file: {transcript_file}")
+        except Exception as e:
+            st.warning(f"Failed to remove {transcript_file}: {str(e)}")
     for i, chunk in enumerate(chunks):
-        st.write(f"Processing chunk {i+1}/{total_chunks}...")
+        progress.update((i + 1) / total_chunks, f"Processing chunk {i+1}/{total_chunks}...")
         try:
             memory = psutil.virtual_memory()
-            st.write(f"Memory usage: {memory.percent}%")
+            if memory.percent > 90:
+                st.warning(f"High memory usage: {memory.percent}% - Consider reducing chunk size.")
             chunk_transcript = transcribe_audio(chunk, sr, processor, model, device, chunk_start, language, task)
             transcript.extend(chunk_transcript)
             with open(transcript_file, "w", encoding="utf-8") as f:
                 json.dump(transcript, f, ensure_ascii=False)
             chunk_start += chunk_duration
-            progress_bar.progress((i + 1) / total_chunks)
         except Exception as e:
             st.error(f"Error processing chunk {i+1}: {str(e)}")
             break
-
-    progress_bar.empty()
+    progress.update(1.0, "Processing complete!")
     return transcript
 
 def summarize_text(text, tokenizer, model, device, summarizer_type='bart'):
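
`TranscriptionProgress` itself is defined above this hunk and not shown. From the calls added here (`init_progress()` and `update(fraction, message)`), a minimal compatible sketch might be:

    import streamlit as st

    class TranscriptionProgress:
        # Assumed shape of the class used above; only the two methods the
        # diff calls are sketched here.
        def init_progress(self):
            self.bar = st.progress(0)
            self.status = st.empty()

        def update(self, fraction, message=""):
            self.bar.progress(min(max(fraction, 0.0), 1.0))
            if message:
                self.status.write(message)
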
@@ -368,18 +389,29 @@ def summarize_text(text, tokenizer, model, device, summarizer_type='bart'):
     max_input_length = 16384
     max_summary_length = 512
     chunk_size = 8192
-    inputs = tokenizer(text, return_tensors="pt", truncation=False)
-    input_ids = inputs["input_ids"].to(device)
-    num_tokens = input_ids.shape[1]
-    st.write(f"Number of tokens in input: {num_tokens}")
-    if num_tokens < 50:
-        return "Transcript too short to summarize effectively."
     try:
+        inputs = tokenizer(text, return_tensors="pt", truncation=False)
+        input_ids = inputs["input_ids"].to(device)
+        attention_mask = inputs.get("attention_mask")
+        if attention_mask is not None:
+            attention_mask = attention_mask.to(device)
+        num_tokens = input_ids.shape[1]
+        st.write(f"Number of tokens in input: {num_tokens}")
+        if num_tokens < 50:
+            return "Transcript too short to summarize effectively."
         summaries = []
         if num_tokens <= max_input_length:
             truncated_inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=max_input_length).to(device)
             with torch.no_grad():
-                summary_ids = model.generate(
+                summary_ids = model.generate(
+                    truncated_inputs["input_ids"],
+                    attention_mask=truncated_inputs.get("attention_mask"),
+                    num_beams=4,
+                    max_length=max_summary_length,
+                    min_length=50,
+                    early_stopping=True,
+                    temperature=0.7
+                )
             summaries.append(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
         else:
             st.write(f"Transcript exceeds {max_input_length} tokens. Processing in chunks...")
@@ -388,12 +420,27 @@ def summarize_text(text, tokenizer, model, device, summarizer_type='bart'):
                 chunk_tokens = tokens[i:i + chunk_size]
                 chunk_input_ids = torch.tensor([chunk_tokens]).to(device)
                 with torch.no_grad():
-                    summary_ids = model.generate(
+                    summary_ids = model.generate(
+                        chunk_input_ids,
+                        num_beams=4,
+                        max_length=max_summary_length // 2,
+                        min_length=25,
+                        early_stopping=True,
+                        temperature=0.7
+                    )
                 summaries.append(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
             combined_summary = " ".join(summaries)
             combined_inputs = tokenizer(combined_summary, return_tensors="pt", truncation=True, max_length=max_input_length).to(device)
             with torch.no_grad():
-                final_summary_ids = model.generate(
+                final_summary_ids = model.generate(
+                    combined_inputs["input_ids"],
+                    attention_mask=combined_inputs.get("attention_mask"),
+                    num_beams=4,
+                    max_length=max_summary_length,
+                    min_length=50,
+                    early_stopping=True,
+                    temperature=0.7
+                )
             summaries = [tokenizer.decode(final_summary_ids[0], skip_special_tokens=True)]
         return " ".join(summaries)
     except Exception as e:
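
The two hunks above implement a map-reduce summarization: token ids are sliced into 8192-token windows, each window is summarized at half the target length, and the joined partial summaries are summarized once more. The window arithmetic in isolation:

    def token_windows(token_ids, window=8192):
        # Fixed-stride slicing; the final window may be shorter.
        return [token_ids[i:i + window] for i in range(0, len(token_ids), window)]

    print([len(w) for w in token_windows(list(range(20000)), 8192)])  # [8192, 8192, 3616]

(One nit the diff keeps: `temperature=0.7` has no effect without `do_sample=True`, so transformers will typically warn about it here.)
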
@@ -402,7 +449,8 @@ def summarize_text(text, tokenizer, model, device, summarizer_type='bart'):
 
 def save_uploaded_file(uploaded_file):
     try:
-        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
+        suffix = Path(uploaded_file.name).suffix
+        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
             tmp_file.write(uploaded_file.read())
             return tmp_file.name
     except Exception as e:
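
Preserving the upload's real suffix matters because tools such as ffmpeg typically infer the output container from the file extension. For example:

    from pathlib import Path

    # Path.suffix keeps whatever extension the user uploaded.
    print(Path("lecture.mp4").suffix)   # '.mp4'
    print(Path("talk.webm").suffix)     # '.webm'
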
@@ -423,10 +471,10 @@ def merge_intervals(intervals):
     return merged
 
 def create_edited_video(video_path, transcript, keep_indices):
+    temp_files = []
     try:
         intervals_to_keep = [(transcript[i][1], transcript[i][2]) for i in keep_indices]
         merged_intervals = merge_intervals(intervals_to_keep)
-        temp_files = []
         for j, (start, end) in enumerate(merged_intervals):
             temp_file = f"temp_{j}.mp4"
             ffmpeg.input(video_path, ss=start, to=end).output(temp_file, c='copy').run(overwrite_output=True, quiet=True)
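
`merge_intervals` is called here but defined outside the hunk. A conventional implementation (an assumption, shown for context) sorts and coalesces overlapping `(start, end)` pairs so ffmpeg cuts each region once:

    def merge_intervals(intervals):
        # Sort by start, then absorb any interval overlapping the last kept one.
        merged = []
        for start, end in sorted(intervals):
            if merged and start <= merged[-1][1]:
                merged[-1] = (merged[-1][0], max(merged[-1][1], end))
            else:
                merged.append((start, end))
        return merged

    print(merge_intervals([(5, 9), (0, 3), (2, 6)]))  # [(0, 9)]
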
@@ -436,28 +484,54 @@ def create_edited_video(video_path, transcript, keep_indices):
             f.write(f"file '{temp_file}'\n")
         edited_video_path = "edited_video.mp4"
         ffmpeg.input('list.txt', format='concat', safe=0).output(edited_video_path, c='copy').run(overwrite_output=True, quiet=True)
-        for temp_file in temp_files:
-            if os.path.exists(temp_file):
-                os.remove(temp_file)
-        if os.path.exists("list.txt"):
-            os.remove("list.txt")
         return edited_video_path
     except Exception as e:
         st.error(f"Error creating edited video: {str(e)}")
         return None
+    finally:
+        for temp_file in temp_files:
+            if os.path.exists(temp_file):
+                try:
+                    os.remove(temp_file)
+                    st.info(f"Removed temporary file: {temp_file}")
+                except Exception as e:
+                    st.warning(f"Failed to remove {temp_file}: {str(e)}")
+        if os.path.exists("list.txt"):
+            try:
+                os.remove("list.txt")
+                st.info(f"Removed temporary file: list.txt")
+            except Exception as e:
+                st.warning(f"Failed to remove list.txt: {str(e)}")
 
 def generate_srt(transcript, include_timeframe=True):
     srt_content = ""
-    for text, start, end in transcript:
+    for i, (text, start, end) in enumerate(transcript, 1):
         if include_timeframe:
             start_time = seconds_to_srt_time(start)
             end_time = seconds_to_srt_time(end)
-            srt_content += f"{start_time} --> {end_time}\n{text}\n\n"
+            srt_content += f"{i}\n{start_time} --> {end_time}\n{text}\n\n"
         else:
             srt_content += f"{text}\n\n"
     return srt_content
 
-# Main function
+def cleanup_temp_files():
+    temp_files = ["processed_audio.wav", "temp_primary_transcript.json", "temp_english_transcript.json", "edited_video.mp4", "list.txt"]
+    for temp_file in temp_files:
+        if os.path.exists(temp_file):
+            try:
+                os.remove(temp_file)
+                st.info(f"Removed temporary file: {temp_file}")
+            except Exception as e:
+                st.warning(f"Failed to remove {temp_file}: {str(e)}")
+    for temp_file in glob.glob("temp_*.mp4"):
+        if os.path.exists(temp_file):
+            try:
+                os.remove(temp_file)
+                st.info(f"Removed temporary file: {temp_file}")
+            except Exception as e:
+                st.warning(f"Failed to remove {temp_file}: {str(e)}")
+
+# Main Function
 def main():
     st.markdown("""
     <div class="header">
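
Two notes on `generate_srt`: the added cue index makes the output match the SRT spec (each cue carries a numeric counter), and `seconds_to_srt_time` is referenced but defined elsewhere in the file. A conventional implementation (assumed) formats HH:MM:SS,mmm:

    def seconds_to_srt_time(seconds):
        # SRT timestamps are hours:minutes:seconds,milliseconds.
        ms = int(round(seconds * 1000))
        h, rem = divmod(ms, 3_600_000)
        m, rem = divmod(rem, 60_000)
        s, ms = divmod(rem, 1000)
        return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"

    print(seconds_to_srt_time(83.5))  # 00:01:23,500
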
@@ -480,7 +554,7 @@ def main():
     </div>
     """, unsafe_allow_html=True)
 
-    # Initialize session state
+    # Initialize session state
     if 'app_state' not in st.session_state:
         st.session_state['app_state'] = 'upload'
     if 'video_path' not in st.session_state:
@@ -554,6 +628,9 @@ def main():
             st.session_state['summarizer_type'] = summarizer_type
             st.write("Loading models...")
             processor, model, sum_tokenizer, sum_model, device = load_model(language_code, summarizer_type)
+            if processor is None:
+                st.error("Failed to load models. Please try again.")
+                return
             st.write("Splitting audio into chunks...")
             chunks = split_audio_into_chunks(audio, sr, chunk_duration)
             st.write(f"Number of chunks: {len(chunks)}")
@@ -563,6 +640,9 @@ def main():
             if st.session_state['translate_to_english'] and language_code == "ur":
                 st.write("Translating to English...")
                 processor, model, _, _, device = load_model('en', summarizer_type)
+                if processor is None:
+                    st.error("Failed to load translation models.")
+                    return
                 english_transcript = process_chunks(chunks, sr, processor, model, device, 'ur', chunk_duration, task="translate", transcript_file="temp_english_transcript.json")
                 st.session_state.update({
                     'primary_transcript': primary_transcript,
@@ -575,14 +655,9 @@ def main():
         except Exception as e:
             st.error(f"Processing failed: {str(e)}")
         finally:
-            if os.path.exists(audio_path):
-                os.remove(audio_path)
-            for temp_file in ["temp_primary_transcript.json", "temp_english_transcript.json"]:
-                if os.path.exists(temp_file):
-                    os.remove(temp_file)
+            cleanup_temp_files()
 
     if st.session_state['app_state'] == 'results':
-        # Center the original video
         st.markdown('<div style="display: flex; justify-content: center;">', unsafe_allow_html=True)
         st.video(st.session_state['video_path'], start_time=st.session_state['current_time'])
         st.markdown('</div>', unsafe_allow_html=True)
@@ -613,6 +688,9 @@ def main():
         with st.spinner("Generating summary..."):
             try:
                 _, _, sum_tokenizer, sum_model, device = load_model(st.session_state['language_code'], st.session_state['summarizer_type'])
+                if sum_tokenizer is None:
+                    st.error("Failed to load summarization models.")
+                    return
                 full_text = " ".join([text for text, _, _ in (st.session_state['english_transcript'] or st.session_state['primary_transcript'])])
                 english_summary = summarize_text(full_text, sum_tokenizer, sum_model, device, st.session_state['summarizer_type'])
                 st.session_state['english_summary'] = english_summary
@@ -663,7 +741,6 @@ def main():
 
     if st.session_state['app_state'] == 'results' and st.session_state['edited_video_path']:
         st.markdown("### Edited Video")
-        # Center the edited video
         st.markdown('<div style="display: flex; justify-content: center;">', unsafe_allow_html=True)
         st.video(st.session_state['edited_video_path'])
         st.markdown('</div>', unsafe_allow_html=True)
@@ -671,10 +748,19 @@ def main():
             st.download_button(label="Download Edited Video", data=file, file_name="edited_video.mp4", mime="video/mp4")
 
     if st.session_state.get('video_path') and st.button("Reset"):
+        cleanup_temp_files()
         if st.session_state['video_path'] and os.path.exists(st.session_state['video_path']):
-            os.remove(st.session_state['video_path'])
+            try:
+                os.remove(st.session_state['video_path'])
+                st.info(f"Removed video file: {st.session_state['video_path']}")
+            except Exception as e:
+                st.warning(f"Failed to remove video file: {str(e)}")
         if st.session_state['edited_video_path'] and os.path.exists(st.session_state['edited_video_path']):
-            os.remove(st.session_state['edited_video_path'])
+            try:
+                os.remove(st.session_state['edited_video_path'])
+                st.info(f"Removed edited video file: {st.session_state['edited_video_path']}")
+            except Exception as e:
+                st.warning(f"Failed to remove edited video file: {str(e)}")
         st.session_state.clear()
         st.rerun()
 
@@ -815,4 +901,9 @@ def main():
     """, unsafe_allow_html=True)
 
 if __name__ == "__main__":
-    main()
+    try:
+        main()
+    except Exception as e:
+        st.error(f"An unexpected error occurred: {str(e)}")
+    finally:
+        cleanup_temp_files()