Update app.py

app.py CHANGED
@@ -9,18 +9,10 @@ import ffmpeg
 import time
 import json
 import psutil
-import sys
-import glob
-from pathlib import Path
-
-# Workaround for torch.classes and Streamlit compatibility
-st._is_running_with_streamlit = True
-if 'torch' in sys.modules and hasattr(sys.modules['torch'], '__path__'):
-    sys.modules['torch'].__path__ = []
 
 st.set_page_config(layout="wide")
 
-# CSS
+# Updated CSS with video styling from the second code
 st.markdown("""
 <style>
 @import url('https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;600;700&display=swap');
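The deleted lines above were a known workaround for Streamlit's module watcher tripping over `torch.classes.__path__` while scanning imported modules; emptying the path list stopped the watcher from recursing into it. If that crash resurfaces after this commit, the usual alternative is to disable the watcher through configuration instead of mutating torch. This is an assumption about intent, not something stated in the commit; a sketch:

```python
# Sketch: turn off Streamlit's file watcher via its documented config option
# (equivalent to server.fileWatcherType = "none" in .streamlit/config.toml),
# set before Streamlit starts, instead of patching torch at import time.
import os
os.environ["STREAMLIT_SERVER_FILE_WATCHER_TYPE"] = "none"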
@@ -198,7 +190,7 @@ st.markdown("""
|
|
198 |
font-family: 'Poppins', sans-serif;
|
199 |
}
|
200 |
|
201 |
-
/* Video player styling */
|
202 |
video {
|
203 |
display: block;
|
204 |
width: 350px !important;
|
@@ -300,25 +292,21 @@ class TranscriptionProgress:
 @st.cache_resource
 def load_model(language='en', summarizer_type='bart'):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    try:
-        ...
-        return processor, model, sum_tokenizer, sum_model, device
-    except Exception as e:
-        st.error(f"Error loading models: {str(e)}")
-        return None, None, None, None, None
+    if language == 'ur':
+        processor = AutoProcessor.from_pretrained("GogetaBlueMUI/whisper-medium-ur-fleurs")
+        model = AutoModelForSpeechSeq2Seq.from_pretrained("GogetaBlueMUI/whisper-medium-ur-fleurs").to(device)
+    else:
+        processor = AutoProcessor.from_pretrained("openai/whisper-small")
+        model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small").to(device)
+    if device.type == "cuda":
+        model = model.half()
+    if summarizer_type == 'bart':
+        sum_tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
+        sum_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn").to(device)
+    else:
+        sum_tokenizer = AutoTokenizer.from_pretrained("pszemraj/led-large-book-summary")
+        sum_model = AutoModelForSeq2SeqLM.from_pretrained("pszemraj/led-large-book-summary").to(device)
+    return processor, model, sum_tokenizer, sum_model, device
 
 def split_audio_into_chunks(audio, sr, chunk_duration):
     chunk_samples = int(chunk_duration * sr)
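A note for reviewers on the hunk above: `load_model` is cached with `@st.cache_resource`, so each (language, summarizer_type) pair is materialized once per server process rather than on every Streamlit rerun. The function relies on four `Auto*` classes whose import block sits above this hunk and is not part of the commit, so the following is an assumed sketch of what it must contain:

```python
# Assumed imports for load_model; the names match the hunk above, but the
# actual import block is outside this diff.
import torch
import streamlit as st
from transformers import (
    AutoProcessor,              # Whisper feature extractor + tokenizer pair
    AutoModelForSpeechSeq2Seq,  # Whisper ASR checkpoint
    AutoTokenizer,              # summarizer tokenizer (BART or LED)
    AutoModelForSeq2SeqLM,      # summarizer model
)
```

With the old try/except gone, a failed download or CUDA error now raises out of the cached function instead of returning a tuple of Nones, which is why the `if processor is None` guards disappear in the later hunks of `main()`.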
@@ -326,23 +314,17 @@ def split_audio_into_chunks(audio, sr, chunk_duration):
     return chunks
 
 def transcribe_audio(audio, sr, processor, model, device, start_time, language, task="transcribe"):
+    inputs = processor(audio, sampling_rate=sr, return_tensors="pt")
+    input_features = inputs.input_features.to(device)
+    if model.dtype == torch.float16:
+        input_features = input_features.half()
+    generate_kwargs = {
+        "task": task,
+        "language": "urdu" if language == "ur" else language,
+        "max_new_tokens": 128,
+        "return_timestamps": True
+    }
     try:
-        inputs = processor(audio, sampling_rate=sr, return_tensors="pt")
-        input_features = inputs.input_features.to(device)
-        attention_mask = inputs.get("attention_mask", None)
-        if attention_mask is not None:
-            attention_mask = attention_mask.to(device)
-        if model.dtype == torch.float16:
-            input_features = input_features.half()
-        generate_kwargs = {
-            "task": task,
-            "language": "urdu" if language == "ur" else language,
-            "max_new_tokens": 128,
-            "return_timestamps": True,
-            "do_sample": False
-        }
-        if attention_mask is not None:
-            generate_kwargs["attention_mask"] = attention_mask
         with torch.no_grad():
             outputs = model.generate(input_features, **generate_kwargs)
         text = processor.decode(outputs[0], skip_special_tokens=True)
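In the rewritten `transcribe_audio`, feature extraction and the `generate_kwargs` setup move out of the `try` block, and the unused attention-mask plumbing plus the redundant `do_sample: False` (already the default) are dropped. One subtlety: `return_timestamps=True` makes Whisper emit timestamp tokens, but `processor.decode(..., skip_special_tokens=True)` strips them from `text`, so per-segment timing must be recovered elsewhere (presumably below the end of this hunk, where `start_time` is applied). A hedged sketch of keeping the timestamps at decode time, assuming the Whisper tokenizer's `decode_with_timestamps` flag:

```python
# Sketch: decode the same generate() output but keep the <|t.tt|> markers, so
# each segment can be offset by start_time, this chunk's position in the audio.
raw = processor.tokenizer.decode(outputs[0], decode_with_timestamps=True)
# e.g. '<|0.00|> first segment<|2.40|><|2.40|> second segment<|5.10|>'
```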
@@ -355,29 +337,26 @@ def process_chunks(chunks, sr, processor, model, device, language, chunk_duratio
     transcript = []
     chunk_start = 0
     total_chunks = len(chunks)
-    ...
+    progress_bar = st.progress(0)
+    status_text = st.empty()
     if os.path.exists(transcript_file):
-        try:
-            os.remove(transcript_file)
-            st.info(f"Removed temporary file: {transcript_file}")
-        except Exception as e:
-            st.warning(f"Failed to remove {transcript_file}: {str(e)}")
+        os.remove(transcript_file)
     for i, chunk in enumerate(chunks):
-        ...
+        status_text.text(f"Processing chunk {i+1}/{total_chunks}...")
         try:
             memory = psutil.virtual_memory()
-            ...
-            st.warning(f"High memory usage: {memory.percent}% - Consider reducing chunk size.")
+            st.write(f"Memory usage: {memory.percent}% (Chunk {i+1}/{total_chunks})")
             chunk_transcript = transcribe_audio(chunk, sr, processor, model, device, chunk_start, language, task)
             transcript.extend(chunk_transcript)
             with open(transcript_file, "w", encoding="utf-8") as f:
                 json.dump(transcript, f, ensure_ascii=False)
             chunk_start += chunk_duration
+            progress_bar.progress((i + 1) / total_chunks)
         except Exception as e:
             st.error(f"Error processing chunk {i+1}: {str(e)}")
             break
-
+    status_text.text("Processing complete!")
+    progress_bar.empty()
     return transcript
 
 def summarize_text(text, tokenizer, model, device, summarizer_type='bart'):
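`process_chunks` advances `chunk_start` by a fixed `chunk_duration` per iteration, which only stays aligned with the audio if `split_audio_into_chunks` cuts equal fixed-length windows. Only that helper's first and last lines appear in these hunks, so the body below is an assumed reconstruction consistent with them, not the committed code:

```python
def split_audio_into_chunks(audio, sr, chunk_duration):
    # Assumed body: slice a 1-D waveform (sampled at sr Hz) into windows of
    # chunk_duration seconds; the final chunk may be shorter than the rest.
    chunk_samples = int(chunk_duration * sr)
    return [audio[i:i + chunk_samples] for i in range(0, len(audio), chunk_samples)]
```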
@@ -389,29 +368,18 @@ def summarize_text(text, tokenizer, model, device, summarizer_type='bart'):
     max_input_length = 16384
     max_summary_length = 512
     chunk_size = 8192
+    inputs = tokenizer(text, return_tensors="pt", truncation=False)
+    input_ids = inputs["input_ids"].to(device)
+    num_tokens = input_ids.shape[1]
+    st.write(f"Number of tokens in input: {num_tokens}")
+    if num_tokens < 50:
+        return "Transcript too short to summarize effectively."
     try:
-        inputs = tokenizer(text, return_tensors="pt", truncation=False)
-        input_ids = inputs["input_ids"].to(device)
-        attention_mask = inputs.get("attention_mask")
-        if attention_mask is not None:
-            attention_mask = attention_mask.to(device)
-        num_tokens = input_ids.shape[1]
-        st.write(f"Number of tokens in input: {num_tokens}")
-        if num_tokens < 50:
-            return "Transcript too short to summarize effectively."
         summaries = []
         if num_tokens <= max_input_length:
             truncated_inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=max_input_length).to(device)
             with torch.no_grad():
-                summary_ids = model.generate(
-                    truncated_inputs["input_ids"],
-                    attention_mask=truncated_inputs.get("attention_mask"),
-                    num_beams=4,
-                    max_length=max_summary_length,
-                    min_length=50,
-                    early_stopping=True,
-                    temperature=0.7
-                )
+                summary_ids = model.generate(truncated_inputs["input_ids"], num_beams=4, max_length=max_summary_length, min_length=50, early_stopping=True, temperature=0.7)
             summaries.append(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
         else:
             st.write(f"Transcript exceeds {max_input_length} tokens. Processing in chunks...")
@@ -420,27 +388,12 @@ def summarize_text(text, tokenizer, model, device, summarizer_type='bart'):
                 chunk_tokens = tokens[i:i + chunk_size]
                 chunk_input_ids = torch.tensor([chunk_tokens]).to(device)
                 with torch.no_grad():
-                    summary_ids = model.generate(
-                        chunk_input_ids,
-                        num_beams=4,
-                        max_length=max_summary_length // 2,
-                        min_length=25,
-                        early_stopping=True,
-                        temperature=0.7
-                    )
+                    summary_ids = model.generate(chunk_input_ids, num_beams=4, max_length=max_summary_length // 2, min_length=25, early_stopping=True, temperature=0.7)
                 summaries.append(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
             combined_summary = " ".join(summaries)
             combined_inputs = tokenizer(combined_summary, return_tensors="pt", truncation=True, max_length=max_input_length).to(device)
             with torch.no_grad():
-                final_summary_ids = model.generate(
-                    combined_inputs["input_ids"],
-                    attention_mask=combined_inputs.get("attention_mask"),
-                    num_beams=4,
-                    max_length=max_summary_length,
-                    min_length=50,
-                    early_stopping=True,
-                    temperature=0.7
-                )
+                final_summary_ids = model.generate(combined_inputs["input_ids"], num_beams=4, max_length=max_summary_length, min_length=50, early_stopping=True, temperature=0.7)
             summaries = [tokenizer.decode(final_summary_ids[0], skip_special_tokens=True)]
         return " ".join(summaries)
     except Exception as e:
@@ -449,8 +402,7 @@ def summarize_text(text, tokenizer, model, device, summarizer_type='bart'):
 
 def save_uploaded_file(uploaded_file):
     try:
-        ...
-        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_file:
             tmp_file.write(uploaded_file.read())
             return tmp_file.name
     except Exception as e:
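The removed line above derived the temp file's `suffix` from the upload itself (the exact expression is truncated in the diff view, though the first hunk's removal of `from pathlib import Path` hints at how it was built); the replacement hard-codes `.mp4`, which matches this app's video uploader but would mislabel anything else. A sketch of the extension-preserving variant using only the standard library (Streamlit's `UploadedFile` exposes the original filename as `.name`):

```python
import os
import tempfile

def save_uploaded_file(uploaded_file):
    # Keep the upload's own extension, falling back to .mp4 if it has none.
    suffix = os.path.splitext(uploaded_file.name)[1] or ".mp4"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
        tmp_file.write(uploaded_file.read())
        return tmp_file.name
```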
@@ -471,10 +423,10 @@ def merge_intervals(intervals):
     return merged
 
 def create_edited_video(video_path, transcript, keep_indices):
-    temp_files = []
     try:
         intervals_to_keep = [(transcript[i][1], transcript[i][2]) for i in keep_indices]
         merged_intervals = merge_intervals(intervals_to_keep)
+        temp_files = []
         for j, (start, end) in enumerate(merged_intervals):
             temp_file = f"temp_{j}.mp4"
             ffmpeg.input(video_path, ss=start, to=end).output(temp_file, c='copy').run(overwrite_output=True, quiet=True)
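Context for the `c='copy'` call above: stream copy avoids re-encoding, so each cut snaps to the nearest keyframe and segment boundaries can be off by up to a GOP. The call corresponds roughly to `ffmpeg -ss <start> -to <end> -i <video> -c copy temp_j.mp4`. If frame-accurate cuts matter more than speed, the same call without the copy codec lets ffmpeg re-encode; a sketch using the variables from the loop above:

```python
# Frame-accurate (re-encoding) variant of the segment cut; slower than c='copy'.
ffmpeg.input(video_path, ss=start, to=end).output(temp_file).run(overwrite_output=True, quiet=True)
```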
@@ -484,54 +436,28 @@ def create_edited_video(video_path, transcript, keep_indices):
                 f.write(f"file '{temp_file}'\n")
         edited_video_path = "edited_video.mp4"
         ffmpeg.input('list.txt', format='concat', safe=0).output(edited_video_path, c='copy').run(overwrite_output=True, quiet=True)
+        for temp_file in temp_files:
+            if os.path.exists(temp_file):
+                os.remove(temp_file)
+        if os.path.exists("list.txt"):
+            os.remove("list.txt")
         return edited_video_path
     except Exception as e:
         st.error(f"Error creating edited video: {str(e)}")
         return None
-    finally:
-        for temp_file in temp_files:
-            if os.path.exists(temp_file):
-                try:
-                    os.remove(temp_file)
-                    st.info(f"Removed temporary file: {temp_file}")
-                except Exception as e:
-                    st.warning(f"Failed to remove {temp_file}: {str(e)}")
-        if os.path.exists("list.txt"):
-            try:
-                os.remove("list.txt")
-                st.info(f"Removed temporary file: list.txt")
-            except Exception as e:
-                st.warning(f"Failed to remove list.txt: {str(e)}")
 
 def generate_srt(transcript, include_timeframe=True):
     srt_content = ""
-    for
+    for text, start, end in transcript:
         if include_timeframe:
             start_time = seconds_to_srt_time(start)
             end_time = seconds_to_srt_time(end)
-            srt_content += f"{
+            srt_content += f"{start_time} --> {end_time}\n{text}\n\n"
         else:
             srt_content += f"{text}\n\n"
     return srt_content
 
-def cleanup_temp_files():
-    temp_files = ["processed_audio.wav", "temp_primary_transcript.json", "temp_english_transcript.json", "edited_video.mp4", "list.txt"]
-    for temp_file in temp_files:
-        if os.path.exists(temp_file):
-            try:
-                os.remove(temp_file)
-                st.info(f"Removed temporary file: {temp_file}")
-            except Exception as e:
-                st.warning(f"Failed to remove {temp_file}: {str(e)}")
-    for temp_file in glob.glob("temp_*.mp4"):
-        if os.path.exists(temp_file):
-            try:
-                os.remove(temp_file)
-                st.info(f"Removed temporary file: {temp_file}")
-            except Exception as e:
-                st.warning(f"Failed to remove {temp_file}: {str(e)}")
-
-# Main Function
+# Main Function with Centered Video Display
 def main():
     st.markdown("""
     <div class="header">
@@ -554,7 +480,7 @@ def main():
     </div>
     """, unsafe_allow_html=True)
 
-    # Initialize session state
+    # Initialize session state variables
     if 'app_state' not in st.session_state:
         st.session_state['app_state'] = 'upload'
     if 'video_path' not in st.session_state:
@@ -628,9 +554,6 @@ def main():
                 st.session_state['summarizer_type'] = summarizer_type
                 st.write("Loading models...")
                 processor, model, sum_tokenizer, sum_model, device = load_model(language_code, summarizer_type)
-                if processor is None:
-                    st.error("Failed to load models. Please try again.")
-                    return
                 st.write("Splitting audio into chunks...")
                 chunks = split_audio_into_chunks(audio, sr, chunk_duration)
                 st.write(f"Number of chunks: {len(chunks)}")
@@ -640,9 +563,6 @@ def main():
                 if st.session_state['translate_to_english'] and language_code == "ur":
                     st.write("Translating to English...")
                     processor, model, _, _, device = load_model('en', summarizer_type)
-                    if processor is None:
-                        st.error("Failed to load translation models.")
-                        return
                     english_transcript = process_chunks(chunks, sr, processor, model, device, 'ur', chunk_duration, task="translate", transcript_file="temp_english_transcript.json")
                 st.session_state.update({
                     'primary_transcript': primary_transcript,
@@ -655,9 +575,14 @@ def main():
             except Exception as e:
                 st.error(f"Processing failed: {str(e)}")
             finally:
-                cleanup_temp_files()
+                if os.path.exists(audio_path):
+                    os.remove(audio_path)
+                for temp_file in ["temp_primary_transcript.json", "temp_english_transcript.json"]:
+                    if os.path.exists(temp_file):
+                        os.remove(temp_file)
 
     if st.session_state['app_state'] == 'results':
+        # Center the original video
         st.markdown('<div style="display: flex; justify-content: center;">', unsafe_allow_html=True)
         st.video(st.session_state['video_path'], start_time=st.session_state['current_time'])
         st.markdown('</div>', unsafe_allow_html=True)
@@ -688,9 +613,6 @@ def main():
         with st.spinner("Generating summary..."):
             try:
                 _, _, sum_tokenizer, sum_model, device = load_model(st.session_state['language_code'], st.session_state['summarizer_type'])
-                if sum_tokenizer is None:
-                    st.error("Failed to load summarization models.")
-                    return
                 full_text = " ".join([text for text, _, _ in (st.session_state['english_transcript'] or st.session_state['primary_transcript'])])
                 english_summary = summarize_text(full_text, sum_tokenizer, sum_model, device, st.session_state['summarizer_type'])
                 st.session_state['english_summary'] = english_summary
@@ -741,6 +663,7 @@ def main():
 
     if st.session_state['app_state'] == 'results' and st.session_state['edited_video_path']:
         st.markdown("### Edited Video")
+        # Center the edited video
         st.markdown('<div style="display: flex; justify-content: center;">', unsafe_allow_html=True)
         st.video(st.session_state['edited_video_path'])
         st.markdown('</div>', unsafe_allow_html=True)
@@ -748,19 +671,10 @@ def main():
             st.download_button(label="Download Edited Video", data=file, file_name="edited_video.mp4", mime="video/mp4")
 
     if st.session_state.get('video_path') and st.button("Reset"):
-        cleanup_temp_files()
         if st.session_state['video_path'] and os.path.exists(st.session_state['video_path']):
-            try:
-                os.remove(st.session_state['video_path'])
-                st.info(f"Removed video file: {st.session_state['video_path']}")
-            except Exception as e:
-                st.warning(f"Failed to remove video file: {str(e)}")
+            os.remove(st.session_state['video_path'])
         if st.session_state['edited_video_path'] and os.path.exists(st.session_state['edited_video_path']):
-            try:
-                os.remove(st.session_state['edited_video_path'])
-                st.info(f"Removed edited video file: {st.session_state['edited_video_path']}")
-            except Exception as e:
-                st.warning(f"Failed to remove edited video file: {str(e)}")
+            os.remove(st.session_state['edited_video_path'])
         st.session_state.clear()
         st.rerun()
 
@@ -901,9 +815,4 @@ def main():
     """, unsafe_allow_html=True)
 
 if __name__ == "__main__":
-    try:
-        main()
-    except Exception as e:
-        st.error(f"An unexpected error occurred: {str(e)}")
-    finally:
-        cleanup_temp_files()
+    main()
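With this commit the `cleanup_temp_files` helper and its call sites are gone; cleanup now happens inline in the processing `finally` block and the Reset handler. The trade-off is that a run that dies between those points can leave `temp_*.mp4`, `list.txt`, `processed_audio.wav`, or `edited_video.mp4` behind. If exit-time cleanup were still wanted without the old wrapper, the standard library's `atexit` hook is one option; a sketch reusing the file list from the deleted helper, not part of this commit:

```python
import atexit
import glob
import os

def _cleanup_on_exit():
    # Best-effort removal of this app's known scratch files at interpreter exit.
    for path in ["processed_audio.wav", "temp_primary_transcript.json",
                 "temp_english_transcript.json", "edited_video.mp4", "list.txt",
                 *glob.glob("temp_*.mp4")]:
        if os.path.exists(path):
            os.remove(path)

atexit.register(_cleanup_on_exit)
```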