GogetaBlueMUI committed
Commit 286f91d · verified · parent bc193d3

Update app.py

Files changed (1): app.py (+65, -156)
app.py CHANGED
@@ -9,18 +9,10 @@ import ffmpeg
 import time
 import json
 import psutil
-import sys
-import glob
-from pathlib import Path
-
-# Workaround for torch.classes and Streamlit compatibility
-st._is_running_with_streamlit = True
-if 'torch' in sys.modules and hasattr(sys.modules['torch'], '__path__'):
-    sys.modules['torch'].__path__ = []
 
 st.set_page_config(layout="wide")
 
-# CSS for styling
+# Updated CSS with video styling from the second code
 st.markdown("""
 <style>
 @import url('https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;600;700&display=swap');
@@ -198,7 +190,7 @@ st.markdown("""
     font-family: 'Poppins', sans-serif;
 }
 
-/* Video player styling */
+/* Video player styling - Updated to match second code */
 video {
     display: block;
     width: 350px !important;
@@ -300,25 +292,21 @@ class TranscriptionProgress:
 @st.cache_resource
 def load_model(language='en', summarizer_type='bart'):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    try:
-        if language == 'ur':
-            processor = AutoProcessor.from_pretrained("GogetaBlueMUI/whisper-medium-ur-fleurs")
-            model = AutoModelForSpeechSeq2Seq.from_pretrained("GogetaBlueMUI/whisper-medium-ur-fleurs").to(device)
-        else:
-            processor = AutoProcessor.from_pretrained("openai/whisper-small")
-            model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small").to(device)
-        if device.type == "cuda":
-            model = model.half()
-        if summarizer_type == 'bart':
-            sum_tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
-            sum_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn").to(device)
-        else:
-            sum_tokenizer = AutoTokenizer.from_pretrained("pszemraj/led-large-book-summary")
-            sum_model = AutoModelForSeq2SeqLM.from_pretrained("pszemraj/led-large-book-summary").to(device)
-        return processor, model, sum_tokenizer, sum_model, device
-    except Exception as e:
-        st.error(f"Error loading models: {str(e)}")
-        return None, None, None, None, None
+    if language == 'ur':
+        processor = AutoProcessor.from_pretrained("GogetaBlueMUI/whisper-medium-ur-fleurs")
+        model = AutoModelForSpeechSeq2Seq.from_pretrained("GogetaBlueMUI/whisper-medium-ur-fleurs").to(device)
+    else:
+        processor = AutoProcessor.from_pretrained("openai/whisper-small")
+        model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small").to(device)
+    if device.type == "cuda":
+        model = model.half()
+    if summarizer_type == 'bart':
+        sum_tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
+        sum_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn").to(device)
+    else:
+        sum_tokenizer = AutoTokenizer.from_pretrained("pszemraj/led-large-book-summary")
+        sum_model = AutoModelForSeq2SeqLM.from_pretrained("pszemraj/led-large-book-summary").to(device)
+    return processor, model, sum_tokenizer, sum_model, device
 
 def split_audio_into_chunks(audio, sr, chunk_duration):
     chunk_samples = int(chunk_duration * sr)
@@ -326,23 +314,17 @@ def split_audio_into_chunks(audio, sr, chunk_duration):
     return chunks
 
 def transcribe_audio(audio, sr, processor, model, device, start_time, language, task="transcribe"):
+    inputs = processor(audio, sampling_rate=sr, return_tensors="pt")
+    input_features = inputs.input_features.to(device)
+    if model.dtype == torch.float16:
+        input_features = input_features.half()
+    generate_kwargs = {
+        "task": task,
+        "language": "urdu" if language == "ur" else language,
+        "max_new_tokens": 128,
+        "return_timestamps": True
+    }
     try:
-        inputs = processor(audio, sampling_rate=sr, return_tensors="pt")
-        input_features = inputs.input_features.to(device)
-        attention_mask = inputs.get("attention_mask", None)
-        if attention_mask is not None:
-            attention_mask = attention_mask.to(device)
-        if model.dtype == torch.float16:
-            input_features = input_features.half()
-        generate_kwargs = {
-            "task": task,
-            "language": "urdu" if language == "ur" else language,
-            "max_new_tokens": 128,
-            "return_timestamps": True,
-            "do_sample": False
-        }
-        if attention_mask is not None:
-            generate_kwargs["attention_mask"] = attention_mask
         with torch.no_grad():
             outputs = model.generate(input_features, **generate_kwargs)
         text = processor.decode(outputs[0], skip_special_tokens=True)
@@ -355,29 +337,26 @@ def process_chunks(chunks, sr, processor, model, device, language, chunk_duration
     transcript = []
     chunk_start = 0
     total_chunks = len(chunks)
-    progress = TranscriptionProgress()
-    progress.init_progress()
+    progress_bar = st.progress(0)
+    status_text = st.empty()
     if os.path.exists(transcript_file):
-        try:
-            os.remove(transcript_file)
-            st.info(f"Removed temporary file: {transcript_file}")
-        except Exception as e:
-            st.warning(f"Failed to remove {transcript_file}: {str(e)}")
+        os.remove(transcript_file)
     for i, chunk in enumerate(chunks):
-        progress.update((i + 1) / total_chunks, f"Processing chunk {i+1}/{total_chunks}...")
+        status_text.text(f"Processing chunk {i+1}/{total_chunks}...")
         try:
             memory = psutil.virtual_memory()
-            if memory.percent > 90:
-                st.warning(f"High memory usage: {memory.percent}% - Consider reducing chunk size.")
+            st.write(f"Memory usage: {memory.percent}% (Chunk {i+1}/{total_chunks})")
             chunk_transcript = transcribe_audio(chunk, sr, processor, model, device, chunk_start, language, task)
             transcript.extend(chunk_transcript)
             with open(transcript_file, "w", encoding="utf-8") as f:
                 json.dump(transcript, f, ensure_ascii=False)
             chunk_start += chunk_duration
+            progress_bar.progress((i + 1) / total_chunks)
         except Exception as e:
            st.error(f"Error processing chunk {i+1}: {str(e)}")
            break
-    progress.update(1.0, "Processing complete!")
+    status_text.text("Processing complete!")
+    progress_bar.empty()
     return transcript
 
 def summarize_text(text, tokenizer, model, device, summarizer_type='bart'):
@@ -389,29 +368,18 @@ def summarize_text(text, tokenizer, model, device, summarizer_type='bart'):
     max_input_length = 16384
     max_summary_length = 512
     chunk_size = 8192
+    inputs = tokenizer(text, return_tensors="pt", truncation=False)
+    input_ids = inputs["input_ids"].to(device)
+    num_tokens = input_ids.shape[1]
+    st.write(f"Number of tokens in input: {num_tokens}")
+    if num_tokens < 50:
+        return "Transcript too short to summarize effectively."
     try:
-        inputs = tokenizer(text, return_tensors="pt", truncation=False)
-        input_ids = inputs["input_ids"].to(device)
-        attention_mask = inputs.get("attention_mask")
-        if attention_mask is not None:
-            attention_mask = attention_mask.to(device)
-        num_tokens = input_ids.shape[1]
-        st.write(f"Number of tokens in input: {num_tokens}")
-        if num_tokens < 50:
-            return "Transcript too short to summarize effectively."
         summaries = []
         if num_tokens <= max_input_length:
             truncated_inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=max_input_length).to(device)
             with torch.no_grad():
-                summary_ids = model.generate(
-                    truncated_inputs["input_ids"],
-                    attention_mask=truncated_inputs.get("attention_mask"),
-                    num_beams=4,
-                    max_length=max_summary_length,
-                    min_length=50,
-                    early_stopping=True,
-                    temperature=0.7
-                )
+                summary_ids = model.generate(truncated_inputs["input_ids"], num_beams=4, max_length=max_summary_length, min_length=50, early_stopping=True, temperature=0.7)
             summaries.append(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
         else:
             st.write(f"Transcript exceeds {max_input_length} tokens. Processing in chunks...")
@@ -420,27 +388,12 @@ def summarize_text(text, tokenizer, model, device, summarizer_type='bart'):
                 chunk_tokens = tokens[i:i + chunk_size]
                 chunk_input_ids = torch.tensor([chunk_tokens]).to(device)
                 with torch.no_grad():
-                    summary_ids = model.generate(
-                        chunk_input_ids,
-                        num_beams=4,
-                        max_length=max_summary_length // 2,
-                        min_length=25,
-                        early_stopping=True,
-                        temperature=0.7
-                    )
+                    summary_ids = model.generate(chunk_input_ids, num_beams=4, max_length=max_summary_length // 2, min_length=25, early_stopping=True, temperature=0.7)
                 summaries.append(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
             combined_summary = " ".join(summaries)
             combined_inputs = tokenizer(combined_summary, return_tensors="pt", truncation=True, max_length=max_input_length).to(device)
             with torch.no_grad():
-                final_summary_ids = model.generate(
-                    combined_inputs["input_ids"],
-                    attention_mask=combined_inputs.get("attention_mask"),
-                    num_beams=4,
-                    max_length=max_summary_length,
-                    min_length=50,
-                    early_stopping=True,
-                    temperature=0.7
-                )
+                final_summary_ids = model.generate(combined_inputs["input_ids"], num_beams=4, max_length=max_summary_length, min_length=50, early_stopping=True, temperature=0.7)
             summaries = [tokenizer.decode(final_summary_ids[0], skip_special_tokens=True)]
         return " ".join(summaries)
     except Exception as e:
@@ -449,8 +402,7 @@ def summarize_text(text, tokenizer, model, device, summarizer_type='bart'):
 
 def save_uploaded_file(uploaded_file):
     try:
-        suffix = Path(uploaded_file.name).suffix
-        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_file:
             tmp_file.write(uploaded_file.read())
             return tmp_file.name
     except Exception as e:
@@ -471,10 +423,10 @@ def merge_intervals(intervals):
     return merged
 
 def create_edited_video(video_path, transcript, keep_indices):
-    temp_files = []
     try:
         intervals_to_keep = [(transcript[i][1], transcript[i][2]) for i in keep_indices]
         merged_intervals = merge_intervals(intervals_to_keep)
+        temp_files = []
         for j, (start, end) in enumerate(merged_intervals):
             temp_file = f"temp_{j}.mp4"
             ffmpeg.input(video_path, ss=start, to=end).output(temp_file, c='copy').run(overwrite_output=True, quiet=True)
@@ -484,54 +436,28 @@ def create_edited_video(video_path, transcript, keep_indices):
                 f.write(f"file '{temp_file}'\n")
         edited_video_path = "edited_video.mp4"
         ffmpeg.input('list.txt', format='concat', safe=0).output(edited_video_path, c='copy').run(overwrite_output=True, quiet=True)
+        for temp_file in temp_files:
+            if os.path.exists(temp_file):
+                os.remove(temp_file)
+        if os.path.exists("list.txt"):
+            os.remove("list.txt")
         return edited_video_path
     except Exception as e:
         st.error(f"Error creating edited video: {str(e)}")
         return None
-    finally:
-        for temp_file in temp_files:
-            if os.path.exists(temp_file):
-                try:
-                    os.remove(temp_file)
-                    st.info(f"Removed temporary file: {temp_file}")
-                except Exception as e:
-                    st.warning(f"Failed to remove {temp_file}: {str(e)}")
-        if os.path.exists("list.txt"):
-            try:
-                os.remove("list.txt")
-                st.info(f"Removed temporary file: list.txt")
-            except Exception as e:
-                st.warning(f"Failed to remove list.txt: {str(e)}")
 
 def generate_srt(transcript, include_timeframe=True):
     srt_content = ""
-    for i, (text, start, end) in enumerate(transcript, 1):
+    for text, start, end in transcript:
         if include_timeframe:
             start_time = seconds_to_srt_time(start)
             end_time = seconds_to_srt_time(end)
-            srt_content += f"{i}\n{start_time} --> {end_time}\n{text}\n\n"
+            srt_content += f"{start_time} --> {end_time}\n{text}\n\n"
         else:
            srt_content += f"{text}\n\n"
     return srt_content
 
-def cleanup_temp_files():
-    temp_files = ["processed_audio.wav", "temp_primary_transcript.json", "temp_english_transcript.json", "edited_video.mp4", "list.txt"]
-    for temp_file in temp_files:
-        if os.path.exists(temp_file):
-            try:
-                os.remove(temp_file)
-                st.info(f"Removed temporary file: {temp_file}")
-            except Exception as e:
-                st.warning(f"Failed to remove {temp_file}: {str(e)}")
-    for temp_file in glob.glob("temp_*.mp4"):
-        if os.path.exists(temp_file):
-            try:
-                os.remove(temp_file)
-                st.info(f"Removed temporary file: {temp_file}")
-            except Exception as e:
-                st.warning(f"Failed to remove {temp_file}: {str(e)}")
-
-# Main Function
+# Main Function with Centered Video Display
 def main():
     st.markdown("""
     <div class="header">
@@ -554,7 +480,7 @@ def main():
     </div>
     """, unsafe_allow_html=True)
 
-    # Initialize session state
+    # Initialize session state variables
    if 'app_state' not in st.session_state:
        st.session_state['app_state'] = 'upload'
    if 'video_path' not in st.session_state:
@@ -628,9 +554,6 @@
                 st.session_state['summarizer_type'] = summarizer_type
                 st.write("Loading models...")
                 processor, model, sum_tokenizer, sum_model, device = load_model(language_code, summarizer_type)
-                if processor is None:
-                    st.error("Failed to load models. Please try again.")
-                    return
                 st.write("Splitting audio into chunks...")
                 chunks = split_audio_into_chunks(audio, sr, chunk_duration)
                 st.write(f"Number of chunks: {len(chunks)}")
@@ -640,9 +563,6 @@
                 if st.session_state['translate_to_english'] and language_code == "ur":
                     st.write("Translating to English...")
                     processor, model, _, _, device = load_model('en', summarizer_type)
-                    if processor is None:
-                        st.error("Failed to load translation models.")
-                        return
                     english_transcript = process_chunks(chunks, sr, processor, model, device, 'ur', chunk_duration, task="translate", transcript_file="temp_english_transcript.json")
                 st.session_state.update({
                     'primary_transcript': primary_transcript,
@@ -655,9 +575,14 @@
             except Exception as e:
                 st.error(f"Processing failed: {str(e)}")
             finally:
-                cleanup_temp_files()
+                if os.path.exists(audio_path):
+                    os.remove(audio_path)
+                for temp_file in ["temp_primary_transcript.json", "temp_english_transcript.json"]:
+                    if os.path.exists(temp_file):
+                        os.remove(temp_file)
 
     if st.session_state['app_state'] == 'results':
+        # Center the original video
         st.markdown('<div style="display: flex; justify-content: center;">', unsafe_allow_html=True)
         st.video(st.session_state['video_path'], start_time=st.session_state['current_time'])
         st.markdown('</div>', unsafe_allow_html=True)
@@ -688,9 +613,6 @@
         with st.spinner("Generating summary..."):
             try:
                 _, _, sum_tokenizer, sum_model, device = load_model(st.session_state['language_code'], st.session_state['summarizer_type'])
-                if sum_tokenizer is None:
-                    st.error("Failed to load summarization models.")
-                    return
                 full_text = " ".join([text for text, _, _ in (st.session_state['english_transcript'] or st.session_state['primary_transcript'])])
                 english_summary = summarize_text(full_text, sum_tokenizer, sum_model, device, st.session_state['summarizer_type'])
                 st.session_state['english_summary'] = english_summary
@@ -741,6 +663,7 @@
 
     if st.session_state['app_state'] == 'results' and st.session_state['edited_video_path']:
         st.markdown("### Edited Video")
+        # Center the edited video
         st.markdown('<div style="display: flex; justify-content: center;">', unsafe_allow_html=True)
         st.video(st.session_state['edited_video_path'])
         st.markdown('</div>', unsafe_allow_html=True)
@@ -748,19 +671,10 @@
             st.download_button(label="Download Edited Video", data=file, file_name="edited_video.mp4", mime="video/mp4")
 
     if st.session_state.get('video_path') and st.button("Reset"):
-        cleanup_temp_files()
         if st.session_state['video_path'] and os.path.exists(st.session_state['video_path']):
-            try:
-                os.remove(st.session_state['video_path'])
-                st.info(f"Removed video file: {st.session_state['video_path']}")
-            except Exception as e:
-                st.warning(f"Failed to remove video file: {str(e)}")
+            os.remove(st.session_state['video_path'])
         if st.session_state['edited_video_path'] and os.path.exists(st.session_state['edited_video_path']):
-            try:
-                os.remove(st.session_state['edited_video_path'])
-                st.info(f"Removed edited video file: {st.session_state['edited_video_path']}")
-            except Exception as e:
-                st.warning(f"Failed to remove edited video file: {str(e)}")
+            os.remove(st.session_state['edited_video_path'])
         st.session_state.clear()
         st.rerun()
 
@@ -901,9 +815,4 @@
     """, unsafe_allow_html=True)
 
 if __name__ == "__main__":
-    try:
-        main()
-    except Exception as e:
-        st.error(f"An unexpected error occurred: {str(e)}")
-    finally:
-        cleanup_temp_files()
+    main()
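
For context, the call pattern the updated transcribe_audio settles on can be exercised outside Streamlit. A minimal sketch, assuming transformers and torch are installed and `audio` is a 16 kHz mono float32 array; the helper name transcribe_once and its defaults are illustrative, not part of the commit:

import torch
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq

def transcribe_once(audio, sr=16000, language="en", task="transcribe"):
    # Mirrors the commit's simplified flow: feature extraction and
    # generate_kwargs are built up front, with no attention_mask plumbing.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    processor = AutoProcessor.from_pretrained("openai/whisper-small")
    model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small").to(device)
    if device.type == "cuda":
        model = model.half()
    inputs = processor(audio, sampling_rate=sr, return_tensors="pt")
    input_features = inputs.input_features.to(device)
    if model.dtype == torch.float16:
        input_features = input_features.half()
    generate_kwargs = {
        "task": task,
        "language": "urdu" if language == "ur" else language,
        "max_new_tokens": 128,
        "return_timestamps": True
    }
    with torch.no_grad():
        outputs = model.generate(input_features, **generate_kwargs)
    return processor.decode(outputs[0], skip_special_tokens=True)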