Update app.py

app.py CHANGED
@@ -9,18 +9,10 @@ import ffmpeg
 import time
 import json
 import psutil
-import sys
-import glob
-from pathlib import Path
-
-# Workaround for torch.classes and Streamlit compatibility
-st._is_running_with_streamlit = True
-if 'torch' in sys.modules and hasattr(sys.modules['torch'], '__path__'):
-    sys.modules['torch'].__path__ = []
 
 st.set_page_config(layout="wide")
 
-# CSS
+# Updated CSS with video styling from the second code
 st.markdown("""
 <style>
 @import url('https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;600;700&display=swap');
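The deleted lines above were a known workaround for Streamlit's module watcher tripping over `torch.classes.__path__` while scanning imported modules; emptying the path list stopped the watcher from recursing into it. If that crash resurfaces after this commit, the usual alternative is to disable the watcher through configuration instead of mutating torch. This is an assumption about intent, not something stated in the commit; a sketch:

```python
# Sketch: turn off Streamlit's file watcher via its documented config option
# (equivalent to server.fileWatcherType = "none" in .streamlit/config.toml),
# set before Streamlit starts, instead of patching torch at import time.
import os
os.environ["STREAMLIT_SERVER_FILE_WATCHER_TYPE"] = "none"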
@@ -198,7 +190,7 @@ st.markdown("""
|
|
198 |
font-family: 'Poppins', sans-serif;
|
199 |
}
|
200 |
|
201 |
-
/* Video player styling */
|
202 |
video {
|
203 |
display: block;
|
204 |
width: 350px !important;
|
@@ -300,25 +292,21 @@ class TranscriptionProgress:
 @st.cache_resource
 def load_model(language='en', summarizer_type='bart'):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    try:
-        ...
-        return processor, model, sum_tokenizer, sum_model, device
-    except Exception as e:
-        st.error(f"Error loading models: {str(e)}")
-        return None, None, None, None, None
+    if language == 'ur':
+        processor = AutoProcessor.from_pretrained("GogetaBlueMUI/whisper-medium-ur-fleurs")
+        model = AutoModelForSpeechSeq2Seq.from_pretrained("GogetaBlueMUI/whisper-medium-ur-fleurs").to(device)
+    else:
+        processor = AutoProcessor.from_pretrained("openai/whisper-small")
+        model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small").to(device)
+    if device.type == "cuda":
+        model = model.half()
+    if summarizer_type == 'bart':
+        sum_tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
+        sum_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn").to(device)
+    else:
+        sum_tokenizer = AutoTokenizer.from_pretrained("pszemraj/led-large-book-summary")
+        sum_model = AutoModelForSeq2SeqLM.from_pretrained("pszemraj/led-large-book-summary").to(device)
+    return processor, model, sum_tokenizer, sum_model, device
 
 def split_audio_into_chunks(audio, sr, chunk_duration):
     chunk_samples = int(chunk_duration * sr)
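A note for reviewers on the hunk above: `load_model` is cached with `@st.cache_resource`, so each (language, summarizer_type) pair is materialized once per server process rather than on every Streamlit rerun. The function relies on four `Auto*` classes whose import block sits above this hunk and is not part of the commit, so the following is an assumed sketch of what it must contain:

```python
# Assumed imports for load_model; the names match the hunk above, but the
# actual import block is outside this diff.
import torch
import streamlit as st
from transformers import (
    AutoProcessor,              # Whisper feature extractor + tokenizer pair
    AutoModelForSpeechSeq2Seq,  # Whisper ASR checkpoint
    AutoTokenizer,              # summarizer tokenizer (BART or LED)
    AutoModelForSeq2SeqLM,      # summarizer model
)
```

With the old try/except gone, a failed download or CUDA error now raises out of the cached function instead of returning a tuple of Nones, which is why the `if processor is None` guards disappear in the later hunks of `main()`.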
@@ -326,23 +314,17 @@ def split_audio_into_chunks(audio, sr, chunk_duration):
     return chunks
 
 def transcribe_audio(audio, sr, processor, model, device, start_time, language, task="transcribe"):
+    inputs = processor(audio, sampling_rate=sr, return_tensors="pt")
+    input_features = inputs.input_features.to(device)
+    if model.dtype == torch.float16:
+        input_features = input_features.half()
+    generate_kwargs = {
+        "task": task,
+        "language": "urdu" if language == "ur" else language,
+        "max_new_tokens": 128,
+        "return_timestamps": True
+    }
     try:
-        inputs = processor(audio, sampling_rate=sr, return_tensors="pt")
-        input_features = inputs.input_features.to(device)
-        attention_mask = inputs.get("attention_mask", None)
-        if attention_mask is not None:
-            attention_mask = attention_mask.to(device)
-        if model.dtype == torch.float16:
-            input_features = input_features.half()
-        generate_kwargs = {
-            "task": task,
-            "language": "urdu" if language == "ur" else language,
-            "max_new_tokens": 128,
-            "return_timestamps": True,
-            "do_sample": False
-        }
-        if attention_mask is not None:
-            generate_kwargs["attention_mask"] = attention_mask
         with torch.no_grad():
             outputs = model.generate(input_features, **generate_kwargs)
         text = processor.decode(outputs[0], skip_special_tokens=True)
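In the rewritten `transcribe_audio`, feature extraction and the `generate_kwargs` setup move out of the `try` block, and the unused attention-mask plumbing plus the redundant `do_sample: False` (already the default) are dropped. One subtlety: `return_timestamps=True` makes Whisper emit timestamp tokens, but `processor.decode(..., skip_special_tokens=True)` strips them from `text`, so per-segment timing must be recovered elsewhere (presumably below the end of this hunk, where `start_time` is applied). A hedged sketch of keeping the timestamps at decode time, assuming the Whisper tokenizer's `decode_with_timestamps` flag:

```python
# Sketch: decode the same generate() output but keep the <|t.tt|> markers, so
# each segment can be offset by start_time, this chunk's position in the audio.
raw = processor.tokenizer.decode(outputs[0], decode_with_timestamps=True)
# e.g. '<|0.00|> first segment<|2.40|><|2.40|> second segment<|5.10|>'
```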
@@ -355,29 +337,26 @@ def process_chunks(chunks, sr, processor, model, device, language, chunk_duratio
     transcript = []
     chunk_start = 0
     total_chunks = len(chunks)
-    ...
+    progress_bar = st.progress(0)
+    status_text = st.empty()
     if os.path.exists(transcript_file):
-        try:
-            os.remove(transcript_file)
-            st.info(f"Removed temporary file: {transcript_file}")
-        except Exception as e:
-            st.warning(f"Failed to remove {transcript_file}: {str(e)}")
+        os.remove(transcript_file)
     for i, chunk in enumerate(chunks):
-        ...
+        status_text.text(f"Processing chunk {i+1}/{total_chunks}...")
         try:
             memory = psutil.virtual_memory()
-            ...
-            st.warning(f"High memory usage: {memory.percent}% - Consider reducing chunk size.")
+            st.write(f"Memory usage: {memory.percent}% (Chunk {i+1}/{total_chunks})")
             chunk_transcript = transcribe_audio(chunk, sr, processor, model, device, chunk_start, language, task)
             transcript.extend(chunk_transcript)
             with open(transcript_file, "w", encoding="utf-8") as f:
                 json.dump(transcript, f, ensure_ascii=False)
             chunk_start += chunk_duration
+            progress_bar.progress((i + 1) / total_chunks)
         except Exception as e:
             st.error(f"Error processing chunk {i+1}: {str(e)}")
             break
-
+    status_text.text("Processing complete!")
+    progress_bar.empty()
     return transcript
 
 def summarize_text(text, tokenizer, model, device, summarizer_type='bart'):
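`process_chunks` advances `chunk_start` by a fixed `chunk_duration` per iteration, which only stays aligned with the audio if `split_audio_into_chunks` cuts equal fixed-length windows. Only that helper's first and last lines appear in these hunks, so the body below is an assumed reconstruction consistent with them, not the committed code:

```python
def split_audio_into_chunks(audio, sr, chunk_duration):
    # Assumed body: slice a 1-D waveform (sampled at sr Hz) into windows of
    # chunk_duration seconds; the final chunk may be shorter than the rest.
    chunk_samples = int(chunk_duration * sr)
    return [audio[i:i + chunk_samples] for i in range(0, len(audio), chunk_samples)]
```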
@@ -389,29 +368,18 @@ def summarize_text(text, tokenizer, model, device, summarizer_type='bart'):
     max_input_length = 16384
     max_summary_length = 512
     chunk_size = 8192
+    inputs = tokenizer(text, return_tensors="pt", truncation=False)
+    input_ids = inputs["input_ids"].to(device)
+    num_tokens = input_ids.shape[1]
+    st.write(f"Number of tokens in input: {num_tokens}")
+    if num_tokens < 50:
+        return "Transcript too short to summarize effectively."
     try:
-        inputs = tokenizer(text, return_tensors="pt", truncation=False)
-        input_ids = inputs["input_ids"].to(device)
-        attention_mask = inputs.get("attention_mask")
-        if attention_mask is not None:
-            attention_mask = attention_mask.to(device)
-        num_tokens = input_ids.shape[1]
-        st.write(f"Number of tokens in input: {num_tokens}")
-        if num_tokens < 50:
-            return "Transcript too short to summarize effectively."
         summaries = []
         if num_tokens <= max_input_length:
             truncated_inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=max_input_length).to(device)
             with torch.no_grad():
-                summary_ids = model.generate(
-                    truncated_inputs["input_ids"],
-                    attention_mask=truncated_inputs.get("attention_mask"),
-                    num_beams=4,
-                    max_length=max_summary_length,
-                    min_length=50,
-                    early_stopping=True,
-                    temperature=0.7
-                )
+                summary_ids = model.generate(truncated_inputs["input_ids"], num_beams=4, max_length=max_summary_length, min_length=50, early_stopping=True, temperature=0.7)
             summaries.append(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
         else:
             st.write(f"Transcript exceeds {max_input_length} tokens. Processing in chunks...")
@@ -420,27 +388,12 @@ def summarize_text(text, tokenizer, model, device, summarizer_type='bart'):
                 chunk_tokens = tokens[i:i + chunk_size]
                 chunk_input_ids = torch.tensor([chunk_tokens]).to(device)
                 with torch.no_grad():
-                    summary_ids = model.generate(
-                        chunk_input_ids,
-                        num_beams=4,
-                        max_length=max_summary_length // 2,
-                        min_length=25,
-                        early_stopping=True,
-                        temperature=0.7
-                    )
+                    summary_ids = model.generate(chunk_input_ids, num_beams=4, max_length=max_summary_length // 2, min_length=25, early_stopping=True, temperature=0.7)
                 summaries.append(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
             combined_summary = " ".join(summaries)
             combined_inputs = tokenizer(combined_summary, return_tensors="pt", truncation=True, max_length=max_input_length).to(device)
             with torch.no_grad():
-                final_summary_ids = model.generate(
-                    combined_inputs["input_ids"],
-                    attention_mask=combined_inputs.get("attention_mask"),
-                    num_beams=4,
-                    max_length=max_summary_length,
-                    min_length=50,
-                    early_stopping=True,
-                    temperature=0.7
-                )
+                final_summary_ids = model.generate(combined_inputs["input_ids"], num_beams=4, max_length=max_summary_length, min_length=50, early_stopping=True, temperature=0.7)
             summaries = [tokenizer.decode(final_summary_ids[0], skip_special_tokens=True)]
         return " ".join(summaries)
     except Exception as e:
@@ -449,8 +402,7 @@ def summarize_text(text, tokenizer, model, device, summarizer_type='bart'):
 
 def save_uploaded_file(uploaded_file):
     try:
-        ...
-        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_file:
             tmp_file.write(uploaded_file.read())
             return tmp_file.name
     except Exception as e:
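The removed line above derived the temp file's `suffix` from the upload itself (the exact expression is truncated in the diff view, though the first hunk's removal of `from pathlib import Path` hints at how it was built); the replacement hard-codes `.mp4`, which matches this app's video uploader but would mislabel anything else. A sketch of the extension-preserving variant using only the standard library (Streamlit's `UploadedFile` exposes the original filename as `.name`):

```python
import os
import tempfile

def save_uploaded_file(uploaded_file):
    # Keep the upload's own extension, falling back to .mp4 if it has none.
    suffix = os.path.splitext(uploaded_file.name)[1] or ".mp4"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
        tmp_file.write(uploaded_file.read())
        return tmp_file.name
```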
@@ -471,10 +423,10 @@ def merge_intervals(intervals):
     return merged
 
 def create_edited_video(video_path, transcript, keep_indices):
-    temp_files = []
     try:
         intervals_to_keep = [(transcript[i][1], transcript[i][2]) for i in keep_indices]
         merged_intervals = merge_intervals(intervals_to_keep)
+        temp_files = []
         for j, (start, end) in enumerate(merged_intervals):
             temp_file = f"temp_{j}.mp4"
             ffmpeg.input(video_path, ss=start, to=end).output(temp_file, c='copy').run(overwrite_output=True, quiet=True)
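Context for the `c='copy'` call above: stream copy avoids re-encoding, so each cut snaps to the nearest keyframe and segment boundaries can be off by up to a GOP. The call corresponds roughly to `ffmpeg -ss <start> -to <end> -i <video> -c copy temp_j.mp4`. If frame-accurate cuts matter more than speed, the same call without the copy codec lets ffmpeg re-encode; a sketch using the variables from the loop above:

```python
# Frame-accurate (re-encoding) variant of the segment cut; slower than c='copy'.
ffmpeg.input(video_path, ss=start, to=end).output(temp_file).run(overwrite_output=True, quiet=True)
```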
@@ -484,54 +436,28 @@ def create_edited_video(video_path, transcript, keep_indices):
                 f.write(f"file '{temp_file}'\n")
         edited_video_path = "edited_video.mp4"
         ffmpeg.input('list.txt', format='concat', safe=0).output(edited_video_path, c='copy').run(overwrite_output=True, quiet=True)
+        for temp_file in temp_files:
+            if os.path.exists(temp_file):
+                os.remove(temp_file)
+        if os.path.exists("list.txt"):
+            os.remove("list.txt")
         return edited_video_path
     except Exception as e:
         st.error(f"Error creating edited video: {str(e)}")
         return None
-    finally:
-        for temp_file in temp_files:
-            if os.path.exists(temp_file):
-                try:
-                    os.remove(temp_file)
-                    st.info(f"Removed temporary file: {temp_file}")
-                except Exception as e:
-                    st.warning(f"Failed to remove {temp_file}: {str(e)}")
-        if os.path.exists("list.txt"):
-            try:
-                os.remove("list.txt")
-                st.info(f"Removed temporary file: list.txt")
-            except Exception as e:
-                st.warning(f"Failed to remove list.txt: {str(e)}")
 
 def generate_srt(transcript, include_timeframe=True):
     srt_content = ""
-    for
+    for text, start, end in transcript:
         if include_timeframe:
             start_time = seconds_to_srt_time(start)
             end_time = seconds_to_srt_time(end)
-            srt_content += f"{
+            srt_content += f"{start_time} --> {end_time}\n{text}\n\n"
         else:
             srt_content += f"{text}\n\n"
     return srt_content
 
-def cleanup_temp_files():
-    temp_files = ["processed_audio.wav", "temp_primary_transcript.json", "temp_english_transcript.json", "edited_video.mp4", "list.txt"]
-    for temp_file in temp_files:
-        if os.path.exists(temp_file):
-            try:
-                os.remove(temp_file)
-                st.info(f"Removed temporary file: {temp_file}")
-            except Exception as e:
-                st.warning(f"Failed to remove {temp_file}: {str(e)}")
-    for temp_file in glob.glob("temp_*.mp4"):
-        if os.path.exists(temp_file):
-            try:
-                os.remove(temp_file)
-                st.info(f"Removed temporary file: {temp_file}")
-            except Exception as e:
-                st.warning(f"Failed to remove {temp_file}: {str(e)}")
-
-# Main Function
+# Main Function with Centered Video Display
 def main():
     st.markdown("""
     <div class="header">
@@ -554,7 +480,7 @@ def main():
     </div>
     """, unsafe_allow_html=True)
 
-    # Initialize session state
+    # Initialize session state variables
     if 'app_state' not in st.session_state:
         st.session_state['app_state'] = 'upload'
     if 'video_path' not in st.session_state:
@@ -628,9 +554,6 @@ def main():
                 st.session_state['summarizer_type'] = summarizer_type
                 st.write("Loading models...")
                 processor, model, sum_tokenizer, sum_model, device = load_model(language_code, summarizer_type)
-                if processor is None:
-                    st.error("Failed to load models. Please try again.")
-                    return
                 st.write("Splitting audio into chunks...")
                 chunks = split_audio_into_chunks(audio, sr, chunk_duration)
                 st.write(f"Number of chunks: {len(chunks)}")
@@ -640,9 +563,6 @@ def main():
                 if st.session_state['translate_to_english'] and language_code == "ur":
                     st.write("Translating to English...")
                     processor, model, _, _, device = load_model('en', summarizer_type)
-                    if processor is None:
-                        st.error("Failed to load translation models.")
-                        return
                     english_transcript = process_chunks(chunks, sr, processor, model, device, 'ur', chunk_duration, task="translate", transcript_file="temp_english_transcript.json")
                 st.session_state.update({
                     'primary_transcript': primary_transcript,
@@ -655,9 +575,14 @@ def main():
             except Exception as e:
                 st.error(f"Processing failed: {str(e)}")
             finally:
-                cleanup_temp_files()
+                if os.path.exists(audio_path):
+                    os.remove(audio_path)
+                for temp_file in ["temp_primary_transcript.json", "temp_english_transcript.json"]:
+                    if os.path.exists(temp_file):
+                        os.remove(temp_file)
 
     if st.session_state['app_state'] == 'results':
+        # Center the original video
         st.markdown('<div style="display: flex; justify-content: center;">', unsafe_allow_html=True)
         st.video(st.session_state['video_path'], start_time=st.session_state['current_time'])
         st.markdown('</div>', unsafe_allow_html=True)
@@ -688,9 +613,6 @@ def main():
         with st.spinner("Generating summary..."):
             try:
                 _, _, sum_tokenizer, sum_model, device = load_model(st.session_state['language_code'], st.session_state['summarizer_type'])
-                if sum_tokenizer is None:
-                    st.error("Failed to load summarization models.")
-                    return
                 full_text = " ".join([text for text, _, _ in (st.session_state['english_transcript'] or st.session_state['primary_transcript'])])
                 english_summary = summarize_text(full_text, sum_tokenizer, sum_model, device, st.session_state['summarizer_type'])
                 st.session_state['english_summary'] = english_summary
@@ -741,6 +663,7 @@ def main():
 
     if st.session_state['app_state'] == 'results' and st.session_state['edited_video_path']:
         st.markdown("### Edited Video")
+        # Center the edited video
         st.markdown('<div style="display: flex; justify-content: center;">', unsafe_allow_html=True)
         st.video(st.session_state['edited_video_path'])
         st.markdown('</div>', unsafe_allow_html=True)
@@ -748,19 +671,10 @@ def main():
             st.download_button(label="Download Edited Video", data=file, file_name="edited_video.mp4", mime="video/mp4")
 
     if st.session_state.get('video_path') and st.button("Reset"):
-        cleanup_temp_files()
         if st.session_state['video_path'] and os.path.exists(st.session_state['video_path']):
-            try:
-                os.remove(st.session_state['video_path'])
-                st.info(f"Removed video file: {st.session_state['video_path']}")
-            except Exception as e:
-                st.warning(f"Failed to remove video file: {str(e)}")
+            os.remove(st.session_state['video_path'])
         if st.session_state['edited_video_path'] and os.path.exists(st.session_state['edited_video_path']):
-            try:
-                os.remove(st.session_state['edited_video_path'])
-                st.info(f"Removed edited video file: {st.session_state['edited_video_path']}")
-            except Exception as e:
-                st.warning(f"Failed to remove edited video file: {str(e)}")
+            os.remove(st.session_state['edited_video_path'])
         st.session_state.clear()
         st.rerun()
 
@@ -901,9 +815,4 @@ def main():
     """, unsafe_allow_html=True)
 
 if __name__ == "__main__":
-    try:
-        main()
-    except Exception as e:
-        st.error(f"An unexpected error occurred: {str(e)}")
-    finally:
-        cleanup_temp_files()
+    main()
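With this commit the `cleanup_temp_files` helper and its call sites are gone; cleanup now happens inline in the processing `finally` block and the Reset handler. The trade-off is that a run that dies between those points can leave `temp_*.mp4`, `list.txt`, `processed_audio.wav`, or `edited_video.mp4` behind. If exit-time cleanup were still wanted without the old wrapper, the standard library's `atexit` hook is one option; a sketch reusing the file list from the deleted helper, not part of this commit:

```python
import atexit
import glob
import os

def _cleanup_on_exit():
    # Best-effort removal of this app's known scratch files at interpreter exit.
    for path in ["processed_audio.wav", "temp_primary_transcript.json",
                 "temp_english_transcript.json", "edited_video.mp4", "list.txt",
                 *glob.glob("temp_*.mp4")]:
        if os.path.exists(path):
            os.remove(path)

atexit.register(_cleanup_on_exit)
```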