"""Streamlit app that transcribes the audio clips embedded in a PPTX file.

The uploaded presentation is opened as a ZIP archive, the ``.mp4``/``.m4a``
clips under ``ppt/media`` are converted to MP3 with ffmpeg, and each clip is
transcribed with OpenAI Whisper.
"""

import io
import os
import re
import subprocess
import tempfile
import zipfile
from pathlib import Path

import streamlit as st
import whisper

# Archive folder that holds a presentation's embedded media.
_MEDIA_DIR_PREFIX = "ppt/media/"

# Archive members this tool transcribes: ppt/media/media<N>.mp4 or .m4a,
# with N starting at 1 and written without leading zeros.
_MEDIA_MEMBER_RE = re.compile(r"ppt/media/media([1-9]\d*)\.(mp4|m4a)")

# Upload size limit enforced by main() (2 GB).
_MAX_UPLOAD_BYTES = 2 * 1024 * 1024 * 1024


def _select_media_members(member_names):
    """Map each media number to the archive member that holds its clip.

    Args:
        member_names: Iterable of member names from the PPTX archive.

    Returns:
        A dict ``{number: member_name}`` ordered by ascending number. When
        both ``media<N>.mp4`` and ``media<N>.m4a`` exist, the ``.mp4`` wins.
    """
    selected = {}
    for name in member_names:
        match = _MEDIA_MEMBER_RE.fullmatch(name)
        if match is None:
            continue
        number = int(match.group(1))
        if number not in selected or match.group(2) == "mp4":
            selected[number] = name
    return dict(sorted(selected.items()))


def _convert_to_mp3(source, target):
    """Convert one media file to MP3 with ffmpeg.

    Args:
        source: Path of the input ``.mp4``/``.m4a`` file.
        target: Path of the MP3 file to create.

    Raises:
        OSError: If the ffmpeg executable cannot be started.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # An argument list (no shell) keeps paths from being shell-interpreted,
    # and check=True turns a failed conversion into an exception.
    subprocess.run(
        [
            "ffmpeg",
            "-loglevel", "quiet",
            "-i", os.fspath(source),
            "-vn",
            "-acodec", "libmp3lame",
            os.fspath(target),
        ],
        check=True,
        stdin=subprocess.DEVNULL,
    )


def _convert_media(media_files, audio_dir):
    """Convert every extracted clip to MP3, reporting progress in the UI.

    Args:
        media_files: Dict ``{number: extracted media path}``.
        audio_dir: Existing directory that receives the MP3 files.

    Returns:
        Dict ``{number: mp3 path}`` containing only the clips that converted
        successfully; failures are shown with ``st.error`` and skipped.
    """
    audio_files = {}
    progress_bar = st.progress(0)
    status_text = st.empty()

    for attempted, (slide_num, media_file) in enumerate(media_files.items(), start=1):
        mp3_path = Path(audio_dir) / f'temp_{slide_num}.mp3'
        status_text.text(f'Converting audio from slide {slide_num}...')
        try:
            _convert_to_mp3(media_file, mp3_path)
        except (OSError, subprocess.CalledProcessError) as e:
            st.error(f"Error converting slide {slide_num}: {str(e)}")
        else:
            audio_files[slide_num] = mp3_path
        progress_bar.progress(attempted / len(media_files))

    progress_bar.empty()
    status_text.empty()
    return audio_files


def _transcribe(audio_files):
    """Transcribe each MP3 with the Whisper "base" model.

    Args:
        audio_files: Non-empty dict ``{number: mp3 path}``.

    Returns:
        Dict ``{number: transcribed text}``.
    """
    with st.spinner('Loading Whisper model...'):
        model = whisper.load_model("base")

    slide_transcripts = {}
    progress_bar = st.progress(0)
    status_text = st.empty()

    for done, (slide_num, audio_file) in enumerate(audio_files.items(), start=1):
        status_text.text(f'Transcribing slide {slide_num}...')
        result = model.transcribe(os.fspath(audio_file))
        slide_transcripts[slide_num] = result["text"]
        progress_bar.progress(done / len(audio_files))

    progress_bar.empty()
    status_text.empty()
    return slide_transcripts


def process_pptx(uploaded_file):
    """Extract, convert and transcribe the audio clips of an uploaded PPTX.

    Args:
        uploaded_file: Upload object exposing ``getvalue()`` that returns the
            raw bytes of the PPTX file.

    Returns:
        ``None`` if the archive has no ``ppt/media`` folder; otherwise a dict
        ``{slide number: transcript text}``, which is empty when no clip was
        found or none could be converted.

    Raises:
        zipfile.BadZipFile: If the upload is not a valid ZIP/PPTX archive.
    """
    # NOTE(review): the key is the number in the media file name
    # (media<N>.*), which this tool presents as the slide number --
    # confirm that clip N really belongs to slide N for your decks.

    # Every intermediate file lives in this directory, so it is removed on
    # all exit paths, including errors.
    with tempfile.TemporaryDirectory() as work_dir:
        with st.spinner('Extracting PPTX contents...'):
            # A PPTX is a ZIP archive; read it straight from memory.
            with zipfile.ZipFile(io.BytesIO(uploaded_file.getvalue())) as archive:
                member_names = archive.namelist()
                if not any(name.startswith(_MEDIA_DIR_PREFIX) for name in member_names):
                    return None
                # Extract only the clips that will be converted.
                media_files = {
                    number: archive.extract(member, work_dir)
                    for number, member in _select_media_members(member_names).items()
                }

        audio_dir = Path(work_dir) / "audio"
        audio_dir.mkdir()
        audio_files = _convert_media(media_files, audio_dir)

        # Skip the expensive model load when there is nothing to transcribe.
        if not audio_files:
            return {}
        return _transcribe(audio_files)


def main():
    """Render the upload page and show the transcript of each slide."""
    st.title('Audio2Text')
    st.write('Upload a PowerPoint file (PPTX) to transcribe its audio content')

    uploaded_file = st.file_uploader("Choose a PPTX file", type="pptx")
    if uploaded_file is None:
        return

    if uploaded_file.size > _MAX_UPLOAD_BYTES:
        st.error("File size exceeds 2GB limit")
        return

    st.write("Processing... This may take a while depending on the number and length of audio clips.")

    try:
        transcripts = process_pptx(uploaded_file)
    except zipfile.BadZipFile:
        # A corrupt upload should produce a readable message, not a traceback.
        st.error("The uploaded file is not a valid PPTX archive.")
        return

    if not transcripts:
        st.warning("No audio content found in the PowerPoint file.")
        return

    st.subheader("Transcription Results")
    for slide_num, text in sorted(transcripts.items()):
        st.markdown(f"**Slide {slide_num}**")
        st.write(text)
        st.markdown("---")


if __name__ == "__main__":
    main()