# Spaces: Sleeping
# File size: 5,111 Bytes
# 3a5f6fa
import streamlit as st
import zipfile
import os
import tempfile
import whisper
from pathlib import Path
def _find_slide_media(media_path):
    """Return ``[(slide_num, path), ...]`` for consecutively numbered media.

    Looks for ``media1``, ``media2``, ... with a ``.mp4`` or ``.m4a``
    extension (``.mp4`` wins when both exist) and stops at the first
    number for which neither file is present.
    """
    found = []
    slide_num = 1
    while True:
        candidates = (
            os.path.join(media_path, f'media{slide_num}{ext}')
            for ext in ('.mp4', '.m4a')
        )
        media_file = next((p for p in candidates if os.path.exists(p)), None)
        if media_file is None:
            return found
        found.append((slide_num, media_file))
        slide_num += 1


def _convert_to_mp3(media_file, mp3_path):
    """Write the audio track of *media_file* to *mp3_path* using ffmpeg.

    Raises:
        OSError: if the ffmpeg executable cannot be started.
        RuntimeError: if ffmpeg exits with a non-zero status.
    """
    import subprocess

    # Argument list (no shell) so paths are never re-parsed by a shell, and
    # the exit status is inspected instead of being silently dropped.
    result = subprocess.run(
        ['ffmpeg', '-loglevel', 'quiet', '-i', media_file,
         '-vn', '-acodec', 'libmp3lame', mp3_path],
        check=False,
    )
    if result.returncode != 0:
        raise RuntimeError(f'ffmpeg exited with code {result.returncode}')


def process_pptx(uploaded_file):
    """Transcribe the audio clips embedded in an uploaded PPTX file.

    The upload is unpacked as a zip archive, each ``ppt/media/mediaN``
    ``.mp4``/``.m4a`` file is converted to mp3 with ffmpeg, and every
    successfully converted clip is transcribed with the Whisper "base" model.
    Progress is reported through Streamlit widgets.

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the PPTX
            bytes (as provided by ``st.file_uploader``).

    Returns:
        ``None`` if the archive has no ``ppt/media`` folder; otherwise a dict
        mapping the 1-based media number to the transcribed text. The dict is
        empty when no clip was found or none could be converted.

    Raises:
        zipfile.BadZipFile: if the upload is not a valid zip/PPTX archive.
    """
    import shutil

    # NOTE(review): the lookup assumes mediaN belongs to slide N and that the
    # numbering has no gaps -- confirm against real decks.

    # Every temporary artifact lives under one directory so that a single
    # rmtree in ``finally`` cleans up on every exit path.
    work_dir = tempfile.mkdtemp()
    try:
        pptx_path = os.path.join(work_dir, 'upload.pptx')
        with open(pptx_path, 'wb') as pptx_file:
            pptx_file.write(uploaded_file.getvalue())

        extract_dir = os.path.join(work_dir, 'extracted')
        with st.spinner('Extracting PPTX contents...'):
            # zipfile detects the format from the content, so the .pptx
            # file can be opened directly without renaming it to .zip.
            with zipfile.ZipFile(pptx_path, 'r') as zip_ref:
                zip_ref.extractall(extract_dir)

        media_path = os.path.join(extract_dir, 'ppt', 'media')
        if not os.path.exists(media_path):
            return None

        audio_dir = os.path.join(work_dir, 'audio')
        os.makedirs(audio_dir)

        slide_media = _find_slide_media(media_path)

        # Convert each clip to mp3; only successful conversions are kept.
        audio_files = {}
        progress_bar = st.progress(0)
        status_text = st.empty()
        for done, (slide_num, media_file) in enumerate(slide_media, start=1):
            temp_mp3 = os.path.join(audio_dir, f'temp_{slide_num}.mp3')
            status_text.text(f'Converting audio from slide {slide_num}...')
            try:
                _convert_to_mp3(media_file, temp_mp3)
            except (OSError, RuntimeError) as e:
                st.error(f"Error converting slide {slide_num}: {str(e)}")
            else:
                audio_files[slide_num] = temp_mp3
            progress_bar.progress(done / len(slide_media))
        progress_bar.empty()
        status_text.empty()

        # Nothing to transcribe: skip the expensive model load.
        if not audio_files:
            return {}

        with st.spinner('Loading Whisper model...'):
            model = whisper.load_model("base")

        slide_transcripts = {}
        progress_bar = st.progress(0)
        status_text = st.empty()
        for done, (slide_num, audio_file) in enumerate(audio_files.items(), start=1):
            status_text.text(f'Transcribing slide {slide_num}...')
            result = model.transcribe(audio_file)
            slide_transcripts[slide_num] = result["text"]
            progress_bar.progress(done / len(audio_files))
        progress_bar.empty()
        status_text.empty()

        return slide_transcripts
    finally:
        shutil.rmtree(work_dir, ignore_errors=True)
def main():
    """Render the upload page and show the transcript for each slide.

    Does nothing further until a file is uploaded, rejects uploads larger
    than 2GB, and otherwise passes the file to ``process_pptx`` and prints
    the results in ascending slide order.
    """
    st.title('Audio2Text')
    st.write('Upload a PowerPoint file (PPTX) to transcribe its audio content')

    uploaded_file = st.file_uploader("Choose a PPTX file", type="pptx")
    if uploaded_file is None:
        return

    # Guard: refuse anything above the 2GB ceiling.
    max_bytes = 2 * 1024 ** 3
    if uploaded_file.size > max_bytes:
        st.error("File size exceeds 2GB limit")
        return

    st.write("Processing... This may take a while depending on the number and length of audio clips.")
    transcripts = process_pptx(uploaded_file)

    # None and an empty dict both mean there is nothing to show.
    if not transcripts:
        st.warning("No audio content found in the PowerPoint file.")
        return

    st.subheader("Transcription Results")
    for slide_num in sorted(transcripts):
        st.markdown(f"**Slide {slide_num}**")
        st.write(transcripts[slide_num])
        st.markdown("---")
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()