Spaces:
Sleeping
Sleeping
File size: 7,562 Bytes
c2347c5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 |
# import zipfile
# import os
# import tempfile
# import whisper
# # Specify the input PPTX file and output ZIP file names
# file = '/Users/tushargupta/Downloads/Lecture 1_Definition and conceptualization.pptx' # Replace with your PPTX file path
# file = os.path.splitext(file)[0] + '.zip'
# # Create dictionary to store audio files
# audio_files = {}
# # Create temporary directory for extraction
# temp_dir = tempfile.mkdtemp()
# # Extract the zip file to temp directory
# with zipfile.ZipFile(file, 'r') as zip_ref:
# zip_ref.extractall(temp_dir)
# # Path to media folder
# media_path = os.path.join(temp_dir, 'ppt', 'media')
# # Check if media folder exists
# if os.path.exists(media_path):
# # Create temporary directory for converted files
# temp_audio_dir = tempfile.mkdtemp()
# # Iterate through slide numbers
# slide_num = 1
# while True:
# # Check for either .mp4 or .m4a file for current slide
# media_file = None
# for ext in ['.mp4', '.m4a']:
# filename = f'media{slide_num}{ext}'
# file_path = os.path.join(media_path, filename)
# if os.path.exists(file_path):
# media_file = file_path
# break
# if not media_file:
# break
# # Create temporary mp3 file
# temp_mp3 = os.path.join(temp_audio_dir, f'temp_{slide_num}.mp3')
# try:
# # Convert to mp3 using ffmpeg
# os.system(f'ffmpeg -i "{media_file}" -vn -acodec libmp3lame "{temp_mp3}" -loglevel quiet')
# # Store the temp mp3 file path in dictionary
# audio_files[slide_num-1] = temp_mp3
# except Exception as e:
# print(f"Error converting slide {slide_num}: {str(e)}")
# slide_num += 1
# # Load Whisper model
# model = whisper.load_model("base")
# # Dictionary to store transcriptions by slide number
# slide_transcripts = {}
# # Transcribe each audio file
# for slide_num, audio_file in audio_files.items():
# # Transcribe the audio file
# result = model.transcribe(audio_file)
# # Store transcription text for this slide
# slide_transcripts[slide_num + 1] = result["text"]
# # Display transcription per slide
# print("\nTranscription by Slide:")
# for slide_num, text in sorted(slide_transcripts.items()):
# print(f"\nSlide {slide_num}:")
# print(text)
import os
import shutil
import subprocess
import tempfile
import zipfile
from pathlib import Path

import streamlit as st
import whisper
def process_pptx(uploaded_file):
    """Transcribe the audio clips embedded in an uploaded PPTX file.

    The PPTX (a ZIP archive) is unpacked into a private working directory,
    each ``ppt/media/media<N>.mp4`` / ``.m4a`` file is converted to MP3 with
    ffmpeg, and every MP3 is transcribed with the Whisper "base" model.
    Progress is reported through Streamlit widgets.

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the raw
            bytes of the PPTX (as returned by ``st.file_uploader``).

    Returns:
        ``None`` when the archive has no ``ppt/media`` folder; otherwise a
        dict mapping the 1-based media number to its transcribed text. The
        dict is empty when no audio could be found or converted.

    Raises:
        zipfile.BadZipFile: If the uploaded bytes are not a valid ZIP/PPTX.
    """
    # Everything temporary lives under one directory so that a single
    # rmtree in ``finally`` cleans up on every exit path, including errors.
    work_dir = tempfile.mkdtemp()
    try:
        # zipfile does not care about the extension, so the upload is
        # written straight to its final name instead of being renamed.
        zip_path = os.path.join(work_dir, 'upload.zip')
        with open(zip_path, 'wb') as zip_file:
            zip_file.write(uploaded_file.getvalue())

        extract_dir = os.path.join(work_dir, 'extracted')
        with st.spinner('Extracting PPTX contents...'):
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(extract_dir)

        media_path = os.path.join(extract_dir, 'ppt', 'media')
        if not os.path.exists(media_path):
            return None

        media_files = _find_slide_media(media_path)
        audio_files = _convert_slides_to_mp3(
            media_files, os.path.join(work_dir, 'audio')
        )
        if not audio_files:
            # Nothing to transcribe: skip the expensive model load.
            return {}

        with st.spinner('Loading Whisper model...'):
            model = whisper.load_model("base")

        slide_transcripts = {}
        progress_bar = st.progress(0)
        status_text = st.empty()
        for done, (slide_num, audio_file) in enumerate(
            audio_files.items(), start=1
        ):
            status_text.text(f'Transcribing slide {slide_num}...')
            result = model.transcribe(audio_file)
            slide_transcripts[slide_num] = result["text"]
            progress_bar.progress(done / len(audio_files))
        progress_bar.empty()
        status_text.empty()

        return slide_transcripts
    finally:
        shutil.rmtree(work_dir, ignore_errors=True)


def _find_slide_media(media_path):
    """Return {number: path} for media1, media2, ... found in media_path.

    For each number ``.mp4`` is preferred over ``.m4a``. The scan stops at
    the first number with neither file.
    NOTE(review): this assumes audio files are numbered consecutively from 1
    and correspond one-to-one to slides -- confirm against real decks.
    """
    found = {}
    slide_num = 1
    while True:
        candidates = (
            os.path.join(media_path, f'media{slide_num}{ext}')
            for ext in ('.mp4', '.m4a')
        )
        media_file = next((p for p in candidates if os.path.exists(p)), None)
        if media_file is None:
            return found
        found[slide_num] = media_file
        slide_num += 1


def _convert_slides_to_mp3(media_files, audio_dir):
    """Convert each media file to MP3 in audio_dir; return {number: mp3 path}.

    Conversions that fail are reported with ``st.error`` and left out of the
    result, so later stages only see files that actually exist.
    """
    os.makedirs(audio_dir, exist_ok=True)
    audio_files = {}
    progress_bar = st.progress(0)
    status_text = st.empty()
    for done, (slide_num, media_file) in enumerate(media_files.items(), start=1):
        temp_mp3 = os.path.join(audio_dir, f'temp_{slide_num}.mp3')
        status_text.text(f'Converting audio from slide {slide_num}...')
        try:
            # Argument list (no shell) avoids quoting problems, and
            # check=True turns a non-zero ffmpeg exit into an exception.
            subprocess.run(
                [
                    'ffmpeg', '-loglevel', 'quiet',
                    '-i', media_file,
                    '-vn', '-acodec', 'libmp3lame',
                    temp_mp3,
                ],
                check=True,
                stdin=subprocess.DEVNULL,
            )
        except (subprocess.CalledProcessError, OSError) as e:
            st.error(f"Error converting slide {slide_num}: {str(e)}")
        else:
            audio_files[slide_num] = temp_mp3
        progress_bar.progress(done / len(media_files))
    progress_bar.empty()
    status_text.empty()
    return audio_files
def main():
    """Render the Audio2Text page: upload a PPTX and show its transcripts."""
    st.title('Audio2Text')
    st.write('Upload a PowerPoint file (PPTX) to transcribe its audio content')

    uploaded_file = st.file_uploader("Choose a PPTX file", type="pptx")
    if uploaded_file is None:
        # Nothing uploaded yet; wait for the next rerun.
        return

    # Reject uploads above 2 GiB before doing any work.
    size_limit = 2 * 1024 * 1024 * 1024
    if uploaded_file.size > size_limit:
        st.error("File size exceeds 2GB limit")
        return

    st.write("Processing... This may take a while depending on the number and length of audio clips.")
    transcripts = process_pptx(uploaded_file)

    # Both None and an empty dict mean there is nothing to display.
    if not transcripts:
        st.warning("No audio content found in the PowerPoint file.")
        return

    st.subheader("Transcription Results")
    for number in sorted(transcripts):
        st.markdown(f"**Slide {number}**")
        st.write(transcripts[number])
        st.markdown("---")
# Run the Streamlit app only when this file is executed as a script.
if __name__ == "__main__":
    main()