ummtushar committed on
Commit
3a5f6fa
·
verified ·
1 Parent(s): 437996a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +147 -0
app.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import zipfile
3
+ import os
4
+ import tempfile
5
+ import whisper
6
+ from pathlib import Path
7
+
8
def process_pptx(uploaded_file):
    """Extract the audio embedded in an uploaded PPTX and transcribe it.

    The upload is opened as a ZIP archive and unpacked. Files named
    ``media1``, ``media2``, ... (with a ``.mp4`` or ``.m4a`` extension)
    are looked up in ``ppt/media`` in ascending order, stopping at the
    first missing index. Each one is converted to MP3 with ``ffmpeg`` and
    transcribed with the Whisper ``base`` model.

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the raw
            PPTX bytes (in this app, the value from ``st.file_uploader``).

    Returns:
        A dict mapping the 1-based media index (shown to the user as the
        slide number) to its transcription text. The dict is empty when
        ``ppt/media`` exists but holds no convertible audio. ``None`` is
        returned when the archive has no ``ppt/media`` folder at all.

    Raises:
        zipfile.BadZipFile: If the uploaded bytes are not a valid archive.
    """
    # Function-scope import keeps this block self-contained.
    import subprocess

    audio_exts = ('.mp4', '.m4a')

    # A single scratch directory holds the upload, the extracted archive and
    # the converted audio. It is removed on exit, including on errors and
    # early returns, so nothing is left behind in the system temp folder.
    with tempfile.TemporaryDirectory() as work_dir:
        pptx_path = os.path.join(work_dir, 'upload.pptx')
        with open(pptx_path, 'wb') as tmp_pptx:
            tmp_pptx.write(uploaded_file.getvalue())

        extract_dir = os.path.join(work_dir, 'extracted')
        with st.spinner('Extracting PPTX contents...'):
            # zipfile does not care about the extension, so no rename to
            # ".zip" is needed.
            with zipfile.ZipFile(pptx_path, 'r') as zip_ref:
                zip_ref.extractall(extract_dir)

        media_path = os.path.join(extract_dir, 'ppt', 'media')
        if not os.path.exists(media_path):
            return None

        # Collect media1, media2, ... in one pass, stopping at the first gap.
        media_files = []
        while True:
            index = len(media_files) + 1
            candidates = (
                os.path.join(media_path, f'media{index}{ext}')
                for ext in audio_exts
            )
            found = next((p for p in candidates if os.path.exists(p)), None)
            if found is None:
                break
            media_files.append(found)

        audio_dir = os.path.join(work_dir, 'audio')
        os.mkdir(audio_dir)

        # Maps 1-based media index -> path of the converted mp3.
        audio_files = {}

        progress_bar = st.progress(0)
        status_text = st.empty()

        for slide_num, media_file in enumerate(media_files, start=1):
            temp_mp3 = os.path.join(audio_dir, f'temp_{slide_num}.mp3')
            status_text.text(f'Converting audio from slide {slide_num}...')
            try:
                # Argument list (no shell) avoids quoting problems, and
                # check=True turns a non-zero ffmpeg exit into an exception.
                subprocess.run(
                    [
                        'ffmpeg', '-i', media_file,
                        '-vn', '-acodec', 'libmp3lame', temp_mp3,
                        '-loglevel', 'quiet',
                    ],
                    check=True,
                )
            except (OSError, subprocess.CalledProcessError) as e:
                # OSError covers a missing ffmpeg binary. Report and skip the
                # clip so a failed conversion is never handed to Whisper.
                st.error(f"Error converting slide {slide_num}: {str(e)}")
            else:
                audio_files[slide_num] = temp_mp3
            progress_bar.progress(slide_num / len(media_files))

        progress_bar.empty()
        status_text.empty()

        if not audio_files:
            # Nothing to transcribe: skip the expensive model load.
            return {}

        with st.spinner('Loading Whisper model...'):
            model = whisper.load_model("base")

        slide_transcripts = {}

        progress_bar = st.progress(0)
        status_text = st.empty()

        # Transcription must happen inside the TemporaryDirectory context,
        # while the mp3 files still exist.
        for done, (slide_num, audio_file) in enumerate(audio_files.items(), start=1):
            status_text.text(f'Transcribing slide {slide_num}...')
            result = model.transcribe(audio_file)
            slide_transcripts[slide_num] = result["text"]
            progress_bar.progress(done / len(audio_files))

        progress_bar.empty()
        status_text.empty()

        return slide_transcripts
118
+
119
def main():
    """Render the Streamlit page: upload a PPTX, transcribe it, show results.

    Uploads larger than 2 GB are rejected with an error. Otherwise the file
    is passed to ``process_pptx`` and each returned transcript is displayed
    under its slide number in ascending order. A warning is shown when no
    transcripts come back.
    """
    max_upload_bytes = 2 * 1024 * 1024 * 1024  # 2GB limit

    st.title('Audio2Text')
    st.write('Upload a PowerPoint file (PPTX) to transcribe its audio content')

    uploaded_file = st.file_uploader("Choose a PPTX file", type="pptx")
    if uploaded_file is None:
        # Nothing uploaded yet; leave the page showing only the uploader.
        return

    if uploaded_file.size > max_upload_bytes:
        st.error("File size exceeds 2GB limit")
        return

    st.write("Processing... This may take a while depending on the number and length of audio clips.")

    transcripts = process_pptx(uploaded_file)

    # Both None and an empty dict mean there is nothing to display.
    if not transcripts:
        st.warning("No audio content found in the PowerPoint file.")
        return

    st.subheader("Transcription Results")
    for slide_num in sorted(transcripts):
        st.markdown(f"**Slide {slide_num}**")
        st.write(transcripts[slide_num])
        st.markdown("---")
145
+
146
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()