ummtushar committed on
Commit
c2347c5
·
verified ·
1 Parent(s): 3770270

Upload 5 files

Browse files
Files changed (5) hide show
  1. .gitignore +9 -0
  2. README.md +1 -14
  3. main.py +222 -0
  4. packages.txt +1 -0
  5. requirements.txt +8 -0
.gitignore ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ .env
4
+ .DS_Store
5
+
6
+ *.mp4
7
+ *.mp3
8
+
9
+ .env
README.md CHANGED
@@ -1,14 +1 @@
1
- ---
2
- title: Audio2Text
3
- emoji: ⚡
4
- colorFrom: yellow
5
- colorTo: red
6
- sdk: streamlit
7
- sdk_version: 1.42.0
8
- app_file: app.py
9
- pinned: false
10
- license: apache-2.0
11
- short_description: Converts powerpoint lectures into text separated by slides
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # vid2text
 
 
 
 
 
 
 
 
 
 
 
 
 
main.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import zipfile
2
+ # import os
3
+ # import tempfile
4
+ # import whisper
5
+
6
+ # # Specify the input PPTX file and output ZIP file names
7
+ # file = '/Users/tushargupta/Downloads/Lecture 1_Definition and conceptualization.pptx' # Replace with your PPTX file path
8
+ # file = os.path.splitext(file)[0] + '.zip'
9
+
10
+ # # Create dictionary to store audio files
11
+ # audio_files = {}
12
+
13
+ # # Create temporary directory for extraction
14
+ # temp_dir = tempfile.mkdtemp()
15
+
16
+ # # Extract the zip file to temp directory
17
+ # with zipfile.ZipFile(file, 'r') as zip_ref:
18
+ # zip_ref.extractall(temp_dir)
19
+
20
+ # # Path to media folder
21
+ # media_path = os.path.join(temp_dir, 'ppt', 'media')
22
+
23
+ # # Check if media folder exists
24
+ # if os.path.exists(media_path):
25
+ # # Create temporary directory for converted files
26
+ # temp_audio_dir = tempfile.mkdtemp()
27
+
28
+ # # Iterate through slide numbers
29
+ # slide_num = 1
30
+ # while True:
31
+ # # Check for either .mp4 or .m4a file for current slide
32
+ # media_file = None
33
+ # for ext in ['.mp4', '.m4a']:
34
+ # filename = f'media{slide_num}{ext}'
35
+ # file_path = os.path.join(media_path, filename)
36
+ # if os.path.exists(file_path):
37
+ # media_file = file_path
38
+ # break
39
+
40
+ # if not media_file:
41
+ # break
42
+
43
+ # # Create temporary mp3 file
44
+ # temp_mp3 = os.path.join(temp_audio_dir, f'temp_{slide_num}.mp3')
45
+
46
+ # try:
47
+ # # Convert to mp3 using ffmpeg
48
+ # os.system(f'ffmpeg -i "{media_file}" -vn -acodec libmp3lame "{temp_mp3}" -loglevel quiet')
49
+ # # Store the temp mp3 file path in dictionary
50
+ # audio_files[slide_num-1] = temp_mp3
51
+ # except Exception as e:
52
+ # print(f"Error converting slide {slide_num}: {str(e)}")
53
+
54
+ # slide_num += 1
55
+
56
+ # # Load Whisper model
57
+ # model = whisper.load_model("base")
58
+
59
+ # # Dictionary to store transcriptions by slide number
60
+ # slide_transcripts = {}
61
+
62
+ # # Transcribe each audio file
63
+ # for slide_num, audio_file in audio_files.items():
64
+ # # Transcribe the audio file
65
+ # result = model.transcribe(audio_file)
66
+ # # Store transcription text for this slide
67
+ # slide_transcripts[slide_num + 1] = result["text"]
68
+
69
+
70
+ # # Display transcription per slide
71
+ # print("\nTranscription by Slide:")
72
+ # for slide_num, text in sorted(slide_transcripts.items()):
73
+ # print(f"\nSlide {slide_num}:")
74
+ # print(text)
75
+
76
+ import streamlit as st
77
+ import zipfile
78
+ import os
79
+ import tempfile
80
+ import whisper
81
+ from pathlib import Path
82
+
83
def process_pptx(uploaded_file):
    """Transcribe the audio clips embedded in an uploaded PPTX file.

    The upload is treated as a ZIP archive. Media files named
    ``media1``, ``media2``, ... (``.mp4`` or ``.m4a``) under ``ppt/media``
    are converted to mp3 with ffmpeg and transcribed with the Whisper
    "base" model. Progress is reported through Streamlit widgets.

    NOTE(review): this keeps the original assumption that ``media<N>``
    belongs to slide ``N``; the slide relationship files are not
    consulted, so the mapping should be confirmed against real decks.

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the
            PPTX bytes (e.g. the value returned by ``st.file_uploader``).

    Returns:
        A dict mapping slide number (starting at 1) to the transcribed
        text, an empty dict when the media folder holds no matching audio,
        or ``None`` when the archive has no ``ppt/media`` folder.

    Raises:
        zipfile.BadZipFile: If the upload is not a valid ZIP/PPTX archive.
    """
    # Function-scope import so this block does not depend on a change to
    # the file-level import list.
    import subprocess

    # Everything lives in one scratch directory that is removed on exit,
    # whichever return path (or exception) is taken.
    with tempfile.TemporaryDirectory() as work_dir:
        # zipfile does not care about the file extension, so no rename to
        # ".zip" is needed.
        pptx_path = os.path.join(work_dir, 'upload.pptx')
        with open(pptx_path, 'wb') as tmp_pptx:
            tmp_pptx.write(uploaded_file.getvalue())

        extract_dir = os.path.join(work_dir, 'extracted')
        with st.spinner('Extracting PPTX contents...'):
            with zipfile.ZipFile(pptx_path, 'r') as zip_ref:
                # Only the media folder is ever read, so skip the rest of
                # the archive (archive member names always use "/").
                media_members = [
                    name for name in zip_ref.namelist()
                    if name.startswith('ppt/media/')
                ]
                zip_ref.extractall(extract_dir, members=media_members)

        media_path = os.path.join(extract_dir, 'ppt', 'media')
        if not os.path.exists(media_path):
            return None

        slide_media = _find_slide_media(media_path)
        if not slide_media:
            # Nothing to transcribe: avoid loading the Whisper model.
            return {}

        audio_dir = os.path.join(work_dir, 'audio')
        os.makedirs(audio_dir)

        # Progress bar for audio conversion
        progress_bar = st.progress(0)
        status_text = st.empty()

        audio_files = {}
        for done, (slide_num, media_file) in enumerate(slide_media, start=1):
            temp_mp3 = os.path.join(audio_dir, f'temp_{slide_num}.mp3')
            status_text.text(f'Converting audio from slide {slide_num}...')
            try:
                # Argument list (no shell) and check=True so that a failed
                # conversion actually raises instead of being ignored.
                subprocess.run(
                    [
                        'ffmpeg', '-loglevel', 'error', '-y',
                        '-i', media_file,
                        '-vn', '-acodec', 'libmp3lame',
                        temp_mp3,
                    ],
                    check=True,
                    capture_output=True,
                    text=True,
                )
            except (OSError, subprocess.CalledProcessError) as e:
                detail = (getattr(e, 'stderr', '') or str(e)).strip()
                st.error(f"Error converting slide {slide_num}: {detail}")
            else:
                # Only successfully converted slides are transcribed.
                audio_files[slide_num] = temp_mp3
            progress_bar.progress(done / len(slide_media))

        progress_bar.empty()
        status_text.empty()

        if not audio_files:
            return {}

        # Load Whisper model
        with st.spinner('Loading Whisper model...'):
            model = whisper.load_model("base")

        # Progress bar for transcription
        progress_bar = st.progress(0)
        status_text = st.empty()

        slide_transcripts = {}
        for done, (slide_num, audio_file) in enumerate(audio_files.items(), start=1):
            status_text.text(f'Transcribing slide {slide_num}...')
            result = model.transcribe(audio_file)
            slide_transcripts[slide_num] = result["text"]
            progress_bar.progress(done / len(audio_files))

        progress_bar.empty()
        status_text.empty()

        return slide_transcripts


def _find_slide_media(media_path):
    """Return ``[(n, path), ...]`` for consecutive ``media<n>`` audio/video files.

    Scans ``media1``, ``media2``, ... and stops at the first number for
    which neither a ``.mp4`` nor a ``.m4a`` file exists; ``.mp4`` wins when
    both are present.
    """
    found = []
    slide_num = 1
    while True:
        for ext in ('.mp4', '.m4a'):
            candidate = os.path.join(media_path, f'media{slide_num}{ext}')
            if os.path.exists(candidate):
                found.append((slide_num, candidate))
                break
        else:
            # No file for this number: the consecutive run has ended.
            return found
        slide_num += 1
193
+
194
def main():
    """Render the Streamlit page: upload a PPTX, transcribe it, show results.

    Shows an uploader restricted to ``pptx`` files, rejects uploads larger
    than 2 GB, passes the upload to ``process_pptx`` and then prints one
    section per slide, or a warning when nothing was transcribed.
    """
    st.title('Audio2Text')
    st.write('Upload a PowerPoint file (PPTX) to transcribe its audio content')

    uploaded_file = st.file_uploader("Choose a PPTX file", type="pptx")

    # Nothing uploaded yet: leave the page showing just the uploader.
    if uploaded_file is None:
        return

    # Check file size (2GB limit)
    size_limit_bytes = 2 * 1024 * 1024 * 1024
    if uploaded_file.size > size_limit_bytes:
        st.error("File size exceeds 2GB limit")
        return

    st.write("Processing... This may take a while depending on the number and length of audio clips.")

    transcripts = process_pptx(uploaded_file)

    # Covers both None (no media folder) and an empty result.
    if not transcripts:
        st.warning("No audio content found in the PowerPoint file.")
        return

    st.subheader("Transcription Results")
    for slide_num in sorted(transcripts):
        st.markdown(f"**Slide {slide_num}**")
        st.write(transcripts[slide_num])
        st.markdown("---")
220
+
221
+ if __name__ == "__main__":
222
+ main()
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.9.5
2
+ beautifulsoup4==4.12.3
3
+ ffmpeg-python==0.2.0
4
+ Flask==1.1.4
5
+ langchain==0.1.0
6
+ matplotlib==3.8.2
7
+ openai-whisper==20231117
8
+ streamlit==1.31.0