Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,21 +6,20 @@ import shutil
|
|
6 |
import re
|
7 |
import logging
|
8 |
from pathlib import Path
|
9 |
-
from PIL import Image # For checking image validity
|
10 |
try:
|
11 |
import mutagen
|
12 |
from mutagen.mp3 import MP3, EasyMP3
|
13 |
from mutagen.oggvorbis import OggVorbis
|
14 |
from mutagen.flac import FLAC
|
15 |
from mutagen.mp4 import MP4, MP4Cover
|
16 |
-
from mutagen.id3 import ID3, APIC, error as ID3Error
|
17 |
MUTAGEN_AVAILABLE = True
|
18 |
except ImportError:
|
19 |
MUTAGEN_AVAILABLE = False
|
20 |
logging.warning("Mutagen library not found. Cover art embedding will be disabled.")
|
21 |
logging.warning("Install it using: pip install mutagen")
|
22 |
|
23 |
-
|
24 |
# --- Configuration & Logging ---
|
25 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
26 |
|
@@ -29,20 +28,24 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
|
|
29 |
def check_command(command):
|
30 |
"""Checks if a command exists in the system's PATH."""
|
31 |
try:
|
32 |
-
# Use a more reliable check for command existence, sometimes --version fails
|
33 |
-
# On Windows, 'where' command; on Unix-like, 'command -v' or 'which'
|
34 |
if os.name == 'nt':
|
35 |
-
|
|
|
36 |
else:
|
37 |
-
# 'command -v' is generally preferred
|
38 |
-
subprocess.run(['command', '-v', command], check=True, capture_output=True)
|
39 |
logging.info(f"Command '{command}' found.")
|
40 |
return True
|
41 |
-
except
|
42 |
-
logging.error(f"Command '{command}'
|
43 |
-
|
|
|
|
|
44 |
return False
|
45 |
-
except
|
|
|
|
|
|
|
46 |
logging.error(f"Unexpected error checking for command '{command}': {e}")
|
47 |
return False
|
48 |
|
@@ -51,42 +54,61 @@ def get_espeak_voices():
|
|
51 |
"""Gets available espeak-ng voices and their languages."""
|
52 |
voices = {}
|
53 |
try:
|
54 |
-
|
55 |
-
|
|
|
|
|
56 |
# Example line format: P L V Language Code Age/Gender VoiceName File Other Langs
|
57 |
# 2 y en-US M american-english-us Mbrola/us1 (en 10)
|
58 |
# 1 af M afrikaans Afrikaans
|
59 |
-
pattern
|
60 |
-
|
61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
if match:
|
63 |
-
code
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
if display_name not in voices:
|
67 |
voices[display_name] = code
|
68 |
else:
|
69 |
-
#
|
70 |
parts = line.split()
|
71 |
if len(parts) >= 4 and parts[0].isdigit():
|
72 |
code = parts[1]
|
73 |
lang_name = parts[3]
|
74 |
-
display_name = f"{lang_name.strip()} ({code})"
|
75 |
if display_name not in voices:
|
76 |
voices[display_name] = code
|
|
|
|
|
|
|
77 |
|
78 |
if not voices:
|
79 |
logging.warning("Could not parse any voices from 'espeak-ng --voices'. Using fallback list.")
|
80 |
-
|
81 |
-
voices = {"English (en)": "en", "Spanish (es)": "es", "French (fr)": "fr", "German (de)": "de"}
|
82 |
|
83 |
# Sort voices alphabetically by display name
|
84 |
sorted_voices = dict(sorted(voices.items()))
|
|
|
85 |
return sorted_voices
|
86 |
|
87 |
-
except (FileNotFoundError, subprocess.CalledProcessError, Exception) as e:
|
88 |
logging.error(f"Error getting espeak-ng voices: {e}")
|
89 |
-
# Provide a basic fallback list if the command fails
|
90 |
return {"English (en)": "en", "Spanish (es)": "es", "French (fr)": "fr", "German (de)": "de"}
|
91 |
|
92 |
# --- Main Conversion Logic ---
|
@@ -96,365 +118,392 @@ def convert_ebook_to_audio(ebook_file, language_display, output_format, embed_co
|
|
96 |
Converts an ebook file to an audiobook using Calibre and espeak-ng.
|
97 |
"""
|
98 |
if not ebook_file:
|
99 |
-
# Return None for audio, None for cover, and the error message
|
100 |
return None, None, "**Error:** No ebook file provided."
|
101 |
|
102 |
-
# Check required commands
|
103 |
calibre_convert_ok = check_command("ebook-convert")
|
104 |
-
calibre_meta_ok =
|
105 |
espeak_ok = check_command("espeak-ng")
|
106 |
-
lame_ok =
|
107 |
-
oggenc_ok =
|
108 |
|
109 |
missing = []
|
110 |
if not calibre_convert_ok: missing.append("Calibre ('ebook-convert')")
|
111 |
-
if not calibre_meta_ok
|
112 |
if not espeak_ok: missing.append("espeak-ng")
|
113 |
if not lame_ok and output_format == 'mp3': missing.append("LAME (for MP3)")
|
114 |
if not oggenc_ok and output_format == 'ogg': missing.append("oggenc (for OGG)")
|
115 |
|
116 |
if missing:
|
117 |
-
error_msg = f"**Error:** Missing required command(s):\n- {', '.join(missing)}\n\nPlease
|
118 |
logging.error(error_msg.replace("**Error:** ","").replace("\n- "," ").replace("\n"," ")) # Log plain text
|
119 |
-
# Return None for audio, None for cover, and the error message
|
120 |
return None, None, error_msg
|
121 |
|
122 |
|
123 |
temp_dir = tempfile.mkdtemp(prefix="ebook_audio_")
|
124 |
logging.info(f"Created temporary directory: {temp_dir}")
|
125 |
-
status_updates = ["Conversion started..."]
|
126 |
-
cover_image_path_final = None
|
127 |
-
audio_output_path_final = None # Keep track of the final audio path
|
128 |
|
129 |
try:
|
130 |
input_ebook_path = ebook_file.name # Gradio provides a temp path for the upload
|
131 |
-
|
|
|
132 |
txt_output_path = os.path.join(temp_dir, f"{base_filename}.txt")
|
133 |
-
|
|
|
134 |
audio_output_path = os.path.join(temp_dir, f"{base_filename}.{output_format}")
|
135 |
|
136 |
# --- Step 1: Extract Cover Art (Optional) ---
|
137 |
cover_extracted = False
|
138 |
-
if embed_cover and calibre_meta_ok:
|
139 |
-
progress(0.1, desc="Extracting cover art (optional)")
|
140 |
-
status_updates.append("Attempting to extract cover art...")
|
141 |
try:
|
142 |
cmd_meta = ['ebook-meta', input_ebook_path, '--get-cover', cover_output_path_temp]
|
143 |
logging.info(f"Running cover extraction: {' '.join(cmd_meta)}")
|
144 |
-
|
|
|
145 |
if os.path.exists(cover_output_path_temp) and os.path.getsize(cover_output_path_temp) > 0:
|
146 |
-
# Validate
|
147 |
try:
|
148 |
img = Image.open(cover_output_path_temp)
|
149 |
-
img.verify() #
|
150 |
img.close() # Need to close after verify
|
151 |
-
|
|
|
152 |
img = Image.open(cover_output_path_temp)
|
153 |
-
|
154 |
img.close()
|
155 |
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
cover_image_path_final = cover_output_path_temp # Update final path for display
|
169 |
-
status_updates.append("✅ Cover art extracted and converted to JPG.")
|
170 |
-
logging.info(f"Cover art extracted and converted to JPG: {cover_image_path_final}")
|
171 |
-
|
172 |
-
else:
|
173 |
-
logging.error("Failed to convert cover art to JPG.")
|
174 |
-
status_updates.append("⚠️ Could not convert extracted cover art to JPG. Will skip embedding.")
|
175 |
-
if os.path.exists(cover_output_path_temp): os.remove(cover_output_path_temp) # Clean up original if unusable
|
176 |
-
|
177 |
-
except Exception as convert_err:
|
178 |
-
logging.error(f"Error converting cover image: {convert_err}")
|
179 |
-
status_updates.append(f"⚠️ Error converting cover image: {convert_err}. Will skip embedding.")
|
180 |
-
if os.path.exists(cover_output_path_temp): os.remove(cover_output_path_temp) # Clean up original
|
181 |
-
|
182 |
-
else:
|
183 |
-
cover_extracted = True
|
184 |
-
cover_image_path_final = cover_output_path_temp # Use original path
|
185 |
-
status_updates.append("✅ Cover art extracted successfully.")
|
186 |
-
logging.info(f"Cover art extracted to {cover_image_path_final} (Format: {fmt})")
|
187 |
-
|
188 |
-
except (IOError, SyntaxError, Image.UnidentifiedImageError) as img_err:
|
189 |
-
logging.warning(f"Extracted file is not a valid image or couldn't be processed: {img_err}")
|
190 |
-
status_updates.append("⚠️ Extracted 'cover' file is not a valid image. Will skip embedding.")
|
191 |
if os.path.exists(cover_output_path_temp): os.remove(cover_output_path_temp) # Clean up invalid file
|
|
|
|
|
|
|
192 |
else:
|
193 |
-
status_updates.append("ℹ️ No cover art found in the ebook metadata.")
|
194 |
logging.info("ebook-meta ran but did not produce a cover file or it was empty.")
|
|
|
195 |
|
196 |
-
|
|
|
|
|
197 |
except subprocess.CalledProcessError as e:
|
198 |
-
stderr_decoded = e.stderr.decode(errors='ignore') if e.stderr else "No stderr"
|
199 |
-
status_updates.append(f"⚠️ Failed to extract cover art. Error: {stderr_decoded}")
|
200 |
logging.warning(f"ebook-meta failed: {stderr_decoded}")
|
201 |
except Exception as e:
|
202 |
-
status_updates.append(f"⚠️ An unexpected error occurred during cover extraction: {e}")
|
203 |
logging.error(f"Unexpected error during cover extraction: {e}", exc_info=True)
|
|
|
|
|
|
|
|
|
204 |
elif embed_cover and not calibre_meta_ok:
|
205 |
-
status_updates.append("ℹ️ Cover art embedding requested, but 'ebook-meta' not found.")
|
|
|
|
|
206 |
|
207 |
# --- Step 2: Convert Ebook to TXT ---
|
208 |
-
progress(0.3, desc="Converting ebook to TXT")
|
209 |
-
status_updates.append("Converting ebook to plain text...")
|
210 |
try:
|
211 |
-
#
|
212 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
213 |
logging.info(f"Running ebook conversion: {' '.join(cmd_convert)}")
|
214 |
-
|
215 |
-
|
|
|
216 |
if result_convert.stdout: logging.info(f"ebook-convert stdout: {result_convert.stdout.strip()}")
|
217 |
if result_convert.stderr: logging.warning(f"ebook-convert stderr: {result_convert.stderr.strip()}")
|
218 |
-
status_updates.append("✅ Ebook converted to TXT.")
|
219 |
logging.info("Ebook successfully converted to TXT.")
|
|
|
|
|
|
|
|
|
|
|
220 |
except subprocess.CalledProcessError as e:
|
221 |
-
stderr_decoded = e.stderr.decode(errors='ignore') if e.stderr else "No stderr"
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
|
|
226 |
return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
|
227 |
except Exception as e:
|
228 |
error_msg = f"**Error:** An unexpected error occurred during ebook conversion:\n{e}"
|
229 |
-
status_updates.append(f"❌ Unexpected conversion error.")
|
230 |
logging.error(f"Unexpected error during ebook conversion: {e}", exc_info=True)
|
231 |
return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
|
232 |
|
233 |
# Check if TXT file was actually created and is not empty
|
234 |
if not os.path.exists(txt_output_path) or os.path.getsize(txt_output_path) == 0:
|
235 |
-
error_msg = "**Error:** Calibre finished, but the output TXT file is missing or empty.\nThis can happen with image-based ebooks (like comics/
|
236 |
-
status_updates.append(f"❌ TXT output empty/missing.")
|
237 |
logging.error("Calibre finished, but the output TXT file is missing or empty.")
|
238 |
return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
|
239 |
|
240 |
# --- Step 3: Convert TXT to Audio ---
|
241 |
-
progress(0.6, desc="Converting TXT to Audio")
|
242 |
-
status_updates.append("Converting text to speech...")
|
243 |
|
244 |
voice_code = available_voices.get(language_display, 'en') # Get code from display name
|
|
|
245 |
cmd_speak = ['espeak-ng', '-v', voice_code, '-f', txt_output_path]
|
246 |
-
#
|
|
|
247 |
|
248 |
try:
|
249 |
logging.info(f"Preparing audio command for format: {output_format}")
|
|
|
|
|
|
|
250 |
if output_format == 'wav':
|
251 |
cmd_speak.extend(['-w', audio_output_path])
|
252 |
logging.info(f"Running espeak-ng (WAV): {' '.join(cmd_speak)}")
|
253 |
-
result_speak = subprocess.run(cmd_speak, check=True, capture_output=True)
|
254 |
-
|
255 |
-
if result_speak.stdout: logging.info(f"espeak-ng stdout: {result_speak.stdout.decode(errors='ignore').strip()}")
|
256 |
-
if result_speak.stderr: logging.warning(f"espeak-ng stderr: {result_speak.stderr.decode(errors='ignore').strip()}")
|
257 |
|
258 |
elif output_format == 'mp3':
|
259 |
-
|
260 |
-
|
261 |
-
|
|
|
|
|
|
|
262 |
ps_speak = subprocess.Popen(cmd_speak, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
|
|
263 |
ps_lame = subprocess.Popen(cmd_lame, stdin=ps_speak.stdout, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
264 |
|
265 |
-
# Allow ps_speak to
|
266 |
if ps_speak.stdout:
|
267 |
ps_speak.stdout.close()
|
268 |
|
269 |
-
# Capture
|
270 |
-
|
271 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
272 |
speak_stderr_bytes = ps_speak.stderr.read() if ps_speak.stderr else b""
|
273 |
-
ps_speak.wait() #
|
274 |
if ps_speak.stderr: ps_speak.stderr.close()
|
275 |
|
276 |
-
# Decode stderr for logging
|
277 |
lame_stderr_str = lame_stderr_bytes.decode(errors='ignore').strip()
|
278 |
speak_stderr_str = speak_stderr_bytes.decode(errors='ignore').strip()
|
279 |
|
280 |
-
# Check return codes
|
281 |
if ps_lame.returncode != 0:
|
282 |
-
|
283 |
-
raise subprocess.CalledProcessError(ps_lame.returncode, cmd_lame,
|
284 |
if ps_speak.returncode != 0:
|
285 |
-
|
286 |
-
raise subprocess.CalledProcessError(ps_speak.returncode, cmd_speak, stderr=speak_stderr_bytes)
|
287 |
|
288 |
-
# Log warnings from stderr
|
289 |
-
if lame_stderr_str:
|
290 |
-
|
291 |
-
if speak_stderr_str:
|
292 |
-
logging.warning(f"espeak-ng stderr: {speak_stderr_str}")
|
293 |
|
294 |
elif output_format == 'ogg':
|
295 |
-
|
296 |
-
|
297 |
-
|
|
|
|
|
|
|
298 |
ps_speak = subprocess.Popen(cmd_speak, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
299 |
ps_ogg = subprocess.Popen(cmd_ogg, stdin=ps_speak.stdout, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
300 |
|
301 |
-
# Allow ps_speak to receive SIGPIPE if oggenc exits early.
|
302 |
if ps_speak.stdout:
|
303 |
ps_speak.stdout.close()
|
304 |
|
305 |
-
|
306 |
-
|
307 |
-
|
|
|
|
|
|
|
|
|
|
|
308 |
speak_stderr_bytes = ps_speak.stderr.read() if ps_speak.stderr else b""
|
309 |
-
ps_speak.wait()
|
310 |
if ps_speak.stderr: ps_speak.stderr.close()
|
311 |
|
312 |
-
# Decode stderr for logging
|
313 |
ogg_stderr_str = ogg_stderr_bytes.decode(errors='ignore').strip()
|
314 |
speak_stderr_str = speak_stderr_bytes.decode(errors='ignore').strip()
|
315 |
|
316 |
-
# Now check return codes safely
|
317 |
if ps_ogg.returncode != 0:
|
318 |
-
|
319 |
-
raise subprocess.CalledProcessError(ps_ogg.returncode, cmd_ogg,
|
320 |
if ps_speak.returncode != 0:
|
321 |
-
|
322 |
-
raise subprocess.CalledProcessError(ps_speak.returncode, cmd_speak, stderr=speak_stderr_bytes)
|
323 |
|
324 |
-
|
325 |
-
if
|
326 |
-
logging.warning(f"oggenc stderr: {ogg_stderr_str}")
|
327 |
-
if speak_stderr_str:
|
328 |
-
logging.warning(f"espeak-ng stderr: {speak_stderr_str}")
|
329 |
|
330 |
else:
|
331 |
-
raise ValueError(f"Unsupported output format: {output_format}")
|
332 |
|
333 |
-
status_updates.append("✅ Text converted to audio.")
|
334 |
logging.info(f"Text successfully converted to {output_format.upper()}.")
|
335 |
|
336 |
except subprocess.CalledProcessError as e:
|
337 |
-
|
338 |
-
command_name = e.cmd[0] if isinstance(e.cmd, list) else e.cmd
|
339 |
-
# Decode stderr/stdout safely (they might be bytes or None)
|
340 |
stderr_str = e.stderr.decode(errors='ignore').strip() if isinstance(e.stderr, bytes) else (e.stderr or "")
|
341 |
stdout_str = e.stdout.decode(errors='ignore').strip() if isinstance(e.stdout, bytes) else (e.stdout or "")
|
342 |
error_details = stderr_str or stdout_str or "No output/error captured."
|
343 |
-
|
344 |
-
# Construct error message carefully
|
345 |
exit_status_str = f"exit status {e.returncode}" if e.returncode is not None else "unknown exit status"
|
346 |
cmd_str = ' '.join(e.cmd) if isinstance(e.cmd, list) else e.cmd
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
|
|
|
|
|
|
|
|
359 |
|
360 |
except FileNotFoundError as e:
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
|
|
365 |
return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
|
366 |
except Exception as e:
|
367 |
-
error_msg = f"**Error:** An unexpected error occurred during audio generation:\n{e}"
|
368 |
-
status_updates.append(f"❌ Unexpected audio error.")
|
369 |
logging.error(f"An unexpected error occurred during audio generation: {e}", exc_info=True)
|
370 |
return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
|
371 |
|
372 |
-
#
|
373 |
-
if not os.path.exists(audio_output_path) or os.path.getsize(audio_output_path) <
|
374 |
-
error_msg = f"**Error:** Audio generation command finished, but the output file '{Path(audio_output_path).name}' is missing or too small
|
375 |
-
status_updates.append(f"❌ Audio output missing
|
376 |
-
logging.error(f"Audio output file missing or too small: {audio_output_path}")
|
|
|
|
|
|
|
377 |
return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
|
378 |
|
379 |
|
380 |
# --- Step 4: Embed Cover Art (Optional) ---
|
381 |
-
if embed_cover and cover_extracted and MUTAGEN_AVAILABLE and os.path.exists(cover_image_path_final):
|
382 |
-
progress(0.9, desc="Embedding cover art")
|
383 |
-
status_updates.append("Embedding cover art into audio file...")
|
384 |
try:
|
385 |
with open(cover_image_path_final, 'rb') as img_f:
|
386 |
cover_data = img_f.read()
|
387 |
|
388 |
-
# Determine mimetype using
|
389 |
-
mime_type =
|
|
|
390 |
try:
|
391 |
img = Image.open(cover_image_path_final)
|
392 |
mime_type = Image.MIME.get(img.format)
|
|
|
|
|
393 |
img.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
394 |
except Exception as pil_err:
|
395 |
-
logging.warning(f"Could not determine MIME type using PIL: {pil_err}")
|
396 |
-
|
397 |
-
if not mime_type:
|
398 |
-
# Basic fallback based on extension
|
399 |
-
ext = Path(cover_image_path_final).suffix.lower()
|
400 |
-
if ext == ".jpg" or ext == ".jpeg":
|
401 |
-
mime_type = 'image/jpeg'
|
402 |
-
elif ext == ".png":
|
403 |
-
mime_type = 'image/png'
|
404 |
-
else:
|
405 |
-
mime_type = 'image/jpeg' # Default guess if extension unknown/unsupported
|
406 |
-
logging.warning(f"Defaulting cover MIME type to {mime_type}")
|
407 |
-
|
408 |
|
409 |
logging.info(f"Attempting to embed cover art ({mime_type}) into {audio_output_path}")
|
410 |
-
audio = mutagen.File(audio_output_path, easy=
|
411 |
|
412 |
if audio is None:
|
413 |
-
|
|
|
|
|
|
|
414 |
|
415 |
-
#
|
416 |
try:
|
417 |
-
|
418 |
-
|
419 |
-
if audio.
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
|
440 |
-
|
441 |
-
if
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
|
|
|
|
|
|
449 |
save_needed = False
|
450 |
-
if isinstance(audio, (MP3, EasyMP3)):
|
451 |
-
if audio.tags is None: audio.add_tags()
|
452 |
audio.tags.add(
|
453 |
APIC(
|
454 |
-
encoding=3, # 3 is for
|
455 |
mime=mime_type,
|
456 |
-
type=
|
457 |
-
desc=
|
458 |
data=cover_data
|
459 |
)
|
460 |
)
|
@@ -462,118 +511,96 @@ def convert_ebook_to_audio(ebook_file, language_display, output_format, embed_co
|
|
462 |
elif isinstance(audio, FLAC):
|
463 |
pic = mutagen.flac.Picture()
|
464 |
pic.data = cover_data
|
465 |
-
pic.type =
|
466 |
pic.mime = mime_type
|
467 |
-
|
|
|
|
|
468 |
audio.add_picture(pic)
|
469 |
save_needed = True
|
470 |
elif isinstance(audio, OggVorbis):
|
471 |
-
# Ogg uses base64 encoded
|
472 |
import base64
|
473 |
-
# This field expects a FLAC Picture block, base64 encoded.
|
474 |
pic = mutagen.flac.Picture()
|
475 |
pic.data = cover_data
|
476 |
-
pic.type =
|
477 |
pic.mime = mime_type
|
478 |
-
|
479 |
-
|
480 |
-
pic.
|
481 |
-
|
482 |
-
# Determine color depth (e.g., 24 for RGB, 32 for RGBA)
|
483 |
-
pic.depth = {'RGB': 24, 'RGBA': 32, 'L': 8}.get(img.mode, 24)
|
484 |
-
img.close()
|
485 |
-
|
486 |
-
# Encode the full picture block
|
487 |
-
audio["metadata_block_picture"] = [base64.b64encode(pic.write()).decode("ascii")]
|
488 |
save_needed = True
|
489 |
-
|
490 |
-
|
491 |
-
|
492 |
-
|
493 |
-
elif mime_type == 'image/png':
|
494 |
-
pic_format = MP4Cover.FORMAT_PNG
|
495 |
-
else:
|
496 |
-
pic_format = MP4Cover.FORMAT_UNDEFINED # Or skip if unknown
|
497 |
-
logging.warning(f"Unsupported cover image format ({mime_type}) for MP4 embedding.")
|
498 |
-
|
499 |
if pic_format != MP4Cover.FORMAT_UNDEFINED:
|
500 |
audio['covr'] = [MP4Cover(cover_data, imageformat=pic_format)]
|
501 |
save_needed = True
|
|
|
|
|
|
|
502 |
|
503 |
-
# Add other metadata (optional) - Use easy=True for simpler access
|
504 |
-
try:
|
505 |
-
audio_easy = mutagen.File(audio_output_path, easy=True)
|
506 |
-
if audio_easy is not None:
|
507 |
-
if 'title' not in audio_easy or not audio_easy['title']:
|
508 |
-
audio_easy['title'] = base_filename
|
509 |
-
save_needed = True
|
510 |
-
if 'artist' not in audio_easy or not audio_easy['artist']:
|
511 |
-
audio_easy['artist'] = "Generated Audiobook"
|
512 |
-
save_needed = True
|
513 |
-
if save_needed:
|
514 |
-
audio_easy.save() # Save easy tags if modified
|
515 |
-
save_needed = False # Prevent double save if only easy tags changed
|
516 |
-
except Exception as tag_err:
|
517 |
-
logging.warning(f"Could not set basic title/artist tags: {tag_err}")
|
518 |
|
519 |
-
|
520 |
-
if save_needed and audio is not None:
|
521 |
audio.save()
|
522 |
-
|
523 |
-
|
524 |
-
|
525 |
-
|
526 |
-
|
527 |
-
|
|
|
|
|
528 |
logging.warning(f"Failed to embed cover art: {e}", exc_info=True)
|
529 |
except Exception as e:
|
530 |
-
status_updates.append(f"⚠️ An unexpected error occurred during cover art embedding
|
531 |
logging.error(f"Unexpected error during cover embedding: {e}", exc_info=True)
|
532 |
elif embed_cover and not cover_extracted:
|
533 |
-
status_updates.append("ℹ️ Cover art embedding skipped (no cover extracted or invalid).")
|
534 |
elif embed_cover and not MUTAGEN_AVAILABLE:
|
535 |
-
|
|
|
536 |
|
537 |
|
538 |
# --- Step 5: Prepare final output ---
|
539 |
-
progress(1.0, desc="Complete")
|
540 |
-
status_updates.append("
|
541 |
audio_output_path_final = audio_output_path # Mark the path as final
|
542 |
|
543 |
# Return paths for Gradio components
|
544 |
final_status = "\n".join(status_updates)
|
545 |
-
logging.info(f"Returning audio: {audio_output_path_final}, cover: {cover_image_path_final}")
|
546 |
# Return audio path for Audio component, cover path for Image, status for Textbox
|
547 |
return audio_output_path_final, cover_image_path_final, final_status
|
548 |
|
549 |
except Exception as e:
|
550 |
-
|
551 |
-
|
|
|
552 |
logging.error(error_msg, exc_info=True)
|
553 |
# Return None for audio, cover path (if extracted), and the error status
|
554 |
-
|
|
|
555 |
|
556 |
finally:
|
557 |
# --- Cleanup ---
|
558 |
-
#
|
|
|
559 |
try:
|
560 |
if 'txt_output_path' in locals() and os.path.exists(txt_output_path):
|
561 |
os.remove(txt_output_path)
|
562 |
logging.info(f"Removed intermediate file: {txt_output_path}")
|
563 |
-
# Remove
|
564 |
-
if
|
565 |
-
|
566 |
-
|
567 |
-
|
568 |
-
|
569 |
-
|
570 |
-
#
|
571 |
-
#
|
572 |
-
|
573 |
-
logging.info(f"Temp dir '{temp_dir}' contains output files. Skipping immediate deletion.")
|
574 |
-
# To force cleanup (may break Gradio display if files aren't copied):
|
575 |
-
# shutil.rmtree(temp_dir, ignore_errors=True)
|
576 |
-
# logging.info(f"Attempted cleanup of temp dir: {temp_dir}")
|
577 |
|
578 |
except OSError as e:
|
579 |
logging.warning(f"Error during cleanup of intermediate files: {e}")
|
@@ -581,88 +608,147 @@ def convert_ebook_to_audio(ebook_file, language_display, output_format, embed_co
|
|
581 |
|
582 |
# --- Gradio Interface Definition ---
|
583 |
|
|
|
|
|
584 |
available_voices = get_espeak_voices()
|
585 |
voice_choices = list(available_voices.keys())
|
586 |
-
|
587 |
-
|
588 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
589 |
|
590 |
|
591 |
# Check for external tools on startup and display warnings if needed
|
|
|
592 |
startup_warnings = []
|
593 |
if not check_command("ebook-convert"): startup_warnings.append("Calibre ('ebook-convert')")
|
594 |
-
if not check_command("ebook-meta"): startup_warnings.append("Calibre ('ebook-meta' -
|
595 |
-
if not check_command("espeak-ng"): startup_warnings.append("espeak-ng")
|
596 |
if not check_command("lame"): startup_warnings.append("LAME (needed for MP3 output)")
|
597 |
-
if not check_command("oggenc"): startup_warnings.append("oggenc (needed for OGG output)")
|
598 |
-
if not MUTAGEN_AVAILABLE: startup_warnings.append("Python 'mutagen' library (needed for embedding cover art)")
|
599 |
|
600 |
startup_message = ""
|
601 |
if startup_warnings:
|
602 |
warning_list = "\n- ".join(startup_warnings)
|
603 |
startup_message = (
|
604 |
-
"**⚠️ Startup Warning: The following components might be missing or not found
|
605 |
f"- {warning_list}\n\n"
|
606 |
-
"Please
|
|
|
607 |
)
|
608 |
-
print("
|
609 |
-
print(f"
|
610 |
-
print("
|
|
|
611 |
|
612 |
# Define UI Elements
|
613 |
-
|
614 |
-
|
615 |
-
gr.Markdown(
|
616 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
617 |
|
618 |
if startup_message:
|
619 |
-
gr.
|
620 |
|
621 |
with gr.Row():
|
622 |
-
with gr.Column(scale=1):
|
623 |
-
ebook_input = gr.File(label="1. Upload Ebook", file_count="single")
|
624 |
lang_dropdown = gr.Dropdown(
|
625 |
label="2. Select Language / Voice",
|
626 |
choices=voice_choices,
|
627 |
-
value=default_voice,
|
|
|
628 |
interactive=True
|
629 |
)
|
630 |
format_dropdown = gr.Dropdown(
|
631 |
label="3. Select Output Audio Format",
|
632 |
choices=["mp3", "ogg", "wav"],
|
633 |
value="mp3",
|
|
|
634 |
interactive=True
|
635 |
)
|
636 |
cover_checkbox = gr.Checkbox(
|
637 |
label="Embed Cover Art (if available)",
|
638 |
-
value=True if MUTAGEN_AVAILABLE else False, # Default
|
639 |
-
|
|
|
|
|
|
|
640 |
)
|
641 |
-
submit_button = gr.Button("Convert to Audiobook", variant="primary")
|
642 |
-
|
643 |
-
with gr.Column(scale=2):
|
644 |
-
status_textbox = gr.Textbox(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
645 |
with gr.Row():
|
646 |
-
#
|
647 |
-
cover_image = gr.Image(
|
648 |
-
|
649 |
-
|
650 |
-
|
651 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
652 |
|
653 |
# Connect components
|
654 |
submit_button.click(
|
655 |
fn=convert_ebook_to_audio,
|
656 |
inputs=[ebook_input, lang_dropdown, format_dropdown, cover_checkbox],
|
657 |
-
#
|
658 |
-
outputs=[audio_output_player, cover_image, status_textbox]
|
659 |
)
|
660 |
|
|
|
|
|
661 |
# --- Launch the App ---
|
662 |
if __name__ == "__main__":
|
663 |
-
print("Starting Gradio App...")
|
664 |
-
|
665 |
-
|
666 |
-
|
667 |
-
#
|
668 |
-
|
|
|
|
|
|
|
|
6 |
import re
|
7 |
import logging
|
8 |
from pathlib import Path
|
9 |
+
from PIL import Image, UnidentifiedImageError # For checking image validity
|
10 |
try:
|
11 |
import mutagen
|
12 |
from mutagen.mp3 import MP3, EasyMP3
|
13 |
from mutagen.oggvorbis import OggVorbis
|
14 |
from mutagen.flac import FLAC
|
15 |
from mutagen.mp4 import MP4, MP4Cover
|
16 |
+
from mutagen.id3 import ID3, APIC, PictureType, error as ID3Error
|
17 |
MUTAGEN_AVAILABLE = True
|
18 |
except ImportError:
|
19 |
MUTAGEN_AVAILABLE = False
|
20 |
logging.warning("Mutagen library not found. Cover art embedding will be disabled.")
|
21 |
logging.warning("Install it using: pip install mutagen")
|
22 |
|
|
|
23 |
# --- Configuration & Logging ---
|
24 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
25 |
|
|
|
28 |
def check_command(command):
    """Check whether an executable named *command* can be found on the PATH.

    The lookup is done in-process with ``shutil.which`` instead of spawning
    ``where`` (Windows) or ``command -v`` (Unix). ``command`` is a shell
    builtin and is not guaranteed to exist as a standalone executable, so
    running it through ``subprocess`` without a shell can fail with
    ``FileNotFoundError`` and make every tool look missing.

    Args:
        command: Name of the executable to look for (e.g. ``"espeak-ng"``).

    Returns:
        True if the executable was located, False otherwise. This function
        never raises; any lookup error is logged and reported as False.
    """
    try:
        resolved_path = shutil.which(command)
    except Exception as e:
        # Keep the "never raises" contract callers rely on (e.g. a non-string
        # argument makes shutil.which raise TypeError).
        logging.error(f"Unexpected error checking for command '{command}': {e}")
        return False

    if resolved_path is None:
        logging.warning(f"Command '{command}' not found in PATH.")
        return False

    logging.info(f"Command '{command}' found.")
    return True
|
51 |
|
|
|
54 |
"""Gets available espeak-ng voices and their languages."""
|
55 |
voices = {}
|
56 |
try:
|
57 |
+
cmd = ['espeak-ng', '--voices']
|
58 |
+
logging.info(f"Getting voices with command: {' '.join(cmd)}")
|
59 |
+
# Use a timeout to prevent hanging if espeak-ng has issues
|
60 |
+
result = subprocess.run(cmd, capture_output=True, text=True, check=True, encoding='utf-8', errors='ignore', timeout=15)
|
61 |
# Example line format: P L V Language Code Age/Gender VoiceName File Other Langs
|
62 |
# 2 y en-US M american-english-us Mbrola/us1 (en 10)
|
63 |
# 1 af M afrikaans Afrikaans
|
64 |
+
# More robust pattern to handle variations
|
65 |
+
pattern = re.compile(r"^\s*\d+\s+[yn\-]\s+([\w\-]+)\s+[MF\-]?\s+([\w\s\(\)\-]+?)\s+([\w\/\s\-]+?)(?:\s+\(.*\))?\s*$")
|
66 |
+
|
67 |
+
lines = result.stdout.splitlines()
|
68 |
+
if not lines or len(lines) < 2: # Check if there's output beyond the header
|
69 |
+
logging.warning("No voice lines found in 'espeak-ng --voices' output.")
|
70 |
+
raise ValueError("No voice data returned.")
|
71 |
+
|
72 |
+
for line in lines[1:]: # Skip header
|
73 |
+
match = pattern.match(line.strip())
|
74 |
if match:
|
75 |
+
# Extract code (group 1) and language name (group 2)
|
76 |
+
code = match.group(1).strip()
|
77 |
+
lang_name = match.group(2).strip()
|
78 |
+
|
79 |
+
# Clean up language name (remove potential file paths sometimes included)
|
80 |
+
lang_name = lang_name.split(" ")[0]
|
81 |
+
# Prioritize names like "english-us" over just "english" if code reflects it
|
82 |
+
display_name = f"{lang_name.replace('-', ' ').title()} ({code})"
|
83 |
+
|
84 |
+
# Avoid duplicates, preferring more specific codes if names clash slightly
|
85 |
if display_name not in voices:
|
86 |
voices[display_name] = code
|
87 |
else:
|
88 |
+
# Simpler split as fallback for lines that don't match complex regex
|
89 |
parts = line.split()
|
90 |
if len(parts) >= 4 and parts[0].isdigit():
|
91 |
code = parts[1]
|
92 |
lang_name = parts[3]
|
93 |
+
display_name = f"{lang_name.strip().title()} ({code})"
|
94 |
if display_name not in voices:
|
95 |
voices[display_name] = code
|
96 |
+
else:
|
97 |
+
logging.warning(f"Could not parse voice line: {line}")
|
98 |
+
|
99 |
|
100 |
if not voices:
|
101 |
logging.warning("Could not parse any voices from 'espeak-ng --voices'. Using fallback list.")
|
102 |
+
raise ValueError("Parsing failed.")
|
|
|
103 |
|
104 |
# Sort voices alphabetically by display name
|
105 |
sorted_voices = dict(sorted(voices.items()))
|
106 |
+
logging.info(f"Found {len(sorted_voices)} espeak-ng voices.")
|
107 |
return sorted_voices
|
108 |
|
109 |
+
except (FileNotFoundError, subprocess.CalledProcessError, subprocess.TimeoutExpired, ValueError, Exception) as e:
|
110 |
logging.error(f"Error getting espeak-ng voices: {e}")
|
111 |
+
# Provide a basic fallback list if the command fails or parsing fails
|
112 |
return {"English (en)": "en", "Spanish (es)": "es", "French (fr)": "fr", "German (de)": "de"}
|
113 |
|
114 |
# --- Main Conversion Logic ---
|
|
|
118 |
Converts an ebook file to an audiobook using Calibre and espeak-ng.
|
119 |
"""
|
120 |
if not ebook_file:
|
|
|
121 |
return None, None, "**Error:** No ebook file provided."
|
122 |
|
123 |
+
# Check required commands *before* creating temp dir
|
124 |
calibre_convert_ok = check_command("ebook-convert")
|
125 |
+
calibre_meta_ok = check_command("ebook-meta") # Check always, needed logic follows
|
126 |
espeak_ok = check_command("espeak-ng")
|
127 |
+
lame_ok = check_command("lame")
|
128 |
+
oggenc_ok = check_command("oggenc") # From vorbis-tools
|
129 |
|
130 |
missing = []
|
131 |
if not calibre_convert_ok: missing.append("Calibre ('ebook-convert')")
|
132 |
+
if not calibre_meta_ok: missing.append("Calibre ('ebook-meta' - for cover art)")
|
133 |
if not espeak_ok: missing.append("espeak-ng")
|
134 |
if not lame_ok and output_format == 'mp3': missing.append("LAME (for MP3)")
|
135 |
if not oggenc_ok and output_format == 'ogg': missing.append("oggenc (for OGG)")
|
136 |
|
137 |
if missing:
|
138 |
+
error_msg = f"**Error:** Missing required system command(s):\n- {', '.join(missing)}\n\nPlease ensure they are installed in the environment (check packages.txt)."
|
139 |
logging.error(error_msg.replace("**Error:** ","").replace("\n- "," ").replace("\n"," ")) # Log plain text
|
|
|
140 |
return None, None, error_msg
|
141 |
|
142 |
|
143 |
temp_dir = tempfile.mkdtemp(prefix="ebook_audio_")
|
144 |
logging.info(f"Created temporary directory: {temp_dir}")
|
145 |
+
status_updates = ["▶️ Conversion process started..."]
|
146 |
+
cover_image_path_final = None # Track final usable cover path for display/embedding
|
147 |
+
audio_output_path_final = None # Keep track of the final audio path for return
|
148 |
|
149 |
try:
|
150 |
input_ebook_path = ebook_file.name # Gradio provides a temp path for the upload
|
151 |
+
# Sanitize filename slightly for output files
|
152 |
+
base_filename = re.sub(r'[^\w\-]+', '_', Path(input_ebook_path).stem)
|
153 |
txt_output_path = os.path.join(temp_dir, f"{base_filename}.txt")
|
154 |
+
# Use a generic name first, then check format
|
155 |
+
cover_output_path_temp = os.path.join(temp_dir, "cover_temp")
|
156 |
audio_output_path = os.path.join(temp_dir, f"{base_filename}.{output_format}")
|
157 |
|
158 |
# --- Step 1: Extract Cover Art (Optional) ---
|
159 |
cover_extracted = False
|
160 |
+
if embed_cover and calibre_meta_ok:
|
161 |
+
progress(0.1, desc="🖼️ Extracting cover art (optional)...")
|
162 |
+
status_updates.append(" Attempting to extract cover art...")
|
163 |
try:
|
164 |
cmd_meta = ['ebook-meta', input_ebook_path, '--get-cover', cover_output_path_temp]
|
165 |
logging.info(f"Running cover extraction: {' '.join(cmd_meta)}")
|
166 |
+
# Use timeout for ebook-meta as well
|
167 |
+
result_meta = subprocess.run(cmd_meta, check=True, capture_output=True, text=True, errors='ignore', timeout=30)
|
168 |
if os.path.exists(cover_output_path_temp) and os.path.getsize(cover_output_path_temp) > 0:
|
169 |
+
# Validate image and get format
|
170 |
try:
|
171 |
img = Image.open(cover_output_path_temp)
|
172 |
+
img.verify() # Basic check
|
173 |
img.close() # Need to close after verify
|
174 |
+
|
175 |
+
# Reopen to check format properly and prepare final path
|
176 |
img = Image.open(cover_output_path_temp)
|
177 |
+
img_format = img.format.lower() if img.format else 'jpeg' # Default guess
|
178 |
img.close()
|
179 |
|
180 |
+
# Define final path with correct extension
|
181 |
+
valid_ext = f".{img_format}" if img_format in ['jpeg', 'png', 'gif'] else ".jpg" # Default to jpg
|
182 |
+
cover_image_path_final = os.path.join(temp_dir, f"cover_final{valid_ext}")
|
183 |
+
shutil.move(cover_output_path_temp, cover_image_path_final) # Rename with correct extension
|
184 |
+
|
185 |
+
cover_extracted = True
|
186 |
+
status_updates.append(f" ✅ Cover art extracted successfully ({img_format.upper()}).")
|
187 |
+
logging.info(f"Cover art extracted to {cover_image_path_final}")
|
188 |
+
|
189 |
+
except (IOError, SyntaxError, UnidentifiedImageError) as img_err:
|
190 |
+
logging.warning(f"Extracted file at {cover_output_path_temp} is not a valid image: {img_err}")
|
191 |
+
status_updates.append(" ⚠️ Extracted 'cover' file is not a valid image. Will skip embedding.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
if os.path.exists(cover_output_path_temp): os.remove(cover_output_path_temp) # Clean up invalid file
|
193 |
+
if cover_image_path_final and os.path.exists(cover_image_path_final): os.remove(cover_image_path_final)
|
194 |
+
cover_image_path_final = None # Ensure it's None
|
195 |
+
|
196 |
else:
|
197 |
+
status_updates.append(" ℹ️ No cover art found in the ebook metadata.")
|
198 |
logging.info("ebook-meta ran but did not produce a cover file or it was empty.")
|
199 |
+
if os.path.exists(cover_output_path_temp): os.remove(cover_output_path_temp) # Clean up empty file
|
200 |
|
201 |
+
except subprocess.TimeoutExpired:
|
202 |
+
status_updates.append(f" ⚠️ Timeout trying to extract cover art.")
|
203 |
+
logging.warning(f"ebook-meta timed out.")
|
204 |
except subprocess.CalledProcessError as e:
|
205 |
+
stderr_decoded = e.stderr.decode(errors='ignore').strip() if e.stderr else "No stderr"
|
206 |
+
status_updates.append(f" ⚠️ Failed to extract cover art. Error: {stderr_decoded[:200]}{'...' if len(stderr_decoded)>200 else ''}") # Keep it short
|
207 |
logging.warning(f"ebook-meta failed: {stderr_decoded}")
|
208 |
except Exception as e:
|
209 |
+
status_updates.append(f" ⚠️ An unexpected error occurred during cover extraction: {e}")
|
210 |
logging.error(f"Unexpected error during cover extraction: {e}", exc_info=True)
|
211 |
+
# Ensure temp file is removed if final path wasn't set
|
212 |
+
if not cover_image_path_final and os.path.exists(cover_output_path_temp):
|
213 |
+
os.remove(cover_output_path_temp)
|
214 |
+
|
215 |
elif embed_cover and not calibre_meta_ok:
|
216 |
+
status_updates.append(" ℹ️ Cover art embedding requested, but 'ebook-meta' command not found.")
|
217 |
+
elif embed_cover and not MUTAGEN_AVAILABLE:
|
218 |
+
status_updates.append(" ℹ️ Cover art embedding requested, but 'mutagen' Python library not installed.")
|
219 |
|
220 |
# --- Step 2: Convert Ebook to TXT ---
|
221 |
+
progress(0.3, desc="📖 Converting ebook to TXT...")
|
222 |
+
status_updates.append("📖 Converting ebook to plain text...")
|
223 |
try:
|
224 |
+
# Add options known to help with TXT output quality
|
225 |
+
# --input-encoding=utf8 is often needed for non-ASCII content
|
226 |
+
cmd_convert = [
|
227 |
+
'ebook-convert', input_ebook_path, txt_output_path,
|
228 |
+
'--enable-heuristics',
|
229 |
+
'--output-profile=generic_eink', # Profiles can influence text formatting
|
230 |
+
'--input-encoding=utf8', # Try specifying UTF-8
|
231 |
+
'--pretty-print' # Can sometimes help structure
|
232 |
+
]
|
233 |
logging.info(f"Running ebook conversion: {' '.join(cmd_convert)}")
|
234 |
+
# Increased timeout for potentially large books
|
235 |
+
result_convert = subprocess.run(cmd_convert, check=True, capture_output=True, encoding='utf-8', errors='ignore', timeout=300) # 5 mins
|
236 |
+
# Log stdout/stderr even on success for warnings
|
237 |
if result_convert.stdout: logging.info(f"ebook-convert stdout: {result_convert.stdout.strip()}")
|
238 |
if result_convert.stderr: logging.warning(f"ebook-convert stderr: {result_convert.stderr.strip()}")
|
239 |
+
status_updates.append(" ✅ Ebook converted to TXT.")
|
240 |
logging.info("Ebook successfully converted to TXT.")
|
241 |
+
except subprocess.TimeoutExpired:
|
242 |
+
error_msg = "**Error:** Calibre conversion timed out (may be a very large or complex book)."
|
243 |
+
status_updates.append(f" ❌ Calibre conversion timed out.")
|
244 |
+
logging.error("Error during Calibre conversion: Timeout")
|
245 |
+
return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
|
246 |
except subprocess.CalledProcessError as e:
|
247 |
+
stderr_decoded = e.stderr.decode(errors='ignore').strip() if e.stderr else "No stderr"
|
248 |
+
stdout_decoded = e.stdout.decode(errors='ignore').strip() if e.stdout else "No stdout"
|
249 |
+
error_details = f"Stderr:\n```\n{stderr_decoded}\n```\nStdout:\n```\n{stdout_decoded}\n```" if stderr_decoded or stdout_decoded else str(e)
|
250 |
+
error_msg = f"**Error:** Calibre conversion failed (Exit Code {e.returncode}).\n{error_details}"
|
251 |
+
status_updates.append(f" ❌ Calibre conversion failed.")
|
252 |
+
logging.error(f"Error during Calibre conversion: Exit Code {e.returncode}\nStderr: {stderr_decoded}\nStdout: {stdout_decoded}")
|
253 |
return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
|
254 |
except Exception as e:
|
255 |
error_msg = f"**Error:** An unexpected error occurred during ebook conversion:\n{e}"
|
256 |
+
status_updates.append(f" ❌ Unexpected conversion error.")
|
257 |
logging.error(f"Unexpected error during ebook conversion: {e}", exc_info=True)
|
258 |
return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
|
259 |
|
260 |
# Check if TXT file was actually created and is not empty
|
261 |
if not os.path.exists(txt_output_path) or os.path.getsize(txt_output_path) == 0:
|
262 |
+
error_msg = "**Error:** Calibre finished, but the output TXT file is missing or empty.\nThis can happen with image-based ebooks (like scanned PDFs, comics, CBZ/CBR) or DRM-protected files.\nCalibre cannot process these types into text."
|
263 |
+
status_updates.append(f" ❌ TXT output empty/missing.")
|
264 |
logging.error("Calibre finished, but the output TXT file is missing or empty.")
|
265 |
return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
|
266 |
|
267 |
# --- Step 3: Convert TXT to Audio ---
|
268 |
+
progress(0.6, desc="🗣️ Converting TXT to Audio...")
|
269 |
+
status_updates.append("🗣️ Converting text to speech...")
|
270 |
|
271 |
voice_code = available_voices.get(language_display, 'en') # Get code from display name
|
272 |
+
# Base espeak-ng command: specify voice, read from file
|
273 |
cmd_speak = ['espeak-ng', '-v', voice_code, '-f', txt_output_path]
|
274 |
+
# Optionally add speed or other espeak parameters here:
|
275 |
+
# cmd_speak.extend(['-s', '160']) # Example: Set speed (default 175)
|
276 |
|
277 |
try:
|
278 |
logging.info(f"Preparing audio command for format: {output_format}")
|
279 |
+
# Define timeout for TTS process (can be long for large books)
|
280 |
+
tts_timeout = 1800 # 30 minutes
|
281 |
+
|
282 |
if output_format == 'wav':
|
283 |
cmd_speak.extend(['-w', audio_output_path])
|
284 |
logging.info(f"Running espeak-ng (WAV): {' '.join(cmd_speak)}")
|
285 |
+
result_speak = subprocess.run(cmd_speak, check=True, capture_output=True, timeout=tts_timeout)
|
286 |
+
if result_speak.stderr: logging.warning(f"espeak-ng stderr (WAV): {result_speak.stderr.decode(errors='ignore').strip()}")
|
|
|
|
|
287 |
|
288 |
elif output_format == 'mp3':
|
289 |
+
if not lame_ok: raise FileNotFoundError("LAME command not found")
|
290 |
+
cmd_speak.append('--stdout') # espeak outputs WAV to stdout
|
291 |
+
cmd_lame = ['lame', '-', audio_output_path] # LAME reads WAV from stdin, outputs MP3
|
292 |
+
logging.info(f"Running pipe: {' '.join(cmd_speak)} | {' '.join(cmd_lame)}")
|
293 |
+
|
294 |
+
# Start espeak-ng process
|
295 |
ps_speak = subprocess.Popen(cmd_speak, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
296 |
+
# Start LAME process, piping espeak's stdout to LAME's stdin
|
297 |
ps_lame = subprocess.Popen(cmd_lame, stdin=ps_speak.stdout, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
298 |
|
299 |
+
# *** Crucial: Allow ps_speak stdout to be closed by ps_lame if it finishes reading ***
|
300 |
if ps_speak.stdout:
|
301 |
ps_speak.stdout.close()
|
302 |
|
303 |
+
# Capture stderr from both processes, wait for LAME first (end of pipeline)
|
304 |
+
try:
|
305 |
+
lame_stdout_bytes, lame_stderr_bytes = ps_lame.communicate(timeout=tts_timeout + 60) # Allow extra time for encoding
|
306 |
+
except subprocess.TimeoutExpired:
|
307 |
+
logging.error("LAME process timed out.")
|
308 |
+
ps_speak.kill() # Kill upstream process too
|
309 |
+
ps_lame.kill()
|
310 |
+
raise subprocess.TimeoutExpired(cmd_lame, tts_timeout + 60)
|
311 |
+
|
312 |
+
# Now wait for espeak and capture its stderr
|
313 |
speak_stderr_bytes = ps_speak.stderr.read() if ps_speak.stderr else b""
|
314 |
+
ps_speak.wait() # Wait for espeak to fully terminate
|
315 |
if ps_speak.stderr: ps_speak.stderr.close()
|
316 |
|
317 |
+
# Decode stderr for logging/errors
|
318 |
lame_stderr_str = lame_stderr_bytes.decode(errors='ignore').strip()
|
319 |
speak_stderr_str = speak_stderr_bytes.decode(errors='ignore').strip()
|
320 |
|
321 |
+
# Check return codes AFTER both processes finished
|
322 |
if ps_lame.returncode != 0:
|
323 |
+
logging.error(f"LAME failed with exit code {ps_lame.returncode}. LAME stderr: {lame_stderr_str}")
|
324 |
+
raise subprocess.CalledProcessError(ps_lame.returncode, cmd_lame, stderr=lame_stderr_bytes)
|
325 |
if ps_speak.returncode != 0:
|
326 |
+
logging.error(f"espeak-ng failed with exit code {ps_speak.returncode}. espeak-ng stderr: {speak_stderr_str}")
|
327 |
+
raise subprocess.CalledProcessError(ps_speak.returncode, cmd_speak, stderr=speak_stderr_bytes)
|
328 |
|
329 |
+
# Log any non-fatal warnings from stderr
|
330 |
+
if lame_stderr_str: logging.warning(f"LAME stderr: {lame_stderr_str}")
|
331 |
+
if speak_stderr_str: logging.warning(f"espeak-ng stderr: {speak_stderr_str}")
|
|
|
|
|
332 |
|
333 |
elif output_format == 'ogg':
|
334 |
+
if not oggenc_ok: raise FileNotFoundError("oggenc command not found")
|
335 |
+
cmd_speak.append('--stdout') # espeak outputs WAV to stdout
|
336 |
+
# oggenc reads WAV from stdin ('-') and writes to output file ('-o')
|
337 |
+
cmd_ogg = ['oggenc', '-o', audio_output_path, '-']
|
338 |
+
logging.info(f"Running pipe: {' '.join(cmd_speak)} | {' '.join(cmd_ogg)}")
|
339 |
+
|
340 |
ps_speak = subprocess.Popen(cmd_speak, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
341 |
ps_ogg = subprocess.Popen(cmd_ogg, stdin=ps_speak.stdout, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
342 |
|
|
|
343 |
if ps_speak.stdout:
|
344 |
ps_speak.stdout.close()
|
345 |
|
346 |
+
try:
|
347 |
+
ogg_stdout_bytes, ogg_stderr_bytes = ps_ogg.communicate(timeout=tts_timeout + 60)
|
348 |
+
except subprocess.TimeoutExpired:
|
349 |
+
logging.error("oggenc process timed out.")
|
350 |
+
ps_speak.kill()
|
351 |
+
ps_ogg.kill()
|
352 |
+
raise subprocess.TimeoutExpired(cmd_ogg, tts_timeout + 60)
|
353 |
+
|
354 |
speak_stderr_bytes = ps_speak.stderr.read() if ps_speak.stderr else b""
|
355 |
+
ps_speak.wait()
|
356 |
if ps_speak.stderr: ps_speak.stderr.close()
|
357 |
|
|
|
358 |
ogg_stderr_str = ogg_stderr_bytes.decode(errors='ignore').strip()
|
359 |
speak_stderr_str = speak_stderr_bytes.decode(errors='ignore').strip()
|
360 |
|
|
|
361 |
if ps_ogg.returncode != 0:
|
362 |
+
logging.error(f"oggenc failed with exit code {ps_ogg.returncode}. oggenc stderr: {ogg_stderr_str}")
|
363 |
+
raise subprocess.CalledProcessError(ps_ogg.returncode, cmd_ogg, stderr=ogg_stderr_bytes)
|
364 |
if ps_speak.returncode != 0:
|
365 |
+
logging.error(f"espeak-ng failed with exit code {ps_speak.returncode}. espeak-ng stderr: {speak_stderr_str}")
|
366 |
+
raise subprocess.CalledProcessError(ps_speak.returncode, cmd_speak, stderr=speak_stderr_bytes)
|
367 |
|
368 |
+
if ogg_stderr_str: logging.warning(f"oggenc stderr: {ogg_stderr_str}")
|
369 |
+
if speak_stderr_str: logging.warning(f"espeak-ng stderr: {speak_stderr_str}")
|
|
|
|
|
|
|
370 |
|
371 |
else:
|
372 |
+
raise ValueError(f"Unsupported output format selected: {output_format}")
|
373 |
|
374 |
+
status_updates.append(" ✅ Text converted to audio.")
|
375 |
logging.info(f"Text successfully converted to {output_format.upper()}.")
|
376 |
|
377 |
except subprocess.CalledProcessError as e:
|
378 |
+
command_name = Path(e.cmd[0]).name if isinstance(e.cmd, list) else e.cmd
|
|
|
|
|
379 |
stderr_str = e.stderr.decode(errors='ignore').strip() if isinstance(e.stderr, bytes) else (e.stderr or "")
|
380 |
stdout_str = e.stdout.decode(errors='ignore').strip() if isinstance(e.stdout, bytes) else (e.stdout or "")
|
381 |
error_details = stderr_str or stdout_str or "No output/error captured."
|
|
|
|
|
382 |
exit_status_str = f"exit status {e.returncode}" if e.returncode is not None else "unknown exit status"
|
383 |
cmd_str = ' '.join(e.cmd) if isinstance(e.cmd, list) else e.cmd
|
384 |
+
|
385 |
+
error_msg = (f"**Error:** Audio generation failed.\n\n"
|
386 |
+
f"**Process:** `{command_name}`\n"
|
387 |
+
f"**Command:**\n```\n{cmd_str}\n```\n"
|
388 |
+
f"**Exit Status:** {exit_status_str}\n\n"
|
389 |
+
f"**Output/Error:**\n```\n{error_details}\n```")
|
390 |
+
status_updates.append(f" ❌ Audio generation failed ({command_name}).")
|
391 |
+
logging.error(f"Audio generation failed. Command: `{cmd_str}` Exit: {exit_status_str} Details: {error_details}")
|
392 |
+
return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
|
393 |
+
|
394 |
+
except subprocess.TimeoutExpired as e:
|
395 |
+
command_name = Path(e.cmd[0]).name if isinstance(e.cmd, list) else e.cmd
|
396 |
+
error_msg = f"**Error:** Audio generation timed out (over {e.timeout}s) during `{command_name}` processing.\nThe ebook might be too long for the current timeout limit."
|
397 |
+
status_updates.append(f" ❌ Audio generation timed out.")
|
398 |
+
logging.error(f"Audio generation timed out for command: {' '.join(e.cmd)}")
|
399 |
+
return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
|
400 |
|
401 |
except FileNotFoundError as e:
|
402 |
+
# This should ideally be caught by initial checks, but handle defensively
|
403 |
+
missing_cmd = e.filename or "Unknown command"
|
404 |
+
error_msg = f"**Error:** Command `{missing_cmd}` not found during audio generation for {output_format.upper()} output.\nPlease check `packages.txt`."
|
405 |
+
status_updates.append(f" ❌ Command '{missing_cmd}' not found.")
|
406 |
+
logging.error(f"Error: Command '{missing_cmd}' not found during execution.")
|
407 |
return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
|
408 |
except Exception as e:
|
409 |
+
error_msg = f"**Error:** An unexpected error occurred during audio generation:\n```\n{e}\n```"
|
410 |
+
status_updates.append(f" ❌ Unexpected audio error.")
|
411 |
logging.error(f"An unexpected error occurred during audio generation: {e}", exc_info=True)
|
412 |
return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
|
413 |
|
414 |
+
# --- Step 3b: Verify Audio Output ---
|
415 |
+
if not os.path.exists(audio_output_path) or os.path.getsize(audio_output_path) < 256: # Check if file exists and has *some* data
|
416 |
+
error_msg = f"**Error:** Audio generation command finished, but the output file '{Path(audio_output_path).name}' is missing or empty/too small.\nCheck logs for potential errors during the TTS or encoding process."
|
417 |
+
status_updates.append(f" ❌ Audio output missing or invalid.")
|
418 |
+
logging.error(f"Audio output file missing or too small after generation: {audio_output_path}")
|
419 |
+
# Try to provide more context if stderr was captured earlier
|
420 |
+
# last_stderr = speak_stderr_str or lame_stderr_str or ogg_stderr_str # From pipe section
|
421 |
+
# if last_stderr: error_msg += f"\nLast captured error output:\n```\n{last_stderr}\n```"
|
422 |
return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
|
423 |
|
424 |
|
425 |
# --- Step 4: Embed Cover Art (Optional) ---
|
426 |
+
if embed_cover and cover_extracted and MUTAGEN_AVAILABLE and cover_image_path_final and os.path.exists(cover_image_path_final):
|
427 |
+
progress(0.9, desc="🖼️ Embedding cover art...")
|
428 |
+
status_updates.append("🖼️ Embedding cover art into audio file...")
|
429 |
try:
|
430 |
with open(cover_image_path_final, 'rb') as img_f:
|
431 |
cover_data = img_f.read()
|
432 |
|
433 |
+
# Determine mimetype robustly using Pillow
|
434 |
+
mime_type = 'image/jpeg' # Default
|
435 |
+
img_width, img_height, img_depth = 0, 0, 24 # Defaults for FLAC/OGG
|
436 |
try:
|
437 |
img = Image.open(cover_image_path_final)
|
438 |
mime_type = Image.MIME.get(img.format)
|
439 |
+
img_width, img_height = img.width, img.height
|
440 |
+
img_depth = {'RGB': 24, 'RGBA': 32, 'L': 8, 'P': 8}.get(img.mode, 24) # Palette 'P' often 8-bit
|
441 |
img.close()
|
442 |
+
if not mime_type:
|
443 |
+
ext = Path(cover_image_path_final).suffix.lower()
|
444 |
+
if ext == ".jpg" or ext == ".jpeg": mime_type = 'image/jpeg'
|
445 |
+
elif ext == ".png": mime_type = 'image/png'
|
446 |
+
else: raise ValueError("Unsupported image format for MIME detection") # Force fallback
|
447 |
+
logging.info(f"Using cover mime type: {mime_type}, Dimensions: {img_width}x{img_height}, Depth: {img_depth}")
|
448 |
except Exception as pil_err:
|
449 |
+
logging.warning(f"Could not determine MIME type/dimensions using PIL: {pil_err}. Falling back to image/jpeg.")
|
450 |
+
mime_type = 'image/jpeg' # Fallback
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
451 |
|
452 |
logging.info(f"Attempting to embed cover art ({mime_type}) into {audio_output_path}")
|
453 |
+
audio = mutagen.File(audio_output_path, easy=True) # Use easy=True for simple tags, fallback to non-easy for picture
|
454 |
|
455 |
if audio is None:
|
456 |
+
# Try loading without easy=True if easy fails
|
457 |
+
audio = mutagen.File(audio_output_path, easy=False)
|
458 |
+
if audio is None:
|
459 |
+
raise ValueError("Mutagen could not load the audio file. Format might be unsupported or file corrupted.")
|
460 |
|
461 |
+
# --- Add Title/Artist using Easy Interface if possible ---
|
462 |
try:
|
463 |
+
if isinstance(audio, mutagen.easy.EasyMutagen): # Check if Easy interface loaded
|
464 |
+
if not audio.get('title'): audio['title'] = Path(base_filename).name.replace('_', ' ') # Use sanitized filename base
|
465 |
+
if not audio.get('artist'): audio['artist'] = "Ebook Speaker"
|
466 |
+
audio.save() # Save easy tags
|
467 |
+
# Reload without easy=True for picture embedding if needed by format
|
468 |
+
audio = mutagen.File(audio_output_path, easy=False)
|
469 |
+
elif audio is not None: # Easy interface failed, try basic tags with normal interface
|
470 |
+
if not audio.tags.get('TIT2'): audio.tags.add(mutagen.id3.TIT2(encoding=3, text=Path(base_filename).name.replace('_', ' ')))
|
471 |
+
if not audio.tags.get('TPE1'): audio.tags.add(mutagen.id3.TPE1(encoding=3, text="Ebook Speaker"))
|
472 |
+
audio.save()
|
473 |
+
audio = mutagen.File(audio_output_path, easy=False) # Reload after save
|
474 |
+
|
475 |
+
except Exception as tag_err:
|
476 |
+
logging.warning(f"Could not set basic title/artist tags: {tag_err}")
|
477 |
+
|
478 |
+
|
479 |
+
# --- Embed Picture (using non-easy interface often required) ---
|
480 |
+
if audio is None: # Check again after potential reload
|
481 |
+
raise ValueError("Audio object became None after tag saving.")
|
482 |
+
|
483 |
+
# Clear existing art first (important!)
|
484 |
+
try:
|
485 |
+
audio.tags.delall('APIC') # ID3v2 (MP3)
|
486 |
+
audio.tags.delall('covr') # MP4
|
487 |
+
if hasattr(audio, 'clear_pictures'): audio.clear_pictures() # FLAC
|
488 |
+
if "metadata_block_picture" in audio: del audio["metadata_block_picture"] # OggVorbis
|
489 |
+
audio.save()
|
490 |
+
# Reload again after deleting to ensure clean slate
|
491 |
+
audio = mutagen.File(audio_output_path, easy=False)
|
492 |
+
if audio is None: raise ValueError("Audio object None after clearing art.")
|
493 |
+
except (AttributeError, KeyError, TypeError, Exception) as clear_err:
|
494 |
+
logging.warning(f"Could not definitively clear existing artwork: {clear_err}. Proceeding anyway.")
|
495 |
+
|
496 |
+
|
497 |
+
# Add the new cover
|
498 |
save_needed = False
|
499 |
+
if isinstance(audio, (MP3, EasyMP3)): # Handles MP3
|
500 |
+
if audio.tags is None: audio.add_tags()
|
501 |
audio.tags.add(
|
502 |
APIC(
|
503 |
+
encoding=3, # 3 is for UTF-8
|
504 |
mime=mime_type,
|
505 |
+
type=PictureType.COVER_FRONT, # Use standard enum
|
506 |
+
desc='Cover',
|
507 |
data=cover_data
|
508 |
)
|
509 |
)
|
|
|
511 |
elif isinstance(audio, FLAC):
|
512 |
pic = mutagen.flac.Picture()
|
513 |
pic.data = cover_data
|
514 |
+
pic.type = PictureType.COVER_FRONT
|
515 |
pic.mime = mime_type
|
516 |
+
pic.width = img_width
|
517 |
+
pic.height = img_height
|
518 |
+
pic.depth = img_depth
|
519 |
audio.add_picture(pic)
|
520 |
save_needed = True
|
521 |
elif isinstance(audio, OggVorbis):
|
522 |
+
# Ogg Vorbis uses base64 encoded FLAC Picture block
|
523 |
import base64
|
|
|
524 |
pic = mutagen.flac.Picture()
|
525 |
pic.data = cover_data
|
526 |
+
pic.type = PictureType.COVER_FRONT
|
527 |
pic.mime = mime_type
|
528 |
+
pic.width = img_width
|
529 |
+
pic.height = img_height
|
530 |
+
pic.depth = img_depth
|
531 |
+
audio["METADATA_BLOCK_PICTURE"] = [base64.b64encode(pic.write()).decode("ascii")]
|
|
|
|
|
|
|
|
|
|
|
|
|
532 |
save_needed = True
|
533 |
+
elif isinstance(audio, MP4): # Handles M4A/M4B
|
534 |
+
if mime_type == 'image/jpeg': pic_format = MP4Cover.FORMAT_JPEG
|
535 |
+
elif mime_type == 'image/png': pic_format = MP4Cover.FORMAT_PNG
|
536 |
+
else: pic_format = MP4Cover.FORMAT_UNDEFINED
|
|
|
|
|
|
|
|
|
|
|
|
|
537 |
if pic_format != MP4Cover.FORMAT_UNDEFINED:
|
538 |
audio['covr'] = [MP4Cover(cover_data, imageformat=pic_format)]
|
539 |
save_needed = True
|
540 |
+
else: logging.warning(f"Unsupported cover image format ({mime_type}) for MP4 embedding.")
|
541 |
+
else:
|
542 |
+
logging.warning(f"Cover embedding not implemented for this audio type: {type(audio)}")
|
543 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
544 |
|
545 |
+
if save_needed:
|
|
|
546 |
audio.save()
|
547 |
+
status_updates.append(" ✅ Cover art embedded successfully.")
|
548 |
+
logging.info("Cover art embedded successfully.")
|
549 |
+
elif embed_cover: # Only report skip if embedding was attempted but failed type match
|
550 |
+
status_updates.append(" ⚠️ Cover embedding skipped (unsupported audio format for mutagen?).")
|
551 |
+
logging.warning(f"Could not embed cover: audio format {type(audio)} not explicitly handled.")
|
552 |
+
|
553 |
+
except (mutagen.MutagenError, ValueError, IOError, TypeError, KeyError, AttributeError) as e:
|
554 |
+
status_updates.append(f" ⚠️ Could not embed cover art. Error: {str(e)[:100]}...")
|
555 |
logging.warning(f"Failed to embed cover art: {e}", exc_info=True)
|
556 |
except Exception as e:
|
557 |
+
status_updates.append(f" ⚠️ An unexpected error occurred during cover art embedding.")
|
558 |
logging.error(f"Unexpected error during cover embedding: {e}", exc_info=True)
|
559 |
elif embed_cover and not cover_extracted:
|
560 |
+
status_updates.append(" ℹ️ Cover art embedding skipped (no cover extracted or invalid).")
|
561 |
elif embed_cover and not MUTAGEN_AVAILABLE:
|
562 |
+
# This was logged earlier, but confirm skip in status
|
563 |
+
status_updates.append(" ⚠️ Cover art embedding skipped (Mutagen library not installed).")
|
564 |
|
565 |
|
566 |
# --- Step 5: Prepare final output ---
|
567 |
+
progress(1.0, desc="✅ Complete!")
|
568 |
+
status_updates.append("🏁 Conversion complete!")
|
569 |
audio_output_path_final = audio_output_path # Mark the path as final
|
570 |
|
571 |
# Return paths for Gradio components
|
572 |
final_status = "\n".join(status_updates)
|
573 |
+
logging.info(f"Returning audio: {audio_output_path_final}, cover: {cover_image_path_final}, Status: Success.")
|
574 |
# Return audio path for Audio component, cover path for Image, status for Textbox
|
575 |
return audio_output_path_final, cover_image_path_final, final_status
|
576 |
|
577 |
except Exception as e:
|
578 |
+
# Catch-all for unexpected errors in the main try block
|
579 |
+
error_msg = f"An unexpected critical error occurred in the main process: {e}"
|
580 |
+
status_updates.append(f" ❌ CRITICAL ERROR: {error_msg}")
|
581 |
logging.error(error_msg, exc_info=True)
|
582 |
# Return None for audio, cover path (if extracted), and the error status
|
583 |
+
final_status = "\n".join(status_updates)
|
584 |
+
return None, cover_image_path_final, f"{final_status}\n\n**Error:** An unexpected critical error occurred.\nCheck application logs for details.\n{e}"
|
585 |
|
586 |
finally:
|
587 |
# --- Cleanup ---
|
588 |
+
# We leave the final audio and cover files in temp_dir for Gradio to serve.
|
589 |
+
# Clean up intermediate files ONLY.
|
590 |
try:
|
591 |
if 'txt_output_path' in locals() and os.path.exists(txt_output_path):
|
592 |
os.remove(txt_output_path)
|
593 |
logging.info(f"Removed intermediate file: {txt_output_path}")
|
594 |
+
# Remove temporary cover if it's different from final or if final doesn't exist
|
595 |
+
if 'cover_output_path_temp' in locals() and os.path.exists(cover_output_path_temp):
|
596 |
+
if not cover_image_path_final or cover_output_path_temp != cover_image_path_final:
|
597 |
+
os.remove(cover_output_path_temp)
|
598 |
+
logging.info(f"Removed intermediate file: {cover_output_path_temp}")
|
599 |
+
|
600 |
+
# Note: Gradio typically copies temp files, but leaving the dir might be safer
|
601 |
+
# If space becomes an issue, add shutil.rmtree(temp_dir) here,
|
602 |
+
# but ensure Gradio doesn't need the original files after the function returns.
|
603 |
+
logging.info(f"Temporary directory '{temp_dir}' contains final output files and will be cleaned up by Gradio/system later.")
|
|
|
|
|
|
|
|
|
604 |
|
605 |
except OSError as e:
|
606 |
logging.warning(f"Error during cleanup of intermediate files: {e}")
|
|
|
608 |
|
609 |
# --- Gradio Interface Definition ---

print("Initializing Gradio Interface...")
print("Fetching available eSpeak-NG voices...")
# Dict returned by get_espeak_voices(); its keys become the dropdown labels.
available_voices = get_espeak_voices()
voice_choices = list(available_voices)
print(f"Found {len(voice_choices)} voices.")

# Candidate default labels in priority order (US English first, then GB,
# then generic English). Several spellings are listed per locale because the
# label text can vary in naming.
possible_defaults = [
    "English (United States) (en-us)",
    "English (Us) (en-us)",  # Variations in naming
    "English (en-us)",
    "English (Great Britain) (en-gb)",
    "English (Gb) (en-gb)",
    "English (en-gb)",
    "English (en)",
]
# The first candidate that is actually available wins; otherwise fall back to
# the basic "English (en)" label.
default_voice = next(
    (candidate for candidate in possible_defaults if candidate in voice_choices),
    "English (en)",
)

if not voice_choices:
    logging.error("FATAL: No espeak voices found or parsed. Language selection will fail.")
    # Install a single dummy entry so the dropdown still gets a non-empty
    # choice list (prevents a Gradio crash); the entry itself is unusable.
    default_voice = "Error: No Voices Found"
    voice_choices = [default_voice]
    available_voices = {default_voice: "error"}
|
638 |
|
639 |
|
640 |
# Check for external tools on startup and display warnings if needed
print("Checking required external commands...")

# (command to probe, description reported when it is missing), checked in
# this order.
_required_commands = (
    ("ebook-convert", "Calibre ('ebook-convert')"),
    ("ebook-meta", "Calibre ('ebook-meta' - needed for cover art)"),
    ("espeak-ng", "espeak-ng (core TTS engine)"),
    ("lame", "LAME (needed for MP3 output)"),
    ("oggenc", "oggenc (needed for OGG output, from 'vorbis-tools')"),
)
startup_warnings = [
    description
    for command_name, description in _required_commands
    if not check_command(command_name)
]
# The Python-side dependency is reported last, after the external commands.
if not MUTAGEN_AVAILABLE:
    startup_warnings.append("Python 'mutagen' library (needed for embedding cover art - install via requirements.txt)")

if startup_warnings:
    # Markdown bullet list: the first "- " comes from the template below, the
    # remaining ones from the join separator.
    warning_list = "\n- ".join(startup_warnings)
    startup_message = "".join(
        (
            "**⚠️ Startup Warning: The following components might be missing or not found:**\n\n",
            f"- {warning_list}\n\n",
            "Please ensure system packages are listed in `packages.txt` and Python libraries in `requirements.txt`. ",
            "Functionality relying on missing components will fail. Check container build logs for installation errors.",
        )
    )
    print("--- STARTUP WARNING ---")
    print(f"Missing components: {', '.join(startup_warnings)}")
    print("-----------------------")
else:
    # Empty string means "nothing to report"; later code tests its truthiness.
    startup_message = ""
|
662 |
+
|
663 |
|
664 |
# Define UI Elements
print("Building Gradio UI...")
with gr.Blocks(theme=gr.themes.Soft(), title="Ebook to Audiobook") as demo:
    # Page header and usage instructions.
    gr.Markdown(
        """
        # Ebook to Audiobook Converter 🎧📚
        **Convert your ebooks (EPUB, MOBI, AZW3, FB2, PDF*, etc.) into audiobooks!**

        Upload your ebook, select the desired language/voice and audio format, and click Convert.
        Optionally, embed the cover art into the audio file metadata.

        *(*) Note: PDF conversion works best for text-based PDFs. Scanned images or complex layouts may result in poor text extraction.*
        """
    )

    if startup_message:
        # gr.Warning() is a toast helper meant to be called from inside an
        # event handler; invoked while the layout is being built it never
        # shows up on the page. The startup message is already written as
        # Markdown, so render it as page content instead.
        gr.Markdown(startup_message)

    with gr.Row():
        # Left column: conversion inputs.
        with gr.Column(scale=1, min_width=300):
            # type="file" hands the handler a file object for direct path access.
            # NOTE(review): newer Gradio releases appear to accept only
            # "filepath"/"binary" here - confirm against the pinned Gradio
            # version and the handler's use of the upload before upgrading.
            ebook_input = gr.File(label="1. Upload Ebook File", file_count="single", type="file")
            lang_dropdown = gr.Dropdown(
                label="2. Select Language / Voice",
                choices=voice_choices,
                # Guard against a default label that is not among the choices.
                value=default_voice if default_voice in voice_choices else (voice_choices[0] if voice_choices else None),
                info="Uses voices available from espeak-ng.",
                interactive=True
            )
            format_dropdown = gr.Dropdown(
                label="3. Select Output Audio Format",
                choices=["mp3", "ogg", "wav"],
                value="mp3",
                info="MP3 offers good compatibility and compression. OGG is open source. WAV is uncompressed.",
                interactive=True
            )
            cover_checkbox = gr.Checkbox(
                label="Embed Cover Art (if available)",
                value=MUTAGEN_AVAILABLE,  # Default based on library presence
                info="Requires 'mutagen' library and 'ebook-meta' command.",
                # Always interactive: the backend skips embedding itself when
                # its dependencies are missing.
                interactive=True
            )
            # Button's `icon` argument expects an image file path or URL, not
            # a text glyph, so the emoji is part of the label instead.
            submit_button = gr.Button("▶️ Convert to Audiobook", variant="primary")

        # Right column: status log plus the generated outputs.
        with gr.Column(scale=2, min_width=400):
            status_textbox = gr.Textbox(
                label="📊 Conversion Status & Log",
                lines=10,
                max_lines=20,
                interactive=False,
                show_copy_button=True,
                placeholder="Conversion progress will appear here..."
            )
            with gr.Row():
                # Output components: Image for cover, Audio for playback.
                # Both have their own download capability, so there is no
                # separate download button.
                cover_image = gr.Image(
                    label="🖼️ Extracted Cover Art",
                    type="filepath",  # Function returns a path
                    interactive=False,
                    height=250,
                    show_download_button=True
                )
                audio_output_player = gr.Audio(
                    label="🎧 Generated Audiobook",
                    type="filepath",  # Function returns a path
                    interactive=False  # Playback is interactive, but component value isn't set by user
                )

    # Connect components. The handler returns (audio path, cover path, status
    # text), which maps positionally onto the outputs below.
    submit_button.click(
        fn=convert_ebook_to_audio,
        inputs=[ebook_input, lang_dropdown, format_dropdown, cover_checkbox],
        outputs=[audio_output_player, cover_image, status_textbox]
    )

    gr.Markdown("--- \n *Powered by Calibre, eSpeak-NG, LAME, OggEnc, Mutagen, and Gradio.*")
|
743 |
+
|
744 |
# --- Launch the App ---
if __name__ == "__main__":
    print("Starting Gradio App Server...")
    if not voice_choices or voice_choices[0].startswith("Error"):
        print("\nWARNING: Could not retrieve voices from espeak-ng. Language selection may be broken!\n")

    # server_name="0.0.0.0" allows access within Docker/network.
    # `share` is deliberately left at its default: Hugging Face Spaces serve
    # the app themselves and do not support share links, and anywhere else
    # share=True would publish this upload-processing app through a public
    # tunnel. Pass share=True explicitly for a one-off local demo if needed.
    # debug=True can be helpful locally but should stay off in production.
    demo.launch(server_name="0.0.0.0")
    # launch() blocks the main thread while the server is running, so this
    # line is only reached once the server has shut down.
    print("Gradio App Server stopped.")
|