drewThomasson commited on
Commit
1859aa0
·
verified ·
1 Parent(s): 2702e94

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +434 -348
app.py CHANGED
@@ -6,21 +6,20 @@ import shutil
6
  import re
7
  import logging
8
  from pathlib import Path
9
- from PIL import Image # For checking image validity
10
  try:
11
  import mutagen
12
  from mutagen.mp3 import MP3, EasyMP3
13
  from mutagen.oggvorbis import OggVorbis
14
  from mutagen.flac import FLAC
15
  from mutagen.mp4 import MP4, MP4Cover
16
- from mutagen.id3 import ID3, APIC, error as ID3Error
17
  MUTAGEN_AVAILABLE = True
18
  except ImportError:
19
  MUTAGEN_AVAILABLE = False
20
  logging.warning("Mutagen library not found. Cover art embedding will be disabled.")
21
  logging.warning("Install it using: pip install mutagen")
22
 
23
-
24
  # --- Configuration & Logging ---
25
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
26
 
@@ -29,20 +28,24 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
29
  def check_command(command):
30
  """Checks if a command exists in the system's PATH."""
31
  try:
32
- # Use a more reliable check for command existence, sometimes --version fails
33
- # On Windows, 'where' command; on Unix-like, 'command -v' or 'which'
34
  if os.name == 'nt':
35
- subprocess.run(['where', command], check=True, capture_output=True)
 
36
  else:
37
- # 'command -v' is generally preferred over 'which'
38
- subprocess.run(['command', '-v', command], check=True, capture_output=True)
39
  logging.info(f"Command '{command}' found.")
40
  return True
41
- except (FileNotFoundError, subprocess.CalledProcessError) as e:
42
- logging.error(f"Command '{command}' not found or check failed. Please ensure it's installed and in your PATH.")
43
- # Log the specific error if needed: logging.error(f"Error details: {e}")
 
 
44
  return False
45
- except Exception as e: # Catch unexpected errors during check
 
 
 
46
  logging.error(f"Unexpected error checking for command '{command}': {e}")
47
  return False
48
 
@@ -51,42 +54,61 @@ def get_espeak_voices():
51
  """Gets available espeak-ng voices and their languages."""
52
  voices = {}
53
  try:
54
- # Use a robust way to list voices that includes language info
55
- result = subprocess.run(['espeak-ng', '--voices'], capture_output=True, text=True, check=True, encoding='utf-8', errors='ignore')
 
 
56
  # Example line format: P L V Language Code Age/Gender VoiceName File Other Langs
57
  # 2 y en-US M american-english-us Mbrola/us1 (en 10)
58
  # 1 af M afrikaans Afrikaans
59
- pattern = re.compile(r"^\s*\d+\s+[yn-]\s+([\w-]+)\s+[MF-]\s+(.+?)\s+([\w/ -]+?)(?:\s+\(([\w\s]+)\))?\s*$")
60
- for line in result.stdout.splitlines()[1:]: # Skip header
61
- match = pattern.match(line)
 
 
 
 
 
 
 
62
  if match:
63
- code, lang_name, _voice_name, _other_langs = match.groups()
64
- display_name = f"{lang_name.strip()} ({code})"
65
- # Avoid duplicates if multiple voice names exist for the same code
 
 
 
 
 
 
 
66
  if display_name not in voices:
67
  voices[display_name] = code
68
  else:
69
- # Try simpler parsing for lines without extra details
70
  parts = line.split()
71
  if len(parts) >= 4 and parts[0].isdigit():
72
  code = parts[1]
73
  lang_name = parts[3]
74
- display_name = f"{lang_name.strip()} ({code})"
75
  if display_name not in voices:
76
  voices[display_name] = code
 
 
 
77
 
78
  if not voices:
79
  logging.warning("Could not parse any voices from 'espeak-ng --voices'. Using fallback list.")
80
- # Add common fallbacks if parsing fails
81
- voices = {"English (en)": "en", "Spanish (es)": "es", "French (fr)": "fr", "German (de)": "de"}
82
 
83
  # Sort voices alphabetically by display name
84
  sorted_voices = dict(sorted(voices.items()))
 
85
  return sorted_voices
86
 
87
- except (FileNotFoundError, subprocess.CalledProcessError, Exception) as e:
88
  logging.error(f"Error getting espeak-ng voices: {e}")
89
- # Provide a basic fallback list if the command fails
90
  return {"English (en)": "en", "Spanish (es)": "es", "French (fr)": "fr", "German (de)": "de"}
91
 
92
  # --- Main Conversion Logic ---
@@ -96,365 +118,392 @@ def convert_ebook_to_audio(ebook_file, language_display, output_format, embed_co
96
  Converts an ebook file to an audiobook using Calibre and espeak-ng.
97
  """
98
  if not ebook_file:
99
- # Return None for audio, None for cover, and the error message
100
  return None, None, "**Error:** No ebook file provided."
101
 
102
- # Check required commands based on selection
103
  calibre_convert_ok = check_command("ebook-convert")
104
- calibre_meta_ok = True if not embed_cover else check_command("ebook-meta") # Only check if needed
105
  espeak_ok = check_command("espeak-ng")
106
- lame_ok = True if output_format != 'mp3' else check_command("lame")
107
- oggenc_ok = True if output_format != 'ogg' else check_command("oggenc")
108
 
109
  missing = []
110
  if not calibre_convert_ok: missing.append("Calibre ('ebook-convert')")
111
- if not calibre_meta_ok and embed_cover: missing.append("Calibre ('ebook-meta' for cover art)")
112
  if not espeak_ok: missing.append("espeak-ng")
113
  if not lame_ok and output_format == 'mp3': missing.append("LAME (for MP3)")
114
  if not oggenc_ok and output_format == 'ogg': missing.append("oggenc (for OGG)")
115
 
116
  if missing:
117
- error_msg = f"**Error:** Missing required command(s):\n- {', '.join(missing)}\n\nPlease install them and ensure they are in your system PATH."
118
  logging.error(error_msg.replace("**Error:** ","").replace("\n- "," ").replace("\n"," ")) # Log plain text
119
- # Return None for audio, None for cover, and the error message
120
  return None, None, error_msg
121
 
122
 
123
  temp_dir = tempfile.mkdtemp(prefix="ebook_audio_")
124
  logging.info(f"Created temporary directory: {temp_dir}")
125
- status_updates = ["Conversion started..."]
126
- cover_image_path_final = None
127
- audio_output_path_final = None # Keep track of the final audio path
128
 
129
  try:
130
  input_ebook_path = ebook_file.name # Gradio provides a temp path for the upload
131
- base_filename = Path(input_ebook_path).stem
 
132
  txt_output_path = os.path.join(temp_dir, f"{base_filename}.txt")
133
- cover_output_path_temp = os.path.join(temp_dir, "cover.jpg") # Assume jpg initially
 
134
  audio_output_path = os.path.join(temp_dir, f"{base_filename}.{output_format}")
135
 
136
  # --- Step 1: Extract Cover Art (Optional) ---
137
  cover_extracted = False
138
- if embed_cover and calibre_meta_ok: # Already checked if ebook-meta exists
139
- progress(0.1, desc="Extracting cover art (optional)")
140
- status_updates.append("Attempting to extract cover art...")
141
  try:
142
  cmd_meta = ['ebook-meta', input_ebook_path, '--get-cover', cover_output_path_temp]
143
  logging.info(f"Running cover extraction: {' '.join(cmd_meta)}")
144
- result_meta = subprocess.run(cmd_meta, check=True, capture_output=True, text=True, errors='ignore')
 
145
  if os.path.exists(cover_output_path_temp) and os.path.getsize(cover_output_path_temp) > 0:
146
- # Validate if it's a real image file Pillow can open
147
  try:
148
  img = Image.open(cover_output_path_temp)
149
- img.verify() # Verify CRC markers
150
  img.close() # Need to close after verify
151
- # Reopen to check format and potentially save in a consistent format if needed
 
152
  img = Image.open(cover_output_path_temp)
153
- fmt = img.format.lower() if img.format else 'unknown'
154
  img.close()
155
 
156
- if fmt not in ['jpeg', 'png']:
157
- logging.warning(f"Extracted cover is not JPEG or PNG ({fmt}), attempting conversion.")
158
- # Try converting to JPG for broader compatibility with mutagen
159
- new_cover_path = os.path.join(temp_dir, "cover_converted.jpg")
160
- try:
161
- img = Image.open(cover_output_path_temp)
162
- img.convert('RGB').save(new_cover_path, "JPEG")
163
- img.close()
164
- # Check if conversion worked
165
- if os.path.exists(new_cover_path) and os.path.getsize(new_cover_path) > 0:
166
- cover_output_path_temp = new_cover_path # Use the converted path
167
- cover_extracted = True
168
- cover_image_path_final = cover_output_path_temp # Update final path for display
169
- status_updates.append("✅ Cover art extracted and converted to JPG.")
170
- logging.info(f"Cover art extracted and converted to JPG: {cover_image_path_final}")
171
-
172
- else:
173
- logging.error("Failed to convert cover art to JPG.")
174
- status_updates.append("⚠️ Could not convert extracted cover art to JPG. Will skip embedding.")
175
- if os.path.exists(cover_output_path_temp): os.remove(cover_output_path_temp) # Clean up original if unusable
176
-
177
- except Exception as convert_err:
178
- logging.error(f"Error converting cover image: {convert_err}")
179
- status_updates.append(f"⚠️ Error converting cover image: {convert_err}. Will skip embedding.")
180
- if os.path.exists(cover_output_path_temp): os.remove(cover_output_path_temp) # Clean up original
181
-
182
- else:
183
- cover_extracted = True
184
- cover_image_path_final = cover_output_path_temp # Use original path
185
- status_updates.append("✅ Cover art extracted successfully.")
186
- logging.info(f"Cover art extracted to {cover_image_path_final} (Format: {fmt})")
187
-
188
- except (IOError, SyntaxError, Image.UnidentifiedImageError) as img_err:
189
- logging.warning(f"Extracted file is not a valid image or couldn't be processed: {img_err}")
190
- status_updates.append("⚠️ Extracted 'cover' file is not a valid image. Will skip embedding.")
191
  if os.path.exists(cover_output_path_temp): os.remove(cover_output_path_temp) # Clean up invalid file
 
 
 
192
  else:
193
- status_updates.append("ℹ️ No cover art found in the ebook metadata.")
194
  logging.info("ebook-meta ran but did not produce a cover file or it was empty.")
 
195
 
196
- # No FileNotFoundError needed here as calibre_meta_ok check already happened
 
 
197
  except subprocess.CalledProcessError as e:
198
- stderr_decoded = e.stderr.decode(errors='ignore') if e.stderr else "No stderr"
199
- status_updates.append(f"⚠️ Failed to extract cover art. Error: {stderr_decoded}")
200
  logging.warning(f"ebook-meta failed: {stderr_decoded}")
201
  except Exception as e:
202
- status_updates.append(f"⚠️ An unexpected error occurred during cover extraction: {e}")
203
  logging.error(f"Unexpected error during cover extraction: {e}", exc_info=True)
 
 
 
 
204
  elif embed_cover and not calibre_meta_ok:
205
- status_updates.append("ℹ️ Cover art embedding requested, but 'ebook-meta' not found.")
 
 
206
 
207
  # --- Step 2: Convert Ebook to TXT ---
208
- progress(0.3, desc="Converting ebook to TXT")
209
- status_updates.append("Converting ebook to plain text...")
210
  try:
211
- # --input-encoding and --output-encoding might be needed for some books
212
- cmd_convert = ['ebook-convert', input_ebook_path, txt_output_path, '--enable-heuristics']
 
 
 
 
 
 
 
213
  logging.info(f"Running ebook conversion: {' '.join(cmd_convert)}")
214
- result_convert = subprocess.run(cmd_convert, check=True, capture_output=True, encoding='utf-8', errors='ignore')
215
- # Check stdout/stderr even on success for warnings
 
216
  if result_convert.stdout: logging.info(f"ebook-convert stdout: {result_convert.stdout.strip()}")
217
  if result_convert.stderr: logging.warning(f"ebook-convert stderr: {result_convert.stderr.strip()}")
218
- status_updates.append("✅ Ebook converted to TXT.")
219
  logging.info("Ebook successfully converted to TXT.")
 
 
 
 
 
220
  except subprocess.CalledProcessError as e:
221
- stderr_decoded = e.stderr.decode(errors='ignore') if e.stderr else "No stderr"
222
- error_msg = f"**Error:** Calibre conversion failed.\n```\n{stderr_decoded or e}\n```"
223
- status_updates.append(f" Calibre conversion failed.") # Keep status short
224
- logging.error(f"Error during Calibre conversion: {stderr_decoded or e}")
225
- # Return None for audio, the extracted cover (if any), and the error message
 
226
  return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
227
  except Exception as e:
228
  error_msg = f"**Error:** An unexpected error occurred during ebook conversion:\n{e}"
229
- status_updates.append(f"❌ Unexpected conversion error.")
230
  logging.error(f"Unexpected error during ebook conversion: {e}", exc_info=True)
231
  return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
232
 
233
  # Check if TXT file was actually created and is not empty
234
  if not os.path.exists(txt_output_path) or os.path.getsize(txt_output_path) == 0:
235
- error_msg = "**Error:** Calibre finished, but the output TXT file is missing or empty.\nThis can happen with image-based ebooks (like comics/scans) or DRM-protected files."
236
- status_updates.append(f"❌ TXT output empty/missing.")
237
  logging.error("Calibre finished, but the output TXT file is missing or empty.")
238
  return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
239
 
240
  # --- Step 3: Convert TXT to Audio ---
241
- progress(0.6, desc="Converting TXT to Audio")
242
- status_updates.append("Converting text to speech...")
243
 
244
  voice_code = available_voices.get(language_display, 'en') # Get code from display name
 
245
  cmd_speak = ['espeak-ng', '-v', voice_code, '-f', txt_output_path]
246
- # Add speed option if needed: cmd_speak.extend(['-s', '160']) # Example speed
 
247
 
248
  try:
249
  logging.info(f"Preparing audio command for format: {output_format}")
 
 
 
250
  if output_format == 'wav':
251
  cmd_speak.extend(['-w', audio_output_path])
252
  logging.info(f"Running espeak-ng (WAV): {' '.join(cmd_speak)}")
253
- result_speak = subprocess.run(cmd_speak, check=True, capture_output=True) # Capture bytes
254
- # Log stdout/stderr even on success
255
- if result_speak.stdout: logging.info(f"espeak-ng stdout: {result_speak.stdout.decode(errors='ignore').strip()}")
256
- if result_speak.stderr: logging.warning(f"espeak-ng stderr: {result_speak.stderr.decode(errors='ignore').strip()}")
257
 
258
  elif output_format == 'mp3':
259
- cmd_speak.append('--stdout')
260
- cmd_lame = ['lame', '-', audio_output_path] # Read from stdin, write to file
261
- logging.info(f"Running espeak-ng | lame (MP3): {' '.join(cmd_speak)} | {' '.join(cmd_lame)}")
 
 
 
262
  ps_speak = subprocess.Popen(cmd_speak, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 
263
  ps_lame = subprocess.Popen(cmd_lame, stdin=ps_speak.stdout, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
264
 
265
- # Allow ps_speak to receive SIGPIPE if ps_lame exits early. Crucial!
266
  if ps_speak.stdout:
267
  ps_speak.stdout.close()
268
 
269
- # Capture output/errors and wait for LAME to finish
270
- lame_stdout_bytes, lame_stderr_bytes = ps_lame.communicate()
271
- # Capture stderr from espeak and WAIT for it to finish *after* lame is done
 
 
 
 
 
 
 
272
  speak_stderr_bytes = ps_speak.stderr.read() if ps_speak.stderr else b""
273
- ps_speak.wait() # <<< --- Explicitly wait for espeak-ng ---
274
  if ps_speak.stderr: ps_speak.stderr.close()
275
 
276
- # Decode stderr for logging
277
  lame_stderr_str = lame_stderr_bytes.decode(errors='ignore').strip()
278
  speak_stderr_str = speak_stderr_bytes.decode(errors='ignore').strip()
279
 
280
- # Check return codes safely
281
  if ps_lame.returncode != 0:
282
- # LAME failed
283
- raise subprocess.CalledProcessError(ps_lame.returncode, cmd_lame, output=lame_stdout_bytes, stderr=lame_stderr_bytes)
284
  if ps_speak.returncode != 0:
285
- # Espeak failed (even if lame seemed okay initially)
286
- raise subprocess.CalledProcessError(ps_speak.returncode, cmd_speak, stderr=speak_stderr_bytes) # Pass the captured stderr bytes
287
 
288
- # Log warnings from stderr if processes succeeded
289
- if lame_stderr_str:
290
- logging.warning(f"LAME stderr: {lame_stderr_str}")
291
- if speak_stderr_str:
292
- logging.warning(f"espeak-ng stderr: {speak_stderr_str}")
293
 
294
  elif output_format == 'ogg':
295
- cmd_speak.append('--stdout')
296
- cmd_ogg = ['oggenc', '-o', audio_output_path, '-'] # Write to file, read from stdin
297
- logging.info(f"Running espeak-ng | oggenc (OGG): {' '.join(cmd_speak)} | {' '.join(cmd_ogg)}")
 
 
 
298
  ps_speak = subprocess.Popen(cmd_speak, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
299
  ps_ogg = subprocess.Popen(cmd_ogg, stdin=ps_speak.stdout, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
300
 
301
- # Allow ps_speak to receive SIGPIPE if oggenc exits early.
302
  if ps_speak.stdout:
303
  ps_speak.stdout.close()
304
 
305
- # Capture output/errors and wait for oggenc to finish
306
- ogg_stdout_bytes, ogg_stderr_bytes = ps_ogg.communicate()
307
- # Capture stderr from espeak and WAIT for it to finish *after* oggenc is done
 
 
 
 
 
308
  speak_stderr_bytes = ps_speak.stderr.read() if ps_speak.stderr else b""
309
- ps_speak.wait() # <<< --- Explicitly wait for espeak-ng ---
310
  if ps_speak.stderr: ps_speak.stderr.close()
311
 
312
- # Decode stderr for logging
313
  ogg_stderr_str = ogg_stderr_bytes.decode(errors='ignore').strip()
314
  speak_stderr_str = speak_stderr_bytes.decode(errors='ignore').strip()
315
 
316
- # Now check return codes safely
317
  if ps_ogg.returncode != 0:
318
- # Oggenc failed
319
- raise subprocess.CalledProcessError(ps_ogg.returncode, cmd_ogg, output=ogg_stdout_bytes, stderr=ogg_stderr_bytes)
320
  if ps_speak.returncode != 0:
321
- # Espeak failed
322
- raise subprocess.CalledProcessError(ps_speak.returncode, cmd_speak, stderr=speak_stderr_bytes) # Pass captured stderr bytes
323
 
324
- # Log warnings from stderr if processes succeeded
325
- if ogg_stderr_str:
326
- logging.warning(f"oggenc stderr: {ogg_stderr_str}")
327
- if speak_stderr_str:
328
- logging.warning(f"espeak-ng stderr: {speak_stderr_str}")
329
 
330
  else:
331
- raise ValueError(f"Unsupported output format: {output_format}")
332
 
333
- status_updates.append("✅ Text converted to audio.")
334
  logging.info(f"Text successfully converted to {output_format.upper()}.")
335
 
336
  except subprocess.CalledProcessError as e:
337
- # --- MODIFIED ERROR HANDLING ---
338
- command_name = e.cmd[0] if isinstance(e.cmd, list) else e.cmd
339
- # Decode stderr/stdout safely (they might be bytes or None)
340
  stderr_str = e.stderr.decode(errors='ignore').strip() if isinstance(e.stderr, bytes) else (e.stderr or "")
341
  stdout_str = e.stdout.decode(errors='ignore').strip() if isinstance(e.stdout, bytes) else (e.stdout or "")
342
  error_details = stderr_str or stdout_str or "No output/error captured."
343
-
344
- # Construct error message carefully
345
  exit_status_str = f"exit status {e.returncode}" if e.returncode is not None else "unknown exit status"
346
  cmd_str = ' '.join(e.cmd) if isinstance(e.cmd, list) else e.cmd
347
- error_msg = f"Audio generation failed ({command_name} with {exit_status_str})."
348
- status_updates.append(f" {error_msg}")
349
- logging.error(f"{error_msg} Command: `{cmd_str}` Output/Error: {error_details}")
350
-
351
- # Use Markdown for better formatting in Gradio Textbox
352
- md_error_details = f"**Error:** Audio generation failed.\n\n" \
353
- f"**Command:**\n```\n{cmd_str}\n```\n" \
354
- f"**Exit Status:** {exit_status_str}\n\n" \
355
- f"**Output/Error:**\n```\n{error_details}\n```"
356
- # Return None for audio, the cover (if any), and the combined status/error message
357
- return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{md_error_details}"
358
- # --- END MODIFIED ERROR HANDLING ---
 
 
 
 
359
 
360
  except FileNotFoundError as e:
361
- missing_cmd = e.filename # Usually contains the missing command
362
- error_msg = f"**Error:** Command `{missing_cmd}` not found for {output_format.upper()} output.\nPlease install it and ensure it's in your system PATH."
363
- status_updates.append(f" Command '{missing_cmd}' not found.")
364
- logging.error(f"Error: Command '{missing_cmd}' not found.")
 
365
  return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
366
  except Exception as e:
367
- error_msg = f"**Error:** An unexpected error occurred during audio generation:\n{e}"
368
- status_updates.append(f"❌ Unexpected audio error.")
369
  logging.error(f"An unexpected error occurred during audio generation: {e}", exc_info=True)
370
  return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
371
 
372
- # Check if audio file exists and has size
373
- if not os.path.exists(audio_output_path) or os.path.getsize(audio_output_path) < 1024: # Check for > 1KB as a basic sanity check
374
- error_msg = f"**Error:** Audio generation command finished, but the output file '{Path(audio_output_path).name}' is missing or too small. Check logs for details."
375
- status_updates.append(f"❌ Audio output missing/small.")
376
- logging.error(f"Audio output file missing or too small: {audio_output_path}")
 
 
 
377
  return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
378
 
379
 
380
  # --- Step 4: Embed Cover Art (Optional) ---
381
- if embed_cover and cover_extracted and MUTAGEN_AVAILABLE and os.path.exists(cover_image_path_final):
382
- progress(0.9, desc="Embedding cover art")
383
- status_updates.append("Embedding cover art into audio file...")
384
  try:
385
  with open(cover_image_path_final, 'rb') as img_f:
386
  cover_data = img_f.read()
387
 
388
- # Determine mimetype using PIL
389
- mime_type = None
 
390
  try:
391
  img = Image.open(cover_image_path_final)
392
  mime_type = Image.MIME.get(img.format)
 
 
393
  img.close()
 
 
 
 
 
 
394
  except Exception as pil_err:
395
- logging.warning(f"Could not determine MIME type using PIL: {pil_err}")
396
-
397
- if not mime_type:
398
- # Basic fallback based on extension
399
- ext = Path(cover_image_path_final).suffix.lower()
400
- if ext == ".jpg" or ext == ".jpeg":
401
- mime_type = 'image/jpeg'
402
- elif ext == ".png":
403
- mime_type = 'image/png'
404
- else:
405
- mime_type = 'image/jpeg' # Default guess if extension unknown/unsupported
406
- logging.warning(f"Defaulting cover MIME type to {mime_type}")
407
-
408
 
409
  logging.info(f"Attempting to embed cover art ({mime_type}) into {audio_output_path}")
410
- audio = mutagen.File(audio_output_path, easy=False) # Use easy=False for more control
411
 
412
  if audio is None:
413
- raise ValueError("Mutagen could not load the audio file. Format might be unsupported by Mutagen or file corrupted.")
 
 
 
414
 
415
- # Clear existing images before adding new one (optional, prevents duplicates)
416
  try:
417
- tags_modified = False
418
- if isinstance(audio, (MP3, EasyMP3)):
419
- if audio.tags and 'APIC:' in audio.tags:
420
- del audio.tags['APIC:'] # Common key format
421
- tags_modified = True
422
- # Also try deleting all APIC frames regardless of description
423
- if audio.tags:
424
- apic_keys = [k for k in audio.tags.keys() if k.startswith('APIC')]
425
- for k in apic_keys:
426
- del audio.tags[k]
427
- tags_modified = True
428
- elif isinstance(audio, FLAC):
429
- if audio.pictures:
430
- audio.clear_pictures()
431
- tags_modified = True
432
- elif isinstance(audio, MP4):
433
- if 'covr' in audio:
434
- del audio['covr']
435
- tags_modified = True
436
- # OggVorbis picture removal is more complex, might need specific key deletion
437
- elif isinstance(audio, OggVorbis) and "metadata_block_picture" in audio:
438
- del audio["metadata_block_picture"]
439
- tags_modified = True
440
-
441
- if tags_modified:
442
- audio.save() # Save after deletion before adding
443
- audio = mutagen.File(audio_output_path, easy=False) # Re-load
444
- except Exception as e:
445
- logging.warning(f"Could not clear existing artwork before embedding: {e}")
446
-
447
-
448
- # Embedding logic differs by format
 
 
 
449
  save_needed = False
450
- if isinstance(audio, (MP3, EasyMP3)):
451
- if audio.tags is None: audio.add_tags() # Ensure tags exist
452
  audio.tags.add(
453
  APIC(
454
- encoding=3, # 3 is for utf-8
455
  mime=mime_type,
456
- type=3, # 3 is for cover image (front)
457
- desc=u'Cover',
458
  data=cover_data
459
  )
460
  )
@@ -462,118 +511,96 @@ def convert_ebook_to_audio(ebook_file, language_display, output_format, embed_co
462
  elif isinstance(audio, FLAC):
463
  pic = mutagen.flac.Picture()
464
  pic.data = cover_data
465
- pic.type = mutagen.id3.PictureType.COVER_FRONT
466
  pic.mime = mime_type
467
- # pic.width, pic.height, pic.depth = ... # Optionally get dimensions from PIL
 
 
468
  audio.add_picture(pic)
469
  save_needed = True
470
  elif isinstance(audio, OggVorbis):
471
- # Ogg uses base64 encoded pictures in METADATA_BLOCK_PICTURE tag
472
  import base64
473
- # This field expects a FLAC Picture block, base64 encoded.
474
  pic = mutagen.flac.Picture()
475
  pic.data = cover_data
476
- pic.type = mutagen.id3.PictureType.COVER_FRONT
477
  pic.mime = mime_type
478
- # Add required fields if possible (otherwise defaults might work)
479
- img = Image.open(cover_image_path_final)
480
- pic.width = img.width
481
- pic.height = img.height
482
- # Determine color depth (e.g., 24 for RGB, 32 for RGBA)
483
- pic.depth = {'RGB': 24, 'RGBA': 32, 'L': 8}.get(img.mode, 24)
484
- img.close()
485
-
486
- # Encode the full picture block
487
- audio["metadata_block_picture"] = [base64.b64encode(pic.write()).decode("ascii")]
488
  save_needed = True
489
-
490
- elif isinstance(audio, MP4):
491
- if mime_type == 'image/jpeg':
492
- pic_format = MP4Cover.FORMAT_JPEG
493
- elif mime_type == 'image/png':
494
- pic_format = MP4Cover.FORMAT_PNG
495
- else:
496
- pic_format = MP4Cover.FORMAT_UNDEFINED # Or skip if unknown
497
- logging.warning(f"Unsupported cover image format ({mime_type}) for MP4 embedding.")
498
-
499
  if pic_format != MP4Cover.FORMAT_UNDEFINED:
500
  audio['covr'] = [MP4Cover(cover_data, imageformat=pic_format)]
501
  save_needed = True
 
 
 
502
 
503
- # Add other metadata (optional) - Use easy=True for simpler access
504
- try:
505
- audio_easy = mutagen.File(audio_output_path, easy=True)
506
- if audio_easy is not None:
507
- if 'title' not in audio_easy or not audio_easy['title']:
508
- audio_easy['title'] = base_filename
509
- save_needed = True
510
- if 'artist' not in audio_easy or not audio_easy['artist']:
511
- audio_easy['artist'] = "Generated Audiobook"
512
- save_needed = True
513
- if save_needed:
514
- audio_easy.save() # Save easy tags if modified
515
- save_needed = False # Prevent double save if only easy tags changed
516
- except Exception as tag_err:
517
- logging.warning(f"Could not set basic title/artist tags: {tag_err}")
518
 
519
- # Save the main audio object if changes were made (picture or direct tags)
520
- if save_needed and audio is not None:
521
  audio.save()
522
-
523
- status_updates.append("Cover art embedded successfully.")
524
- logging.info("Cover art embedded successfully.")
525
-
526
- except (mutagen.MutagenError, ValueError, IOError, TypeError, KeyError) as e:
527
- status_updates.append(f"⚠️ Could not embed cover art. Error: {e}")
 
 
528
  logging.warning(f"Failed to embed cover art: {e}", exc_info=True)
529
  except Exception as e:
530
- status_updates.append(f"⚠️ An unexpected error occurred during cover art embedding: {e}")
531
  logging.error(f"Unexpected error during cover embedding: {e}", exc_info=True)
532
  elif embed_cover and not cover_extracted:
533
- status_updates.append("ℹ️ Cover art embedding skipped (no cover extracted or invalid).")
534
  elif embed_cover and not MUTAGEN_AVAILABLE:
535
- status_updates.append("⚠️ Cover art embedding skipped (Mutagen library not installed).")
 
536
 
537
 
538
  # --- Step 5: Prepare final output ---
539
- progress(1.0, desc="Complete")
540
- status_updates.append(" Conversion complete!")
541
  audio_output_path_final = audio_output_path # Mark the path as final
542
 
543
  # Return paths for Gradio components
544
  final_status = "\n".join(status_updates)
545
- logging.info(f"Returning audio: {audio_output_path_final}, cover: {cover_image_path_final}")
546
  # Return audio path for Audio component, cover path for Image, status for Textbox
547
  return audio_output_path_final, cover_image_path_final, final_status
548
 
549
  except Exception as e:
550
- error_msg = f"An unexpected error occurred in the main process: {e}"
551
- status_updates.append(f" {error_msg}")
 
552
  logging.error(error_msg, exc_info=True)
553
  # Return None for audio, cover path (if extracted), and the error status
554
- return None, cover_image_path_final, f"**Error:** An unexpected critical error occurred.\nCheck logs for details.\n{e}"
 
555
 
556
  finally:
557
  # --- Cleanup ---
558
- # Clean intermediate files. Let Gradio handle the returned files.
 
559
  try:
560
  if 'txt_output_path' in locals() and os.path.exists(txt_output_path):
561
  os.remove(txt_output_path)
562
  logging.info(f"Removed intermediate file: {txt_output_path}")
563
- # Remove original cover if it was converted and different from final
564
- if ('cover_image_path_final' in locals() and cover_image_path_final and
565
- 'cover_output_path_temp' in locals() and cover_output_path_temp != cover_image_path_final and
566
- os.path.exists(cover_output_path_temp)):
567
- os.remove(cover_output_path_temp)
568
- logging.info(f"Removed intermediate file: {cover_output_path_temp}")
569
-
570
- # Optionally schedule full temp dir removal if Gradio doesn't handle it.
571
- # For now, assume Gradio manages the returned file paths.
572
- if 'temp_dir' in locals() and os.path.exists(temp_dir):
573
- logging.info(f"Temp dir '{temp_dir}' contains output files. Skipping immediate deletion.")
574
- # To force cleanup (may break Gradio display if files aren't copied):
575
- # shutil.rmtree(temp_dir, ignore_errors=True)
576
- # logging.info(f"Attempted cleanup of temp dir: {temp_dir}")
577
 
578
  except OSError as e:
579
  logging.warning(f"Error during cleanup of intermediate files: {e}")
@@ -581,88 +608,147 @@ def convert_ebook_to_audio(ebook_file, language_display, output_format, embed_co
581
 
582
  # --- Gradio Interface Definition ---
583
 
 
 
584
  available_voices = get_espeak_voices()
585
  voice_choices = list(available_voices.keys())
586
- # Try to find a more specific default like en-US, otherwise fall back
587
- default_voice_options = ["English (en-US) (en-us)", "English (United States) (en-us)", "English (en)", "en"]
588
- default_voice = next((v for v in default_voice_options if v in voice_choices), (voice_choices[0] if voice_choices else "en"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
589
 
590
 
591
  # Check for external tools on startup and display warnings if needed
 
592
  startup_warnings = []
593
  if not check_command("ebook-convert"): startup_warnings.append("Calibre ('ebook-convert')")
594
- if not check_command("ebook-meta"): startup_warnings.append("Calibre ('ebook-meta' - recommended for cover art)")
595
- if not check_command("espeak-ng"): startup_warnings.append("espeak-ng")
596
  if not check_command("lame"): startup_warnings.append("LAME (needed for MP3 output)")
597
- if not check_command("oggenc"): startup_warnings.append("oggenc (needed for OGG output)")
598
- if not MUTAGEN_AVAILABLE: startup_warnings.append("Python 'mutagen' library (needed for embedding cover art)")
599
 
600
  startup_message = ""
601
  if startup_warnings:
602
  warning_list = "\n- ".join(startup_warnings)
603
  startup_message = (
604
- "**⚠️ Startup Warning: The following components might be missing or not found in PATH:**\n\n"
605
  f"- {warning_list}\n\n"
606
- "Please install them for full functionality. Check console logs for details."
 
607
  )
608
- print("-" * 60)
609
- print(f"STARTUP WARNING: Missing components: {', '.join(startup_warnings)}")
610
- print("-" * 60)
 
611
 
612
  # Define UI Elements
613
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
614
- gr.Markdown("# Ebook to Audiobook Converter 🎧📚")
615
- gr.Markdown("Upload an ebook file (EPUB, MOBI, AZW3, PDF*, etc.), choose a language and format, and convert it to an audiobook using Calibre and eSpeak-NG.\n\n"
616
- "*Note: PDF conversion quality varies greatly. Text-based PDFs work best.*")
 
 
 
 
 
 
 
 
 
617
 
618
  if startup_message:
619
- gr.Markdown(startup_message) # Display warning in UI
620
 
621
  with gr.Row():
622
- with gr.Column(scale=1):
623
- ebook_input = gr.File(label="1. Upload Ebook", file_count="single")
624
  lang_dropdown = gr.Dropdown(
625
  label="2. Select Language / Voice",
626
  choices=voice_choices,
627
- value=default_voice,
 
628
  interactive=True
629
  )
630
  format_dropdown = gr.Dropdown(
631
  label="3. Select Output Audio Format",
632
  choices=["mp3", "ogg", "wav"],
633
  value="mp3",
 
634
  interactive=True
635
  )
636
  cover_checkbox = gr.Checkbox(
637
  label="Embed Cover Art (if available)",
638
- value=True if MUTAGEN_AVAILABLE else False, # Default to True if mutagen is there
639
- interactive=MUTAGEN_AVAILABLE # Disable if mutagen is missing
 
 
 
640
  )
641
- submit_button = gr.Button("Convert to Audiobook", variant="primary")
642
-
643
- with gr.Column(scale=2):
644
- status_textbox = gr.Textbox(label="Conversion Status", lines=12, interactive=False, max_lines=25, show_copy_button=True)
 
 
 
 
 
 
 
645
  with gr.Row():
646
- # Use filepath for image to avoid potential base64 encoding issues with large images
647
- cover_image = gr.Image(label="Extracted Cover Art", type="filepath", interactive=False, height=200, width=200)
648
- # Use filepath for audio for consistency and potentially better handling of large files
649
- # The gr.Audio component includes download functionality.
650
- audio_output_player = gr.Audio(label="Generated Audiobook", type="filepath", interactive=False)
651
- # REMOVED: audio_output_download = gr.File(label="Download Audiobook File", interactive=False)
 
 
 
 
 
 
 
 
652
 
653
  # Connect components
654
  submit_button.click(
655
  fn=convert_ebook_to_audio,
656
  inputs=[ebook_input, lang_dropdown, format_dropdown, cover_checkbox],
657
- # Map outputs to the player, image, and status box.
658
- outputs=[audio_output_player, cover_image, status_textbox] # MODIFIED
659
  )
660
 
 
 
661
  # --- Launch the App ---
662
  if __name__ == "__main__":
663
- print("Starting Gradio App...")
664
- print("Ensure Calibre (ebook-convert, ebook-meta), espeak-ng, lame, and oggenc are installed and in your system PATH.")
665
- if not voice_choices:
666
- print("\nWARNING: Could not retrieve any voices from espeak-ng. The language dropdown will be limited or empty!\n")
667
- # Add share=True for a public link, server_name="0.0.0.0" for Docker/network access
668
- demo.launch()
 
 
 
 
6
  import re
7
  import logging
8
  from pathlib import Path
9
+ from PIL import Image, UnidentifiedImageError # For checking image validity
10
  try:
11
  import mutagen
12
  from mutagen.mp3 import MP3, EasyMP3
13
  from mutagen.oggvorbis import OggVorbis
14
  from mutagen.flac import FLAC
15
  from mutagen.mp4 import MP4, MP4Cover
16
+ from mutagen.id3 import ID3, APIC, PictureType, error as ID3Error
17
  MUTAGEN_AVAILABLE = True
18
  except ImportError:
19
  MUTAGEN_AVAILABLE = False
20
  logging.warning("Mutagen library not found. Cover art embedding will be disabled.")
21
  logging.warning("Install it using: pip install mutagen")
22
 
 
23
  # --- Configuration & Logging ---
24
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
25
 
 
28
  def check_command(command):
29
  """Checks if a command exists in the system's PATH."""
30
  try:
 
 
31
  if os.name == 'nt':
32
+ # 'where' command on Windows
33
+ subprocess.run(['where', command], check=True, capture_output=True, timeout=5)
34
  else:
35
+ # 'command -v' is generally preferred and more portable than 'which' on Unix-like systems
36
+ subprocess.run(['command', '-v', command], check=True, capture_output=True, timeout=5)
37
  logging.info(f"Command '{command}' found.")
38
  return True
39
+ except FileNotFoundError:
40
+ logging.error(f"Command '{command}' check tool ('where' or 'command') not found.")
41
+ return False
42
+ except subprocess.CalledProcessError:
43
+ logging.warning(f"Command '{command}' not found in PATH.")
44
  return False
45
+ except subprocess.TimeoutExpired:
46
+ logging.error(f"Timeout checking for command '{command}'. Assuming not found.")
47
+ return False
48
+ except Exception as e:
49
  logging.error(f"Unexpected error checking for command '{command}': {e}")
50
  return False
51
 
 
54
  """Gets available espeak-ng voices and their languages."""
55
  voices = {}
56
  try:
57
+ cmd = ['espeak-ng', '--voices']
58
+ logging.info(f"Getting voices with command: {' '.join(cmd)}")
59
+ # Use a timeout to prevent hanging if espeak-ng has issues
60
+ result = subprocess.run(cmd, capture_output=True, text=True, check=True, encoding='utf-8', errors='ignore', timeout=15)
61
  # Example line format: P L V Language Code Age/Gender VoiceName File Other Langs
62
  # 2 y en-US M american-english-us Mbrola/us1 (en 10)
63
  # 1 af M afrikaans Afrikaans
64
+ # More robust pattern to handle variations
65
+ pattern = re.compile(r"^\s*\d+\s+[yn\-]\s+([\w\-]+)\s+[MF\-]?\s+([\w\s\(\)\-]+?)\s+([\w\/\s\-]+?)(?:\s+\(.*\))?\s*$")
66
+
67
+ lines = result.stdout.splitlines()
68
+ if not lines or len(lines) < 2: # Check if there's output beyond the header
69
+ logging.warning("No voice lines found in 'espeak-ng --voices' output.")
70
+ raise ValueError("No voice data returned.")
71
+
72
+ for line in lines[1:]: # Skip header
73
+ match = pattern.match(line.strip())
74
  if match:
75
+ # Extract code (group 1) and language name (group 2)
76
+ code = match.group(1).strip()
77
+ lang_name = match.group(2).strip()
78
+
79
+ # Clean up language name (remove potential file paths sometimes included)
80
+ lang_name = lang_name.split(" ")[0]
81
+ # Prioritize names like "english-us" over just "english" if code reflects it
82
+ display_name = f"{lang_name.replace('-', ' ').title()} ({code})"
83
+
84
+ # Avoid duplicates, preferring more specific codes if names clash slightly
85
  if display_name not in voices:
86
  voices[display_name] = code
87
  else:
88
+ # Simpler split as fallback for lines that don't match complex regex
89
  parts = line.split()
90
  if len(parts) >= 4 and parts[0].isdigit():
91
  code = parts[1]
92
  lang_name = parts[3]
93
+ display_name = f"{lang_name.strip().title()} ({code})"
94
  if display_name not in voices:
95
  voices[display_name] = code
96
+ else:
97
+ logging.warning(f"Could not parse voice line: {line}")
98
+
99
 
100
  if not voices:
101
  logging.warning("Could not parse any voices from 'espeak-ng --voices'. Using fallback list.")
102
+ raise ValueError("Parsing failed.")
 
103
 
104
  # Sort voices alphabetically by display name
105
  sorted_voices = dict(sorted(voices.items()))
106
+ logging.info(f"Found {len(sorted_voices)} espeak-ng voices.")
107
  return sorted_voices
108
 
109
+ except (FileNotFoundError, subprocess.CalledProcessError, subprocess.TimeoutExpired, ValueError, Exception) as e:
110
  logging.error(f"Error getting espeak-ng voices: {e}")
111
+ # Provide a basic fallback list if the command fails or parsing fails
112
  return {"English (en)": "en", "Spanish (es)": "es", "French (fr)": "fr", "German (de)": "de"}
113
 
114
  # --- Main Conversion Logic ---
 
118
  Converts an ebook file to an audiobook using Calibre and espeak-ng.
119
  """
120
  if not ebook_file:
 
121
  return None, None, "**Error:** No ebook file provided."
122
 
123
+ # Check required commands *before* creating temp dir
124
  calibre_convert_ok = check_command("ebook-convert")
125
+ calibre_meta_ok = check_command("ebook-meta") # Check always, needed logic follows
126
  espeak_ok = check_command("espeak-ng")
127
+ lame_ok = check_command("lame")
128
+ oggenc_ok = check_command("oggenc") # From vorbis-tools
129
 
130
  missing = []
131
  if not calibre_convert_ok: missing.append("Calibre ('ebook-convert')")
132
+ if not calibre_meta_ok: missing.append("Calibre ('ebook-meta' - for cover art)")
133
  if not espeak_ok: missing.append("espeak-ng")
134
  if not lame_ok and output_format == 'mp3': missing.append("LAME (for MP3)")
135
  if not oggenc_ok and output_format == 'ogg': missing.append("oggenc (for OGG)")
136
 
137
  if missing:
138
+ error_msg = f"**Error:** Missing required system command(s):\n- {', '.join(missing)}\n\nPlease ensure they are installed in the environment (check packages.txt)."
139
  logging.error(error_msg.replace("**Error:** ","").replace("\n- "," ").replace("\n"," ")) # Log plain text
 
140
  return None, None, error_msg
141
 
142
 
143
  temp_dir = tempfile.mkdtemp(prefix="ebook_audio_")
144
  logging.info(f"Created temporary directory: {temp_dir}")
145
+ status_updates = ["▶️ Conversion process started..."]
146
+ cover_image_path_final = None # Track final usable cover path for display/embedding
147
+ audio_output_path_final = None # Keep track of the final audio path for return
148
 
149
  try:
150
  input_ebook_path = ebook_file.name # Gradio provides a temp path for the upload
151
+ # Sanitize filename slightly for output files
152
+ base_filename = re.sub(r'[^\w\-]+', '_', Path(input_ebook_path).stem)
153
  txt_output_path = os.path.join(temp_dir, f"{base_filename}.txt")
154
+ # Use a generic name first, then check format
155
+ cover_output_path_temp = os.path.join(temp_dir, "cover_temp")
156
  audio_output_path = os.path.join(temp_dir, f"{base_filename}.{output_format}")
157
 
158
  # --- Step 1: Extract Cover Art (Optional) ---
159
  cover_extracted = False
160
+ if embed_cover and calibre_meta_ok:
161
+ progress(0.1, desc="🖼️ Extracting cover art (optional)...")
162
+ status_updates.append(" Attempting to extract cover art...")
163
  try:
164
  cmd_meta = ['ebook-meta', input_ebook_path, '--get-cover', cover_output_path_temp]
165
  logging.info(f"Running cover extraction: {' '.join(cmd_meta)}")
166
+ # Use timeout for ebook-meta as well
167
+ result_meta = subprocess.run(cmd_meta, check=True, capture_output=True, text=True, errors='ignore', timeout=30)
168
  if os.path.exists(cover_output_path_temp) and os.path.getsize(cover_output_path_temp) > 0:
169
+ # Validate image and get format
170
  try:
171
  img = Image.open(cover_output_path_temp)
172
+ img.verify() # Basic check
173
  img.close() # Need to close after verify
174
+
175
+ # Reopen to check format properly and prepare final path
176
  img = Image.open(cover_output_path_temp)
177
+ img_format = img.format.lower() if img.format else 'jpeg' # Default guess
178
  img.close()
179
 
180
+ # Define final path with correct extension
181
+ valid_ext = f".{img_format}" if img_format in ['jpeg', 'png', 'gif'] else ".jpg" # Default to jpg
182
+ cover_image_path_final = os.path.join(temp_dir, f"cover_final{valid_ext}")
183
+ shutil.move(cover_output_path_temp, cover_image_path_final) # Rename with correct extension
184
+
185
+ cover_extracted = True
186
+ status_updates.append(f" ✅ Cover art extracted successfully ({img_format.upper()}).")
187
+ logging.info(f"Cover art extracted to {cover_image_path_final}")
188
+
189
+ except (IOError, SyntaxError, UnidentifiedImageError) as img_err:
190
+ logging.warning(f"Extracted file at {cover_output_path_temp} is not a valid image: {img_err}")
191
+ status_updates.append(" ⚠️ Extracted 'cover' file is not a valid image. Will skip embedding.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  if os.path.exists(cover_output_path_temp): os.remove(cover_output_path_temp) # Clean up invalid file
193
+ if cover_image_path_final and os.path.exists(cover_image_path_final): os.remove(cover_image_path_final)
194
+ cover_image_path_final = None # Ensure it's None
195
+
196
  else:
197
+ status_updates.append(" ℹ️ No cover art found in the ebook metadata.")
198
  logging.info("ebook-meta ran but did not produce a cover file or it was empty.")
199
+ if os.path.exists(cover_output_path_temp): os.remove(cover_output_path_temp) # Clean up empty file
200
 
201
+ except subprocess.TimeoutExpired:
202
+ status_updates.append(f" ⚠️ Timeout trying to extract cover art.")
203
+ logging.warning(f"ebook-meta timed out.")
204
  except subprocess.CalledProcessError as e:
205
+ stderr_decoded = e.stderr.decode(errors='ignore').strip() if e.stderr else "No stderr"
206
+ status_updates.append(f" ⚠️ Failed to extract cover art. Error: {stderr_decoded[:200]}{'...' if len(stderr_decoded)>200 else ''}") # Keep it short
207
  logging.warning(f"ebook-meta failed: {stderr_decoded}")
208
  except Exception as e:
209
+ status_updates.append(f" ⚠️ An unexpected error occurred during cover extraction: {e}")
210
  logging.error(f"Unexpected error during cover extraction: {e}", exc_info=True)
211
+ # Ensure temp file is removed if final path wasn't set
212
+ if not cover_image_path_final and os.path.exists(cover_output_path_temp):
213
+ os.remove(cover_output_path_temp)
214
+
215
  elif embed_cover and not calibre_meta_ok:
216
+ status_updates.append(" ℹ️ Cover art embedding requested, but 'ebook-meta' command not found.")
217
+ elif embed_cover and not MUTAGEN_AVAILABLE:
218
+ status_updates.append(" ℹ️ Cover art embedding requested, but 'mutagen' Python library not installed.")
219
 
220
  # --- Step 2: Convert Ebook to TXT ---
221
+ progress(0.3, desc="📖 Converting ebook to TXT...")
222
+ status_updates.append("📖 Converting ebook to plain text...")
223
  try:
224
+ # Add options known to help with TXT output quality
225
+ # --input-encoding=utf8 is often needed for non-ASCII content
226
+ cmd_convert = [
227
+ 'ebook-convert', input_ebook_path, txt_output_path,
228
+ '--enable-heuristics',
229
+ '--output-profile=generic_eink', # Profiles can influence text formatting
230
+ '--input-encoding=utf8', # Try specifying UTF-8
231
+ '--pretty-print' # Can sometimes help structure
232
+ ]
233
  logging.info(f"Running ebook conversion: {' '.join(cmd_convert)}")
234
+ # Increased timeout for potentially large books
235
+ result_convert = subprocess.run(cmd_convert, check=True, capture_output=True, encoding='utf-8', errors='ignore', timeout=300) # 5 mins
236
+ # Log stdout/stderr even on success for warnings
237
  if result_convert.stdout: logging.info(f"ebook-convert stdout: {result_convert.stdout.strip()}")
238
  if result_convert.stderr: logging.warning(f"ebook-convert stderr: {result_convert.stderr.strip()}")
239
+ status_updates.append(" ✅ Ebook converted to TXT.")
240
  logging.info("Ebook successfully converted to TXT.")
241
+ except subprocess.TimeoutExpired:
242
+ error_msg = "**Error:** Calibre conversion timed out (may be a very large or complex book)."
243
+ status_updates.append(f" ❌ Calibre conversion timed out.")
244
+ logging.error("Error during Calibre conversion: Timeout")
245
+ return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
246
  except subprocess.CalledProcessError as e:
247
+ stderr_decoded = e.stderr.decode(errors='ignore').strip() if e.stderr else "No stderr"
248
+ stdout_decoded = e.stdout.decode(errors='ignore').strip() if e.stdout else "No stdout"
249
+ error_details = f"Stderr:\n```\n{stderr_decoded}\n```\nStdout:\n```\n{stdout_decoded}\n```" if stderr_decoded or stdout_decoded else str(e)
250
+ error_msg = f"**Error:** Calibre conversion failed (Exit Code {e.returncode}).\n{error_details}"
251
+ status_updates.append(f" ❌ Calibre conversion failed.")
252
+ logging.error(f"Error during Calibre conversion: Exit Code {e.returncode}\nStderr: {stderr_decoded}\nStdout: {stdout_decoded}")
253
  return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
254
  except Exception as e:
255
  error_msg = f"**Error:** An unexpected error occurred during ebook conversion:\n{e}"
256
+ status_updates.append(f" ❌ Unexpected conversion error.")
257
  logging.error(f"Unexpected error during ebook conversion: {e}", exc_info=True)
258
  return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
259
 
260
  # Check if TXT file was actually created and is not empty
261
  if not os.path.exists(txt_output_path) or os.path.getsize(txt_output_path) == 0:
262
+ error_msg = "**Error:** Calibre finished, but the output TXT file is missing or empty.\nThis can happen with image-based ebooks (like scanned PDFs, comics, CBZ/CBR) or DRM-protected files.\nCalibre cannot process these types into text."
263
+ status_updates.append(f" ❌ TXT output empty/missing.")
264
  logging.error("Calibre finished, but the output TXT file is missing or empty.")
265
  return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
266
 
267
  # --- Step 3: Convert TXT to Audio ---
268
+ progress(0.6, desc="🗣️ Converting TXT to Audio...")
269
+ status_updates.append("🗣️ Converting text to speech...")
270
 
271
  voice_code = available_voices.get(language_display, 'en') # Get code from display name
272
+ # Base espeak-ng command: specify voice, read from file
273
  cmd_speak = ['espeak-ng', '-v', voice_code, '-f', txt_output_path]
274
+ # Optionally add speed or other espeak parameters here:
275
+ # cmd_speak.extend(['-s', '160']) # Example: Set speed (default 175)
276
 
277
  try:
278
  logging.info(f"Preparing audio command for format: {output_format}")
279
+ # Define timeout for TTS process (can be long for large books)
280
+ tts_timeout = 1800 # 30 minutes
281
+
282
  if output_format == 'wav':
283
  cmd_speak.extend(['-w', audio_output_path])
284
  logging.info(f"Running espeak-ng (WAV): {' '.join(cmd_speak)}")
285
+ result_speak = subprocess.run(cmd_speak, check=True, capture_output=True, timeout=tts_timeout)
286
+ if result_speak.stderr: logging.warning(f"espeak-ng stderr (WAV): {result_speak.stderr.decode(errors='ignore').strip()}")
 
 
287
 
288
  elif output_format == 'mp3':
289
+ if not lame_ok: raise FileNotFoundError("LAME command not found")
290
+ cmd_speak.append('--stdout') # espeak outputs WAV to stdout
291
+ cmd_lame = ['lame', '-', audio_output_path] # LAME reads WAV from stdin, outputs MP3
292
+ logging.info(f"Running pipe: {' '.join(cmd_speak)} | {' '.join(cmd_lame)}")
293
+
294
+ # Start espeak-ng process
295
  ps_speak = subprocess.Popen(cmd_speak, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
296
+ # Start LAME process, piping espeak's stdout to LAME's stdin
297
  ps_lame = subprocess.Popen(cmd_lame, stdin=ps_speak.stdout, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
298
 
299
+ # *** Crucial: Allow ps_speak stdout to be closed by ps_lame if it finishes reading ***
300
  if ps_speak.stdout:
301
  ps_speak.stdout.close()
302
 
303
+ # Capture stderr from both processes, wait for LAME first (end of pipeline)
304
+ try:
305
+ lame_stdout_bytes, lame_stderr_bytes = ps_lame.communicate(timeout=tts_timeout + 60) # Allow extra time for encoding
306
+ except subprocess.TimeoutExpired:
307
+ logging.error("LAME process timed out.")
308
+ ps_speak.kill() # Kill upstream process too
309
+ ps_lame.kill()
310
+ raise subprocess.TimeoutExpired(cmd_lame, tts_timeout + 60)
311
+
312
+ # Now wait for espeak and capture its stderr
313
  speak_stderr_bytes = ps_speak.stderr.read() if ps_speak.stderr else b""
314
+ ps_speak.wait() # Wait for espeak to fully terminate
315
  if ps_speak.stderr: ps_speak.stderr.close()
316
 
317
+ # Decode stderr for logging/errors
318
  lame_stderr_str = lame_stderr_bytes.decode(errors='ignore').strip()
319
  speak_stderr_str = speak_stderr_bytes.decode(errors='ignore').strip()
320
 
321
+ # Check return codes AFTER both processes finished
322
  if ps_lame.returncode != 0:
323
+ logging.error(f"LAME failed with exit code {ps_lame.returncode}. LAME stderr: {lame_stderr_str}")
324
+ raise subprocess.CalledProcessError(ps_lame.returncode, cmd_lame, stderr=lame_stderr_bytes)
325
  if ps_speak.returncode != 0:
326
+ logging.error(f"espeak-ng failed with exit code {ps_speak.returncode}. espeak-ng stderr: {speak_stderr_str}")
327
+ raise subprocess.CalledProcessError(ps_speak.returncode, cmd_speak, stderr=speak_stderr_bytes)
328
 
329
+ # Log any non-fatal warnings from stderr
330
+ if lame_stderr_str: logging.warning(f"LAME stderr: {lame_stderr_str}")
331
+ if speak_stderr_str: logging.warning(f"espeak-ng stderr: {speak_stderr_str}")
 
 
332
 
333
  elif output_format == 'ogg':
334
+ if not oggenc_ok: raise FileNotFoundError("oggenc command not found")
335
+ cmd_speak.append('--stdout') # espeak outputs WAV to stdout
336
+ # oggenc reads WAV from stdin ('-') and writes to output file ('-o')
337
+ cmd_ogg = ['oggenc', '-o', audio_output_path, '-']
338
+ logging.info(f"Running pipe: {' '.join(cmd_speak)} | {' '.join(cmd_ogg)}")
339
+
340
  ps_speak = subprocess.Popen(cmd_speak, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
341
  ps_ogg = subprocess.Popen(cmd_ogg, stdin=ps_speak.stdout, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
342
 
 
343
  if ps_speak.stdout:
344
  ps_speak.stdout.close()
345
 
346
+ try:
347
+ ogg_stdout_bytes, ogg_stderr_bytes = ps_ogg.communicate(timeout=tts_timeout + 60)
348
+ except subprocess.TimeoutExpired:
349
+ logging.error("oggenc process timed out.")
350
+ ps_speak.kill()
351
+ ps_ogg.kill()
352
+ raise subprocess.TimeoutExpired(cmd_ogg, tts_timeout + 60)
353
+
354
  speak_stderr_bytes = ps_speak.stderr.read() if ps_speak.stderr else b""
355
+ ps_speak.wait()
356
  if ps_speak.stderr: ps_speak.stderr.close()
357
 
 
358
  ogg_stderr_str = ogg_stderr_bytes.decode(errors='ignore').strip()
359
  speak_stderr_str = speak_stderr_bytes.decode(errors='ignore').strip()
360
 
 
361
  if ps_ogg.returncode != 0:
362
+ logging.error(f"oggenc failed with exit code {ps_ogg.returncode}. oggenc stderr: {ogg_stderr_str}")
363
+ raise subprocess.CalledProcessError(ps_ogg.returncode, cmd_ogg, stderr=ogg_stderr_bytes)
364
  if ps_speak.returncode != 0:
365
+ logging.error(f"espeak-ng failed with exit code {ps_speak.returncode}. espeak-ng stderr: {speak_stderr_str}")
366
+ raise subprocess.CalledProcessError(ps_speak.returncode, cmd_speak, stderr=speak_stderr_bytes)
367
 
368
+ if ogg_stderr_str: logging.warning(f"oggenc stderr: {ogg_stderr_str}")
369
+ if speak_stderr_str: logging.warning(f"espeak-ng stderr: {speak_stderr_str}")
 
 
 
370
 
371
  else:
372
+ raise ValueError(f"Unsupported output format selected: {output_format}")
373
 
374
+ status_updates.append(" ✅ Text converted to audio.")
375
  logging.info(f"Text successfully converted to {output_format.upper()}.")
376
 
377
  except subprocess.CalledProcessError as e:
378
+ command_name = Path(e.cmd[0]).name if isinstance(e.cmd, list) else e.cmd
 
 
379
  stderr_str = e.stderr.decode(errors='ignore').strip() if isinstance(e.stderr, bytes) else (e.stderr or "")
380
  stdout_str = e.stdout.decode(errors='ignore').strip() if isinstance(e.stdout, bytes) else (e.stdout or "")
381
  error_details = stderr_str or stdout_str or "No output/error captured."
 
 
382
  exit_status_str = f"exit status {e.returncode}" if e.returncode is not None else "unknown exit status"
383
  cmd_str = ' '.join(e.cmd) if isinstance(e.cmd, list) else e.cmd
384
+
385
+ error_msg = (f"**Error:** Audio generation failed.\n\n"
386
+ f"**Process:** `{command_name}`\n"
387
+ f"**Command:**\n```\n{cmd_str}\n```\n"
388
+ f"**Exit Status:** {exit_status_str}\n\n"
389
+ f"**Output/Error:**\n```\n{error_details}\n```")
390
+ status_updates.append(f" ❌ Audio generation failed ({command_name}).")
391
+ logging.error(f"Audio generation failed. Command: `{cmd_str}` Exit: {exit_status_str} Details: {error_details}")
392
+ return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
393
+
394
+ except subprocess.TimeoutExpired as e:
395
+ command_name = Path(e.cmd[0]).name if isinstance(e.cmd, list) else e.cmd
396
+ error_msg = f"**Error:** Audio generation timed out (over {e.timeout}s) during `{command_name}` processing.\nThe ebook might be too long for the current timeout limit."
397
+ status_updates.append(f" ❌ Audio generation timed out.")
398
+ logging.error(f"Audio generation timed out for command: {' '.join(e.cmd)}")
399
+ return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
400
 
401
  except FileNotFoundError as e:
402
+ # This should ideally be caught by initial checks, but handle defensively
403
+ missing_cmd = e.filename or "Unknown command"
404
+ error_msg = f"**Error:** Command `{missing_cmd}` not found during audio generation for {output_format.upper()} output.\nPlease check `packages.txt`."
405
+ status_updates.append(f" Command '{missing_cmd}' not found.")
406
+ logging.error(f"Error: Command '{missing_cmd}' not found during execution.")
407
  return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
408
  except Exception as e:
409
+ error_msg = f"**Error:** An unexpected error occurred during audio generation:\n```\n{e}\n```"
410
+ status_updates.append(f" ❌ Unexpected audio error.")
411
  logging.error(f"An unexpected error occurred during audio generation: {e}", exc_info=True)
412
  return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
413
 
414
+ # --- Step 3b: Verify Audio Output ---
415
+ if not os.path.exists(audio_output_path) or os.path.getsize(audio_output_path) < 256: # Check if file exists and has *some* data
416
+ error_msg = f"**Error:** Audio generation command finished, but the output file '{Path(audio_output_path).name}' is missing or empty/too small.\nCheck logs for potential errors during the TTS or encoding process."
417
+ status_updates.append(f" ❌ Audio output missing or invalid.")
418
+ logging.error(f"Audio output file missing or too small after generation: {audio_output_path}")
419
+ # Try to provide more context if stderr was captured earlier
420
+ # last_stderr = speak_stderr_str or lame_stderr_str or ogg_stderr_str # From pipe section
421
+ # if last_stderr: error_msg += f"\nLast captured error output:\n```\n{last_stderr}\n```"
422
  return None, cover_image_path_final, "\n".join(status_updates) + f"\n\n{error_msg}"
423
 
424
 
425
  # --- Step 4: Embed Cover Art (Optional) ---
426
+ if embed_cover and cover_extracted and MUTAGEN_AVAILABLE and cover_image_path_final and os.path.exists(cover_image_path_final):
427
+ progress(0.9, desc="🖼️ Embedding cover art...")
428
+ status_updates.append("🖼️ Embedding cover art into audio file...")
429
  try:
430
  with open(cover_image_path_final, 'rb') as img_f:
431
  cover_data = img_f.read()
432
 
433
+ # Determine mimetype robustly using Pillow
434
+ mime_type = 'image/jpeg' # Default
435
+ img_width, img_height, img_depth = 0, 0, 24 # Defaults for FLAC/OGG
436
  try:
437
  img = Image.open(cover_image_path_final)
438
  mime_type = Image.MIME.get(img.format)
439
+ img_width, img_height = img.width, img.height
440
+ img_depth = {'RGB': 24, 'RGBA': 32, 'L': 8, 'P': 8}.get(img.mode, 24) # Palette 'P' often 8-bit
441
  img.close()
442
+ if not mime_type:
443
+ ext = Path(cover_image_path_final).suffix.lower()
444
+ if ext == ".jpg" or ext == ".jpeg": mime_type = 'image/jpeg'
445
+ elif ext == ".png": mime_type = 'image/png'
446
+ else: raise ValueError("Unsupported image format for MIME detection") # Force fallback
447
+ logging.info(f"Using cover mime type: {mime_type}, Dimensions: {img_width}x{img_height}, Depth: {img_depth}")
448
  except Exception as pil_err:
449
+ logging.warning(f"Could not determine MIME type/dimensions using PIL: {pil_err}. Falling back to image/jpeg.")
450
+ mime_type = 'image/jpeg' # Fallback
 
 
 
 
 
 
 
 
 
 
 
451
 
452
  logging.info(f"Attempting to embed cover art ({mime_type}) into {audio_output_path}")
453
+ audio = mutagen.File(audio_output_path, easy=True) # Use easy=True for simple tags, fallback to non-easy for picture
454
 
455
  if audio is None:
456
+ # Try loading without easy=True if easy fails
457
+ audio = mutagen.File(audio_output_path, easy=False)
458
+ if audio is None:
459
+ raise ValueError("Mutagen could not load the audio file. Format might be unsupported or file corrupted.")
460
 
461
+ # --- Add Title/Artist using Easy Interface if possible ---
462
  try:
463
+ if isinstance(audio, mutagen.easy.EasyMutagen): # Check if Easy interface loaded
464
+ if not audio.get('title'): audio['title'] = Path(base_filename).name.replace('_', ' ') # Use sanitized filename base
465
+ if not audio.get('artist'): audio['artist'] = "Ebook Speaker"
466
+ audio.save() # Save easy tags
467
+ # Reload without easy=True for picture embedding if needed by format
468
+ audio = mutagen.File(audio_output_path, easy=False)
469
+ elif audio is not None: # Easy interface failed, try basic tags with normal interface
470
+ if not audio.tags.get('TIT2'): audio.tags.add(mutagen.id3.TIT2(encoding=3, text=Path(base_filename).name.replace('_', ' ')))
471
+ if not audio.tags.get('TPE1'): audio.tags.add(mutagen.id3.TPE1(encoding=3, text="Ebook Speaker"))
472
+ audio.save()
473
+ audio = mutagen.File(audio_output_path, easy=False) # Reload after save
474
+
475
+ except Exception as tag_err:
476
+ logging.warning(f"Could not set basic title/artist tags: {tag_err}")
477
+
478
+
479
+ # --- Embed Picture (using non-easy interface often required) ---
480
+ if audio is None: # Check again after potential reload
481
+ raise ValueError("Audio object became None after tag saving.")
482
+
483
+ # Clear existing art first (important!)
484
+ try:
485
+ audio.tags.delall('APIC') # ID3v2 (MP3)
486
+ audio.tags.delall('covr') # MP4
487
+ if hasattr(audio, 'clear_pictures'): audio.clear_pictures() # FLAC
488
+ if "metadata_block_picture" in audio: del audio["metadata_block_picture"] # OggVorbis
489
+ audio.save()
490
+ # Reload again after deleting to ensure clean slate
491
+ audio = mutagen.File(audio_output_path, easy=False)
492
+ if audio is None: raise ValueError("Audio object None after clearing art.")
493
+ except (AttributeError, KeyError, TypeError, Exception) as clear_err:
494
+ logging.warning(f"Could not definitively clear existing artwork: {clear_err}. Proceeding anyway.")
495
+
496
+
497
+ # Add the new cover
498
  save_needed = False
499
+ if isinstance(audio, (MP3, EasyMP3)): # Handles MP3
500
+ if audio.tags is None: audio.add_tags()
501
  audio.tags.add(
502
  APIC(
503
+ encoding=3, # 3 is for UTF-8
504
  mime=mime_type,
505
+ type=PictureType.COVER_FRONT, # Use standard enum
506
+ desc='Cover',
507
  data=cover_data
508
  )
509
  )
 
511
  elif isinstance(audio, FLAC):
512
  pic = mutagen.flac.Picture()
513
  pic.data = cover_data
514
+ pic.type = PictureType.COVER_FRONT
515
  pic.mime = mime_type
516
+ pic.width = img_width
517
+ pic.height = img_height
518
+ pic.depth = img_depth
519
  audio.add_picture(pic)
520
  save_needed = True
521
  elif isinstance(audio, OggVorbis):
522
+ # Ogg Vorbis uses base64 encoded FLAC Picture block
523
  import base64
 
524
  pic = mutagen.flac.Picture()
525
  pic.data = cover_data
526
+ pic.type = PictureType.COVER_FRONT
527
  pic.mime = mime_type
528
+ pic.width = img_width
529
+ pic.height = img_height
530
+ pic.depth = img_depth
531
+ audio["METADATA_BLOCK_PICTURE"] = [base64.b64encode(pic.write()).decode("ascii")]
 
 
 
 
 
 
532
  save_needed = True
533
+ elif isinstance(audio, MP4): # Handles M4A/M4B
534
+ if mime_type == 'image/jpeg': pic_format = MP4Cover.FORMAT_JPEG
535
+ elif mime_type == 'image/png': pic_format = MP4Cover.FORMAT_PNG
536
+ else: pic_format = MP4Cover.FORMAT_UNDEFINED
 
 
 
 
 
 
537
  if pic_format != MP4Cover.FORMAT_UNDEFINED:
538
  audio['covr'] = [MP4Cover(cover_data, imageformat=pic_format)]
539
  save_needed = True
540
+ else: logging.warning(f"Unsupported cover image format ({mime_type}) for MP4 embedding.")
541
+ else:
542
+ logging.warning(f"Cover embedding not implemented for this audio type: {type(audio)}")
543
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
544
 
545
+ if save_needed:
 
546
  audio.save()
547
+ status_updates.append(" ✅ Cover art embedded successfully.")
548
+ logging.info("Cover art embedded successfully.")
549
+ elif embed_cover: # Only report skip if embedding was attempted but failed type match
550
+ status_updates.append(" ⚠️ Cover embedding skipped (unsupported audio format for mutagen?).")
551
+ logging.warning(f"Could not embed cover: audio format {type(audio)} not explicitly handled.")
552
+
553
+ except (mutagen.MutagenError, ValueError, IOError, TypeError, KeyError, AttributeError) as e:
554
+ status_updates.append(f" ⚠️ Could not embed cover art. Error: {str(e)[:100]}...")
555
  logging.warning(f"Failed to embed cover art: {e}", exc_info=True)
556
  except Exception as e:
557
+ status_updates.append(f" ⚠️ An unexpected error occurred during cover art embedding.")
558
  logging.error(f"Unexpected error during cover embedding: {e}", exc_info=True)
559
  elif embed_cover and not cover_extracted:
560
+ status_updates.append(" ℹ️ Cover art embedding skipped (no cover extracted or invalid).")
561
  elif embed_cover and not MUTAGEN_AVAILABLE:
562
+ # This was logged earlier, but confirm skip in status
563
+ status_updates.append(" ⚠️ Cover art embedding skipped (Mutagen library not installed).")
564
 
565
 
566
  # --- Step 5: Prepare final output ---
567
+ progress(1.0, desc="Complete!")
568
+ status_updates.append("🏁 Conversion complete!")
569
  audio_output_path_final = audio_output_path # Mark the path as final
570
 
571
  # Return paths for Gradio components
572
  final_status = "\n".join(status_updates)
573
+ logging.info(f"Returning audio: {audio_output_path_final}, cover: {cover_image_path_final}, Status: Success.")
574
  # Return audio path for Audio component, cover path for Image, status for Textbox
575
  return audio_output_path_final, cover_image_path_final, final_status
576
 
577
  except Exception as e:
578
+ # Catch-all for unexpected errors in the main try block
579
+ error_msg = f"An unexpected critical error occurred in the main process: {e}"
580
+ status_updates.append(f" ❌ CRITICAL ERROR: {error_msg}")
581
  logging.error(error_msg, exc_info=True)
582
  # Return None for audio, cover path (if extracted), and the error status
583
+ final_status = "\n".join(status_updates)
584
+ return None, cover_image_path_final, f"{final_status}\n\n**Error:** An unexpected critical error occurred.\nCheck application logs for details.\n{e}"
585
 
586
  finally:
587
  # --- Cleanup ---
588
+ # We leave the final audio and cover files in temp_dir for Gradio to serve.
589
+ # Clean up intermediate files ONLY.
590
  try:
591
  if 'txt_output_path' in locals() and os.path.exists(txt_output_path):
592
  os.remove(txt_output_path)
593
  logging.info(f"Removed intermediate file: {txt_output_path}")
594
+ # Remove temporary cover if it's different from final or if final doesn't exist
595
+ if 'cover_output_path_temp' in locals() and os.path.exists(cover_output_path_temp):
596
+ if not cover_image_path_final or cover_output_path_temp != cover_image_path_final:
597
+ os.remove(cover_output_path_temp)
598
+ logging.info(f"Removed intermediate file: {cover_output_path_temp}")
599
+
600
+ # Note: Gradio typically copies temp files, but leaving the dir might be safer
601
+ # If space becomes an issue, add shutil.rmtree(temp_dir) here,
602
+ # but ensure Gradio doesn't need the original files after the function returns.
603
+ logging.info(f"Temporary directory '{temp_dir}' contains final output files and will be cleaned up by Gradio/system later.")
 
 
 
 
604
 
605
  except OSError as e:
606
  logging.warning(f"Error during cleanup of intermediate files: {e}")
 
608
 
609
  # --- Gradio Interface Definition ---
610
 
611
# Report start-up progress on stdout while the voice catalogue is collected.
print("Initializing Gradio Interface...")
print("Fetching available eSpeak-NG voices...")
available_voices = get_espeak_voices()
voice_choices = list(available_voices.keys())
print(f"Found {len(voice_choices)} voices.")

# Candidate default labels in priority order (US English first, then British,
# then generic English); the spelling variants cover differences in how the
# voice labels may be formatted.
possible_defaults = [
    "English (United States) (en-us)",
    "English (Us) (en-us)",
    "English (en-us)",
    "English (Great Britain) (en-gb)",
    "English (Gb) (en-gb)",
    "English (en-gb)",
    "English (en)",
]

# First candidate that is actually offered wins; "English (en)" remains the
# basic fallback when none of the candidates is present.
default_voice = next(
    (candidate for candidate in possible_defaults if candidate in voice_choices),
    "English (en)",
)

if not voice_choices:
    logging.error("FATAL: No espeak voices found or parsed. Language selection will fail.")
    # Substitute a single (unusable) placeholder entry so the dropdown still
    # has a choice to show instead of being built from an empty list.
    placeholder_voice = "Error: No Voices Found"
    voice_choices = [placeholder_voice]
    default_voice = placeholder_voice
    available_voices = {placeholder_voice: "error"}
638
 
639
 
640
  # Check for external tools on startup and display warnings if needed
641
print("Checking required external commands...")

# External commands probed at startup, in this order, each paired with the
# label reported to the user when the probe fails.
_required_commands = (
    ("ebook-convert", "Calibre ('ebook-convert')"),
    ("ebook-meta", "Calibre ('ebook-meta' - needed for cover art)"),
    ("espeak-ng", "espeak-ng (core TTS engine)"),
    ("lame", "LAME (needed for MP3 output)"),
    ("oggenc", "oggenc (needed for OGG output, from 'vorbis-tools')"),
)
startup_warnings = [
    label for command, label in _required_commands if not check_command(command)
]
# The Python-side dependency is reported last, after the command probes.
if not MUTAGEN_AVAILABLE:
    startup_warnings.append(
        "Python 'mutagen' library (needed for embedding cover art - install via requirements.txt)"
    )

# Markdown-formatted summary for the UI; stays empty when nothing is missing.
startup_message = ""
if startup_warnings:
    warning_list = "\n- ".join(startup_warnings)
    startup_message = "".join(
        [
            "**⚠️ Startup Warning: The following components might be missing or not found:**\n\n",
            "- " + warning_list + "\n\n",
            "Please ensure system packages are listed in `packages.txt` and Python libraries in `requirements.txt`. ",
            "Functionality relying on missing components will fail. Check container build logs for installation errors.",
        ]
    )
    # Mirror the warning on stdout so it also appears in the process logs.
    print("--- STARTUP WARNING ---")
    print(f"Missing components: {', '.join(startup_warnings)}")
    print("-----------------------")
662
+
663
 
664
  # Define UI Elements
665
print("Building Gradio UI...")
# Layout: a left column with the four inputs and the submit button, and a right
# column with the status log above the cover preview and the audio player.
with gr.Blocks(theme=gr.themes.Soft(), title="Ebook to Audiobook") as demo:
    gr.Markdown(
        """
        # Ebook to Audiobook Converter 🎧📚
        **Convert your ebooks (EPUB, MOBI, AZW3, FB2, PDF*, etc.) into audiobooks!**

        Upload your ebook, select the desired language/voice and audio format, and click Convert.
        Optionally, embed the cover art into the audio file metadata.

        *(*) Note: PDF conversion works best for text-based PDFs. Scanned images or complex layouts may result in poor text extraction.*
        """
    )

    if startup_message:
        # gr.Warning is a toast helper intended for use inside event handlers;
        # calling it while the layout is being built never shows anything on
        # the page. The message is already Markdown, so render it as a
        # persistent Markdown block instead.
        gr.Markdown(startup_message)

    with gr.Row():
        with gr.Column(scale=1, min_width=300):
            # NOTE(review): newer Gradio releases accept only "filepath" or
            # "binary" for `type` - confirm against the pinned Gradio version
            # and against how convert_ebook_to_audio reads the upload before
            # changing this value.
            ebook_input = gr.File(label="1. Upload Ebook File", file_count="single", type="file")
            lang_dropdown = gr.Dropdown(
                label="2. Select Language / Voice",
                choices=voice_choices,
                # Fall back to the first available choice when the preferred
                # default is not among the offered voices.
                value=default_voice if default_voice in voice_choices else (voice_choices[0] if voice_choices else None),
                info="Uses voices available from espeak-ng.",
                interactive=True
            )
            format_dropdown = gr.Dropdown(
                label="3. Select Output Audio Format",
                choices=["mp3", "ogg", "wav"],
                value="mp3",
                info="MP3 offers good compatibility and compression. OGG is open source. WAV is uncompressed.",
                interactive=True
            )
            cover_checkbox = gr.Checkbox(
                label="Embed Cover Art (if available)",
                # Pre-checked only when the mutagen import succeeded.
                value=MUTAGEN_AVAILABLE,
                info="Requires 'mutagen' library and 'ebook-meta' command.",
                # Always interactive; the conversion function itself skips
                # embedding when the dependencies are missing.
                interactive=True
            )
            # `icon` expects a URL or a path to an image file, so the emoji is
            # carried in the button label rather than passed as an icon.
            submit_button = gr.Button("▶️ Convert to Audiobook", variant="primary")

        with gr.Column(scale=2, min_width=400):
            status_textbox = gr.Textbox(
                label="📊 Conversion Status & Log",
                lines=10,
                max_lines=20,
                interactive=False,
                show_copy_button=True,
                placeholder="Conversion progress will appear here..."
            )
            with gr.Row():
                # Both outputs receive file paths returned by the conversion
                # function; no separate download button is defined.
                cover_image = gr.Image(
                    label="🖼️ Extracted Cover Art",
                    type="filepath",
                    interactive=False,
                    height=250,
                    show_download_button=True
                )
                audio_output_player = gr.Audio(
                    label="🎧 Generated Audiobook",
                    type="filepath",
                    interactive=False  # value is produced by the app, never set by the user
                )

    # Wire the button to the conversion function. The output order matches the
    # function's return order: audio path, cover path, status text.
    submit_button.click(
        fn=convert_ebook_to_audio,
        inputs=[ebook_input, lang_dropdown, format_dropdown, cover_checkbox],
        outputs=[audio_output_player, cover_image, status_textbox]
    )

    gr.Markdown("--- \n *Powered by Calibre, eSpeak-NG, LAME, OggEnc, Mutagen, and Gradio.*")
743
+
744
  # --- Launch the App ---
745
if __name__ == "__main__":
    print("Starting Gradio App Server...")

    # The voice list is unusable when it is empty or holds only the
    # "Error..." placeholder entry.
    voices_unusable = not voice_choices or voice_choices[0].startswith("Error")
    if voices_unusable:
        print("\nWARNING: Could not retrieve voices from espeak-ng. Language selection may be broken!\n")

    # server_name="0.0.0.0" binds on all interfaces so the app is reachable
    # from outside a container. share=True requests a public share link.
    # NOTE(review): Gradio documents share links as unsupported on Hugging Face
    # Spaces - confirm whether share=True is still wanted there.
    launch_options = {"share": True, "server_name": "0.0.0.0"}
    demo.launch(**launch_options)
    print("Gradio App Launched.")