drewThomasson commited on
Commit
033f66a
·
verified ·
1 Parent(s): 44f6837

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +610 -244
app.py CHANGED
@@ -1,258 +1,624 @@
1
  import gradio as gr
2
- import os
3
  import subprocess
 
4
  import tempfile
5
- import threading
6
- import time
7
  import shutil
 
 
8
  from pathlib import Path
9
- from tqdm import tqdm
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
def get_espeak_voices():
    """Return the sorted voice names listed by ``espeak-ng --voices``.

    The first output line is treated as a header and skipped. For every
    remaining line that has at least four whitespace-separated columns, the
    fourth column is taken as the voice name.

    Returns:
        A sorted list of voice names, or ``["default"]`` if the command could
        not be run or its output could not be processed.
    """
    try:
        listing = subprocess.run(['espeak-ng', '--voices'], capture_output=True, text=True)
        # Drop the header row, then split each remaining row into columns.
        rows = (row.split() for row in listing.stdout.splitlines()[1:])
        return sorted(columns[3] for columns in rows if len(columns) >= 4)
    except Exception as e:
        print(f"Error getting espeak voices: {e}")
        return ["default"]
25
-
26
def convert_ebook_to_txt(ebook_path, original_name):
    """Convert an ebook to plain text using Calibre's ``ebook-convert``.

    The upload is first copied into a fresh temporary directory under the
    name ``book<ext>``, where ``<ext>`` comes from ``original_name``, so that
    ``ebook-convert`` sees the file's real extension.

    Args:
        ebook_path: Path of the uploaded ebook on disk.
        original_name: Original file name; only its extension is used.

    Returns:
        The path (``str``) of the generated ``converted_book.txt`` on success.
        ``None`` if the name has no extension, the copy fails,
        ``ebook-convert`` is missing or exits with an error, or no text file
        is produced. On every failure the temporary directory is removed.
    """
    ext = Path(original_name).suffix
    if not ext:
        print("Error: The uploaded file does not have an extension.")
        return None

    temp_dir = Path(tempfile.mkdtemp())
    dst = temp_dir / f"book{ext}"
    txt_path = temp_dir / "converted_book.txt"
    try:
        shutil.copy2(ebook_path, dst)
        subprocess.run(
            ["ebook-convert", str(dst), str(txt_path)],
            check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
        )
        if txt_path.exists():
            return str(txt_path)
        print("Conversion error: ebook-convert did not produce a text file.")
    except subprocess.CalledProcessError as e:
        print("Conversion error:", e.stderr)
    except OSError as e:
        # Covers a missing ``ebook-convert`` binary (FileNotFoundError) and
        # failures while copying the upload into the temp directory.
        print("Conversion error:", e)

    # Only failure paths reach this point: do not leave the temp dir behind.
    shutil.rmtree(temp_dir, ignore_errors=True)
    return None
47
-
48
def count_words_in_file(file_path):
    """Return the number of whitespace-separated words in a text file.

    The file is read as UTF-8 with undecodable bytes replaced. Any error
    (for example a missing file) is printed and reported as a count of 0.
    """
    word_total = 0
    try:
        with open(file_path, encoding='utf-8', errors='replace') as handle:
            word_total = len(handle.read().split())
    except Exception as e:
        print(f"Error counting words: {e}")
        return 0
    return word_total
57
-
58
def create_audiobook(progress_callback, ebook_path, original_name, voice, speech_rate, output_dir):
    """Convert an ebook to a single WAV audiobook using espeak-ng.

    Steps: convert the ebook to text, split the text into chunks, synthesise
    each chunk to its own WAV file with ``espeak-ng``, then concatenate the
    chunk files into ``<output_dir>/<ebook basename>.wav``.

    Args:
        progress_callback: Callable ``(percent, status_message)`` invoked as
            the conversion advances (0, 10, 10-90 per chunk, 90, 95, 100).
        ebook_path: Path of the ebook on disk.
        original_name: Original file name, forwarded to
            ``convert_ebook_to_txt`` to recover the file extension.
        voice: Value passed to ``espeak-ng -v``.
        speech_rate: Value passed to ``espeak-ng -s`` (converted with ``str``).
        output_dir: Directory for the resulting WAV; created if missing.

    Returns:
        A human-readable status string: a success message containing the
        output path, or a message starting with ``"Error"``.
    """
    if not os.path.exists(ebook_path):
        return f"Error: File {ebook_path} not found."

    # exist_ok avoids the check-then-create race of a separate exists() test.
    try:
        os.makedirs(output_dir, exist_ok=True)
    except OSError as e:
        return f"Error: Could not create output directory {output_dir}: {e}"

    book_name = os.path.splitext(os.path.basename(ebook_path))[0]
    output_path = os.path.join(output_dir, f"{book_name}.wav")

    # Convert ebook to text
    progress_callback(0, "Converting ebook to text...")
    txt_path = convert_ebook_to_txt(ebook_path, original_name)
    if not txt_path:
        return "Error: Failed to convert ebook to text. Check terminal for details."

    # convert_ebook_to_txt writes into its own mkdtemp() directory, which also
    # still holds the copied ebook; the whole directory is removed, because
    # os.rmdir() on it can never succeed while that copy is present.
    temp_dirs = []
    text_dir = os.path.dirname(txt_path)
    if text_dir:
        temp_dirs.append(text_dir)

    def remove_temp_dirs():
        """Best-effort removal of every temporary directory created so far."""
        for directory in temp_dirs:
            shutil.rmtree(directory, ignore_errors=True)

    try:
        # Count words for progress estimation
        word_count = count_words_in_file(txt_path)
        if word_count == 0:
            return "Error: No text content found in the ebook."

        progress_callback(10, f"Starting audio conversion of {word_count} words...")

        with open(txt_path, 'r', encoding='utf-8', errors='replace') as text_file:
            content = text_file.read()

        # Create temporary directory for audio chunks
        temp_audio_dir = tempfile.mkdtemp()
        temp_dirs.append(temp_audio_dir)
        chunks = split_text_into_chunks(content)
        total_chunks = len(chunks)

        # Convert each chunk with progress tracking
        for i, chunk in enumerate(tqdm(chunks, desc="Converting to audio")):
            chunk_path = os.path.join(temp_audio_dir, f"chunk_{i:04d}.wav")

            # espeak-ng reads its input from a file (-f), so persist the chunk.
            chunk_txt_path = os.path.join(temp_audio_dir, f"chunk_{i:04d}.txt")
            with open(chunk_txt_path, 'w', encoding='utf-8') as chunk_file:
                chunk_file.write(chunk)

            subprocess.run([
                "espeak-ng",
                "-v", voice,
                "-s", str(speech_rate),
                "-f", chunk_txt_path,
                "-w", chunk_path
            ], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

            # Map chunk progress onto the 10%..90% range.
            progress = 10 + int(80 * (i + 1) / total_chunks)
            progress_callback(progress, f"Converting chunk {i+1}/{total_chunks}...")

        # Combine audio chunks into final audiobook
        progress_callback(90, "Combining audio chunks...")
        combine_audio_files(temp_audio_dir, output_path)

        progress_callback(95, "Cleaning up temporary files...")
        remove_temp_dirs()

        progress_callback(100, "Conversion complete!")
        return f"Audiobook created successfully at {output_path}"

    except Exception as e:
        print(f"Error creating audiobook: {e}")
        return f"Error creating audiobook: {e}"
    finally:
        # Also runs on the early "no text" return and on errors, so temporary
        # directories never outlive the call. Harmless if already removed.
        remove_temp_dirs()
135
-
136
def split_text_into_chunks(text, chunk_size=1000):
    """Split text into space-joined chunks of at most ``chunk_size`` words.

    Words are the whitespace-separated tokens of ``text``; the original
    whitespace is not preserved. Every chunk except possibly the last holds
    exactly ``chunk_size`` words. Empty or whitespace-only text gives ``[]``.
    ``chunk_size`` is expected to be an integer.
    """
    tokens = text.split()
    # A size below one still yields one word per chunk.
    step = max(chunk_size, 1)
    return [
        " ".join(tokens[start:start + step])
        for start in range(0, len(tokens), step)
    ]
152
-
153
def combine_audio_files(audio_dir, output_file):
    """Concatenate every ``.wav`` file in ``audio_dir`` into ``output_file``.

    The files are joined in sorted name order with ffmpeg's concat demuxer
    and stream copy (no re-encoding). An existing ``output_file`` is
    overwritten.

    Args:
        audio_dir: Directory containing the WAV chunks.
        output_file: Path of the combined audio file to write.

    Raises:
        Exception: If ``audio_dir`` contains no ``.wav`` files.
        subprocess.CalledProcessError: If ffmpeg exits with an error.
    """
    # List all audio chunks and sort them
    audio_files = sorted(
        os.path.join(audio_dir, name)
        for name in os.listdir(audio_dir)
        if name.endswith('.wav')
    )

    if not audio_files:
        raise Exception("No audio files were generated to combine")

    # Create a file list for ffmpeg
    list_file = os.path.join(audio_dir, "file_list.txt")
    try:
        with open(list_file, 'w', encoding='utf-8') as f:
            for audio_file in audio_files:
                # Inside a single-quoted concat entry a quote is written '\''.
                escaped = audio_file.replace("'", "'\\''")
                f.write(f"file '{escaped}'\n")

        # -y: overwrite an existing output instead of aborting or prompting.
        # -nostdin: never wait for interactive input from the server's stdin.
        result = subprocess.run([
            "ffmpeg", "-nostdin", "-y", "-f", "concat", "-safe", "0",
            "-i", list_file, "-c", "copy", output_file
        ], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        print("FFmpeg output:", result.stdout)
        print("FFmpeg errors:", result.stderr)
    except subprocess.CalledProcessError as e:
        print(f"Error combining audio files: {e}")
        print(f"FFmpeg output: {e.stdout}")
        print(f"FFmpeg errors: {e.stderr}")
        raise
    finally:
        # Clean up the list file even when ffmpeg failed.
        if os.path.exists(list_file):
            os.remove(list_file)
183
-
184
def process_book(ebook_file, voice, speech_rate, output_dir, progress=gr.Progress()):
    """Gradio click handler: convert the uploaded ebook and report the result.

    Args:
        ebook_file: The upload delivered by the ``gr.File`` component. Either
            a path (``str`` / path-like) or an object exposing the path as
            ``.name`` is accepted; ``None`` means nothing was uploaded.
        voice: Value forwarded to ``create_audiobook`` as the voice.
        speech_rate: Value forwarded to ``create_audiobook`` as the rate.
        output_dir: Directory in which the audiobook is written.
        progress: Gradio progress tracker, called with a 0..1 fraction and a
            status message.

    Returns:
        The status string returned by ``create_audiobook``, or an error
        message when no file was uploaded.
    """
    if ebook_file is None:
        return "Error: No file was uploaded."

    # The File component is declared with type="filepath", so the value may be
    # a plain path string without a ``.name`` attribute; accept both shapes.
    if isinstance(ebook_file, (str, os.PathLike)):
        ebook_path = os.fspath(ebook_file)
    else:
        ebook_path = ebook_file.name

    # Prefer the original upload name when provided (and non-empty), because
    # its extension drives the ebook conversion; otherwise use the path's name.
    original_name = getattr(ebook_file, "orig_name", None) or os.path.basename(ebook_path)

    def update_progress(percent, status):
        # create_audiobook reports 0..100; gr.Progress expects 0..1.
        progress(percent / 100, status)

    return create_audiobook(update_progress, ebook_path, original_name, voice, speech_rate, output_dir)
198
-
199
def create_gui():
    """Create the Gradio UI for the ebook-to-audiobook converter.

    Builds a ``gr.Blocks`` app with an upload field, voice dropdown, speech
    rate slider, output directory box and a convert button wired to
    ``process_book``; the result string is shown in a read-only status box.

    Returns:
        The constructed ``gr.Blocks`` application (not yet launched).
    """
    # Query espeak-ng once, at UI construction time, to populate the dropdown.
    available_voices = get_espeak_voices()

    with gr.Blocks(title="Ebook to Audiobook Converter") as app:
        gr.Markdown("# 📚 Ebook to Audiobook Converter")
        gr.Markdown("Convert any ebook to an audiobook using espeak-ng. The progress is shown in the terminal.")

        with gr.Row():
            # Left column: all conversion inputs.
            with gr.Column():
                ebook_input = gr.File(label="Upload Ebook", type="filepath")
                voice_dropdown = gr.Dropdown(
                    choices=available_voices,
                    # Preselect the first voice; "default" only if the list is empty.
                    value=available_voices[0] if available_voices else "default",
                    label="Select Voice"
                )
                speech_rate = gr.Slider(
                    minimum=80,
                    maximum=500,
                    value=175,
                    step=5,
                    label="Speech Rate (words per minute)"
                )
                output_dir = gr.Textbox(
                    label="Output Directory",
                    value=str(Path.home() / "audiobooks"),
                    placeholder="Enter the directory to save the audiobook"
                )
                convert_btn = gr.Button("Convert to Audiobook", variant="primary")

            # Right column: read-only status output.
            with gr.Column():
                output_text = gr.Textbox(label="Status", interactive=False)

        # The input order must match process_book's positional parameters.
        convert_btn.click(
            fn=process_book,
            inputs=[ebook_input, voice_dropdown, speech_rate, output_dir],
            outputs=[output_text]
        )

        gr.Markdown("""
        ## Instructions
        1. Upload your ebook file (supported formats: epub, mobi, pdf, azw, etc.)
        2. Select a voice from the dropdown
        3. Adjust the speech rate if needed
        4. Specify an output directory for the audiobook
        5. Click "Convert to Audiobook"

        ## Requirements
        - Calibre (for ebook conversion)
        - espeak-ng (for text-to-speech)
        - ffmpeg (for audio processing)

        The progress will be displayed in the terminal with a tqdm progress bar.
        """)

    return app
255
 
 
256
  if __name__ == "__main__":
257
- app = create_gui()
258
- app.launch(debug=True)
 
 
 
 
1
  import gradio as gr
 
2
  import subprocess
3
+ import os
4
  import tempfile
 
 
5
  import shutil
6
+ import re
7
+ import logging
8
  from pathlib import Path
9
+ from PIL import Image # For checking image validity
10
+ try:
11
+ import mutagen
12
+ from mutagen.mp3 import MP3, EasyMP3
13
+ from mutagen.oggvorbis import OggVorbis
14
+ from mutagen.flac import FLAC
15
+ from mutagen.mp4 import MP4, MP4Cover
16
+ from mutagen.id3 import ID3, APIC, error as ID3Error
17
+ MUTAGEN_AVAILABLE = True
18
+ except ImportError:
19
+ MUTAGEN_AVAILABLE = False
20
+ logging.warning("Mutagen library not found. Cover art embedding will be disabled.")
21
+ logging.warning("Install it using: pip install mutagen")
22
 
23
+
24
+ # --- Configuration & Logging ---
25
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
26
+
27
+ # --- Helper Functions ---
28
+
29
def check_command(command):
    """Checks if a command exists in the system's PATH.

    Uses ``shutil.which`` rather than spawning ``command -v``: ``command`` is
    a shell builtin, so running it as a program raises ``FileNotFoundError``
    on systems that ship no standalone ``command`` binary, which made every
    tool look missing there. ``shutil.which`` works the same way on Windows
    and Unix-like systems and starts no subprocess.

    Args:
        command: Name of the executable to look up (an explicit path to an
            executable is accepted as well).

    Returns:
        ``True`` if an executable was found, ``False`` otherwise. The outcome
        is logged; this function does not raise.
    """
    try:
        resolved = shutil.which(command)
    except Exception as e:  # e.g. a non-string argument
        logging.error(f"Unexpected error checking for command '{command}': {e}")
        return False

    if resolved:
        logging.info(f"Command '{command}' found.")
        return True

    logging.error(f"Command '{command}' not found or check failed. Please ensure it's installed and in your PATH.")
    return False
48
+
49
+
50
def get_espeak_voices():
    """Map display labels to espeak-ng voice codes.

    Runs ``espeak-ng --voices`` and skips its header line. Each remaining
    line is first matched against a column regex; lines that do not match
    are split on whitespace, taking the second column as the code and the
    fourth as the name, provided the first column is numeric. Labels have
    the form ``"<name> (<code>)"``; the first code seen for a label wins.

    Returns:
        A dict of label -> code sorted by label. If nothing could be parsed,
        or the command fails, a small built-in set of voices is returned.
    """
    row_re = re.compile(r"^\s*\d+\s+[yn-]\s+([\w-]+)\s+[MF-]\s+(.+?)\s+([\w/ -]+?)(?:\s+\(([\w\s]+)\))?\s*$")
    by_label = {}
    try:
        listing = subprocess.run(['espeak-ng', '--voices'], capture_output=True, text=True, check=True, encoding='utf-8', errors='ignore')
        for row in listing.stdout.splitlines()[1:]:  # Skip header
            hit = row_re.match(row)
            if hit is not None:
                code, lang_name = hit.group(1), hit.group(2)
            else:
                # Simpler positional parsing for rows the regex rejects.
                fields = row.split()
                if len(fields) < 4 or not fields[0].isdigit():
                    continue
                code, lang_name = fields[1], fields[3]
            # Keep the first code registered for a given label.
            by_label.setdefault(f"{lang_name.strip()} ({code})", code)

        if not by_label:
            logging.warning("Could not parse any voices from 'espeak-ng --voices'. Using fallback list.")
            by_label = {"English (en)": "en", "Spanish (es)": "es", "French (fr)": "fr", "German (de)": "de"}

        # Present the voices alphabetically by label.
        return {label: by_label[label] for label in sorted(by_label)}

    except Exception as e:
        logging.error(f"Error getting espeak-ng voices: {e}")
        # The command is unavailable or failed: offer a basic fallback list.
        return {"English (en)": "en", "Spanish (es)": "es", "French (fr)": "fr", "German (de)": "de"}
91
+
92
+ # --- Main Conversion Logic ---
93
+
94
+ def convert_ebook_to_audio(ebook_file, language_display, output_format, embed_cover, progress=gr.Progress(track_tqdm=True)):
95
+ """
96
+ Converts an ebook file to an audiobook using Calibre and espeak-ng.
97
+ """
98
+ if not ebook_file:
99
+ return None, None, "Error: No ebook file provided.", None
100
+
101
+ # Check required commands based on selection
102
+ calibre_convert_ok = check_command("ebook-convert")
103
+ calibre_meta_ok = True if not embed_cover else check_command("ebook-meta") # Only check if needed
104
+ espeak_ok = check_command("espeak-ng")
105
+ lame_ok = True if output_format != 'mp3' else check_command("lame")
106
+ oggenc_ok = True if output_format != 'ogg' else check_command("oggenc")
107
+
108
+ missing = []
109
+ if not calibre_convert_ok: missing.append("Calibre ('ebook-convert')")
110
+ if not calibre_meta_ok and embed_cover: missing.append("Calibre ('ebook-meta' for cover art)")
111
+ if not espeak_ok: missing.append("espeak-ng")
112
+ if not lame_ok and output_format == 'mp3': missing.append("LAME (for MP3)")
113
+ if not oggenc_ok and output_format == 'ogg': missing.append("oggenc (for OGG)")
114
+
115
+ if missing:
116
+ error_msg = f"Error: Missing required command(s): {', '.join(missing)}. Please install them and ensure they are in your system PATH."
117
+ logging.error(error_msg)
118
+ # Use Markdown for better formatting in Gradio Textbox
119
+ return None, None, f"**Error:** Missing required command(s):\n- {', '.join(missing)}\n\nPlease install them and ensure they are in your system PATH.", None
120
+
121
+
122
+ temp_dir = tempfile.mkdtemp(prefix="ebook_audio_")
123
+ logging.info(f"Created temporary directory: {temp_dir}")
124
+ status_updates = ["Conversion started..."]
125
+ cover_image_path_final = None
126
+ audio_output_path_final = None
127
+
128
  try:
129
+ input_ebook_path = ebook_file.name # Gradio provides a temp path for the upload
130
+ base_filename = Path(input_ebook_path).stem
131
+ txt_output_path = os.path.join(temp_dir, f"{base_filename}.txt")
132
+ cover_output_path_temp = os.path.join(temp_dir, "cover.jpg") # Assume jpg initially
133
+ audio_output_path = os.path.join(temp_dir, f"{base_filename}.{output_format}")
134
+
135
+ # --- Step 1: Extract Cover Art (Optional) ---
136
+ cover_extracted = False
137
+ if embed_cover and calibre_meta_ok: # Already checked if ebook-meta exists
138
+ progress(0.1, desc="Extracting cover art (optional)")
139
+ status_updates.append("Attempting to extract cover art...")
140
+ try:
141
+ cmd_meta = ['ebook-meta', input_ebook_path, '--get-cover', cover_output_path_temp]
142
+ logging.info(f"Running cover extraction: {' '.join(cmd_meta)}")
143
+ result_meta = subprocess.run(cmd_meta, check=True, capture_output=True, text=True, errors='ignore')
144
+ if os.path.exists(cover_output_path_temp) and os.path.getsize(cover_output_path_temp) > 0:
145
+ # Validate if it's a real image file Pillow can open
146
+ try:
147
+ img = Image.open(cover_output_path_temp)
148
+ img.verify() # Verify CRC markers
149
+ img.close() # Need to close after verify
150
+ # Reopen to check format and potentially save in a consistent format if needed
151
+ img = Image.open(cover_output_path_temp)
152
+ fmt = img.format.lower() if img.format else 'unknown'
153
+ img.close()
154
+
155
+ if fmt not in ['jpeg', 'png']:
156
+ logging.warning(f"Extracted cover is not JPEG or PNG ({fmt}), attempting conversion.")
157
+ # Try converting to JPG for broader compatibility with mutagen
158
+ new_cover_path = os.path.join(temp_dir, "cover_converted.jpg")
159
+ try:
160
+ img = Image.open(cover_output_path_temp)
161
+ img.convert('RGB').save(new_cover_path, "JPEG")
162
+ img.close()
163
+ # Check if conversion worked
164
+ if os.path.exists(new_cover_path) and os.path.getsize(new_cover_path) > 0:
165
+ cover_output_path_temp = new_cover_path # Use the converted path
166
+ cover_extracted = True
167
+ cover_image_path_final = cover_output_path_temp # Update final path for display
168
+ status_updates.append("✅ Cover art extracted and converted to JPG.")
169
+ logging.info(f"Cover art extracted and converted to JPG: {cover_image_path_final}")
170
+
171
+ else:
172
+ logging.error("Failed to convert cover art to JPG.")
173
+ status_updates.append("⚠️ Could not convert extracted cover art to JPG. Will skip embedding.")
174
+ if os.path.exists(cover_output_path_temp): os.remove(cover_output_path_temp) # Clean up original if unusable
175
+
176
+ except Exception as convert_err:
177
+ logging.error(f"Error converting cover image: {convert_err}")
178
+ status_updates.append(f"⚠️ Error converting cover image: {convert_err}. Will skip embedding.")
179
+ if os.path.exists(cover_output_path_temp): os.remove(cover_output_path_temp) # Clean up original
180
+
181
+ else:
182
+ cover_extracted = True
183
+ cover_image_path_final = cover_output_path_temp # Use original path
184
+ status_updates.append("✅ Cover art extracted successfully.")
185
+ logging.info(f"Cover art extracted to {cover_image_path_final} (Format: {fmt})")
186
+
187
+ except (IOError, SyntaxError, Image.UnidentifiedImageError) as img_err:
188
+ logging.warning(f"Extracted file is not a valid image or couldn't be processed: {img_err}")
189
+ status_updates.append("⚠️ Extracted 'cover' file is not a valid image. Will skip embedding.")
190
+ if os.path.exists(cover_output_path_temp): os.remove(cover_output_path_temp) # Clean up invalid file
191
+ else:
192
+ status_updates.append("ℹ️ No cover art found in the ebook metadata.")
193
+ logging.info("ebook-meta ran but did not produce a cover file or it was empty.")
194
+
195
+ # No FileNotFoundError needed here as calibre_meta_ok check already happened
196
+ except subprocess.CalledProcessError as e:
197
+ stderr_decoded = e.stderr.decode(errors='ignore') if e.stderr else "No stderr"
198
+ status_updates.append(f"⚠️ Failed to extract cover art. Error: {stderr_decoded}")
199
+ logging.warning(f"ebook-meta failed: {stderr_decoded}")
200
+ except Exception as e:
201
+ status_updates.append(f"⚠️ An unexpected error occurred during cover extraction: {e}")
202
+ logging.error(f"Unexpected error during cover extraction: {e}", exc_info=True)
203
+ elif embed_cover and not calibre_meta_ok:
204
+ status_updates.append("ℹ️ Cover art embedding requested, but 'ebook-meta' not found.")
205
+
206
+ # --- Step 2: Convert Ebook to TXT ---
207
+ progress(0.3, desc="Converting ebook to TXT")
208
+ status_updates.append("Converting ebook to plain text...")
209
+ try:
210
+ # --input-encoding and --output-encoding might be needed for some books
211
+ cmd_convert = ['ebook-convert', input_ebook_path, txt_output_path, '--enable-heuristics']
212
+ logging.info(f"Running ebook conversion: {' '.join(cmd_convert)}")
213
+ result_convert = subprocess.run(cmd_convert, check=True, capture_output=True, encoding='utf-8', errors='ignore')
214
+ # Check stdout/stderr even on success for warnings
215
+ if result_convert.stdout: logging.info(f"ebook-convert stdout: {result_convert.stdout.strip()}")
216
+ if result_convert.stderr: logging.warning(f"ebook-convert stderr: {result_convert.stderr.strip()}")
217
+ status_updates.append("✅ Ebook converted to TXT.")
218
+ logging.info("Ebook successfully converted to TXT.")
219
+ except subprocess.CalledProcessError as e:
220
+ stderr_decoded = e.stderr.decode(errors='ignore') if e.stderr else "No stderr"
221
+ error_msg = f"Error during Calibre conversion: {stderr_decoded or e}"
222
+ status_updates.append(f"❌ {error_msg}")
223
+ logging.error(error_msg)
224
+ # Use Markdown for better formatting in Gradio Textbox
225
+ return None, cover_image_path_final, f"**Error:** Calibre conversion failed.\n```\n{stderr_decoded or e}\n```", None # Return extracted cover if available
226
+ except Exception as e:
227
+ error_msg = f"An unexpected error occurred during ebook conversion: {e}"
228
+ status_updates.append(f"❌ {error_msg}")
229
+ logging.error(error_msg, exc_info=True)
230
+ return None, cover_image_path_final, f"**Error:** An unexpected error occurred during ebook conversion:\n{e}", None
231
+
232
+ # Check if TXT file was actually created and is not empty
233
+ if not os.path.exists(txt_output_path) or os.path.getsize(txt_output_path) == 0:
234
+ error_msg = "Error: Calibre finished, but the output TXT file is missing or empty. The ebook might be image-based or DRM protected."
235
+ status_updates.append(f"❌ {error_msg}")
236
+ logging.error(error_msg)
237
+ return None, cover_image_path_final, f"**Error:** Calibre finished, but the output TXT file is missing or empty.\nThis can happen with image-based ebooks (like comics/scans) or DRM-protected files.", None
238
+
239
+ # --- Step 3: Convert TXT to Audio ---
240
+ progress(0.6, desc="Converting TXT to Audio")
241
+ status_updates.append("Converting text to speech...")
242
+
243
+ voice_code = available_voices.get(language_display, 'en') # Get code from display name
244
+ cmd_speak = ['espeak-ng', '-v', voice_code, '-f', txt_output_path]
245
+ # Add speed option if needed: cmd_speak.extend(['-s', '160']) # Example speed
246
+
247
  try:
248
+ logging.info(f"Preparing audio command for format: {output_format}")
249
+ if output_format == 'wav':
250
+ cmd_speak.extend(['-w', audio_output_path])
251
+ logging.info(f"Running espeak-ng (WAV): {' '.join(cmd_speak)}")
252
+ result_speak = subprocess.run(cmd_speak, check=True, capture_output=True) # Capture bytes
253
+ # Log stdout/stderr even on success
254
+ if result_speak.stdout: logging.info(f"espeak-ng stdout: {result_speak.stdout.decode(errors='ignore').strip()}")
255
+ if result_speak.stderr: logging.warning(f"espeak-ng stderr: {result_speak.stderr.decode(errors='ignore').strip()}")
256
+
257
+ elif output_format == 'mp3':
258
+ cmd_speak.append('--stdout')
259
+ cmd_lame = ['lame', '-', audio_output_path] # Read from stdin, write to file
260
+ logging.info(f"Running espeak-ng | lame (MP3): {' '.join(cmd_speak)} | {' '.join(cmd_lame)}")
261
+ ps_speak = subprocess.Popen(cmd_speak, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
262
+ ps_lame = subprocess.Popen(cmd_lame, stdin=ps_speak.stdout, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
263
+
264
+ # Allow ps_speak to receive SIGPIPE if ps_lame exits early. Crucial!
265
+ if ps_speak.stdout:
266
+ ps_speak.stdout.close()
267
+
268
+ # Capture output/errors and wait for LAME to finish
269
+ lame_stdout_bytes, lame_stderr_bytes = ps_lame.communicate()
270
+ # Capture stderr from espeak and WAIT for it to finish *after* lame is done
271
+ speak_stderr_bytes = ps_speak.stderr.read() if ps_speak.stderr else b""
272
+ ps_speak.wait() # <<< --- Explicitly wait for espeak-ng ---
273
+ if ps_speak.stderr: ps_speak.stderr.close()
274
+
275
+ # Decode stderr for logging
276
+ lame_stderr_str = lame_stderr_bytes.decode(errors='ignore').strip()
277
+ speak_stderr_str = speak_stderr_bytes.decode(errors='ignore').strip()
278
+
279
+ # Check return codes safely
280
+ if ps_lame.returncode != 0:
281
+ # LAME failed
282
+ raise subprocess.CalledProcessError(ps_lame.returncode, cmd_lame, output=lame_stdout_bytes, stderr=lame_stderr_bytes)
283
+ if ps_speak.returncode != 0:
284
+ # Espeak failed (even if lame seemed okay initially)
285
+ raise subprocess.CalledProcessError(ps_speak.returncode, cmd_speak, stderr=speak_stderr_bytes) # Pass the captured stderr bytes
286
+
287
+ # Log warnings from stderr if processes succeeded
288
+ if lame_stderr_str:
289
+ logging.warning(f"LAME stderr: {lame_stderr_str}")
290
+ if speak_stderr_str:
291
+ logging.warning(f"espeak-ng stderr: {speak_stderr_str}")
292
+
293
+ elif output_format == 'ogg':
294
+ cmd_speak.append('--stdout')
295
+ cmd_ogg = ['oggenc', '-o', audio_output_path, '-'] # Write to file, read from stdin
296
+ logging.info(f"Running espeak-ng | oggenc (OGG): {' '.join(cmd_speak)} | {' '.join(cmd_ogg)}")
297
+ ps_speak = subprocess.Popen(cmd_speak, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
298
+ ps_ogg = subprocess.Popen(cmd_ogg, stdin=ps_speak.stdout, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
299
+
300
+ # Allow ps_speak to receive SIGPIPE if oggenc exits early.
301
+ if ps_speak.stdout:
302
+ ps_speak.stdout.close()
303
+
304
+ # Capture output/errors and wait for oggenc to finish
305
+ ogg_stdout_bytes, ogg_stderr_bytes = ps_ogg.communicate()
306
+ # Capture stderr from espeak and WAIT for it to finish *after* oggenc is done
307
+ speak_stderr_bytes = ps_speak.stderr.read() if ps_speak.stderr else b""
308
+ ps_speak.wait() # <<< --- Explicitly wait for espeak-ng ---
309
+ if ps_speak.stderr: ps_speak.stderr.close()
310
+
311
+ # Decode stderr for logging
312
+ ogg_stderr_str = ogg_stderr_bytes.decode(errors='ignore').strip()
313
+ speak_stderr_str = speak_stderr_bytes.decode(errors='ignore').strip()
314
+
315
+ # Now check return codes safely
316
+ if ps_ogg.returncode != 0:
317
+ # Oggenc failed
318
+ raise subprocess.CalledProcessError(ps_ogg.returncode, cmd_ogg, output=ogg_stdout_bytes, stderr=ogg_stderr_bytes)
319
+ if ps_speak.returncode != 0:
320
+ # Espeak failed
321
+ raise subprocess.CalledProcessError(ps_speak.returncode, cmd_speak, stderr=speak_stderr_bytes) # Pass captured stderr bytes
322
+
323
+ # Log warnings from stderr if processes succeeded
324
+ if ogg_stderr_str:
325
+ logging.warning(f"oggenc stderr: {ogg_stderr_str}")
326
+ if speak_stderr_str:
327
+ logging.warning(f"espeak-ng stderr: {speak_stderr_str}")
328
+
329
+ else:
330
+ raise ValueError(f"Unsupported output format: {output_format}")
331
+
332
+ status_updates.append("✅ Text converted to audio.")
333
+ logging.info(f"Text successfully converted to {output_format.upper()}.")
334
+
335
+ except subprocess.CalledProcessError as e:
336
+ # --- MODIFIED ERROR HANDLING ---
337
+ command_name = e.cmd[0] if isinstance(e.cmd, list) else e.cmd
338
+ # Decode stderr/stdout safely (they might be bytes or None)
339
+ stderr_str = e.stderr.decode(errors='ignore').strip() if isinstance(e.stderr, bytes) else (e.stderr or "")
340
+ stdout_str = e.stdout.decode(errors='ignore').strip() if isinstance(e.stdout, bytes) else (e.stdout or "")
341
+ error_details = stderr_str or stdout_str or "No output/error captured."
342
+
343
+ # Construct error message carefully
344
+ exit_status_str = f"exit status {e.returncode}" if e.returncode is not None else "unknown exit status"
345
+ cmd_str = ' '.join(e.cmd) if isinstance(e.cmd, list) else e.cmd
346
+ error_msg = f"Audio generation failed ({command_name} with {exit_status_str})."
347
+ status_updates.append(f"❌ {error_msg}")
348
+ logging.error(f"{error_msg} Command: `{cmd_str}` Output/Error: {error_details}")
349
+
350
+ # Use Markdown for better formatting in Gradio Textbox
351
+ md_error_details = f"**Error:** Audio generation failed.\n\n" \
352
+ f"**Command:**\n```\n{cmd_str}\n```\n" \
353
+ f"**Exit Status:** {exit_status_str}\n\n" \
354
+ f"**Output/Error:**\n```\n{error_details}\n```"
355
+ return None, cover_image_path_final, md_error_details, None
356
+ # --- END MODIFIED ERROR HANDLING ---
357
+
358
+ except FileNotFoundError as e:
359
+ missing_cmd = e.filename # Usually contains the missing command
360
+ error_msg = f"Error: Command '{missing_cmd}' not found for {output_format.upper()} output."
361
+ status_updates.append(f"❌ {error_msg}")
362
+ logging.error(error_msg)
363
+ return None, cover_image_path_final, f"**Error:** Command `{missing_cmd}` not found.\nPlease install it and ensure it's in your system PATH.", None
364
  except Exception as e:
365
+ error_msg = f"An unexpected error occurred during audio generation: {e}"
366
+ status_updates.append(f"❌ {error_msg}")
367
+ logging.error(error_msg, exc_info=True)
368
+ return None, cover_image_path_final, f"**Error:** An unexpected error occurred during audio generation:\n{e}", None
369
+
370
+ # Check if audio file exists and has size
371
+ if not os.path.exists(audio_output_path) or os.path.getsize(audio_output_path) < 1024: # Check for > 1KB as a basic sanity check
372
+ error_msg = f"Error: Audio generation command finished, but the output file '{Path(audio_output_path).name}' is missing or too small. Check logs for details."
373
+ status_updates.append(f"❌ {error_msg}")
374
+ logging.error(error_msg)
375
+ return None, cover_image_path_final, f"**Error:** Audio output file missing or too small after conversion.\nCheck system logs for `espeak-ng`, `lame`, or `oggenc` or the status box above for errors.", None
376
+
377
+
378
+ # --- Step 4: Embed Cover Art (Optional) ---
379
+ if embed_cover and cover_extracted and MUTAGEN_AVAILABLE and os.path.exists(cover_image_path_final):
380
+ progress(0.9, desc="Embedding cover art")
381
+ status_updates.append("Embedding cover art into audio file...")
382
+ try:
383
+ with open(cover_image_path_final, 'rb') as img_f:
384
+ cover_data = img_f.read()
385
+
386
+ # Determine mimetype using PIL
387
+ img = Image.open(cover_image_path_final)
388
+ mime_type = Image.MIME.get(img.format)
389
+ img.close()
390
+ if not mime_type:
391
+ mime_type = 'image/jpeg' # Default guess
392
+ logging.warning(f"Could not determine MIME type for cover image, defaulting to {mime_type}")
393
+
394
+
395
+ logging.info(f"Attempting to embed cover art ({mime_type}) into {audio_output_path}")
396
+ audio = mutagen.File(audio_output_path, easy=False) # Use easy=False for more control
397
+
398
+ if audio is None:
399
+ raise ValueError("Mutagen could not load the audio file. Format might be unsupported by Mutagen or file corrupted.")
400
+
401
+ # Clear existing images before adding new one (optional, prevents duplicates)
402
+ try:
403
+ if isinstance(audio, (MP3, EasyMP3)):
404
+ audio.tags.delall('APIC')
405
+ elif isinstance(audio, FLAC):
406
+ audio.clear_pictures()
407
+ elif isinstance(audio, MP4):
408
+ if 'covr' in audio:
409
+ del audio['covr']
410
+ # OggVorbis picture removal is more complex, might need specific key deletion
411
+ elif isinstance(audio, OggVorbis) and "metadata_block_picture" in audio:
412
+ del audio["metadata_block_picture"]
413
+ audio.save() # Save after deletion before adding
414
+ audio = mutagen.File(audio_output_path, easy=False) # Re-load
415
+ except Exception as e:
416
+ logging.warning(f"Could not clear existing artwork before embedding: {e}")
417
+
418
+
419
+ # Embedding logic differs by format
420
+ if isinstance(audio, (MP3, EasyMP3)):
421
+ if audio.tags is None: audio.add_tags() # Ensure tags exist
422
+ audio.tags.add(
423
+ APIC(
424
+ encoding=3, # 3 is for utf-8
425
+ mime=mime_type,
426
+ type=3, # 3 is for cover image (front)
427
+ desc=u'Cover',
428
+ data=cover_data
429
+ )
430
+ )
431
+ elif isinstance(audio, FLAC):
432
+ pic = mutagen.flac.Picture()
433
+ pic.data = cover_data
434
+ pic.type = mutagen.id3.PictureType.COVER_FRONT
435
+ pic.mime = mime_type
436
+ # pic.width, pic.height, pic.depth = ... # Optionally get dimensions from PIL
437
+ audio.add_picture(pic)
438
+ elif isinstance(audio, OggVorbis):
439
+ # Ogg uses base64 encoded pictures in METADATA_BLOCK_PICTURE tag
440
+ import base64
441
+ pic_data = base64.b64encode(cover_data).decode('ascii')
442
+ # This field expects a FLAC Picture block, base64 encoded.
443
+ pic = mutagen.flac.Picture()
444
+ pic.data = cover_data
445
+ pic.type = mutagen.id3.PictureType.COVER_FRONT
446
+ pic.mime = mime_type
447
+ audio["metadata_block_picture"] = [base64.b64encode(pic.write()).decode("ascii")]
448
+
449
+ elif isinstance(audio, MP4):
450
+ if mime_type == 'image/jpeg':
451
+ pic_format = MP4Cover.FORMAT_JPEG
452
+ elif mime_type == 'image/png':
453
+ pic_format = MP4Cover.FORMAT_PNG
454
+ else:
455
+ pic_format = MP4Cover.FORMAT_UNDEFINED # Or skip if unknown
456
+ logging.warning(f"Unsupported cover image format ({mime_type}) for MP4 embedding.")
457
+
458
+ if pic_format != MP4Cover.FORMAT_UNDEFINED:
459
+ audio['covr'] = [MP4Cover(cover_data, imageformat=pic_format)]
460
+
461
+ # Add other metadata (optional)
462
+ try:
463
+ # Use easy=True for simpler metadata access if needed elsewhere
464
+ audio_easy = mutagen.File(audio_output_path, easy=True)
465
+ if audio_easy is not None:
466
+ audio_easy['title'] = base_filename
467
+ audio_easy['artist'] = "Generated Audiobook" # Or try to get from ebook metadata later
468
+ audio_easy.save() # Save easy tags first
469
+ except Exception as tag_err:
470
+ logging.warning(f"Could not set basic title/artist tags: {tag_err}")
471
+ # If easy tags failed, save the main audio object (with picture)
472
+ if audio is not None: audio.save()
473
+ else:
474
+ # If easy tags succeeded, save the main audio object too (if necessary, though easy.save might suffice)
475
+ if audio is not None: audio.save()
476
+
477
+
478
+ status_updates.append("✅ Cover art embedded successfully.")
479
+ logging.info("Cover art embedded successfully.")
480
+
481
+ except (mutagen.MutagenError, ValueError, IOError, TypeError, KeyError) as e:
482
+ status_updates.append(f"⚠️ Could not embed cover art. Error: {e}")
483
+ logging.warning(f"Failed to embed cover art: {e}", exc_info=True)
484
+ except Exception as e:
485
+ status_updates.append(f"⚠️ An unexpected error occurred during cover art embedding: {e}")
486
+ logging.error(f"Unexpected error during cover embedding: {e}", exc_info=True)
487
+ elif embed_cover and not cover_extracted:
488
+ status_updates.append("ℹ️ Cover art embedding skipped (no cover extracted or invalid).")
489
+ elif embed_cover and not MUTAGEN_AVAILABLE:
490
+ status_updates.append("⚠️ Cover art embedding skipped (Mutagen library not installed).")
491
+
492
+
493
+ # --- Step 5: Prepare final output ---
494
+ progress(1.0, desc="Complete")
495
+ status_updates.append("✅ Conversion complete!")
496
+ audio_output_path_final = audio_output_path # Mark the path as final
497
+
498
+ # Return paths for Gradio components
499
+ final_status = "\n".join(status_updates)
500
+ # Need to return a *copy* of the file outside the temp dir, or Gradio might lose it after cleanup
501
+ # However, Gradio usually handles temp files well if returned directly. Let's try direct return first.
502
+ # If issues arise, copy the file to a more stable temp location managed by Gradio if possible, or just let the user download.
503
+ logging.info(f"Returning audio: {audio_output_path_final}, cover: {cover_image_path_final}")
504
+ # Return audio path twice: once for Audio component, once for File component
505
+ return audio_output_path_final, cover_image_path_final, final_status, audio_output_path_final
506
+
507
  except Exception as e:
508
+ error_msg = f"An unexpected error occurred in the main process: {e}"
509
+ status_updates.append(f" {error_msg}")
510
+ logging.error(error_msg, exc_info=True)
511
+ return None, cover_image_path_final, f"**Error:** An unexpected critical error occurred.\nCheck logs for details.\n{e}", None # Return what we have
512
+
513
+ finally:
514
+ # --- Cleanup ---
515
+ # Keep the final audio and cover files if successful, delete the rest
516
+ # Gradio should handle the returned file paths, but clean the temp dir *contents* just in case.
517
+ # It's safer to let Gradio manage the returned files' lifecycle.
518
+ # We'll clean the intermediate files (.txt, original cover if converted).
519
+ try:
520
+ if 'txt_output_path' in locals() and os.path.exists(txt_output_path):
521
+ os.remove(txt_output_path)
522
+ logging.info(f"Removed intermediate file: {txt_output_path}")
523
+ # Remove original cover if it was converted and different from final
524
+ if ('cover_image_path_final' in locals() and cover_image_path_final and
525
+ 'cover_output_path_temp' in locals() and cover_output_path_temp != cover_image_path_final and
526
+ os.path.exists(cover_output_path_temp)):
527
+ os.remove(cover_output_path_temp)
528
+ logging.info(f"Removed intermediate file: {cover_output_path_temp}")
529
+ # Let Gradio handle the final audio/cover paths returned.
530
+ # Do NOT delete temp_dir itself if files within it were returned to Gradio.
531
+ # If Gradio copies the files, then shutil.rmtree(temp_dir) is safe. Test this behavior.
532
+ # For safety, let's rely on OS/Gradio temp file cleanup unless memory becomes an issue.
533
+ if 'temp_dir' in locals() and os.path.exists(temp_dir):
534
+ logging.info(f"Skipping deletion of temp dir '{temp_dir}' to allow Gradio access to output files.")
535
+ # To force cleanup (may break Gradio display):
536
+ # shutil.rmtree(temp_dir, ignore_errors=True)
537
+ # logging.info(f"Attempted cleanup of temp dir: {temp_dir}")
538
+
539
+
540
+ except OSError as e:
541
+ logging.warning(f"Could not remove intermediate file: {e}")
542
+
543
+
544
# --- Gradio Interface Definition ---

# get_espeak_voices() returns a mapping; its keys are the labels offered in the
# language/voice dropdown.
available_voices = get_espeak_voices()
voice_choices = list(available_voices.keys())

# Choose the default voice: prefer the US English label, then generic English,
# then whatever voice was reported first, and finally the literal "en" when no
# voices were found at all.
if "English (en-US) (en-us)" in voice_choices:
    default_voice = "English (en-US) (en-us)"
elif "English (en)" in voice_choices:
    default_voice = "English (en)"
elif voice_choices:
    default_voice = voice_choices[0]
else:
    default_voice = "en"

# External command-line tools probed at startup, paired with the label reported
# to the user when the tool cannot be found. Probed in this order.
_EXTERNAL_TOOLS = (
    ("ebook-convert", "Calibre ('ebook-convert')"),
    ("ebook-meta", "Calibre ('ebook-meta' - recommended for cover art)"),
    ("espeak-ng", "espeak-ng"),
    ("lame", "LAME (needed for MP3 output)"),
    ("oggenc", "oggenc (needed for OGG output)"),
)

startup_warnings = [
    label for command, label in _EXTERNAL_TOOLS if not check_command(command)
]
# Mutagen is a Python library rather than a command, so it is tracked by a flag.
if not MUTAGEN_AVAILABLE:
    startup_warnings.append("Python 'mutagen' library (needed for embedding cover art)")

# Build the warning shown in the UI (empty string when nothing is missing) and
# echo the same list to the console inside a visible banner.
if startup_warnings:
    _missing_components = ", ".join(startup_warnings)
    startup_message = (
        "**⚠️ Startup Warning: The following components might be missing or not found in PATH:**\n\n"
        f"- {_missing_components}\n\n"
        "Please install them for full functionality. Check console logs for details."
    )
    _banner_rule = "-" * 60
    print(_banner_rule)
    print(f"STARTUP WARNING: Missing components: {_missing_components}")
    print(_banner_rule)
else:
    startup_message = ""
569
+
570
# Define UI Elements
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page header and short usage instructions.
    gr.Markdown("# Ebook to Audiobook Converter 🎧📚")
    gr.Markdown(
        "Upload an ebook file (EPUB, MOBI, AZW3, PDF*, etc.), choose a language and format, and convert it to an audiobook using Calibre and eSpeak-NG.\n\n"
        "*Note: PDF conversion quality varies greatly. Text-based PDFs work best.*"
    )

    # Show the missing-dependency warning in the page as well as on the console.
    if startup_message:
        gr.Markdown(startup_message)

    with gr.Row():
        # Left column: the conversion inputs.
        with gr.Column(scale=1):
            ebook_input = gr.File(label="1. Upload Ebook", file_count="single")
            lang_dropdown = gr.Dropdown(
                choices=voice_choices,
                value=default_voice,
                label="2. Select Language / Voice",
                interactive=True,
            )
            format_dropdown = gr.Dropdown(
                choices=["mp3", "ogg", "wav"],
                value="mp3",
                label="3. Select Output Audio Format",
                interactive=True,
            )
            # Cover embedding depends on mutagen: the box is ticked and
            # editable only when the library is available.
            cover_checkbox = gr.Checkbox(
                label="Embed Cover Art (if available)",
                value=bool(MUTAGEN_AVAILABLE),
                interactive=MUTAGEN_AVAILABLE,
            )
            submit_button = gr.Button("Convert to Audiobook", variant="primary")

        # Right column: status log and conversion results.
        with gr.Column(scale=2):
            status_textbox = gr.Textbox(
                label="Conversion Status",
                lines=12,
                max_lines=25,
                interactive=False,
                show_copy_button=True,
            )
            with gr.Row():
                # Both components receive file paths rather than in-memory data.
                cover_image = gr.Image(
                    label="Extracted Cover Art",
                    type="filepath",
                    interactive=False,
                    height=200,
                    width=200,
                )
                audio_output_player = gr.Audio(
                    label="Generated Audiobook",
                    type="filepath",
                    interactive=False,
                )
            # Dedicated download slot for the generated file.
            # NOTE(review): placed below the cover/player row; the original
            # nesting was not recoverable here - confirm the intended layout.
            audio_output_download = gr.File(label="Download Audiobook File", interactive=False)

    # Connect components: the outputs list follows the order of the four values
    # returned by convert_ebook_to_audio (audio, cover, status text, download file).
    submit_button.click(
        fn=convert_ebook_to_audio,
        inputs=[ebook_input, lang_dropdown, format_dropdown, cover_checkbox],
        outputs=[audio_output_player, cover_image, status_textbox, audio_output_download],
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
617
 
618
# --- Launch the App ---
if __name__ == "__main__":
    # Startup banner: a reminder of the external tools the app shells out to.
    for banner_line in (
        "Starting Gradio App...",
        "Ensure Calibre (ebook-convert, ebook-meta), espeak-ng, lame, and oggenc are installed and in your system PATH.",
    ):
        print(banner_line)

    # Warn when no voices were found, because the voice dropdown is built
    # from this list.
    if not voice_choices:
        print("\nWARNING: Could not retrieve any voices from espeak-ng. The language dropdown will be limited or empty!\n")

    # For a public link, use demo.launch(share=True) instead.
    demo.launch()