Update app.py
Browse files
app.py
CHANGED
@@ -130,41 +130,61 @@ async def get_video_url(youtube_url: str):
|
|
130 |
@app.get("/script")
|
131 |
async def get_transcript(youtube_url: str, language: str = None):
|
132 |
try:
|
133 |
-
#
|
134 |
ydl_opts = {
|
135 |
'skip_download': True,
|
136 |
'writesubtitles': True,
|
137 |
'writeautomaticsub': True,
|
138 |
-
'subtitleslangs': ['all'] if not language else [language],
|
139 |
-
'subtitlesformat': 'best',
|
140 |
'outtmpl': '%(id)s.%(ext)s',
|
141 |
'noplaylist': True,
|
142 |
'cookiefile': "firefox-cookies.txt"
|
143 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
144 |
env_to_cookies_from_env("firefox-cookies.txt")
|
145 |
-
# Show current directory structure before download
|
146 |
logger.info(f"Current directory files (before): {os.listdir('.')}")
|
147 |
|
148 |
-
|
149 |
-
|
|
|
150 |
video_id = info['id']
|
151 |
logger.info(f"Video ID: {video_id}")
|
152 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
# Check actual downloaded files
|
154 |
logger.info(f"Current directory files (after extraction): {os.listdir('.')}")
|
155 |
|
156 |
-
#
|
157 |
-
subtitle_files = [
|
158 |
-
|
159 |
-
subtitle_files = [f for f in os.listdir('.')
|
160 |
-
if f.startswith(video_id) and (language in f)]
|
161 |
|
162 |
-
# If
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
any(f.endswith(ext) for ext in ['.vtt', '.srt', '.ttml', '.json3'])]
|
168 |
|
169 |
logger.info(f"Potential subtitle files: {subtitle_files}")
|
170 |
|
@@ -197,7 +217,15 @@ async def get_transcript(youtube_url: str, language: str = None):
|
|
197 |
else:
|
198 |
text = f"Unsupported format: {subtitle_file}"
|
199 |
|
200 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
|
202 |
return {"transcript": f"No subtitle files found for {video_id}", "language": "none"}
|
203 |
except Exception as e:
|
|
|
130 |
@app.get("/script")
|
131 |
async def get_transcript(youtube_url: str, language: str = None):
|
132 |
try:
|
133 |
+
# Set up yt-dlp options
|
134 |
ydl_opts = {
|
135 |
'skip_download': True,
|
136 |
'writesubtitles': True,
|
137 |
'writeautomaticsub': True,
|
|
|
|
|
138 |
'outtmpl': '%(id)s.%(ext)s',
|
139 |
'noplaylist': True,
|
140 |
'cookiefile': "firefox-cookies.txt"
|
141 |
}
|
142 |
+
|
143 |
+
# If a language is specified, only download that language
|
144 |
+
# Otherwise, we'll first fetch the video info and pick a subtitle language from it
|
145 |
+
if language:
|
146 |
+
ydl_opts['subtitleslangs'] = [language]
|
147 |
+
|
148 |
env_to_cookies_from_env("firefox-cookies.txt")
|
|
|
149 |
logger.info(f"Current directory files (before): {os.listdir('.')}")
|
150 |
|
151 |
+
# First, get video info without downloading anything
|
152 |
+
with yt_dlp.YoutubeDL({**ydl_opts, 'skip_download': True, 'writesubtitles': False, 'writeautomaticsub': False}) as ydl:
|
153 |
+
info = ydl.extract_info(youtube_url, download=False)
|
154 |
video_id = info['id']
|
155 |
logger.info(f"Video ID: {video_id}")
|
156 |
|
157 |
+
# If no language was specified, pick one from the subtitles listed in the video info
|
158 |
+
if not language:
|
159 |
+
# Check whether the video info lists any subtitle languages
|
160 |
+
if 'subtitles' in info and info['subtitles']:
|
161 |
+
# Use the first available subtitle language
|
162 |
+
available_languages = list(info['subtitles'].keys())
|
163 |
+
if available_languages:
|
164 |
+
language = available_languages[0]
|
165 |
+
logger.info(f"Using detected language: {language}")
|
166 |
+
ydl_opts['subtitleslangs'] = [language]
|
167 |
+
else:
|
168 |
+
# Fall back to 'en' if no language could be determined
|
169 |
+
language = 'en'
|
170 |
+
ydl_opts['subtitleslangs'] = [language]
|
171 |
+
|
172 |
+
# Now download the subtitle in the selected language
|
173 |
+
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
174 |
+
ydl.extract_info(youtube_url, download=True)
|
175 |
+
|
176 |
# Check actual downloaded files
|
177 |
logger.info(f"Current directory files (after extraction): {os.listdir('.')}")
|
178 |
|
179 |
+
# Collect this video's downloaded files whose names contain a known subtitle extension
|
180 |
+
subtitle_files = [f for f in os.listdir('.')
|
181 |
+
if f.startswith(video_id) and any(ext in f for ext in ['.vtt', '.srt', '.ttml', '.json3'])]
|
|
|
|
|
182 |
|
183 |
+
# If a language is set, prefer the files whose names contain it (keep all files if none match)
|
184 |
+
if language:
|
185 |
+
lang_subtitle_files = [f for f in subtitle_files if language in f]
|
186 |
+
if lang_subtitle_files:
|
187 |
+
subtitle_files = lang_subtitle_files
|
|
|
188 |
|
189 |
logger.info(f"Potential subtitle files: {subtitle_files}")
|
190 |
|
|
|
217 |
else:
|
218 |
text = f"Unsupported format: {subtitle_file}"
|
219 |
|
220 |
+
# Clean up files to avoid cluttering the directory
|
221 |
+
for f in subtitle_files:
|
222 |
+
try:
|
223 |
+
os.remove(f)
|
224 |
+
except:
|
225 |
+
logger.warning(f"Could not remove file: {f}")
|
226 |
+
|
227 |
+
detected_language = subtitle_file.split('.')[-2] if '.' in subtitle_file else "unknown"
|
228 |
+
return {"transcript": text, "language": detected_language}
|
229 |
|
230 |
return {"transcript": f"No subtitle files found for {video_id}", "language": "none"}
|
231 |
except Exception as e:
|