Chrunos committed on
Commit
59c77bf
·
verified ·
1 Parent(s): 0651ec2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -18
app.py CHANGED
@@ -130,41 +130,61 @@ async def get_video_url(youtube_url: str):
130
  @app.get("/script")
131
  async def get_transcript(youtube_url: str, language: str = None):
132
  try:
133
- # If no specific language is requested, we'll try to get any available subtitle
134
  ydl_opts = {
135
  'skip_download': True,
136
  'writesubtitles': True,
137
  'writeautomaticsub': True,
138
- 'subtitleslangs': ['all'] if not language else [language],
139
- 'subtitlesformat': 'best',
140
  'outtmpl': '%(id)s.%(ext)s',
141
  'noplaylist': True,
142
  'cookiefile': "firefox-cookies.txt"
143
  }
 
 
 
 
 
 
144
  env_to_cookies_from_env("firefox-cookies.txt")
145
- # Show current directory structure before download
146
  logger.info(f"Current directory files (before): {os.listdir('.')}")
147
 
148
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
149
- info = ydl.extract_info(youtube_url, download=True)
 
150
  video_id = info['id']
151
  logger.info(f"Video ID: {video_id}")
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  # Check actual downloaded files
154
  logger.info(f"Current directory files (after extraction): {os.listdir('.')}")
155
 
156
- # First see if we can find a subtitle file for the requested language
157
- subtitle_files = []
158
- if language:
159
- subtitle_files = [f for f in os.listdir('.')
160
- if f.startswith(video_id) and (language in f)]
161
 
162
- # If no specific language requested or no files found for requested language,
163
- # get any subtitle file for this video
164
- if not subtitle_files:
165
- subtitle_files = [f for f in os.listdir('.')
166
- if f.startswith(video_id) and
167
- any(f.endswith(ext) for ext in ['.vtt', '.srt', '.ttml', '.json3'])]
168
 
169
  logger.info(f"Potential subtitle files: {subtitle_files}")
170
 
@@ -197,7 +217,15 @@ async def get_transcript(youtube_url: str, language: str = None):
197
  else:
198
  text = f"Unsupported format: {subtitle_file}"
199
 
200
- return {"transcript": text, "language": subtitle_file.split('.')[-2] if '.' in subtitle_file else "unknown"}
 
 
 
 
 
 
 
 
201
 
202
  return {"transcript": f"No subtitle files found for {video_id}", "language": "none"}
203
  except Exception as e:
 
130
  @app.get("/script")
131
  async def get_transcript(youtube_url: str, language: str = None):
132
  try:
133
+ # Set up yt-dlp options
134
  ydl_opts = {
135
  'skip_download': True,
136
  'writesubtitles': True,
137
  'writeautomaticsub': True,
 
 
138
  'outtmpl': '%(id)s.%(ext)s',
139
  'noplaylist': True,
140
  'cookiefile': "firefox-cookies.txt"
141
  }
142
+
143
+ # If a language is specified, only download that language
144
+ # Otherwise, we'll first get video info to determine the original language
145
+ if language:
146
+ ydl_opts['subtitleslangs'] = [language]
147
+
148
  env_to_cookies_from_env("firefox-cookies.txt")
 
149
  logger.info(f"Current directory files (before): {os.listdir('.')}")
150
 
151
+ # First, get video info without downloading anything
152
+ with yt_dlp.YoutubeDL({**ydl_opts, 'skip_download': True, 'writesubtitles': False, 'writeautomaticsub': False}) as ydl:
153
+ info = ydl.extract_info(youtube_url, download=False)
154
  video_id = info['id']
155
  logger.info(f"Video ID: {video_id}")
156
 
157
+ # If no language specified, try to use the original language
158
+ if not language:
159
+ # Try to determine the original language if available in the info
160
+ if 'subtitles' in info and info['subtitles']:
161
+ # Use the first available subtitle language
162
+ available_languages = list(info['subtitles'].keys())
163
+ if available_languages:
164
+ language = available_languages[0]
165
+ logger.info(f"Using detected language: {language}")
166
+ ydl_opts['subtitleslangs'] = [language]
167
+ else:
168
+ # Fall back to 'en' if can't determine
169
+ language = 'en'
170
+ ydl_opts['subtitleslangs'] = [language]
171
+
172
+ # Now download the subtitle in the selected language
173
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
174
+ ydl.extract_info(youtube_url, download=True)
175
+
176
  # Check actual downloaded files
177
  logger.info(f"Current directory files (after extraction): {os.listdir('.')}")
178
 
179
+ # Look for the subtitle file with the specified language
180
+ subtitle_files = [f for f in os.listdir('.')
181
+ if f.startswith(video_id) and any(ext in f for ext in ['.vtt', '.srt', '.ttml', '.json3'])]
 
 
182
 
183
+ # If specific language requested, filter for that language
184
+ if language:
185
+ lang_subtitle_files = [f for f in subtitle_files if language in f]
186
+ if lang_subtitle_files:
187
+ subtitle_files = lang_subtitle_files
 
188
 
189
  logger.info(f"Potential subtitle files: {subtitle_files}")
190
 
 
217
  else:
218
  text = f"Unsupported format: {subtitle_file}"
219
 
220
+ # Clean up files to avoid cluttering the directory
221
+ for f in subtitle_files:
222
+ try:
223
+ os.remove(f)
224
+ except:
225
+ logger.warning(f"Could not remove file: {f}")
226
+
227
+ detected_language = subtitle_file.split('.')[-2] if '.' in subtitle_file else "unknown"
228
+ return {"transcript": text, "language": detected_language}
229
 
230
  return {"transcript": f"No subtitle files found for {video_id}", "language": "none"}
231
  except Exception as e: