Soufianesejjari committed on
Commit
628289a
·
verified ·
1 Parent(s): 41a6b44

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -135
app.py CHANGED
@@ -6,24 +6,16 @@ import streamlit as st
6
  import tempfile
7
  import time
8
  import re
 
9
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
10
  from langchain_groq import ChatGroq
11
  from langchain.schema import HumanMessage
12
- from google.oauth2.credentials import Credentials
13
- from google_auth_oauthlib.flow import InstalledAppFlow
14
- from google.auth.transport.requests import Request
15
- from googleapiclient.discovery import build
16
- from googleapiclient.http import MediaFileUpload
17
- import pickle
18
 
19
  # Constants
20
  MAX_CHUNK_SIZE = 3000 # Characters per chunk
21
  SEGMENT_DURATION = 120 # Seconds for each segment
22
  MODEL_NAME = "deepseek-r1-distill-llama-70b"
23
 
24
- # Google Drive API setup
25
- SCOPES = ['https://www.googleapis.com/auth/drive.file']
26
-
27
  st.set_page_config(page_title="YouTube Video Processor", layout="wide")
28
  st.title("YouTube Video Processor")
29
 
@@ -36,75 +28,36 @@ with st.sidebar:
36
  ["ar", "en", "fr", "es", "de", "it", "ja", "ko", "pt", "ru", "zh"],
37
  index=0)
38
 
39
- st.header("Google Drive Connection")
40
- st.info("You'll need to authorize this app to upload files to your Google Drive")
41
-
42
- upload_to_drive = st.checkbox("Upload to Google Drive", value=True)
43
-
44
- if upload_to_drive:
45
- uploaded_credentials = st.file_uploader("Upload your client_secret.json", type="json",
46
- help="Download from Google Cloud Console")
47
 
48
- # Function to authenticate with Google Drive
49
- @st.cache_resource
50
- def authenticate_google_drive(credentials_file=None):
51
- creds = None
52
-
53
- # Check if we have token.pickle
54
- token_path = "token.pickle"
55
- if os.path.exists(token_path):
56
- with open(token_path, 'rb') as token:
57
- creds = pickle.load(token)
58
-
59
- # If there are no (valid) credentials, let the user log in
60
- if not creds or not creds.valid:
61
- if creds and creds.expired and creds.refresh_token:
62
- creds.refresh(Request())
63
- else:
64
- if credentials_file:
65
- # Save credentials file to temporary location
66
- with tempfile.NamedTemporaryFile(delete=False, suffix='.json') as tmp_file:
67
- tmp_file.write(credentials_file.getvalue())
68
- credentials_path = tmp_file.name
69
-
70
- flow = InstalledAppFlow.from_client_secrets_file(credentials_path, SCOPES)
71
- creds = flow.run_local_server(port=0)
72
- os.unlink(credentials_path) # Delete the temp file
73
- else:
74
- return None
75
-
76
- # Save the credentials for the next run
77
- with open(token_path, 'wb') as token:
78
- pickle.dump(creds, token)
79
-
80
- return creds
81
-
82
- # Create Drive service
83
- def create_drive_service(creds):
84
- return build('drive', 'v3', credentials=creds)
85
-
86
- # Function to upload file to Google Drive
87
- def upload_to_google_drive(drive_service, file_path, folder_id=None):
88
- file_name = os.path.basename(file_path)
89
- file_metadata = {'name': file_name}
90
-
91
- if folder_id:
92
- file_metadata['parents'] = [folder_id]
93
-
94
- media = MediaFileUpload(file_path, resumable=True)
95
- file = drive_service.files().create(body=file_metadata, media_body=media, fields='id').execute()
96
-
97
- return file.get('id')
98
 
99
- # Function to create folder in Google Drive
100
- def create_drive_folder(drive_service, folder_name):
101
- folder_metadata = {
102
- 'name': folder_name,
103
- 'mimeType': 'application/vnd.google-apps.folder'
104
- }
105
 
106
- folder = drive_service.files().create(body=folder_metadata, fields='id').execute()
107
- return folder.get('id')
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
  # Video processing functions from your original script
110
  def get_video_transcript(video_id, language):
@@ -193,12 +146,12 @@ def extract_questions_from_transcript(transcript, api_key, progress_bar=None):
193
  except Exception as e:
194
  st.warning(f"Error extracting questions for chunk {i+1}: {e}")
195
 
196
- # Remove duplicates and limit to 30 questions
197
  unique_questions = []
198
  for q in all_questions:
199
  if q not in unique_questions and q:
200
  unique_questions.append(q)
201
- if len(unique_questions) >= 30:
202
  break
203
 
204
  return unique_questions
@@ -364,7 +317,7 @@ if st.button("Process Video"):
364
  os.makedirs(final_dir, exist_ok=True)
365
 
366
  # Process tab layout
367
- tab1, tab2, tab3, tab4, tab5 = st.tabs(["Transcript", "Questions", "Video", "Segments", "Upload"])
368
 
369
  # Tab 1: Extract transcript
370
  with tab1:
@@ -423,7 +376,7 @@ if st.button("Process Video"):
423
 
424
  # Create segments
425
  st.subheader("Creating Segments")
426
- segments = create_segment_timestamps(transcript, num_segments=min(30, len(questions)))
427
  st.write(f"Created {len(segments)} segments")
428
  else:
429
  download_progress.progress(1.0, text="Failed to download video")
@@ -457,51 +410,32 @@ if st.button("Process Video"):
457
  segment_progress.progress(1.0, text="Failed to create segments")
458
  st.stop()
459
 
460
- # Tab 5: Upload to Google Drive
461
  with tab5:
462
- st.header("Upload to Google Drive")
463
 
464
- if upload_to_drive and uploaded_credentials:
465
- upload_progress = st.progress(0, text="Preparing Google Drive upload...")
466
 
467
- # Authenticate with Google Drive
468
- creds = authenticate_google_drive(uploaded_credentials)
 
 
 
 
 
469
 
470
- if creds:
471
- drive_service = create_drive_service(creds)
472
-
473
- # Create folder for videos
474
- folder_name = f"YouTube_Segments_{video_id}_{time.strftime('%Y%m%d-%H%M%S')}"
475
- folder_id = create_drive_folder(drive_service, folder_name)
476
-
477
- st.write(f"Created Google Drive folder: {folder_name}")
478
-
479
- # Upload files
480
- uploaded_files = []
481
- for i, file_path in enumerate(final_files):
482
- upload_progress.progress((i + 1) / len(final_files),
483
- text=f"Uploading file {i+1}/{len(final_files)}")
484
-
485
- file_id = upload_to_google_drive(drive_service, file_path, folder_id)
486
- if file_id:
487
- uploaded_files.append((os.path.basename(file_path), file_id))
488
-
489
- upload_progress.progress(1.0, text=f"Uploaded {len(uploaded_files)} files to Google Drive")
490
-
491
- # Display uploaded files
492
- st.subheader("Uploaded Files")
493
- for filename, file_id in uploaded_files:
494
- st.write(f"- {filename}")
495
-
496
- st.success(f"All {len(uploaded_files)} files have been uploaded to Google Drive in folder '{folder_name}'")
497
- else:
498
- upload_progress.progress(1.0, text="Failed to authenticate with Google Drive")
499
- st.error("Google Drive authentication failed. Please check your credentials file.")
500
  else:
501
- st.info("Google Drive upload was not selected or credentials were not provided.")
502
 
503
- # Cleanup temporary files
504
- st.write("Processing complete! Temporary files will be cleaned up automatically.")
505
 
506
  # Additional info
507
  with st.expander("About this app"):
@@ -511,27 +445,11 @@ with st.expander("About this app"):
511
  2. Using LLM to identify key questions answered in the video
512
  3. Splitting the video into segments
513
  4. Naming each segment based on the questions
514
- 5. Uploading the segments to Google Drive (optional)
515
 
516
  To use the app, you need:
517
  - A GROQ API key
518
  - A YouTube video ID
519
- - Google Drive credentials (if uploading to Drive)
520
 
521
  The app works best with videos that have transcripts available.
522
- """)
523
-
524
- # Instructions for Google Drive setup
525
- with st.expander("How to set up Google Drive Integration"):
526
- st.write("""
527
- ### Setting up Google Drive API Access
528
-
529
- 1. Go to the [Google Cloud Console](https://console.cloud.google.com/)
530
- 2. Create a new project or select an existing one
531
- 3. Enable the Google Drive API
532
- 4. Create OAuth 2.0 credentials (Desktop app type)
533
- 5. Download the credentials JSON file
534
- 6. Upload the file to this app when prompted
535
-
536
- After authentication, the app will be able to upload files to your Google Drive.
537
  """)
 
6
  import tempfile
7
  import time
8
  import re
9
+ import base64
10
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
11
  from langchain_groq import ChatGroq
12
  from langchain.schema import HumanMessage
 
 
 
 
 
 
13
 
14
  # Constants
15
  MAX_CHUNK_SIZE = 3000 # Characters per chunk
16
  SEGMENT_DURATION = 120 # Seconds for each segment
17
  MODEL_NAME = "deepseek-r1-distill-llama-70b"
18
 
 
 
 
19
  st.set_page_config(page_title="YouTube Video Processor", layout="wide")
20
  st.title("YouTube Video Processor")
21
 
 
28
  ["ar", "en", "fr", "es", "de", "it", "ja", "ko", "pt", "ru", "zh"],
29
  index=0)
30
 
31
+ max_segments = st.slider("Maximum number of segments", 5, 30, 15)
 
 
 
 
 
 
 
32
 
33
+ # Function to create a download link for a file
34
def get_download_link(file_path, link_text):
    """Return an HTML anchor that downloads ``file_path`` via a base64 data URI.

    The whole file is read into memory and embedded in the returned markup,
    so the size of the result grows with the size of the file.

    Args:
        file_path: Path of the file to embed.
        link_text: Visible text of the link; HTML-escaped before use.

    Returns:
        An ``<a>`` element string whose ``href`` is a ``data:video/mp4``
        URI and whose ``download`` attribute is the file's base name.

    Raises:
        OSError: If ``file_path`` cannot be opened or read.
    """
    import html  # local import: only this helper needs it

    with open(file_path, 'rb') as f:
        b64 = base64.b64encode(f.read()).decode('ascii')

    # Escape everything interpolated into the markup so that quotes,
    # ampersands or angle brackets in a file name or label cannot break out
    # of the attribute/element when the caller renders this as raw HTML.
    filename = html.escape(os.path.basename(file_path), quote=True)
    label = html.escape(str(link_text), quote=True)
    return f'<a href="data:video/mp4;base64,{b64}" download="{filename}">{label}</a>'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
+ # Function to create a download link for all files as ZIP
43
def get_zip_download_link(files, output_name):
    """Return an HTML anchor that downloads all ``files`` as one ZIP archive.

    The archive is built in memory with the standard-library ``zipfile``
    module, so no external ``zip`` executable is required and no archive is
    left behind in the working directory. Each file is stored under its base
    name only (directory components are dropped).

    Args:
        files: Paths of the files to include; an empty value yields ``""``.
        output_name: Archive name without the ``.zip`` extension; used for
            the link's ``download`` attribute.

    Returns:
        An ``<a>`` element string with a ``data:application/zip`` URI, or an
        empty string when there is nothing to package or packaging failed
        (the failure is reported through ``st.error``).
    """
    if not files:
        return ""

    # Local imports: only this helper needs these stdlib modules.
    import html
    import io
    import zipfile

    try:
        buffer = io.BytesIO()
        with zipfile.ZipFile(buffer, 'w', zipfile.ZIP_DEFLATED) as archive:
            for path in files:
                # Keep only the base name inside the archive.
                archive.write(path, arcname=os.path.basename(path))
        b64 = base64.b64encode(buffer.getvalue()).decode('ascii')
    except Exception as e:
        # UI boundary: surface the problem to the user instead of crashing
        # the page; callers treat "" as "no link available".
        st.error(f"Error creating ZIP file: {e}")
        return ""

    # Escape the name before placing it inside an HTML attribute.
    zip_filename = html.escape(f"{output_name}.zip", quote=True)
    return (
        f'<a href="data:application/zip;base64,{b64}" '
        f'download="{zip_filename}">Download All Segments (ZIP)</a>'
    )
61
 
62
  # Video processing functions from your original script
63
  def get_video_transcript(video_id, language):
 
146
  except Exception as e:
147
  st.warning(f"Error extracting questions for chunk {i+1}: {e}")
148
 
149
+ # Remove duplicates and limit to max_segments questions
150
  unique_questions = []
151
  for q in all_questions:
152
  if q not in unique_questions and q:
153
  unique_questions.append(q)
154
+ if len(unique_questions) >= max_segments:
155
  break
156
 
157
  return unique_questions
 
317
  os.makedirs(final_dir, exist_ok=True)
318
 
319
  # Process tab layout
320
+ tab1, tab2, tab3, tab4, tab5 = st.tabs(["Transcript", "Questions", "Video", "Segments", "Download"])
321
 
322
  # Tab 1: Extract transcript
323
  with tab1:
 
376
 
377
  # Create segments
378
  st.subheader("Creating Segments")
379
+ segments = create_segment_timestamps(transcript, num_segments=min(max_segments, len(questions)))
380
  st.write(f"Created {len(segments)} segments")
381
  else:
382
  download_progress.progress(1.0, text="Failed to download video")
 
410
  segment_progress.progress(1.0, text="Failed to create segments")
411
  st.stop()
412
 
413
+ # Tab 5: Download segments
414
  with tab5:
415
+ st.header("Download Video Segments")
416
 
417
+ if final_files:
418
+ st.subheader("Individual Downloads")
419
 
420
+ # Create 3 columns for better layout
421
+ cols = st.columns(3)
422
+ for i, file_path in enumerate(final_files):
423
+ col_idx = i % 3
424
+ filename = os.path.basename(file_path)
425
+ download_link = get_download_link(file_path, f"Download: {filename}")
426
+ cols[col_idx].markdown(download_link, unsafe_allow_html=True)
427
 
428
+ st.subheader("Download All Segments")
429
+ zip_name = f"video_segments_{video_id}"
430
+ zip_link = get_zip_download_link(final_files, zip_name)
431
+ if zip_link:
432
+ st.markdown(zip_link, unsafe_allow_html=True)
433
+ st.info("The zip file contains all processed video segments with descriptive filenames.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
434
  else:
435
+ st.warning("No segments available for download.")
436
 
437
+ # Completion message
438
+ st.success("Processing complete! You can download the video segments from the Download tab.")
439
 
440
  # Additional info
441
  with st.expander("About this app"):
 
445
  2. Using LLM to identify key questions answered in the video
446
  3. Splitting the video into segments
447
  4. Naming each segment based on the questions
448
+ 5. Providing download links for all segments
449
 
450
  To use the app, you need:
451
  - A GROQ API key
452
  - A YouTube video ID
 
453
 
454
  The app works best with videos that have transcripts available.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
455
  """)