httpdaniel committed on
Commit
2e56c11
·
1 Parent(s): 2b04643

cleaning project

Browse files
Files changed (2) hide show
  1. app.py +7 -42
  2. requirements.txt +0 -1
app.py CHANGED
@@ -1,33 +1,18 @@
1
- from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
2
  from urllib.parse import urlparse, parse_qs
3
  from pytube import YouTube
4
  from huggingface_hub import InferenceClient
5
  import gradio as gr
6
- from langchain_community.document_loaders import YoutubeLoader
7
- from typing import Any, Dict, List
8
 
9
  model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"
10
  client = InferenceClient(model=model_name)
11
 
12
 
13
- def langhchain_summary(link):
14
- loader = YoutubeLoader.from_youtube_url(link, add_video_info=False)
15
-
16
- documents = loader.load()
17
-
18
- transcription = " ".join([doc.page_content for doc in documents])
19
- print(f"Transcription: {transcription}")
20
- return transcription
21
-
22
-
23
  def transcribe_video(url):
24
- print(f"YouTube URL: {url}")
25
  video_id = parse_youtube_url(url)
26
  if video_id:
27
  video_metadata = get_video_metadata(video_id)
28
- # transcript_content = get_transcript_content(video_id)
29
- transcript_content = langhchain_summary(url)
30
- print(f"Transcript Content: {transcript_content}")
31
  transcript_summary = summarise_transcript(transcript_content)
32
  return (
33
  f"Title: {video_metadata['title']}\nAuthor: {video_metadata['author']}",
@@ -57,32 +42,12 @@ def get_video_metadata(video_id):
57
 
58
 
59
  def get_transcript_content(video_id):
60
- # try:
61
- # transcript = YouTubeTranscriptApi.get_transcript(video_id)
62
- # transcript_content = parse_transcript(transcript)
63
- # return transcript_content
64
- # except Exception as e:
65
- # raise e
66
-
67
  try:
68
- transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
69
- except TranscriptsDisabled:
70
- return []
71
-
72
- transcript = transcript_list.find_transcript(["en"])
73
- print(f"Transcript: {transcript}")
74
-
75
- transcript_pieces: List[Dict[str, Any]] = transcript.fetch()
76
-
77
- transcript = " ".join(
78
- map(
79
- lambda transcript_piece: transcript_piece["text"].strip(" "),
80
- transcript_pieces,
81
- )
82
- )
83
- print(f"Transcript: {transcript}")
84
-
85
- return transcript
86
 
87
 
88
  def parse_transcript(transcript):
 
1
+ from youtube_transcript_api import YouTubeTranscriptApi
2
  from urllib.parse import urlparse, parse_qs
3
  from pytube import YouTube
4
  from huggingface_hub import InferenceClient
5
  import gradio as gr
 
 
6
 
7
  model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"
8
  client = InferenceClient(model=model_name)
9
 
10
 
 
 
 
 
 
 
 
 
 
 
11
  def transcribe_video(url):
 
12
  video_id = parse_youtube_url(url)
13
  if video_id:
14
  video_metadata = get_video_metadata(video_id)
15
+ transcript_content = get_transcript_content(video_id)
 
 
16
  transcript_summary = summarise_transcript(transcript_content)
17
  return (
18
  f"Title: {video_metadata['title']}\nAuthor: {video_metadata['author']}",
 
42
 
43
 
44
  def get_transcript_content(video_id):
 
 
 
 
 
 
 
45
  try:
46
+ transcript = YouTubeTranscriptApi.get_transcript(video_id)
47
+ transcript_content = parse_transcript(transcript)
48
+ return transcript_content
49
+ except Exception as e:
50
+ raise e
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
 
53
  def parse_transcript(transcript):
requirements.txt CHANGED
@@ -4,4 +4,3 @@ huggingface-hub
4
  pytube
5
  urllib3
6
  youtube-transcript-api
7
- langchain-community
 
4
  pytube
5
  urllib3
6
  youtube-transcript-api