SUHHHH committed on
Commit
dce378c
•
1 Parent(s): b1aeb47

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -20
app.py CHANGED
@@ -1,7 +1,6 @@
1
- # URL To YoutubeID
2
- from urllib.parse import urlparse
3
- from youtube_transcript_api import YouTubeTranscriptApi
4
- from youtube_transcript_api.formatters import TextFormatter
5
  import openai
6
  import os
7
  import gradio as gr
@@ -25,20 +24,36 @@ def get_yt_video_id(url):
25
  else:
26
  raise ValueError("유효한 유튜브 링크가 아닙니다.")
27
 
28
- # 유튜브 영상의 자막을 가져오는 함수
29
- def transcribe(youtubeId):
30
- transcription = YouTubeTranscriptApi.get_transcript(youtubeId)
31
- return transcription
 
 
32
 
33
- # μžλ§‰μ„ ν…μŠ€νŠΈλ‘œ λ³€ν™˜ν•˜λŠ” ν•¨μˆ˜
34
- formatter = TextFormatter()
 
35
 
36
- def transcriptToText(transcript):
37
- text = formatter.format_transcript(transcript)
38
- text = text.replace("\n", " ")
39
- return text
40
 
41
- # ν…μŠ€νŠΈλ₯Ό μš”μ•½ν•˜λŠ” ν•¨μˆ˜ (OpenAI API μ‚¬μš©, ν™˜κ²½ λ³€μˆ˜λ‘œ ν‚€λ₯Ό κ°€μ Έμ˜΄)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def textToSummary(text):
43
  openai.api_key = os.getenv("OPENAI_API_KEY") # 환경 변수에서 OpenAI API 키 가져오기
44
  response = openai.Completion.create(
@@ -55,16 +70,18 @@ def textToSummary(text):
55
  # 전체 요약 프로세스를 처리하는 함수
56
  def summarize(url):
57
  try:
58
- videoId = get_yt_video_id(url)
59
- transcript = transcribe(videoId)
60
- text = transcriptToText(transcript)
61
- summary = textToSummary(text)
 
 
62
  return summary
63
  except Exception as e:
64
  return f"요약에 실패했습니다: {str(e)}"
65
 
66
  # Gradio 인터페이스 설정
67
- description = "요약할 유튜브 동영상 링크를 입력하세요"
68
 
69
  gr.Interface(fn=summarize,
70
  inputs="text",
 
1
+ from pytube import YouTube
2
+ from google.cloud import speech_v1p1beta1 as speech
3
+ import io
 
4
  import openai
5
  import os
6
  import gradio as gr
 
24
  else:
25
  raise ValueError("유효한 유튜브 링크가 아닙니다.")
26
 
27
+ # 오디오 추출 함수
28
+ def download_audio(youtube_url):
29
+ yt = YouTube(youtube_url)
30
+ stream = yt.streams.filter(only_audio=True).first()
31
+ audio_path = stream.download(filename="audio.mp4")
32
+ return audio_path
33
 
34
+ # Google Speech-to-Text API를 사용하여 오디오를 텍스트로 변환
35
+ def speech_to_text(audio_path):
36
+ client = speech.SpeechClient()
37
 
38
+ with io.open(audio_path, "rb") as audio_file:
39
+ content = audio_file.read()
 
 
40
 
41
+ audio = speech.RecognitionAudio(content=content)
42
+ config = speech.RecognitionConfig(
43
+ encoding=speech.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED,
44
+ sample_rate_hertz=16000,
45
+ language_code="ko-KR" # 한국어 인식
46
+ )
47
+
48
+ response = client.recognize(config=config, audio=audio)
49
+
50
+ transcript = ""
51
+ for result in response.results:
52
+ transcript += result.alternatives[0].transcript + " "
53
+
54
+ return transcript.strip()
55
+
56
+ # ν…μŠ€νŠΈλ₯Ό μš”μ•½ν•˜λŠ” ν•¨μˆ˜ (OpenAI API μ‚¬μš©)
57
  def textToSummary(text):
58
  openai.api_key = os.getenv("OPENAI_API_KEY") # 환경 변수에서 OpenAI API 키 가져오기
59
  response = openai.Completion.create(
 
70
  # 전체 요약 프로세스를 처리하는 함수
71
  def summarize(url):
72
  try:
73
+ # 유튜브 오디오 다운로드
74
+ audio_path = download_audio(url)
75
+ # μŒμ„±μ„ ν…μŠ€νŠΈλ‘œ λ³€ν™˜
76
+ transcript = speech_to_text(audio_path)
77
+ # ν…μŠ€νŠΈ μš”μ•½
78
+ summary = textToSummary(transcript)
79
  return summary
80
  except Exception as e:
81
  return f"요약에 실패했습니다: {str(e)}"
82
 
83
  # Gradio 인터페이스 설정
84
+ description = "유튜브 동영상의 자막이 없더라도 음성 인식 기능을 사용해 요약합니다."
85
 
86
  gr.Interface(fn=summarize,
87
  inputs="text",