Spaces:
Sleeping
Sleeping
File size: 3,778 Bytes
dce378c 5d64735 dce378c 5d64735 91fe522 b1aeb47 8b1a899 dce378c 8b1a899 91fe522 8b1a899 2cfede8 5d64735 dce378c 5d64735 b1aeb47 5d64735 b1aeb47 5d64735 dce378c 5d64735 dce378c 161ad03 b1aeb47 2cfede8 b1aeb47 8b1a899 dce378c b1aeb47 dce378c b1aeb47 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
from pytube import YouTube
from google.cloud import speech_v1p1beta1 as speech
from pydub import AudioSegment
import io
import openai
import os
import gradio as gr
# Extract the video ID from a YouTube URL.
def get_yt_video_id(url):
    """Return the video ID embedded in a YouTube URL.

    Recognised forms are ``youtube.com/watch?v=ID``, ``youtube.com/embed/ID``,
    ``youtube.com/v/ID``, ``youtube.com/shorts/ID``, ``youtube.com/live/ID``
    and ``youtu.be/ID``. A missing scheme is tolerated.

    Args:
        url: The link as a string, with or without ``http(s)://``.

    Returns:
        The non-empty video ID string.

    Raises:
        ValueError: If the link is not a recognised YouTube URL or carries
            no video ID.
    """
    from urllib.parse import urlparse, parse_qs

    url = url.strip()
    # urlparse only fills in the hostname when a scheme is present, so add
    # one for bare hosts such as "youtu.be/ID" or "m.youtube.com/watch?v=ID".
    if url.startswith(('youtu', 'www')) or '://' not in url:
        url = 'http://' + url
    parsed = urlparse(url)
    # hostname is None for inputs without a network location; treat that
    # as "no match" instead of failing on the membership test below.
    host = parsed.hostname or ''

    video_id = ''
    if 'youtube' in host:
        if parsed.path == '/watch':
            # A missing "v" parameter yields an empty ID rather than KeyError.
            video_id = parse_qs(parsed.query).get('v', [''])[0]
        elif parsed.path.startswith(('/embed/', '/v/', '/shorts/', '/live/')):
            video_id = parsed.path.split('/')[2]
    elif 'youtu.be' in host:
        video_id = parsed.path[1:]

    # Every unrecognised or empty case ends up here, so callers never
    # receive None or an empty string.
    if not video_id:
        raise ValueError("μ ν¨ν μ νλΈ λ§ν¬κ° μλλλ€.")
    return video_id
# Download the audio track and convert it to WAV format.
def download_and_convert_audio(youtube_url):
    """Download a video's audio track and convert it to 16 kHz mono WAV.

    Args:
        youtube_url: URL of the YouTube video, passed straight to pytube.

    Returns:
        Path of the converted file, always ``"converted_audio.wav"``.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    video = YouTube(youtube_url)
    audio_stream = video.streams.filter(only_audio=True).first()
    # first() gives None when the filter matches nothing; fail with a clear
    # message instead of an AttributeError on None.download().
    if audio_stream is None:
        raise ValueError("No audio-only stream is available for this video.")
    downloaded_path = audio_stream.download(filename="audio.mp4")

    # Resample to 16000 Hz, single channel, and write the result as WAV.
    wav_audio_path = "converted_audio.wav"
    source_audio = AudioSegment.from_file(downloaded_path)
    source_audio.set_frame_rate(16000).set_channels(1).export(wav_audio_path, format="wav")
    # NOTE(review): both file names are fixed, so overlapping calls would
    # overwrite each other's files — confirm this only runs one at a time.
    return wav_audio_path
# Split the audio into chunks.
def split_audio(audio_path, chunk_length_ms=60000):
    """Load a WAV file and cut it into consecutive fixed-length pieces.

    Args:
        audio_path: Path of the WAV file to load.
        chunk_length_ms: Length of each piece; the last piece may be shorter.

    Returns:
        A list of audio segments in their original order.
    """
    recording = AudioSegment.from_wav(audio_path)
    segments = []
    # Step through the recording one window at a time; slicing past the end
    # simply yields the shorter remainder.
    for start in range(0, len(recording), chunk_length_ms):
        stop = start + chunk_length_ms
        segments.append(recording[start:stop])
    return segments
# Convert audio to text using the Google Speech-to-Text API.
def speech_to_text(audio_path):
    """Transcribe a WAV file with the Google Cloud Speech client.

    The file is cut into chunks by ``split_audio`` and each chunk is sent in
    its own ``recognize`` request; the top alternative of every result is
    kept and the pieces are joined with single spaces.

    Args:
        audio_path: Path of the WAV file to transcribe.

    Returns:
        The combined transcript with surrounding whitespace removed, or an
        empty string when nothing was recognised.
    """
    client = speech.SpeechClient()
    # The recognition settings are the same for every chunk, so build them
    # once instead of on every loop iteration.
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,  # 16000 Hz sample rate
        language_code="ko-KR",  # Korean recognition
    )

    pieces = []
    for chunk in split_audio(audio_path):
        # Serialise the chunk to WAV bytes in memory; no temp file needed.
        with io.BytesIO() as buffer:
            chunk.export(buffer, format="wav")
            content = buffer.getvalue()
        response = client.recognize(
            config=config,
            audio=speech.RecognitionAudio(content=content),
        )
        pieces.extend(result.alternatives[0].transcript for result in response.results)
    # Joining once avoids repeated string concatenation inside the loop.
    return " ".join(pieces).strip()
# Summarize text (uses the OpenAI API).
def textToSummary(text):
    """Ask the OpenAI completion endpoint for a short summary of ``text``.

    Args:
        text: The text to summarize; it is appended to a fixed prompt.

    Returns:
        The first completion's text with newlines replaced by spaces and
        surrounding whitespace removed.
    """
    # Read the API key from the environment.
    openai.api_key = os.getenv("OPENAI_API_KEY")
    # NOTE(review): the Completion endpoint and this model name look tied to
    # an older openai SDK — confirm against the pinned package version.
    request = {
        "model": "text-davinci-003",
        "prompt": "Summarize this in 200 words or less:\n\n" + text,
        "temperature": 0.7,
        "max_tokens": 400,
        "top_p": 1.0,
        "frequency_penalty": 0.0,
        "presence_penalty": 1,
    }
    completion = openai.Completion.create(**request)
    raw_summary = completion["choices"][0]["text"]
    # Flatten the answer onto a single line.
    return raw_summary.replace("\n", " ").strip()
# Run the whole summarization pipeline.
def summarize(url):
    """Produce a summary of the speech in a YouTube video.

    Runs the three stages in order: download and convert the audio,
    transcribe it, then summarize the transcript.

    Args:
        url: The YouTube link to process.

    Returns:
        The summary text, or a failure message containing the error text if
        any stage raises.
    """
    try:
        # Stage 1: download the audio and convert it to WAV.
        wav_path = download_and_convert_audio(url)
        # Stage 2: speech to text.
        spoken_text = speech_to_text(wav_path)
        # Stage 3: summarize the transcript.
        return textToSummary(spoken_text)
    except Exception as error:
        # Any failure is reported back as text rather than raised, so the
        # caller always receives a string.
        return f"μμ½μ μ€ν¨νμ΅λλ€: {str(error)}"
# Gradio interface setup.
# User-facing description passed to the Gradio interface.
description = "μ νλΈ λμμμ μλ§μ΄ μλλΌλ μμ± μΈμ κΈ°λ₯μ μ¬μ©ν΄ μμ½ν©λλ€."

# One text input (the URL handed to summarize) and one text output.
demo = gr.Interface(
    fn=summarize,
    inputs="text",
    outputs="textbox",
    description=description,
)
demo.launch()
|