File size: 3,778 Bytes
dce378c
 
5d64735
dce378c
5d64735
91fe522
b1aeb47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8b1a899
 
dce378c
 
 
8b1a899
 
 
 
91fe522
8b1a899
 
2cfede8
5d64735
 
 
 
 
 
dce378c
 
 
5d64735
b1aeb47
5d64735
 
 
 
 
 
b1aeb47
5d64735
 
 
 
 
 
dce378c
5d64735
 
 
dce378c
 
 
 
161ad03
b1aeb47
2cfede8
 
 
 
 
 
 
 
 
 
b1aeb47
 
 
 
8b1a899
 
dce378c
 
 
 
b1aeb47
 
 
 
 
dce378c
b1aeb47
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
from pytube import YouTube
from google.cloud import speech_v1p1beta1 as speech
from pydub import AudioSegment
import io
import openai
import os
import gradio as gr

# Extract the video ID from a YouTube URL
def get_yt_video_id(url):
    """Extract the video ID from a YouTube URL.

    Recognised forms (on any host whose name contains ``youtube``):
    ``/watch?v=ID``, ``/embed/ID``, ``/v/ID``, ``/shorts/ID`` and
    ``/live/ID``; plus ``youtu.be/ID`` short links. URLs that start with
    ``youtu`` or ``www`` and lack a scheme get ``http://`` prepended before
    parsing.

    Args:
        url: The URL string to inspect.

    Returns:
        The video ID as a string.

    Raises:
        ValueError: If the URL is not a recognised YouTube link or carries
            no video ID.
    """
    from urllib.parse import urlparse, parse_qs

    # Runtime message kept exactly as in the original implementation.
    invalid_link_message = "μœ νš¨ν•œ 유튜브 링크가 μ•„λ‹™λ‹ˆλ‹€."

    url = url.strip()
    if url.startswith(('youtu', 'www')):
        url = 'http://' + url

    parsed = urlparse(url)
    # hostname is None when the string has no network location; fall back to
    # an empty string so the membership tests below cannot raise TypeError.
    host = parsed.hostname or ''

    video_id = None
    if 'youtube' in host:
        if parsed.path == '/watch':
            # .get() avoids a KeyError when the "v" parameter is missing.
            video_id = parse_qs(parsed.query).get('v', [None])[0]
        elif parsed.path.startswith(('/embed/', '/v/', '/shorts/', '/live/')):
            video_id = parsed.path.split('/')[2]
    elif 'youtu.be' in host:
        video_id = parsed.path[1:]

    # Covers non-YouTube hosts, unsupported paths and empty IDs alike, so the
    # function never returns None or an empty string.
    if not video_id:
        raise ValueError(invalid_link_message)
    return video_id

# Download the audio track and convert it to WAV
def download_and_convert_audio(youtube_url):
    """Download a YouTube video's audio and convert it to 16 kHz mono WAV.

    The first audio-only stream is downloaded to ``audio.mp4`` and then
    re-exported as ``converted_audio.wav``. The intermediate download is
    removed once conversion has been attempted.

    Args:
        youtube_url: URL of the YouTube video.

    Returns:
        Path of the converted WAV file (``"converted_audio.wav"``).

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    yt = YouTube(youtube_url)
    stream = yt.streams.filter(only_audio=True).first()
    if stream is None:
        # Without this check the next line fails with an opaque
        # AttributeError on None.
        raise ValueError(f"No audio-only stream available for {youtube_url!r}")
    audio_path = stream.download(filename="audio.mp4")

    wav_audio_path = "converted_audio.wav"
    try:
        # Convert to WAV at a 16000 Hz sample rate with a single channel.
        audio = AudioSegment.from_file(audio_path)
        audio.set_frame_rate(16000).set_channels(1).export(wav_audio_path, format="wav")
    finally:
        # The downloaded file is only an intermediate artifact; remove it on
        # a best-effort basis so stale downloads do not accumulate.
        try:
            os.remove(audio_path)
        except OSError:
            pass

    return wav_audio_path

# Split the audio into chunks
def split_audio(audio_path, chunk_length_ms=60000):
    """Load a WAV file and cut it into consecutive fixed-length pieces.

    Args:
        audio_path: Path of the WAV file to load.
        chunk_length_ms: Length of each piece; the final piece may be
            shorter.

    Returns:
        A list of slices of the loaded audio, in order.
    """
    recording = AudioSegment.from_wav(audio_path)
    segments = []
    for start_ms in range(0, len(recording), chunk_length_ms):
        end_ms = start_ms + chunk_length_ms
        segments.append(recording[start_ms:end_ms])
    return segments

# Convert audio to text using the Google Speech-to-Text API
def speech_to_text(audio_path):
    """Transcribe a WAV file with the Google Speech-to-Text API.

    The audio is split with ``split_audio`` and each chunk is sent to
    ``client.recognize`` as in-memory WAV bytes, configured as LINEAR16,
    16000 Hz, language ``ko-KR``.

    Args:
        audio_path: Path of the WAV file to transcribe.

    Returns:
        The top alternative of every recognition result, joined with single
        spaces and stripped of surrounding whitespace.
    """
    client = speech.SpeechClient()

    # The recognition settings are identical for every chunk, so build them
    # once instead of once per iteration.
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,  # 16000 Hz sample rate
        language_code="ko-KR",  # Korean recognition
    )

    pieces = []
    for chunk in split_audio(audio_path):  # split the audio into chunks
        with io.BytesIO() as buffer:
            chunk.export(buffer, format="wav")
            content = buffer.getvalue()

        response = client.recognize(
            config=config,
            audio=speech.RecognitionAudio(content=content),
        )
        # Skip results without alternatives rather than failing on [0].
        pieces.extend(
            result.alternatives[0].transcript
            for result in response.results
            if result.alternatives
        )

    return " ".join(pieces).strip()

# ν…μŠ€νŠΈλ₯Ό μš”μ•½ν•˜λŠ” ν•¨μˆ˜ (OpenAI API μ‚¬μš©)
def textToSummary(text):
    """Summarize *text* in 200 words or less via the OpenAI completions API.

    The API key is read from the ``OPENAI_API_KEY`` environment variable on
    every call.

    Args:
        text: The text to summarize.

    Returns:
        The first completion choice, with newlines replaced by spaces and
        surrounding whitespace stripped.
    """
    # Read the OpenAI API key from the environment.
    openai.api_key = os.getenv("OPENAI_API_KEY")
    response = openai.Completion.create(
        # text-davinci-003 has been retired by OpenAI, so requests naming it
        # fail. gpt-3.5-turbo-instruct is the documented replacement on the
        # same legacy completions endpoint.
        model="gpt-3.5-turbo-instruct",
        prompt="Summarize this in 200 words or less:\n\n" + text,
        temperature=0.7,
        max_tokens=400,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=1,
    )
    return response["choices"][0]["text"].replace("\n", " ").strip()

# Run the whole summarization pipeline
def summarize(url):
    """Run the full pipeline for one YouTube URL and return the summary.

    Downloads and converts the audio, transcribes it, then summarizes the
    transcript. Any exception raised along the way is caught and turned into
    a failure message string, so this function always returns text.

    Args:
        url: The YouTube URL to summarize.

    Returns:
        The summary, or a failure message containing the error text.
    """
    try:
        # Download + convert, then transcribe, then summarize.
        wav_path = download_and_convert_audio(url)
        spoken_text = speech_to_text(wav_path)
        result = textToSummary(spoken_text)
    except Exception as err:
        return f"μš”μ•½μ— μ‹€νŒ¨ν–ˆμŠ΅λ‹ˆλ‹€: {err!s}"
    return result

# Gradio interface setup
# Blurb passed to the Gradio interface as its description.
description = "유튜브 λ™μ˜μƒμ˜ μžλ§‰μ΄ 없더라도 μŒμ„± 인식 κΈ°λŠ₯을 μ‚¬μš©ν•΄ μš”μ•½ν•©λ‹ˆλ‹€."

# Text in, text out: wire `summarize` into a Gradio interface and launch it
# as soon as the module is executed.
gr.Interface(
    description=description,
    outputs="textbox",
    inputs="text",
    fn=summarize,
).launch()