adamchanadam
committed on
Commit
•
4abfe80
1
Parent(s):
a84e622
Upload 6 files
Browse files- Dockerfile +50 -0
- app.py +526 -0
- config.json +14 -0
- requirements.txt +9 -0
- static/main.js +360 -0
- templates/index.html +104 -0
Dockerfile
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Base image: official NVIDIA CUDA 12.1.1 runtime with cuDNN 8 on Ubuntu 22.04.
FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04

# Skip the interactive tzdata prompt during package installation.
ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=Europe/London

# Install system dependencies and configure the time zone.
RUN apt-get update && apt-get install -y \
    python3 \
    python3-pip \
    python3-distutils \
    ffmpeg \
    tzdata \
    && ln -fs /usr/share/zoneinfo/$TZ /etc/localtime \
    && dpkg-reconfigure --frontend noninteractive tzdata \
    && rm -rf /var/lib/apt/lists/*

# Create the unprivileged user and hand it the application directory.
# The directory is created and chown'ed explicitly so that the application,
# which runs as `user` and writes app.log next to app.py, can write there
# regardless of which owner WORKDIR would give a directory it creates itself.
RUN useradd -m -u 1000 user \
    && mkdir -p /app \
    && chown user:user /app
USER user
ENV PATH="/home/user/.local/bin:$PATH"

# Working directory for the remaining build steps and for the running app.
WORKDIR /app

# Upgrade pip and install the build tooling.
RUN pip3 install --no-cache-dir --upgrade pip setuptools wheel

# Install PyTorch 2.1.2 built against CUDA 12.1.
RUN pip3 install --no-cache-dir torch==2.1.2+cu121 torchvision==0.16.2+cu121 torchaudio==2.1.2 --extra-index-url https://download.pytorch.org/whl/cu121

# Copy requirements.txt first so the dependency layer is cached independently
# of application code changes, then install the Python dependencies.
COPY --chown=user requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

# Download the spaCy Chinese model.
RUN python3 -m spacy download zh_core_web_md

# Copy the application files.
COPY --chown=user . /app

# Port read by app.py through the PORT environment variable.
ENV PORT=7860

# Expose the port the application listens on.
EXPOSE 7860

# Start the Flask application.
CMD ["python3", "app.py"]
|
app.py
ADDED
@@ -0,0 +1,526 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import json
|
3 |
+
import datetime
|
4 |
+
import subprocess
|
5 |
+
from queue import Queue
|
6 |
+
from threading import Thread
|
7 |
+
|
8 |
+
import torch
|
9 |
+
import yt_dlp
|
10 |
+
from faster_whisper import WhisperModel
|
11 |
+
from flask import Flask, render_template, request, Response, jsonify
|
12 |
+
from openai import OpenAI
|
13 |
+
import spacy
|
14 |
+
from collections import Counter
|
15 |
+
|
16 |
+
import time
|
17 |
+
import uuid
|
18 |
+
|
19 |
+
import logging
|
20 |
+
from logging.handlers import RotatingFileHandler
|
21 |
+
from werkzeug.utils import secure_filename
|
22 |
+
from collections import deque
|
23 |
+
|
24 |
+
# Basic logging configuration for the root logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# basicConfig() already attached a console handler to the root logger. This
# module attaches its own console handler below, so stop propagation to avoid
# every record being printed twice.
logger.propagate = False

# File handler: RotatingFileHandler caps the log size (10 MiB, 5 backups).
log_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'app.log')
file_handler = RotatingFileHandler(log_file_path, maxBytes=10*1024*1024, backupCount=5, encoding='utf-8')
file_handler.setLevel(logging.DEBUG)

# Console handler.
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)

# Shared formatter for both handlers.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler.setFormatter(formatter)
console_handler.setFormatter(formatter)

# Attach the handlers to this module's logger.
logger.addHandler(file_handler)
logger.addHandler(console_handler)

# Log level for other modules.
logging.getLogger("faster_whisper").setLevel(logging.INFO)

os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'

app = Flask(__name__, static_folder='static', static_url_path='/static')

# Load the configuration file that sits next to this script.
current_directory = os.path.dirname(os.path.realpath(__file__))
config_file_path = os.path.join(current_directory, 'config.json')
try:
    with open(config_file_path, 'r', encoding='utf-8') as f:
        config = json.load(f)
    logger.info("成功加載配置文件")
except Exception:
    # The app cannot run without its configuration: log the traceback and abort.
    logger.exception("加載配置文件時發生錯誤")
    raise

# OpenAI client; the API key is read from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Initialise spaCy.
nlp = spacy.load(config['spacy_model'])

# Initialise the Whisper model.
model = WhisperModel(config['whisper_model'], device="auto", compute_type=config['whisper_compute_type'])

# Make sure the configured FFmpeg directory is on PATH. Compare against the
# individual PATH entries (a substring test would match unrelated, longer
# directories) and tolerate PATH being unset.
ffmpeg_path = config['ffmpeg_path']
_path_entries = [entry for entry in os.environ.get("PATH", "").split(os.pathsep) if entry]
if ffmpeg_path not in _path_entries:
    os.environ["PATH"] = os.pathsep.join(_path_entries + [ffmpeg_path])
|
77 |
+
|
78 |
+
def send_sse_message(q, data):
    """Enqueue one SSE payload on ``q`` without blocking.

    Args:
        q: Queue that carries messages to the event stream.
        data: Payload to enqueue.
    """
    q.put(data, block=False)
|
80 |
+
|
81 |
+
def clean_filename(filename):
    """Return ``filename`` reduced to characters that are safe in a file name.

    Keeps alphanumeric characters (as judged by ``str.isalnum``) plus space,
    dot and underscore, drops everything else, and removes trailing whitespace.
    """
    allowed_extras = {' ', '.', '_'}
    kept = [ch for ch in filename if ch.isalnum() or ch in allowed_extras]
    return ''.join(kept).rstrip()
|
83 |
+
|
84 |
+
def download_audio(youtube_url, save_directory, q):
    """Download a YouTube video's audio track and convert it to MP3.

    Args:
        youtube_url: URL handed to yt-dlp.
        save_directory: Directory in which the audio file is written.
        q: Queue that receives progress/status messages.

    Returns:
        A ``(mp3_path, video_title)`` tuple; the title is sanitised with
        ``clean_filename``.

    Raises:
        FileNotFoundError: If no converted MP3 can be found after the download.
        Exception: Any yt-dlp error is reported on ``q`` and re-raised.
    """
    send_sse_message(q, {"status": "開始下載 YouTube 音頻..."})
    unique_id = str(uuid.uuid4())[:8]  # short unique id so concurrent downloads do not collide
    output_filename = f"audio_{unique_id}"
    output_path = os.path.join(save_directory, output_filename)

    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': output_path + ".%(ext)s",
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'ffmpeg_location': ffmpeg_path,
        'quiet': True
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(youtube_url, download=True)
        # The title may be missing or None; fall back to a placeholder.
        video_title = clean_filename(info.get('title') or 'Untitled')

        # The download and the MP3 post-processing have finished by the time
        # extract_info() returns, so no sleep is needed before looking for the
        # file. The expected name follows directly from the output template.
        converted_output_path = output_path + ".mp3"
        if not os.path.exists(converted_output_path):
            # Fallback: look for any MP3 that carries this download's prefix.
            candidates = [
                name for name in os.listdir(save_directory)
                if name.startswith(output_filename) and name.endswith('.mp3')
            ]
            if not candidates:
                raise FileNotFoundError("無法找到下載的音頻文件")
            converted_output_path = os.path.join(save_directory, candidates[0])

        send_sse_message(q, {"status": f"音頻下載完成: {video_title}"})
        return converted_output_path, video_title
    except Exception as e:
        send_sse_message(q, {"status": f"下載音頻時發生錯誤: {str(e)}"})
        raise
|
123 |
+
|
124 |
+
def process_local_video(video_path, save_directory, q):
    """Extract the audio track of a local video file into an MP3 with FFmpeg.

    Args:
        video_path: Path of the input video file.
        save_directory: Directory in which the MP3 is written.
        q: Queue that receives progress/status messages.

    Returns:
        A ``(mp3_path, video_title)`` tuple; the title is the input file name
        without its extension.

    Raises:
        FileNotFoundError: If ``video_path`` does not exist.
        PermissionError: If the output directory is not writable.
        subprocess.CalledProcessError: If FFmpeg exits with a non-zero status.
    """
    send_sse_message(q, {"status": "正在處理本地視頻..."})
    video_title = os.path.splitext(os.path.basename(video_path))[0]
    output_path = os.path.join(save_directory, f"{video_title}_audio.mp3")

    ffmpeg_command = [
        os.path.join(ffmpeg_path, 'ffmpeg'),  # full path to the configured binary
        '-y',  # overwrite a stale output file instead of failing on the overwrite prompt
        '-i', video_path,
        '-vn',  # drop the video stream
        '-acodec', 'libmp3lame',  # MP3 encoder
        '-q:a', '2',  # audio quality setting
        output_path
    ]

    logger.info(f"FFmpeg 命令: {' '.join(ffmpeg_command)}")
    logger.info(f"輸入視頻路徑: {video_path}")
    logger.info(f"輸出音頻路徑: {output_path}")

    try:
        # The input file must exist.
        if not os.path.exists(video_path):
            raise FileNotFoundError(f"輸入視頻文件不存在: {video_path}")

        # The output directory must be writable.
        if not os.access(os.path.dirname(output_path), os.W_OK):
            raise PermissionError(f"沒有寫入權限: {os.path.dirname(output_path)}")

        result = subprocess.run(ffmpeg_command, check=True, capture_output=True, text=True)
        logger.info(f"FFmpeg 輸出: {result.stdout}")
        send_sse_message(q, {"status": f"本地視頻處理完成: {video_title}"})
        return output_path, video_title
    except subprocess.CalledProcessError as e:
        # FFmpeg ran but failed: include its captured output in the report.
        error_message = f"處理本地視頻時出錯: {e}\n\nFFmpeg 輸出:\n{e.stdout}\n\nFFmpeg 錯誤:\n{e.stderr}"
        logger.error(error_message)
        send_sse_message(q, {"status": "錯誤", "error": error_message})
        raise
    except Exception as e:
        error_message = f"處理本地視頻時出現意外錯誤: {str(e)}"
        logger.error(error_message)
        send_sse_message(q, {"status": "錯誤", "error": error_message})
        raise
|
165 |
+
|
166 |
+
def generate_transcript(audio_path, video_title, q):
    """Transcribe ``audio_path`` with the module-level Whisper model.

    The video title is passed to the model as the initial prompt. Beam size,
    language and temperature come from the loaded configuration.

    Args:
        audio_path: Path of the audio file to transcribe.
        video_title: Title used as ``initial_prompt``.
        q: Queue that receives progress/status messages.

    Returns:
        The transcript, one segment text per line.
    """
    send_sse_message(q, {"status": "開始音頻轉錄..."})

    transcribe_options = {
        'beam_size': config['whisper_beam_size'],
        'language': config['whisper_language'],
        'temperature': config['whisper_temperature'],
        'initial_prompt': video_title,
        'repetition_penalty': 2,
        'condition_on_previous_text': False,
    }
    pieces, info = model.transcribe(audio_path, **transcribe_options)

    transcript = "\n".join(piece.text for piece in pieces)
    send_sse_message(q, {"status": f"音頻轉錄完成,檢測到的語言: {info.language}", "transcript": transcript})
    return transcript
|
180 |
+
|
181 |
+
def smart_split_transcript(transcript, q):
    """Group the transcript's sentences into chunks of roughly 1024 characters.

    Sentences come from the module-level spaCy pipeline. A sentence is added
    to the current chunk while the combined length stays within the limit;
    otherwise the current chunk is closed and the sentence starts a new one.

    Args:
        transcript: Full transcript text.
        q: Queue that receives progress/status messages.

    Returns:
        List of chunk strings, each stripped of surrounding whitespace.
    """
    send_sse_message(q, {"status": "開始智能分割轉錄文本..."})
    max_length = 1024
    chunks = []
    buffer = ""

    for sentence in nlp(transcript).sents:
        piece = sentence.text
        if len(buffer) + len(piece) > max_length:
            # Limit exceeded: flush what has been collected and start over.
            if buffer:
                chunks.append(buffer.strip())
            buffer = piece
            continue
        buffer = f"{buffer} {piece}"

    # Flush the trailing chunk.
    if buffer:
        chunks.append(buffer.strip())

    send_sse_message(q, {"status": f"轉錄文本分割完成,共 {len(chunks)} 個段落"})
    return chunks
|
201 |
+
|
202 |
+
def extract_keywords_and_entities(text):
    """Return the five most frequent lemmas and the named entities of ``text``.

    Stop words and punctuation tokens are excluded from the lemma count.

    Returns:
        A ``(keywords, entities)`` tuple: ``keywords`` is a list of lemma
        strings and ``entities`` a list of ``(entity_text, entity_label)`` tuples.
    """
    doc = nlp(text)
    lemma_counts = Counter(
        tok.lemma_ for tok in doc if not (tok.is_stop or tok.is_punct)
    )
    top_keywords = [lemma for lemma, _count in lemma_counts.most_common(5)]
    named_entities = [(ent.text, ent.label_) for ent in doc.ents]
    return top_keywords, named_entities
|
208 |
+
|
209 |
+
def process_youtube_description(description):
    """Ask the OpenAI model to strip channel promotion from a video description.

    Args:
        description: Raw YouTube description; may be empty or ``None``.

    Returns:
        The cleaned description, or an empty string when there is nothing to
        process or the model returns no text.
    """
    # Nothing to clean: skip the API call instead of prompting with empty input.
    if not description or not description.strip():
        return ""

    prompt = f"""請處理以下 YouTube 影片描述,移除所有渠道宣傳內容後,保留原文。

描述內容:
{description}"""

    response = client.chat.completions.create(
        model=config['openai_model'],
        messages=[{"role": "system", "content": prompt}],
        temperature=0.1,
        max_tokens=500
    )

    # message.content can be None; treat that as empty text.
    processed_description = (response.choices[0].message.content or "").strip()

    # Print the processed description to the terminal.
    print("處理後的 YouTube 描述:")
    print(processed_description)
    print("------------------------")

    return processed_description
|
230 |
+
|
231 |
+
def get_openai_summary(segment, video_title, is_final_summary, keywords, entities, processed_description, q):
    """Summarise a transcript segment, or merge segment summaries, via OpenAI.

    Args:
        segment: Transcript chunk to rewrite, or (for the final summary) the
            concatenated segment summaries.
        video_title: Title of the video, embedded in the prompt.
        is_final_summary: ``True`` to build the final overall summary,
            ``False`` to summarise a single transcript segment.
        keywords: Keyword strings for the segment (ignored for the final summary).
        entities: ``(text, label)`` tuples for the segment (ignored for the
            final summary).
        processed_description: Cleaned video description used as reference
            information in the prompt.
        q: Unused here; kept so existing callers keep working.

    Returns:
        The model's reply with surrounding whitespace removed (empty string
        when the model returns no text).
    """
    if is_final_summary:
        prompt = f"""以下是YouTube視頻'{video_title}'的多個段落摘要。請生成一個深入且全面的最終摘要,盡力保留主要內容、資訊細節、關鍵點和結論。摘要應該是連貫的、有條理的、詳細的,並且避免重複信息。在內容結尾,加入能夠方便搜尋器和 SEO 找到的 3 個 Hash Tag。請用繁體中文(香港)回應。

影片描述提供的可靠資訊 (請特別使用來補充和糾正摘要中的信息,尤其是人名或專有名詞):
{processed_description}



以下是待處理的摘要內容:
{segment}"""
    else:
        keywords_str = ", ".join(keywords)
        entities_str = ", ".join([f"{text}({label})" for text, label in entities])
        prompt = f"""以下內容是YouTube視頻的部份字幕文本,每行以短句顯示,閱讀時需要將多行組合一起才是一句完整的句子,偶爾會出現音譯的錯別字,請修正。內容主題是關於:'{video_title}',其中包含的關鍵詞有:{keywords_str},和以下的NER實體:{entities_str}。

影片描述提供的可靠資訊 (請特別使用來補充和糾正摘要中的信息,尤其是人名或專有名詞):
{processed_description}

請根據每個NER實體的意思,以及上述描述資訊,以不少於 200 字的繁體中文(香港) 重組文章段落。目標是盡量抽取與主題有關的所有觀點、事件、案例、學問、步驟、方法、時間、人物、數據、名詞的基礎資料,建構成一篇連貫的、全面的、詳細的紀錄。請特別注意使用描述資訊來糾正可能的錯誤,尤其是人名和地名。忽略重複的、單純抒發個人情緒的訊息、與 Youtuber 個人宣傳的訊息。

你要處理的內容如下:
{segment}"""

    response = client.chat.completions.create(
        model=config['openai_model'],
        messages=[{"role": "system", "content": prompt}],
        temperature=0.6,
        max_tokens=1000
    )

    # message.content can be None; treat that as empty text.
    summary = (response.choices[0].message.content or "").strip()
    return summary
|
264 |
+
|
265 |
+
def save_summary(text, video_title, url_or_path, save_directory):
    """Write the final summary to a timestamped text file.

    The file is named ``GPT_Summary_<title>_<timestamp>.txt`` and starts with
    a two-line header (video title, URL or path). If ``text`` already begins
    with such a header it is removed first so the header is not duplicated.

    Args:
        text: Summary text, with or without the two-line header.
        video_title: Title written in the header and used (sanitised and
            truncated to 20 characters) in the file name.
        url_or_path: Source URL or local path written in the header.
        save_directory: Directory in which the file is created.
    """
    current_time = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    cleaned_title = clean_filename(video_title)[:20]
    summary_file_name = f"GPT_Summary_{cleaned_title}_{current_time}.txt"
    summary_file_path = os.path.join(save_directory, summary_file_name)

    # Drop an existing title/URL header. The length check keeps single-line
    # text from raising IndexError on lines[1].
    lines = text.split('\n')
    has_header = (
        len(lines) >= 2
        and lines[0].startswith("影片名稱:")
        and lines[1].startswith("網址或路徑:")
    )
    if has_header:
        # Also drop the blank separator line(s) that followed the old header,
        # otherwise the rebuilt text would gain an extra empty line.
        text = '\n'.join(lines[2:]).lstrip('\n')

    summary_text = f"影片名稱:\"{video_title}\"\n網址或路徑:\"{url_or_path}\"\n\n{text}"

    with open(summary_file_path, "w", encoding="utf-8") as file:
        file.write(summary_text)
|
280 |
+
|
281 |
+
def save_transcript(transcript, video_title, url_or_path, save_directory):
    """Write the transcript to ``Transcript_<title>_<timestamp>.txt``.

    The file starts with a two-line header (video title, URL or path) followed
    by a blank line and the transcript. The title part of the file name is
    sanitised and truncated to 20 characters.
    """
    timestamp = f"{datetime.datetime.now():%Y%m%d%H%M%S}"
    short_title = clean_filename(video_title)[:20]
    transcript_file_path = os.path.join(
        save_directory, f"Transcript_{short_title}_{timestamp}.txt"
    )

    contents = f'影片名稱:"{video_title}"\n網址或路徑:"{url_or_path}"\n\n{transcript}'
    with open(transcript_file_path, "w", encoding="utf-8") as out:
        out.write(contents)

    logger.info(f"轉錄文本已保存至 {transcript_file_path}")
|
291 |
+
|
292 |
+
def save_segment_summary(summary_text, segment_index, video_title, save_directory):
    """Write one segment summary to its own timestamped text file.

    The file is named ``Segment_Summary_<title>_<index>_<timestamp>.txt``; the
    title part is sanitised and truncated to 20 characters.
    """
    timestamp = f"{datetime.datetime.now():%Y%m%d%H%M%S}"
    short_title = clean_filename(video_title)[:20]
    summary_file_path = os.path.join(
        save_directory,
        f"Segment_Summary_{short_title}_{segment_index}_{timestamp}.txt",
    )

    with open(summary_file_path, "w", encoding="utf-8") as out:
        out.write(summary_text)

    logger.info(f"段落摘要已保存至 {summary_file_path}")
|
302 |
+
|
303 |
+
def _remove_temp_file(path, label, q):
    """Delete ``path`` and report the outcome to the log and the SSE queue."""
    try:
        os.remove(path)
        logger.info(f"{label}已刪除")
        send_sse_message(q, {"status": f"{label}已刪除"})
    except OSError as e:
        logger.error(f"無法刪除{label}: {str(e)}")
        send_sse_message(q, {"status": f"無法刪除{label}: {str(e)}"})


def process_video(url_or_path, q, local_video_description=''):
    """Run the whole pipeline for one video and stream progress over ``q``.

    Steps: obtain the audio (YouTube download or local FFmpeg extraction),
    transcribe it, split the transcript, summarise every segment, build the
    final summary, save the results and remove temporary files.

    The terminal ``"處理完成"`` message is sent exactly once and last. The
    event stream stops reading at the first message with that status, so
    anything queued after it would never reach the client.

    Any exception is logged and reported on ``q`` as a ``"錯誤: ..."`` status;
    nothing is raised to the caller.

    Args:
        url_or_path: YouTube URL (starts with ``http``) or local file path.
        q: Queue that receives progress/status messages.
        local_video_description: Optional user-supplied description used for
            local files.
    """
    try:
        logger.info(f"開始處理視頻: {url_or_path}")
        save_directory = config['save_directory']
        is_remote = url_or_path.startswith('http')

        processed_description = ""
        if is_remote:
            logger.info("檢測到 YouTube URL,開始獲取視頻信息")
            ydl_opts = {'quiet': True}
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                video_info = ydl.extract_info(url_or_path, download=False)

            # Not every video carries all metadata fields; fall back instead
            # of failing with KeyError/TypeError.
            raw_description = video_info.get('description') or ''
            duration = video_info.get('duration')
            video_data = {
                'title': video_info.get('title') or 'Untitled',
                'duration': str(datetime.timedelta(seconds=duration)) if duration is not None else 'N/A',
                'view_count': video_info.get('view_count', 'N/A'),
                'like_count': video_info.get('like_count', 'N/A'),
                'description': raw_description
            }
            send_sse_message(q, {"status": "獲取到視頻信息", "video_info": video_data})

            # Clean the YouTube description for use as reference information.
            processed_description = process_youtube_description(raw_description)

            logger.info("開始下載 YouTube 音頻")
            audio_path, video_title = download_audio(url_or_path, save_directory, q)
        else:
            logger.info("檢測到本地文件路徑,開始處理本地視頻")
            audio_path, video_title = process_local_video(url_or_path, save_directory, q)
            processed_description = local_video_description if local_video_description else "這是一個本地視頻文件,用戶沒有提供視頻描述。"

        if not audio_path or not os.path.exists(audio_path):
            raise FileNotFoundError(f"音頻文件不存在: {audio_path}")

        logger.info("開始生成轉錄文本")
        transcript = generate_transcript(audio_path, video_title, q)

        # Save the transcript.
        save_transcript(transcript, video_title, url_or_path, save_directory)

        logger.info("開始分割轉錄文本")
        segments = smart_split_transcript(transcript, q)

        all_summaries = []
        for i, segment in enumerate(segments, start=1):
            logger.info(f"開始為文本段 {i}/{len(segments)} 生成摘要")
            send_sse_message(q, {"status": f"正在為文本段 {i}/{len(segments)} 生成摘要..."})
            keywords, entities = extract_keywords_and_entities(segment)
            segment_summary = get_openai_summary(segment, video_title, False, keywords, entities, processed_description, q)
            if segment_summary:
                all_summaries.append(segment_summary)
                save_segment_summary(segment_summary, i, video_title, save_directory)
                send_sse_message(q, {"status": f"段落 {i} 摘要完成", "summary": segment_summary})

        logger.info("開始生成最終摘要")
        send_sse_message(q, {"status": "正在生成最終摘要..."})
        all_summaries_text = "\n\n".join(all_summaries)
        final_summary = get_openai_summary(all_summaries_text, video_title, True, [], [], processed_description, q)
        # Record the final summary as a version in summary_versions.
        summary_versions.append(final_summary)

        # The saved copy carries the video title and URL/path as a header.
        final_summary_with_info = f'影片名稱:"{video_title}"\n網址或路徑:"{url_or_path}"\n\n{final_summary}'

        # Save the final summary.
        logger.info("保存最終摘要")
        save_summary(final_summary_with_info, video_title, url_or_path, save_directory)

        # Remove the temporary audio file.
        if os.path.exists(audio_path):
            _remove_temp_file(audio_path, "臨時音頻文件", q)

        # Remove the temporary upload when the source was a local .mp4 file.
        if not is_remote and url_or_path.lower().endswith('.mp4'):
            _remove_temp_file(url_or_path, "臨時上傳的 .mp4 文件", q)

        logger.info("視頻處理完成")

        # Terminal message, sent last so that all earlier status messages are
        # delivered. The payload matches the first "處理完成" message of the
        # previous implementation, which was the only one clients received.
        # NOTE(review): summary_versions is shared module state, so
        # total_versions also counts summaries from earlier videos — confirm
        # whether it should be reset per video.
        send_sse_message(q, {
            "status": "處理完成",
            "final_summary": final_summary,
            "version": 0,
            "total_versions": len(summary_versions)
        })

    except Exception as e:
        logger.exception("處理視頻時發生錯誤")
        send_sse_message(q, {"status": f"錯誤: {str(e)}"})
|
407 |
+
|
408 |
+
|
409 |
+
# Module-level state for summary refinement.
# Maximum number of refinements; defaults to 5 when config.json has no entry.
max_refinement_count = config.get('max_refinement_count', 5)
# Bounded history of summaries: the original plus every allowed refinement.
summary_versions = deque(maxlen=max_refinement_count + 1)
# Number of refinements served so far.
refinement_count = 0
|
413 |
+
|
414 |
+
# 添加新的函數:
|
415 |
+
def refine_final_summary(original_summary, user_feedback, video_title, processed_description):
    """Ask the OpenAI model to revise a summary according to user feedback.

    Args:
        original_summary: Summary text to improve.
        user_feedback: The user's instructions for the revision.
        video_title: Title of the video, embedded in the prompt.
        processed_description: Accepted for interface compatibility; it is not
            included in the prompt.

    Returns:
        The revised summary with surrounding whitespace removed (empty string
        when the model returns no text).
    """
    prompt = f"""你是一個專業的廣東話視頻內容摘要編輯。請根據用戶的反饋,改進以下內容摘要。標題是"{video_title}"。

原始摘要:
{original_summary}

用戶反饋:
{user_feedback}


請遵循以下指引:
1. 仔細閱讀原始摘要和用戶反饋,以用戶反饋的指示作為優先原則。
2. 根據用戶反饋,補充、修正在原始摘要內,任何錯誤或不準確的資訊,確保摘要全面涵蓋主題內容。
3. 保留原始摘要中準確和重要的部分。
4. 確保摘要邏輯清晰,結構完整,易於閱讀理解。
5. 如有必要,重新組織摘要結構以提高清晰度和連貫性。
6. 保留原有的 Hash Tag(如果有的話),或根據更新後的內容調整 Hash Tag。

請生成最終摘要,確保其準確、全面、連貫,並符合用戶的反饋意見。"""

    response = client.chat.completions.create(
        model=config['openai_model'],
        messages=[{"role": "system", "content": prompt}],
        temperature=0.8,
        max_tokens=1000
    )

    # message.content can be None; treat that as empty text.
    refined_summary = (response.choices[0].message.content or "").strip()
    return refined_summary
|
444 |
+
|
445 |
+
# 添加新的路由:
|
446 |
+
@app.route('/refine_summary', methods=['POST'])
def refine_summary():
    """Refine a summary from user feedback (JSON POST endpoint).

    Expects a JSON object with ``original_summary``, ``user_feedback``,
    ``video_title``, ``video_url`` and ``processed_description``.

    Returns:
        JSON with ``refined_summary`` (prefixed with the title/URL header),
        ``version`` and ``total_versions``; or a 400 response with an
        ``error`` field when the refinement limit has been reached or the
        request body is invalid.
    """
    global refinement_count

    # Use the module-level limit, which already falls back to a default when
    # the configuration file has no 'max_refinement_count' entry.
    if refinement_count >= max_refinement_count:
        return jsonify({"error": "已達到最大重新生成次數"}), 400

    # Validate the body up front so a malformed request yields a 400 instead
    # of an unhandled KeyError/TypeError (HTTP 500).
    data = request.get_json(silent=True)
    required_fields = ('original_summary', 'user_feedback', 'video_title', 'video_url', 'processed_description')
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400
    missing = [field for field in required_fields if field not in data]
    if missing:
        return jsonify({"error": f"Missing required fields: {', '.join(missing)}"}), 400

    original_summary = data['original_summary']
    user_feedback = data['user_feedback']
    video_title = data['video_title']
    video_url = data['video_url']
    processed_description = data['processed_description']

    refined_summary = refine_final_summary(original_summary, user_feedback, video_title, processed_description)
    refinement_count += 1

    # Prefix the summary with the video information.
    refined_summary_with_info = f"影片名稱:{video_title}\n網址或路徑:{video_url}\n\n{refined_summary}"

    logger.info(f"Sending refined summary: {refined_summary_with_info}")
    return jsonify({
        "refined_summary": refined_summary_with_info,
        "version": refinement_count,
        "total_versions": refinement_count + 1
    })
|
472 |
+
|
473 |
+
|
474 |
+
|
475 |
+
@app.route('/')
def index():
    """Serve the single-page front end rendered from ``index.html``."""
    page = render_template('index.html')
    return page
|
478 |
+
|
479 |
+
@app.route('/process', methods=['POST'])
def process():
    """Start processing a YouTube URL or an uploaded video file.

    Form fields: ``url_or_path`` (required), ``file`` (required unless
    ``url_or_path`` starts with ``http``) and ``localVideoDescription``
    (optional). Processing runs in a background thread.

    Returns:
        A ``text/event-stream`` response carrying progress messages, or a JSON
        error with status 400 (invalid request) or 500 (unexpected failure).
    """
    try:
        url_or_path = request.form.get('url_or_path')

        if not url_or_path:
            return jsonify({"error": "No URL or path provided"}), 400

        if not url_or_path.startswith('http'):
            # Local file: the upload itself is what gets processed.
            if 'file' not in request.files:
                return jsonify({"error": "No file uploaded"}), 400
            upload = request.files['file']
            # Covers both an empty and a missing (None) file name, so the
            # client-supplied path is never processed in place of an upload.
            if not upload.filename:
                return jsonify({"error": "No file selected"}), 400
            filename = secure_filename(upload.filename)
            if not filename:
                # Nothing usable is left after sanitising; saving would target
                # the directory itself.
                return jsonify({"error": "Invalid file name"}), 400
            # Make sure the target directory exists before saving.
            os.makedirs(config['save_directory'], exist_ok=True)
            file_path = os.path.join(config['save_directory'], filename)
            upload.save(file_path)
            url_or_path = file_path

        # Optional user-supplied description for local videos.
        local_video_description = request.form.get('localVideoDescription', '')

        logger.info(f"處理文件: {url_or_path}")

        q = Queue()
        thread = Thread(target=process_video, args=(url_or_path, q, local_video_description))
        thread.start()
        return Response(event_stream(q), content_type='text/event-stream')
    except Exception as e:
        error_message = f"處理請求時出現錯誤: {str(e)}"
        logger.exception(error_message)
        return jsonify({"error": error_message}), 500
|
516 |
+
|
517 |
+
def event_stream(q):
    """Yield queued messages as Server-Sent Events until a terminal status.

    Each message taken from ``q`` is serialised as JSON and emitted as one
    ``data: ...`` event. The stream ends after a message whose ``status`` is
    ``'處理完成'`` or starts with ``'錯誤'``.

    Args:
        q: Queue of dict messages.

    Yields:
        SSE-formatted strings, one per message.
    """
    while True:
        message = q.get()
        yield f"data: {json.dumps(message)}\n\n"
        # A message may have no 'status' (or a None one); treat that as
        # non-terminal instead of failing on None.startswith().
        status = message.get('status') or ''
        if status == '處理完成' or status.startswith('錯誤'):
            break
|
523 |
+
|
524 |
+
if __name__ == '__main__':
    # Listen on all interfaces; the port comes from PORT and defaults to 5000.
    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 5000)))
|
config.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"save_directory": "D:\\_Adam_Projects\\AI\\myWorkspace_python\\YouTube_Caption_Generator\\HF_space\\output",
|
3 |
+
"ffmpeg_path": "D:\\_Adam_Projects\\_Toolbox\\ffmpeg\\bin",
|
4 |
+
"spacy_model": "zh_core_web_md",
|
5 |
+
"whisper_model": "large-v3",
|
6 |
+
"whisper_compute_type": "int8_float16",
|
7 |
+
"whisper_language": "zh",
|
8 |
+
"whisper_beam_size": 8,
|
9 |
+
"whisper_temperature" : 0.4,
|
10 |
+
"audio_start_percent":0,
|
11 |
+
"audio_end_percent":100,
|
12 |
+
"openai_model": "gpt-4o-mini",
|
13 |
+
"max_refinement_count": 5
|
14 |
+
}
|
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Flask==3.0.2
|
2 |
+
yt-dlp==2024.10.7
|
3 |
+
faster-whisper==1.0.3
|
4 |
+
openai==1.51.2
|
5 |
+
spacy==3.7.5
|
6 |
+
numpy==1.26.4
|
7 |
+
pydantic==2.9.2
|
8 |
+
typer==0.12.1
|
9 |
+
Werkzeug==3.0.1
|
static/main.js
ADDED
@@ -0,0 +1,360 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
let processingMethod = '';
|
2 |
+
let currentSummaryVersion = 0;
|
3 |
+
let totalSummaryVersions = 1;
|
4 |
+
let summaryVersions = [];
|
5 |
+
let currentVideoTitle = '';
|
6 |
+
let currentVideoUrl = '';
|
7 |
+
|
8 |
+
/**
 * Render the currently selected summary version and refresh the version
 * label and the previous/next buttons. Shows a placeholder when no summary
 * exists for the current version.
 */
window.updateSummaryDisplay = function() {
    const summaryBox = document.getElementById('finalSummary');
    const activeSummary = summaryVersions.length > 0
        ? summaryVersions[currentSummaryVersion]
        : undefined;

    // Guard clause: nothing to show for this version.
    if (!activeSummary) {
        console.error('No summary available for the current version');
        summaryBox.innerHTML = '暫無可用摘要';
        return;
    }

    summaryBox.innerHTML = linkifyText(activeSummary);
    document.getElementById('summaryVersion').textContent = `版本:${currentSummaryVersion + 1}/${totalSummaryVersions}`;

    // Disable navigation at either end of the version list.
    const atFirstVersion = currentSummaryVersion === 0;
    const atLastVersion = currentSummaryVersion === totalSummaryVersions - 1;
    document.getElementById('prevVersion').disabled = atFirstVersion;
    document.getElementById('nextVersion').disabled = atLastVersion;
};
|
19 |
+
document.addEventListener('DOMContentLoaded', (event) => {
|
20 |
+
|
21 |
+
document.getElementById('refineSummary').addEventListener('click', showSummaryLightbox);
|
22 |
+
document.getElementById('submitFeedback').addEventListener('click', submitFeedback);
|
23 |
+
document.getElementById('prevVersion').addEventListener('click', () => changeSummaryVersion(-1));
|
24 |
+
document.getElementById('nextVersion').addEventListener('click', () => changeSummaryVersion(1));
|
25 |
+
|
26 |
+
// 點擊 Lightbox 外部關閉
|
27 |
+
document.getElementById('summaryLightbox').addEventListener('click', function(e) {
|
28 |
+
if (e.target === this) {
|
29 |
+
hideSummaryLightbox();
|
30 |
+
}
|
31 |
+
});
|
32 |
+
|
33 |
+
function updateSummaryDisplay() {
|
34 |
+
console.log("Updating summary display", { currentSummaryVersion, totalSummaryVersions, summaryVersions });
|
35 |
+
if (summaryVersions.length > 0 && summaryVersions[currentSummaryVersion]) {
|
36 |
+
document.getElementById('finalSummary').innerHTML = linkifyText(summaryVersions[currentSummaryVersion]);
|
37 |
+
document.getElementById('summaryVersion').textContent = `版本:${currentSummaryVersion + 1}/${totalSummaryVersions}`;
|
38 |
+
document.getElementById('prevVersion').disabled = currentSummaryVersion === 0;
|
39 |
+
document.getElementById('nextVersion').disabled = currentSummaryVersion === totalSummaryVersions - 1;
|
40 |
+
} else {
|
41 |
+
console.error('No summary available for the current version');
|
42 |
+
document.getElementById('finalSummary').innerHTML = '暫無可用摘要';
|
43 |
+
}
|
44 |
+
}
|
45 |
+
|
46 |
+
function showSummaryLightbox() {
|
47 |
+
document.getElementById('summaryLightbox').classList.remove('hidden');
|
48 |
+
document.getElementById('submitFeedback').disabled = false;
|
49 |
+
document.getElementById('feedbackLoading').classList.add('hidden');
|
50 |
+
}
|
51 |
+
|
52 |
+
function hideSummaryLightbox() {
|
53 |
+
document.getElementById('summaryLightbox').classList.add('hidden');
|
54 |
+
}
|
55 |
+
|
56 |
+
/**
 * Send the user's feedback on the currently displayed summary to
 * `/refine_summary` and, on success, append the refined summary as a new
 * version and display it.
 *
 * Reads the shared state `summaryVersions`, `currentSummaryVersion`,
 * `totalSummaryVersions`, `currentVideoTitle` and `currentVideoUrl`, and
 * updates the first three when a refined summary is received.
 * While the request is in flight the submit button is disabled, the loading
 * indicator is shown and pointer events on the lightbox are turned off; all
 * three are restored when the request settles, whether it succeeded or not.
 */
function submitFeedback() {
    console.log("開始提交反饋");
    console.log("當前摘要版本:", currentSummaryVersion);
    console.log("總摘要版本數:", totalSummaryVersions);

    // Bail out before touching the UI: without a summary there is nothing to
    // refine, and failing later would leave the button disabled for good.
    const originalSummary = summaryVersions[currentSummaryVersion];
    if (typeof originalSummary !== 'string') {
        console.error('No summary available for the current version');
        alert('暫無可用摘要');
        return;
    }

    const feedback = document.getElementById('userFeedback').value;
    const processedDescription = document.getElementById('videoInfoContent').textContent;
    const videoTitle = currentVideoTitle;
    const videoUrl = currentVideoUrl;

    const submitButton = document.getElementById('submitFeedback');
    const loadingIndicator = document.getElementById('feedbackLoading');
    const lightbox = document.getElementById('summaryLightbox');

    // Show the loading state and block further submissions.
    submitButton.disabled = true;
    loadingIndicator.classList.remove('hidden');

    // Prevent the lightbox from being dismissed while the request runs.
    lightbox.style.pointerEvents = 'none';

    // Strip the two-line "title / URL" header only when it is really there.
    // The first version is built with that header; a refined version may not
    // carry it, and blindly dropping two lines would discard real content.
    const summaryLines = originalSummary.split('\n');
    const hasInfoHeader = summaryLines.length >= 2 &&
        summaryLines[0].startsWith('影片名稱:') &&
        summaryLines[1].startsWith('網址或路徑:');
    const summaryWithoutInfo = (hasInfoHeader ? summaryLines.slice(2) : summaryLines)
        .join('\n')
        .trim();

    console.log("Submitting feedback:", {
        original_summary: summaryWithoutInfo,
        user_feedback: feedback,
        video_title: videoTitle,
        video_url: videoUrl
    });

    fetch('/refine_summary', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            original_summary: summaryWithoutInfo,
            user_feedback: feedback,
            video_title: videoTitle,
            video_url: videoUrl,
            processed_description: processedDescription
        })
    })
    .then(response => {
        if (!response.ok) {
            throw new Error(`HTTP error! status: ${response.status}`);
        }
        return response.json();
    })
    .then(data => {
        console.log("Received data from server:", data);
        if (data.error) {
            throw new Error(data.error);
        }
        if (!data.refined_summary) {
            console.error("收到的摘要為空");
            throw new Error("收到的摘要為空");
        }

        console.log("收到新的摘要:", data.refined_summary);

        // Append the new version and make it the one being displayed.
        summaryVersions.push(data.refined_summary);
        currentSummaryVersion = summaryVersions.length - 1;
        totalSummaryVersions = summaryVersions.length;

        console.log("更新後的版本信息:", {
            currentSummaryVersion: currentSummaryVersion,
            totalSummaryVersions: totalSummaryVersions,
            summaryVersionsLength: summaryVersions.length
        });

        window.updateSummaryDisplay();
        console.log("摘要顯示已更新");

        // Close the feedback dialog once the new version is on screen.
        hideSummaryLightbox();
    })
    .catch(error => {
        console.error('Error during feedback submission:', error);
        alert('提交反饋時出錯: ' + error.message);
    })
    .finally(() => {
        // Restore the idle state regardless of the outcome.
        submitButton.disabled = false;
        loadingIndicator.classList.add('hidden');
        lightbox.style.pointerEvents = 'auto';
    });
}
|
150 |
+
|
151 |
+
|
152 |
+
|
153 |
+
/**
 * Move the displayed summary version by `delta` steps, clamped to the range
 * [0, totalSummaryVersions - 1], then refresh the summary display.
 *
 * @param {number} delta Number of versions to move (negative goes back).
 */
function changeSummaryVersion(delta) {
    const lastIndex = totalSummaryVersions - 1;
    const requested = currentSummaryVersion + delta;
    // Lower bound first, then upper bound, matching a sequential clamp.
    currentSummaryVersion = Math.min(Math.max(requested, 0), lastIndex);
    window.updateSummaryDisplay();
}
|
159 |
+
|
160 |
+
|
161 |
+
// Selecting "YouTube" shows the URL field and hides the local-file controls.
document.getElementById('youtubeBtn').addEventListener('click', () => {
    processingMethod = 'youtube';
    document.getElementById('youtubeInput').classList.remove('hidden');
    document.getElementById('localVideoInput').classList.add('hidden');
});

// Selecting "local video" shows the file controls and hides the URL field.
document.getElementById('localVideoBtn').addEventListener('click', () => {
    processingMethod = 'local';
    document.getElementById('localVideoInput').classList.remove('hidden');
    document.getElementById('youtubeInput').classList.add('hidden');
});

// Validate the chosen input, confirm before discarding existing results,
// then start a new processing run.
document.getElementById('startProcessing').addEventListener('click', () => {
    let url_or_path = '';
    if (processingMethod === 'youtube') {
        // Trim so that a whitespace-only value is treated as "no URL".
        url_or_path = document.getElementById('youtubeUrl').value.trim();
    } else if (processingMethod === 'local') {
        const fileInput = document.getElementById('localVideoFile');
        if (fileInput.files.length > 0) {
            url_or_path = fileInput.files[0].name;
        }
    }

    if (!url_or_path) {
        alert('請選擇處理方式並輸入 URL 或選擇文件');
        return;
    }

    if (hasExistingContent()) {
        if (!confirm('開始新的處理任務將清空當前內容。是否繼續?')) {
            return;
        }
        clearPreviousContent();
    }
    startProcessing(url_or_path);
});

// Toggle the transcript panel. The panel is hidden by a stylesheet rule, so
// its inline `style.display` starts out empty; the computed style is used so
// that the very first click already reveals it.
document.getElementById('toggleTranscript').addEventListener('click', () => {
    const transcript = document.getElementById('transcript');
    const isHidden = window.getComputedStyle(transcript).display === 'none';
    transcript.style.display = isHidden ? 'block' : 'none';
});
|
203 |
+
});
|
204 |
+
|
205 |
+
/**
 * Report whether results from a previous run are still on the page.
 *
 * @returns {boolean} true when the video-info, summary or segment-summary
 *     panel is not hidden, or when the transcript element has non-blank text.
 */
function hasExistingContent() {
    const resultPanelIds = ['videoInfo', 'summary', 'segmentSummaries'];
    const anyPanelVisible = resultPanelIds.some(
        (id) => !document.getElementById(id).classList.contains('hidden')
    );
    if (anyPanelVisible) {
        return true;
    }
    const transcriptText = document.getElementById('transcript').textContent;
    return transcriptText.trim() !== '';
}
|
211 |
+
|
212 |
+
/**
 * Reset the page to its pre-run state: hide and empty every result panel,
 * reset the summary-version state and controls, and clear the feedback box.
 */
function clearPreviousContent() {
    const byId = (id) => document.getElementById(id);

    // Hide the result panels.
    ['videoInfo', 'status', 'summary', 'segmentSummaries'].forEach((id) => {
        byId(id).classList.add('hidden');
    });

    // Empty the plain-text containers.
    ['videoInfoContent', 'status', 'transcript'].forEach((id) => {
        byId(id).textContent = '';
    });

    // Empty the containers that hold generated markup.
    ['finalSummary', 'summaries'].forEach((id) => {
        byId(id).innerHTML = '';
    });

    // Reset the shared version state; the total goes back to 1, not 0.
    summaryVersions = [];
    currentSummaryVersion = 0;
    totalSummaryVersions = 1;

    // Reset the version label and disable both navigation buttons.
    byId('summaryVersion').textContent = '版本:1/1';
    ['prevVersion', 'nextVersion'].forEach((id) => {
        byId(id).disabled = true;
    });

    // Clear any feedback left over from the previous run.
    byId('userFeedback').value = '';
}
|
239 |
+
|
240 |
+
/**
 * Start a processing run: reset the page, POST the input to `/process` and
 * feed the streamed response body to `handleServerSentEvent` as it arrives.
 *
 * For the local method the selected file and the optional description are
 * attached to the form data and the file name becomes the current title.
 * Otherwise the title stays empty until video info arrives from the server.
 *
 * @param {string} url_or_path YouTube URL, or the selected file's name.
 */
function startProcessing(url_or_path) {
    clearPreviousContent();
    currentVideoTitle = '';
    currentVideoUrl = url_or_path;

    const statusBox = document.getElementById('status');
    statusBox.classList.remove('hidden');
    statusBox.textContent = '⌛️ 處理中...';

    const formData = new FormData();
    formData.append('url_or_path', url_or_path);
    if (processingMethod === 'local') {
        const fileInput = document.getElementById('localVideoFile');
        if (fileInput.files.length > 0) {
            formData.append('file', fileInput.files[0]);
            currentVideoTitle = fileInput.files[0].name;
        }
        formData.append('localVideoDescription', document.getElementById('localVideoDescription').value);
    }

    fetch('/process', {
        method: 'POST',
        body: formData
    })
    .then(response => {
        // Surface HTTP failures instead of silently reading an error page.
        if (!response.ok) {
            throw new Error(`HTTP error! status: ${response.status}`);
        }

        const reader = response.body.getReader();
        const decoder = new TextDecoder();

        function readStream() {
            return reader.read().then(({ done, value }) => {
                if (done) {
                    // Flush any bytes the decoder is still holding.
                    const remainder = decoder.decode();
                    if (remainder) {
                        handleServerSentEvent(remainder);
                    }
                    console.log('Stream complete');
                    return;
                }
                // `stream: true` keeps a multi-byte character that is split
                // across two network chunks from being decoded as garbage.
                const chunk = decoder.decode(value, { stream: true });
                handleServerSentEvent(chunk);
                return readStream();
            });
        }

        return readStream();
    })
    .catch(error => {
        console.error('Error:', error);
        document.getElementById('status').textContent = `錯誤: ${error.message}`;
    });
}
|
287 |
+
|
288 |
+
/**
 * Parse a chunk of the streamed response and pass every `data: ` line's JSON
 * payload to `updateUI`.
 *
 * A network chunk can end in the middle of a line, so the trailing fragment
 * that does not parse yet is kept on `handleServerSentEvent.pending` and
 * prepended to the next chunk. A complete line whose payload is not valid
 * JSON is logged and skipped rather than aborting the stream.
 *
 * @param {string} chunk Decoded text read from the response body.
 */
function handleServerSentEvent(chunk) {
    const PREFIX = 'data: ';

    // Parse one line's payload without letting a syntax error escape.
    const parsePayload = (line) => {
        try {
            return { ok: true, value: JSON.parse(line.slice(PREFIX.length)) };
        } catch (error) {
            return { ok: false, error: error };
        }
    };

    const lines = ((handleServerSentEvent.pending || '') + chunk).split('\n');
    // Everything after the last newline may still be incomplete.
    const tail = lines.pop();
    handleServerSentEvent.pending = '';

    lines.forEach(line => {
        if (!line.startsWith(PREFIX)) {
            return;
        }
        const parsed = parsePayload(line);
        if (parsed.ok) {
            updateUI(parsed.value);
        } else {
            console.error('Skipping malformed event line:', parsed.error);
        }
    });

    if (tail.startsWith(PREFIX)) {
        // A chunk may end on a complete payload with no trailing newline;
        // handle it right away when it parses, otherwise wait for the rest.
        const parsed = parsePayload(tail);
        if (parsed.ok) {
            updateUI(parsed.value);
        } else {
            handleServerSentEvent.pending = tail;
        }
    } else {
        // Possibly a partial "data: " prefix; keep it for the next chunk.
        handleServerSentEvent.pending = tail;
    }
}
|
297 |
+
|
298 |
+
/**
 * Apply one progress event from the server to the page.
 *
 * Fields handled, each only when present on `data`:
 *   - status:        text for the status box
 *   - video_info:    title/duration/views/likes/description panel; also
 *                    updates `currentVideoTitle`
 *   - final_summary: becomes version 1 of the summary, prefixed with the
 *                    current title and URL/path
 *   - summary:       one segment summary, appended to the segment list
 *   - transcript:    full transcript text
 *   - error:         shown to the user in an alert
 *
 * @param {Object} data Parsed event payload.
 */
function updateUI(data) {
    // Keep the previous status when an event carries none.
    if (data.status !== undefined && data.status !== null) {
        document.getElementById('status').textContent = data.status;
    }

    if (data.video_info) {
        const info = data.video_info;
        document.getElementById('videoInfo').classList.remove('hidden');

        let videoInfoContent = [
            `標題:${info.title}`,
            `時長:${info.duration}`,
            `觀看次數:${info.view_count}`,
            `喜歡數:${info.like_count}`,
            `描述:${info.description}`
        ].join('\n');

        // Keep the panel short; long descriptions are cut at 300 characters.
        if (videoInfoContent.length > 300) {
            videoInfoContent = videoInfoContent.substring(0, 300) + '... ...';
        }

        document.getElementById('videoInfoContent').textContent = videoInfoContent;
        // Remember the title for the summary header and feedback requests.
        currentVideoTitle = info.title;
    }

    console.log("Received data:", data);

    if (data.final_summary) {
        console.log("Updating final summary");
        document.getElementById('summary').classList.remove('hidden');
        const summaryWithInfo = `影片名稱:${currentVideoTitle}\n網址或路徑:${currentVideoUrl}\n\n${data.final_summary}`;

        // A fresh final summary starts the version history over.
        summaryVersions = [summaryWithInfo];
        currentSummaryVersion = 0;
        totalSummaryVersions = 1;
        console.log("Calling updateSummaryDisplay");
        window.updateSummaryDisplay();
        console.log("updateSummaryDisplay called");
    }

    if (data.summary) {
        document.getElementById('segmentSummaries').classList.remove('hidden');
        // Append without re-parsing the markup already in the container.
        document.getElementById('summaries')
            .insertAdjacentHTML('beforeend', linkifyText(data.summary) + '\n\n');
    }

    if (data.transcript) {
        document.getElementById('transcript').textContent = data.transcript;
    }

    if (data.error) {
        alert(data.error);
    }
}
|
354 |
+
|
355 |
+
/**
 * Convert plain text into HTML that is safe to assign to `innerHTML`, with
 * every http(s) URL turned into a link that opens in a new tab.
 *
 * All text, including the URLs themselves, is HTML-escaped, so markup in the
 * input is shown literally instead of being interpreted by the browser.
 *
 * @param {string} text Plain text; null or undefined is treated as ''.
 * @returns {string} Escaped HTML with URLs wrapped in anchor elements.
 */
function linkifyText(text) {
    const HTML_ESCAPES = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    };
    const escapeHtml = (value) => value.replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);

    const urlRegex = /(https?:\/\/[^\s]+)/;
    const source = text === null || text === undefined ? '' : String(text);

    // Splitting on a capturing pattern yields plain text at even indexes and
    // the matched URLs at odd indexes, so each piece is escaped exactly once.
    return source
        .split(urlRegex)
        .map((part, index) => {
            const safe = escapeHtml(part);
            if (index % 2 === 0) {
                return safe;
            }
            // rel="noopener noreferrer" stops the opened page from reaching
            // back into this one through window.opener.
            return `<a href="${safe}" target="_blank" rel="noopener noreferrer" class="text-blue-600 hover:underline">${safe}</a>`;
        })
        .join('');
}
|
templates/index.html
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html lang="zh-HK">
|
3 |
+
<head>
|
4 |
+
<meta charset="UTF-8">
|
5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6 |
+
<title>廣東話 Youtube 內容總結工具</title>
|
7 |
+
<script src="https://cdn.tailwindcss.com"></script>
|
8 |
+
<style>
|
9 |
+
.summary-content {
|
10 |
+
white-space: pre-wrap;
|
11 |
+
word-wrap: break-word;
|
12 |
+
}
|
13 |
+
.transcript-content {
|
14 |
+
display: none;
|
15 |
+
white-space: pre-wrap;
|
16 |
+
word-wrap: break-word;
|
17 |
+
}
|
18 |
+
.btn-custom {
|
19 |
+
transition: background-color 0.3s;
|
20 |
+
}
|
21 |
+
.btn-custom:hover {
|
22 |
+
opacity: 0.9;
|
23 |
+
}
|
24 |
+
</style>
|
25 |
+
</head>
|
26 |
+
<body class="bg-gray-100">
|
27 |
+
<div class="container mx-auto px-4 py-8">
|
28 |
+
<h1 class="text-3xl font-bold mb-4">廣東話 Youtube 內容總結工具</h1>
|
29 |
+
<p class="text-sm text-gray-500 mb-4">by Adam Chan. 2024-10 (Beta-0.8) 😃如果這工具對你有用,歡迎發個訊息👍給作者以示鼓勵。</p>
|
30 |
+
<p class="mb-6">這工具能自動下載 YouTube 視頻音頻,轉錄為文字,並生成詳細摘要。它支持長視頻,使用先進的 AI 模型進行轉錄和摘要生成。無論是研究、學習還是內容創作,這工具都能幫您快速掌握視頻核心內容,節省寶貴時間。現在還能利用視頻描述來提高摘要的準確性!</p>
|
31 |
+
|
32 |
+
<div class="mb-6">
|
33 |
+
<h2 class="text-xl font-semibold mb-2">選擇處理方式:</h2>
|
34 |
+
<div class="flex space-x-4">
|
35 |
+
<button id="youtubeBtn" class="btn-custom px-4 py-2 bg-[#E57373] text-white rounded">YouTube 視頻</button>
|
36 |
+
<button id="localVideoBtn" class="btn-custom px-4 py-2 bg-[#64B5F6] text-white rounded">本地視頻文件</button>
|
37 |
+
</div>
|
38 |
+
</div>
|
39 |
+
|
40 |
+
<div id="youtubeInput" class="mb-6 hidden">
|
41 |
+
<label for="youtubeUrl" class="block mb-2">輸入 YouTube URL:</label>
|
42 |
+
<input type="text" id="youtubeUrl" class="w-full p-2 border rounded" placeholder="https://www.youtube.com/watch?v=...">
|
43 |
+
</div>
|
44 |
+
|
45 |
+
<div id="localVideoInput" class="mb-6 hidden">
|
46 |
+
<label for="localVideoFile" class="block mb-2">選擇本地視頻文件(僅支持 .mp4):</label>
|
47 |
+
<input type="file" id="localVideoFile" accept=".mp4" class="w-full p-2 border rounded">
|
48 |
+
<textarea id="localVideoDescription" class="w-full p-2 border rounded mt-2" rows="4" placeholder="請輸入視頻描述(可選)"></textarea>
|
49 |
+
</div>
|
50 |
+
|
51 |
+
<button id="startProcessing" class="btn-custom px-6 py-3 bg-[#81C784] text-white rounded font-bold">開始處理</button>
|
52 |
+
|
53 |
+
<div id="videoInfo" class="mt-6 hidden">
|
54 |
+
<h3 class="text-lg font-semibold mb-2">📺 視頻信息:</h3>
|
55 |
+
<div id="videoInfoContent" class="summary-content p-4 bg-white rounded shadow"></div>
|
56 |
+
</div>
|
57 |
+
|
58 |
+
<div id="status" class="mt-6 p-4 bg-blue-100 rounded hidden"></div>
|
59 |
+
|
60 |
+
<div id="summary" class="mt-6 hidden">
|
61 |
+
<h3 class="text-lg font-semibold mb-2">📍 最終摘要:</h3>
|
62 |
+
<div id="finalSummary" class="summary-content p-4 bg-white rounded shadow"></div>
|
63 |
+
<div class="mt-4 flex justify-between items-center">
|
64 |
+
<button id="refineSummary" class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded">
|
65 |
+
重新生成摘要
|
66 |
+
</button>
|
67 |
+
<button id="prevVersion" class="bg-gray-300 hover:bg-gray-400 text-gray-800 font-bold py-2 px-4 rounded">
|
68 |
+
上一個版本
|
69 |
+
</button>
|
70 |
+
<button id="nextVersion" class="bg-gray-300 hover:bg-gray-400 text-gray-800 font-bold py-2 px-4 rounded">
|
71 |
+
下一個版本
|
72 |
+
</button>
|
73 |
+
<span id="summaryVersion">版本:1/1</span>
|
74 |
+
</div>
|
75 |
+
</div>
|
76 |
+
|
77 |
+
<!-- 添加 Lightbox -->
|
78 |
+
<div id="summaryLightbox" class="fixed inset-0 bg-gray-600 bg-opacity-50 overflow-y-auto h-full w-full hidden">
|
79 |
+
<div class="relative top-20 mx-auto p-5 border w-2/3 max-w-2xl shadow-lg rounded-md bg-white">
|
80 |
+
<h5 class="text-lg font-bold mb-4">請提供您的反饋意見</h5>
|
81 |
+
<textarea id="userFeedback" class="w-full h-32 p-2 border rounded" placeholder="請輸入您的反饋..."></textarea>
|
82 |
+
<button id="submitFeedback" class="mt-4 bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded">
|
83 |
+
提交
|
84 |
+
</button>
|
85 |
+
<div id="feedbackLoading" class="mt-4 text-center hidden">
|
86 |
+
⌛️ 新內容製作中...
|
87 |
+
</div>
|
88 |
+
</div>
|
89 |
+
</div>
|
90 |
+
|
91 |
+
<div id="segmentSummaries" class="mt-6 hidden">
|
92 |
+
<h3 class="text-lg font-semibold mb-2">🔗 段落摘要:</h3>
|
93 |
+
<div id="summaries" class="summary-content p-4 bg-white rounded shadow"></div>
|
94 |
+
</div>
|
95 |
+
|
96 |
+
<div class="mt-6">
|
97 |
+
<button id="toggleTranscript" class="btn-custom px-4 py-2 bg-gray-600 text-white rounded">顯示/隱藏轉錄文本</button>
|
98 |
+
<div id="transcript" class="mt-4 p-4 bg-white rounded shadow transcript-content"></div>
|
99 |
+
</div>
|
100 |
+
</div>
|
101 |
+
|
102 |
+
<script src="/static/main.js"></script>
|
103 |
+
</body>
|
104 |
+
</html>
|