Upload 3 files
Browse files- Dockerfile +17 -0
- README.md +6 -5
- main.ts +154 -0
Dockerfile
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Use the official Deno image as the base image
|
2 |
+
FROM denoland/deno:alpine-1.24.0
|
3 |
+
|
4 |
+
# Set the working directory
|
5 |
+
WORKDIR /app
|
6 |
+
|
7 |
+
# Copy everything from the build context into the working directory
|
8 |
+
COPY . .
|
9 |
+
|
10 |
+
# Set environment variables (uncomment and set a value if AUTH_TOKEN is needed)
|
11 |
+
# ENV AUTH_TOKEN=your_auth_token
|
12 |
+
|
13 |
+
# Expose the service port (8000 here; change it if your service listens on a different port)
|
14 |
+
EXPOSE 8000
|
15 |
+
|
16 |
+
# Run the service
|
17 |
+
CMD ["run", "--allow-net", "--allow-env", "main.ts"]
|
README.md
CHANGED
@@ -1,10 +1,11 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: docker
|
7 |
pinned: false
|
|
|
|
|
8 |
---
|
9 |
|
10 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: edge
|
3 |
+
emoji: 👀
|
4 |
+
colorFrom: purple
|
5 |
+
colorTo: yellow
|
6 |
sdk: docker
|
7 |
pinned: false
|
8 |
+
license: gpl-3.0
|
9 |
+
app_port: 8000
|
10 |
---
|
11 |
|
|
main.ts
ADDED
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { serve } from "https://deno.land/std/http/server.ts";
|
2 |
+
import { EdgeSpeechTTS } from "https://esm.sh/@lobehub/tts@1";
|
3 |
+
|
4 |
+
// Bearer token read from the AUTH_TOKEN environment variable. unauthorized()
// only rejects requests when this value is set (truthy).
const AUTH_TOKEN = Deno.env.get("AUTH_TOKEN");
|
5 |
+
// Voice catalogue endpoint fetched by fetchVoiceList(); the trustedclienttoken
// query parameter is part of the URL itself.
const VOICES_URL = "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4";
|
6 |
+
|
7 |
+
/**
 * Downloads the voice catalogue from VOICES_URL and groups it by locale.
 *
 * Each upstream entry is reduced to `{ model, name, friendlyName, locale }`,
 * where both `model` and `name` carry the entry's `ShortName`.
 *
 * @returns A map from locale to the voices available for that locale.
 * @throws Error when the upstream request fails or the payload is not a JSON array.
 */
async function fetchVoiceList(): Promise<
  Record<string, { model: string; name: string; friendlyName: string; locale: string }[]>
> {
  const response = await fetch(VOICES_URL);
  if (!response.ok) {
    // Release the unread body before bailing out so the connection is not left dangling.
    await response.body?.cancel();
    throw new Error(`Failed to fetch voice list: ${response.status} ${response.statusText}`);
  }

  const voices: unknown = await response.json();
  if (!Array.isArray(voices)) {
    throw new Error("Unexpected voice list payload: expected a JSON array");
  }

  const grouped: Record<
    string,
    { model: string; name: string; friendlyName: string; locale: string }[]
  > = {};
  for (const voice of voices) {
    const { ShortName, FriendlyName, Locale } = voice as {
      ShortName: string;
      FriendlyName: string;
      Locale: string;
    };
    if (!grouped[Locale]) grouped[Locale] = [];
    grouped[Locale].push({
      model: ShortName,
      name: ShortName,
      friendlyName: FriendlyName,
      locale: Locale,
    });
  }
  return grouped;
}
|
17 |
+
|
18 |
+
/** Edge voice used for each recognised voice alias when the model name contains "tts". */
const TTS_VOICE_ALIASES: ReadonlyMap<string, string> = new Map([
  ["alloy", "zh-CN-YunjianNeural"],
  ["echo", "zh-CN-YunyangNeural"],
  ["fable", "zh-CN-XiaoxiaoNeural"],
]);

/** Edge voice used for any alias that is not listed in TTS_VOICE_ALIASES. */
const TTS_DEFAULT_VOICE = "zh-CN-YunxiNeural";

/** Converts a raw parameter value to a number, falling back to 0 when it is missing or not finite. */
function toFiniteNumber(raw: string | undefined): number {
  const value = Number(raw || 0);
  return Number.isFinite(value) ? value : 0;
}

/** Parses a `key:value|key:value` string and extracts the numeric `rate` and `pitch` entries. */
function parseVoiceParams(voice: string): { rate: number; pitch: number } {
  const params = new Map<string, string | undefined>();
  for (const segment of voice.split("|")) {
    const [key, value] = segment.split(":");
    params.set(key.trim(), value);
  }
  return {
    rate: toFiniteNumber(params.get("rate")),
    pitch: toFiniteNumber(params.get("pitch")),
  };
}

/**
 * Synthesizes `text` with EdgeSpeechTTS and returns the audio as an `audio/mpeg` response.
 *
 * Two request styles are supported:
 * - `model` contains "tts": `voice` is treated as an alias (alloy/echo/fable, anything
 *   else uses the default) and fixed rate 0.1 / pitch 0.2 are applied.
 * - otherwise: `model` is used directly as the voice name and `voice` is parsed as
 *   `rate:<n>|pitch:<n>`; missing or non-numeric values fall back to 0 instead of
 *   forwarding NaN to the TTS client.
 *
 * @param model Model identifier, or the voice name itself when it does not contain "tts".
 * @param voice Voice alias, or a `rate:<n>|pitch:<n>` parameter string.
 * @param text Text to synthesize.
 * @returns A response whose body is the synthesized audio.
 */
async function synthesizeSpeech(model: string, voice: string, text: string): Promise<Response> {
  let voiceName: string;
  let rate: number;
  let pitch: number;

  if (model.includes("tts")) {
    rate = 0.1;
    pitch = 0.2;
    voiceName = TTS_VOICE_ALIASES.get(voice) ?? TTS_DEFAULT_VOICE;
  } else {
    voiceName = model;
    ({ rate, pitch } = parseVoiceParams(voice));
  }

  const tts = new EdgeSpeechTTS();
  const response = await tts.create({
    input: text,
    options: { rate, pitch, voice: voiceName },
  });
  const mp3Buffer = new Uint8Array(await response.arrayBuffer());

  console.log(`Successfully synthesized speech, returning audio/mpeg response`);
  return new Response(mp3Buffer, {
    headers: { "Content-Type": "audio/mpeg" },
  });
}
|
68 |
+
|
69 |
+
/**
 * Tells whether a request must be rejected for lacking valid credentials.
 *
 * When AUTH_TOKEN is not configured every request is accepted; otherwise the
 * `Authorization` header must equal `Bearer <AUTH_TOKEN>` exactly.
 *
 * @param req Incoming request whose `Authorization` header is inspected.
 * @returns `true` when the request should be answered with 401, `false` otherwise.
 */
function unauthorized(req: Request): boolean {
  if (!AUTH_TOKEN) return false;
  return req.headers.get("Authorization") !== `Bearer ${AUTH_TOKEN}`;
}
|
73 |
+
|
74 |
+
/**
 * Checks that the request's `Content-Type` matches the expected media type.
 *
 * Only the media type itself is compared: parameters such as `; charset=utf-8`
 * are ignored and the comparison is case-insensitive, so equivalent header
 * spellings are not rejected.
 *
 * @param req Incoming request whose `Content-Type` header is inspected.
 * @param expected Media type the handler accepts, e.g. `application/json`.
 * @returns A 400 response when the header is missing or different, otherwise `undefined`.
 */
function validateContentType(req: Request, expected: string): Response | undefined {
  const contentType = req.headers.get("Content-Type");
  // Keep only the part before the first ";" — everything after it is a parameter list.
  const mediaType = contentType?.split(";", 1)[0]?.trim().toLowerCase();
  if (mediaType !== expected.trim().toLowerCase()) {
    console.log(`Invalid Content-Type ${contentType}, expected ${expected}`);
    return new Response("Bad Request", { status: 400 });
  }
  return undefined;
}
|
81 |
+
|
82 |
+
/**
 * Handles the `/tts` debug endpoint, which takes `model`, `voice` and `text`
 * from the query string and synthesizes speech from them.
 *
 * The bearer-token check is applied here as well, so a configured AUTH_TOKEN
 * cannot be bypassed by calling the debug route instead of `/v1/audio/speech`.
 *
 * @param req Incoming request; parameters are read from its URL.
 * @returns 401 when unauthorized, 400 when a parameter is missing, otherwise the audio response.
 */
async function handleDebugRequest(req: Request): Promise<Response> {
  if (unauthorized(req)) {
    console.log("Unauthorized request");
    return new Response("Unauthorized", { status: 401 });
  }

  const { searchParams } = new URL(req.url);
  const voice = searchParams.get("voice") || "";
  const model = searchParams.get("model") || "";
  const text = searchParams.get("text") || "";

  console.log(`Debug request with model=${model}, voice=${voice}, text=${text}`);

  if (!voice || !model || !text) {
    console.log("Missing required parameters");
    return new Response("Bad Request", { status: 400 });
  }

  return synthesizeSpeech(model, voice, text);
}
|
97 |
+
|
98 |
+
/**
 * Handles `POST /v1/audio/speech`: authenticates the caller, validates the
 * request, and synthesizes speech from the JSON body `{ model, input, voice }`.
 *
 * The body is validated before use so that malformed JSON or missing fields
 * produce a 400 instead of an exception further down. `voice` may be omitted,
 * in which case it is treated as an empty string.
 *
 * @param req Incoming request.
 * @returns 401, 405 or 400 for rejected requests, otherwise the audio response.
 */
async function handleSynthesisRequest(req: Request): Promise<Response> {
  if (unauthorized(req)) {
    console.log("Unauthorized request");
    return new Response("Unauthorized", { status: 401 });
  }

  if (req.method !== "POST") {
    console.log(`Invalid method ${req.method}, expected POST`);
    return new Response("Method Not Allowed", { status: 405 });
  }

  const invalidContentType = validateContentType(req, "application/json");
  if (invalidContentType) return invalidContentType;

  let body: unknown;
  try {
    body = await req.json();
  } catch {
    console.log("Request body is not valid JSON");
    return new Response("Bad Request", { status: 400 });
  }

  // Destructuring a non-object JSON value (number, string, null) simply yields undefined fields.
  const { model, input, voice = "" } = (body ?? {}) as Record<string, unknown>;
  console.log(`Synthesis request with model=${model}, input=${input}, voice=${voice}`);

  if (
    typeof model !== "string" || model === "" ||
    typeof input !== "string" || input === "" ||
    typeof voice !== "string"
  ) {
    console.log("Missing required parameters");
    return new Response("Bad Request", { status: 400 });
  }

  return synthesizeSpeech(model, voice, input);
}
|
117 |
+
|
118 |
+
|
119 |
+
/**
 * Serves the interactive demo page at `/`.
 *
 * The voice catalogue is fetched on every request and embedded into the page
 * as a JavaScript literal. Every `<` in the serialized catalogue is escaped as
 * `\u003c` so that upstream data containing `</script>` cannot terminate the
 * inline script early.
 *
 * @param req Incoming request (not inspected).
 * @returns The demo page as a `text/html` response.
 */
async function handleDemoRequest(req: Request): Promise<Response> {
  const groupedVoiceList = await fetchVoiceList();

  // JSON only contains "<" inside string values, where \u003c is an equivalent escape.
  const voiceListJson = JSON.stringify(groupedVoiceList).replace(/</g, "\\u003c");

  const html = `<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>语音合成演示</title><link href="https://fonts.googleapis.com/css2?family=Noto+Sans+SC:wght@400;700&display=swap" rel="stylesheet"><style>:root{--primary-color:#6c8bd6;--primary-light:#a2b3e3;--primary-dark:#3d5b8f;--secondary-color:#f08080;--text-color:#333;--text-secondary:#777;--bg-color:#fff}body{font-family:'Noto Sans SC','Arial',sans-serif;color:var(--text-color);margin:0;padding:0;display:flex;justify-content:center;background-color:#fafafa;background-image:linear-gradient(135deg,#f5f7fa 0%,#c3cfe2 100%);position:relative;overflow:hidden}body::before{content:"";position:absolute;top:0;left:0;right:0;bottom:0;background:repeating-radial-gradient(circle at 50% 50%,rgba(255,255,255,0.8) 0%,rgba(255,255,255,0.8) 2%,transparent 2%,transparent 4%,rgba(255,255,255,0.8) 4%,rgba(255,255,255,0.8) 6%,transparent 6%,transparent 8%,rgba(255,255,255,0.8) 8%,rgba(255,255,255,0.8) 10%,transparent 10%),repeating-linear-gradient(45deg,#D4F4FF 0%,#D4F4FF 5%,#E6F9FF 5%,#E6F9FF 10%,#F0FAFF 10%,#F0FAFF 15%,#E6F9FF 15%,#E6F9FF 20%,#D4F4FF 20%,#D4F4FF 25%);background-blend-mode:multiply;opacity:0.8;z-index:-1;animation:glitch 15s infinite}.container{display:flex;max-width:1200px;width:100%;margin:40px;background:#fff;border-radius:12px;position:relative;background-color:rgba(255,255,255,0.8);z-index:1}@keyframes glitch{0%{background-position:0 0,0 0;filter:hue-rotate(0deg)}50%{background-position:10px 10px,-10px 10px;filter:hue-rotate(360deg)}100%{background-position:0 0,0 0;filter:hue-rotate(0deg)}}.input-area,.output-area{padding:30px;width:50%}.input-area{border-right:1px solid #E0E0E0}h1{font-size:36px;color:var(--primary-color);margin-bottom:30px}.filter-section{margin-bottom:30px}.filter-section label{display:block;font-size:16px;color:var(--text-secondary);margin-bottom:10px}.filter-section input{font-size:16px;padding:10px 
15px;border:2px solid var(--primary-light);border-radius:8px;outline:none;transition:border-color .3s,box-shadow .3s;width:100%;box-sizing:border-box}.filter-section input:focus{border-color:var(--primary-color);box-shadow:0 0 0 2px var(--primary-light)}.slider-container{margin-bottom:30px}.slider-container label{display:block;font-size:16px;color:var(--text-secondary);margin-bottom:10px}.slider{-webkit-appearance:none;width:100%;height:10px;border-radius:5px;background:linear-gradient(to right,var(--secondary-color) 0%,var(--primary-color) 50%,var(--primary-light) 100%);box-shadow:inset 0 1px 2px rgba(0,0,0,0.1),0 1px rgba(255,255,255,0.1);outline:none;opacity:0.7;-webkit-transition:.2s;transition:opacity .2s;margin-bottom:10px}.slider:hover{opacity:1}.slider::-webkit-slider-thumb{-webkit-appearance:none;appearance:none;width:20px;height:20px;border-radius:50%;background:#fff;border:2px solid var(--primary-color);cursor:pointer}.slider::-moz-range-thumb{width:20px;height:20px;border-radius:50%;background:#fff;border:2px solid var(--primary-color);cursor:pointer}.slider-value{font-size:14px;color:var(--text-secondary)}.textarea-container{margin-bottom:30px}.textarea-container label{display:block;font-size:18px;margin-bottom:10px}.textarea-container textarea{width:100%;padding:10px;font-size:16px;border:2px solid var(--primary-light);border-radius:8px;outline:none;resize:vertical;transition:border-color .3s,box-shadow .3s;box-sizing:border-box;height:200px}.textarea-container textarea:focus{border-color:var(--primary-color);box-shadow:0 0 0 2px var(--primary-light)}.voice-group{margin-bottom:20px;border:2px solid var(--primary-light);border-radius:12px;overflow:hidden;cursor:move;background:#fff}.voice-header{padding:15px 
20px;font-size:18px;background:var(--primary-light);color:#fff;cursor:pointer;display:flex;justify-content:space-between;align-items:center}.voice-header:hover{background:var(--primary-color)}.voice-buttons{padding:20px;display:none;gap:12px;flex-wrap:wrap}.voice-button{background:var(--secondary-color);color:#fff;border:none;padding:10px 20px;border-radius:50px;cursor:pointer;transition:filter .3s}.voice-button:hover{filter:brightness(0.9)}.chevron{transition:transform .3s}.voice-group.open .voice-buttons{display:flex}.voice-group.open .chevron{transform:rotate(180deg)}.dragging{opacity:0.5}</style></head><body><div class="container"><div class="input-area"><h1>输入文本</h1><div class="filter-section"><label for="keywords">Speaker筛选:</label><input type="text" id="keywords" value="multilingual,-TW,-CN"></div><div class="slider-container"><label for="rate">语速:</label><input type="range" min="-1" max="1" step="0.1" value="-0.1" class="slider" id="rate"><div class="slider-value" id="rateValue">-0.1</div><label for="pitch">音调:</label><input type="range" min="-1" max="1" step="0.1" value="0.1" class="slider" id="pitch"><div class="slider-value" id="pitchValue">0.1</div></div><div class="textarea-container"><label for="inputText">输入文本:</label><textarea id="inputText">Hello world</textarea></div></div><div class="output-area"><h1>选择语音</h1><div id="voices"></div></div></div><script>const voiceList = ${voiceListJson};let audio=null;function filterVoices(){const keywords=document.getElementById('keywords').value.split(',').map(k=>k.trim().toLowerCase());const voicesDiv=document.getElementById('voices');voicesDiv.innerHTML='';const filteredVoices={};for(const[locale,voices]of Object.entries(voiceList)){const filtered=voices.filter(({name,friendlyName})=>keywords.some(keyword=>name.toLowerCase().includes(keyword)||friendlyName.toLowerCase().includes(keyword)));if(filtered.length>0){filteredVoices[locale]=filtered}}for(const[locale,voices]of 
Object.entries(filteredVoices)){const group=document.createElement('div');group.className='voice-group';group.draggable=true;const header=document.createElement('div');header.className='voice-header';header.textContent=locale.toUpperCase();const chevron=document.createElement('span');chevron.className='chevron';chevron.innerHTML='▼';header.appendChild(chevron);const buttonsContainer=document.createElement('div');buttonsContainer.className='voice-buttons';voices.forEach(({model,name})=>{const button=document.createElement('button');button.className='voice-button';button.textContent=name;button.onclick=()=>synthesize(model);buttonsContainer.appendChild(button)});header.onclick=()=>{group.classList.toggle('open')};group.appendChild(header);group.appendChild(buttonsContainer);voicesDiv.appendChild(group)}addDragDropListeners()}function synthesize(model){const text=document.getElementById('inputText').value||'Hello world';const rate=document.getElementById('rate').value||'-0.1';const pitch=document.getElementById('pitch').value||'0.1';const voice=\`rate:\${rate}|pitch:\${pitch}\`;if(audio){audio.pause();audio.currentTime=0}fetch('/v1/audio/speech',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({model,input:text,voice})}).then(response=>response.blob()).then(blob=>{const audioUrl=URL.createObjectURL(blob);audio=new Audio(audioUrl);audio.play()})}function addDragDropListeners(){const voicesDiv=document.getElementById('voices');let draggedItem=null;voicesDiv.addEventListener('dragstart',e=>{draggedItem=e.target;e.target.classList.add('dragging')});voicesDiv.addEventListener('dragend',e=>{e.target.classList.remove('dragging');draggedItem=null});voicesDiv.addEventListener('dragover',e=>{e.preventDefault();const afterElement=getDragAfterElement(voicesDiv,e.clientY);if(afterElement==null){voicesDiv.appendChild(draggedItem)}else{voicesDiv.insertBefore(draggedItem,afterElement)}})}function getDragAfterElement(container,y){const 
draggableElements=[...container.querySelectorAll('.voice-group:not(.dragging)')];return draggableElements.reduce((closest,child)=>{const box=child.getBoundingClientRect();const offset=y-box.top-box.height/2;if(offset<0&&offset>closest.offset){return{offset:offset,element:child}}else{return closest}},{offset:Number.NEGATIVE_INFINITY}).element}filterVoices();document.getElementById('keywords').addEventListener('input',filterVoices);const rateSlider=document.getElementById('rate');const rateValue=document.getElementById('rateValue');rateSlider.oninput=function(){rateValue.innerHTML=this.value};const pitchSlider=document.getElementById('pitch');const pitchValue=document.getElementById('pitchValue');pitchSlider.oninput=function(){pitchValue.innerHTML=this.value}</script></body></html>`;

  return new Response(html, {
    headers: { "Content-Type": "text/html" },
  });
}
|
128 |
+
|
129 |
+
|
130 |
+
// Entry point: routes each request by path and converts any failure into a 500.
//   "/"                 -> demo page
//   "/tts"              -> query-string debug endpoint
//   "/v1/audio/speech"  -> JSON synthesis endpoint
//   anything else       -> 404
serve(async (req) => {
  try {
    const url = new URL(req.url);

    // Each handler is awaited inside the try block: returning the bare promise
    // would let a rejection escape this catch and skip the error response below.
    if (url.pathname === "/") {
      return await handleDemoRequest(req);
    }

    if (url.pathname === "/tts") {
      return await handleDebugRequest(req);
    }

    if (url.pathname !== "/v1/audio/speech") {
      console.log(`Unhandled path ${url.pathname}`);
      return new Response("Not Found", { status: 404 });
    }

    return await handleSynthesisRequest(req);
  } catch (err) {
    // Thrown values are not guaranteed to be Error instances, so narrow before reading .message.
    const message = err instanceof Error ? err.message : String(err);
    console.error(`Error processing request: ${message}`);
    return new Response(`Internal Server Error\n${message}`, {
      status: 500,
    });
  }
});
|