Spaces:
Running
Running
Commit
·
1cea837
1
Parent(s):
0df1259
rename from model to entity
Browse files- .env +2 -0
- package-lock.json +101 -48
- package.json +4 -1
- src/app/api/generators/speech/generateVoiceWithElevenLabs.txt +55 -0
- src/app/api/generators/speech/generateVoiceWithParlerTTS.ts +84 -0
- src/app/api/generators/speech/generateVoiceWithXTTS2.txt +92 -0
- src/app/api/utils/addBase64.ts +51 -0
- src/app/api/utils/getHuggingFaceSpaceStatus.ts +114 -0
- src/app/api/utils/getMediaInfo.ts +79 -0
- src/app/api/utils/makeSureSpaceIsRunning.ts +77 -0
- src/app/api/utils/readMp3FileToBase64.ts +17 -0
- src/app/api/utils/sleep.ts +6 -0
- src/app/api/utils/timeout.ts +15 -0
- src/app/api/utils/tryApiCall.ts +69 -0
- src/app/api/v1/edit/dialogues/route.ts +77 -0
- src/app/api/v1/edit/{models → entities}/generateAudioID.ts +0 -0
- src/app/api/v1/edit/{models → entities}/generateImageID.ts +0 -0
- src/app/api/v1/edit/{models → entities}/route.ts +19 -17
- src/app/api/v1/edit/{models → entities}/systemPrompt.ts +0 -0
- src/app/api/v1/edit/storyboards/route.ts +1 -2
- src/components/interface/latent-engine/core/prompts/getCharacterPrompt.ts +8 -8
- src/components/interface/latent-engine/core/prompts/getSpeechBackgroundAudioPrompt.ts +52 -0
- src/components/interface/latent-engine/core/prompts/getSpeechForegroundAudioPrompt.ts +20 -0
- src/components/interface/latent-engine/core/prompts/getVideoPrompt.ts +17 -17
- src/components/interface/latent-engine/core/useLatentEngine.ts +3 -5
- src/lib/business/getClapAssetSourceType.ts +0 -25
.env
CHANGED
@@ -28,6 +28,8 @@ AUTH_OPENAI_API_KEY=""
|
|
28 |
VIDEOCHAIN_API_URL=""
|
29 |
VIDEOCHAIN_API_KEY=""
|
30 |
|
|
|
|
|
31 |
# ----------- CENSORSHIP -------
|
32 |
ENABLE_CENSORSHIP=
|
33 |
FINGERPRINT_KEY=
|
|
|
28 |
VIDEOCHAIN_API_URL=""
|
29 |
VIDEOCHAIN_API_KEY=""
|
30 |
|
31 |
+
MICROSERVICE_API_SECRET_TOKEN=""
|
32 |
+
|
33 |
# ----------- CENSORSHIP -------
|
34 |
ENABLE_CENSORSHIP=
|
35 |
FINGERPRINT_KEY=
|
package-lock.json
CHANGED
@@ -1,14 +1,15 @@
|
|
1 |
{
|
2 |
-
"name": "
|
3 |
"version": "0.0.0",
|
4 |
"lockfileVersion": 3,
|
5 |
"requires": true,
|
6 |
"packages": {
|
7 |
"": {
|
8 |
-
"name": "
|
9 |
"version": "0.0.0",
|
10 |
"dependencies": {
|
11 |
-
"@aitube/clap": "
|
|
|
12 |
"@huggingface/hub": "0.12.3-oauth",
|
13 |
"@huggingface/inference": "^2.6.7",
|
14 |
"@jcoreio/async-throttle": "^1.6.0",
|
@@ -60,6 +61,7 @@
|
|
60 |
"eslint": "8.45.0",
|
61 |
"eslint-config-next": "13.4.10",
|
62 |
"fastest-levenshtein": "^1.0.16",
|
|
|
63 |
"gsplat": "^1.2.4",
|
64 |
"hash-wasm": "^4.11.0",
|
65 |
"jose": "^5.2.4",
|
@@ -103,6 +105,7 @@
|
|
103 |
"zustand": "^4.4.7"
|
104 |
},
|
105 |
"devDependencies": {
|
|
|
106 |
"@types/proper-lockfile": "^4.1.2",
|
107 |
"@types/qs": "^6.9.7",
|
108 |
"@types/react-copy-to-clipboard": "^5.0.7",
|
@@ -111,18 +114,10 @@
|
|
111 |
"daisyui": "^3.7.4"
|
112 |
}
|
113 |
},
|
114 |
-
"node_modules/@aashutoshrathi/word-wrap": {
|
115 |
-
"version": "1.2.6",
|
116 |
-
"resolved": "https://registry.npmjs.org/@aashutoshrathi/word-wrap/-/word-wrap-1.2.6.tgz",
|
117 |
-
"integrity": "sha512-1Yjs2SvM8TflER/OD3cOjhWWOZb58A2t7wpE2S9XfBYTiIl+XFhQG2bjy4Pu1I+EAlCNUzRDYDdFwFYUKvXcIA==",
|
118 |
-
"engines": {
|
119 |
-
"node": ">=0.10.0"
|
120 |
-
}
|
121 |
-
},
|
122 |
"node_modules/@aitube/clap": {
|
123 |
-
"version": "0.0.
|
124 |
-
"resolved": "https://registry.npmjs.org/@aitube/clap/-/clap-0.0.
|
125 |
-
"integrity": "sha512-
|
126 |
"dependencies": {
|
127 |
"pure-uuid": "^1.8.1",
|
128 |
"yaml": "^2.4.1"
|
@@ -131,6 +126,19 @@
|
|
131 |
"typescript": "^5.4.5"
|
132 |
}
|
133 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
"node_modules/@alloc/quick-lru": {
|
135 |
"version": "5.2.0",
|
136 |
"resolved": "https://registry.npmjs.org/@alloc/quick-lru/-/quick-lru-5.2.0.tgz",
|
@@ -901,28 +909,28 @@
|
|
901 |
}
|
902 |
},
|
903 |
"node_modules/@floating-ui/core": {
|
904 |
-
"version": "1.6.
|
905 |
-
"resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.6.
|
906 |
-
"integrity": "sha512-
|
907 |
"dependencies": {
|
908 |
-
"@floating-ui/utils": "^0.2.
|
909 |
}
|
910 |
},
|
911 |
"node_modules/@floating-ui/dom": {
|
912 |
-
"version": "1.6.
|
913 |
-
"resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.6.
|
914 |
-
"integrity": "sha512-
|
915 |
"dependencies": {
|
916 |
"@floating-ui/core": "^1.0.0",
|
917 |
"@floating-ui/utils": "^0.2.0"
|
918 |
}
|
919 |
},
|
920 |
"node_modules/@floating-ui/react-dom": {
|
921 |
-
"version": "2.0.
|
922 |
-
"resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.0.
|
923 |
-
"integrity": "sha512-
|
924 |
"dependencies": {
|
925 |
-
"@floating-ui/dom": "^1.
|
926 |
},
|
927 |
"peerDependencies": {
|
928 |
"react": ">=16.8.0",
|
@@ -930,9 +938,9 @@
|
|
930 |
}
|
931 |
},
|
932 |
"node_modules/@floating-ui/utils": {
|
933 |
-
"version": "0.2.
|
934 |
-
"resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.
|
935 |
-
"integrity": "sha512-
|
936 |
},
|
937 |
"node_modules/@huggingface/hub": {
|
938 |
"version": "0.12.3-oauth",
|
@@ -1507,9 +1515,9 @@
|
|
1507 |
}
|
1508 |
},
|
1509 |
"node_modules/@mediapipe/tasks-vision": {
|
1510 |
-
"version": "0.10.13-rc.
|
1511 |
-
"resolved": "https://registry.npmjs.org/@mediapipe/tasks-vision/-/tasks-vision-0.10.13-rc.
|
1512 |
-
"integrity": "sha512-
|
1513 |
},
|
1514 |
"node_modules/@next/env": {
|
1515 |
"version": "14.2.3",
|
@@ -2930,6 +2938,15 @@
|
|
2930 |
"resolved": "https://registry.npmjs.org/@types/cookie/-/cookie-0.4.1.tgz",
|
2931 |
"integrity": "sha512-XW/Aa8APYr6jSVVA1y/DEIZX0/GMKLEVekNG727R8cs56ahETkRAy/3DR7+fJyh7oUgGwNQaRfXCun0+KbWY7Q=="
|
2932 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2933 |
"node_modules/@types/json5": {
|
2934 |
"version": "0.0.29",
|
2935 |
"resolved": "https://registry.npmjs.org/@types/json5/-/json5-0.0.29.tgz",
|
@@ -3479,6 +3496,11 @@
|
|
3479 |
"resolved": "https://registry.npmjs.org/ast-types-flow/-/ast-types-flow-0.0.8.tgz",
|
3480 |
"integrity": "sha512-OH/2E5Fg20h2aPrbe+QL8JZQFko0YZaF+j4mnQ7BGhfavO7OpSLa8a0y9sBwomHdSbkhTS8TQNayBfnW5DwbvQ=="
|
3481 |
},
|
|
|
|
|
|
|
|
|
|
|
3482 |
"node_modules/asynckit": {
|
3483 |
"version": "0.4.0",
|
3484 |
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
|
@@ -3713,9 +3735,9 @@
|
|
3713 |
}
|
3714 |
},
|
3715 |
"node_modules/caniuse-lite": {
|
3716 |
-
"version": "1.0.
|
3717 |
-
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.
|
3718 |
-
"integrity": "sha512-
|
3719 |
"funding": [
|
3720 |
{
|
3721 |
"type": "opencollective",
|
@@ -5116,6 +5138,29 @@
|
|
5116 |
"resolved": "https://registry.npmjs.org/flatted/-/flatted-3.3.1.tgz",
|
5117 |
"integrity": "sha512-X8cqMLLie7KsNUDSdzeN8FYK9rEt4Dt67OsG/DNGnYTSDBG4uFAJFBnUeiV+zCVAvwFy56IjM9sH51jVaEhNxw=="
|
5118 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5119 |
"node_modules/follow-redirects": {
|
5120 |
"version": "1.15.6",
|
5121 |
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz",
|
@@ -6655,16 +6700,16 @@
|
|
6655 |
}
|
6656 |
},
|
6657 |
"node_modules/optionator": {
|
6658 |
-
"version": "0.9.
|
6659 |
-
"resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.
|
6660 |
-
"integrity": "sha512-
|
6661 |
"dependencies": {
|
6662 |
-
"@aashutoshrathi/word-wrap": "^1.2.3",
|
6663 |
"deep-is": "^0.1.3",
|
6664 |
"fast-levenshtein": "^2.0.6",
|
6665 |
"levn": "^0.4.1",
|
6666 |
"prelude-ls": "^1.2.1",
|
6667 |
-
"type-check": "^0.4.0"
|
|
|
6668 |
},
|
6669 |
"engines": {
|
6670 |
"node": ">= 0.8.0"
|
@@ -6759,9 +6804,9 @@
|
|
6759 |
}
|
6760 |
},
|
6761 |
"node_modules/path-scurry/node_modules/lru-cache": {
|
6762 |
-
"version": "10.2.
|
6763 |
-
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.2.
|
6764 |
-
"integrity": "sha512-
|
6765 |
"engines": {
|
6766 |
"node": "14 || >=16.14"
|
6767 |
}
|
@@ -8248,9 +8293,9 @@
|
|
8248 |
}
|
8249 |
},
|
8250 |
"node_modules/type-fest": {
|
8251 |
-
"version": "4.
|
8252 |
-
"resolved": "https://registry.npmjs.org/type-fest/-/type-fest-4.
|
8253 |
-
"integrity": "sha512
|
8254 |
"engines": {
|
8255 |
"node": ">=16"
|
8256 |
},
|
@@ -8640,6 +8685,14 @@
|
|
8640 |
"url": "https://github.com/sponsors/ljharb"
|
8641 |
}
|
8642 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8643 |
"node_modules/wrap-ansi": {
|
8644 |
"version": "8.1.0",
|
8645 |
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz",
|
@@ -8766,9 +8819,9 @@
|
|
8766 |
"integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A=="
|
8767 |
},
|
8768 |
"node_modules/yaml": {
|
8769 |
-
"version": "2.4.
|
8770 |
-
"resolved": "https://registry.npmjs.org/yaml/-/yaml-2.4.
|
8771 |
-
"integrity": "sha512-
|
8772 |
"bin": {
|
8773 |
"yaml": "bin.mjs"
|
8774 |
},
|
|
|
1 |
{
|
2 |
+
"name": "@aitube/website",
|
3 |
"version": "0.0.0",
|
4 |
"lockfileVersion": 3,
|
5 |
"requires": true,
|
6 |
"packages": {
|
7 |
"": {
|
8 |
+
"name": "@aitube/website",
|
9 |
"version": "0.0.0",
|
10 |
"dependencies": {
|
11 |
+
"@aitube/clap": "0.0.7",
|
12 |
+
"@aitube/client": "0.0.7",
|
13 |
"@huggingface/hub": "0.12.3-oauth",
|
14 |
"@huggingface/inference": "^2.6.7",
|
15 |
"@jcoreio/async-throttle": "^1.6.0",
|
|
|
61 |
"eslint": "8.45.0",
|
62 |
"eslint-config-next": "13.4.10",
|
63 |
"fastest-levenshtein": "^1.0.16",
|
64 |
+
"fluent-ffmpeg": "^2.1.2",
|
65 |
"gsplat": "^1.2.4",
|
66 |
"hash-wasm": "^4.11.0",
|
67 |
"jose": "^5.2.4",
|
|
|
105 |
"zustand": "^4.4.7"
|
106 |
},
|
107 |
"devDependencies": {
|
108 |
+
"@types/fluent-ffmpeg": "^2.1.24",
|
109 |
"@types/proper-lockfile": "^4.1.2",
|
110 |
"@types/qs": "^6.9.7",
|
111 |
"@types/react-copy-to-clipboard": "^5.0.7",
|
|
|
114 |
"daisyui": "^3.7.4"
|
115 |
}
|
116 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
"node_modules/@aitube/clap": {
|
118 |
+
"version": "0.0.7",
|
119 |
+
"resolved": "https://registry.npmjs.org/@aitube/clap/-/clap-0.0.7.tgz",
|
120 |
+
"integrity": "sha512-0muPu4G1sRsNqSVZ/ICBCc4QibZ9OT33ORbahPP1+h3GYcD/7K+ZLYJjdbQwJWVEcpKDosDVaQKeNYdab0S0LA==",
|
121 |
"dependencies": {
|
122 |
"pure-uuid": "^1.8.1",
|
123 |
"yaml": "^2.4.1"
|
|
|
126 |
"typescript": "^5.4.5"
|
127 |
}
|
128 |
},
|
129 |
+
"node_modules/@aitube/client": {
|
130 |
+
"version": "0.0.7",
|
131 |
+
"resolved": "https://registry.npmjs.org/@aitube/client/-/client-0.0.7.tgz",
|
132 |
+
"integrity": "sha512-s6vxst7pkLt7tI96JS508gfk4EgdLJy5Itr76ej/zvtMRMgnKgAlfB6Bb8/1u7L5CToz4Wgk6h4kz8T+yEbEeg==",
|
133 |
+
"dependencies": {
|
134 |
+
"uuid": "^9.0.1",
|
135 |
+
"yaml": "^2.4.1"
|
136 |
+
},
|
137 |
+
"peerDependencies": {
|
138 |
+
"@aitube/clap": "0.0.7",
|
139 |
+
"typescript": "^5.4.5"
|
140 |
+
}
|
141 |
+
},
|
142 |
"node_modules/@alloc/quick-lru": {
|
143 |
"version": "5.2.0",
|
144 |
"resolved": "https://registry.npmjs.org/@alloc/quick-lru/-/quick-lru-5.2.0.tgz",
|
|
|
909 |
}
|
910 |
},
|
911 |
"node_modules/@floating-ui/core": {
|
912 |
+
"version": "1.6.1",
|
913 |
+
"resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.6.1.tgz",
|
914 |
+
"integrity": "sha512-42UH54oPZHPdRHdw6BgoBD6cg/eVTmVrFcgeRDM3jbO7uxSoipVcmcIGFcA5jmOHO5apcyvBhkSKES3fQJnu7A==",
|
915 |
"dependencies": {
|
916 |
+
"@floating-ui/utils": "^0.2.0"
|
917 |
}
|
918 |
},
|
919 |
"node_modules/@floating-ui/dom": {
|
920 |
+
"version": "1.6.4",
|
921 |
+
"resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.6.4.tgz",
|
922 |
+
"integrity": "sha512-0G8R+zOvQsAG1pg2Q99P21jiqxqGBW1iRe/iXHsBRBxnpXKFI8QwbB4x5KmYLggNO5m34IQgOIu9SCRfR/WWiQ==",
|
923 |
"dependencies": {
|
924 |
"@floating-ui/core": "^1.0.0",
|
925 |
"@floating-ui/utils": "^0.2.0"
|
926 |
}
|
927 |
},
|
928 |
"node_modules/@floating-ui/react-dom": {
|
929 |
+
"version": "2.0.9",
|
930 |
+
"resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.0.9.tgz",
|
931 |
+
"integrity": "sha512-q0umO0+LQK4+p6aGyvzASqKbKOJcAHJ7ycE9CuUvfx3s9zTHWmGJTPOIlM/hmSBfUfg/XfY5YhLBLR/LHwShQQ==",
|
932 |
"dependencies": {
|
933 |
+
"@floating-ui/dom": "^1.0.0"
|
934 |
},
|
935 |
"peerDependencies": {
|
936 |
"react": ">=16.8.0",
|
|
|
938 |
}
|
939 |
},
|
940 |
"node_modules/@floating-ui/utils": {
|
941 |
+
"version": "0.2.2",
|
942 |
+
"resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.2.tgz",
|
943 |
+
"integrity": "sha512-J4yDIIthosAsRZ5CPYP/jQvUAQtlZTTD/4suA08/FEnlxqW3sKS9iAhgsa9VYLZ6vDHn/ixJgIqRQPotoBjxIw=="
|
944 |
},
|
945 |
"node_modules/@huggingface/hub": {
|
946 |
"version": "0.12.3-oauth",
|
|
|
1515 |
}
|
1516 |
},
|
1517 |
"node_modules/@mediapipe/tasks-vision": {
|
1518 |
+
"version": "0.10.13-rc.20240428",
|
1519 |
+
"resolved": "https://registry.npmjs.org/@mediapipe/tasks-vision/-/tasks-vision-0.10.13-rc.20240428.tgz",
|
1520 |
+
"integrity": "sha512-YMOshYcwxzLNNNEKSs4hWVTRjtuX+irWIjsbENrOee491t/oM1a9bnhggMdWLq0FBQ7xuCfvp1diu/JeZFoE0A=="
|
1521 |
},
|
1522 |
"node_modules/@next/env": {
|
1523 |
"version": "14.2.3",
|
|
|
2938 |
"resolved": "https://registry.npmjs.org/@types/cookie/-/cookie-0.4.1.tgz",
|
2939 |
"integrity": "sha512-XW/Aa8APYr6jSVVA1y/DEIZX0/GMKLEVekNG727R8cs56ahETkRAy/3DR7+fJyh7oUgGwNQaRfXCun0+KbWY7Q=="
|
2940 |
},
|
2941 |
+
"node_modules/@types/fluent-ffmpeg": {
|
2942 |
+
"version": "2.1.24",
|
2943 |
+
"resolved": "https://registry.npmjs.org/@types/fluent-ffmpeg/-/fluent-ffmpeg-2.1.24.tgz",
|
2944 |
+
"integrity": "sha512-g5oQO8Jgi2kFS3tTub7wLvfLztr1s8tdXmRd8PiL/hLMLzTIAyMR2sANkTggM/rdEDAg3d63nYRRVepwBiCw5A==",
|
2945 |
+
"dev": true,
|
2946 |
+
"dependencies": {
|
2947 |
+
"@types/node": "*"
|
2948 |
+
}
|
2949 |
+
},
|
2950 |
"node_modules/@types/json5": {
|
2951 |
"version": "0.0.29",
|
2952 |
"resolved": "https://registry.npmjs.org/@types/json5/-/json5-0.0.29.tgz",
|
|
|
3496 |
"resolved": "https://registry.npmjs.org/ast-types-flow/-/ast-types-flow-0.0.8.tgz",
|
3497 |
"integrity": "sha512-OH/2E5Fg20h2aPrbe+QL8JZQFko0YZaF+j4mnQ7BGhfavO7OpSLa8a0y9sBwomHdSbkhTS8TQNayBfnW5DwbvQ=="
|
3498 |
},
|
3499 |
+
"node_modules/async": {
|
3500 |
+
"version": "3.2.5",
|
3501 |
+
"resolved": "https://registry.npmjs.org/async/-/async-3.2.5.tgz",
|
3502 |
+
"integrity": "sha512-baNZyqaaLhyLVKm/DlvdW051MSgO6b8eVfIezl9E5PqWxFgzLm/wQntEW4zOytVburDEr0JlALEpdOFwvErLsg=="
|
3503 |
+
},
|
3504 |
"node_modules/asynckit": {
|
3505 |
"version": "0.4.0",
|
3506 |
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
|
|
|
3735 |
}
|
3736 |
},
|
3737 |
"node_modules/caniuse-lite": {
|
3738 |
+
"version": "1.0.30001614",
|
3739 |
+
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001614.tgz",
|
3740 |
+
"integrity": "sha512-jmZQ1VpmlRwHgdP1/uiKzgiAuGOfLEJsYFP4+GBou/QQ4U6IOJCB4NP1c+1p9RGLpwObcT94jA5/uO+F1vBbog==",
|
3741 |
"funding": [
|
3742 |
{
|
3743 |
"type": "opencollective",
|
|
|
5138 |
"resolved": "https://registry.npmjs.org/flatted/-/flatted-3.3.1.tgz",
|
5139 |
"integrity": "sha512-X8cqMLLie7KsNUDSdzeN8FYK9rEt4Dt67OsG/DNGnYTSDBG4uFAJFBnUeiV+zCVAvwFy56IjM9sH51jVaEhNxw=="
|
5140 |
},
|
5141 |
+
"node_modules/fluent-ffmpeg": {
|
5142 |
+
"version": "2.1.2",
|
5143 |
+
"resolved": "https://registry.npmjs.org/fluent-ffmpeg/-/fluent-ffmpeg-2.1.2.tgz",
|
5144 |
+
"integrity": "sha512-IZTB4kq5GK0DPp7sGQ0q/BWurGHffRtQQwVkiqDgeO6wYJLLV5ZhgNOQ65loZxxuPMKZKZcICCUnaGtlxBiR0Q==",
|
5145 |
+
"dependencies": {
|
5146 |
+
"async": ">=0.2.9",
|
5147 |
+
"which": "^1.1.1"
|
5148 |
+
},
|
5149 |
+
"engines": {
|
5150 |
+
"node": ">=0.8.0"
|
5151 |
+
}
|
5152 |
+
},
|
5153 |
+
"node_modules/fluent-ffmpeg/node_modules/which": {
|
5154 |
+
"version": "1.3.1",
|
5155 |
+
"resolved": "https://registry.npmjs.org/which/-/which-1.3.1.tgz",
|
5156 |
+
"integrity": "sha512-HxJdYWq1MTIQbJ3nw0cqssHoTNU267KlrDuGZ1WYlxDStUtKUhOaJmh112/TZmHxxUfuJqPXSOm7tDyas0OSIQ==",
|
5157 |
+
"dependencies": {
|
5158 |
+
"isexe": "^2.0.0"
|
5159 |
+
},
|
5160 |
+
"bin": {
|
5161 |
+
"which": "bin/which"
|
5162 |
+
}
|
5163 |
+
},
|
5164 |
"node_modules/follow-redirects": {
|
5165 |
"version": "1.15.6",
|
5166 |
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz",
|
|
|
6700 |
}
|
6701 |
},
|
6702 |
"node_modules/optionator": {
|
6703 |
+
"version": "0.9.4",
|
6704 |
+
"resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz",
|
6705 |
+
"integrity": "sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g==",
|
6706 |
"dependencies": {
|
|
|
6707 |
"deep-is": "^0.1.3",
|
6708 |
"fast-levenshtein": "^2.0.6",
|
6709 |
"levn": "^0.4.1",
|
6710 |
"prelude-ls": "^1.2.1",
|
6711 |
+
"type-check": "^0.4.0",
|
6712 |
+
"word-wrap": "^1.2.5"
|
6713 |
},
|
6714 |
"engines": {
|
6715 |
"node": ">= 0.8.0"
|
|
|
6804 |
}
|
6805 |
},
|
6806 |
"node_modules/path-scurry/node_modules/lru-cache": {
|
6807 |
+
"version": "10.2.2",
|
6808 |
+
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.2.2.tgz",
|
6809 |
+
"integrity": "sha512-9hp3Vp2/hFQUiIwKo8XCeFVnrg8Pk3TYNPIR7tJADKi5YfcF7vEaK7avFHTlSy3kOKYaJQaalfEo6YuXdceBOQ==",
|
6810 |
"engines": {
|
6811 |
"node": "14 || >=16.14"
|
6812 |
}
|
|
|
8293 |
}
|
8294 |
},
|
8295 |
"node_modules/type-fest": {
|
8296 |
+
"version": "4.18.0",
|
8297 |
+
"resolved": "https://registry.npmjs.org/type-fest/-/type-fest-4.18.0.tgz",
|
8298 |
+
"integrity": "sha512-+dbmiyliDY/2TTcjCS7NpI9yV2iEFlUDk5TKnsbkN7ZoRu5s7bT+zvYtNFhFXC2oLwURGT2frACAZvbbyNBI+w==",
|
8299 |
"engines": {
|
8300 |
"node": ">=16"
|
8301 |
},
|
|
|
8685 |
"url": "https://github.com/sponsors/ljharb"
|
8686 |
}
|
8687 |
},
|
8688 |
+
"node_modules/word-wrap": {
|
8689 |
+
"version": "1.2.5",
|
8690 |
+
"resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.5.tgz",
|
8691 |
+
"integrity": "sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==",
|
8692 |
+
"engines": {
|
8693 |
+
"node": ">=0.10.0"
|
8694 |
+
}
|
8695 |
+
},
|
8696 |
"node_modules/wrap-ansi": {
|
8697 |
"version": "8.1.0",
|
8698 |
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz",
|
|
|
8819 |
"integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A=="
|
8820 |
},
|
8821 |
"node_modules/yaml": {
|
8822 |
+
"version": "2.4.2",
|
8823 |
+
"resolved": "https://registry.npmjs.org/yaml/-/yaml-2.4.2.tgz",
|
8824 |
+
"integrity": "sha512-B3VqDZ+JAg1nZpaEmWtTXUlBneoGx6CPM9b0TENK6aoSu5t73dItudwdgmi6tHlIZZId4dZ9skcAQ2UbcyAeVA==",
|
8825 |
"bin": {
|
8826 |
"yaml": "bin.mjs"
|
8827 |
},
|
package.json
CHANGED
@@ -9,7 +9,8 @@
|
|
9 |
"lint": "next lint"
|
10 |
},
|
11 |
"dependencies": {
|
12 |
-
"@aitube/clap": "
|
|
|
13 |
"@huggingface/hub": "0.12.3-oauth",
|
14 |
"@huggingface/inference": "^2.6.7",
|
15 |
"@jcoreio/async-throttle": "^1.6.0",
|
@@ -61,6 +62,7 @@
|
|
61 |
"eslint": "8.45.0",
|
62 |
"eslint-config-next": "13.4.10",
|
63 |
"fastest-levenshtein": "^1.0.16",
|
|
|
64 |
"gsplat": "^1.2.4",
|
65 |
"hash-wasm": "^4.11.0",
|
66 |
"jose": "^5.2.4",
|
@@ -104,6 +106,7 @@
|
|
104 |
"zustand": "^4.4.7"
|
105 |
},
|
106 |
"devDependencies": {
|
|
|
107 |
"@types/proper-lockfile": "^4.1.2",
|
108 |
"@types/qs": "^6.9.7",
|
109 |
"@types/react-copy-to-clipboard": "^5.0.7",
|
|
|
9 |
"lint": "next lint"
|
10 |
},
|
11 |
"dependencies": {
|
12 |
+
"@aitube/clap": "0.0.7",
|
13 |
+
"@aitube/client": "0.0.7",
|
14 |
"@huggingface/hub": "0.12.3-oauth",
|
15 |
"@huggingface/inference": "^2.6.7",
|
16 |
"@jcoreio/async-throttle": "^1.6.0",
|
|
|
62 |
"eslint": "8.45.0",
|
63 |
"eslint-config-next": "13.4.10",
|
64 |
"fastest-levenshtein": "^1.0.16",
|
65 |
+
"fluent-ffmpeg": "^2.1.2",
|
66 |
"gsplat": "^1.2.4",
|
67 |
"hash-wasm": "^4.11.0",
|
68 |
"jose": "^5.2.4",
|
|
|
106 |
"zustand": "^4.4.7"
|
107 |
},
|
108 |
"devDependencies": {
|
109 |
+
"@types/fluent-ffmpeg": "^2.1.24",
|
110 |
"@types/proper-lockfile": "^4.1.2",
|
111 |
"@types/qs": "^6.9.7",
|
112 |
"@types/react-copy-to-clipboard": "^5.0.7",
|
src/app/api/generators/speech/generateVoiceWithElevenLabs.txt
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { getMediaInfo } from "../../utils/getMediaInfo"
|
2 |
+
import { readMp3FileToBase64 } from "../../utils/readMp3FileToBase64"
|
3 |
+
|
4 |
+
/**
 * Generates speech for `text` through the ElevenLabs client and returns the
 * written audio file together with its metadata.
 *
 * NOTE(review): `ElevenLabs` is not imported in the visible part of this file,
 * it has to be brought into scope before this function can compile.
 *
 * @param text - the text to convert to speech
 * @param audioId - forwarded as the second argument of `api.tts()` (presumably a voice id — TODO confirm)
 * @param debug - currently unused
 * @returns the path, name and extension of the written file, its content as
 *          returned by `readMp3FileToBase64()` and its duration as returned by `getMediaInfo()`
 * @throws Error when the API status message does not contain the success marker,
 *         or when no file name can be extracted from it
 */
export async function generateSpeechWithElevenLabs({
  text,
  audioId,
  debug = false,
}: {
  text: string
  audioId: string
  debug?: boolean
}): Promise<{
  filePath: string
  fileName: string
  format: string // "mp3"
  base64: string // data uri
  durationInSec: number
  durationInMs: number
}> {
  const api = await ElevenLabs()

  // Converts text to speech, saves the file to the output folder and returns the relative path to the file.
  // Output file is in the following format: TTS_date-time.mp3
  // Returns an object with the following structure: { code: CODE, message: "STATUS_MESSAGE" }
  const result = await api.tts(text, audioId)

  // the only way to get the output path is to parse it out of the status message,
  // so we make sure the message really is the success message before trusting it
  const successMarker = "File written successfully:"
  const message = `${result?.message ?? ""}`
  const markerIndex = message.lastIndexOf(successMarker)
  if (markerIndex < 0) {
    throw new Error(`generateSpeechWithElevenLabs failed (unexpected API response: ${message})`)
  }
  const relativeOutputPath = message.slice(markerIndex + successMarker.length).trim()

  // we remove the . at the beginning, so we get something like:
  // "/../../../../var/folders/x4/2w7-------------------"
  // then we remove relative navigation to only keep this:
  // "/var/folders/x4/2w7-------------------"
  const filePath = relativeOutputPath.slice(1).replaceAll("/..", "")

  const fileName = filePath.split("/").pop() ?? ""
  if (!fileName) {
    throw new Error(`generateSpeechWithElevenLabs failed (no file name in path: ${filePath})`)
  }

  const format = fileName.split(".").pop() ?? ""

  const { durationInSec, durationInMs } = await getMediaInfo(filePath)

  const base64 = await readMp3FileToBase64(filePath)

  return {
    filePath,
    fileName,
    format,
    base64,
    durationInSec,
    durationInMs,
  }
}
|
src/app/api/generators/speech/generateVoiceWithParlerTTS.ts
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { addBase64Header } from "@/lib/data/addBase64Header"
|
2 |
+
import { tryApiCalls } from "../../utils/tryApiCall"
|
3 |
+
|
4 |
+
const gradioSpaceApiUrl = `https://jbilcke-hf-ai-tube-model-parler-tts-mini.hf.space`
|
5 |
+
const huggingFaceSpace = "jbilcke-hf/ai-tube-model-parler-tts-mini"
|
6 |
+
const apiKey = `${process.env.MICROSERVICE_API_SECRET_TOKEN || ""}`
|
7 |
+
|
8 |
+
/**
 * Requests a speech generation from the Parler-TTS Gradio space.
 *
 * The request is a POST to `<gradioSpaceApiUrl>/api/predict` with
 * `[apiKey, text, audioId]` as Gradio inputs, executed through `tryApiCalls()`.
 *
 * NOTE(review): response decoding is not implemented yet, the inner call
 * always returns an empty string once the request has succeeded.
 *
 * @param text - the text to convert to speech (must not be empty)
 * @param audioId - forwarded as the third Gradio input
 * @param debug - forwarded to `tryApiCalls()`; also enables logging of the raw response
 * @param neverThrow - when true, failures are logged and "" is returned instead of throwing
 * @returns whatever `tryApiCalls()` resolves to (presumably the result of the inner call — TODO confirm)
 * @throws Error when `text` is empty or the request fails, unless `neverThrow` is set
 */
export async function generateSpeechWithParlerTTS({
  text,
  audioId,
  debug = false,
  neverThrow = false,
}: {
  text: string
  audioId: string
  debug?: boolean
  neverThrow?: boolean
}): Promise<string> {

  const actualFunction = async (): Promise<string> => {

    const res = await fetch(gradioSpaceApiUrl + (gradioSpaceApiUrl.endsWith("/") ? "" : "/") + "api/predict", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        // Authorization: `Bearer ${token}`,
      },
      body: JSON.stringify({
        fn_index: 0, // <- important!
        data: [
          apiKey,
          text,
          audioId,
        ],
      }),
      cache: "no-store",
      // we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache)
      // next: { revalidate: 1 }
    })

    if (res.status !== 200) {
      throw new Error('Failed to fetch data')
    }

    const rawJson = await res.json()

    // the payload can be large, so it is only dumped when debugging
    if (debug) {
      console.log("rawJson:", rawJson)
    }

    // TODO: addBase64 with the right header type

    return ""
  }

  try {
    if (!text?.length) {
      throw new Error(`text is too short!`)
    }

    const result = await tryApiCalls({
      func: actualFunction,
      huggingFaceSpace,
      debug,
      failureMessage: "failed to generate the audio"
    })
    return result
  } catch (err) {
    if (neverThrow) {
      console.error(`generateVoiceWithParlerTTS():`, err)
      return ""
    } else {
      throw err
    }
  }
}
|
src/app/api/generators/speech/generateVoiceWithXTTS2.txt
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import { StoryLine } from "../../types/structures.mts"
|
3 |
+
import { tryApiCalls } from "../../utils/tryApiCalls.mts"
|
4 |
+
import { promptToGenerateAudioStory } from "../prompts/prompts.mts"
|
5 |
+
import { microserviceApiKey } from "../../config.mts"
|
6 |
+
import { addBase64Header } from "../../base64/addBase64.mts"
|
7 |
+
|
8 |
+
// TODO delete this? we don't need an env var for this I think?
|
9 |
+
const aiStoryServerApiUrl = `https://jbilcke-hf-ai-story-server.hf.space`
|
10 |
+
const huggingFaceSpace = "jbilcke-hf/ai-story-server"
|
11 |
+
|
12 |
+
/**
 * Generates an audio story (a list of text lines with their audio) from a prompt.
 *
 * The request is a POST to `<aiStoryServerApiUrl>/api/predict`, executed through
 * `tryApiCalls()`. Each returned line has its text punctuation tidied up and its
 * audio passed through `addBase64Header(..., "mp4")`.
 *
 * @param prompt - the user prompt (must not be empty)
 * @param voice - "Julian" selects that voice, any other value falls back to "Cloée"
 * @param neverThrow - when true, failures are logged and [] is returned instead of throwing
 * @param debug - forwarded to `tryApiCalls()`
 * @returns whatever `tryApiCalls()` resolves to (presumably the mapped story lines — TODO confirm)
 * @throws Error when `prompt` is empty or the request fails, unless `neverThrow` is set
 */
export async function generateAudioStory({
  prompt,
  voice,
  // maxLines,
  neverThrow,
  debug,
}: {
  prompt: string
  voice?: string
  // maxLines: number
  neverThrow?: boolean
  debug?: boolean
}): Promise<StoryLine[]> {
  const actualFunction = async () => {

    // positivePrompt = filterOutBadWords(positivePrompt)

    const res = await fetch(aiStoryServerApiUrl + (aiStoryServerApiUrl.endsWith("/") ? "" : "/") + "api/predict", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        // Authorization: `Bearer ${token}`,
      },
      body: JSON.stringify({
        fn_index: 0, // <- important!
        data: [
          microserviceApiKey,
          promptToGenerateAudioStory,
          prompt,

          // TODO: add support for custom wav
          voice === "Julian" ? "Julian" : "Cloée",

          // maxLines,
        ],
      }),
      cache: "no-store",
      // we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache)
      // next: { revalidate: 1 }
    })

    // check the status BEFORE parsing: an error response is not guaranteed to be
    // JSON, and a parse error would hide the actual failure
    if (res.status !== 200) {
      throw new Error('Failed to fetch data')
    }

    const rawJson = await res.json()
    const data = rawJson.data as StoryLine[][]

    const stories = data?.[0] || []

    return stories.map(line => ({
      text: line.text.replaceAll(" .", ".").replaceAll(" ?", "?").replaceAll(" !", "!").trim(),
      audio: addBase64Header(line.audio, "mp4")
    }))
  }

  try {
    if (!prompt?.length) {
      throw new Error(`prompt is too short!`)
    }

    const result = await tryApiCalls({
      func: actualFunction,
      huggingFaceSpace,
      debug,
      failureMessage: "failed to generate the audio story"
    })
    return result
  } catch (err) {
    if (neverThrow) {
      console.error(`generateAudioStory():`, err)
      return []
    } else {
      throw err
    }
  }
}
|
src/app/api/utils/addBase64.ts
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/** Supported file extensions and the MIME type written into the data URI header. */
const MIME_TYPE_BY_EXTENSION: ReadonlyMap<string, string> = new Map([
  ["jpeg", "image/jpeg"],
  ["jpg", "image/jpeg"],
  ["webp", "image/webp"],
  ["png", "image/png"],
  ["heic", "image/heic"],
  ["mp3", "audio/mp3"],
  ["mp4", "video/mp4"],
  ["webm", "video/webm"],
  ["wav", "audio/wav"],
])

/**
 * Turns a raw base64 payload into a `data:<mime>;base64,...` URI.
 *
 * @param image - the base64 payload, or an already complete data URI
 *                (despite the name, audio and video payloads are accepted too)
 * @param format - a file extension or a file name ending with one ("png", "clip.MP4", ...);
 *                 matching is case-insensitive
 * @returns "" when `image` is missing, not a string or shorter than 60 characters;
 *          `image` unchanged when it already carries the expected header;
 *          otherwise `image` prefixed with the header
 * @throws Error when the format is not supported, or when `image` is a data URI
 *         whose header does not match the requested format
 */
export function addBase64Header(
  image?: string,
  format?:
    | "jpeg" | "jpg" | "png" | "webp" | "heic"
    | "mp3" | "wav"
    | "mp4" | "webm"
    | string
) {

  // too short to be an actual payload (this check runs before the format validation)
  if (!image || typeof image !== "string" || image.length < 60) {
    return ""
  }

  // accept "png" as well as "picture.PNG"
  const ext = (`${format || ""}`.split(".").pop() || "").toLowerCase().trim()

  const mime = MIME_TYPE_BY_EXTENSION.get(ext)
  if (!mime) {
    throw new Error(`addBase64Header failed (unsupported format: ${format})`)
  }

  const header = `data:${mime};base64,`

  if (!image.startsWith('data:')) {
    return `${header}${image}`
  }

  if (image.startsWith(header)) {
    return image
  }

  throw new Error(`addBase64Header failed (input string is NOT a ${mime} image)`)
}
|
src/app/api/utils/getHuggingFaceSpaceStatus.ts
ADDED
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
/** Actually `hf_${string}`, but for convenience, using the string type */
type AccessToken = string;

/** Wrapper around an access token. */
interface Credentials {
  accessToken: AccessToken;
}

/** Hardware identifiers used in `SpaceRuntime.hardware`. */
type SpaceHardwareFlavor =
  | "cpu-basic"
  | "cpu-upgrade"
  | "t4-small"
  | "t4-medium"
  | "a10g-small"
  | "a10g-large"
  | "a100-large";

/** SDK identifiers used in `SpaceRuntime.sdk`. */
type SpaceSdk = "streamlit" | "gradio" | "docker" | "static";

/** Lifecycle stage identifiers used in `SpaceRuntime.stage`. */
type SpaceStage =
  | "NO_APP_FILE"
  | "CONFIG_ERROR"
  | "BUILDING"
  | "BUILD_ERROR"
  | "RUNNING"
  | "RUNNING_BUILDING"
  | "RUNTIME_ERROR"
  | "DELETING"
  | "PAUSED"
  | "SLEEPING";

type AccessTokenRole = "admin" | "write" | "contributor" | "read";

type AuthType = "access_token" | "app_token" | "app_token_as_user";

/** Runtime section of a space status (see `HFSpaceStatus.runtime`). */
interface SpaceRuntime {
  stage: SpaceStage;
  sdk?: SpaceSdk;
  sdkVersion?: string;
  errorMessage?: string;
  hardware?: {
    current: SpaceHardwareFlavor | null;
    currentPrettyName?: string;
    requested: SpaceHardwareFlavor | null;
    requestedPrettyName?: string;
  };
  /** when calling /spaces, those props are only fetched if ?full=true */
  resources?: SpaceResourceConfig;
  /** in seconds */
  gcTimeout?: number | null;
}

/** Resource quantities, used for both `requests` and `limits` of `SpaceResourceConfig`. */
interface SpaceResourceRequirement {
  cpu?: string;
  memory?: string;
  gpu?: string;
  gpuModel?: string;
  ephemeral?: string;
}

/** Resource configuration attached to `SpaceRuntime.resources`. */
interface SpaceResourceConfig {
  requests: SpaceResourceRequirement;
  limits: SpaceResourceRequirement;
  replicas?: number;
  throttled?: boolean;
  is_custom?: boolean;
}

/**
 * Status of a Hugging Face space, as returned by `getHuggingFaceSpaceStatus()`.
 *
 * NOTE(review): presumably mirrors the JSON of `https://huggingface.co/api/spaces/<space>` — confirm.
 */
export interface HFSpaceStatus {
  _id: string
  id: string
  author: string
  sha: string
  lastModified: string
  private: boolean
  gated: boolean
  disabled: boolean
  host: string
  subdomain: string
  tags: string[]
  likes: number
  sdk: string
  runtime: SpaceRuntime
  createdAt: string
}
|
87 |
+
|
88 |
+
export async function getHuggingFaceSpaceStatus({
|
89 |
+
space,
|
90 |
+
// userName,
|
91 |
+
// spaceName,
|
92 |
+
}: {
|
93 |
+
space: string // a joined "user_name/space_name"
|
94 |
+
// userName: string
|
95 |
+
// spaceName: string
|
96 |
+
}): Promise<HFSpaceStatus> {
|
97 |
+
const res = await fetch(`https://huggingface.co/api/spaces/${space}`, {
|
98 |
+
method: "GET",
|
99 |
+
headers: {
|
100 |
+
Authorization: `Bearer ${process.env.ADMIN_HUGGING_FACE_API_TOKEN || ""}`
|
101 |
+
}
|
102 |
+
})
|
103 |
+
|
104 |
+
if (res.status !== 200) {
|
105 |
+
throw new Error("failed to get the space data")
|
106 |
+
}
|
107 |
+
|
108 |
+
try {
|
109 |
+
const data = await res.json() as HFSpaceStatus
|
110 |
+
return data
|
111 |
+
} catch (err) {
|
112 |
+
throw new Error(`failed to parse space data: ${err}`)
|
113 |
+
}
|
114 |
+
}
|
src/app/api/utils/getMediaInfo.ts
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import ffmpeg from "fluent-ffmpeg";
|
2 |
+
|
3 |
+
import { tmpdir } from "node:os";
|
4 |
+
import { promises as fs } from "node:fs";
|
5 |
+
import { join } from "node:path";
|
6 |
+
|
7 |
+
/** Summary of a media file as reported by the metadata probe. */
export type MediaMetadata = {
  durationInSec: number;
  durationInMs: number;
  hasAudio: boolean;
};

/**
 * Get the media info of a base64 data URI or of a file path.
 *
 * A `data:` URI is decoded and written to a file inside a freshly created
 * private temporary directory, probed, and the directory is removed afterwards
 * (even when writing or probing fails). Any other input is treated as a path
 * and probed directly.
 *
 * @param input a `data:...;base64,...` URI, or a path to a media file
 * @returns the metadata produced by getMetaDataFromPath
 * @throws Error("Invalid base64 data") when a `data:` URI has no `;base64,` payload
 */
export async function getMediaInfo(input: string): Promise<MediaMetadata> {
  // If the input is a path to the file
  if (!input.startsWith("data:")) {
    return await getMetaDataFromPath(input);
  }

  // Extract the base64 content. A URI without the marker has no payload at all:
  // it must be rejected rather than decoded as if the whole URI were base64.
  const marker = ";base64,";
  const markerIndex = input.lastIndexOf(marker);
  const base64Content = markerIndex >= 0 ? input.slice(markerIndex + marker.length) : "";
  if (!base64Content) {
    throw new Error("Invalid base64 data");
  }

  // Decode the base64 content to a buffer
  const buffer = Buffer.from(base64Content, "base64");

  // mkdtemp() appends a unique suffix, so concurrent calls can never share
  // (and then delete) each other's temporary file, unlike a timestamp-based name
  const tempDir = await fs.mkdtemp(join(tmpdir(), "temp-media-"));

  try {
    const tempFileName = join(tempDir, "media");
    await fs.writeFile(tempFileName, buffer);
    return await getMetaDataFromPath(tempFileName);
  } finally {
    await fs.rm(tempDir, { recursive: true, force: true });
  }
}
|
47 |
+
|
48 |
+
/**
 * Probe a media file with ffprobe and summarize its duration and audio presence.
 *
 * This is deliberately best-effort: the returned promise never rejects. When
 * ffprobe fails, or when its output cannot be interpreted, the result is a
 * zero duration with `hasAudio: false`.
 *
 * @param filePath path of the file handed to ffprobe
 * @returns the duration (seconds and milliseconds) and whether any stream is audio
 */
async function getMetaDataFromPath(filePath: string): Promise<MediaMetadata> {
  return new Promise((resolve) => {
    ffmpeg.ffprobe(filePath, (err, metadata) => {

      const results: MediaMetadata = {
        durationInSec: 0,
        durationInMs: 0,
        hasAudio: false,
      }

      if (err) {
        // keep the underlying error in the log, it is the only trace of the failure
        console.error("getMediaInfo(): failed to analyze the source (might happen with empty files)", err)
        resolve(results);
        return;
      }

      try {
        // Coerce before use: a non-numeric duration must not leak into the
        // result as a string or as NaN (NOTE(review): ffprobe presumably
        // reports values such as "N/A" for some inputs - confirm).
        const duration = Number(metadata?.format?.duration);
        if (Number.isFinite(duration)) {
          results.durationInSec = duration;
          results.durationInMs = duration * 1000;
        }
        results.hasAudio = (metadata?.streams || []).some((stream) => stream.codec_type === 'audio');
      } catch (err) {
        console.error(`getMediaInfo(): failed to analyze the source (might happen with empty files)`)
        results.durationInSec = 0
        results.durationInMs = 0
        results.hasAudio = false
      }
      resolve(results);
    });
  });
}
|
src/app/api/utils/makeSureSpaceIsRunning.ts
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { getHuggingFaceSpaceStatus } from "./getHuggingFaceSpaceStatus"
|
2 |
+
import { sleep } from "./sleep"
|
3 |
+
|
4 |
+
|
5 |
+
/**
 * Make sure a Hugging Face Space is running, restarting it through the Hub API when it is not.
 *
 * Flow:
 * 1. no `space` given: nothing to do
 * 2. the space already reports the RUNNING stage: return immediately
 * 3. otherwise POST to the restart endpoint, then poll the stage until it is
 *    RUNNING, until it leaves the BUILDING / RUNNING_BUILDING stages, or until
 *    `maxWaitTimeInSec` has elapsed
 *
 * Elapsed time is measured on the wall clock, so the time spent inside the
 * status requests counts toward the timeout and a polling interval of 0 can
 * no longer spin forever.
 *
 * @param space a joined "user_name/space_name"
 * @param maxWaitTimeInSec how long to keep polling a building space
 * @param statusUpdateFrequencyInSec pause between two status checks
 * @throws Error when the restart request doesn't answer 200, when the space
 *   ends up in a stage other than RUNNING / BUILDING / RUNNING_BUILDING, or
 *   when it is still BUILDING once the timeout is reached
 */
export async function makeSureSpaceIsRunning({
  space,
  maxWaitTimeInSec = 15 * 60, // some spaces are ultra slow to cold boot (eg. data dl at runtime)
  statusUpdateFrequencyInSec = 5,
}: {
  space?: string // a joined "user_name/space_name"

  maxWaitTimeInSec?: number

  statusUpdateFrequencyInSec?: number
}): Promise<void> {
  if (!space) { return }

  try {
    const { runtime: { stage } } = await getHuggingFaceSpaceStatus({ space })
    if (stage === "RUNNING") {
      return
    }
  } catch (err) {
    // deliberate best effort: if the status cannot be read, we still attempt
    // the restart below, which reports its own failure
  }

  const res = await fetch(`https://huggingface.co/api/spaces/${space}/restart`, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${process.env.ADMIN_HUGGING_FACE_API_TOKEN || ""}`
    }
  })

  if (res.status !== 200) {
    process.stdout.write(`failure!\nwe couldn't trigger the restart of space "${space}"\n`)

    throw new Error(`failed to trigger the restart of space "${space}" (status is not 200)`)
  }

  const startedAtInMs = Date.now()
  const getElapsedTimeInSec = (): number => (Date.now() - startedAtInMs) / 1000

  process.stdout.write(`trying to restart space "${space}"`)

  while (true) {
    process.stdout.write(".")
    const { runtime: { stage } } = await getHuggingFaceSpaceStatus({ space })

    if (stage === "RUNNING") {
      process.stdout.write(`success!\nspace "${space}" is ${stage} (took ${Math.round(getElapsedTimeInSec())} sec)\n`)
      return
    }

    if (stage !== "BUILDING" && stage !== "RUNNING_BUILDING") {
      process.stdout.write(`failure!\nspace "${space}" is ${stage} (after ${Math.round(getElapsedTimeInSec())} sec)\n`)
      throw new Error(`failed to build space ${space} (reason: space is ${stage})`)
    }

    // still building: let's wait more
    await sleep(statusUpdateFrequencyInSec * 1000)

    if (getElapsedTimeInSec() < maxWaitTimeInSec) {
      continue
    }

    process.stdout.write(`failure!\nspace "${space}" is still ${stage} (after ${Math.round(getElapsedTimeInSec())} sec)\n`)
    if (stage === "BUILDING") {
      throw new Error(`failed to start space ${space} (reason: space is ${stage}, but we reached the ${maxWaitTimeInSec} sec timeout)`)
    }

    // if we are "RUNNING_BUILDING" we assume it is.. okay? I guess?
    return
  }
}
|
src/app/api/utils/readMp3FileToBase64.ts
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { readFile } from "node:fs/promises"
|
2 |
+
|
3 |
+
/**
 * Load a file from disk and return it as an `audio/mp3` base64 data URI.
 *
 * @param filePath path of the file to read
 * @returns `data:audio/mp3;base64,` followed by the base64-encoded file content
 * @throws whatever the read fails with (file not found, no permissions, etc.),
 *   after logging it
 */
export async function readMp3FileToBase64(filePath: string): Promise<string> {
  return readFile(filePath)
    .then((contents) => "data:audio/mp3;base64," + contents.toString("base64"))
    .catch((error) => {
      // log, then let the caller deal with the failure
      console.error(error);
      throw error;
    });
}
|
src/app/api/utils/sleep.ts
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/**
 * Pause for a given duration.
 *
 * @param durationInMs how long to wait, in milliseconds
 * @returns a promise that resolves to `true` once the delay has passed
 */
export const sleep = async (durationInMs: number) => {
  const delay = new Promise((done) => {
    // extra setTimeout arguments are forwarded to the callback: resolves with `true`
    setTimeout(done, durationInMs, true)
  })
  return delay
}
|
src/app/api/utils/timeout.ts
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/**
 * Race a promise against a timer.
 *
 * @param promise the promise to wait for
 * @param ms how long to wait, in milliseconds, before giving up
 * @param timeoutError the error to reject with when the timer wins
 * @returns a promise that settles like `promise` when it settles first, and
 *   rejects with `timeoutError` otherwise
 *
 * The timer is cleared as soon as the race is decided, so a promise that
 * settles early doesn't leave a pending timer keeping the event loop alive
 * for the rest of `ms`.
 */
export function timeout<T>(
  promise: Promise<T>,
  ms: number,
  timeoutError = new Error('Promise timed out')
): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined

  // create a promise that rejects in milliseconds
  const promiseWithTimeout = new Promise<never>((_, reject) => {
    timer = setTimeout(() => {
      reject(timeoutError);
    }, ms);
  });

  // returns a race between timeout and the passed promise
  return Promise.race<T>([promise, promiseWithTimeout]).finally(() => {
    clearTimeout(timer)
  });
}
|
src/app/api/utils/tryApiCall.ts
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { makeSureSpaceIsRunning } from "./makeSureSpaceIsRunning"
|
2 |
+
import { sleep } from "./sleep"
|
3 |
+
import { timeout } from "./timeout"
|
4 |
+
|
5 |
+
const sec = 1000
const min = 60 * sec

/**
 * Call an API with retries, optionally waking up its Hugging Face Space first.
 *
 * One attempt is made per entry of `delays`. After a failed attempt `i` the
 * function waits `delays[i]` ms before the next one. Nothing is waited after
 * the final attempt, since no retry follows it (so the last entry only counts
 * as an attempt).
 *
 * @param func the call to perform; invoked again on each attempt
 * @param huggingFaceSpace optional name of the space, used to wake it up if necessary
 * @param debug when true, the error of each failed attempt is logged
 * @param failureMessage prefix of the error thrown once all attempts have failed
 * @param autostart when true, makeSureSpaceIsRunning() is called before each attempt
 * @param timeoutInSec maximum duration of a single call to `func`
 * @param delays wait times (in ms) between attempts; its length is the number of attempts
 * @returns the result of the first successful call
 * @throws Error when every attempt failed (or when `delays` is empty)
 */
export async function tryApiCalls<T>({
  func,
  huggingFaceSpace,
  debug = false,
  failureMessage = "failed to call the endpoint",
  autostart = true,

  // wait up to 10 min
  timeoutInSec = 10 * 60,

  delays = [
    5 * sec,
    15 * sec,
    40 * sec, // total 1 min wait time

    // at this stage, if it is so slow it means we are probably waking up a model
    // which is a slow operation (takes ~5 min)

    2 * min, // ~ 3 min ~
    1 * min, // ~ 4 min ~
    1 * min, // final attempt
  ]
}: {
  func: () => Promise<T>

  // optional: the name of the hugging face space
  // this will be used to "wake up" the space if necessary
  huggingFaceSpace?: string

  debug?: boolean
  failureMessage?: string
  autostart?: boolean
  timeoutInSec?: number
  delays?: number[]
}): Promise<T> {
  let lastError: unknown = undefined
  const lastAttempt = delays.length - 1

  for (let i = 0; i < delays.length; i++) {
    try {
      if (autostart) {
        await makeSureSpaceIsRunning({ space: huggingFaceSpace })
      }

      // due to an error with the Gradio client, sometimes calling the api.predict
      // will never throw an error
      const result = await timeout(
        func(), // grab the promise
        timeoutInSec * 1000,
        new Error(`call to ${huggingFaceSpace || "the API"} failed after ${timeoutInSec} seconds`)
      )
      return result
    } catch (err) {
      lastError = err
      if (debug) { console.error(err) }
      process.stdout.write(".")

      // back off before the next attempt; sleeping after the last one would
      // only postpone the failure
      if (i < lastAttempt) {
        await sleep(delays[i])
      }
    }
  }

  // surface the last failure, otherwise it is lost whenever debug is off
  const reason = lastError === undefined ? "" : ` (last error: ${lastError})`
  throw new Error(`${failureMessage} after ${delays.length} attempts${reason}`)
}
|
src/app/api/v1/edit/dialogues/route.ts
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { NextResponse, NextRequest } from "next/server"
|
2 |
+
|
3 |
+
import { ClapEntity, ClapProject, ClapSegment, getClapAssetSourceType, newSegment, parseClap, serializeClap } from "@aitube/clap"
|
4 |
+
|
5 |
+
import { startOfSegment1IsWithinSegment2 } from "@/lib/utils/startOfSegment1IsWithinSegment2"
|
6 |
+
import { getToken } from "@/app/api/auth/getToken"
|
7 |
+
|
8 |
+
import { getSpeechBackgroundAudioPrompt } from "@/components/interface/latent-engine/core/prompts/getSpeechBackgroundAudioPrompt"
|
9 |
+
import { getSpeechForegroundAudioPrompt } from "@/components/interface/latent-engine/core/prompts/getSpeechForegroundAudioPrompt"
|
10 |
+
import { generateSpeechWithParlerTTS } from "@/app/api/generators/speech/generateVoiceWithParlerTTS"
|
11 |
+
|
12 |
+
/**
 * POST handler: generate the missing dialogue audio of a Clap project.
 *
 * The request body is parsed as a serialized Clap. For each "camera" segment
 * (a shot), the first "dialogue" segment starting within that shot gets an
 * audio asset generated with Parler-TTS, unless it already has an `assetUrl`.
 * The augmented Clap is serialized back with an "application/x-gzip" content type.
 *
 * @throws Error when the Clap has no segments, when it has more than 32 shots
 *   (the endpoint is synchronous), or when the speech generation fails
 */
export async function POST(req: NextRequest) {

  // NOTE(review): the token isn't used below; the call is kept because
  // getToken() may have side effects - confirm before removing
  const jwtToken = await getToken({ user: "anonymous" })

  const blob = await req.blob()

  const clap: ClapProject = await parseClap(blob)

  if (!clap?.segments) { throw new Error(`no segment found in the provided clap!`) }

  console.log(`[api/generate/dialogues] detected ${clap.segments.length} segments`)

  const shotsSegments: ClapSegment[] = clap.segments.filter(s => s.category === "camera")
  console.log(`[api/generate/dialogues] detected ${shotsSegments.length} shots`)

  if (shotsSegments.length > 32) {
    throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
  }

  for (const shotSegment of shotsSegments) {

    // all the segments starting within the current shot
    const shotSegments: ClapSegment[] = clap.segments.filter(s =>
      startOfSegment1IsWithinSegment2(s, shotSegment)
    )

    const shotDialogueSegments: ClapSegment[] = shotSegments.filter(s =>
      s.category === "dialogue"
    )

    // only the first dialogue of the shot is processed
    const shotDialogueSegment: ClapSegment | undefined = shotDialogueSegments.at(0)

    console.log(`[api/generate/dialogues] shot [${shotSegment.startTimeInMs}:${shotSegment.endTimeInMs}] has ${shotSegments.length} segments (${shotDialogueSegments.length} dialogues)`)

    if (!shotDialogueSegment) {
      // nothing to generate: don't report an existing audio that doesn't exist
      console.log(`[api/generate/dialogues] there is no dialogue in this shot, skipping`)
      continue
    }

    if (shotDialogueSegment.assetUrl) {
      console.log(`[api/generate/dialogues] there is already a dialogue audio: ${shotDialogueSegment?.assetUrl?.slice?.(0, 50)}...`)
      continue
    }

    console.log(`[api/generate/dialogues] generating audio..`)

    try {
      shotDialogueSegment.assetUrl = await generateSpeechWithParlerTTS({
        text: shotDialogueSegment.prompt,
        audioId: getSpeechBackgroundAudioPrompt(shotSegments, clap.entityIndex, ["high quality", "crisp", "detailed"]),
        debug: true,
      })
      shotDialogueSegment.assetSourceType = getClapAssetSourceType(shotDialogueSegment.assetUrl)

      console.log("TODO julian: properly set the asset type format")

    } catch (err) {
      console.log(`[api/generate/dialogues] failed to generate audio: ${err}`)
      throw err
    }

    console.log(`[api/generate/dialogues] generated dialogue audio: ${shotDialogueSegment?.assetUrl?.slice?.(0, 50)}...`)
  }

  console.log(`[api/generate/dialogues] returning the clap augmented with dialogues`)

  return new NextResponse(await serializeClap(clap), {
    status: 200,
    headers: new Headers({ "content-type": "application/x-gzip" }),
  })
}
|
src/app/api/v1/edit/{models → entities}/generateAudioID.ts
RENAMED
File without changes
|
src/app/api/v1/edit/{models → entities}/generateImageID.ts
RENAMED
File without changes
|
src/app/api/v1/edit/{models → entities}/route.ts
RENAMED
@@ -1,7 +1,7 @@
|
|
1 |
import { NextResponse, NextRequest } from "next/server"
|
2 |
import queryString from "query-string"
|
3 |
|
4 |
-
import { parseClap, serializeClap
|
5 |
import { getToken } from "@/app/api/auth/getToken"
|
6 |
|
7 |
import { generateImageID } from "./generateImageID"
|
@@ -25,7 +25,7 @@ export async function POST(req: NextRequest) {
|
|
25 |
if (!prompt.length) { throw new Error(`please provide a prompt`) }
|
26 |
*/
|
27 |
|
28 |
-
console.log("[api/generate/
|
29 |
|
30 |
const jwtToken = await getToken({ user: "anonymous" })
|
31 |
|
@@ -33,40 +33,42 @@ export async function POST(req: NextRequest) {
|
|
33 |
|
34 |
const clap = await parseClap(blob)
|
35 |
|
36 |
-
if (!clap.
|
37 |
|
38 |
-
for (const
|
39 |
|
40 |
// TASK 1: GENERATE THE IMAGE PROMPT IF MISSING
|
41 |
-
if (!
|
42 |
-
|
43 |
}
|
44 |
|
45 |
// TASK 2: GENERATE THE IMAGE ID IF MISSING
|
46 |
-
if (!
|
47 |
-
|
48 |
-
prompt:
|
49 |
-
seed:
|
50 |
})
|
|
|
51 |
}
|
52 |
|
53 |
// TASK 3: GENERATE THE AUDIO PROMPT IF MISSING
|
54 |
-
if (!
|
55 |
-
|
56 |
}
|
57 |
|
58 |
// TASK 4: GENERATE THE AUDIO ID IF MISSING
|
59 |
|
60 |
// TODO here: call Parler-TTS or a generic audio generator
|
61 |
-
if (!
|
62 |
-
|
63 |
-
prompt:
|
64 |
-
seed:
|
65 |
})
|
|
|
66 |
}
|
67 |
}
|
68 |
|
69 |
-
console.log(`[api/generate/
|
70 |
|
71 |
return new NextResponse(await serializeClap(clap), {
|
72 |
status: 200,
|
|
|
1 |
import { NextResponse, NextRequest } from "next/server"
|
2 |
import queryString from "query-string"
|
3 |
|
4 |
+
import { getClapAssetSourceType, parseClap, serializeClap } from "@aitube/clap"
|
5 |
import { getToken } from "@/app/api/auth/getToken"
|
6 |
|
7 |
import { generateImageID } from "./generateImageID"
|
|
|
25 |
if (!prompt.length) { throw new Error(`please provide a prompt`) }
|
26 |
*/
|
27 |
|
28 |
+
console.log("[api/generate/entities] request:", prompt)
|
29 |
|
30 |
const jwtToken = await getToken({ user: "anonymous" })
|
31 |
|
|
|
33 |
|
34 |
const clap = await parseClap(blob)
|
35 |
|
36 |
+
if (!clap.entities.length) { throw new Error(`please provide at least one entity`) }
|
37 |
|
38 |
+
for (const entity of clap.entities) {
|
39 |
|
40 |
// TASK 1: GENERATE THE IMAGE PROMPT IF MISSING
|
41 |
+
if (!entity.imagePrompt) {
|
42 |
+
entity.imagePrompt = "a man with a beard"
|
43 |
}
|
44 |
|
45 |
// TASK 2: GENERATE THE IMAGE ID IF MISSING
|
46 |
+
if (!entity.imageId) {
|
47 |
+
entity.imageId = await generateImageID({
|
48 |
+
prompt: entity.imagePrompt,
|
49 |
+
seed: entity.seed
|
50 |
})
|
51 |
+
entity.imageSourceType = getClapAssetSourceType(entity.imageId)
|
52 |
}
|
53 |
|
54 |
// TASK 3: GENERATE THE AUDIO PROMPT IF MISSING
|
55 |
+
if (!entity.audioPrompt) {
|
56 |
+
entity.audioPrompt = "a man with a beard"
|
57 |
}
|
58 |
|
59 |
// TASK 4: GENERATE THE AUDIO ID IF MISSING
|
60 |
|
61 |
// TODO here: call Parler-TTS or a generic audio generator
|
62 |
+
if (!entity.audioId) {
|
63 |
+
entity.audioId = await generateAudioID({
|
64 |
+
prompt: entity.audioPrompt,
|
65 |
+
seed: entity.seed
|
66 |
})
|
67 |
+
entity.audioSourceType = getClapAssetSourceType(entity.audioId)
|
68 |
}
|
69 |
}
|
70 |
|
71 |
+
console.log(`[api/generate/entities] returning the clap extended with the entities`)
|
72 |
|
73 |
return new NextResponse(await serializeClap(clap), {
|
74 |
status: 200,
|
src/app/api/v1/edit/{models → entities}/systemPrompt.ts
RENAMED
File without changes
|
src/app/api/v1/edit/storyboards/route.ts
CHANGED
@@ -6,7 +6,6 @@ import { startOfSegment1IsWithinSegment2 } from "@/lib/utils/startOfSegment1IsWi
|
|
6 |
import { getVideoPrompt } from "@/components/interface/latent-engine/core/prompts/getVideoPrompt"
|
7 |
import { getToken } from "@/app/api/auth/getToken"
|
8 |
|
9 |
-
import { newRender, getRender } from "@/app/api/providers/videochain/renderWithVideoChain"
|
10 |
import { getPositivePrompt } from "@/app/api/utils/imagePrompts"
|
11 |
import { generateStoryboard } from "./generateStoryboard"
|
12 |
|
@@ -68,7 +67,7 @@ export async function POST(req: NextRequest) {
|
|
68 |
// TASK 2: GENERATE MISSING STORYBOARD PROMPT
|
69 |
if (shotStoryboardSegment && !shotStoryboardSegment?.prompt) {
|
70 |
// storyboard is missing, let's generate it
|
71 |
-
shotStoryboardSegment.prompt = getVideoPrompt(shotSegments,
|
72 |
console.log(`[api/generate/storyboards] generating storyboard prompt: ${shotStoryboardSegment.prompt}`)
|
73 |
}
|
74 |
|
|
|
6 |
import { getVideoPrompt } from "@/components/interface/latent-engine/core/prompts/getVideoPrompt"
|
7 |
import { getToken } from "@/app/api/auth/getToken"
|
8 |
|
|
|
9 |
import { getPositivePrompt } from "@/app/api/utils/imagePrompts"
|
10 |
import { generateStoryboard } from "./generateStoryboard"
|
11 |
|
|
|
67 |
// TASK 2: GENERATE MISSING STORYBOARD PROMPT
|
68 |
if (shotStoryboardSegment && !shotStoryboardSegment?.prompt) {
|
69 |
// storyboard is missing, let's generate it
|
70 |
+
shotStoryboardSegment.prompt = getVideoPrompt(shotSegments, clap.entityIndex, ["high quality", "crisp", "detailed"])
|
71 |
console.log(`[api/generate/storyboards] generating storyboard prompt: ${shotStoryboardSegment.prompt}`)
|
72 |
}
|
73 |
|
src/components/interface/latent-engine/core/prompts/getCharacterPrompt.ts
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
-
import {
|
2 |
|
3 |
-
export function getCharacterPrompt(
|
4 |
|
5 |
let characterPrompt = ""
|
6 |
-
if (
|
7 |
characterPrompt = [
|
8 |
// the label (character name) can help making the prompt more unique
|
9 |
// this might backfire however, if the name is
|
@@ -11,15 +11,15 @@ export function getCharacterPrompt(model: ClapModel): string {
|
|
11 |
// I'm not sure stable diffusion really needs this,
|
12 |
// so let's skip it for now (might still be useful for locations, though)
|
13 |
// we also want to avoid triggering "famous people" (BARBOSSA etc)
|
14 |
-
//
|
15 |
|
16 |
-
|
17 |
].join(", ")
|
18 |
} else {
|
19 |
characterPrompt = [
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
].map(i => i.trim()).filter(i => i).join(", ")
|
24 |
}
|
25 |
return characterPrompt
|
|
|
1 |
+
import { ClapEntity } from "@aitube/clap"
|
2 |
|
3 |
+
export function getCharacterPrompt(entity: ClapEntity): string {
|
4 |
|
5 |
let characterPrompt = ""
|
6 |
+
if (entity.description) {
|
7 |
characterPrompt = [
|
8 |
// the label (character name) can help making the prompt more unique
|
9 |
// this might backfire however, if the name is
|
|
|
11 |
// I'm not sure stable diffusion really needs this,
|
12 |
// so let's skip it for now (might still be useful for locations, though)
|
13 |
// we also want to avoid triggering "famous people" (BARBOSSA etc)
|
14 |
+
// entity.label,
|
15 |
|
16 |
+
entity.description
|
17 |
].join(", ")
|
18 |
} else {
|
19 |
characterPrompt = [
|
20 |
+
entity.gender !== "object" ? entity.gender : "",
|
21 |
+
entity.age ? `aged ${entity.age}yo` : '',
|
22 |
+
entity.label ? `named ${entity.label}` : '',
|
23 |
].map(i => i.trim()).filter(i => i).join(", ")
|
24 |
}
|
25 |
return characterPrompt
|
src/components/interface/latent-engine/core/prompts/getSpeechBackgroundAudioPrompt.ts
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { ClapEntity, ClapSegment } from "@aitube/clap"
|
2 |
+
|
3 |
+
import { getCharacterPrompt } from "./getCharacterPrompt"
|
4 |
+
|
5 |
+
/**
 * Construct an audio background for a voice from a list of active segments
 *
 * Only the "dialogue", "weather" and "location" segments are used, ordered by
 * descending label:
 * - a dialogue contributes a description of the speaker (from its entity, when
 *   it can be found) followed by ", speaking normally"
 * - a weather or location segment contributes its prompt as-is
 *
 * Empty contributions are dropped, the extra positive prompts are appended,
 * and everything is joined with ". ".
 *
 * @param segments the active segments
 * @param entitiesById the entities, indexed by id
 * @param extraPositivePrompt extra prompt parts ("clear sound, high quality" etc)
 * @returns the background audio prompt
 */
export function getSpeechBackgroundAudioPrompt(
  segments: ClapSegment[] = [],
  entitiesById: Record<string, ClapEntity> = {},
  extraPositivePrompt: string[] = [] // "clear sound, high quality" etc
): string {
  return segments
    .filter(({ category }) => (
      category === "dialogue" ||
      category === "weather" ||
      category === "location"
    ))
    .sort((a, b) => b.label.localeCompare(a.label))
    .map(segment => {
      if (segment.category !== "dialogue") {
        // the location and the weather are part of the background noise
        // but this might produce weird and unexpected results - we'll see!
        return segment.prompt
      }

      const entity: ClapEntity | undefined = entitiesById[segment?.entityId || ""] || undefined

      // if we can't find the entity, or if the entity yields no usable
      // description, then we are unable to make any assumption about the
      // gender, age and voice timbre: fall back to a generic speaker instead
      // of emitting a prompt that starts with a dangling comma
      const characterPrompt = entity ? getCharacterPrompt(entity) : ""

      return `${characterPrompt || "person"}, speaking normally`
    })
    .filter(x => x)
    .concat([ ...extraPositivePrompt ])
    .join(". ")
}
|
src/components/interface/latent-engine/core/prompts/getSpeechForegroundAudioPrompt.ts
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { ClapSegment } from "@aitube/clap"
|
2 |
+
|
3 |
+
/**
 * Build the foreground audio prompt of a voice out of the active segments.
 *
 * The foreground is the dialogue itself (the words being spoken), so the raw
 * text of the dialogue segments is used without any decoration: the segments
 * are ordered by descending label, empty prompts are skipped, and the rest is
 * joined with ". ".
 *
 * @param segments the active segments
 * @returns the spoken text
 */
export function getSpeechForegroundAudioPrompt(
  segments: ClapSegment[] = []
): string {
  const dialogues = segments.filter(segment => segment.category === "dialogue")
  dialogues.sort((first, second) => second.label.localeCompare(first.label))

  const spokenLines: string[] = []
  for (const dialogue of dialogues) {
    if (dialogue.prompt) {
      spokenLines.push(dialogue.prompt)
    }
  }

  return spokenLines.join(". ")
}
|
src/components/interface/latent-engine/core/prompts/getVideoPrompt.ts
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import {
|
2 |
|
3 |
import { deduplicatePrompt } from "../../utils/prompting/deduplicatePrompt"
|
4 |
|
@@ -11,12 +11,12 @@ import { getCharacterPrompt } from "./getCharacterPrompt"
|
|
11 |
* @returns
|
12 |
*/
|
13 |
export function getVideoPrompt(
|
14 |
-
segments: ClapSegment[],
|
15 |
-
|
16 |
-
extraPositivePrompt: string[]
|
17 |
): string {
|
18 |
|
19 |
-
// console.log("
|
20 |
|
21 |
// to construct the video we need to collect all the segments describing it
|
22 |
// we ignore unrelated categories (music, dialogue) or non-prompt items (eg. an audio sample)
|
@@ -60,23 +60,23 @@ export function getVideoPrompt(
|
|
60 |
tmp.sort((a, b) => b.label.localeCompare(a.label))
|
61 |
|
62 |
let videoPrompt = tmp.map(segment => {
|
63 |
-
const
|
64 |
|
65 |
if (segment.category === "dialogue") {
|
66 |
|
67 |
-
// if we can't find the
|
68 |
// to make any assumption about the gender, age or appearance
|
69 |
-
if (!
|
70 |
-
console.log("ERROR: this is a dialogue, but couldn't find the
|
71 |
return `portrait of a person speaking, blurry background, bokeh`
|
72 |
}
|
73 |
|
74 |
-
const characterTrigger =
|
75 |
-
const characterLabel =
|
76 |
-
const characterDescription =
|
77 |
const dialogueLine = segment?.prompt || ""
|
78 |
|
79 |
-
const characterPrompt = getCharacterPrompt(
|
80 |
|
81 |
// in the context of a video, we do something additional:
|
82 |
// we create a "bokeh" style
|
@@ -84,13 +84,13 @@ export function getVideoPrompt(
|
|
84 |
|
85 |
} else if (segment.category === "location") {
|
86 |
|
87 |
-
// if we can't find the location's
|
88 |
-
if (!
|
89 |
-
console.log("ERROR: this is a location, but couldn't find the
|
90 |
return segment.prompt
|
91 |
}
|
92 |
|
93 |
-
return
|
94 |
} else {
|
95 |
return segment.prompt
|
96 |
}
|
|
|
1 |
+
import { ClapEntity, ClapSegment } from "@aitube/clap"
|
2 |
|
3 |
import { deduplicatePrompt } from "../../utils/prompting/deduplicatePrompt"
|
4 |
|
|
|
11 |
* @returns
|
12 |
*/
|
13 |
export function getVideoPrompt(
|
14 |
+
segments: ClapSegment[] = [],
|
15 |
+
entitiesIndex: Record<string, ClapEntity> = {},
|
16 |
+
extraPositivePrompt: string[] = []
|
17 |
): string {
|
18 |
|
19 |
+
// console.log("entitiesIndex:", entitiesIndex)
|
20 |
|
21 |
// to construct the video we need to collect all the segments describing it
|
22 |
// we ignore unrelated categories (music, dialogue) or non-prompt items (eg. an audio sample)
|
|
|
60 |
tmp.sort((a, b) => b.label.localeCompare(a.label))
|
61 |
|
62 |
let videoPrompt = tmp.map(segment => {
|
63 |
+
const entity: ClapEntity | undefined = entitiesIndex[segment?.entityId || ""] || undefined
|
64 |
|
65 |
if (segment.category === "dialogue") {
|
66 |
|
67 |
+
// if we can't find the entity, then we are unable
|
68 |
// to make any assumption about the gender, age or appearance
|
69 |
+
if (!entity) {
|
70 |
+
console.log("ERROR: this is a dialogue, but couldn't find the entity!")
|
71 |
return `portrait of a person speaking, blurry background, bokeh`
|
72 |
}
|
73 |
|
74 |
+
const characterTrigger = entity?.triggerName || ""
|
75 |
+
const characterLabel = entity?.label || ""
|
76 |
+
const characterDescription = entity?.description || ""
|
77 |
const dialogueLine = segment?.prompt || ""
|
78 |
|
79 |
+
const characterPrompt = getCharacterPrompt(entity)
|
80 |
|
81 |
// in the context of a video, we do something additional:
|
82 |
// we create a "bokeh" style
|
|
|
84 |
|
85 |
} else if (segment.category === "location") {
|
86 |
|
87 |
+
// if we can't find the location's entity, we default to returning the prompt
|
88 |
+
if (!entity) {
|
89 |
+
console.log("ERROR: this is a location, but couldn't find the entity!")
|
90 |
return segment.prompt
|
91 |
}
|
92 |
|
93 |
+
return entity.description
|
94 |
} else {
|
95 |
return segment.prompt
|
96 |
}
|
src/components/interface/latent-engine/core/useLatentEngine.ts
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
|
2 |
import { create } from "zustand"
|
3 |
|
4 |
-
import {
|
5 |
|
6 |
import { LatentEngineStore } from "./types"
|
7 |
import { resolveSegments } from "../resolvers/resolveSegments"
|
@@ -409,9 +409,7 @@ export const useLatentEngine = create<LatentEngineStore>((set, get) => ({
|
|
409 |
//
|
410 |
// yes: I know the code is complex and not intuitive - sorry about that
|
411 |
|
412 |
-
|
413 |
-
const modelsById: Record<string, ClapModel> = {}
|
414 |
-
const extraPositivePrompt: string[] = []
|
415 |
|
416 |
let bufferAheadOfCurrentPositionInMs = positionInMs
|
417 |
|
@@ -427,7 +425,7 @@ export const useLatentEngine = create<LatentEngineStore>((set, get) => ({
|
|
427 |
|
428 |
bufferAheadOfCurrentPositionInMs += videoDurationInMs
|
429 |
|
430 |
-
const prompt = getVideoPrompt(shotSegmentsToPreload,
|
431 |
|
432 |
console.log(`video prompt: ${prompt}`)
|
433 |
// could also be the camera
|
|
|
1 |
|
2 |
import { create } from "zustand"
|
3 |
|
4 |
+
import { ClapEntity, ClapProject, ClapSegment, newClap, parseClap } from "@aitube/clap"
|
5 |
|
6 |
import { LatentEngineStore } from "./types"
|
7 |
import { resolveSegments } from "../resolvers/resolveSegments"
|
|
|
409 |
//
|
410 |
// yes: I know the code is complex and not intuitive - sorry about that
|
411 |
|
412 |
+
const extraPositivePrompt: string[] = ["high quality", "crisp", "detailed"]
|
|
|
|
|
413 |
|
414 |
let bufferAheadOfCurrentPositionInMs = positionInMs
|
415 |
|
|
|
425 |
|
426 |
bufferAheadOfCurrentPositionInMs += videoDurationInMs
|
427 |
|
428 |
+
const prompt = getVideoPrompt(shotSegmentsToPreload, clap.entityIndex, extraPositivePrompt)
|
429 |
|
430 |
console.log(`video prompt: ${prompt}`)
|
431 |
// could also be the camera
|
src/lib/business/getClapAssetSourceType.ts
DELETED
@@ -1,25 +0,0 @@
|
|
1 |
-
import { ClapAssetSource } from "@aitube/clap"
|
2 |
-
|
3 |
-
/**
 * Classify an asset string by the way it references its content.
 *
 * The input is trimmed, then matched against known prefixes:
 * - nothing left: "EMPTY"
 * - "https://" or "http://": "REMOTE"
 * - "/", "../" or "./": "PATH"
 * - "data:": "DATA"
 * - anything else: "PROMPT"
 *
 * @param input the asset string (url, path, data uri or free text)
 * @returns the kind of source
 */
export function getClapAssetSourceSource(input: string = ""): ClapAssetSource {

  const candidate = `${input || ""}`.trim()

  if (candidate.length === 0) {
    return "EMPTY"
  }

  const startsWithAny = (...prefixes: string[]): boolean =>
    prefixes.some(prefix => candidate.startsWith(prefix))

  if (startsWithAny("https://", "http://")) {
    return "REMOTE"
  }

  // note that "path" assets are potentially a security risk, they need to be treated with care
  if (startsWithAny("/", "../", "./")) {
    return "PATH"
  }

  return startsWithAny("data:") ? "DATA" : "PROMPT"
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|