Spaces:
Running
Running
Commit
·
f24ad59
1
Parent(s):
6419aeb
upgraded to @aitube/client 0.0.12
Browse files
- package-lock.json +13 -13
- package.json +1 -1
- src/app/api/actions/ai-tube-hf/downloadClapProject.ts +1 -1
- src/app/api/actions/ai-tube-hf/getVideoRequestsFromChannel.ts +2 -2
- src/app/api/actions/ai-tube-hf/parseChannel.ts +1 -1
- src/app/api/generators/clap/getLatentScenes.ts +1 -1
- src/app/api/generators/clap/unknownObjectToLatentScenes.ts +1 -1
- src/app/api/generators/search/getLatentSearchResults.ts +1 -1
- src/app/api/generators/search/unknownObjectToLatentSearchResults.ts +2 -2
- src/app/api/parsers/parseBasicSearchResult.ts +8 -0
- src/app/api/parsers/parseBasicSearchResults.ts +13 -0
- src/app/api/parsers/parseCompletionMode.ts +10 -0
- src/app/api/{utils → parsers}/parseDatasetPrompt.ts +0 -0
- src/app/api/{utils → parsers}/parseDatasetReadme.ts +0 -0
- src/app/api/parsers/parseLatentSearchMode.ts +10 -0
- src/app/api/{utils → parsers}/parseProjectionFromLoRA.ts +0 -0
- src/app/api/parsers/parsePrompt.ts +9 -0
- src/app/api/{utils → parsers}/parsePromptFileName.ts +0 -0
- src/app/api/{utils → parsers}/parseRawStringToYAML.ts +0 -0
- src/app/api/{utils → parsers}/parseString.ts +0 -0
- src/app/api/{utils → parsers}/parseStringArray.ts +0 -0
- src/app/api/{utils → parsers}/parseVideoModelName.ts +0 -0
- src/app/api/{utils → parsers}/parseVideoOrientation.ts +0 -0
- src/app/api/utils/computeOrientationProjectionWidthHeight.ts +2 -2
- src/app/api/v1/create/index.ts +143 -0
- src/app/api/v1/create/route.ts +6 -126
- src/app/api/v1/create/types.ts +6 -0
- src/app/api/v1/edit/dialogues/processShot.ts +20 -5
- src/app/api/v1/edit/dialogues/route.ts +18 -7
- src/app/api/v1/edit/entities/index.ts +69 -0
- src/app/api/v1/edit/entities/route.ts +15 -64
- src/app/api/v1/edit/storyboards/processShot.ts +26 -9
- src/app/api/v1/edit/storyboards/route.ts +19 -8
- src/app/api/v1/edit/videos/processShot.ts +25 -7
- src/app/api/v1/edit/videos/route.ts +19 -8
- src/app/api/v1/search/index.ts +1 -1
- src/app/api/v1/search/route.ts +7 -18
- src/app/latent/watch/page.tsx +6 -7
- src/app/main.tsx +1 -1
- src/app/views/user-channel-view/index.tsx +2 -2
- src/lib/utils/parseMediaProjectionType.ts +1 -1
package-lock.json
CHANGED
@@ -9,7 +9,7 @@
|
|
9 |
"version": "0.0.0",
|
10 |
"dependencies": {
|
11 |
"@aitube/clap": "0.0.10",
|
12 |
-
"@aitube/client": "0.0.
|
13 |
"@aitube/engine": "0.0.2",
|
14 |
"@huggingface/hub": "0.12.3-oauth",
|
15 |
"@huggingface/inference": "^2.6.7",
|
@@ -129,9 +129,9 @@
|
|
129 |
}
|
130 |
},
|
131 |
"node_modules/@aitube/client": {
|
132 |
-
"version": "0.0.
|
133 |
-
"resolved": "https://registry.npmjs.org/@aitube/client/-/client-0.0.
|
134 |
-
"integrity": "sha512-
|
135 |
"peerDependencies": {
|
136 |
"@aitube/clap": "0.0.10"
|
137 |
}
|
@@ -1520,9 +1520,9 @@
|
|
1520 |
}
|
1521 |
},
|
1522 |
"node_modules/@mediapipe/tasks-vision": {
|
1523 |
-
"version": "0.10.13
|
1524 |
-
"resolved": "https://registry.npmjs.org/@mediapipe/tasks-vision/-/tasks-vision-0.10.13
|
1525 |
-
"integrity": "sha512-
|
1526 |
},
|
1527 |
"node_modules/@next/env": {
|
1528 |
"version": "14.2.3",
|
@@ -4314,9 +4314,9 @@
|
|
4314 |
"integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA=="
|
4315 |
},
|
4316 |
"node_modules/electron-to-chromium": {
|
4317 |
-
"version": "1.4.
|
4318 |
-
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.
|
4319 |
-
"integrity": "sha512-
|
4320 |
},
|
4321 |
"node_modules/elliptic": {
|
4322 |
"version": "6.5.4",
|
@@ -8423,9 +8423,9 @@
|
|
8423 |
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA=="
|
8424 |
},
|
8425 |
"node_modules/update-browserslist-db": {
|
8426 |
-
"version": "1.0.
|
8427 |
-
"resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.
|
8428 |
-
"integrity": "sha512-
|
8429 |
"funding": [
|
8430 |
{
|
8431 |
"type": "opencollective",
|
|
|
9 |
"version": "0.0.0",
|
10 |
"dependencies": {
|
11 |
"@aitube/clap": "0.0.10",
|
12 |
+
"@aitube/client": "0.0.12",
|
13 |
"@aitube/engine": "0.0.2",
|
14 |
"@huggingface/hub": "0.12.3-oauth",
|
15 |
"@huggingface/inference": "^2.6.7",
|
|
|
129 |
}
|
130 |
},
|
131 |
"node_modules/@aitube/client": {
|
132 |
+
"version": "0.0.12",
|
133 |
+
"resolved": "https://registry.npmjs.org/@aitube/client/-/client-0.0.12.tgz",
|
134 |
+
"integrity": "sha512-b/QFTtAKwr7H5dMSco+iXhwJRpPw/sT487EGpNjDbuQamIJ3FqdlVMTC/c5jdX8meFp+m35n/dY58Iy39Lle5A==",
|
135 |
"peerDependencies": {
|
136 |
"@aitube/clap": "0.0.10"
|
137 |
}
|
|
|
1520 |
}
|
1521 |
},
|
1522 |
"node_modules/@mediapipe/tasks-vision": {
|
1523 |
+
"version": "0.10.13",
|
1524 |
+
"resolved": "https://registry.npmjs.org/@mediapipe/tasks-vision/-/tasks-vision-0.10.13.tgz",
|
1525 |
+
"integrity": "sha512-8uYOKbtASqZu4m1Tf0nBvOaT50pGTVt0siQ3AWJJ4OV+r+HsWDYquQvev/fo78i49mt2IM2eskV7UxX4+e4tLw=="
|
1526 |
},
|
1527 |
"node_modules/@next/env": {
|
1528 |
"version": "14.2.3",
|
|
|
4314 |
"integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA=="
|
4315 |
},
|
4316 |
"node_modules/electron-to-chromium": {
|
4317 |
+
"version": "1.4.756",
|
4318 |
+
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.756.tgz",
|
4319 |
+
"integrity": "sha512-RJKZ9+vEBMeiPAvKNWyZjuYyUqMndcP1f335oHqn3BEQbs2NFtVrnK5+6Xg5wSM9TknNNpWghGDUCKGYF+xWXw=="
|
4320 |
},
|
4321 |
"node_modules/elliptic": {
|
4322 |
"version": "6.5.4",
|
|
|
8423 |
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA=="
|
8424 |
},
|
8425 |
"node_modules/update-browserslist-db": {
|
8426 |
+
"version": "1.0.15",
|
8427 |
+
"resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.15.tgz",
|
8428 |
+
"integrity": "sha512-K9HWH62x3/EalU1U6sjSZiylm9C8tgq2mSvshZpqc7QE69RaA2qjhkW2HlNA0tFpEbtyFz7HTqbSdN4MSwUodA==",
|
8429 |
"funding": [
|
8430 |
{
|
8431 |
"type": "opencollective",
|
package.json
CHANGED
@@ -11,7 +11,7 @@
|
|
11 |
},
|
12 |
"dependencies": {
|
13 |
"@aitube/clap": "0.0.10",
|
14 |
-
"@aitube/client": "0.0.
|
15 |
"@aitube/engine": "0.0.2",
|
16 |
"@huggingface/hub": "0.12.3-oauth",
|
17 |
"@huggingface/inference": "^2.6.7",
|
|
|
11 |
},
|
12 |
"dependencies": {
|
13 |
"@aitube/clap": "0.0.10",
|
14 |
+
"@aitube/client": "0.0.12",
|
15 |
"@aitube/engine": "0.0.2",
|
16 |
"@huggingface/hub": "0.12.3-oauth",
|
17 |
"@huggingface/inference": "^2.6.7",
|
src/app/api/actions/ai-tube-hf/downloadClapProject.ts
CHANGED
@@ -6,7 +6,7 @@ import { ClapProject, parseClap } from "@aitube/clap"
|
|
6 |
import { ChannelInfo, MediaInfo, VideoRequest } from "@/types/general"
|
7 |
import { defaultVideoModel } from "@/app/config"
|
8 |
|
9 |
-
import { parseVideoModelName } from "../../
|
10 |
import { computeOrientationProjectionWidthHeight } from "../../utils/computeOrientationProjectionWidthHeight"
|
11 |
|
12 |
import { downloadFileAsBlob } from "./downloadFileAsBlob"
|
|
|
6 |
import { ChannelInfo, MediaInfo, VideoRequest } from "@/types/general"
|
7 |
import { defaultVideoModel } from "@/app/config"
|
8 |
|
9 |
+
import { parseVideoModelName } from "../../parsers/parseVideoModelName"
|
10 |
import { computeOrientationProjectionWidthHeight } from "../../utils/computeOrientationProjectionWidthHeight"
|
11 |
|
12 |
import { downloadFileAsBlob } from "./downloadFileAsBlob"
|
src/app/api/actions/ai-tube-hf/getVideoRequestsFromChannel.ts
CHANGED
@@ -3,9 +3,9 @@
|
|
3 |
import { ChannelInfo, VideoRequest } from "@/types/general"
|
4 |
import { getCredentials } from "./getCredentials"
|
5 |
import { listFiles } from "@/lib/huggingface/hub/src"
|
6 |
-
import { parsePromptFileName } from "../../
|
7 |
import { downloadFileAsText } from "./downloadFileAsText"
|
8 |
-
import { parseDatasetPrompt } from "../../
|
9 |
import { computeOrientationProjectionWidthHeight } from "../../utils/computeOrientationProjectionWidthHeight"
|
10 |
import { downloadClapProject } from "./downloadClapProject"
|
11 |
|
|
|
3 |
import { ChannelInfo, VideoRequest } from "@/types/general"
|
4 |
import { getCredentials } from "./getCredentials"
|
5 |
import { listFiles } from "@/lib/huggingface/hub/src"
|
6 |
+
import { parsePromptFileName } from "../../parsers/parsePromptFileName"
|
7 |
import { downloadFileAsText } from "./downloadFileAsText"
|
8 |
+
import { parseDatasetPrompt } from "../../parsers/parseDatasetPrompt"
|
9 |
import { computeOrientationProjectionWidthHeight } from "../../utils/computeOrientationProjectionWidthHeight"
|
10 |
import { downloadClapProject } from "./downloadClapProject"
|
11 |
|
src/app/api/actions/ai-tube-hf/parseChannel.ts
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
"use server"
|
2 |
|
3 |
import { Credentials, downloadFile, whoAmI } from "@/lib/huggingface/hub/src"
|
4 |
-
import { parseDatasetReadme } from "@/app/api/
|
5 |
import { ChannelInfo, VideoGenerationModel, VideoOrientation } from "@/types/general"
|
6 |
|
7 |
import { adminCredentials } from "../config"
|
|
|
1 |
"use server"
|
2 |
|
3 |
import { Credentials, downloadFile, whoAmI } from "@/lib/huggingface/hub/src"
|
4 |
+
import { parseDatasetReadme } from "@/app/api/parsers/parseDatasetReadme"
|
5 |
import { ChannelInfo, VideoGenerationModel, VideoOrientation } from "@/types/general"
|
6 |
|
7 |
import { adminCredentials } from "../config"
|
src/app/api/generators/clap/getLatentScenes.ts
CHANGED
@@ -8,7 +8,7 @@ import { predict as predictWithOpenAI } from "@/app/api/providers/openai/predict
|
|
8 |
import { LatentScenes } from "./types"
|
9 |
import { getSystemPrompt } from "./getSystemPrompt"
|
10 |
import { unknownObjectToLatentScenes } from "./unknownObjectToLatentScenes"
|
11 |
-
import { parseRawStringToYAML } from "../../
|
12 |
|
13 |
export async function getLatentScenes({
|
14 |
prompt = "",
|
|
|
8 |
import { LatentScenes } from "./types"
|
9 |
import { getSystemPrompt } from "./getSystemPrompt"
|
10 |
import { unknownObjectToLatentScenes } from "./unknownObjectToLatentScenes"
|
11 |
+
import { parseRawStringToYAML } from "../../parsers/parseRawStringToYAML"
|
12 |
|
13 |
export async function getLatentScenes({
|
14 |
prompt = "",
|
src/app/api/generators/clap/unknownObjectToLatentScenes.ts
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import { parseStringArray } from "../../
|
2 |
import { LatentScene, LatentScenes } from "./types"
|
3 |
|
4 |
/**
|
|
|
1 |
+
import { parseStringArray } from "../../parsers/parseStringArray"
|
2 |
import { LatentScene, LatentScenes } from "./types"
|
3 |
|
4 |
/**
|
src/app/api/generators/search/getLatentSearchResults.ts
CHANGED
@@ -6,7 +6,7 @@ import { predict as predictWithHuggingFace } from "@/app/api/providers/huggingfa
|
|
6 |
import { predict as predictWithOpenAI } from "@/app/api/providers/openai/predictWithOpenAI"
|
7 |
import { LatentSearchResults } from "./types"
|
8 |
import { getSystemPrompt } from "./getSystemPrompt"
|
9 |
-
import { parseRawStringToYAML } from "../../
|
10 |
import { unknownObjectToLatentSearchResults } from "./unknownObjectToLatentSearchResults"
|
11 |
|
12 |
export async function getLatentSearchResults({
|
|
|
6 |
import { predict as predictWithOpenAI } from "@/app/api/providers/openai/predictWithOpenAI"
|
7 |
import { LatentSearchResults } from "./types"
|
8 |
import { getSystemPrompt } from "./getSystemPrompt"
|
9 |
+
import { parseRawStringToYAML } from "../../parsers/parseRawStringToYAML"
|
10 |
import { unknownObjectToLatentSearchResults } from "./unknownObjectToLatentSearchResults"
|
11 |
|
12 |
export async function getLatentSearchResults({
|
src/app/api/generators/search/unknownObjectToLatentSearchResults.ts
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import { generateSeed } from "@aitube/clap"
|
2 |
|
3 |
-
import { parseString } from "../../
|
4 |
-
import { parseStringArray } from "../../
|
5 |
import { LatentSearchResult, LatentSearchResults } from "./types"
|
6 |
|
7 |
export function unknownObjectToLatentSearchResults(something: any): LatentSearchResults {
|
|
|
1 |
import { generateSeed } from "@aitube/clap"
|
2 |
|
3 |
+
import { parseString } from "../../parsers/parseString"
|
4 |
+
import { parseStringArray } from "../../parsers/parseStringArray"
|
5 |
import { LatentSearchResult, LatentSearchResults } from "./types"
|
6 |
|
7 |
export function unknownObjectToLatentSearchResults(something: any): LatentSearchResults {
|
src/app/api/parsers/parseBasicSearchResult.ts
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { decode } from "js-base64"
|
2 |
+
|
3 |
+
import { BasicSearchResult } from "../v1/search/types"
|
4 |
+
|
5 |
+
export function parseBasicSearchResult(input?: any): BasicSearchResult {
|
6 |
+
let basicResult = JSON.parse(decode(`${input || ""}`)) as BasicSearchResult
|
7 |
+
return basicResult
|
8 |
+
}
|
src/app/api/parsers/parseBasicSearchResults.ts
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { BasicSearchResult } from "../v1/search/types"
|
2 |
+
|
3 |
+
export function parseBasicSearchResult(input?: any, defaultResults: BasicSearchResult[] = []): BasicSearchResult[] {
|
4 |
+
let basicResults: BasicSearchResult[] = defaultResults
|
5 |
+
try {
|
6 |
+
const rawString = decodeURIComponent(`${input || ""}` || "").trim() as string
|
7 |
+
const maybeExistingResults = JSON.parse(rawString)
|
8 |
+
if (Array.isArray(maybeExistingResults)) {
|
9 |
+
basicResults = maybeExistingResults
|
10 |
+
}
|
11 |
+
} catch (err) {}
|
12 |
+
return basicResults
|
13 |
+
}
|
src/app/api/parsers/parseCompletionMode.ts
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { ClapCompletionMode } from "../v1/edit/types"
|
2 |
+
|
3 |
+
export function parseCompletionMode(input?: any, defaultMode: ClapCompletionMode = "partial"): ClapCompletionMode {
|
4 |
+
let mode = defaultMode
|
5 |
+
try {
|
6 |
+
let maybeMode = decodeURIComponent(`${input || ""}` || defaultMode).trim()
|
7 |
+
mode = ["partial", "full"].includes(maybeMode) ? (maybeMode as ClapCompletionMode) : defaultMode
|
8 |
+
} catch (err) {}
|
9 |
+
return mode
|
10 |
+
}
|
src/app/api/{utils → parsers}/parseDatasetPrompt.ts
RENAMED
File without changes
|
src/app/api/{utils → parsers}/parseDatasetReadme.ts
RENAMED
File without changes
|
src/app/api/parsers/parseLatentSearchMode.ts
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { LatentSearchMode } from "../v1/search/route"
|
2 |
+
|
3 |
+
export function parseLatentSearchMode(input?: any, defaultMode: LatentSearchMode = "basic"): LatentSearchMode {
|
4 |
+
let mode = defaultMode
|
5 |
+
try {
|
6 |
+
let maybeMode = decodeURIComponent(`${input || ""}` || defaultMode).trim()
|
7 |
+
mode = ["basic", "extended"].includes(maybeMode) ? (maybeMode as LatentSearchMode) : defaultMode
|
8 |
+
} catch (err) {}
|
9 |
+
return mode
|
10 |
+
}
|
src/app/api/{utils → parsers}/parseProjectionFromLoRA.ts
RENAMED
File without changes
|
src/app/api/parsers/parsePrompt.ts
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
export function parsePrompt(input?: any) {
|
2 |
+
let res = ""
|
3 |
+
try {
|
4 |
+
res = decodeURIComponent(`${input || ""}` || "").trim()
|
5 |
+
} catch (err) {}
|
6 |
+
|
7 |
+
if (!prompt.length) { throw new Error(`please provide a prompt`) }
|
8 |
+
return res
|
9 |
+
}
|
src/app/api/{utils → parsers}/parsePromptFileName.ts
RENAMED
File without changes
|
src/app/api/{utils → parsers}/parseRawStringToYAML.ts
RENAMED
File without changes
|
src/app/api/{utils → parsers}/parseString.ts
RENAMED
File without changes
|
src/app/api/{utils → parsers}/parseStringArray.ts
RENAMED
File without changes
|
src/app/api/{utils → parsers}/parseVideoModelName.ts
RENAMED
File without changes
|
src/app/api/{utils → parsers}/parseVideoOrientation.ts
RENAMED
File without changes
|
src/app/api/utils/computeOrientationProjectionWidthHeight.ts
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import { VideoOrientation, MediaProjection } from "@/types/general"
|
2 |
|
3 |
-
import { parseVideoOrientation } from "
|
4 |
-
import { parseProjectionFromLoRA } from "
|
5 |
|
6 |
export function computeOrientationProjectionWidthHeight({
|
7 |
lora: maybeLora,
|
|
|
1 |
import { VideoOrientation, MediaProjection } from "@/types/general"
|
2 |
|
3 |
+
import { parseVideoOrientation } from "../parsers/parseVideoOrientation"
|
4 |
+
import { parseProjectionFromLoRA } from "../parsers/parseProjectionFromLoRA"
|
5 |
|
6 |
export function computeOrientationProjectionWidthHeight({
|
7 |
lora: maybeLora,
|
src/app/api/v1/create/index.ts
ADDED
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"use server"
|
2 |
+
|
3 |
+
import { ClapProject, getValidNumber, newClap, newSegment } from "@aitube/clap"
|
4 |
+
|
5 |
+
import { predict } from "@/app/api/providers/huggingface/predictWithHuggingFace"
|
6 |
+
import { parseRawStringToYAML } from "@/app/api/parsers/parseRawStringToYAML"
|
7 |
+
|
8 |
+
import { systemPrompt } from "./systemPrompt"
|
9 |
+
import { LatentStory } from "./types"
|
10 |
+
|
11 |
+
// a helper to generate Clap stories from a few sentences
|
12 |
+
// this is mostly used by external apps such as the Stories Factory
|
13 |
+
export async function create(request: {
|
14 |
+
prompt?: string
|
15 |
+
width?: number
|
16 |
+
height?: number
|
17 |
+
}= {
|
18 |
+
prompt: "",
|
19 |
+
width: 1024,
|
20 |
+
height: 576,
|
21 |
+
}): Promise<ClapProject> {
|
22 |
+
|
23 |
+
const prompt = `${request?.prompt || ""}`.trim()
|
24 |
+
|
25 |
+
console.log("api/v1/create(): request:", request)
|
26 |
+
|
27 |
+
if (!prompt.length) { throw new Error(`please provide a prompt`) }
|
28 |
+
|
29 |
+
const width = getValidNumber(request?.width, 256, 8192, 1024)
|
30 |
+
const height = getValidNumber(request?.height, 256, 8192, 576)
|
31 |
+
|
32 |
+
const userPrompt = `Video story to generate: ${prompt}`
|
33 |
+
|
34 |
+
// TODO use streaming for the Hugging Face prediction
|
35 |
+
//
|
36 |
+
// note that a Clap file is actually a YAML stream of documents
|
37 |
+
// so technically we could stream everything from end-to-end
|
38 |
+
// (but I haven't coded the helpers to do this yet)
|
39 |
+
const rawString = await predict({
|
40 |
+
systemPrompt,
|
41 |
+
userPrompt,
|
42 |
+
nbMaxNewTokens: 1400,
|
43 |
+
prefix: "```yaml\n",
|
44 |
+
})
|
45 |
+
|
46 |
+
console.log("api/v1/create(): rawString: ", rawString)
|
47 |
+
|
48 |
+
const shots = parseRawStringToYAML<LatentStory[]>(rawString, [])
|
49 |
+
|
50 |
+
console.log(`api/v1/create(): generated ${shots.length} shots`)
|
51 |
+
|
52 |
+
// this is approximate - TTS generation will determine the final duration of each shot
|
53 |
+
const defaultSegmentDurationInMs = 7000
|
54 |
+
|
55 |
+
let currentElapsedTimeInMs = 0
|
56 |
+
let currentSegmentDurationInMs = defaultSegmentDurationInMs
|
57 |
+
|
58 |
+
const clap: ClapProject = newClap({
|
59 |
+
meta: {
|
60 |
+
title: "Not needed", // we don't need a title actually
|
61 |
+
description: "This video has been generated using AI",
|
62 |
+
synopsis: "",
|
63 |
+
licence: "Non Commercial",
|
64 |
+
orientation: "vertical",
|
65 |
+
width,
|
66 |
+
height,
|
67 |
+
isInteractive: false,
|
68 |
+
isLoop: false,
|
69 |
+
durationInMs: shots.length * defaultSegmentDurationInMs,
|
70 |
+
defaultVideoModel: "AnimateDiff-Lightning",
|
71 |
+
}
|
72 |
+
})
|
73 |
+
|
74 |
+
for (const { title, image, voice } of shots) {
|
75 |
+
|
76 |
+
console.log(`api/v1/create(): - ${title}`)
|
77 |
+
|
78 |
+
// note: it would be nice if we could have a convention saying that
|
79 |
+
// track 0 is for videos and track 1 storyboards
|
80 |
+
//
|
81 |
+
// however, that's a bit constraining as people will generate .clap
|
82 |
+
// using all kind of tools and development experience,
|
83 |
+
// and they may not wish to learn the Clap protocol format completely
|
84 |
+
//
|
85 |
+
// TL;DR:
|
86 |
+
// we should fix the Clap file editor to make it able to react videos
|
87 |
+
// from any track number
|
88 |
+
|
89 |
+
|
90 |
+
/*
|
91 |
+
we disable it, because we don't generate animated videos yet
|
92 |
+
clap.segments.push(newSegment({
|
93 |
+
track: 0,
|
94 |
+
category: "video",
|
95 |
+
prompt: image,
|
96 |
+
outputType: "video"
|
97 |
+
}))
|
98 |
+
*/
|
99 |
+
|
100 |
+
clap.segments.push(newSegment({
|
101 |
+
track: 1,
|
102 |
+
startTimeInMs: currentSegmentDurationInMs,
|
103 |
+
assetDurationInMs: defaultSegmentDurationInMs,
|
104 |
+
category: "storyboard",
|
105 |
+
prompt: image,
|
106 |
+
outputType: "image"
|
107 |
+
}))
|
108 |
+
|
109 |
+
clap.segments.push(newSegment({
|
110 |
+
track: 2,
|
111 |
+
startTimeInMs: currentSegmentDurationInMs,
|
112 |
+
assetDurationInMs: defaultSegmentDurationInMs,
|
113 |
+
category: "interface",
|
114 |
+
prompt: title,
|
115 |
+
// assetUrl: `data:text/plain;base64,${btoa(title)}`,
|
116 |
+
assetUrl: title,
|
117 |
+
outputType: "text"
|
118 |
+
}))
|
119 |
+
|
120 |
+
clap.segments.push(newSegment({
|
121 |
+
track: 3,
|
122 |
+
startTimeInMs: currentSegmentDurationInMs,
|
123 |
+
assetDurationInMs: defaultSegmentDurationInMs,
|
124 |
+
category: "dialogue",
|
125 |
+
prompt: voice,
|
126 |
+
outputType: "audio"
|
127 |
+
}))
|
128 |
+
|
129 |
+
// the presence of a camera is mandatory
|
130 |
+
clap.segments.push(newSegment({
|
131 |
+
track: 4,
|
132 |
+
startTimeInMs: currentSegmentDurationInMs,
|
133 |
+
assetDurationInMs: defaultSegmentDurationInMs,
|
134 |
+
category: "camera",
|
135 |
+
prompt: "vertical video",
|
136 |
+
outputType: "text"
|
137 |
+
}))
|
138 |
+
|
139 |
+
currentSegmentDurationInMs += defaultSegmentDurationInMs
|
140 |
+
}
|
141 |
+
|
142 |
+
return clap
|
143 |
+
}
|
src/app/api/v1/create/route.ts
CHANGED
@@ -1,16 +1,7 @@
|
|
1 |
import { NextResponse, NextRequest } from "next/server"
|
2 |
-
import {
|
3 |
|
4 |
-
import {
|
5 |
-
import { parseRawStringToYAML } from "@/app/api/utils/parseRawStringToYAML"
|
6 |
-
|
7 |
-
import { systemPrompt } from "./systemPrompt"
|
8 |
-
|
9 |
-
export type LatentStory = {
|
10 |
-
title: string
|
11 |
-
image: string
|
12 |
-
voice: string
|
13 |
-
}
|
14 |
|
15 |
// a helper to generate Clap stories from a few sentences
|
16 |
// this is mostly used by external apps such as the Stories Factory
|
@@ -23,125 +14,14 @@ export async function POST(req: NextRequest) {
|
|
23 |
// can add more stuff for the V2 of Stories Factory
|
24 |
}
|
25 |
|
26 |
-
const prompt = `${request?.prompt || ""}`.trim()
|
27 |
-
|
28 |
console.log("[api/v1/create] request:", request)
|
29 |
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
const userPrompt = `Video story to generate: ${prompt}`
|
36 |
-
|
37 |
-
// TODO use streaming for the Hugging Face prediction
|
38 |
-
//
|
39 |
-
// note that a Clap file is actually a YAML stream of documents
|
40 |
-
// so technically we could stream everything from end-to-end
|
41 |
-
// (but I haven't coded the helpers to do this yet)
|
42 |
-
const rawString = await predict({
|
43 |
-
systemPrompt,
|
44 |
-
userPrompt,
|
45 |
-
nbMaxNewTokens: 1400,
|
46 |
-
prefix: "```yaml\n",
|
47 |
})
|
48 |
|
49 |
-
console.log("[api/v1/create] rawString: ", rawString)
|
50 |
-
|
51 |
-
const shots = parseRawStringToYAML<LatentStory[]>(rawString, [])
|
52 |
-
|
53 |
-
console.log(`[api/v1/create] generated ${shots.length} shots`)
|
54 |
-
|
55 |
-
// this is approximate - TTS generation will determine the final duration of each shot
|
56 |
-
const defaultSegmentDurationInMs = 7000
|
57 |
-
|
58 |
-
let currentElapsedTimeInMs = 0
|
59 |
-
let currentSegmentDurationInMs = defaultSegmentDurationInMs
|
60 |
-
|
61 |
-
const clap: ClapProject = newClap({
|
62 |
-
meta: {
|
63 |
-
title: "Not needed", // we don't need a title actually
|
64 |
-
description: "This video has been generated using AI",
|
65 |
-
synopsis: "",
|
66 |
-
licence: "Non Commercial",
|
67 |
-
orientation: "vertical",
|
68 |
-
width,
|
69 |
-
height,
|
70 |
-
isInteractive: false,
|
71 |
-
isLoop: false,
|
72 |
-
durationInMs: shots.length * defaultSegmentDurationInMs,
|
73 |
-
defaultVideoModel: "AnimateDiff-Lightning",
|
74 |
-
}
|
75 |
-
})
|
76 |
-
|
77 |
-
for (const { title, image, voice } of shots) {
|
78 |
-
|
79 |
-
console.log(`[api/v1/create] - ${title}`)
|
80 |
-
|
81 |
-
// note: it would be nice if we could have a convention saying that
|
82 |
-
// track 0 is for videos and track 1 storyboards
|
83 |
-
//
|
84 |
-
// however, that's a bit constraining as people will generate .clap
|
85 |
-
// using all kind of tools and development experience,
|
86 |
-
// and they may not wish to learn the Clap protocol format completely
|
87 |
-
//
|
88 |
-
// TL;DR:
|
89 |
-
// we should fix the Clap file editor to make it able to react videos
|
90 |
-
// from any track number
|
91 |
-
|
92 |
-
|
93 |
-
/*
|
94 |
-
we disable it, because we don't generate animated videos yet
|
95 |
-
clap.segments.push(newSegment({
|
96 |
-
track: 0,
|
97 |
-
category: "video",
|
98 |
-
prompt: image,
|
99 |
-
outputType: "video"
|
100 |
-
}))
|
101 |
-
*/
|
102 |
-
|
103 |
-
clap.segments.push(newSegment({
|
104 |
-
track: 1,
|
105 |
-
startTimeInMs: currentSegmentDurationInMs,
|
106 |
-
assetDurationInMs: defaultSegmentDurationInMs,
|
107 |
-
category: "storyboard",
|
108 |
-
prompt: image,
|
109 |
-
outputType: "image"
|
110 |
-
}))
|
111 |
-
|
112 |
-
clap.segments.push(newSegment({
|
113 |
-
track: 2,
|
114 |
-
startTimeInMs: currentSegmentDurationInMs,
|
115 |
-
assetDurationInMs: defaultSegmentDurationInMs,
|
116 |
-
category: "interface",
|
117 |
-
prompt: title,
|
118 |
-
// assetUrl: `data:text/plain;base64,${btoa(title)}`,
|
119 |
-
assetUrl: title,
|
120 |
-
outputType: "text"
|
121 |
-
}))
|
122 |
-
|
123 |
-
clap.segments.push(newSegment({
|
124 |
-
track: 3,
|
125 |
-
startTimeInMs: currentSegmentDurationInMs,
|
126 |
-
assetDurationInMs: defaultSegmentDurationInMs,
|
127 |
-
category: "dialogue",
|
128 |
-
prompt: voice,
|
129 |
-
outputType: "audio"
|
130 |
-
}))
|
131 |
-
|
132 |
-
// the presence of a camera is mandatory
|
133 |
-
clap.segments.push(newSegment({
|
134 |
-
track: 4,
|
135 |
-
startTimeInMs: currentSegmentDurationInMs,
|
136 |
-
assetDurationInMs: defaultSegmentDurationInMs,
|
137 |
-
category: "camera",
|
138 |
-
prompt: "vertical video",
|
139 |
-
outputType: "text"
|
140 |
-
}))
|
141 |
-
|
142 |
-
currentSegmentDurationInMs += defaultSegmentDurationInMs
|
143 |
-
}
|
144 |
-
|
145 |
// TODO replace by Clap file streaming
|
146 |
return new NextResponse(await serializeClap(clap), {
|
147 |
status: 200,
|
|
|
1 |
import { NextResponse, NextRequest } from "next/server"
|
2 |
+
import { getValidNumber, serializeClap } from "@aitube/clap"
|
3 |
|
4 |
+
import { create } from "."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
// a helper to generate Clap stories from a few sentences
|
7 |
// this is mostly used by external apps such as the Stories Factory
|
|
|
14 |
// can add more stuff for the V2 of Stories Factory
|
15 |
}
|
16 |
|
|
|
|
|
17 |
console.log("[api/v1/create] request:", request)
|
18 |
|
19 |
+
const clap = await create({
|
20 |
+
prompt: `${request?.prompt || ""}`.trim(),
|
21 |
+
width: getValidNumber(request?.width, 256, 8192, 1024),
|
22 |
+
height: getValidNumber(request?.height, 256, 8192, 576)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
})
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
// TODO replace by Clap file streaming
|
26 |
return new NextResponse(await serializeClap(clap), {
|
27 |
status: 200,
|
src/app/api/v1/create/types.ts
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
export type LatentStory = {
|
3 |
+
title: string
|
4 |
+
image: string
|
5 |
+
voice: string
|
6 |
+
}
|
src/app/api/v1/edit/dialogues/processShot.ts
CHANGED
@@ -5,18 +5,24 @@ import { getSpeechBackgroundAudioPrompt } from "@aitube/engine"
|
|
5 |
import { generateSpeechWithParlerTTS } from "@/app/api/generators/speech/generateVoiceWithParlerTTS"
|
6 |
import { getMediaInfo } from "@/app/api/utils/getMediaInfo"
|
7 |
|
|
|
|
|
8 |
export async function processShot({
|
9 |
shotSegment,
|
10 |
-
|
|
|
|
|
11 |
}: {
|
12 |
shotSegment: ClapSegment
|
13 |
-
|
|
|
|
|
14 |
}): Promise<void> {
|
15 |
|
16 |
const shotSegments: ClapSegment[] = filterSegments(
|
17 |
ClapSegmentFilteringMode.START,
|
18 |
shotSegment,
|
19 |
-
|
20 |
)
|
21 |
|
22 |
const shotDialogueSegments: ClapSegment[] = shotSegments.filter(s =>
|
@@ -34,7 +40,11 @@ export async function processShot({
|
|
34 |
// this generates a mp3
|
35 |
shotDialogueSegment.assetUrl = await generateSpeechWithParlerTTS({
|
36 |
text: shotDialogueSegment.prompt,
|
37 |
-
audioId: getSpeechBackgroundAudioPrompt(
|
|
|
|
|
|
|
|
|
38 |
debug: true,
|
39 |
})
|
40 |
shotDialogueSegment.assetSourceType = getClapAssetSourceType(shotDialogueSegment.assetUrl)
|
@@ -47,7 +57,7 @@ export async function processShot({
|
|
47 |
|
48 |
// we update the duration of all the segments for this shot
|
49 |
// (it is possible that this makes the two previous lines redundant)
|
50 |
-
|
51 |
s.assetDurationInMs = durationInMs
|
52 |
})
|
53 |
}
|
@@ -58,6 +68,11 @@ export async function processShot({
|
|
58 |
}
|
59 |
|
60 |
console.log(`[api/edit/dialogues] processShot: generated dialogue audio: ${shotDialogueSegment?.assetUrl?.slice?.(0, 50)}...`)
|
|
|
|
|
|
|
|
|
|
|
61 |
} else {
|
62 |
console.log(`[api/edit/dialogues] processShot: there is already a dialogue audio: ${shotDialogueSegment?.assetUrl?.slice?.(0, 50)}...`)
|
63 |
}
|
|
|
5 |
import { generateSpeechWithParlerTTS } from "@/app/api/generators/speech/generateVoiceWithParlerTTS"
|
6 |
import { getMediaInfo } from "@/app/api/utils/getMediaInfo"
|
7 |
|
8 |
+
import { ClapCompletionMode } from "../types"
|
9 |
+
|
10 |
export async function processShot({
|
11 |
shotSegment,
|
12 |
+
existingClap,
|
13 |
+
newerClap,
|
14 |
+
mode
|
15 |
}: {
|
16 |
shotSegment: ClapSegment
|
17 |
+
existingClap: ClapProject
|
18 |
+
newerClap: ClapProject
|
19 |
+
mode: ClapCompletionMode
|
20 |
}): Promise<void> {
|
21 |
|
22 |
const shotSegments: ClapSegment[] = filterSegments(
|
23 |
ClapSegmentFilteringMode.START,
|
24 |
shotSegment,
|
25 |
+
existingClap.segments
|
26 |
)
|
27 |
|
28 |
const shotDialogueSegments: ClapSegment[] = shotSegments.filter(s =>
|
|
|
40 |
// this generates a mp3
|
41 |
shotDialogueSegment.assetUrl = await generateSpeechWithParlerTTS({
|
42 |
text: shotDialogueSegment.prompt,
|
43 |
+
audioId: getSpeechBackgroundAudioPrompt(
|
44 |
+
shotSegments,
|
45 |
+
existingClap.entityIndex,
|
46 |
+
["high quality", "crisp", "detailed"]
|
47 |
+
),
|
48 |
debug: true,
|
49 |
})
|
50 |
shotDialogueSegment.assetSourceType = getClapAssetSourceType(shotDialogueSegment.assetUrl)
|
|
|
57 |
|
58 |
// we update the duration of all the segments for this shot
|
59 |
// (it is possible that this makes the two previous lines redundant)
|
60 |
+
existingClap.segments.forEach(s => {
|
61 |
s.assetDurationInMs = durationInMs
|
62 |
})
|
63 |
}
|
|
|
68 |
}
|
69 |
|
70 |
console.log(`[api/edit/dialogues] processShot: generated dialogue audio: ${shotDialogueSegment?.assetUrl?.slice?.(0, 50)}...`)
|
71 |
+
|
72 |
+
// if it's partial, we need to manually add it
|
73 |
+
if (mode === "partial") {
|
74 |
+
newerClap.segments.push(shotDialogueSegment)
|
75 |
+
}
|
76 |
} else {
|
77 |
console.log(`[api/edit/dialogues] processShot: there is already a dialogue audio: ${shotDialogueSegment?.assetUrl?.slice?.(0, 50)}...`)
|
78 |
}
|
src/app/api/v1/edit/dialogues/route.ts
CHANGED
@@ -1,42 +1,53 @@
|
|
1 |
import { NextResponse, NextRequest } from "next/server"
|
2 |
|
3 |
-
import { ClapProject, ClapSegment, parseClap, serializeClap } from "@aitube/clap"
|
4 |
|
5 |
import { getToken } from "@/app/api/auth/getToken"
|
6 |
|
7 |
import { processShot } from "./processShot"
|
|
|
|
|
8 |
|
9 |
// a helper to generate speech for a Clap
|
10 |
export async function POST(req: NextRequest) {
|
11 |
|
12 |
const jwtToken = await getToken({ user: "anonymous" })
|
13 |
|
|
|
|
|
|
|
|
|
|
|
14 |
const blob = await req.blob()
|
15 |
|
16 |
-
const
|
17 |
|
18 |
-
if (!
|
19 |
|
20 |
-
console.log(`[api/edit/dialogues] detected ${
|
21 |
|
22 |
-
const shotsSegments: ClapSegment[] =
|
23 |
console.log(`[api/edit/dialogues] detected ${shotsSegments.length} shots`)
|
24 |
|
25 |
if (shotsSegments.length > 32) {
|
26 |
throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
|
27 |
}
|
28 |
|
|
|
|
|
29 |
// we process the shots in parallel (this will increase the queue size in the Gradio spaces)
|
30 |
await Promise.all(shotsSegments.map(shotSegment =>
|
31 |
processShot({
|
32 |
shotSegment,
|
33 |
-
|
|
|
|
|
34 |
})
|
35 |
))
|
36 |
|
37 |
// console.log(`[api/edit/dialogues] returning the clap augmented with dialogues`)
|
38 |
|
39 |
-
return new NextResponse(await serializeClap(
|
40 |
status: 200,
|
41 |
headers: new Headers({ "content-type": "application/x-gzip" }),
|
42 |
})
|
|
|
1 |
import { NextResponse, NextRequest } from "next/server"
|
2 |
|
3 |
+
import { ClapProject, ClapSegment, newClap, parseClap, serializeClap } from "@aitube/clap"
|
4 |
|
5 |
import { getToken } from "@/app/api/auth/getToken"
|
6 |
|
7 |
import { processShot } from "./processShot"
|
8 |
+
import queryString from "query-string"
|
9 |
+
import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"
|
10 |
|
11 |
// a helper to generate speech for a Clap
|
12 |
export async function POST(req: NextRequest) {
|
13 |
|
14 |
const jwtToken = await getToken({ user: "anonymous" })
|
15 |
|
16 |
+
const qs = queryString.parseUrl(req.url || "")
|
17 |
+
const query = (qs || {}).query
|
18 |
+
|
19 |
+
const mode = parseCompletionMode(query?.c)
|
20 |
+
|
21 |
const blob = await req.blob()
|
22 |
|
23 |
+
const existingClap: ClapProject = await parseClap(blob)
|
24 |
|
25 |
+
if (!existingClap?.segments) { throw new Error(`no segment found in the provided clap!`) }
|
26 |
|
27 |
+
console.log(`[api/edit/dialogues] detected ${existingClap.segments.length} segments`)
|
28 |
|
29 |
+
const shotsSegments: ClapSegment[] = existingClap.segments.filter(s => s.category === "camera")
|
30 |
console.log(`[api/edit/dialogues] detected ${shotsSegments.length} shots`)
|
31 |
|
32 |
if (shotsSegments.length > 32) {
|
33 |
throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
|
34 |
}
|
35 |
|
36 |
+
const newerClap = mode === "full" ? existingClap : newClap()
|
37 |
+
|
38 |
// we process the shots in parallel (this will increase the queue size in the Gradio spaces)
|
39 |
await Promise.all(shotsSegments.map(shotSegment =>
|
40 |
processShot({
|
41 |
shotSegment,
|
42 |
+
existingClap,
|
43 |
+
newerClap,
|
44 |
+
mode
|
45 |
})
|
46 |
))
|
47 |
|
48 |
// console.log(`[api/edit/dialogues] returning the clap augmented with dialogues`)
|
49 |
|
50 |
+
return new NextResponse(await serializeClap(newerClap), {
|
51 |
status: 200,
|
52 |
headers: new Headers({ "content-type": "application/x-gzip" }),
|
53 |
})
|
src/app/api/v1/edit/entities/index.ts
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import { ClapProject, getClapAssetSourceType, newClap } from "@aitube/clap"
|
3 |
+
|
4 |
+
import { generateImageID } from "./generateImageID"
|
5 |
+
import { generateAudioID } from "./generateAudioID"
|
6 |
+
|
7 |
+
import { ClapCompletionMode } from "../types"
|
8 |
+
|
9 |
+
export async function editEntities({
|
10 |
+
existingClap,
|
11 |
+
newerClap,
|
12 |
+
mode
|
13 |
+
}: {
|
14 |
+
existingClap: ClapProject
|
15 |
+
newerClap: ClapProject
|
16 |
+
mode: ClapCompletionMode
|
17 |
+
}) {
|
18 |
+
|
19 |
+
if (!existingClap.entities.length) { throw new Error(`please provide at least one entity`) }
|
20 |
+
|
21 |
+
for (const entity of existingClap.entities) {
|
22 |
+
|
23 |
+
let entityHasBeenModified = false
|
24 |
+
|
25 |
+
// TASK 1: GENERATE THE IMAGE PROMPT IF MISSING
|
26 |
+
if (!entity.imagePrompt) {
|
27 |
+
entity.imagePrompt = "a man with a beard"
|
28 |
+
entityHasBeenModified = true
|
29 |
+
}
|
30 |
+
|
31 |
+
// TASK 2: GENERATE THE IMAGE ID IF MISSING
|
32 |
+
if (!entity.imageId) {
|
33 |
+
entity.imageId = await generateImageID({
|
34 |
+
prompt: entity.imagePrompt,
|
35 |
+
seed: entity.seed
|
36 |
+
})
|
37 |
+
entity.imageSourceType = getClapAssetSourceType(entity.imageId)
|
38 |
+
entityHasBeenModified = true
|
39 |
+
}
|
40 |
+
|
41 |
+
// TASK 3: GENERATE THE AUDIO PROMPT IF MISSING
|
42 |
+
if (!entity.audioPrompt) {
|
43 |
+
entity.audioPrompt = "a man with a beard"
|
44 |
+
entityHasBeenModified = true
|
45 |
+
}
|
46 |
+
|
47 |
+
// TASK 4: GENERATE THE AUDIO ID IF MISSING
|
48 |
+
|
49 |
+
// TODO here: call Parler-TTS or a generic audio generator
|
50 |
+
if (!entity.audioId) {
|
51 |
+
entity.audioId = await generateAudioID({
|
52 |
+
prompt: entity.audioPrompt,
|
53 |
+
seed: entity.seed
|
54 |
+
})
|
55 |
+
entity.audioSourceType = getClapAssetSourceType(entity.audioId)
|
56 |
+
entityHasBeenModified = true
|
57 |
+
}
|
58 |
+
|
59 |
+
// in case we are doing a partial update
|
60 |
+
if (mode === "partial" && entityHasBeenModified && !newerClap.entityIndex[entity.id]) {
|
61 |
+
newerClap.entities.push(entity)
|
62 |
+
newerClap.entityIndex[entity.id] = entity
|
63 |
+
}
|
64 |
+
}
|
65 |
+
|
66 |
+
console.log(`[api/edit/entities] returning the newerClap`)
|
67 |
+
|
68 |
+
return newerClap
|
69 |
+
}
|
src/app/api/v1/edit/entities/route.ts
CHANGED
@@ -1,86 +1,37 @@
|
|
1 |
import { NextResponse, NextRequest } from "next/server"
|
2 |
import queryString from "query-string"
|
|
|
3 |
|
4 |
-
import { getClapAssetSourceType, parseClap, serializeClap } from "@aitube/clap"
|
5 |
import { getToken } from "@/app/api/auth/getToken"
|
|
|
6 |
|
7 |
-
import {
|
8 |
-
import { generateAudioID } from "./generateAudioID"
|
9 |
-
import { ClapCompletionMode } from "../types"
|
10 |
-
|
11 |
-
const defaultMode: ClapCompletionMode = "full"
|
12 |
|
13 |
export async function POST(req: NextRequest) {
|
14 |
|
15 |
const qs = queryString.parseUrl(req.url || "")
|
16 |
const query = (qs || {}).query
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
let prompt = ""
|
21 |
-
try {
|
22 |
-
prompt = decodeURIComponent(query?.p?.toString() || "").trim()
|
23 |
-
} catch (err) {}
|
24 |
-
if (!prompt) {
|
25 |
-
return NextResponse.json({ error: 'no prompt provided' }, { status: 400 });
|
26 |
-
}
|
27 |
-
|
28 |
-
if (!prompt.length) { throw new Error(`please provide a prompt`) }
|
29 |
-
*/
|
30 |
-
|
31 |
-
|
32 |
-
let mode = defaultMode
|
33 |
-
try {
|
34 |
-
let maybeMode = decodeURIComponent(query?.mode?.toString() || defaultMode).trim()
|
35 |
-
mode = ["partial", "full"].includes(maybeMode) ? (maybeMode as ClapCompletionMode) : "full"
|
36 |
-
} catch (err) {}
|
37 |
-
|
38 |
-
console.log("[api/edit/entities] request:", prompt)
|
39 |
|
40 |
const jwtToken = await getToken({ user: "anonymous" })
|
41 |
|
42 |
const blob = await req.blob()
|
43 |
|
44 |
-
const
|
45 |
-
|
46 |
-
if (!clap.entities.length) { throw new Error(`please provide at least one entity`) }
|
47 |
|
48 |
-
|
49 |
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
entity.imageId = await generateImageID({
|
58 |
-
prompt: entity.imagePrompt,
|
59 |
-
seed: entity.seed
|
60 |
-
})
|
61 |
-
entity.imageSourceType = getClapAssetSourceType(entity.imageId)
|
62 |
-
}
|
63 |
-
|
64 |
-
// TASK 3: GENERATE THE AUDIO PROMPT IF MISSING
|
65 |
-
if (!entity.audioPrompt) {
|
66 |
-
entity.audioPrompt = "a man with a beard"
|
67 |
-
}
|
68 |
-
|
69 |
-
// TASK 4: GENERATE THE AUDIO ID IF MISSING
|
70 |
-
|
71 |
-
// TODO here: call Parler-TTS or a generic audio generator
|
72 |
-
if (!entity.audioId) {
|
73 |
-
entity.audioId = await generateAudioID({
|
74 |
-
prompt: entity.audioPrompt,
|
75 |
-
seed: entity.seed
|
76 |
-
})
|
77 |
-
entity.audioSourceType = getClapAssetSourceType(entity.audioId)
|
78 |
-
}
|
79 |
-
}
|
80 |
-
|
81 |
-
console.log(`[api/edit/entities] returning the clap extended with the entities`)
|
82 |
|
83 |
-
return new NextResponse(await serializeClap(
|
84 |
status: 200,
|
85 |
headers: new Headers({ "content-type": "application/x-gzip" }),
|
86 |
})
|
|
|
1 |
import { NextResponse, NextRequest } from "next/server"
|
2 |
import queryString from "query-string"
|
3 |
+
import { newClap, parseClap, serializeClap } from "@aitube/clap"
|
4 |
|
|
|
5 |
import { getToken } from "@/app/api/auth/getToken"
|
6 |
+
import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"
|
7 |
|
8 |
+
import { editEntities } from "."
|
|
|
|
|
|
|
|
|
9 |
|
10 |
export async function POST(req: NextRequest) {
|
11 |
|
12 |
const qs = queryString.parseUrl(req.url || "")
|
13 |
const query = (qs || {}).query
|
14 |
|
15 |
+
const mode = parseCompletionMode(query?.c)
|
16 |
+
// const prompt = parsePrompt(query?.p)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
const jwtToken = await getToken({ user: "anonymous" })
|
19 |
|
20 |
const blob = await req.blob()
|
21 |
|
22 |
+
const existingClap = await parseClap(blob)
|
|
|
|
|
23 |
|
24 |
+
const newerClap = mode === "full" ? existingClap : newClap()
|
25 |
|
26 |
+
await editEntities({
|
27 |
+
existingClap,
|
28 |
+
newerClap,
|
29 |
+
mode
|
30 |
+
})
|
31 |
+
|
32 |
+
console.log(`[api/edit/entities] returning the newer clap extended with the entities`)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
+
return new NextResponse(await serializeClap(newerClap), {
|
35 |
status: 200,
|
36 |
headers: new Headers({ "content-type": "application/x-gzip" }),
|
37 |
})
|
src/app/api/v1/edit/storyboards/processShot.ts
CHANGED
@@ -1,22 +1,27 @@
|
|
1 |
import { ClapProject, ClapSegment, getClapAssetSourceType, newSegment, filterSegments, ClapSegmentFilteringMode } from "@aitube/clap"
|
2 |
-
|
3 |
import { getVideoPrompt } from "@aitube/engine"
|
4 |
|
5 |
import { getPositivePrompt } from "@/app/api/utils/imagePrompts"
|
|
|
6 |
import { generateStoryboard } from "./generateStoryboard"
|
|
|
7 |
|
8 |
export async function processShot({
|
9 |
shotSegment,
|
10 |
-
|
|
|
|
|
11 |
}: {
|
12 |
shotSegment: ClapSegment
|
13 |
-
|
|
|
|
|
14 |
}): Promise<void> {
|
15 |
|
16 |
const shotSegments: ClapSegment[] = filterSegments(
|
17 |
ClapSegmentFilteringMode.START,
|
18 |
shotSegment,
|
19 |
-
|
20 |
)
|
21 |
|
22 |
const shotStoryboardSegments: ClapSegment[] = shotSegments.filter(s =>
|
@@ -38,18 +43,24 @@ export async function processShot({
|
|
38 |
outputType: "image"
|
39 |
})
|
40 |
|
|
|
41 |
if (shotStoryboardSegment) {
|
42 |
-
|
43 |
}
|
44 |
|
45 |
console.log(`[api/v1/edit/storyboards] processShot: generated storyboard segment [${shotSegment.startTimeInMs}:${shotSegment.endTimeInMs}]`)
|
46 |
}
|
|
|
47 |
if (!shotStoryboardSegment) { throw new Error(`failed to generate a newSegment`) }
|
48 |
|
49 |
// TASK 2: GENERATE MISSING STORYBOARD PROMPT
|
50 |
if (!shotStoryboardSegment?.prompt) {
|
51 |
// storyboard is missing, let's generate it
|
52 |
-
shotStoryboardSegment.prompt = getVideoPrompt(
|
|
|
|
|
|
|
|
|
53 |
console.log(`[api/v1/edit/storyboards] processShot: generating storyboard prompt: ${shotStoryboardSegment.prompt}`)
|
54 |
}
|
55 |
|
@@ -60,8 +71,8 @@ export async function processShot({
|
|
60 |
try {
|
61 |
shotStoryboardSegment.assetUrl = await generateStoryboard({
|
62 |
prompt: getPositivePrompt(shotStoryboardSegment.prompt),
|
63 |
-
width:
|
64 |
-
height:
|
65 |
})
|
66 |
shotStoryboardSegment.assetSourceType = getClapAssetSourceType(shotStoryboardSegment.assetUrl)
|
67 |
} catch (err) {
|
@@ -69,7 +80,13 @@ export async function processShot({
|
|
69 |
throw err
|
70 |
}
|
71 |
|
72 |
-
console.log(`[api/v1/edit/storyboards] processShot: generated storyboard image: ${shotStoryboardSegment?.assetUrl?.slice?.(0, 50)}...`)
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
} else {
|
74 |
console.log(`[api/v1/edit/storyboards] processShot: there is already a storyboard image: ${shotStoryboardSegment?.assetUrl?.slice?.(0, 50)}...`)
|
75 |
}
|
|
|
1 |
import { ClapProject, ClapSegment, getClapAssetSourceType, newSegment, filterSegments, ClapSegmentFilteringMode } from "@aitube/clap"
|
|
|
2 |
import { getVideoPrompt } from "@aitube/engine"
|
3 |
|
4 |
import { getPositivePrompt } from "@/app/api/utils/imagePrompts"
|
5 |
+
|
6 |
import { generateStoryboard } from "./generateStoryboard"
|
7 |
+
import { ClapCompletionMode } from "../types"
|
8 |
|
9 |
export async function processShot({
|
10 |
shotSegment,
|
11 |
+
existingClap,
|
12 |
+
newerClap,
|
13 |
+
mode
|
14 |
}: {
|
15 |
shotSegment: ClapSegment
|
16 |
+
existingClap: ClapProject
|
17 |
+
newerClap: ClapProject
|
18 |
+
mode: ClapCompletionMode
|
19 |
}): Promise<void> {
|
20 |
|
21 |
const shotSegments: ClapSegment[] = filterSegments(
|
22 |
ClapSegmentFilteringMode.START,
|
23 |
shotSegment,
|
24 |
+
existingClap.segments
|
25 |
)
|
26 |
|
27 |
const shotStoryboardSegments: ClapSegment[] = shotSegments.filter(s =>
|
|
|
43 |
outputType: "image"
|
44 |
})
|
45 |
|
46 |
+
// we fix the existing clap
|
47 |
if (shotStoryboardSegment) {
|
48 |
+
existingClap.segments.push(shotStoryboardSegment)
|
49 |
}
|
50 |
|
51 |
console.log(`[api/v1/edit/storyboards] processShot: generated storyboard segment [${shotSegment.startTimeInMs}:${shotSegment.endTimeInMs}]`)
|
52 |
}
|
53 |
+
|
54 |
if (!shotStoryboardSegment) { throw new Error(`failed to generate a newSegment`) }
|
55 |
|
56 |
// TASK 2: GENERATE MISSING STORYBOARD PROMPT
|
57 |
if (!shotStoryboardSegment?.prompt) {
|
58 |
// storyboard is missing, let's generate it
|
59 |
+
shotStoryboardSegment.prompt = getVideoPrompt(
|
60 |
+
shotSegments,
|
61 |
+
existingClap.entityIndex,
|
62 |
+
["high quality", "crisp", "detailed"]
|
63 |
+
)
|
64 |
console.log(`[api/v1/edit/storyboards] processShot: generating storyboard prompt: ${shotStoryboardSegment.prompt}`)
|
65 |
}
|
66 |
|
|
|
71 |
try {
|
72 |
shotStoryboardSegment.assetUrl = await generateStoryboard({
|
73 |
prompt: getPositivePrompt(shotStoryboardSegment.prompt),
|
74 |
+
width: existingClap.meta.width,
|
75 |
+
height: existingClap.meta.height,
|
76 |
})
|
77 |
shotStoryboardSegment.assetSourceType = getClapAssetSourceType(shotStoryboardSegment.assetUrl)
|
78 |
} catch (err) {
|
|
|
80 |
throw err
|
81 |
}
|
82 |
|
83 |
+
console.log(`[api/v1/edit/storyboards] processShot: generated storyboard image: ${shotStoryboardSegment?.assetUrl?.slice?.(0, 50)}...`)
|
84 |
+
|
85 |
+
// if mode is full, newerClap already contains the reference to shotStoryboardSegment
|
86 |
+
// but if it's partial, we need to manually add it
|
87 |
+
if (mode === "partial") {
|
88 |
+
newerClap.segments.push(shotStoryboardSegment)
|
89 |
+
}
|
90 |
} else {
|
91 |
console.log(`[api/v1/edit/storyboards] processShot: there is already a storyboard image: ${shotStoryboardSegment?.assetUrl?.slice?.(0, 50)}...`)
|
92 |
}
|
src/app/api/v1/edit/storyboards/route.ts
CHANGED
@@ -1,9 +1,11 @@
|
|
1 |
import { NextResponse, NextRequest } from "next/server"
|
2 |
-
|
3 |
-
import { ClapProject, ClapSegment, parseClap, serializeClap } from "@aitube/clap"
|
4 |
|
5 |
import { getToken } from "@/app/api/auth/getToken"
|
6 |
|
|
|
|
|
7 |
import { processShot } from "./processShot"
|
8 |
|
9 |
// a helper to generate storyboards for a Clap
|
@@ -17,32 +19,41 @@ export async function POST(req: NextRequest) {
|
|
17 |
|
18 |
const jwtToken = await getToken({ user: "anonymous" })
|
19 |
|
|
|
|
|
|
|
|
|
|
|
20 |
const blob = await req.blob()
|
21 |
|
22 |
-
const
|
23 |
|
24 |
-
if (!
|
25 |
|
26 |
-
console.log(`[api/v1/edit/storyboards] detected ${
|
27 |
|
28 |
-
const shotsSegments: ClapSegment[] =
|
29 |
console.log(`[api/v1/edit/storyboards] detected ${shotsSegments.length} shots`)
|
30 |
|
31 |
if (shotsSegments.length > 32) {
|
32 |
throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
|
33 |
}
|
34 |
|
|
|
|
|
35 |
// we process the shots in parallel (this will increase the queue size in the Gradio spaces)
|
36 |
await Promise.all(shotsSegments.map(shotSegment =>
|
37 |
processShot({
|
38 |
shotSegment,
|
39 |
-
|
|
|
|
|
40 |
})
|
41 |
))
|
42 |
|
43 |
// console.log(`[api/v1/edit/storyboards] returning the clap augmented with storyboards`)
|
44 |
|
45 |
-
return new NextResponse(await serializeClap(
|
46 |
status: 200,
|
47 |
headers: new Headers({ "content-type": "application/x-gzip" }),
|
48 |
})
|
|
|
1 |
import { NextResponse, NextRequest } from "next/server"
|
2 |
+
import queryString from "query-string"
|
3 |
+
import { ClapProject, ClapSegment, newClap, parseClap, serializeClap } from "@aitube/clap"
|
4 |
|
5 |
import { getToken } from "@/app/api/auth/getToken"
|
6 |
|
7 |
+
import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"
|
8 |
+
|
9 |
import { processShot } from "./processShot"
|
10 |
|
11 |
// a helper to generate storyboards for a Clap
|
|
|
19 |
|
20 |
const jwtToken = await getToken({ user: "anonymous" })
|
21 |
|
22 |
+
const qs = queryString.parseUrl(req.url || "")
|
23 |
+
const query = (qs || {}).query
|
24 |
+
|
25 |
+
const mode = parseCompletionMode(query?.c)
|
26 |
+
|
27 |
const blob = await req.blob()
|
28 |
|
29 |
+
const existingClap: ClapProject = await parseClap(blob)
|
30 |
|
31 |
+
if (!existingClap?.segments) { throw new Error(`no segment found in the provided clap!`) }
|
32 |
|
33 |
+
console.log(`[api/v1/edit/storyboards] detected ${existingClap.segments.length} segments`)
|
34 |
|
35 |
+
const shotsSegments: ClapSegment[] = existingClap.segments.filter(s => s.category === "camera")
|
36 |
console.log(`[api/v1/edit/storyboards] detected ${shotsSegments.length} shots`)
|
37 |
|
38 |
if (shotsSegments.length > 32) {
|
39 |
throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
|
40 |
}
|
41 |
|
42 |
+
const newerClap = mode === "full" ? existingClap : newClap()
|
43 |
+
|
44 |
// we process the shots in parallel (this will increase the queue size in the Gradio spaces)
|
45 |
await Promise.all(shotsSegments.map(shotSegment =>
|
46 |
processShot({
|
47 |
shotSegment,
|
48 |
+
existingClap,
|
49 |
+
newerClap,
|
50 |
+
mode,
|
51 |
})
|
52 |
))
|
53 |
|
54 |
// console.log(`[api/v1/edit/storyboards] returning the clap augmented with storyboards`)
|
55 |
|
56 |
+
return new NextResponse(await serializeClap(newerClap), {
|
57 |
status: 200,
|
58 |
headers: new Headers({ "content-type": "application/x-gzip" }),
|
59 |
})
|
src/app/api/v1/edit/videos/processShot.ts
CHANGED
@@ -5,18 +5,23 @@ import { getVideoPrompt } from "@aitube/engine"
|
|
5 |
import { getPositivePrompt } from "@/app/api/utils/imagePrompts"
|
6 |
|
7 |
import { generateVideo } from "./generateVideo"
|
|
|
8 |
|
9 |
export async function processShot({
|
10 |
shotSegment,
|
11 |
-
|
|
|
|
|
12 |
}: {
|
13 |
shotSegment: ClapSegment
|
14 |
-
|
|
|
|
|
15 |
}): Promise<void> {
|
16 |
const shotSegments: ClapSegment[] = filterSegments(
|
17 |
ClapSegmentFilteringMode.START,
|
18 |
shotSegment,
|
19 |
-
|
20 |
)
|
21 |
|
22 |
const shotVideoSegments: ClapSegment[] = shotSegments.filter(s =>
|
@@ -40,8 +45,9 @@ export async function processShot({
|
|
40 |
outputType: "video"
|
41 |
})
|
42 |
|
|
|
43 |
if (shotVideoSegment) {
|
44 |
-
|
45 |
}
|
46 |
|
47 |
console.log(`[api/edit/videos] processShot: generated video segment [${shotSegment.startTimeInMs}:${shotSegment.endTimeInMs}]`)
|
@@ -51,10 +57,15 @@ export async function processShot({
|
|
51 |
throw new Error(`failed to generate a new segment`)
|
52 |
}
|
53 |
|
|
|
54 |
// TASK 2: GENERATE MISSING VIDEO PROMPT
|
55 |
if (!shotVideoSegment?.prompt) {
|
56 |
// video is missing, let's generate it
|
57 |
-
shotVideoSegment.prompt = getVideoPrompt(
|
|
|
|
|
|
|
|
|
58 |
console.log(`[api/edit/videos] processShot: generating video prompt: ${shotVideoSegment.prompt}`)
|
59 |
}
|
60 |
|
@@ -65,8 +76,8 @@ export async function processShot({
|
|
65 |
try {
|
66 |
shotVideoSegment.assetUrl = await generateVideo({
|
67 |
prompt: getPositivePrompt(shotVideoSegment.prompt),
|
68 |
-
width:
|
69 |
-
height:
|
70 |
})
|
71 |
shotVideoSegment.assetSourceType = getClapAssetSourceType(shotVideoSegment.assetUrl)
|
72 |
} catch (err) {
|
@@ -75,6 +86,13 @@ export async function processShot({
|
|
75 |
}
|
76 |
|
77 |
console.log(`[api/edit/videos] processShot: generated video files: ${shotVideoSegment?.assetUrl?.slice?.(0, 50)}...`)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
} else {
|
79 |
console.log(`[api/edit/videos] processShot: there is already a video file: ${shotVideoSegment?.assetUrl?.slice?.(0, 50)}...`)
|
80 |
}
|
|
|
5 |
import { getPositivePrompt } from "@/app/api/utils/imagePrompts"
|
6 |
|
7 |
import { generateVideo } from "./generateVideo"
|
8 |
+
import { ClapCompletionMode } from "../types"
|
9 |
|
10 |
export async function processShot({
|
11 |
shotSegment,
|
12 |
+
existingClap,
|
13 |
+
newerClap,
|
14 |
+
mode
|
15 |
}: {
|
16 |
shotSegment: ClapSegment
|
17 |
+
existingClap: ClapProject
|
18 |
+
newerClap: ClapProject
|
19 |
+
mode: ClapCompletionMode
|
20 |
}): Promise<void> {
|
21 |
const shotSegments: ClapSegment[] = filterSegments(
|
22 |
ClapSegmentFilteringMode.START,
|
23 |
shotSegment,
|
24 |
+
existingClap.segments
|
25 |
)
|
26 |
|
27 |
const shotVideoSegments: ClapSegment[] = shotSegments.filter(s =>
|
|
|
45 |
outputType: "video"
|
46 |
})
|
47 |
|
48 |
+
// we fix the existing clap
|
49 |
if (shotVideoSegment) {
|
50 |
+
existingClap.segments.push(shotSegment)
|
51 |
}
|
52 |
|
53 |
console.log(`[api/edit/videos] processShot: generated video segment [${shotSegment.startTimeInMs}:${shotSegment.endTimeInMs}]`)
|
|
|
57 |
throw new Error(`failed to generate a new segment`)
|
58 |
}
|
59 |
|
60 |
+
|
61 |
// TASK 2: GENERATE MISSING VIDEO PROMPT
|
62 |
if (!shotVideoSegment?.prompt) {
|
63 |
// video is missing, let's generate it
|
64 |
+
shotVideoSegment.prompt = getVideoPrompt(
|
65 |
+
shotSegments,
|
66 |
+
existingClap.entityIndex,
|
67 |
+
["high quality", "crisp", "detailed"]
|
68 |
+
)
|
69 |
console.log(`[api/edit/videos] processShot: generating video prompt: ${shotVideoSegment.prompt}`)
|
70 |
}
|
71 |
|
|
|
76 |
try {
|
77 |
shotVideoSegment.assetUrl = await generateVideo({
|
78 |
prompt: getPositivePrompt(shotVideoSegment.prompt),
|
79 |
+
width: existingClap.meta.width,
|
80 |
+
height: existingClap.meta.height,
|
81 |
})
|
82 |
shotVideoSegment.assetSourceType = getClapAssetSourceType(shotVideoSegment.assetUrl)
|
83 |
} catch (err) {
|
|
|
86 |
}
|
87 |
|
88 |
console.log(`[api/edit/videos] processShot: generated video files: ${shotVideoSegment?.assetUrl?.slice?.(0, 50)}...`)
|
89 |
+
|
90 |
+
// if mode is full, newerClap already contains the reference to shotVideoSegment
|
91 |
+
// but if it's partial, we need to manually add it
|
92 |
+
if (mode === "partial") {
|
93 |
+
newerClap.segments.push(shotVideoSegment)
|
94 |
+
}
|
95 |
+
|
96 |
} else {
|
97 |
console.log(`[api/edit/videos] processShot: there is already a video file: ${shotVideoSegment?.assetUrl?.slice?.(0, 50)}...`)
|
98 |
}
|
src/app/api/v1/edit/videos/route.ts
CHANGED
@@ -1,11 +1,13 @@
|
|
1 |
import { NextResponse, NextRequest } from "next/server"
|
2 |
-
|
3 |
-
import { ClapProject, ClapSegment, parseClap, serializeClap } from "@aitube/clap"
|
4 |
|
5 |
import { getToken } from "@/app/api/auth/getToken"
|
6 |
|
7 |
import { processShot } from "./processShot"
|
8 |
|
|
|
|
|
9 |
|
10 |
// a helper to generate videos for a Clap
|
11 |
// this is mostly used by external apps such as the Stories Factory
|
@@ -18,32 +20,41 @@ export async function POST(req: NextRequest) {
|
|
18 |
|
19 |
const jwtToken = await getToken({ user: "anonymous" })
|
20 |
|
|
|
|
|
|
|
|
|
|
|
21 |
const blob = await req.blob()
|
22 |
|
23 |
-
const
|
24 |
|
25 |
-
if (!
|
26 |
|
27 |
-
console.log(`[api/edit/videos] detected ${
|
28 |
|
29 |
-
const shotsSegments: ClapSegment[] =
|
30 |
console.log(`[api/edit/videos] detected ${shotsSegments.length} shots`)
|
31 |
|
32 |
if (shotsSegments.length > 32) {
|
33 |
throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
|
34 |
}
|
35 |
|
|
|
|
|
36 |
// we process the shots in parallel (this will increase the queue size in the Gradio spaces)
|
37 |
await Promise.all(shotsSegments.map(shotSegment =>
|
38 |
processShot({
|
39 |
shotSegment,
|
40 |
-
|
|
|
|
|
41 |
})
|
42 |
))
|
43 |
|
44 |
console.log(`[api/edit/videos] returning the clap augmented with videos`)
|
45 |
|
46 |
-
return new NextResponse(await serializeClap(
|
47 |
status: 200,
|
48 |
headers: new Headers({ "content-type": "application/x-gzip" }),
|
49 |
})
|
|
|
1 |
import { NextResponse, NextRequest } from "next/server"
|
2 |
+
import queryString from "query-string"
|
3 |
+
import { ClapProject, ClapSegment, newClap, parseClap, serializeClap } from "@aitube/clap"
|
4 |
|
5 |
import { getToken } from "@/app/api/auth/getToken"
|
6 |
|
7 |
import { processShot } from "./processShot"
|
8 |
|
9 |
+
import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"
|
10 |
+
|
11 |
|
12 |
// a helper to generate videos for a Clap
|
13 |
// this is mostly used by external apps such as the Stories Factory
|
|
|
20 |
|
21 |
const jwtToken = await getToken({ user: "anonymous" })
|
22 |
|
23 |
+
const qs = queryString.parseUrl(req.url || "")
|
24 |
+
const query = (qs || {}).query
|
25 |
+
|
26 |
+
const mode = parseCompletionMode(query?.c)
|
27 |
+
|
28 |
const blob = await req.blob()
|
29 |
|
30 |
+
const existingClap: ClapProject = await parseClap(blob)
|
31 |
|
32 |
+
if (!existingClap?.segments) { throw new Error(`no segment found in the provided clap!`) }
|
33 |
|
34 |
+
console.log(`[api/edit/videos] detected ${existingClap.segments.length} segments`)
|
35 |
|
36 |
+
const shotsSegments: ClapSegment[] = existingClap.segments.filter(s => s.category === "camera")
|
37 |
console.log(`[api/edit/videos] detected ${shotsSegments.length} shots`)
|
38 |
|
39 |
if (shotsSegments.length > 32) {
|
40 |
throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
|
41 |
}
|
42 |
|
43 |
+
const newerClap = mode === "full" ? existingClap : newClap()
|
44 |
+
|
45 |
// we process the shots in parallel (this will increase the queue size in the Gradio spaces)
|
46 |
await Promise.all(shotsSegments.map(shotSegment =>
|
47 |
processShot({
|
48 |
shotSegment,
|
49 |
+
existingClap,
|
50 |
+
newerClap,
|
51 |
+
mode
|
52 |
})
|
53 |
))
|
54 |
|
55 |
console.log(`[api/edit/videos] returning the clap augmented with videos`)
|
56 |
|
57 |
+
return new NextResponse(await serializeClap(newerClap), {
|
58 |
status: 200,
|
59 |
headers: new Headers({ "content-type": "application/x-gzip" }),
|
60 |
})
|
src/app/api/v1/search/index.ts
CHANGED
@@ -3,7 +3,7 @@
|
|
3 |
import YAML from "yaml"
|
4 |
|
5 |
import { predict } from "@/app/api/providers/huggingface/predictWithHuggingFace"
|
6 |
-
import { parseRawStringToYAML } from "@/app/api/
|
7 |
|
8 |
import { systemPromptForBasicSearchResults, systemPromptForExtendedSearchResults } from "./systemPrompt"
|
9 |
import type { BasicSearchResult, ExtendedSearchResult } from "./types"
|
|
|
3 |
import YAML from "yaml"
|
4 |
|
5 |
import { predict } from "@/app/api/providers/huggingface/predictWithHuggingFace"
|
6 |
+
import { parseRawStringToYAML } from "@/app/api/parsers/parseRawStringToYAML"
|
7 |
|
8 |
import { systemPromptForBasicSearchResults, systemPromptForExtendedSearchResults } from "./systemPrompt"
|
9 |
import type { BasicSearchResult, ExtendedSearchResult } from "./types"
|
src/app/api/v1/search/route.ts
CHANGED
@@ -2,6 +2,9 @@ import { NextResponse, NextRequest } from "next/server"
|
|
2 |
import queryString from "query-string"
|
3 |
import { BasicSearchResult, ExtendedSearchResult } from "./types"
|
4 |
import { extend, search } from "."
|
|
|
|
|
|
|
5 |
|
6 |
export type LatentSearchMode =
|
7 |
| "basic"
|
@@ -13,18 +16,11 @@ export async function GET(req: NextRequest, res: NextResponse) {
|
|
13 |
const qs = queryString.parseUrl(req.url || "")
|
14 |
const query = (qs || {}).query
|
15 |
|
16 |
-
|
17 |
-
try {
|
18 |
-
mode = decodeURIComponent(query?.m?.toString() || "basic").trim() as LatentSearchMode
|
19 |
-
} catch (err) {}
|
20 |
-
|
21 |
|
22 |
if (mode === "basic") {
|
23 |
-
|
24 |
-
|
25 |
-
prompt = decodeURIComponent(query?.p?.toString() || "").trim() as string
|
26 |
-
} catch (err) {}
|
27 |
-
|
28 |
const basicSearchResults: BasicSearchResult[] = await search({
|
29 |
prompt,
|
30 |
nbResults: 4
|
@@ -39,14 +35,7 @@ export async function GET(req: NextRequest, res: NextResponse) {
|
|
39 |
})
|
40 |
} else if (mode === "extended") {
|
41 |
|
42 |
-
|
43 |
-
try {
|
44 |
-
const rawString = decodeURIComponent(query?.e?.toString() || "").trim() as string
|
45 |
-
const maybeExistingResults = JSON.parse(rawString)
|
46 |
-
if (Array.isArray(maybeExistingResults)) {
|
47 |
-
basicResults = maybeExistingResults
|
48 |
-
}
|
49 |
-
} catch (err) {}
|
50 |
|
51 |
const extendedSearchResults: ExtendedSearchResult[] = await extend({
|
52 |
basicResults
|
|
|
2 |
import queryString from "query-string"
|
3 |
import { BasicSearchResult, ExtendedSearchResult } from "./types"
|
4 |
import { extend, search } from "."
|
5 |
+
import { parsePrompt } from "../../parsers/parsePrompt"
|
6 |
+
import { parseLatentSearchMode } from "../../parsers/parseLatentSearchMode"
|
7 |
+
import { parseBasicSearchResult } from "../../parsers/parseBasicSearchResults"
|
8 |
|
9 |
export type LatentSearchMode =
|
10 |
| "basic"
|
|
|
16 |
const qs = queryString.parseUrl(req.url || "")
|
17 |
const query = (qs || {}).query
|
18 |
|
19 |
+
const mode = parseLatentSearchMode(query?.m)
|
|
|
|
|
|
|
|
|
20 |
|
21 |
if (mode === "basic") {
|
22 |
+
const prompt = parsePrompt(query?.p)
|
23 |
+
|
|
|
|
|
|
|
24 |
const basicSearchResults: BasicSearchResult[] = await search({
|
25 |
prompt,
|
26 |
nbResults: 4
|
|
|
35 |
})
|
36 |
} else if (mode === "extended") {
|
37 |
|
38 |
+
const basicResults = parseBasicSearchResult(query?.e)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
const extendedSearchResults: ExtendedSearchResult[] = await extend({
|
41 |
basicResults
|
src/app/latent/watch/page.tsx
CHANGED
@@ -1,14 +1,13 @@
|
|
1 |
-
import { encode, decode } from 'js-base64'
|
2 |
-
|
3 |
import { LatentQueryProps } from "@/types/general"
|
4 |
-
import {
|
|
|
|
|
|
|
5 |
|
6 |
import { Main } from "../../main"
|
7 |
import { getNewMediaInfo } from "../../api/generators/search/getNewMediaInfo"
|
8 |
import { getToken } from "../../api/auth/getToken"
|
9 |
|
10 |
-
import { extend } from "@/app/api/v1/search"
|
11 |
-
|
12 |
// https://jmswrnr.com/blog/protecting-next-js-api-routes-query-parameters
|
13 |
|
14 |
export default async function DreamPage({
|
@@ -19,8 +18,8 @@ export default async function DreamPage({
|
|
19 |
}: LatentQueryProps) {
|
20 |
const jwtToken = await getToken({ user: "anonymous" })
|
21 |
console.log(`[/latent/watch] prompt =`, prompt)
|
22 |
-
|
23 |
-
|
24 |
console.log("[/latent/watch] basicResult:", basicResult)
|
25 |
|
26 |
// note that we should generate a longer synopsis from the autocomplete result
|
|
|
|
|
|
|
1 |
import { LatentQueryProps } from "@/types/general"
|
2 |
+
import { ExtendedSearchResult } from "@/app/api/v1/search/types"
|
3 |
+
import { extend } from "@/app/api/v1/search"
|
4 |
+
import { parseBasicSearchResult } from '@/app/api/parsers/parseBasicSearchResult'
|
5 |
+
|
6 |
|
7 |
import { Main } from "../../main"
|
8 |
import { getNewMediaInfo } from "../../api/generators/search/getNewMediaInfo"
|
9 |
import { getToken } from "../../api/auth/getToken"
|
10 |
|
|
|
|
|
11 |
// https://jmswrnr.com/blog/protecting-next-js-api-routes-query-parameters
|
12 |
|
13 |
export default async function DreamPage({
|
|
|
18 |
}: LatentQueryProps) {
|
19 |
const jwtToken = await getToken({ user: "anonymous" })
|
20 |
console.log(`[/latent/watch] prompt =`, prompt)
|
21 |
+
const basicResult = parseBasicSearchResult(prompt)
|
22 |
+
|
23 |
console.log("[/latent/watch] basicResult:", basicResult)
|
24 |
|
25 |
// note that we should generate a longer synopsis from the autocomplete result
|
src/app/main.tsx
CHANGED
@@ -81,7 +81,7 @@ export function Main({
|
|
81 |
const setPublicTracks = useStore(s => s.setPublicTracks)
|
82 |
const setPublicTrack = useStore(s => s.setPublicTrack)
|
83 |
|
84 |
-
console.log("[main.tsx] latentMedia = ", latentMedia)
|
85 |
|
86 |
useEffect(() => {
|
87 |
if (typeof jwtToken !== "string" && !jwtToken) { return }
|
|
|
81 |
const setPublicTracks = useStore(s => s.setPublicTracks)
|
82 |
const setPublicTrack = useStore(s => s.setPublicTrack)
|
83 |
|
84 |
+
// console.log("[main.tsx] latentMedia = ", latentMedia)
|
85 |
|
86 |
useEffect(() => {
|
87 |
if (typeof jwtToken !== "string" && !jwtToken) { return }
|
src/app/views/user-channel-view/index.tsx
CHANGED
@@ -15,10 +15,10 @@ import { Button } from "@/components/ui/button"
|
|
15 |
import { submitVideoRequest } from "@/app/api/actions/submitVideoRequest"
|
16 |
import { PendingVideoList } from "@/components/interface/pending-video-list"
|
17 |
import { getChannelVideos } from "@/app/api/actions/ai-tube-hf/getChannelVideos"
|
18 |
-
import { parseVideoModelName } from "@/app/api/utils/parseVideoModelName"
|
19 |
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select"
|
20 |
import { defaultVideoModel, defaultVideoOrientation, defaultVoice } from "@/app/config"
|
21 |
-
import { parseVideoOrientation } from "@/app/api/utils/parseVideoOrientation"
|
22 |
|
23 |
export function UserChannelView() {
|
24 |
const [_isPending, startTransition] = useTransition()
|
|
|
15 |
import { submitVideoRequest } from "@/app/api/actions/submitVideoRequest"
|
16 |
import { PendingVideoList } from "@/components/interface/pending-video-list"
|
17 |
import { getChannelVideos } from "@/app/api/actions/ai-tube-hf/getChannelVideos"
|
18 |
+
import { parseVideoModelName } from "@/app/api/parsers/parseVideoModelName"
|
19 |
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select"
|
20 |
import { defaultVideoModel, defaultVideoOrientation, defaultVoice } from "@/app/config"
|
21 |
+
import { parseVideoOrientation } from "@/app/api/parsers/parseVideoOrientation"
|
22 |
|
23 |
export function UserChannelView() {
|
24 |
const [_isPending, startTransition] = useTransition()
|
src/lib/utils/parseMediaProjectionType.ts
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import { parseProjectionFromLoRA } from "@/app/api/utils/parseProjectionFromLoRA"
|
2 |
import { MediaInfo, MediaProjection } from "@/types/general"
|
3 |
|
4 |
import { parseAssetToCheckIfGaussian } from "./parseAssetToCheckIfGaussian"
|
|
|
1 |
+
import { parseProjectionFromLoRA } from "@/app/api/parsers/parseProjectionFromLoRA"
|
2 |
import { MediaInfo, MediaProjection } from "@/types/general"
|
3 |
|
4 |
import { parseAssetToCheckIfGaussian } from "./parseAssetToCheckIfGaussian"
|