import { addBase64Header } from "@/lib/data/addBase64Header"
import { tryApiCalls } from "../../utils/tryApiCall"

export type StoryLine = {
  text: string
  audio: string // in base64
}

// TODO delete this? we don't need an env var for this I think?
const gradioSpaceApiUrl = `https://jbilcke-hf-ai-story-server.hf.space`
const huggingFaceSpace = "jbilcke-hf/ai-story-server"
const apiKey = `${process.env.MICROSERVICE_API_SECRET_TOKEN || ""}`

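/**
 * Generate spoken audio for the given text by calling the
 * "jbilcke-hf/ai-story-server" Gradio space.
 *
 * Returns the generated story lines (cleaned-up text + base64 audio).
 * If `neverThrow` is true, failures are logged and an empty array is
 * returned instead of throwing.
 *
 * Note: `audioId` is currently unused by this implementation.
 */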
export async function generateSpeechWithParlerTTS({
  text,
  audioId,
  debug = false,
  neverThrow = false,
}: {
  text: string
  audioId: string
  debug?: boolean
  neverThrow?: boolean
}): Promise<StoryLine[]> {
  const actualFunction = async (): Promise<StoryLine[]> => {

    // only the first 30 characters of the text are used as the prompt
    const prompt = text.slice(0, 30)

    const promptToGenerateAudioStory = ``

    const res = await fetch(gradioSpaceApiUrl + (gradioSpaceApiUrl.endsWith("/") ? "" : "/") + "api/predict", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        // Authorization: `Bearer ${token}`,
      },
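      // the Gradio /api/predict endpoint expects the inputs as a positional
      // "data" array, routed to the endpoint selected by "fn_index"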
      body: JSON.stringify({
        fn_index: 0, // <- important!
        data: [
          apiKey,
          promptToGenerateAudioStory,
          prompt,

          // TODO: add support for custom wav
          // voice === "Julian" ? "Julian" : "Cloée",
          "Julian",

          // maxLines,
        ],
      }),
      cache: "no-store",
      // we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache)
      // next: { revalidate: 1 }
    })

    if (res.status !== 200) {
      throw new Error('Failed to fetch data')
    }

    const rawJson = await res.json()
    const data = rawJson.data as StoryLine[][]

    const stories = data?.[0] || []

    return stories.map(line => ({
      text: line.text.replaceAll(" .", ".").replaceAll(" ?", "?").replaceAll(" !", "!").trim(),
      audio: addBase64Header(line.audio, "mp3")
    }))
  }

  try {
    if (!text?.length) {
      throw new Error(`text is too short!`)
    }

    const result = await tryApiCalls({
      func: actualFunction,
      huggingFaceSpace,
      debug,
      failureMessage: "failed to generate the audio story"
    })
    return result
  } catch (err) {
    if (neverThrow) {
      console.error(`generateSpeechWithParlerTTS():`, err)
      return []
    } else {
      throw err
    }
  }
}
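
// Example usage (a sketch only; it assumes this module runs server-side where
// MICROSERVICE_API_SECRET_TOKEN is available, and the text/audioId values
// below are purely illustrative):
//
//   const lines = await generateSpeechWithParlerTTS({
//     text: "Once upon a time, in a quiet village...",
//     audioId: "story-intro", // currently unused, see note above
//     neverThrow: true,
//   })
//   // lines is a StoryLine[] with cleaned text and "data:audio/mp3;base64,..." audio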