Spaces:
Build error
Build error
Upload whisper_chat.livemd
Browse files
public-apps/whisper_chat.livemd
ADDED
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!-- livebook:{"autosave_interval_s":60,"persist_outputs":true} -->
|
2 |
+
|
3 |
+
# whisper_chat
|
4 |
+
|
5 |
+
```elixir
|
6 |
+
# Notebook dependencies: the Bumblebee smart-cell integration for Kino,
# plus EXLA, which is configured below as the default Nx backend.
dependencies = [
  {:kino_bumblebee, "~> 0.5.0"},
  {:exla, ">= 0.0.0"}
]

# Install the dependencies and set application config so that Nx uses
# EXLA.Backend as its default backend.
Mix.install(dependencies, config: [nx: [default_backend: EXLA.Backend]])
|
13 |
+
|
14 |
+
# Nx.global_default_backend(EXLA.Backend)
|
15 |
+
# Nx.Defn.default_options(compiler: EXLA, client: :cuda, lazy_transfers: :always)
|
16 |
+
```
|
17 |
+
|
18 |
+
## Model
|
19 |
+
|
20 |
+
<!-- livebook:{"attrs":"eyJjb21waWxlciI6bnVsbCwibWF4X25ld190b2tlbnMiOjEwMCwidGFza19pZCI6InNwZWVjaF90b190ZXh0IiwidmFyaWFudF9pZCI6IndoaXNwZXJfdGlueSJ9","chunks":[[0,618],[620,1063]],"kind":"Elixir.KinoBumblebee.TaskCell","livebook_object":"smart_cell"} -->
|
21 |
+
|
22 |
+
```elixir
|
23 |
+
# Hugging Face repository from which every artifact below is loaded.
repository = {:hf, "openai/whisper-tiny"}

# Each loader returns {:ok, value}; the assertive match raises on any other result.
{:ok, model_info} = Bumblebee.load_model(repository)
{:ok, featurizer} = Bumblebee.load_featurizer(repository)
{:ok, tokenizer} = Bumblebee.load_tokenizer(repository)
{:ok, generation_config} = Bumblebee.load_generation_config(repository)

# Override max_new_tokens on the loaded generation config.
generation_config = Bumblebee.configure(generation_config, max_new_tokens: 100)

# Speech-to-text serving built from the loaded artifacts. It is configured with
# a compile batch size of 4, 30-second chunks, segment timestamps and streaming.
serving =
  Bumblebee.Audio.speech_to_text_whisper(model_info, featurizer, tokenizer, generation_config,
    compile: [batch_size: 4],
    chunk_num_seconds: 30,
    timestamps: :segments,
    stream: true
  )
|
40 |
+
|
41 |
+
# UI widgets: an audio input using the featurizer's sampling rate, a form
# wrapping it with a "Run" submit button, and a frame for the transcript.
audio_input = Kino.Input.audio("Audio", sampling_rate: featurizer.sampling_rate)
form = Kino.Control.form([audio: audio_input], submit: "Run")
frame = Kino.Frame.new()

# On every form submission, transcribe the submitted audio (if any) and
# write the chunks into the frame as the serving produces them.
Kino.listen(form, fn %{data: %{audio: audio}} ->
  if audio do
    # Rounds a number of seconds and renders it through Time.to_string/1.
    to_mark = fn seconds ->
      seconds |> round() |> Time.from_seconds_after_midnight() |> Time.to_string()
    end

    # Read the uploaded file contents from disk.
    raw = audio.file_ref |> Kino.Input.file_path() |> File.read!()

    # Interpret the bytes as f32 values, reshape to {frames, channels},
    # and average over the channel axis.
    samples =
      raw
      |> Nx.from_binary(:f32)
      |> Nx.reshape({:auto, audio.num_channels})
      |> Nx.mean(axes: [1])

    Kino.Frame.render(frame, Kino.Text.new("(Start of transcription)", chunk: true))

    serving
    |> Nx.Serving.run(samples)
    |> Enum.each(fn chunk ->
      start_mark = to_mark.(chunk.start_timestamp_seconds)
      end_mark = to_mark.(chunk.end_timestamp_seconds)

      # The string literal intentionally starts with a line break so that
      # each chunk is appended on its own line.
      text = "
#{start_mark}-#{end_mark}: #{chunk.text}"

      Kino.Frame.append(frame, Kino.Text.new(text, chunk: true))
    end)

    Kino.Frame.append(frame, Kino.Text.new("\n(End of transcription)", chunk: true))
  end
end)

# Last expression of the cell: lay out the form and the transcript frame together.
Kino.Layout.grid([form, frame], boxed: true, gap: 16)
|
73 |
+
```
|
74 |
+
|
75 |
+
<!-- livebook:{"output":true} -->
|
76 |
+
|
77 |
+
```
|
78 |
+
|
79 |
+
14:10:10.810 [info] Loaded cuDNN version 90501
|
80 |
+
|
81 |
+
```
|