icostan committed on
Commit
15fe45a
·
verified ·
1 Parent(s): a670533

Upload whisper_chat.livemd

Browse files
Files changed (1) hide show
  1. public-apps/whisper_chat.livemd +81 -0
public-apps/whisper_chat.livemd ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- livebook:{"autosave_interval_s":60,"persist_outputs":true} -->
2
+
3
+ # whisper_chat
4
+
5
+ ```elixir
6
+ Mix.install( # Notebook setup: installs the dependencies listed below and applies the given config.
7
+ [
8
+ {:kino_bumblebee, "~> 0.5.0"}, # provides the KinoBumblebee.TaskCell smart cell used in the Model section
9
+ {:exla, ">= 0.0.0"} # no version constraint; supplies the EXLA.Backend referenced in the config below
10
+ ],
11
+ config: [nx: [default_backend: EXLA.Backend]] # application config for :nx — selects EXLA.Backend as the default backend
12
+ )
13
+
14
+ # Nx.global_default_backend(EXLA.Backend)
15
+ # Nx.Defn.default_options(compiler: EXLA, client: :cuda, lazy_transfers: :always)
16
+ ```
17
+
18
+ ## Model
19
+
20
+ <!-- livebook:{"attrs":"eyJjb21waWxlciI6bnVsbCwibWF4X25ld190b2tlbnMiOjEwMCwidGFza19pZCI6InNwZWVjaF90b190ZXh0IiwidmFyaWFudF9pZCI6IndoaXNwZXJfdGlueSJ9","chunks":[[0,618],[620,1063]],"kind":"Elixir.KinoBumblebee.TaskCell","livebook_object":"smart_cell"} -->
21
+
22
+ ```elixir
23
+ {:ok, model_info} = Bumblebee.load_model({:hf, "openai/whisper-tiny"})
24
+ {:ok, featurizer} = Bumblebee.load_featurizer({:hf, "openai/whisper-tiny"})
25
+ {:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "openai/whisper-tiny"})
26
+ {:ok, generation_config} = Bumblebee.load_generation_config({:hf, "openai/whisper-tiny"})
27
+ generation_config = Bumblebee.configure(generation_config, max_new_tokens: 100)
28
+
29
+ serving =
30
+ Bumblebee.Audio.speech_to_text_whisper(
31
+ model_info,
32
+ featurizer,
33
+ tokenizer,
34
+ generation_config,
35
+ compile: [batch_size: 4],
36
+ chunk_num_seconds: 30,
37
+ timestamps: :segments,
38
+ stream: true
39
+ )
40
+
41
+ audio_input = Kino.Input.audio("Audio", sampling_rate: featurizer.sampling_rate)
42
+ form = Kino.Control.form([audio: audio_input], submit: "Run")
43
+ frame = Kino.Frame.new()
44
+
45
+ Kino.listen(form, fn %{data: %{audio: audio}} ->
46
+ if audio do
47
+ audio =
48
+ audio.file_ref
49
+ |> Kino.Input.file_path()
50
+ |> File.read!()
51
+ |> Nx.from_binary(:f32)
52
+ |> Nx.reshape({:auto, audio.num_channels})
53
+ |> Nx.mean(axes: [1])
54
+
55
+ Kino.Frame.render(frame, Kino.Text.new("(Start of transcription)", chunk: true))
56
+
57
+ for chunk <- Nx.Serving.run(serving, audio) do
58
+ [start_mark, end_mark] =
59
+ for seconds <- [chunk.start_timestamp_seconds, chunk.end_timestamp_seconds] do
60
+ seconds |> round() |> Time.from_seconds_after_midnight() |> Time.to_string()
61
+ end
62
+
63
+ text = "
64
+ #{start_mark}-#{end_mark}: #{chunk.text}"
65
+ Kino.Frame.append(frame, Kino.Text.new(text, chunk: true))
66
+ end
67
+
68
+ Kino.Frame.append(frame, Kino.Text.new("\n(End of transcription)", chunk: true))
69
+ end
70
+ end)
71
+
72
+ Kino.Layout.grid([form, frame], boxed: true, gap: 16)
73
+ ```
74
+
75
+ <!-- livebook:{"output":true} -->
76
+
77
+ ```
78
+
79
+ 14:10:10.810 [info] Loaded cuDNN version 90501
80
+
81
+ ```