teticio committed on
Commit
5e522f8
1 Parent(s): c190f5b

add cell to generate audios

Browse files
Files changed (1) hide show
  1. notebooks/test-model.ipynb +35 -3
notebooks/test-model.ipynb CHANGED
@@ -72,14 +72,23 @@
72
  },
73
  {
74
  "cell_type": "code",
75
- "execution_count": 7,
76
- "id": "b809fed5",
77
  "metadata": {},
78
  "outputs": [],
79
  "source": [
80
  "model_id = \"teticio/audio-diffusion-256\"\n",
81
  "ddpm = DDPMPipeline.from_pretrained(model_id) # you can replace DDPMPipeline with DDIMPipeline or PNDMPipeline for faster inference\n",
82
- "ddpm.to(\"cuda\")\n",
 
 
 
 
 
 
 
 
 
83
  "images = ddpm(output_type=\"numpy\")[\"sample\"]\n",
84
  "images = (images * 255).round().astype(\"uint8\").transpose(0, 3, 1, 2)"
85
  ]
@@ -145,6 +154,29 @@
145
  "Audio(data=audio, rate=mel.get_sample_rate())"
146
  ]
147
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  {
149
  "cell_type": "markdown",
150
  "id": "ef54cef3",
 
72
  },
73
  {
74
  "cell_type": "code",
75
+ "execution_count": null,
76
+ "id": "e0f6e8fd",
77
  "metadata": {},
78
  "outputs": [],
79
  "source": [
80
  "model_id = \"teticio/audio-diffusion-256\"\n",
81
  "ddpm = DDPMPipeline.from_pretrained(model_id) # you can replace DDPMPipeline with DDIMPipeline or PNDMPipeline for faster inference\n",
82
+ "ddpm.to(\"cuda\")"
83
+ ]
84
+ },
85
+ {
86
+ "cell_type": "code",
87
+ "execution_count": 7,
88
+ "id": "b809fed5",
89
+ "metadata": {},
90
+ "outputs": [],
91
+ "source": [
92
  "images = ddpm(output_type=\"numpy\")[\"sample\"]\n",
93
  "images = (images * 255).round().astype(\"uint8\").transpose(0, 3, 1, 2)"
94
  ]
 
154
  "Audio(data=audio, rate=mel.get_sample_rate())"
155
  ]
156
  },
157
+ {
158
+ "cell_type": "markdown",
159
+ "id": "ba4ccac5",
160
+ "metadata": {},
161
+ "source": [
162
+ "### Generate audios"
163
+ ]
164
+ },
165
+ {
166
+ "cell_type": "code",
167
+ "execution_count": null,
168
+ "id": "300a6a8d",
169
+ "metadata": {},
170
+ "outputs": [],
171
+ "source": [
172
+ "while True:\n",
173
+ " images = ddpm(output_type=\"numpy\")[\"sample\"]\n",
174
+ " images = (images * 255).round().astype(\"uint8\").transpose(0, 3, 1, 2)\n",
175
+ " image = Image.fromarray(images[0][0])\n",
176
+ " audio = mel.image_to_audio(image)\n",
177
+ " display(Audio(data=audio, rate=mel.get_sample_rate()))"
178
+ ]
179
+ },
180
  {
181
  "cell_type": "markdown",
182
  "id": "ef54cef3",