sradc committed on
Commit
5a3ba8c
1 Parent(s): 270d9ba

data wrangling notebook, to add base64 images to the parquet itself

Browse files
Files changed (1) hide show
  1. _dev/add_videos_to_dataset.ipynb +538 -0
_dev/add_videos_to_dataset.ipynb ADDED
@@ -0,0 +1,538 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "attachments": {},
5
+ "cell_type": "markdown",
6
+ "metadata": {},
7
+ "source": [
8
+ "One-off conversion / data-wrangling script, to avoid re-downloading videos."
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": 17,
14
+ "metadata": {},
15
+ "outputs": [],
16
+ "source": [
17
+ "import pandas as pd\n",
18
+ "import base64\n",
19
+ "\n",
20
+ "from tqdm import tqdm"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 16,
26
+ "metadata": {},
27
+ "outputs": [],
28
+ "source": [
29
+ "df = pd.read_parquet(\"../data/dataset_original.parquet\")"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": 14,
35
+ "metadata": {},
36
+ "outputs": [
37
+ {
38
+ "data": {
39
+ "text/html": [
40
+ "<div>\n",
41
+ "<style scoped>\n",
42
+ " .dataframe tbody tr th:only-of-type {\n",
43
+ " vertical-align: middle;\n",
44
+ " }\n",
45
+ "\n",
46
+ " .dataframe tbody tr th {\n",
47
+ " vertical-align: top;\n",
48
+ " }\n",
49
+ "\n",
50
+ " .dataframe thead th {\n",
51
+ " text-align: right;\n",
52
+ " }\n",
53
+ "</style>\n",
54
+ "<table border=\"1\" class=\"dataframe\">\n",
55
+ " <thead>\n",
56
+ " <tr style=\"text-align: right;\">\n",
57
+ " <th></th>\n",
58
+ " <th>video_id</th>\n",
59
+ " <th>frame_idx</th>\n",
60
+ " <th>timestamp</th>\n",
61
+ " <th>image_path</th>\n",
62
+ " <th>dim_0</th>\n",
63
+ " <th>dim_1</th>\n",
64
+ " <th>dim_2</th>\n",
65
+ " <th>dim_3</th>\n",
66
+ " <th>dim_4</th>\n",
67
+ " <th>dim_5</th>\n",
68
+ " <th>...</th>\n",
69
+ " <th>dim_502</th>\n",
70
+ " <th>dim_503</th>\n",
71
+ " <th>dim_504</th>\n",
72
+ " <th>dim_505</th>\n",
73
+ " <th>dim_506</th>\n",
74
+ " <th>dim_507</th>\n",
75
+ " <th>dim_508</th>\n",
76
+ " <th>dim_509</th>\n",
77
+ " <th>dim_510</th>\n",
78
+ " <th>dim_511</th>\n",
79
+ " </tr>\n",
80
+ " </thead>\n",
81
+ " <tbody>\n",
82
+ " <tr>\n",
83
+ " <th>0</th>\n",
84
+ " <td>8Ilh1ewceco</td>\n",
85
+ " <td>0</td>\n",
86
+ " <td>0.0</td>\n",
87
+ " <td>data/images/8Ilh1ewceco/0.jpg</td>\n",
88
+ " <td>-0.013127</td>\n",
89
+ " <td>-0.022996</td>\n",
90
+ " <td>-0.049374</td>\n",
91
+ " <td>-0.006306</td>\n",
92
+ " <td>0.013602</td>\n",
93
+ " <td>-0.003762</td>\n",
94
+ " <td>...</td>\n",
95
+ " <td>-0.006920</td>\n",
96
+ " <td>0.013831</td>\n",
97
+ " <td>0.056647</td>\n",
98
+ " <td>0.007946</td>\n",
99
+ " <td>-0.002478</td>\n",
100
+ " <td>-0.030497</td>\n",
101
+ " <td>-0.011770</td>\n",
102
+ " <td>0.067427</td>\n",
103
+ " <td>-0.031810</td>\n",
104
+ " <td>-0.025615</td>\n",
105
+ " </tr>\n",
106
+ " <tr>\n",
107
+ " <th>1</th>\n",
108
+ " <td>8Ilh1ewceco</td>\n",
109
+ " <td>145</td>\n",
110
+ " <td>5.0</td>\n",
111
+ " <td>data/images/8Ilh1ewceco/145.jpg</td>\n",
112
+ " <td>0.009040</td>\n",
113
+ " <td>0.003338</td>\n",
114
+ " <td>0.029684</td>\n",
115
+ " <td>-0.033058</td>\n",
116
+ " <td>0.040864</td>\n",
117
+ " <td>-0.006447</td>\n",
118
+ " <td>...</td>\n",
119
+ " <td>0.033575</td>\n",
120
+ " <td>-0.019076</td>\n",
121
+ " <td>0.047166</td>\n",
122
+ " <td>-0.010574</td>\n",
123
+ " <td>-0.018608</td>\n",
124
+ " <td>-0.013465</td>\n",
125
+ " <td>-0.020017</td>\n",
126
+ " <td>0.086240</td>\n",
127
+ " <td>-0.029653</td>\n",
128
+ " <td>0.035949</td>\n",
129
+ " </tr>\n",
130
+ " <tr>\n",
131
+ " <th>2</th>\n",
132
+ " <td>8Ilh1ewceco</td>\n",
133
+ " <td>290</td>\n",
134
+ " <td>10.0</td>\n",
135
+ " <td>data/images/8Ilh1ewceco/290.jpg</td>\n",
136
+ " <td>0.004891</td>\n",
137
+ " <td>0.006527</td>\n",
138
+ " <td>0.004417</td>\n",
139
+ " <td>-0.000323</td>\n",
140
+ " <td>0.006400</td>\n",
141
+ " <td>-0.024191</td>\n",
142
+ " <td>...</td>\n",
143
+ " <td>-0.043122</td>\n",
144
+ " <td>-0.010695</td>\n",
145
+ " <td>0.005672</td>\n",
146
+ " <td>0.000172</td>\n",
147
+ " <td>-0.014442</td>\n",
148
+ " <td>-0.014647</td>\n",
149
+ " <td>-0.016840</td>\n",
150
+ " <td>0.100285</td>\n",
151
+ " <td>0.013794</td>\n",
152
+ " <td>0.015046</td>\n",
153
+ " </tr>\n",
154
+ " <tr>\n",
155
+ " <th>3</th>\n",
156
+ " <td>8Ilh1ewceco</td>\n",
157
+ " <td>435</td>\n",
158
+ " <td>15.0</td>\n",
159
+ " <td>data/images/8Ilh1ewceco/435.jpg</td>\n",
160
+ " <td>-0.022159</td>\n",
161
+ " <td>0.020703</td>\n",
162
+ " <td>-0.021607</td>\n",
163
+ " <td>-0.019721</td>\n",
164
+ " <td>-0.006067</td>\n",
165
+ " <td>-0.035070</td>\n",
166
+ " <td>...</td>\n",
167
+ " <td>-0.017047</td>\n",
168
+ " <td>-0.018341</td>\n",
169
+ " <td>-0.006733</td>\n",
170
+ " <td>-0.007040</td>\n",
171
+ " <td>-0.008368</td>\n",
172
+ " <td>0.009755</td>\n",
173
+ " <td>-0.045662</td>\n",
174
+ " <td>0.116601</td>\n",
175
+ " <td>-0.000572</td>\n",
176
+ " <td>-0.000985</td>\n",
177
+ " </tr>\n",
178
+ " <tr>\n",
179
+ " <th>4</th>\n",
180
+ " <td>8Ilh1ewceco</td>\n",
181
+ " <td>580</td>\n",
182
+ " <td>20.0</td>\n",
183
+ " <td>data/images/8Ilh1ewceco/580.jpg</td>\n",
184
+ " <td>-0.015903</td>\n",
185
+ " <td>0.033545</td>\n",
186
+ " <td>0.009257</td>\n",
187
+ " <td>-0.033540</td>\n",
188
+ " <td>0.010586</td>\n",
189
+ " <td>-0.028067</td>\n",
190
+ " <td>...</td>\n",
191
+ " <td>-0.016532</td>\n",
192
+ " <td>0.012388</td>\n",
193
+ " <td>0.020868</td>\n",
194
+ " <td>-0.012635</td>\n",
195
+ " <td>0.010914</td>\n",
196
+ " <td>0.009203</td>\n",
197
+ " <td>-0.010078</td>\n",
198
+ " <td>0.063971</td>\n",
199
+ " <td>-0.038024</td>\n",
200
+ " <td>0.025840</td>\n",
201
+ " </tr>\n",
202
+ " </tbody>\n",
203
+ "</table>\n",
204
+ "<p>5 rows × 516 columns</p>\n",
205
+ "</div>"
206
+ ],
207
+ "text/plain": [
208
+ " video_id frame_idx timestamp image_path \\\n",
209
+ "0 8Ilh1ewceco 0 0.0 data/images/8Ilh1ewceco/0.jpg \n",
210
+ "1 8Ilh1ewceco 145 5.0 data/images/8Ilh1ewceco/145.jpg \n",
211
+ "2 8Ilh1ewceco 290 10.0 data/images/8Ilh1ewceco/290.jpg \n",
212
+ "3 8Ilh1ewceco 435 15.0 data/images/8Ilh1ewceco/435.jpg \n",
213
+ "4 8Ilh1ewceco 580 20.0 data/images/8Ilh1ewceco/580.jpg \n",
214
+ "\n",
215
+ " dim_0 dim_1 dim_2 dim_3 dim_4 dim_5 ... dim_502 \\\n",
216
+ "0 -0.013127 -0.022996 -0.049374 -0.006306 0.013602 -0.003762 ... -0.006920 \n",
217
+ "1 0.009040 0.003338 0.029684 -0.033058 0.040864 -0.006447 ... 0.033575 \n",
218
+ "2 0.004891 0.006527 0.004417 -0.000323 0.006400 -0.024191 ... -0.043122 \n",
219
+ "3 -0.022159 0.020703 -0.021607 -0.019721 -0.006067 -0.035070 ... -0.017047 \n",
220
+ "4 -0.015903 0.033545 0.009257 -0.033540 0.010586 -0.028067 ... -0.016532 \n",
221
+ "\n",
222
+ " dim_503 dim_504 dim_505 dim_506 dim_507 dim_508 dim_509 \\\n",
223
+ "0 0.013831 0.056647 0.007946 -0.002478 -0.030497 -0.011770 0.067427 \n",
224
+ "1 -0.019076 0.047166 -0.010574 -0.018608 -0.013465 -0.020017 0.086240 \n",
225
+ "2 -0.010695 0.005672 0.000172 -0.014442 -0.014647 -0.016840 0.100285 \n",
226
+ "3 -0.018341 -0.006733 -0.007040 -0.008368 0.009755 -0.045662 0.116601 \n",
227
+ "4 0.012388 0.020868 -0.012635 0.010914 0.009203 -0.010078 0.063971 \n",
228
+ "\n",
229
+ " dim_510 dim_511 \n",
230
+ "0 -0.031810 -0.025615 \n",
231
+ "1 -0.029653 0.035949 \n",
232
+ "2 0.013794 0.015046 \n",
233
+ "3 -0.000572 -0.000985 \n",
234
+ "4 -0.038024 0.025840 \n",
235
+ "\n",
236
+ "[5 rows x 516 columns]"
237
+ ]
238
+ },
239
+ "execution_count": 14,
240
+ "metadata": {},
241
+ "output_type": "execute_result"
242
+ }
243
+ ],
244
+ "source": [
245
+ "df.head()"
246
+ ]
247
+ },
248
+ {
249
+ "cell_type": "code",
250
+ "execution_count": 21,
251
+ "metadata": {},
252
+ "outputs": [
253
+ {
254
+ "name": "stderr",
255
+ "output_type": "stream",
256
+ "text": [
257
+ "100%|██████████| 71761/71761 [00:49<00:00, 1458.75it/s]\n"
258
+ ]
259
+ }
260
+ ],
261
+ "source": [
262
+ "image_paths = df[\"image_path\"].tolist()\n",
263
+ "new_df = df.rename(columns={'image_path': 'base64_image'})\n",
264
+ "for i, img in enumerate(tqdm(image_paths)):\n",
265
+ " with open(f\"../{img}\", \"rb\") as image_file:\n",
266
+ " encoded_string = base64.b64encode(image_file.read()).decode()\n",
267
+ " new_df.loc[i, 'base64_image'] = encoded_string"
268
+ ]
269
+ },
270
+ {
271
+ "cell_type": "code",
272
+ "execution_count": 23,
273
+ "metadata": {},
274
+ "outputs": [
275
+ {
276
+ "name": "stderr",
277
+ "output_type": "stream",
278
+ "text": [
279
+ "/Users/sidneyradcliffe/miniforge3/envs/visual-content-search-over-videos/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
280
+ " from .autonotebook import tqdm as notebook_tqdm\n"
281
+ ]
282
+ }
283
+ ],
284
+ "source": [
285
+ "from pipeline.process_videos import DATAFRAME_PATH\n",
286
+ "new_df.to_parquet(DATAFRAME_PATH, index=False)"
287
+ ]
288
+ },
289
+ {
290
+ "cell_type": "code",
291
+ "execution_count": 24,
292
+ "metadata": {},
293
+ "outputs": [],
294
+ "source": [
295
+ "# reload, check it's correct\n",
296
+ "new_df = pd.read_parquet(DATAFRAME_PATH)"
297
+ ]
298
+ },
299
+ {
300
+ "cell_type": "code",
301
+ "execution_count": 25,
302
+ "metadata": {},
303
+ "outputs": [
304
+ {
305
+ "data": {
306
+ "text/html": [
307
+ "<div>\n",
308
+ "<style scoped>\n",
309
+ " .dataframe tbody tr th:only-of-type {\n",
310
+ " vertical-align: middle;\n",
311
+ " }\n",
312
+ "\n",
313
+ " .dataframe tbody tr th {\n",
314
+ " vertical-align: top;\n",
315
+ " }\n",
316
+ "\n",
317
+ " .dataframe thead th {\n",
318
+ " text-align: right;\n",
319
+ " }\n",
320
+ "</style>\n",
321
+ "<table border=\"1\" class=\"dataframe\">\n",
322
+ " <thead>\n",
323
+ " <tr style=\"text-align: right;\">\n",
324
+ " <th></th>\n",
325
+ " <th>video_id</th>\n",
326
+ " <th>frame_idx</th>\n",
327
+ " <th>timestamp</th>\n",
328
+ " <th>base64_image</th>\n",
329
+ " <th>dim_0</th>\n",
330
+ " <th>dim_1</th>\n",
331
+ " <th>dim_2</th>\n",
332
+ " <th>dim_3</th>\n",
333
+ " <th>dim_4</th>\n",
334
+ " <th>dim_5</th>\n",
335
+ " <th>...</th>\n",
336
+ " <th>dim_502</th>\n",
337
+ " <th>dim_503</th>\n",
338
+ " <th>dim_504</th>\n",
339
+ " <th>dim_505</th>\n",
340
+ " <th>dim_506</th>\n",
341
+ " <th>dim_507</th>\n",
342
+ " <th>dim_508</th>\n",
343
+ " <th>dim_509</th>\n",
344
+ " <th>dim_510</th>\n",
345
+ " <th>dim_511</th>\n",
346
+ " </tr>\n",
347
+ " </thead>\n",
348
+ " <tbody>\n",
349
+ " <tr>\n",
350
+ " <th>0</th>\n",
351
+ " <td>8Ilh1ewceco</td>\n",
352
+ " <td>0</td>\n",
353
+ " <td>0.0</td>\n",
354
+ " <td>b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH...</td>\n",
355
+ " <td>-0.013127</td>\n",
356
+ " <td>-0.022996</td>\n",
357
+ " <td>-0.049374</td>\n",
358
+ " <td>-0.006306</td>\n",
359
+ " <td>0.013602</td>\n",
360
+ " <td>-0.003762</td>\n",
361
+ " <td>...</td>\n",
362
+ " <td>-0.006920</td>\n",
363
+ " <td>0.013831</td>\n",
364
+ " <td>0.056647</td>\n",
365
+ " <td>0.007946</td>\n",
366
+ " <td>-0.002478</td>\n",
367
+ " <td>-0.030497</td>\n",
368
+ " <td>-0.011770</td>\n",
369
+ " <td>0.067427</td>\n",
370
+ " <td>-0.031810</td>\n",
371
+ " <td>-0.025615</td>\n",
372
+ " </tr>\n",
373
+ " <tr>\n",
374
+ " <th>1</th>\n",
375
+ " <td>8Ilh1ewceco</td>\n",
376
+ " <td>145</td>\n",
377
+ " <td>5.0</td>\n",
378
+ " <td>b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH...</td>\n",
379
+ " <td>0.009040</td>\n",
380
+ " <td>0.003338</td>\n",
381
+ " <td>0.029684</td>\n",
382
+ " <td>-0.033058</td>\n",
383
+ " <td>0.040864</td>\n",
384
+ " <td>-0.006447</td>\n",
385
+ " <td>...</td>\n",
386
+ " <td>0.033575</td>\n",
387
+ " <td>-0.019076</td>\n",
388
+ " <td>0.047166</td>\n",
389
+ " <td>-0.010574</td>\n",
390
+ " <td>-0.018608</td>\n",
391
+ " <td>-0.013465</td>\n",
392
+ " <td>-0.020017</td>\n",
393
+ " <td>0.086240</td>\n",
394
+ " <td>-0.029653</td>\n",
395
+ " <td>0.035949</td>\n",
396
+ " </tr>\n",
397
+ " <tr>\n",
398
+ " <th>2</th>\n",
399
+ " <td>8Ilh1ewceco</td>\n",
400
+ " <td>290</td>\n",
401
+ " <td>10.0</td>\n",
402
+ " <td>b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH...</td>\n",
403
+ " <td>0.004891</td>\n",
404
+ " <td>0.006527</td>\n",
405
+ " <td>0.004417</td>\n",
406
+ " <td>-0.000323</td>\n",
407
+ " <td>0.006400</td>\n",
408
+ " <td>-0.024191</td>\n",
409
+ " <td>...</td>\n",
410
+ " <td>-0.043122</td>\n",
411
+ " <td>-0.010695</td>\n",
412
+ " <td>0.005672</td>\n",
413
+ " <td>0.000172</td>\n",
414
+ " <td>-0.014442</td>\n",
415
+ " <td>-0.014647</td>\n",
416
+ " <td>-0.016840</td>\n",
417
+ " <td>0.100285</td>\n",
418
+ " <td>0.013794</td>\n",
419
+ " <td>0.015046</td>\n",
420
+ " </tr>\n",
421
+ " <tr>\n",
422
+ " <th>3</th>\n",
423
+ " <td>8Ilh1ewceco</td>\n",
424
+ " <td>435</td>\n",
425
+ " <td>15.0</td>\n",
426
+ " <td>b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH...</td>\n",
427
+ " <td>-0.022159</td>\n",
428
+ " <td>0.020703</td>\n",
429
+ " <td>-0.021607</td>\n",
430
+ " <td>-0.019721</td>\n",
431
+ " <td>-0.006067</td>\n",
432
+ " <td>-0.035070</td>\n",
433
+ " <td>...</td>\n",
434
+ " <td>-0.017047</td>\n",
435
+ " <td>-0.018341</td>\n",
436
+ " <td>-0.006733</td>\n",
437
+ " <td>-0.007040</td>\n",
438
+ " <td>-0.008368</td>\n",
439
+ " <td>0.009755</td>\n",
440
+ " <td>-0.045662</td>\n",
441
+ " <td>0.116601</td>\n",
442
+ " <td>-0.000572</td>\n",
443
+ " <td>-0.000985</td>\n",
444
+ " </tr>\n",
445
+ " <tr>\n",
446
+ " <th>4</th>\n",
447
+ " <td>8Ilh1ewceco</td>\n",
448
+ " <td>580</td>\n",
449
+ " <td>20.0</td>\n",
450
+ " <td>b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH...</td>\n",
451
+ " <td>-0.015903</td>\n",
452
+ " <td>0.033545</td>\n",
453
+ " <td>0.009257</td>\n",
454
+ " <td>-0.033540</td>\n",
455
+ " <td>0.010586</td>\n",
456
+ " <td>-0.028067</td>\n",
457
+ " <td>...</td>\n",
458
+ " <td>-0.016532</td>\n",
459
+ " <td>0.012388</td>\n",
460
+ " <td>0.020868</td>\n",
461
+ " <td>-0.012635</td>\n",
462
+ " <td>0.010914</td>\n",
463
+ " <td>0.009203</td>\n",
464
+ " <td>-0.010078</td>\n",
465
+ " <td>0.063971</td>\n",
466
+ " <td>-0.038024</td>\n",
467
+ " <td>0.025840</td>\n",
468
+ " </tr>\n",
469
+ " </tbody>\n",
470
+ "</table>\n",
471
+ "<p>5 rows × 516 columns</p>\n",
472
+ "</div>"
473
+ ],
474
+ "text/plain": [
475
+ " video_id frame_idx timestamp \\\n",
476
+ "0 8Ilh1ewceco 0 0.0 \n",
477
+ "1 8Ilh1ewceco 145 5.0 \n",
478
+ "2 8Ilh1ewceco 290 10.0 \n",
479
+ "3 8Ilh1ewceco 435 15.0 \n",
480
+ "4 8Ilh1ewceco 580 20.0 \n",
481
+ "\n",
482
+ " base64_image dim_0 dim_1 \\\n",
483
+ "0 b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH... -0.013127 -0.022996 \n",
484
+ "1 b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH... 0.009040 0.003338 \n",
485
+ "2 b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH... 0.004891 0.006527 \n",
486
+ "3 b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH... -0.022159 0.020703 \n",
487
+ "4 b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH... -0.015903 0.033545 \n",
488
+ "\n",
489
+ " dim_2 dim_3 dim_4 dim_5 ... dim_502 dim_503 dim_504 \\\n",
490
+ "0 -0.049374 -0.006306 0.013602 -0.003762 ... -0.006920 0.013831 0.056647 \n",
491
+ "1 0.029684 -0.033058 0.040864 -0.006447 ... 0.033575 -0.019076 0.047166 \n",
492
+ "2 0.004417 -0.000323 0.006400 -0.024191 ... -0.043122 -0.010695 0.005672 \n",
493
+ "3 -0.021607 -0.019721 -0.006067 -0.035070 ... -0.017047 -0.018341 -0.006733 \n",
494
+ "4 0.009257 -0.033540 0.010586 -0.028067 ... -0.016532 0.012388 0.020868 \n",
495
+ "\n",
496
+ " dim_505 dim_506 dim_507 dim_508 dim_509 dim_510 dim_511 \n",
497
+ "0 0.007946 -0.002478 -0.030497 -0.011770 0.067427 -0.031810 -0.025615 \n",
498
+ "1 -0.010574 -0.018608 -0.013465 -0.020017 0.086240 -0.029653 0.035949 \n",
499
+ "2 0.000172 -0.014442 -0.014647 -0.016840 0.100285 0.013794 0.015046 \n",
500
+ "3 -0.007040 -0.008368 0.009755 -0.045662 0.116601 -0.000572 -0.000985 \n",
501
+ "4 -0.012635 0.010914 0.009203 -0.010078 0.063971 -0.038024 0.025840 \n",
502
+ "\n",
503
+ "[5 rows x 516 columns]"
504
+ ]
505
+ },
506
+ "execution_count": 25,
507
+ "metadata": {},
508
+ "output_type": "execute_result"
509
+ }
510
+ ],
511
+ "source": [
512
+ "new_df.head()"
513
+ ]
514
+ }
515
+ ],
516
+ "metadata": {
517
+ "kernelspec": {
518
+ "display_name": "visual-content-search-over-videos",
519
+ "language": "python",
520
+ "name": "python3"
521
+ },
522
+ "language_info": {
523
+ "codemirror_mode": {
524
+ "name": "ipython",
525
+ "version": 3
526
+ },
527
+ "file_extension": ".py",
528
+ "mimetype": "text/x-python",
529
+ "name": "python",
530
+ "nbconvert_exporter": "python",
531
+ "pygments_lexer": "ipython3",
532
+ "version": "3.9.16"
533
+ },
534
+ "orig_nbformat": 4
535
+ },
536
+ "nbformat": 4,
537
+ "nbformat_minor": 2
538
+ }