{ "cells": [ { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import cv2\n", "from tqdm import tqdm" ] },
{ "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [
"# Parquet files this notebook can be pointed at.\n",
"dataframes = [\"../data/dataset-unfiltered.parquet\", \"../data/dataset.parquet\"]\n",
"\n",
"# The last entry (`dataset.parquet`) is the file that is read here and\n",
"# overwritten at the end of the notebook.\n",
"df_path = dataframes[-1]"
] },
{ "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [
"# Load the selected dataset in full; every column is written back later.\n",
"df = pd.read_parquet(path=df_path)"
] },
{ "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
video_idframe_idxtimestampbase64_imagedim_0dim_1dim_2dim_3dim_4dim_5...dim_502dim_503dim_504dim_505dim_506dim_507dim_508dim_509dim_510dim_511
08Ilh1ewceco1455.0b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH...0.0090400.0033380.029684-0.0330580.040864-0.006447...0.033575-0.0190760.047166-0.010574-0.018608-0.013465-0.0200170.086240-0.0296530.035949
18Ilh1ewceco29010.0b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH...0.0048910.0065270.004417-0.0003230.006400-0.024191...-0.043122-0.0106950.0056720.000172-0.014442-0.014647-0.0168400.1002850.0137940.015046
28Ilh1ewceco43515.0b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH...-0.0221590.020703-0.021607-0.019721-0.006067-0.035070...-0.017047-0.018341-0.006733-0.007040-0.0083680.009755-0.0456620.116601-0.000572-0.000985
38Ilh1ewceco58020.0b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH...-0.0159030.0335450.009257-0.0335400.010586-0.028067...-0.0165320.0123880.020868-0.0126350.0109140.009203-0.0100780.063971-0.0380240.025840
48Ilh1ewceco72525.0b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH...-0.010193-0.0123230.023012-0.0158930.0470410.050783...-0.024013-0.009684-0.035481-0.024743-0.0098120.035214-0.0089570.124215-0.0124100.040907
\n", "

5 rows × 516 columns

\n", "
" ], "text/plain": [ " video_id frame_idx timestamp \n", "0 8Ilh1ewceco 145 5.0 \\\n", "1 8Ilh1ewceco 290 10.0 \n", "2 8Ilh1ewceco 435 15.0 \n", "3 8Ilh1ewceco 580 20.0 \n", "4 8Ilh1ewceco 725 25.0 \n", "\n", " base64_image dim_0 dim_1 \n", "0 b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH... 0.009040 0.003338 \\\n", "1 b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH... 0.004891 0.006527 \n", "2 b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH... -0.022159 0.020703 \n", "3 b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH... -0.015903 0.033545 \n", "4 b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgH... -0.010193 -0.012323 \n", "\n", " dim_2 dim_3 dim_4 dim_5 ... dim_502 dim_503 dim_504 \n", "0 0.029684 -0.033058 0.040864 -0.006447 ... 0.033575 -0.019076 0.047166 \\\n", "1 0.004417 -0.000323 0.006400 -0.024191 ... -0.043122 -0.010695 0.005672 \n", "2 -0.021607 -0.019721 -0.006067 -0.035070 ... -0.017047 -0.018341 -0.006733 \n", "3 0.009257 -0.033540 0.010586 -0.028067 ... -0.016532 0.012388 0.020868 \n", "4 0.023012 -0.015893 0.047041 0.050783 ... 
-0.024013 -0.009684 -0.035481 \n", "\n", " dim_505 dim_506 dim_507 dim_508 dim_509 dim_510 dim_511 \n", "0 -0.010574 -0.018608 -0.013465 -0.020017 0.086240 -0.029653 0.035949 \n", "1 0.000172 -0.014442 -0.014647 -0.016840 0.100285 0.013794 0.015046 \n", "2 -0.007040 -0.008368 0.009755 -0.045662 0.116601 -0.000572 -0.000985 \n", "3 -0.012635 0.010914 0.009203 -0.010078 0.063971 -0.038024 0.025840 \n", "4 -0.024743 -0.009812 0.035214 -0.008957 0.124215 -0.012410 0.040907 \n", "\n", "[5 rows x 516 columns]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display(df.head())" ] },
{ "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "60844" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Weirdly, all the timestamps are rounded to the nearest second o.O\n",
"# Count the rows whose stored timestamp has no fractional part.\n",
"(df[\"timestamp\"] % 1 == 0).sum()"
] },
{ "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 1417/1417 [00:02<00:00, 626.05it/s]\n" ] } ], "source": [
"def read_video_fps(video_id):\n",
"    \"\"\"Return the frame rate OpenCV reports for ``../data/videos/<video_id>.mp4``.\n",
"\n",
"    Raises:\n",
"        FileNotFoundError: if OpenCV cannot open the video file.\n",
"        ValueError: if the reported frame rate is not a positive number.\n",
"    \"\"\"\n",
"    video_path = f\"../data/videos/{video_id}.mp4\"\n",
"    cap = cv2.VideoCapture(video_path)\n",
"    try:\n",
"        if not cap.isOpened():\n",
"            raise FileNotFoundError(f\"Could not open video: {video_path}\")\n",
"        fps = cap.get(cv2.CAP_PROP_FPS)\n",
"    finally:\n",
"        # Always free the capture handle, even when the checks above fail.\n",
"        cap.release()\n",
"    # `not fps > 0` also rejects NaN; a zero rate would break the division below.\n",
"    if not fps > 0:\n",
"        raise ValueError(f\"Invalid FPS {fps!r} reported for video: {video_path}\")\n",
"    return fps\n",
"\n",
"\n",
"# Frame rate of every video referenced by the dataset, keyed by video id.\n",
"video_fps = {\n",
"    video_id: read_video_fps(video_id)\n",
"    for video_id in tqdm(df[\"video_id\"].unique())\n",
"}"
] },
{ "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [
"def correct_timestamp(row):\n",
"    \"\"\"Return the timestamp, in seconds, of the frame described by `row`.\n",
"\n",
"    `row` must provide \"video_id\" and \"frame_idx\"; the frame index is divided\n",
"    by the frame rate stored for that video in the global `video_fps` mapping.\n",
"    \"\"\"\n",
"    return row[\"frame_idx\"] / video_fps[row[\"video_id\"]]\n",
"\n",
"\n",
"# Hand `apply` only the two columns the function reads: building a row Series\n",
"# across every column of the frame for each row is needlessly slow.\n",
"df[\"timestamp\"] = df[[\"video_id\", \"frame_idx\"]].apply(correct_timestamp, axis=1)\n",
"\n",
"# Sanity check before the dataset file gets overwritten.\n",
"assert df[\"timestamp\"].notna().all(), \"some timestamps could not be computed\""
] },
{ "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "df.to_parquet(df_path, index=False)" ] } ], 
"metadata": { "kernelspec": { "display_name": "visual-content-search-over-videos", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }