Spaces:

sradc
/

visual-content-search-over-videos

Running

File size: 4,837 Bytes

1801c3b

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/sidneyradcliffe/miniforge3/envs/semvideo-hackathon-230523/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    }
   ],
   "source": [
    "from tqdm import tqdm\n",
    "\n",
    "import pandas as pd\n",
    "import cv2\n",
    "from PIL import Image\n",
    "import numpy as np\n",
    "\n",
    "from pipeline.clip_wrapper import ClipWrapper, MODEL_DIM\n",
    "from pipeline.download_videos import VIDEO_DIR, REPO_ROOT, DATA_DIR\n",
    "\n",
    "FRAME_EXTRACT_RATE_SECONDS = 5  # Extract a frame every 5 seconds\n",
    "# Root directory for extracted frames; each video gets a sub-directory named after its file stem.\n",
    "IMAGES_DIR = DATA_DIR / \"images\"\n",
    "\n",
    "# Parquet file that the per-frame metadata and CLIP vectors are written to.\n",
    "DATAFRAME_PATH = DATA_DIR / \"dataset.parquet\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Created once at notebook level; get_clip_vectors reads this global to embed each sampled frame.\n",
    "clip_wrapper = ClipWrapper()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_clip_vectors(video_path):\n",
    "    \"\"\"Yield a CLIP vector for one frame per FRAME_EXTRACT_RATE_SECONDS of video.\n",
    "\n",
    "    Args:\n",
    "        video_path: Path of the video file, opened with cv2.VideoCapture.\n",
    "\n",
    "    Yields:\n",
    "        (clip_vector, image, timestamp_secs, frame_idx) for each sampled frame:\n",
    "        the squeezed NumPy output of clip_wrapper.images2vec, the frame as a PIL\n",
    "        image, the frame's position in seconds (frame_idx / fps) and its index.\n",
    "    \"\"\"\n",
    "    cap = cv2.VideoCapture(str(video_path))\n",
    "    try:\n",
    "        num_video_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))\n",
    "        # Keep fps as a float: truncating e.g. 29.97 to 29 skews every timestamp\n",
    "        # and the sampling interval, and the error grows with video length.\n",
    "        fps = cap.get(cv2.CAP_PROP_FPS)\n",
    "        # Clamp to at least 1 so a very low frame rate cannot produce a step of 0,\n",
    "        # which would make the modulo below raise.\n",
    "        extract_every_n_frames = max(1, round(FRAME_EXTRACT_RATE_SECONDS * fps))\n",
    "        for frame_idx in tqdm(range(num_video_frames), desc=\"Running CLIP on video\"):\n",
    "            ret, frame = cap.read()\n",
    "            if not ret:\n",
    "                # The reported frame count is not guaranteed to be decodable in\n",
    "                # full; stop here instead of indexing into a missing frame.\n",
    "                break\n",
    "            if frame_idx % extract_every_n_frames != 0:\n",
    "                continue\n",
    "            # Reverse the channel axis: OpenCV decodes to BGR, PIL expects RGB.\n",
    "            image = Image.fromarray(frame[..., ::-1])\n",
    "            clip_vector = clip_wrapper.images2vec([image]).squeeze().numpy()\n",
    "            timestamp_secs = frame_idx / fps\n",
    "            yield clip_vector, image, timestamp_secs, frame_idx\n",
    "    finally:\n",
    "        # Runs even if the consumer stops iterating early or an error is raised,\n",
    "        # so the capture handle is always released.\n",
    "        cap.release()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Running CLIP on video: 100%|██████████| 7465/7465 [00:04<00:00, 1759.86it/s]\n",
      "Running CLIP on video: 100%|██████████| 6056/6056 [00:03<00:00, 1728.62it/s]\n",
      "Running CLIP on video: 100%|██████████| 5234/5234 [00:03<00:00, 1648.12it/s]\n",
      "Running CLIP on video: 100%|██████████| 3551/3551 [00:01<00:00, 1806.30it/s]\n",
      "Running CLIP on video: 100%|██████████| 5904/5904 [00:03<00:00, 1655.01it/s]\n",
      "Processing videos: 100%|██████████| 5/5 [00:16<00:00,  3.30s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Saving data to /Users/sidneyradcliffe/repos/semvideo-hackathon-230523/data/dataset.parquet\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Sort the paths so the dataset's row order is the same on every run;\n",
    "# Path.glob gives no ordering guarantee.\n",
    "video_paths = sorted(VIDEO_DIR.glob(\"*.mp4\"))\n",
    "\n",
    "results = []\n",
    "for video_path in tqdm(video_paths, desc=\"Processing videos\"):\n",
    "    video_id = video_path.stem\n",
    "    # Frames of each video are written to their own sub-directory of IMAGES_DIR.\n",
    "    extracted_images_dir = IMAGES_DIR / video_id\n",
    "    extracted_images_dir.mkdir(exist_ok=True, parents=True)\n",
    "    for clip_vector, image, timestamp_secs, frame_idx in get_clip_vectors(video_path):\n",
    "        image_path = extracted_images_dir / f\"{frame_idx}.jpg\"\n",
    "        image.save(image_path)\n",
    "        # One row per sampled frame: metadata columns followed by the vector values.\n",
    "        results.append(\n",
    "            [\n",
    "                video_id,\n",
    "                frame_idx,\n",
    "                timestamp_secs,\n",
    "                # Stored relative to REPO_ROOT rather than as an absolute path.\n",
    "                str(image_path.relative_to(REPO_ROOT)),\n",
    "                *clip_vector,\n",
    "            ]\n",
    "        )\n",
    "df = pd.DataFrame(\n",
    "    results,\n",
    "    columns=[\"video_id\", \"frame_idx\", \"timestamp\", \"image_path\"]\n",
    "    + [f\"dim_{i}\" for i in range(MODEL_DIM)],\n",
    ")\n",
    "# Make sure the output directory exists even when no video was processed.\n",
    "DATAFRAME_PATH.parent.mkdir(exist_ok=True, parents=True)\n",
    "print(f\"Saving data to {DATAFRAME_PATH}\")\n",
    "df.to_parquet(DATAFRAME_PATH, index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "semvideo-hackathon-230523",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}