{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Download British Museum artifact images\n",
    "\n",
    "Reads the collection export `europe_999BC-600.csv`, strips the `No: ` prefix from each museum number, and saves every row's `Image` URL as `<museum number>.jpg` under `../data/raw/BM_images/`.\n",
    "\n",
    "- Rows whose museum number is missing or the placeholder `null`, and rows without an image URL, are skipped: they would otherwise all be written to the same file or crash the request.\n",
    "- Images that are already on disk are not fetched again, so the notebook can be interrupted and re-run.\n",
    "- TLS certificates are verified (the earlier version of this notebook passed `verify=False`)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "import requests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "CSV_PATH = Path(\"../data/raw/BM_csv_files/3000BC-AD500/europe_999BC-600.csv\")\n",
    "IMAGE_DIR = Path(\"../data/raw/BM_images\")\n",
    "\n",
    "# Give up on a stalled connection instead of blocking the loop forever.\n",
    "REQUEST_TIMEOUT_S = 30\n",
    "# Pause after every request (same 20 s delay the loop has always used).\n",
    "DELAY_BETWEEN_REQUESTS_S = 20"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the export"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(CSV_PATH)\n",
    "# The export prefixes every museum number with \"No: \"; drop it so the\n",
    "# bare number can be used as the image file name.\n",
    "df[\"Museum number\"] = df[\"Museum number\"].str.replace(r\"^No: \", \"\", regex=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## How many rows have no usable museum number?\n",
    "\n",
    "After the prefix is stripped, some rows are left with the literal string `null`. The last recorded run of this check (before outputs were cleared) returned **460**."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(df[\"Museum number\"] == \"null\").sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Download helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def image_path_for(museum_number: str) -> Path:\n",
    "    \"\"\"Return the file an image for `museum_number` is stored in.\n",
    "\n",
    "    A \"/\" inside the number would be read as a directory separator when the\n",
    "    path is opened, so it is replaced with \"_\". Numbers without a \"/\" map to\n",
    "    the same `<number>.jpg` name as before.\n",
    "    \"\"\"\n",
    "    safe_name = museum_number.replace(\"/\", \"_\")\n",
    "    return IMAGE_DIR / f\"{safe_name}.jpg\"\n",
    "\n",
    "\n",
    "def download_image(url: str, destination: Path) -> bool:\n",
    "    \"\"\"Fetch `url` and write the response body to `destination`.\n",
    "\n",
    "    Returns True when the file was written. Network errors and non-200\n",
    "    responses are reported with `print` and return False, so one bad row\n",
    "    does not abort the whole run.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        # Certificate verification is left at the requests default (enabled).\n",
    "        response = requests.get(url, timeout=REQUEST_TIMEOUT_S)\n",
    "    except requests.RequestException as error:\n",
    "        print(\"Failed to download image. Error:\", error)\n",
    "        return False\n",
    "\n",
    "    if response.status_code != 200:\n",
    "        print(\"Failed to download image. Status code:\", response.status_code)\n",
    "        return False\n",
    "\n",
    "    destination.write_bytes(response.content)\n",
    "    print(\"Image downloaded successfully!\")\n",
    "    return True"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Download the images\n",
    "\n",
    "This cell is slow: it sleeps after every request. It is safe to interrupt and re-run, because files that already exist are skipped. When several rows share a museum number, only the first image is kept."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "IMAGE_DIR.mkdir(parents=True, exist_ok=True)\n",
    "\n",
    "has_number = df[\"Museum number\"].notna() & (df[\"Museum number\"] != \"null\")\n",
    "has_image = df[\"Image\"].notna()\n",
    "downloadable = df.loc[has_number & has_image, [\"Museum number\", \"Image\"]]\n",
    "\n",
    "n_skipped = len(df) - len(downloadable)\n",
    "print(f\"Skipping {n_skipped} rows without a museum number or image URL\")\n",
    "\n",
    "for museum_number, url in downloadable.itertuples(index=False, name=None):\n",
    "    destination = image_path_for(museum_number)\n",
    "    if destination.exists():\n",
    "        # Already fetched on an earlier run (or by an earlier duplicate row).\n",
    "        continue\n",
    "\n",
    "    print(museum_number)\n",
    "    print(url)\n",
    "    download_image(url, destination)\n",
    "    # Rate-limit only after a real request was made.\n",
    "    time.sleep(DELAY_BETWEEN_REQUESTS_S)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ArtifactClassification",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}