File size: 5,584 Bytes
3752cdf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"from pathlib import Path\n",
"\n",
"import pandas as pd\n",
"import requests\n",
"\n",
"# Configuration: everything a reader might want to tune lives here.\n",
"CSV_PATH = Path(\"../data/raw/BM_csv_files/3000BC-AD500/europe_999BC-600.csv\")\n",
"IMAGE_DIR = Path(\"../data/raw/BM_images\")\n",
"REQUEST_TIMEOUT_S = 30  # abort a stalled connection instead of hanging forever\n",
"REQUEST_DELAY_S = 20  # pause after each request so the image server is not hammered\n",
"# Keep TLS certificate verification on. If the host's chain cannot be validated,\n",
"# point this at a CA bundle path rather than switching verification off.\n",
"VERIFY_TLS = True\n",
"\n",
"headers = {\n",
"    \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3\"\n",
"}\n",
"\n",
"# Load the catalogue export and strip the leading \"No: \" from the museum numbers.\n",
"df = pd.read_csv(CSV_PATH)\n",
"df[\"Museum number\"] = df[\"Museum number\"].str.replace(r\"^No: \", \"\", regex=True)\n",
"\n",
"# Writing an image fails if the target directory does not exist yet.\n",
"IMAGE_DIR.mkdir(parents=True, exist_ok=True)\n",
"\n",
"skipped_rows = 0\n",
"for museum_number, url in zip(df[\"Museum number\"], df[\"Image\"]):\n",
"    # Rows without a real museum number (missing, empty or the literal \"null\")\n",
"    # would all map to the same file name and overwrite each other.\n",
"    if not isinstance(museum_number, str) or museum_number.strip() in (\"\", \"null\"):\n",
"        skipped_rows += 1\n",
"        continue\n",
"    # Rows without a usable image URL cannot be requested at all.\n",
"    if not isinstance(url, str) or not url.startswith(\"http\"):\n",
"        skipped_rows += 1\n",
"        continue\n",
"\n",
"    # A \"/\" inside a museum number would be read as a sub-directory.\n",
"    file_stem = museum_number.replace(\"/\", \"_\")\n",
"    image_path = IMAGE_DIR / f\"{file_stem}.jpg\"\n",
"    if image_path.exists():\n",
"        # Already fetched by an earlier (possibly interrupted) run.\n",
"        continue\n",
"\n",
"    print(museum_number)\n",
"    print(url)\n",
"    try:\n",
"        response = requests.get(\n",
"            url, headers=headers, timeout=REQUEST_TIMEOUT_S, verify=VERIFY_TLS\n",
"        )\n",
"    except requests.RequestException as exc:\n",
"        # One unreachable image must not abort the whole crawl.\n",
"        print(\"Failed to download image. Error:\", exc)\n",
"    else:\n",
"        if response.status_code == 200:\n",
"            image_path.write_bytes(response.content)\n",
"            print(\"Image downloaded successfully!\")\n",
"        else:\n",
"            print(\"Failed to download image. Status code:\", response.status_code)\n",
"    # Only rows that actually hit the network reach this point.\n",
"    time.sleep(REQUEST_DELAY_S)\n",
"\n",
"print(f\"Skipped {skipped_rows} rows with no usable museum number or image URL.\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"460"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of rows whose museum number is the literal string \"null\".\n",
"df[\"Museum number\"].eq(\"null\").sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "ArtifactClassification",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
|