{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "import os\n",
    "import sys\n",
    "from pathlib import Path\n",
    "\n",
    "import click\n",
    "from dotenv import find_dotenv, load_dotenv\n",
    "import pandas as pd\n",
    "import time\n",
    "import requests\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load every raw CSV export in `input_filepath`, group the files by region,\n",
    "# and build (a) `all_dfs`: one de-duplicated frame per region and\n",
    "# (b) `world_df`: all regions stacked, with a `region` column.\n",
    "input_filepath = \"../data/raw/BM_csv_files/3000BC-AD500/\"\n",
    "\n",
    "# os.listdir() returns entries in arbitrary order; sort them so the row order\n",
    "# and the row kept by drop_duplicates (keep=\"first\") are reproducible.\n",
    "csv_files = sorted(os.listdir(input_filepath))\n",
    "regions = [\"north_america\", \"asia\", \"europe\", \"africa\", \"south_america\"]\n",
    "\n",
    "all_dfs = {}\n",
    "for region in regions:\n",
    "    # A file belongs to a region when the region name occurs in its lower-cased name.\n",
    "    region_csv_files = [file for file in csv_files if region in file.lower()]\n",
    "    if not region_csv_files:\n",
    "        # pd.concat([]) would fail with an opaque \"No objects to concatenate\".\n",
    "        raise ValueError(\n",
    "            f\"No CSV files matching region {region!r} found in {input_filepath!r}\"\n",
    "        )\n",
    "    region_df = pd.concat(\n",
    "        [pd.read_csv(os.path.join(input_filepath, file)) for file in region_csv_files]\n",
    "    ).drop_duplicates(ignore_index=True)\n",
    "    # Tag rows with their region only after de-duplicating within the region.\n",
    "    region_df[\"region\"] = region\n",
    "    all_dfs[region] = region_df\n",
    "\n",
    "# Create the master frame with a single concat instead of growing it in a loop\n",
    "# (avoids quadratic copying and concatenating onto an empty DataFrame).\n",
    "world_df = pd.concat(list(all_dfs.values()))\n",
    "\n",
    "# Rows that are identical in every column except `region` are duplicates across\n",
    "# regions; keep the first occurrence. The index is intentionally not reset, so\n",
    "# index labels restart for each region.\n",
    "world_df = world_df.drop_duplicates(subset=world_df.columns.difference([\"region\"]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | Image | \n", "Object type | \n", "Museum number | \n", "Title | \n", "Denomination | \n", "Escapement | \n", "Description | \n", "Producer name | \n", "School/style | \n", "State | \n", "... | \n", "Acq notes (acq) | \n", "Acq notes (exc) | \n", "Dept | \n", "BM/Big number | \n", "Reg number | \n", "Add ids | \n", "Cat no | \n", "Banknote serial number | \n", "Joined objects | \n", "region | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "https://media.britishmuseum.org/media/Reposito... | \n", "adze | \n", "No: Am1994,09.1 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Adze? of pecked and ground stone, grooved for ... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "Africa, Oceania and the Americas | \n", "NaN | \n", "Am1994,09.1 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "north_america | \n", "
1 | \n", "https://media.britishmuseum.org/media/Reposito... | \n", "altar | \n", "No: Am,S.818 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Three fragments of burnt clay that formed part... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "Africa, Oceania and the Americas | \n", "NaN | \n", "Am,S.818 | \n", "CDMS number: Am1931E1.818 (old CDMS no.); Prev... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "north_america | \n", "
2 | \n", "https://media.britishmuseum.org/media/Reposito... | \n", "altar | \n", "No: Am,S.817 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Fragments of an altar or crematory basin made ... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "Africa, Oceania and the Americas | \n", "NaN | \n", "Am,S.817 | \n", "CDMS number: Am1931E1.817 (old CDMS no.); Prev... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "north_america | \n", "
3 | \n", "https://media.britishmuseum.org/media/Reposito... | \n", "amulet; pendant | \n", "No: Am.9685 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Chalchihuitl, amulet, pendant made of amazonst... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "Africa, Oceania and the Americas | \n", "NaN | \n", "Am.9685 | \n", "CDMS number: Am1876C1.9685 (old CDMS no.); Mis... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "north_america | \n", "
4 | \n", "https://media.britishmuseum.org/media/Reposito... | \n", "arrow; point | \n", "No: Am,S.758.a-c | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Three expanding stem arrow or spear points, ma... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "Africa, Oceania and the Americas | \n", "NaN | \n", "Am,S.758.a-c | \n", "CDMS number: Am1931E1.758a-c (old CDMS no.); M... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "north_america | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
719 | \n", "https://media.britishmuseum.org/media/Reposito... | \n", "whistle; figurine | \n", "No: Am1954,05.196 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Anthropomorphic whistle / whistle in the shape... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "Africa, Oceania and the Americas | \n", "NaN | \n", "Am1954,05.196 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "south_america | \n", "
720 | \n", "https://media.britishmuseum.org/media/Reposito... | \n", "whistle | \n", "No: Am1954,05.669 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Whistle made of pottery, possibly modelled and... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "Africa, Oceania and the Americas | \n", "NaN | \n", "Am1954,05.669 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "south_america | \n", "
721 | \n", "https://media.britishmuseum.org/media/Reposito... | \n", "whistle | \n", "No: Am1954,05.194 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Anthropomorphic whistle in the shape of the An... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "Africa, Oceania and the Americas | \n", "NaN | \n", "Am1954,05.194 | \n", "Previous owner/ex-collection number: 167687 (W... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "south_america | \n", "
722 | \n", "https://media.britishmuseum.org/media/Reposito... | \n", "whistle | \n", "No: Am.6877 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Ovoid shaped whistle made of tumbaga by lost-w... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "Africa, Oceania and the Americas | \n", "NaN | \n", "Am.6877 | \n", "CDMS number: Am1870C1.6877 (old CDMS no.); Mis... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "south_america | \n", "
723 | \n", "https://media.britishmuseum.org/media/Reposito... | \n", "whistling vessel; jar | \n", "No: Am1982,Q.944 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Rectangular spouted jar with bridge and (broke... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "Acquisition details unknown. | \n", "NaN | \n", "Africa, Oceania and the Americas | \n", "NaN | \n", "Am1982,Q.944 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "south_america | \n", "
205721 rows × 48 columns
\n", "\n", " | Column | \n", "Null Percentage | \n", "Non-Null Count | \n", "unique_values | \n", "
---|---|---|---|---|
0 | \n", "Image | \n", "0.00 | \n", "205721 | \n", "202525 | \n", "
6 | \n", "Description | \n", "0.00 | \n", "205721 | \n", "107102 | \n", "
40 | \n", "Dept | \n", "0.00 | \n", "205721 | \n", "9 | \n", "
25 | \n", "Location | \n", "0.00 | \n", "205721 | \n", "1403 | \n", "
14 | \n", "Production date | \n", "0.00 | \n", "205721 | \n", "12766 | \n", "
47 | \n", "region | \n", "0.00 | \n", "205721 | \n", "5 | \n", "
2 | \n", "Museum number | \n", "0.00 | \n", "205721 | \n", "196970 | \n", "
1 | \n", "Object type | \n", "0.00 | \n", "205718 | \n", "4199 | \n", "
17 | \n", "Materials | \n", "0.05 | \n", "205612 | \n", "1856 | \n", "
42 | \n", "Reg number | \n", "3.86 | \n", "197787 | \n", "196674 | \n", "
13 | \n", "Culture | \n", "4.18 | \n", "197125 | \n", "1738 | \n", "
21 | \n", "Dimensions | \n", "4.52 | \n", "196413 | \n", "115526 | \n", "
15 | \n", "Production place | \n", "17.44 | \n", "169853 | \n", "3494 | \n", "
37 | \n", "Acq date | \n", "20.69 | \n", "163156 | \n", "566 | \n", "
24 | \n", "Bib references | \n", "21.67 | \n", "161134 | \n", "13399 | \n", "
33 | \n", "Acq name (acq) | \n", "29.95 | \n", "144108 | \n", "5215 | \n", "
28 | \n", "Subjects | \n", "44.44 | \n", "114308 | \n", "6779 | \n", "
16 | \n", "Find spot | \n", "47.62 | \n", "107754 | \n", "7860 | \n", "
10 | \n", "Authority | \n", "49.92 | \n", "103030 | \n", "1541 | \n", "
22 | \n", "Inscription | \n", "53.73 | \n", "95178 | \n", "47979 | \n", "
29 | \n", "Assoc name | \n", "54.57 | \n", "93456 | \n", "9520 | \n", "
4 | \n", "Denomination | \n", "57.39 | \n", "87657 | \n", "636 | \n", "
9 | \n", "State | \n", "62.18 | \n", "77802 | \n", "50 | \n", "
20 | \n", "Technique | \n", "70.98 | \n", "59695 | \n", "4003 | \n", "
23 | \n", "Curators Comments | \n", "74.30 | \n", "52865 | \n", "33459 | \n", "
44 | \n", "Cat no | \n", "78.42 | \n", "44389 | \n", "44280 | \n", "
43 | \n", "Add ids | \n", "84.81 | \n", "31245 | \n", "26824 | \n", "
38 | \n", "Acq notes (acq) | \n", "86.98 | \n", "26782 | \n", "5853 | \n", "
36 | \n", "Acq name (previous) | \n", "89.06 | \n", "22497 | \n", "1501 | \n", "
39 | \n", "Acq notes (exc) | \n", "90.70 | \n", "19132 | \n", "1450 | \n", "
27 | \n", "Condition | \n", "93.39 | \n", "13590 | \n", "5438 | \n", "
19 | \n", "Type series | \n", "94.52 | \n", "11271 | \n", "3001 | \n", "
18 | \n", "Ware | \n", "94.60 | \n", "11116 | \n", "351 | \n", "
41 | \n", "BM/Big number | \n", "95.86 | \n", "8526 | \n", "8474 | \n", "
26 | \n", "Exhibition history | \n", "95.87 | \n", "8504 | \n", "3688 | \n", "
34 | \n", "Acq name (finding) | \n", "96.75 | \n", "6689 | \n", "108 | \n", "
7 | \n", "Producer name | \n", "98.25 | \n", "3596 | \n", "1447 | \n", "
30 | \n", "Assoc place | \n", "98.72 | \n", "2638 | \n", "492 | \n", "
3 | \n", "Title | \n", "99.00 | \n", "2063 | \n", "313 | \n", "
31 | \n", "Assoc events | \n", "99.88 | \n", "244 | \n", "113 | \n", "
32 | \n", "Assoc titles | \n", "99.96 | \n", "89 | \n", "38 | \n", "
11 | \n", "Ethnic name (made by) | \n", "99.98 | \n", "32 | \n", "18 | \n", "
12 | \n", "Ethnic name (assoc) | \n", "100.00 | \n", "8 | \n", "4 | \n", "
45 | \n", "Banknote serial number | \n", "100.00 | \n", "1 | \n", "1 | \n", "
35 | \n", "Acq name (excavator) | \n", "100.00 | \n", "0 | \n", "0 | \n", "
8 | \n", "School/style | \n", "100.00 | \n", "0 | \n", "0 | \n", "
46 | \n", "Joined objects | \n", "100.00 | \n", "0 | \n", "0 | \n", "
5 | \n", "Escapement | \n", "100.00 | \n", "0 | \n", "0 | \n", "
\n", " | Production date | \n", "Reg number | \n", "correct | \n", "
---|---|---|---|
129 | \n", "700BC-1560s | \n", "Am1849,0629.23 | \n", "error | \n", "
152 | \n", "700BC-1560s | \n", "Am1940,02.45 | \n", "error | \n", "
153 | \n", "700BC-1560s | \n", "Am1946,19.6 | \n", "error | \n", "
154 | \n", "700BC-1560s | \n", "Am1943,04.3 | \n", "error | \n", "
155 | \n", "700BC-1560s | \n", "Am1946,19.7 | \n", "error | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
20519 | \n", "Third century BC (?) | \n", "1925,0119.611 | \n", "error | \n", "
21400 | \n", "19135 | \n", "Af1935,0205.2 | \n", "error | \n", "
21405 | \n", "6 April 29 AD | \n", "1898,0315.342 | \n", "error | \n", "
115 | \n", "C14-C15 | \n", "Am1842,1112.3 | \n", "error | \n", "
225 | \n", "C21 | \n", "2013,2014.1.a-b | \n", "error | \n", "
385 rows × 3 columns
\n", "\n", " | Production date | \n", "Reg number | \n", "correct | \n", "
---|---|---|---|
0 | \n", "1-1600 (?) | \n", "Am1994,09.1 | \n", "800 | \n", "
1 | \n", "200BC - 400AD | \n", "Am,S.818 | \n", "100 | \n", "
2 | \n", "200BC - 400AD | \n", "Am,S.817 | \n", "100 | \n", "
3 | \n", "400 - 800 | \n", "Am.9685 | \n", "600 | \n", "
4 | \n", "1000BC - 400AD | \n", "Am,S.758.a-c | \n", "-300 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
719 | \n", "100BC-600 | \n", "Am1954,05.196 | \n", "-350 | \n", "
720 | \n", "100BC-600 | \n", "Am1954,05.669 | \n", "-350 | \n", "
721 | \n", "100BC-650 | \n", "Am1954,05.194 | \n", "-375 | \n", "
722 | \n", "150BC-1600 (?) | \n", "Am.6877 | \n", "-875 | \n", "
723 | \n", "100BC-600 | \n", "Am1982,Q.944 | \n", "-350 | \n", "
200734 rows × 3 columns
\n", "\n", " | Production date | \n", "Reg number | \n", "correct | \n", "bucket | \n", "
---|---|---|---|---|
0 | \n", "1-1600 (?) | \n", "Am1994,09.1 | \n", "800 | \n", "(600.0, 1995.0] | \n", "
1 | \n", "200BC - 400AD | \n", "Am,S.818 | \n", "100 | \n", "(81.0, 100.0] | \n", "
2 | \n", "200BC - 400AD | \n", "Am,S.817 | \n", "100 | \n", "(81.0, 100.0] | \n", "
3 | \n", "400 - 800 | \n", "Am.9685 | \n", "600 | \n", "(550.0, 600.0] | \n", "
4 | \n", "1000BC - 400AD | \n", "Am,S.758.a-c | \n", "-300 | \n", "(-303.0, -300.0] | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
719 | \n", "100BC-600 | \n", "Am1954,05.196 | \n", "-350 | \n", "(-375.0, -350.0] | \n", "
720 | \n", "100BC-600 | \n", "Am1954,05.669 | \n", "-350 | \n", "(-375.0, -350.0] | \n", "
721 | \n", "100BC-650 | \n", "Am1954,05.194 | \n", "-375 | \n", "(-400.0, -375.0] | \n", "
722 | \n", "150BC-1600 (?) | \n", "Am.6877 | \n", "-875 | \n", "(-970.0, -800.0] | \n", "
723 | \n", "100BC-600 | \n", "Am1982,Q.944 | \n", "-350 | \n", "(-375.0, -350.0] | \n", "
197667 rows × 4 columns
\n", "\n", " | Image | \n", "Object type | \n", "Description | \n", "Culture | \n", "Production date | \n", "Production place | \n", "Materials | \n", "Subjects | \n", "Reg number | \n", "region | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "https://media.britishmuseum.org/media/Reposito... | \n", "adze | \n", "Adze? of pecked and ground stone, grooved for ... | \n", "Anasazi | \n", "1-1600 (?) | \n", "NaN | \n", "stone | \n", "NaN | \n", "Am1994,09.1 | \n", "north_america | \n", "
1 | \n", "https://media.britishmuseum.org/media/Reposito... | \n", "altar | \n", "Three fragments of burnt clay that formed part... | \n", "Middle Woodland Period | \n", "200BC - 400AD | \n", "NaN | \n", "clay | \n", "NaN | \n", "Am,S.818 | \n", "north_america | \n", "
2 | \n", "https://media.britishmuseum.org/media/Reposito... | \n", "altar | \n", "Fragments of an altar or crematory basin made ... | \n", "Middle Woodland Period | \n", "200BC - 400AD | \n", "NaN | \n", "clay | \n", "NaN | \n", "Am,S.817 | \n", "north_america | \n", "
3 | \n", "https://media.britishmuseum.org/media/Reposito... | \n", "amulet | \n", "Chalchihuitl, amulet, pendant made of amazonst... | \n", "Classic Maya | \n", "400 - 800 | \n", "NaN | \n", "amazonite | \n", "NaN | \n", "Am.9685 | \n", "north_america | \n", "
4 | \n", "https://media.britishmuseum.org/media/Reposito... | \n", "arrow | \n", "Three expanding stem arrow or spear points, ma... | \n", "Early Woodland Period | \n", "1000BC - 400AD | \n", "NaN | \n", "chert | \n", "NaN | \n", "Am,S.758.a-c | \n", "north_america | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
719 | \n", "https://media.britishmuseum.org/media/Reposito... | \n", "whistle | \n", "Anthropomorphic whistle / whistle in the shape... | \n", "Nasca | \n", "100BC-600 | \n", "NaN | \n", "pottery | \n", "society/human life | \n", "Am1954,05.196 | \n", "south_america | \n", "
720 | \n", "https://media.britishmuseum.org/media/Reposito... | \n", "whistle | \n", "Whistle made of pottery, possibly modelled and... | \n", "Nasca | \n", "100BC-600 | \n", "NaN | \n", "pottery | \n", "animal | \n", "Am1954,05.669 | \n", "south_america | \n", "
721 | \n", "https://media.britishmuseum.org/media/Reposito... | \n", "whistle | \n", "Anthropomorphic whistle in the shape of the An... | \n", "Nasca | \n", "100BC-650 | \n", "NaN | \n", "pottery | \n", "society/human life; anthropomorphism; amphibia... | \n", "Am1954,05.194 | \n", "south_america | \n", "
722 | \n", "https://media.britishmuseum.org/media/Reposito... | \n", "whistle | \n", "Ovoid shaped whistle made of tumbaga by lost-w... | \n", "Zenu | \n", "150BC-1600 (?) | \n", "NaN | \n", "tumbaga | \n", "NaN | \n", "Am.6877 | \n", "south_america | \n", "
723 | \n", "https://media.britishmuseum.org/media/Reposito... | \n", "whistling vessel | \n", "Rectangular spouted jar with bridge and (broke... | \n", "Nasca | \n", "100BC-600 | \n", "NaN | \n", "pottery | \n", "bird | \n", "Am1982,Q.944 | \n", "south_america | \n", "
201119 rows × 10 columns
\n", "