{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Load the image-file -> object mapping and the per-object metadata table.\n", "file2obj = pd.read_csv(\"../data/processed/OM_file_to_obj.csv\")\n", "\n", "# Could eventually do something with these columns, but need cleaning first\n", "obj2info = pd.read_csv(\"../data/processed/OM_obj_to_info.csv\").drop(\n", "    columns=[\"number_of_parts\", \"production.date.start\", \"production.date.end\", \"obj_num_old\"]\n", ")\n", "\n", "# Full path of every image. Zipping the two columns yields the same values as\n", "# a row-wise apply(axis=1) without constructing a Series for each row.\n", "file2obj[\"image\"] = [\n", "    os.path.join(root, name) for root, name in zip(file2obj[\"root\"], file2obj[\"file\"])\n", "]\n", "\n", "# Left join: every image row is kept, with missing metadata where obj_num is unknown.\n", "join_df = file2obj[[\"obj_num\", \"file\", \"image\", \"root\"]].merge(\n", "    obj2info, on=\"obj_num\", how=\"left\"\n", ")\n", "# Duplicate obj_num values in obj2info would silently repeat image rows.\n", "assert len(join_df) == len(file2obj), \"obj2info contains duplicate obj_num values\"" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 data/raw/images/fulling_mill/1985\n", "1 data/raw/images/fulling_mill/1985\n", "2 data/raw/images/fulling_mill/1985\n", "3 data/raw/images/fulling_mill/1985\n", "4 data/raw/images/fulling_mill/1985\n", " ... \n", "37300 data/raw/images/egyptian/2014\n", "37301 data/raw/images/egyptian/2014\n", "37302 data/raw/images/egyptian/2014\n", "37303 data/raw/images/egyptian/1963\n", "37304 data/raw/images/egyptian/1963\n", "Name: root, Length: 37305, dtype: object" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "file2obj[\"root\"]" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | obj_num | \n", "description | \n", "object_name | \n", "other_name | \n", "material | \n", "production.period | \n", "production.place | \n", "
---|---|---|---|---|---|---|---|
0 | \n", "eg3 | \n", "squat shouldered jar, no rim | \n", "bowls | \n", "bowl | \n", "limestone | \n", "1st Dynasty | \n", "Egypt | \n", "
1 | \n", "eg64 | \n", "axe-head | \n", "axes: woodworking tools | \n", "axe-head | \n", "granite | \n", "NaN | \n", "Egypt | \n", "
2 | \n", "eg71 | \n", "the working end of a fish tail knife with pres... | \n", "knives | \n", "knife | \n", "Flint/Chert | \n", "Naqada II | \n", "Egypt | \n", "
3 | \n", "eg75 | \n", "seated figure of priest holding unrolled papyr... | \n", "Human Figurine | \n", "imhotep figurine | \n", "bronze | \n", "Late Period | \n", "Egypt | \n", "
4 | \n", "durom.1971.78 | \n", "seated woman, inset eyes (lost), headdress had... | \n", "Human Figurine | \n", "Hathor figurine | \n", "bronze | \n", "Late Period | \n", "Egypt | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
12349 | \n", "durma.2020.3.2562 | \n", "A silver Roman coin which is a part of the Pie... | \n", "coins | \n", "NaN | \n", "metal | \n", "Roman | \n", "Rome | \n", "
12350 | \n", "durma.2020.3.2060 | \n", "A silver Roman coin which is a part of the Pie... | \n", "coins | \n", "NaN | \n", "metal | \n", "Roman | \n", "NaN | \n", "
12351 | \n", "durma.2020.3.1446 | \n", "A silver Roman coin which is a part of the Pie... | \n", "coins | \n", "NaN | \n", "metal | \n", "Roman | \n", "Rome | \n", "
12352 | \n", "durma.2020.3.2042 | \n", "A silver Roman coin which is a part of the Pie... | \n", "coins | \n", "NaN | \n", "metal | \n", "Roman | \n", "Rome | \n", "
12353 | \n", "durma.2020.3.2072 | \n", "A silver Roman coin which is a part of the Pie... | \n", "coins | \n", "NaN | \n", "metal | \n", "Roman | \n", "Rome | \n", "
11673 rows × 7 columns
\n", "\n", " | obj_num | \n", "file | \n", "image | \n", "root | \n", "description | \n", "object_name | \n", "other_name | \n", "material | \n", "production.period | \n", "production.place | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "durma.1985.15.68 | \n", "1985.15.68.jpg | \n", "data/raw/images/fulling_mill/1985/1985.15.68.jpg | \n", "data/raw/images/fulling_mill/1985 | \n", "2 fragments of a bowl with open fret work at t... | \n", "None | \n", "Rim Sherds | \n", "pottery | \n", "Post-Medieval | \n", "None | \n", "
1 | \n", "durma.1985.52.37 | \n", "1985.52.37.ff2.jpg | \n", "data/raw/images/fulling_mill/1985/1985.52.37.f... | \n", "data/raw/images/fulling_mill/1985 | \n", "Reconstructed small vessel (many pieces with s... | \n", "pottery | \n", "Pottery | \n", "pottery | \n", "Roman | \n", "None | \n", "
2 | \n", "durma.1985.81.4496 | \n", "1985.81.4496 d2.jpg | \n", "data/raw/images/fulling_mill/1985/1985.81.4496... | \n", "data/raw/images/fulling_mill/1985 | \n", "Fragment of a Samian beaker. Panell decoration... | \n", "vessels | \n", "pottery | \n", "pottery | \n", "Roman | \n", "None | \n", "
3 | \n", "durma.1985.9.1 | \n", "1985.9.1.1-d4.jpg | \n", "data/raw/images/fulling_mill/1985/1985.9.1.1-d... | \n", "data/raw/images/fulling_mill/1985 | \n", "2 Fragmentary Saxon Cinerary Urns + 1 relative... | \n", "None | \n", "Cinerary Urns | \n", "pottery | \n", "Saxon | \n", "None | \n", "
4 | \n", "durma.1985.52.37 | \n", "1985.52.37.sf2.jpg | \n", "data/raw/images/fulling_mill/1985/1985.52.37.s... | \n", "data/raw/images/fulling_mill/1985 | \n", "Reconstructed small vessel (many pieces with s... | \n", "pottery | \n", "Pottery | \n", "pottery | \n", "Roman | \n", "None | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
37300 | \n", "durom.2014.1.2 | \n", "2014.1.2 bb.jpg | \n", "data/raw/images/egyptian/2014/2014.1.2 bb.jpg | \n", "data/raw/images/egyptian/2014 | \n", "One of a collection of 162 flint tools. Brown,... | \n", "blades | \n", "None | \n", "Flint/Chert | \n", "Neolithic Period | \n", "Egypt | \n", "
37301 | \n", "durom.2014.1.71 | \n", "2014.1.71 ll.jpg | \n", "data/raw/images/egyptian/2014/2014.1.71 ll.jpg | \n", "data/raw/images/egyptian/2014 | \n", "One of a collection of 162 flint tools. Large,... | \n", "axes: woodworking tools | \n", "None | \n", "Flint/Chert | \n", "Neolithic Period | \n", "Egypt | \n", "
37302 | \n", "durom.2014.1.2 | \n", "2014.1.2 rr.jpg | \n", "data/raw/images/egyptian/2014/2014.1.2 rr.jpg | \n", "data/raw/images/egyptian/2014 | \n", "One of a collection of 162 flint tools. Brown,... | \n", "blades | \n", "None | \n", "Flint/Chert | \n", "Neolithic Period | \n", "Egypt | \n", "
37303 | \n", "durom.1963.4 | \n", "1963.4.jpg | \n", "data/raw/images/egyptian/1963/1963.4.jpg | \n", "data/raw/images/egyptian/1963 | \n", "The woman is dressed in Qing dynasty style and... | \n", "figures | \n", "牙雕母婴像 | \n", "ivory | \n", "late Qing dynasty | \n", "China | \n", "
37304 | \n", "durom.1963.4 | \n", "1963.4.2.jpg | \n", "data/raw/images/egyptian/1963/1963.4.2.jpg | \n", "data/raw/images/egyptian/1963 | \n", "The woman is dressed in Qing dynasty style and... | \n", "figures | \n", "牙雕母婴像 | \n", "ivory | \n", "late Qing dynasty | \n", "China | \n", "
37305 rows × 10 columns
\n", "\n", " | obj_num | \n", "description | \n", "object_name | \n", "other_name | \n", "material | \n", "production.period | \n", "production.place | \n", "
---|