{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import os\n",
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the Durham University Museums spreadsheet into `df`.\n",
    "#\n",
    "# The directory defaults to the location the notebook was originally run\n",
    "# from; set the HEDAP_DATA_DIR environment variable to point at a different\n",
    "# folder when running on another machine.\n",
    "DATA_DIR = os.environ.get(\n",
    "    \"HEDAP_DATA_DIR\",\n",
    "    r\"/home/james/CodingProjects/ArcPostDoc/HeDAP-imagesearch\",\n",
    ")\n",
    "DATA_FILE = \"Durham_University_Museums_data (1).xlsx\"\n",
    "\n",
    "# With the default directory this resolves to exactly the original path.\n",
    "df = pd.read_excel(os.path.join(DATA_DIR, DATA_FILE))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | object_number | \n", "object_name | \n", "other_name | \n", "reproduction.reference | \n", "description | \n", "label.text | \n", "material | \n", "technique | \n", "physical_description | \n", "number_of_parts | \n", "... | \n", "Unnamed: 25 | \n", "Unnamed: 26 | \n", "Unnamed: 27 | \n", "Unnamed: 28 | \n", "Unnamed: 29 | \n", "Unnamed: 30 | \n", "Unnamed: 31 | \n", "Unnamed: 32 | \n", "Unnamed: 33 | \n", "Unnamed: 34 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "EG1 | \n", "Kohl Pot | \n", "jar | \n", "NaN | \n", "Shouldered, squat, incised kohl jar, blackened... | \n", "NaN | \n", "limestone | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
1 | \n", "EG2 | \n", "cups | \n", "beaker | \n", "NaN | \n", "slightly concave beaker with flaring rim and c... | \n", "NaN | \n", "travertine | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
2 | \n", "EG3 | \n", "bowls | \n", "bowl | \n", "../images/egyptian/eg/eg3-409-d1.jpg | \n", "squat shouldered jar, no rim | \n", "<SPAN lang=en-GB style='FONT-SIZE: 12pt; FONT-... | \n", "limestone | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
3 | \n", "EG4 | \n", "bottles | \n", "jar | \n", "NaN | \n", "necked, globular jar with flared rim | \n", "NaN | \n", "travertine | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
4 | \n", "EG5 | \n", "bottles | \n", "jar | \n", "NaN | \n", "necked, globular jar with narrow rim, plus sto... | \n", "NaN | \n", "travertine | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
60081 | \n", "DURMA.2020.3.2072 | \n", "coins | \n", "NaN | \n", "../images/fulling_mill/2020/DURMA.2020.3.2072-... | \n", "A silver Roman coin which is a part of the Pie... | \n", "NaN | \n", "metal | \n", "hammering | \n", "A silver denarius of Elagabalus dating to the ... | \n", "1 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
60082 | \n", "DUROM.2021.286 | \n", "postcards | \n", "NaN | \n", "NaN | \n", "Portrait orientation postcard for the 1996 Ind... | \n", "NaN | \n", "paper | \n", "printing | \n", "Digital printed onto paper/card | \n", "1 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
60083 | \n", "DUROM.2021.287 | \n", "postcards | \n", "NaN | \n", "NaN | \n", "Pair of landscape orientation postcard sized p... | \n", "NaN | \n", "paper | \n", "printing | \n", "Digital print on card | \n", "2 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
60084 | \n", "DUROM.2021.289 | \n", "posters | \n", "NaN | \n", "NaN | \n", "Portrait orientation poster for the 1996 India... | \n", "NaN | \n", "paper | \n", "printing | \n", "digital print on gloss paper | \n", "1 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
60085 | \n", "DUROM.2021.288 | \n", "posters | \n", "NaN | \n", "NaN | \n", "Portrait orientation poster for the 1996 India... | \n", "NaN | \n", "paper | \n", "printing | \n", "Digital print on paper | \n", "1 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
60086 rows × 35 columns
\n", "