Nicky Nicolson committed on
Commit
16e5414
1 Parent(s): c3dab06

Get download metadata from GBIF, write doi/licence to datasette metadata

Browse files
Files changed (4) hide show
  1. Dockerfile +1 -1
  2. getdDownloadMetadata.py +23 -0
  3. metadata.json +3 -1
  4. requirements.txt +2 -1
Dockerfile CHANGED
@@ -25,6 +25,6 @@ RUN sqlite-utils enable-fts /code/gbifocc.db gbifocc collectorNameAndNumber
25
 
26
  RUN chmod 755 /code/gbifocc.db
27
 
28
- COPY ./metadata.json /code/metadata.json
29
 
30
  CMD ["datasette", "/code/gbifocc.db", "-m", "/code/metadata.json", "--host", "0.0.0.0", "--port", "7860"]
 
25
 
26
  RUN chmod 755 /code/gbifocc.db
27
 
28
+ RUN python getDownloadMetadata.py ./metadata.json /code/metadata.json --download_id=$GBIF_DOWNLOAD_ID
29
 
30
  CMD ["datasette", "/code/gbifocc.db", "-m", "/code/metadata.json", "--host", "0.0.0.0", "--port", "7860"]
getdDownloadMetadata.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Copy a Datasette metadata file, adding licence and DOI details from GBIF.

Usage:
    python getDownloadMetadata.py INPUTFILE OUTPUTFILE --download_id=KEY

INPUTFILE is an existing Datasette ``metadata.json``; OUTPUTFILE receives the
same content plus the ``license``, ``license_url`` and ``source_url`` entries
derived from the GBIF occurrence download identified by KEY.
"""
import argparse
import json

DOI_RESOLVER = 'https://doi.org/'


def apply_gbif_metadata(datasette_metadata, gbif_metadata):
    """Add licence and source-URL entries to *datasette_metadata* in place.

    Args:
        datasette_metadata: dict loaded from the Datasette metadata file.
        gbif_metadata: dict describing a GBIF download; must contain the
            ``license`` and ``doi`` keys.

    Returns:
        The same ``datasette_metadata`` dict, for convenience.

    Raises:
        KeyError: if ``license`` or ``doi`` is missing from *gbif_metadata*.
    """
    license_value = gbif_metadata['license']
    # Datasette only recognises the US spelling "license" (plus
    # "license_url"); the previous "licence" key was silently ignored.
    datasette_metadata['license'] = license_value
    # NOTE(review): GBIF appears to report the licence as a URL - when it
    # does, expose it as the link target too. Confirm against the GBIF API.
    if str(license_value).startswith(('http://', 'https://')):
        datasette_metadata['license_url'] = license_value

    # The resolver URL needs a "/" between the host and the DOI, otherwise
    # the result is "https://doi.org10.xxxx/..." which does not resolve.
    doi = str(gbif_metadata['doi']).lstrip('/')
    datasette_metadata['source_url'] = DOI_RESOLVER + doi
    return datasette_metadata


def main(argv=None):
    """Parse the command line, fetch GBIF metadata and write the output file.

    Args:
        argv: optional list of command-line arguments; defaults to
            ``sys.argv[1:]`` when ``None``.
    """
    parser = argparse.ArgumentParser(
        description='Write GBIF download DOI/licence into Datasette metadata')
    parser.add_argument("inputfile")
    parser.add_argument("--download_id", type=str, required=True)
    parser.add_argument("outputfile")
    args = parser.parse_args(argv)

    # An unset environment variable in the Dockerfile yields an empty
    # string here; fail early with a clear message instead of querying GBIF.
    if not args.download_id:
        parser.error("--download_id must not be empty")

    # Imported here so apply_gbif_metadata() stays importable (and testable)
    # on machines where pygbif is not installed.
    from pygbif import occurrences as occ

    with open(args.inputfile, 'r', encoding='utf-8') as f_in:
        datasette_metadata = json.load(f_in)

    gbif_metadata = occ.download_meta(key=args.download_id)
    apply_gbif_metadata(datasette_metadata, gbif_metadata)

    with open(args.outputfile, 'w', encoding='utf-8') as f_out:
        json.dump(datasette_metadata, f_out)


if __name__ == '__main__':
    main()
metadata.json CHANGED
@@ -1,5 +1,7 @@
1
  {
2
- "title": "gbifocc-reconcile",
 
 
3
  "databases": {
4
  "gbifocc": {
5
  "tables": {
 
1
  {
2
+ "title": "GBIF-mediated specimen occurrences",
3
+ "description": "This is a datasette instance containing GBIF-mediated specimen occurrences. It can be used to browse specimen records (with options to filter and facet records) and to run SQL queries. It is also configured to run an Open Refine compatible reconciliation service on collector name and number, allowing a user to easily link specimen references (as found in taxonomic literature) to these specimen records.",
4
+ "source": "Global Biodiversity Information Facility (GBIF)",
5
  "databases": {
6
  "gbifocc": {
7
  "tables": {
requirements.txt CHANGED
@@ -6,4 +6,5 @@ csvs-to-sqlite
6
  pandas==1.5.3
7
  bananompy
8
  datasette-jellyfish
9
- tqdm
 
 
6
  pandas==1.5.3
7
  bananompy
8
  datasette-jellyfish
9
+ tqdm
10
+ pygbif