Nicky Nicolson committed on
Commit
abf8c49
1 Parent(s): f2146ad

Convert TSV to CSV before using sqlite utils

Browse files
Files changed (2) hide show
  1. Dockerfile +2 -1
  2. tab2csv.py +11 -0
Dockerfile CHANGED
@@ -15,7 +15,8 @@ ADD https://api.gbif.org/v1/occurrence/download/request/0032228-231002084531237.
15
  RUN ls -l /data
16
  RUN unzip /data/gbif-occs.zip -d /data
17
  RUN ls -l /data
18
- RUN csvs-to-sqlite /data/0032228-231002084531237.csv /code/gbifocc.db -s \$'\t'
 
19
  RUN ls -l /code
20
  RUN sqlite-utils tables /code/gbifocc.db --counts
21
 
 
15
  RUN ls -l /data
16
  RUN unzip /data/gbif-occs.zip -d /data
17
  RUN ls -l /data
18
+ RUN python tab2csv.py /data/0032228-231002084531237.tsv /data/0032228-231002084531237.csv
19
+ RUN csvs-to-sqlite /data/0032228-231002084531237.csv /code/gbifocc.db
20
  RUN ls -l /code
21
  RUN sqlite-utils tables /code/gbifocc.db --counts
22
 
tab2csv.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import pandas as pd
3
+
4
+ if __name__ == '__main__':
5
+ parser = argparse.ArgumentParser()
6
+ parser.add_argument("inputfile")
7
+ parser.add_argument("outputfile")
8
+ args = parser.parse_args()
9
+
10
+ df = pd.read_csv(args.inputfile, encoding='utf8', keep_default_na=False, sep='\t')
11
+ df.to_csv(outputfile, index=False, sep=',')