Spaces:
Runtime error
Runtime error
Nicky Nicolson
committed on
Commit
•
abf8c49
1
Parent(s):
f2146ad
Convert TSV to CSV before using sqlite-utils
Browse files- Dockerfile +2 -1
- tab2csv.py +11 -0
Dockerfile
CHANGED
@@ -15,7 +15,8 @@ ADD https://api.gbif.org/v1/occurrence/download/request/0032228-231002084531237.
|
|
15 |
RUN ls -l /data
|
16 |
RUN unzip /data/gbif-occs.zip -d /data
|
17 |
RUN ls -l /data
|
18 |
-
RUN
|
|
|
19 |
RUN ls -l /code
|
20 |
RUN sqlite-utils tables /code/gbifocc.db --counts
|
21 |
|
|
|
15 |
RUN ls -l /data
|
16 |
RUN unzip /data/gbif-occs.zip -d /data
|
17 |
RUN ls -l /data
|
18 |
+
RUN python tab2csv.py /data/0032228-231002084531237.tsv /data/0032228-231002084531237.csv
|
19 |
+
RUN csvs-to-sqlite /data/0032228-231002084531237.csv /code/gbifocc.db
|
20 |
RUN ls -l /code
|
21 |
RUN sqlite-utils tables /code/gbifocc.db --counts
|
22 |
|
tab2csv.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
import pandas as pd


def convert_tab_to_csv(inputfile, outputfile):
    """Rewrite the tab-separated file *inputfile* as comma-separated *outputfile*.

    Args:
        inputfile: Path of the UTF-8 encoded, tab-separated file to read.
        outputfile: Path of the comma-separated file to write.
    """
    # keep_default_na=False stops pandas turning empty cells and literal
    # strings such as "NA" into NaN, so the text is written back unchanged.
    df = pd.read_csv(inputfile, encoding='utf8', keep_default_na=False, sep='\t')
    # index=False: do not emit the DataFrame's row index as an extra column.
    df.to_csv(outputfile, index=False, sep=',')


def main(argv=None):
    """Parse the command line and run the conversion.

    Args:
        argv: Argument list to parse; ``None`` makes argparse use
            ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("inputfile")
    parser.add_argument("outputfile")
    args = parser.parse_args(argv)

    # The output path lives on the parsed namespace; there is no bare
    # `outputfile` name in scope, so referencing one raises NameError.
    convert_tab_to_csv(args.inputfile, args.outputfile)


if __name__ == '__main__':
    main()
|