# Nicky Nicolson
# Add year columns
# 32b8a4e
# raw
# history blame contribute delete
# 940 Bytes
import argparse
import pandas as pd
def _add_period_columns(df, date_columns):
    """Add ``<col>_yr`` and ``<col>_q`` period columns for each date column.

    The frame is modified in place and also returned for convenience.
    """
    for date_column in date_columns:
        # Yearly and quarterly periods derived from the same source column.
        df[date_column + '_yr'] = pd.PeriodIndex(df[date_column], freq='Y')
        df[date_column + '_q'] = pd.PeriodIndex(df[date_column], freq='Q')
    return df


def main(argv=None):
    """Convert a tab-separated input file to a comma-separated output file.

    Args:
        argv: Command-line arguments (without the program name). When
            ``None``, argparse falls back to ``sys.argv[1:]``.

    The 'discovered', 'published' and 'added' columns are parsed as dates.
    With ``--createcols``, a year (``_yr``) and quarter (``_q``) column is
    added for each of them before writing.
    """
    parser = argparse.ArgumentParser(
        description="Convert a tab-separated file to CSV, optionally adding "
                    "year and quarter columns for the date fields.")
    parser.add_argument("inputfile", help="tab-separated input file")
    parser.add_argument("-c", "--createcols", action='store_true',
                        help="add <date>_yr and <date>_q columns")
    parser.add_argument("-l", "--limit", type=int,
                        help="read at most this many data rows")
    parser.add_argument("outputfile", help="comma-separated output file")
    args = parser.parse_args(argv)

    date_columns = ['discovered', 'published', 'added']

    df = pd.read_csv(args.inputfile,
                     encoding='utf8',
                     # Keep empty cells as empty strings instead of NaN.
                     keep_default_na=False,
                     # Malformed rows are dropped rather than aborting the run.
                     on_bad_lines='skip',
                     sep='\t',
                     nrows=args.limit,
                     parse_dates=date_columns)

    if args.createcols:
        _add_period_columns(df, date_columns)

    df.to_csv(args.outputfile, index=False, sep=',')


if __name__ == '__main__':
    main()