File size: 940 Bytes
e6f931e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32b8a4e
e6f931e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import argparse
import pandas as pd

# Columns parsed as dates on input and, with --createcols, expanded into
# additional year/quarter period columns.
DATE_COLUMNS = ('discovered', 'published', 'added')


def _parse_args(argv=None):
    """Parse the command line.

    Args:
        argv: Argument strings to parse. ``None`` makes argparse fall back
            to ``sys.argv[1:]``.

    Returns:
        The populated ``argparse.Namespace`` with ``inputfile``,
        ``outputfile``, ``createcols`` and ``limit`` attributes.
    """
    parser = argparse.ArgumentParser(
        description="Convert a tab-separated file to a comma-separated file, "
                    "optionally adding year/quarter columns for the date "
                    "columns.")
    parser.add_argument("inputfile", help="tab-separated input file")
    parser.add_argument("-c", "--createcols", action='store_true',
                        help="add <column>_yr and <column>_q period columns "
                             "for each date column")
    parser.add_argument("-l", "--limit", type=int,
                        help="read at most this many rows from the input")
    parser.add_argument("outputfile", help="comma-separated output file")
    return parser.parse_args(argv)


def _add_period_columns(df, date_columns):
    """Add ``<name>_yr`` and ``<name>_q`` period columns to *df* in place.

    Args:
        df: DataFrame holding every column named in *date_columns*.
        date_columns: Names of the columns to derive periods from.
    """
    for column in date_columns:
        source = df[column]
        # Yearly period first, then quarterly, so the output column order is
        # <name>_yr followed by <name>_q for each date column.
        df[column + '_yr'] = pd.PeriodIndex(source, freq='Y')
        df[column + '_q'] = pd.PeriodIndex(source, freq='Q')


def main(argv=None):
    """Read the tab-separated input, optionally enrich it, and write CSV.

    Args:
        argv: Argument strings to parse. ``None`` (the default) uses the
            process command line.
    """
    args = _parse_args(argv)

    df = pd.read_csv(
        args.inputfile,
        encoding='utf8',
        # Keep empty fields as empty strings instead of converting to NaN.
        keep_default_na=False,
        # Malformed rows are dropped rather than aborting the whole run.
        on_bad_lines='skip',
        sep='\t',
        # None (flag omitted) means "read every row".
        nrows=args.limit,
        parse_dates=list(DATE_COLUMNS),
    )

    if args.createcols:
        _add_period_columns(df, DATE_COLUMNS)

    df.to_csv(args.outputfile, index=False, sep=',')


if __name__ == '__main__':
    main()