Spaces:
Runtime error
Runtime error
File size: 3,938 Bytes
5d396e9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import pandas as pd
def _read_statewide_csv(table, year, data_dir):
    """Read '<data_dir>/<table>_<year>_Statewide.csv' into a DataFrame."""
    return pd.read_csv(f'{data_dir}/{table}_{year}_Statewide.csv',
                       encoding='latin')


def extract_data(year, data_dir='data/raw_csv'):
    """
    Load one year of raw statewide crash CSV files and keep only the
    rows related to crashes that involve a bicycle.

    Parameters
    ----------
    year
        Value interpolated into the file names, e.g. 2020 selects
        'VEHICLE_2020_Statewide.csv'.
    data_dir
        Directory holding the VEHICLE, CYCLE, PERSON, CRASH, ROADWAY and
        FLAG CSV files. Defaults to 'data/raw_csv', relative to the
        current working directory.

    Returns
    -------
    tuple of four DataFrames, in this order:
    - 'bicycles': samples are bicycle vehicles (VEH_TYPE 20 or 21) which
      were involved in crashes, with extra columns left-merged from the
      CYCLE table on ('CRN', 'UNIT_NUM').
    - 'persons': samples are all individuals involved in crashes
      involving bicycles (not only the persons riding the bicycles).
    - 'crashes': samples are crash events involving bicycles, with flag
      columns left-merged from the FLAG table on 'CRN'.
    - 'roadway': additional features for crash events, related to
      roadway attributes and conditions.

    Raises
    ------
    FileNotFoundError
        If one of the expected CSV files is missing from data_dir.
    KeyError
        If one of the selected columns is absent from its CSV file.
    """
    # Retrieve vehicle samples corresponding to bicycles.
    # VEH_TYPE is parsed as a string in some samples and as a number in
    # others, so both spellings of the bicycle codes are matched.
    vehicles = _read_statewide_csv('VEHICLE', year, data_dir)
    is_bicycle = vehicles.VEH_TYPE.isin([20, 21, '20', '21'])
    vehicle_cols = ['CRN', 'GRADE', 'IMPACT_POINT',
                    'RDWY_ALIGNMENT', 'UNIT_NUM',
                    'VEH_MOVEMENT', 'VEH_POSITION', 'VEH_ROLE', 'VEH_TYPE']
    # Single-step .loc selection instead of chained df[mask][cols].
    bicycles = vehicles.loc[is_bicycle, vehicle_cols]
    del vehicles  # release the full table before loading the next one

    # Merge onto the bicycles dataframe some additional features from
    # the CYCLE table.
    cycles = _read_statewide_csv('CYCLE', year, data_dir)
    cycle_cols = ['CRN', 'UNIT_NUM',
                  'PC_HDLGHT_IND', 'PC_HLMT_IND', 'PC_REAR_RFLTR_IND']
    bicycles = bicycles.merge(cycles[cycle_cols], how='left',
                              on=['CRN', 'UNIT_NUM'])
    del cycles

    # Crash record numbers of every crash with at least one bicycle;
    # used to filter the remaining tables.
    bike_crns = bicycles.CRN

    # Retrieve information about persons involved in crashes involving
    # bikes (not just the persons riding the bikes).
    persons = _read_statewide_csv('PERSON', year, data_dir)
    person_cols = ['AGE', 'CRN', 'INJ_SEVERITY', 'PERSON_TYPE',
                   'RESTRAINT_HELMET', 'SEX', 'TRANSPORTED', 'UNIT_NUM']
    # .copy() hands the caller an independent frame, so adding columns
    # later does not trigger chained-assignment warnings.
    persons = persons.loc[persons.CRN.isin(bike_crns), person_cols].copy()

    # Retrieve crash samples involving bikes.
    crashes = _read_statewide_csv('CRASH', year, data_dir)
    crash_cols = ['CRN', 'ARRIVAL_TM', 'DISPATCH_TM', 'COUNTY',
                  'MUNICIPALITY', 'DEC_LAT', 'DEC_LONG',
                  'BICYCLE_DEATH_COUNT', 'BICYCLE_SUSP_SERIOUS_INJ_COUNT',
                  'BUS_COUNT', 'COMM_VEH_COUNT', 'HEAVY_TRUCK_COUNT',
                  'SMALL_TRUCK_COUNT', 'SUV_COUNT', 'VAN_COUNT',
                  'CRASH_MONTH', 'CRASH_YEAR', 'DAY_OF_WEEK', 'HOUR_OF_DAY',
                  'COLLISION_TYPE', 'ILLUMINATION', 'INTERSECT_TYPE',
                  'LOCATION_TYPE', 'RELATION_TO_ROAD', 'TIME_OF_DAY',
                  'ROAD_CONDITION', 'TCD_TYPE', 'TCD_FUNC_CD', 'URBAN_RURAL',
                  'WEATHER1', 'WEATHER2']
    crashes = crashes.loc[crashes.CRN.isin(bike_crns), crash_cols]

    # Retrieve roadway data for crashes involving bikes.
    roadway = _read_statewide_csv('ROADWAY', year, data_dir)
    roadway_cols = ['CRN', 'SPEED_LIMIT', 'RDWY_COUNTY']
    roadway = roadway.loc[roadway.CRN.isin(bike_crns), roadway_cols].copy()

    # Merge onto the crashes dataframe some additional flag features:
    # driver impairment, driver inattention, other driver attributes,
    # relevant road conditions, etc.
    flags = _read_statewide_csv('FLAG', year, data_dir)
    flag_cols = ['AGGRESSIVE_DRIVING', 'ALCOHOL_RELATED', 'ANGLE_CRASH',
                 'CELL_PHONE', 'COMM_VEHICLE',
                 'CRN', 'CROSS_MEDIAN', 'CURVED_ROAD', 'CURVE_DVR_ERROR',
                 'DISTRACTED', 'DRINKING_DRIVER',
                 'DRUGGED_DRIVER', 'DRUG_RELATED', 'FATIGUE_ASLEEP',
                 'HO_OPPDIR_SDSWP', 'ICY_ROAD',
                 'ILLUMINATION_DARK', 'IMPAIRED_DRIVER', 'INTERSECTION',
                 'LANE_DEPARTURE',
                 'NHTSA_AGG_DRIVING', 'NO_CLEARANCE',
                 'NON_INTERSECTION', 'REAR_END', 'RUNNING_RED_LT',
                 'RUNNING_STOP_SIGN',
                 'RURAL', 'SNOW_SLUSH_ROAD', 'SPEEDING', 'SPEEDING_RELATED',
                 'SUDDEN_DEER', 'TAILGATING', 'URBAN', 'WET_ROAD',
                 'WORK_ZONE',
                 'MATURE_DRIVER', 'YOUNG_DRIVER']
    crashes = crashes.merge(flags[flag_cols], how='left', on='CRN')
    del flags

    return bicycles, persons, crashes, roadway