File size: 3,938 Bytes
5d396e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import pandas as pd

def _read_statewide_csv(table, year):
    """Read the raw statewide CSV for ``table`` and ``year`` into a DataFrame."""
    return pd.read_csv(f'data/raw_csv/{table}_{year}_Statewide.csv', encoding='latin')


def _rows_for_crashes(frame, crns, cols):
    """Return columns ``cols`` of the rows of ``frame`` whose CRN is in ``crns``."""
    # A single .loc call filters rows and projects columns in one step,
    # avoiding the full-width intermediate frame of frame[mask][cols].
    return frame.loc[frame.CRN.isin(crns), cols]


def extract_data(year):
    """
    Load one year of raw crash CSV data and keep only the records
    that relate to crashes involving bicycles.

    Six files are read from 'data/raw_csv/' (relative to the current
    working directory), named '<TABLE>_<year>_Statewide.csv' for the
    tables VEHICLE, CYCLE, PERSON, CRASH, ROADWAY and FLAG.

    Parameters
    ----------
    year : value interpolated into the file names (e.g. 2020).

    Returns
    -------
    A tuple (bicycles, persons, crashes, roadway) of dataframes:
    - 'bicycles': samples are bicycle vehicles which were involved
      in crashes, with headlight / helmet / rear reflector indicator
      columns merged on from the CYCLE table.
    - 'persons': samples are all individuals involved in crashes
      involving bicycles (not only the cyclists).
    - 'crashes': samples are crash events involving bicycles, with
      flag features from the FLAG table merged on.
    - 'roadway': additional features for crash events, related to
      roadway attributes and conditions.

    Raises
    ------
    FileNotFoundError if one of the CSV files is missing, KeyError if
    a file lacks one of the selected columns.
    """

    # Retrieve vehicle samples corresponding to bicycles.
    # Note that in some samples VEH_TYPE is string, others float,
    # so both representations of the type codes are matched.
    vehicles = _read_statewide_csv('VEHICLE', year)
    is_bicycle = vehicles.VEH_TYPE.isin([20, 21, '20', '21'])
    vehicle_cols = ['CRN', 'GRADE', 'IMPACT_POINT',
                    'RDWY_ALIGNMENT', 'UNIT_NUM',
                    'VEH_MOVEMENT', 'VEH_POSITION', 'VEH_ROLE', 'VEH_TYPE']
    bicycles = vehicles.loc[is_bicycle, vehicle_cols]
    # Drop the full vehicle table before the next file is loaded.
    del vehicles

    # Merge onto bicycles dataframe some additional features from cycle.
    # Left join: bicycles without a matching CYCLE row are kept, with
    # missing values in the added columns.
    cycles = _read_statewide_csv('CYCLE', year)
    cycle_cols = ['CRN', 'UNIT_NUM', 'PC_HDLGHT_IND', 'PC_HLMT_IND', 'PC_REAR_RFLTR_IND']
    bicycles = bicycles.merge(cycles[cycle_cols], how='left', on=['CRN', 'UNIT_NUM'])
    del cycles

    # Every remaining table is restricted to crashes that involve a bicycle.
    bicycle_crns = bicycles.CRN

    # Retrieve information about persons involved in crashes involving bikes
    # (not just the persons riding the bikes)
    person_cols = ['AGE', 'CRN', 'INJ_SEVERITY', 'PERSON_TYPE',
                   'RESTRAINT_HELMET', 'SEX', 'TRANSPORTED', 'UNIT_NUM']
    persons = _rows_for_crashes(_read_statewide_csv('PERSON', year),
                                bicycle_crns, person_cols)

    # Retrieve crash samples involving bikes
    crash_cols = ['CRN', 'ARRIVAL_TM', 'DISPATCH_TM', 'COUNTY', 'MUNICIPALITY',
                  'DEC_LAT', 'DEC_LONG',
                  'BICYCLE_DEATH_COUNT', 'BICYCLE_SUSP_SERIOUS_INJ_COUNT',
                  'BUS_COUNT', 'COMM_VEH_COUNT', 'HEAVY_TRUCK_COUNT',
                  'SMALL_TRUCK_COUNT', 'SUV_COUNT', 'VAN_COUNT',
                  'CRASH_MONTH', 'CRASH_YEAR', 'DAY_OF_WEEK', 'HOUR_OF_DAY',
                  'COLLISION_TYPE', 'ILLUMINATION', 'INTERSECT_TYPE',
                  'LOCATION_TYPE', 'RELATION_TO_ROAD', 'TIME_OF_DAY',
                  'ROAD_CONDITION', 'TCD_TYPE', 'TCD_FUNC_CD', 'URBAN_RURAL',
                  'WEATHER1', 'WEATHER2']
    crashes = _rows_for_crashes(_read_statewide_csv('CRASH', year),
                                bicycle_crns, crash_cols)

    # Retrieve roadway data involving bikes
    roadway_cols = ['CRN', 'SPEED_LIMIT', 'RDWY_COUNTY']
    roadway = _rows_for_crashes(_read_statewide_csv('ROADWAY', year),
                                bicycle_crns, roadway_cols)

    # Merge onto the crashes dataframe some additional flag features.
    # Include flag features corresponding to driver impairment,
    # driver inattention, other driver attributes, relevant road conditions, etc.
    flags = _read_statewide_csv('FLAG', year)
    flag_cols = ['AGGRESSIVE_DRIVING', 'ALCOHOL_RELATED', 'ANGLE_CRASH', 'CELL_PHONE',
                 'COMM_VEHICLE', 'CRN', 'CROSS_MEDIAN', 'CURVED_ROAD', 'CURVE_DVR_ERROR',
                 'DISTRACTED', 'DRINKING_DRIVER', 'DRUGGED_DRIVER', 'DRUG_RELATED',
                 'FATIGUE_ASLEEP', 'HO_OPPDIR_SDSWP', 'ICY_ROAD',
                 'ILLUMINATION_DARK', 'IMPAIRED_DRIVER', 'INTERSECTION', 'LANE_DEPARTURE',
                 'NHTSA_AGG_DRIVING', 'NO_CLEARANCE',
                 'NON_INTERSECTION', 'REAR_END', 'RUNNING_RED_LT', 'RUNNING_STOP_SIGN',
                 'RURAL', 'SNOW_SLUSH_ROAD', 'SPEEDING', 'SPEEDING_RELATED',
                 'SUDDEN_DEER', 'TAILGATING', 'URBAN', 'WET_ROAD', 'WORK_ZONE',
                 'MATURE_DRIVER', 'YOUNG_DRIVER']
    crashes = crashes.merge(flags[flag_cols], how='left', on='CRN')
    del flags

    return bicycles, persons, crashes, roadway