# %% Imports
import pandas as pd

# %% Load the event records
# One row per event; the columns used further down are `id` and `Category`.
df = pd.read_csv("cleaned_data_with_categories.csv")

# %% Preview the loaded frame
df
# %% Rows per raw `Category` label, most frequent first
# count() reports non-null cells per column; `id` is used as the sort key.
df_sorted = df.groupby("Category").count().sort_values(by="id", ascending=False)
df_sorted


# %% Helper that collapses compound category labels
def categorize(value, separators=("/", ","), fallback="Miscellaneous Events"):
    """Map a raw category label to the label used for the new column.

    A label whose text contains any of ``separators`` is replaced by
    ``fallback``; every other value is returned unchanged.  With the
    defaults this collapses multi-category labels (joined by "/" or ",")
    and also single labels that merely contain a slash, such as
    "Protest / Riot".

    Parameters
    ----------
    value : object
        Raw label.  It is converted with ``str()`` only for the separator
        check, so a non-string value (for example NaN, whose text is
        "nan") is returned as-is.
    separators : iterable of str, optional
        Substrings that mark a label as compound.  Defaults to "/" and ",".
    fallback : object, optional
        Value returned for compound labels.  Defaults to
        "Miscellaneous Events".

    Returns
    -------
    object
        ``fallback`` when a separator occurs in ``str(value)``, otherwise
        ``value`` itself.
    """
    text = str(value)  # convert once instead of once per separator
    if any(sep in text for sep in separators):
        return fallback
    return value


# %% Derive the collapsed label column
# NOTE(review): despite the column name, the values written here come from the
# rule in categorize() applied to `Category`.
df["GPT Generated Result"] = df["Category"].apply(categorize)
# %% Rows per collapsed label, most frequent first
df_sorted1 = (
    df.groupby("GPT Generated Result").count().sort_values(by="id", ascending=False)
)
df_sorted1

# %% Persist the labelled frame
# index=False: `df` still carries the default positional index from read_csv,
# which to_csv would otherwise write as an extra unnamed leading column (it
# comes back as "Unnamed: 0" when the file is read again).
# NOTE(review): the evaluation sheet loaded later in this notebook has such an
# "Unnamed: 0" column, presumably from an earlier export of this kind; confirm
# nothing downstream relies on it.
df.to_csv("result.csv", index=False)
# %% Load the evaluation sheet and preview it
eva = pd.read_csv("evaluation_result.csv")
eva

# %% True `Result_GPT` flags per collapsed label
# Keep a 0/1 copy of the boolean flag on the frame so that summing it per
# group counts the True rows.
eva["Result_GPT_True_Count"] = eva["Result_GPT"].astype(int)

# Rows whose group key is NaN are left out by groupby's default settings.
gpt_true_by_label = eva.groupby("GPT Generated Result")["Result_GPT_True_Count"]
result = gpt_true_by_label.sum()

# Largest counts first.
result_gpt = result.sort_values(ascending=False)
result_gpt
# %% Rows per golden label, most frequent first
# count() reports non-null cells per column; `id` is used as the sort key.
rows_by_golden_label = eva.groupby("Category_GoldenResult").count()
test = rows_by_golden_label.sort_values(by="id", ascending=False)
test

# %% True `Result_Golden` flags per golden label
# Keep a 0/1 copy of the boolean flag on the frame so that summing it per
# group counts the True rows.
eva["Result_Golden_True_Count"] = eva["Result_Golden"].astype(int)

golden_true_by_label = eva.groupby("Category_GoldenResult")["Result_Golden_True_Count"]
result = golden_true_by_label.sum()

# Largest counts first.
result_golden = result.sort_values(ascending=False)

result_golden