{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "e550a89c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "workding dir: /Users/inflaton/code/engd/papers/maritime/global-incidents\n", "loading env vars from: /Users/inflaton/code/engd/papers/maritime/global-incidents/.env\n" ] }, { "data": { "text/plain": [ "True" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", "\n", "import os\n", "import sys\n", "from pathlib import Path\n", "\n", "workding_dir = str(Path.cwd().parent)\n", "os.chdir(workding_dir)\n", "sys.path.append(workding_dir)\n", "print(\"workding dir:\", workding_dir)\n", "\n", "from dotenv import find_dotenv, load_dotenv\n", "\n", "found_dotenv = find_dotenv(\".env\")\n", "\n", "if len(found_dotenv) == 0:\n", " found_dotenv = find_dotenv(\".env.example\")\n", "print(f\"loading env vars from: {found_dotenv}\")\n", "load_dotenv(found_dotenv, override=True)" ] }, { "cell_type": "markdown", "id": "1fecbc87", "metadata": {}, "source": [ "## Import Statement" ] }, { "cell_type": "code", "execution_count": 2, "id": "5169e3ee", "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "markdown", "id": "76905f72", "metadata": {}, "source": [ "### Read the data" ] }, { "cell_type": "code", "execution_count": 3, "id": "b1043895", "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(\"data/all_port_labelled.csv\")" ] }, { "cell_type": "code", "execution_count": 4, "id": "2e40d90a", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", " | Unnamed: 0 | \n", "Index | \n", "Unnamed: 0.1 | \n", "Headline | \n", "Details | \n", "Severity | \n", "Category | \n", "Region | \n", "Datetime | \n", "Year | \n", "... | \n", "IT | \n", "EP | \n", "NEW | \n", "CSD | \n", "RPE | \n", "MN | \n", "NM | \n", "if_labeled | \n", "Month | \n", "Week | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0.0 | \n", "8.0 | \n", "34.0 | \n", "Grasberg Mine- Grasberg mine workers extend st... | \n", "Media sources indicate that workers at the Gra... | \n", "Moderate | \n", "Mine Workers Strike | \n", "Indonesia | \n", "28/5/17 17:08 | \n", "2017.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "False | \n", "5.0 | \n", "21.0 | \n", "
1 | \n", "1.0 | \n", "10.0 | \n", "63.0 | \n", "Indonesia: Undersea internet cables damaged by... | \n", "News sources are stating that recent typhoons ... | \n", "Minor | \n", "Travel Warning | \n", "Indonesia | \n", "4/9/17 14:30 | \n", "2017.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "False | \n", "4.0 | \n", "14.0 | \n", "
2 rows × 46 columns
\n", "\n", " | Details | \n", "maritime_label | \n", "Details_cleaned | \n", "
---|---|---|---|
0 | \n", "Media sources indicate that workers at the Gra... | \n", "FALSE | \n", "medium source indicate worker grasberg mine ex... | \n", "
1 | \n", "News sources are stating that recent typhoons ... | \n", "FALSE | \n", "news source stating recent typhoon impact hong... | \n", "
2 | \n", "The persisting port congestion at Shanghai’s Y... | \n", "TRUE | \n", "persisting port congestion shanghai ’ yangshan... | \n", "
3 | \n", "Updated local media sources from Jakarta indic... | \n", "TRUE | \n", "updated local medium source jakarta indicate e... | \n", "
4 | \n", "According to local police in Jakarta, two expl... | \n", "TRUE | \n", "according local police jakarta two explosion c... | \n", "
MultinomialNB()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
MultinomialNB()
LogisticRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LogisticRegression()
SVC(kernel='linear')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
SVC(kernel='linear')
RandomForestClassifier(random_state=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestClassifier(random_state=42)