{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "feaf77ab", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "workding dir: /Users/inflaton/code/engd/papers/maritime/global-incidents\n", "loading env vars from: /Users/inflaton/code/engd/papers/maritime/global-incidents/.env\n" ] }, { "data": { "text/plain": [ "True" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", "\n", "import os\n", "import sys\n", "from pathlib import Path\n", "\n", "# Resolve the working directory only once: re-running this cell must not\n", "# climb one directory higher on every execution.\n", "# (Variable name kept as-is, typo included, in case other cells reference it.)\n", "if \"workding_dir\" not in globals():\n", "    workding_dir = str(Path.cwd().parent)\n", "os.chdir(workding_dir)\n", "# Avoid stacking duplicate sys.path entries on re-runs.\n", "if workding_dir not in sys.path:\n", "    sys.path.append(workding_dir)\n", "print(\"workding dir:\", workding_dir)\n", "\n", "from dotenv import find_dotenv, load_dotenv\n", "\n", "# find_dotenv returns an empty string when nothing is found; fall back to .env.example.\n", "found_dotenv = find_dotenv(\".env\") or find_dotenv(\".env.example\")\n", "print(f\"loading env vars from: {found_dotenv}\")\n", "load_dotenv(found_dotenv, override=True)" ] }, { "cell_type": "markdown", "id": "3a7dd7d8", "metadata": {}, "source": [ "## Import Statement" ] }, { "cell_type": "code", "execution_count": 2, "id": "86fc25e6", "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "markdown", "id": "fac53e88", "metadata": {}, "source": [ "### read the data" ] }, { "cell_type": "code", "execution_count": 3, "id": "dc33b13b", "metadata": {}, "outputs": [], "source": [ "result_df = pd.read_csv(\"data/processed_data.csv\")" ] }, { "cell_type": "code", "execution_count": 5, "id": "31f58fd1", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", " | Details | \n", "Category | \n", "Details_cleaned | \n", "Category_cleaned | \n", "Category_single | \n", "Summarized_label | \n", "
---|---|---|---|---|---|---|
0 | \n", "Media sources indicate that workers at the Gra... | \n", "Mine Workers Strike | \n", "medium source indicate worker grasberg mine ex... | \n", "Mine Workers Strike | \n", "Mine Workers Strike | \n", "Worker Strike | \n", "
1 | \n", "News sources are stating that recent typhoons ... | \n", "Travel Warning | \n", "news source stating recent typhoon impact hong... | \n", "Travel Warning | \n", "Travel Warning | \n", "Administrative Issue | \n", "
2 | \n", "The persisting port congestion at Shanghai’s Y... | \n", "Port Congestion | \n", "persisting port congestion shanghai ’ yangshan... | \n", "Port Congestion | \n", "Port Congestion | \n", "Administrative Issue | \n", "
3 | \n", "Updated local media sources from Jakarta indic... | \n", "Bombing, Police Operations | \n", "updated local medium source jakarta indicate e... | \n", "Bombing, Police Operations | \n", "Bombing | \n", "Terrorism | \n", "
4 | \n", "According to local police in Jakarta, two expl... | \n", "Bombing, Police Operations | \n", "according local police jakarta two explosion c... | \n", "Bombing, Police Operations | \n", "Bombing | \n", "Terrorism | \n", "
MultinomialNB()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
MultinomialNB()
MultinomialNB(alpha=0.1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
MultinomialNB(alpha=0.1)
LogisticRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LogisticRegression()
SVC(kernel='linear')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
SVC(kernel='linear')
RandomForestClassifier(random_state=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestClassifier(random_state=42)