{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "46322fb5-5918-4b70-9689-9e0781439ac4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "workding dir: /Users/inflaton/code/engd/papers/maritime/global-incidents\n",
      "loading env vars from: /Users/inflaton/code/engd/papers/maritime/global-incidents/.env\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import os\n",
    "import sys\n",
    "from pathlib import Path\n",
    "\n",
    "# Switch to the parent of the directory the kernel started in, so that\n",
    "# relative paths used below (e.g. data/...) resolve from there.\n",
    "# Guarded: re-running this cell in a live kernel must not climb one more\n",
    "# directory level each time, nor append duplicate sys.path entries.\n",
    "if \"workding_dir\" not in globals():\n",
    "    workding_dir = str(Path.cwd().parent)\n",
    "    os.chdir(workding_dir)\n",
    "if workding_dir not in sys.path:\n",
    "    sys.path.append(workding_dir)\n",
    "print(\"workding dir:\", workding_dir)\n",
    "\n",
    "from dotenv import find_dotenv, load_dotenv\n",
    "\n",
    "# Use .env when it can be found; otherwise fall back to .env.example.\n",
    "# find_dotenv returns an empty string when nothing is found.\n",
    "found_dotenv = find_dotenv(\".env\") or find_dotenv(\".env.example\")\n",
    "print(f\"loading env vars from: {found_dotenv}\")\n",
    "load_dotenv(found_dotenv, override=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "daf1e3d1-75ac-4299-8bed-2f413a49f9a6",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import nltk\n",
    "from nltk.tokenize import sent_tokenize\n",
    "from nltk.tokenize import word_tokenize\n",
    "\n",
    "import gensim\n",
    "from gensim import corpora\n",
    "from gensim import similarities\n",
    "from gensim import models\n",
    "from gensim.models import CoherenceModel\n",
    "\n",
    "# from wordcloud import WordCloud, ImageColorGenerator\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import pandas as pd\n",
    "import re\n",
    "import os\n",
    "import datetime\n",
    "\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "from pprint import pprint\n",
    "import pyLDAvis\n",
    "import pyLDAvis.gensim_models as gensimvis"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "49e6de6b-71bd-4948-8827-52601406058f",
   "metadata": {},
   "source": [
    "# Import Data"
   ]
  },
  {
   "cell_type":
"code", "execution_count": 3, "id": "49222182-7811-4fa6-8c0a-21d3a546863e", "metadata": {}, "outputs": [], "source": [ "df = pd.read_parquet('data/processed_data2.parquet')" ] }, { "cell_type": "code", "execution_count": 4, "id": "3fb59a30", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | id | \n", "Headline | \n", "Details | \n", "Severity | \n", "Category | \n", "Region | \n", "Datetime | \n", "Year | \n", "lat | \n", "lon | \n", "... | \n", "if_labeled | \n", "Month | \n", "Week | \n", "Headline_Details | \n", "url | \n", "title | \n", "content | \n", "cleaned_content | \n", "binary_content | \n", "word_count | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "Grasberg Mine- Grasberg mine workers extend st... | \n", "Media sources indicate that workers at the Gra... | \n", "Moderate | \n", "Mine Workers Strike | \n", "Indonesia | \n", "28/5/17 17:08 | \n", "2017.0 | \n", "-4.05608 | \n", "137.11302 | \n", "... | \n", "False | \n", "5.0 | \n", "21.0 | \n", "Grasberg Mine- Grasberg mine workers extend st... | \n", "https://news.google.com/rss/articles/CBMiZ2h0d... | \n", "Freeport Indonesia mine workers extend strike ... | \n", "Trucks are seen on a road in the Grasberg copp... | \n", "[truck, be, see, on, road, in, grasberg, coppe... | \n", "[adkerson_jakarta_try, agreement_freeport_indo... | \n", "53 | \n", "
1 | \n", "3 | \n", "Shanghai port congestion impacts terminals in ... | \n", "The persisting port congestion at Shanghai’s Y... | \n", "Minor | \n", "Port Congestion | \n", "China | \n", "27/4/17 9:16 | \n", "2017.0 | \n", "29.52000 | \n", "121.33190 | \n", "... | \n", "False | \n", "4.0 | \n", "17.0 | \n", "Shanghai port congestion impacts terminals in ... | \n", "https://news.google.com/rss/articles/CBMiVWh0d... | \n", "Typhoon Muifa to shut China ports for second t... | \n", "By Sam Whelan 13/09/2022\\n\\nAnother typhoon ha... | \n", "[by, sam, whelan, typhoon, have, prompt, port,... | \n", "[additional_ripple_effect, avoid_path_typhoon,... | \n", "44 | \n", "
2 | \n", "5 | \n", "UPDATE - Indonesia: Police confirm two explosi... | \n", "According to local police in Jakarta, two expl... | \n", "Extreme | \n", "Bombing, Police Operations | \n", "Indonesia | \n", "24/5/17 16:20 | \n", "2017.0 | \n", "NaN | \n", "NaN | \n", "... | \n", "True | \n", "5.0 | \n", "21.0 | \n", "UPDATE - Indonesia: Police confirm two explosi... | \n", "https://news.google.com/rss/articles/CBMiZWh0d... | \n", "Jakarta Police Receive 2 More Reports on Coldp... | \n", "TEMPO.CO, Jakarta - South Jakarta Metro Police... | \n", "[jakarta, south, jakarta, metro, police, recei... | \n", "[actress_accord, available_day_concert, click_... | \n", "24 | \n", "
3 | \n", "6 | \n", "UPDATE - Indonesia: Severe winds damage infras... | \n", "Severe winds have downed billboards and trees ... | \n", "Moderate | \n", "Roadway Closure / Disruption, Flooding, Severe... | \n", "Indonesia | \n", "19/4/17 9:10 | \n", "2017.0 | \n", "-6.91264 | \n", "107.65700 | \n", "... | \n", "True | \n", "4.0 | \n", "16.0 | \n", "UPDATE - Indonesia: Severe winds damage infras... | \n", "https://news.google.com/rss/articles/CBMiSWh0d... | \n", "Indonesia hit by some of strongest winds recorded | \n", "A man stands near damaged houses following a t... | \n", "[man, stand, near, damage, house, follow, torn... | \n", "[bbc_indonesia, climatologist_government_resea... | \n", "28 | \n", "
4 | \n", "14 | \n", "2 miles E of Chesterfield - A tornado has touc... | \n", "Government sources are reporting a tornado has... | \n", "Minor | \n", "Tornado | \n", "United States | \n", "17/9/18 19:55 | \n", "2018.0 | \n", "37.51000 | \n", "-77.61000 | \n", "... | \n", "True | \n", "9.0 | \n", "38.0 | \n", "2 miles E of Chesterfield - A tornado has touc... | \n", "https://news.google.com/rss/articles/CBMigAFod... | \n", "UPDATE: Number of homes without power down to ... | \n", "More than 90,000 homes and businesses across t... | \n", "[more, than, home, business, across, richmond,... | \n", "[advise_seek_alternate, affect_richmond, alter... | \n", "134 | \n", "
5 rows × 23 columns
\n", "