Spaces:

smu-ai
/

global-incidents

Build error

App Files Files Community

dh-mc committed on Aug 25, 2024

Commit

23c2526

1 Parent(s): 4132e15

WIP

Browse files

Files changed (10) hide show

.gitattributes +4 -0
IS424_Data_Mining/code/GIS_DataMining_G1/.gitignore +4 -0
IS424_Data_Mining/code/GIS_DataMining_G1/Data_mining_G1.R +0 -0
IS424_Data_Mining/code/GIS_DataMining_G1/Datamining_qf.Rmd +191 -0
IS424_Data_Mining/code/GIS_DataMining_G1/GIS_DataMining_G1.Rproj +13 -0
IS424_Data_Mining/code/GIS_DataMining_G1/GIS_Purpose.csv +0 -0
IS424_Data_Mining/code/GIS_DataMining_G1/incident_map.png +3 -0
IS424_Data_Mining/code/GIS_DataMining_G1/incident_map_refined.png +3 -0
IS424_Data_Mining/code/GIS_DataMining_G1/my_spatial_plot.png +3 -0
IS424_Data_Mining/code/GIS_DataMining_G1/my_spatial_plot1.png +3 -0

.gitattributes CHANGED Viewed

@@ -48,3 +48,7 @@ data/cleaned_data_with_categories.csv filter=lfs diff=lfs merge=lfs -text
 data/processed_data2.parquet filter=lfs diff=lfs merge=lfs -text
 data/topic_viz_benchmark_moderate.html filter=lfs diff=lfs merge=lfs -text
 data/topic_viz_benchmark_severe.html filter=lfs diff=lfs merge=lfs -text

 data/processed_data2.parquet filter=lfs diff=lfs merge=lfs -text
 data/topic_viz_benchmark_moderate.html filter=lfs diff=lfs merge=lfs -text
 data/topic_viz_benchmark_severe.html filter=lfs diff=lfs merge=lfs -text
+IS424_Data_Mining/code/GIS_DataMining_G1/incident_map_refined.png filter=lfs diff=lfs merge=lfs -text
+IS424_Data_Mining/code/GIS_DataMining_G1/my_spatial_plot.png filter=lfs diff=lfs merge=lfs -text
+IS424_Data_Mining/code/GIS_DataMining_G1/my_spatial_plot1.png filter=lfs diff=lfs merge=lfs -text
+IS424_Data_Mining/code/GIS_DataMining_G1/incident_map.png filter=lfs diff=lfs merge=lfs -text

IS424_Data_Mining/code/GIS_DataMining_G1/.gitignore ADDED Viewed

	@@ -0,0 +1,4 @@

+.Rproj.user
+.Rhistory
+.RData
+.Ruserdata

IS424_Data_Mining/code/GIS_DataMining_G1/Data_mining_G1.R ADDED Viewed

File without changes

IS424_Data_Mining/code/GIS_DataMining_G1/Datamining_qf.Rmd ADDED Viewed

	@@ -0,0 +1,191 @@

+```{r}
+# One-time setup: install only the packages that are not already available,
+# so re-running or knitting this document does not reinstall them every time.
+# NOTE(review): later chunks also load ggplot2, rnaturalearth,
+# rnaturalearthdata, lubridate, forecast, maps and scales, none of which are
+# installed here, while "raster" is installed but never loaded in this file --
+# confirm the intended package list.
+required_packages <- c("sf", "raster", "dplyr")
+is_installed <- vapply(
+  required_packages, requireNamespace, logical(1), quietly = TRUE
+)
+if (!all(is_installed)) {
+  install.packages(required_packages[!is_installed])
+}
+```
+```{r}
+# Load necessary libraries
+library(sf) # st_as_sf() in the next chunk
+library(ggplot2) # ggplot(), geom_sf(), ggsave()
+library(rnaturalearth) # ne_countries() world basemap
+# NOTE(review): presumably supplies the map data ne_countries() reads -- confirm
+library(rnaturalearthdata)
+# Read the incident records (path is relative to the working directory) into
+# `data`; later chunks read its lon, lat, Severity, Category, Datetime and
+# Index columns.
+data <- read.csv("GIS_Purpose.csv")
+```
+```{r}
+# Basemap: country polygons returned as an sf object
+world <- ne_countries(scale = "medium", returnclass = "sf")
+# Drop every row that has a missing value, then turn the remaining rows into
+# point geometries built from the lon/lat columns (CRS given as EPSG code 4326)
+data_clean <- na.omit(data)
+data_sf <- st_as_sf(data_clean, coords = c("lon", "lat"), crs = 4326)
+# Incident points, coloured by their Severity value
+incident_points <- geom_sf(
+  data = data_sf,
+  mapping = aes(colour = Severity),
+  size = 0.4
+)
+# Countries first, incident points drawn on top, legend on the right
+my_plot <- ggplot(world) +
+  geom_sf() +
+  incident_points +
+  theme_minimal() +
+  labs(title = "Spatial Distribution of Incidents with World Map Basemap") +
+  theme(legend.position = "right")
+# Write the figure to disk (10 x 8 at 300 dpi)
+ggsave(
+  filename = "my_spatial_plot.png",
+  plot = my_plot,
+  width = 10,
+  height = 8,
+  dpi = 300
+)
+```
+```{r}
+library(lubridate)
+library(ggplot2)
+library(forecast)
+# Build `ts_data`, a monthly time series of incident counts, from `data`.
+# Check for NA values and remove them
+data <- na.omit(data)
+if (nrow(data) == 0) {
+  stop("No complete rows left in 'data' after removing NA values",
+       call. = FALSE)
+}
+# read.csv() does not create date-time columns, so parse Datetime before doing
+# date arithmetic on it.
+# NOTE(review): as_datetime() assumes ISO 8601-style text ("YYYY-MM-DD ...");
+# confirm against the format actually stored in GIS_Purpose.csv.
+incident_time <- data$Datetime
+if (!is.instant(incident_time)) {
+  incident_time <- as_datetime(incident_time)
+}
+if (anyNA(incident_time)) {
+  stop("Datetime contains values that could not be parsed as date-times",
+       call. = FALSE)
+}
+# Aggregate data by month (Month is the first day of each incident's month)
+data$Month <- as_date(floor_date(incident_time, "month"))
+monthly_incidents <- aggregate(Index ~ Month, data, length)
+# Months without any incident are absent from the aggregate; add them back
+# with a count of 0 so consecutive positions in the series are exactly one
+# month apart, which is what ts(frequency = 12) assumes.
+first_month <- min(monthly_incidents$Month)
+last_month <- max(monthly_incidents$Month)
+all_months <- data.frame(Month = seq(first_month, last_month, by = "month"))
+monthly_incidents <- merge(
+  all_months, monthly_incidents,
+  by = "Month", all.x = TRUE
+)
+monthly_incidents$Index[is.na(monthly_incidents$Index)] <- 0L
+# Take the start/end year and month from the earliest/latest month itself.
+# Pairing min(year) with min(month) is wrong whenever the data spans a year
+# boundary (e.g. Oct 2022 - Mar 2023 would give a start of Jan 2022).
+start_year <- year(first_month)
+start_month <- month(first_month)
+end_year <- year(last_month)
+end_month <- month(last_month)
+# Now create the time series object
+ts_data <- ts(
+  monthly_incidents$Index,
+  frequency = 12,
+  start = c(start_year, start_month)
+)
+```
+```{r}
+# Line chart of the monthly incident counts held in ts_data
+plot(
+  ts_data,
+  main = "Monthly Incidents Time Series",
+  xlab = "Time",
+  ylab = "Number of Incidents",
+  col = "blue"
+)
+```
+```{r}
+# Split ts_data into its trend, seasonal and random components and plot them.
+# decompose() stops with an error when the series is not seasonal or covers
+# fewer than two full periods, so that case is reported with a message instead
+# of aborting the whole document.
+has_two_periods <- frequency(ts_data) > 1 &&
+  length(na.omit(ts_data)) >= 2 * frequency(ts_data)
+if (has_two_periods) {
+  decomposed_data <- decompose(ts_data)
+  plot(decomposed_data)
+} else {
+  message("Skipping decomposition: ts_data has fewer than two full periods")
+}
+```
+```{r}
+# Number of incidents per severity level: one row per level, with the row
+# count stored in the Index column
+incidents_by_severity <- aggregate(Index ~ Severity, data = data, FUN = length)
+# Bar chart of those counts, one bar per severity level, with the x-axis
+# labels rotated by 90 degrees
+ggplot(incidents_by_severity) +
+  geom_col(aes(x = Severity, y = Index, fill = Severity)) +
+  labs(
+    title = "Frequency of Incidents by Severity",
+    x = "Severity",
+    y = "Frequency"
+  ) +
+  theme(axis.text.x = element_text(angle = 90, hjust = 1))
+```
+```{r}
+# Spearman rank correlation between the ordinal position of each severity
+# level and how often that level occurs in `data`
+severity_order <- c("Minor", "Moderate", "Severe", "Extreme")
+# Frequency of each severity level
+severity_counts <- table(data$Severity)
+# Position of each observed level within severity_order
+# (NA for any level that is not listed there)
+severity_ranks <- as.numeric(match(names(severity_counts), severity_order))
+cor.test(x = severity_ranks, y = severity_counts, method = "spearman")
+```
+```{r}
+# Assuming 'data' is your data frame and 'Category' is the column with incident types
+# Count incidents per category and keep the (up to) five most frequent ones.
+category_counts <- table(data$Category)
+sorted_counts <- sort(category_counts, decreasing = TRUE)
+# Indexing with [1:5] pads the result with NA entries when fewer than five
+# categories exist, and those NA names then leak into the
+# `%in% names(top_categories)` filters in the following chunks; cap the index
+# at the number of categories actually present instead.
+top_categories <- sorted_counts[seq_len(min(5L, length(sorted_counts)))]
+```
+```{r}
+# Data-frame copy of the top-category counts
+top_categories_df <- as.data.frame(x = top_categories)
+# Rows of the original data whose Category is one of the top categories
+top_data <- subset(data, Category %in% names(top_categories))
+```
+```{r}
+library(dplyr)
+library(ggplot2)
+library(maps)
+# World map of the most frequent incident categories, saved to
+# incident_map.png.
+# Assuming 'data' is your data frame, 'Category' is the column with incident
+# types, and 'lon', 'lat' are your longitude and latitude columns
+# Keep only the top categories, then count incidents for each category at each
+# location (filtering first avoids counting rows that are discarded anyway)
+top_data <- data %>%
+  filter(Category %in% names(top_categories)) %>%
+  count(Category, lon, lat)
+# Get world map data
+world_map <- map_data("world")
+# Create the plot. It is stored as top_category_map rather than `plot` so the
+# variable does not mask base R's plot() function.
+top_category_map <- ggplot(data = world_map, aes(x = long, y = lat)) +
+  geom_polygon(aes(group = group), fill = "gray80", color = "white") +
+  geom_point(
+    data = top_data,
+    aes(x = lon, y = lat, color = Category, size = n),
+    alpha = 0.7
+  ) +
+  scale_size(range = c(4, 16)) + # Adjust the size range as needed
+  scale_color_brewer(palette = "Dark2") +
+  labs(title = "Top 5 Categories of Incidents on World Map",
+       subtitle = "Size of point represents frequency of incidents",
+       size = "Number of Incidents") +
+  theme_minimal() +
+  theme(legend.position = "bottom")
+# Save the plot
+ggsave(
+  "incident_map.png",
+  plot = top_category_map,
+  width = 20,
+  height = 10,
+  dpi = 300
+)
+```
+```{r}
+library(dplyr)
+library(ggplot2)
+library(maps)
+library(scales)  # For more refined control over point sizes
+# Refined world map of the most frequent incident categories, saved to
+# incident_map_refined.png.
+# Assuming 'data' is your data frame, 'Category' is the column with incident
+# types, and 'lon', 'lat' are your longitude and latitude columns
+# Keep only the top categories, then count incidents for each category at each
+# location (filtering first avoids counting rows that are discarded anyway)
+top_data <- data %>%
+  filter(Category %in% names(top_categories)) %>%
+  count(Category, lon, lat) %>%
+  mutate(size = sqrt(n))  # Use square root scaling for point sizes
+# Get world map data
+world_map <- map_data("world")
+# Create the plot with improved aesthetics
+incident_map <- ggplot(data = world_map, aes(x = long, y = lat)) +
+  # The polygons are the land areas, so this fill colours the land
+  geom_polygon(aes(group = group), fill = "lightblue", color = "white") +
+  geom_point(
+    data = top_data,
+    aes(x = lon, y = lat, color = Category, size = size),
+    alpha = 0.6
+  ) +
+  # `size` already holds sqrt(n), so the scale needs no further transformation.
+  # The explicit trans = "identity" was dropped: identity is the default and
+  # newer ggplot2 releases deprecate the `trans` argument name.
+  scale_size_continuous(range = c(1, 12)) +
+  scale_color_brewer(palette = "Dark2", name = "Category") +
+  labs(title = "Top 5 Categories of Incidents on World Map",
+       subtitle = "Size of point represents frequency of incidents",
+       size = "Frequency (sqrt scale)") +  # Legend values are sqrt(n)
+  # Fix the aspect ratio so the map is not stretched (a quick approximation,
+  # not a full map projection)
+  coord_quickmap() +
+  theme_minimal() +
+  theme(legend.position = "bottom",
+        legend.key.size = unit(0.5, "cm"))  # Smaller legend keys
+# Save the plot using the new variable name
+ggsave(
+  "incident_map_refined.png",
+  plot = incident_map,
+  width = 12,
+  height = 8,
+  dpi = 300
+)
+```

IS424_Data_Mining/code/GIS_DataMining_G1/GIS_DataMining_G1.Rproj ADDED Viewed

	@@ -0,0 +1,13 @@

+Version: 1.0
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+RnwWeave: Sweave
+LaTeX: pdfLaTeX