Spaces:
Build error
Build error
WIP
Browse files- .gitattributes +4 -0
- IS424_Data_Mining/code/GIS_DataMining_G1/.gitignore +4 -0
- IS424_Data_Mining/code/GIS_DataMining_G1/Data_mining_G1.R +0 -0
- IS424_Data_Mining/code/GIS_DataMining_G1/Datamining_qf.Rmd +191 -0
- IS424_Data_Mining/code/GIS_DataMining_G1/GIS_DataMining_G1.Rproj +13 -0
- IS424_Data_Mining/code/GIS_DataMining_G1/GIS_Purpose.csv +0 -0
- IS424_Data_Mining/code/GIS_DataMining_G1/incident_map.png +3 -0
- IS424_Data_Mining/code/GIS_DataMining_G1/incident_map_refined.png +3 -0
- IS424_Data_Mining/code/GIS_DataMining_G1/my_spatial_plot.png +3 -0
- IS424_Data_Mining/code/GIS_DataMining_G1/my_spatial_plot1.png +3 -0
.gitattributes
CHANGED
@@ -48,3 +48,7 @@ data/cleaned_data_with_categories.csv filter=lfs diff=lfs merge=lfs -text
|
|
48 |
data/processed_data2.parquet filter=lfs diff=lfs merge=lfs -text
|
49 |
data/topic_viz_benchmark_moderate.html filter=lfs diff=lfs merge=lfs -text
|
50 |
data/topic_viz_benchmark_severe.html filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
48 |
data/processed_data2.parquet filter=lfs diff=lfs merge=lfs -text
|
49 |
data/topic_viz_benchmark_moderate.html filter=lfs diff=lfs merge=lfs -text
|
50 |
data/topic_viz_benchmark_severe.html filter=lfs diff=lfs merge=lfs -text
|
51 |
+
IS424_Data_Mining/code/GIS_DataMining_G1/incident_map_refined.png filter=lfs diff=lfs merge=lfs -text
|
52 |
+
IS424_Data_Mining/code/GIS_DataMining_G1/my_spatial_plot.png filter=lfs diff=lfs merge=lfs -text
|
53 |
+
IS424_Data_Mining/code/GIS_DataMining_G1/my_spatial_plot1.png filter=lfs diff=lfs merge=lfs -text
|
54 |
+
IS424_Data_Mining/code/GIS_DataMining_G1/incident_map.png filter=lfs diff=lfs merge=lfs -text
|
IS424_Data_Mining/code/GIS_DataMining_G1/.gitignore
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.Rproj.user
|
2 |
+
.Rhistory
|
3 |
+
.RData
|
4 |
+
.Ruserdata
|
IS424_Data_Mining/code/GIS_DataMining_G1/Data_mining_G1.R
ADDED
File without changes
|
IS424_Data_Mining/code/GIS_DataMining_G1/Datamining_qf.Rmd
ADDED
@@ -0,0 +1,191 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
```{r}
|
2 |
+
# Install the spatial packages only when they are missing, so that re-running
# or knitting this chunk does not reinstall them every time.
needed_pkgs <- c("sf", "raster")
missing_pkgs <- needed_pkgs[
  !vapply(needed_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
|
3 |
+
|
4 |
+
# Load packages
|
5 |
+
```
|
6 |
+
|
7 |
+
```{r}
|
8 |
+
# Install dplyr only when it is not already available, instead of
# reinstalling it on every run of this chunk.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
|
9 |
+
```
|
10 |
+
|
11 |
+
```{r}
|
12 |
+
# Load necessary libraries
|
13 |
+
library(sf)
|
14 |
+
library(ggplot2)
|
15 |
+
library(rnaturalearth)
|
16 |
+
library(rnaturalearthdata)
|
17 |
+
# Read your CSV data
|
18 |
+
data <- read.csv("GIS_Purpose.csv")
|
19 |
+
```
|
20 |
+
|
21 |
+
```{r}
|
22 |
+
data_clean <- na.omit(data)
|
23 |
+
|
24 |
+
# Convert the cleaned data frame to an sf object, specifying the coordinates and CRS (Coordinate Reference System)
|
25 |
+
data_sf <- st_as_sf(data_clean, coords = c("lon", "lat"), crs = 4326)
|
26 |
+
|
27 |
+
# Get world map data
|
28 |
+
world <- ne_countries(scale = "medium", returnclass = "sf")
|
29 |
+
|
30 |
+
# Plot the world map with points from your data
|
31 |
+
my_plot <- ggplot(data = world) +
|
32 |
+
geom_sf() + # This plots the world map as a base layer
|
33 |
+
geom_sf(data = data_sf, aes(color = Severity), size = 0.4) + # This adds your points on top
|
34 |
+
theme_minimal() +
|
35 |
+
labs(title = "Spatial Distribution of Incidents with World Map Basemap") +
|
36 |
+
theme(legend.position = "right") # Adjust legend position if needed
|
37 |
+
|
38 |
+
# Save the plot to a file
|
39 |
+
ggsave("my_spatial_plot.png", plot = my_plot, width = 10, height = 8, dpi = 300)
|
40 |
+
```
|
41 |
+
|
42 |
+
```{r}
|
43 |
+
library(lubridate)
|
44 |
+
library(ggplot2)
|
45 |
+
library(forecast)
|
46 |
+
|
47 |
+
# Check for NA values and remove them
|
48 |
+
data <- na.omit(data)
|
49 |
+
|
50 |
+
# Aggregate data by month
|
51 |
+
# Parse Datetime explicitly before rounding down to the month: read.csv()
# returns text columns as character, while floor_date() is documented for
# date-time objects.
# NOTE(review): assumes standard "YYYY-MM-DD[ HH:MM:SS]" timestamps - confirm
# against GIS_Purpose.csv. UTC is used so month boundaries do not depend on
# the local time zone of the machine running the notebook.
data$Month <- floor_date(as.POSIXct(data$Datetime, tz = "UTC"), "month")
|
52 |
+
monthly_incidents <- aggregate(Index ~ Month, data, length)
|
53 |
+
|
54 |
+
# Make sure that there are no NA values
|
55 |
+
monthly_incidents <- na.omit(monthly_incidents)
|
56 |
+
|
57 |
+
# Assuming that you've verified the 'monthly_incidents' dataframe and it looks correct
|
58 |
+
# Create a time series object, checking the start and end values
|
59 |
+
start_year <- min(year(monthly_incidents$Month), na.rm = TRUE)
|
60 |
+
# Month of the earliest observation. Taking min(month(...)) instead would give
# the smallest month number seen in ANY year (e.g. 1 for a series that starts
# in November), which shifts the whole time series when passed to ts().
start_month <- month(min(monthly_incidents$Month, na.rm = TRUE))
|
61 |
+
end_year <- max(year(monthly_incidents$Month), na.rm = TRUE)
|
62 |
+
# Month of the latest observation. max(month(...)) would instead return the
# largest month number seen in any year, not the month the series ends in.
end_month <- month(max(monthly_incidents$Month, na.rm = TRUE))
|
63 |
+
|
64 |
+
# Check if start date is after end date
|
65 |
+
if (make_date(start_year, start_month) > make_date(end_year, end_month)) {
|
66 |
+
stop("'start' cannot be after 'end'")
|
67 |
+
}
|
68 |
+
|
69 |
+
# Now create the time series object
|
70 |
+
ts_data <- ts(monthly_incidents$Index, frequency=12, start=c(start_year, start_month))
|
71 |
+
|
72 |
+
```
|
73 |
+
|
74 |
+
```{r}
|
75 |
+
plot(ts_data, main = "Monthly Incidents Time Series", xlab = "Time", ylab = "Number of Incidents", col = "blue")
|
76 |
+
|
77 |
+
|
78 |
+
```
|
79 |
+
|
80 |
+
```{r}
|
81 |
+
decomposed_data <- decompose(ts_data)
|
82 |
+
plot(decomposed_data)
|
83 |
+
|
84 |
+
```
|
85 |
+
|
86 |
+
```{r}
|
87 |
+
incidents_by_severity <- aggregate(Index ~ Severity, data = data, FUN = length)
|
88 |
+
|
89 |
+
# Visualize the number of incidents by Severity
|
90 |
+
ggplot(incidents_by_severity, aes(x = Severity, y = Index, fill = Severity)) +
|
91 |
+
geom_bar(stat = "identity") +
|
92 |
+
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
|
93 |
+
labs(x = "Severity", y = "Frequency", title = "Frequency of Incidents by Severity")
|
94 |
+
```
|
95 |
+
|
96 |
+
```{r}
|
97 |
+
# Assuming 'data' is your dataframe and 'Severity' is the column with the severity level
|
98 |
+
# First, count the frequency of each severity level
|
99 |
+
severity_counts <- table(data$Severity)
|
100 |
+
|
101 |
+
# Convert the names of the table (the severity levels) to numeric ranks
|
102 |
+
severity_ranks <- as.numeric(factor(names(severity_counts),
|
103 |
+
levels = c("Minor", "Moderate", "Severe", "Extreme")))
|
104 |
+
|
105 |
+
# Perform Spearman's rank correlation test between severity ranks and their frequencies
|
106 |
+
cor.test(severity_ranks, severity_counts, method = "spearman")
|
107 |
+
|
108 |
+
```
|
109 |
+
|
110 |
+
```{r}
|
111 |
+
# Assuming 'data' is your data frame and 'Category' is the column with incident types
|
112 |
+
category_counts <- table(data$Category)
|
113 |
+
# Keep at most the five most frequent categories. head() stops at the end of
# the table, whereas indexing with [1:5] pads the result with NA entries when
# fewer than five categories are present.
top_categories <- head(sort(category_counts, decreasing = TRUE), 5)
|
114 |
+
```
|
115 |
+
|
116 |
+
```{r}
|
117 |
+
# Convert table to data frame for filtering
|
118 |
+
top_categories_df <- as.data.frame(top_categories)
|
119 |
+
|
120 |
+
# Filter your original data for only top categories
|
121 |
+
top_data <- data[data$Category %in% names(top_categories), ]
|
122 |
+
|
123 |
+
```
|
124 |
+
|
125 |
+
```{r}
|
126 |
+
library(dplyr)
|
127 |
+
library(ggplot2)
|
128 |
+
library(maps)
|
129 |
+
|
130 |
+
# Assuming 'data' is your data frame, 'Category' is the column with incident types, and 'lon', 'lat' are your longitude and latitude columns
|
131 |
+
|
132 |
+
# Calculate counts of incidents for each category at each location
|
133 |
+
top_data <- data %>%
|
134 |
+
count(Category, lon, lat) %>%
|
135 |
+
filter(Category %in% names(top_categories))
|
136 |
+
|
137 |
+
# Get world map data
|
138 |
+
world_map <- map_data("world")
|
139 |
+
|
140 |
+
# Create the plot
|
141 |
+
plot <- ggplot(data = world_map, aes(x = long, y = lat)) +
|
142 |
+
geom_polygon(aes(group = group), fill = "gray80", color = "white") +
|
143 |
+
geom_point(data = top_data, aes(x = lon, y = lat, color = Category, size = n), alpha = 0.7) +
|
144 |
+
scale_size(range = c(4, 16)) + # Adjust the size range as needed
|
145 |
+
scale_color_brewer(palette = "Dark2") +
|
146 |
+
labs(title = "Top 5 Categories of Incidents on World Map",
|
147 |
+
subtitle = "Size of point represents frequency of incidents",
|
148 |
+
size = "Number of Incidents") +
|
149 |
+
theme_minimal() +
|
150 |
+
theme(legend.position = "bottom")
|
151 |
+
|
152 |
+
# Save the plot
|
153 |
+
ggsave("incident_map.png", plot = plot, width = 20, height = 10, dpi = 300)
|
154 |
+
|
155 |
+
```
|
156 |
+
|
157 |
+
```{r}
|
158 |
+
library(dplyr)
|
159 |
+
library(ggplot2)
|
160 |
+
library(maps)
|
161 |
+
library(scales) # For more refined control over point sizes
|
162 |
+
|
163 |
+
# Assuming 'data' is your data frame, 'Category' is the column with incident types, and 'lon', 'lat' are your longitude and latitude columns
|
164 |
+
|
165 |
+
# Calculate counts of incidents for each category at each location
|
166 |
+
top_data <- data %>%
|
167 |
+
count(Category, lon, lat) %>%
|
168 |
+
filter(Category %in% names(top_categories)) %>%
|
169 |
+
mutate(size = sqrt(n)) # Use square root scaling for point sizes
|
170 |
+
|
171 |
+
# Get world map data
|
172 |
+
world_map <- map_data("world")
|
173 |
+
|
174 |
+
# Create the plot with improved aesthetics
|
175 |
+
incident_map <- ggplot(data = world_map, aes(x = long, y = lat)) +
|
176 |
+
geom_polygon(aes(group = group), fill = "lightblue", color = "white") + # Fill the polygons drawn from world_map in light blue with white outlines (the fill applies to the polygons themselves, not to the plot background)
|
177 |
+
geom_point(data = top_data, aes(x = lon, y = lat, color = Category, size = size), alpha = 0.6) +
|
178 |
+
scale_size_continuous(trans = "identity", range = c(1, 12)) + # Use identity transformation and adjust the size range
|
179 |
+
scale_color_brewer(palette = "Dark2", name = "Category") +
|
180 |
+
labs(title = "Top 5 Categories of Incidents on World Map",
|
181 |
+
subtitle = "Size of point represents frequency of incidents",
|
182 |
+
size = "Frequency (sqrt scale)") + # Updated legend title to reflect sqrt scaling
|
183 |
+
coord_quickmap() + # Use an equirectangular projection
|
184 |
+
theme_minimal() +
|
185 |
+
theme(legend.position = "bottom",
|
186 |
+
legend.key.size = unit(0.5, "cm")) # Adjust legend key size for better appearance
|
187 |
+
|
188 |
+
# Save the plot using the new variable name
|
189 |
+
ggsave("incident_map_refined.png", plot = incident_map, width = 12, height = 8, dpi = 300) # Adjusted dimensions for a better aspect ratio
|
190 |
+
|
191 |
+
```
|
IS424_Data_Mining/code/GIS_DataMining_G1/GIS_DataMining_G1.Rproj
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Version: 1.0
|
2 |
+
|
3 |
+
RestoreWorkspace: Default
|
4 |
+
SaveWorkspace: Default
|
5 |
+
AlwaysSaveHistory: Default
|
6 |
+
|
7 |
+
EnableCodeIndexing: Yes
|
8 |
+
UseSpacesForTab: Yes
|
9 |
+
NumSpacesForTab: 2
|
10 |
+
Encoding: UTF-8
|
11 |
+
|
12 |
+
RnwWeave: Sweave
|
13 |
+
LaTeX: pdfLaTeX
|
IS424_Data_Mining/code/GIS_DataMining_G1/GIS_Purpose.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
IS424_Data_Mining/code/GIS_DataMining_G1/incident_map.png
ADDED
Git LFS Details
|
IS424_Data_Mining/code/GIS_DataMining_G1/incident_map_refined.png
ADDED
Git LFS Details
|
IS424_Data_Mining/code/GIS_DataMining_G1/my_spatial_plot.png
ADDED
Git LFS Details
|
IS424_Data_Mining/code/GIS_DataMining_G1/my_spatial_plot1.png
ADDED
Git LFS Details
|