dh-mc committed on
Commit
23c2526
·
1 Parent(s): 4132e15
.gitattributes CHANGED
@@ -48,3 +48,7 @@ data/cleaned_data_with_categories.csv filter=lfs diff=lfs merge=lfs -text
48
  data/processed_data2.parquet filter=lfs diff=lfs merge=lfs -text
49
  data/topic_viz_benchmark_moderate.html filter=lfs diff=lfs merge=lfs -text
50
  data/topic_viz_benchmark_severe.html filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
48
  data/processed_data2.parquet filter=lfs diff=lfs merge=lfs -text
49
  data/topic_viz_benchmark_moderate.html filter=lfs diff=lfs merge=lfs -text
50
  data/topic_viz_benchmark_severe.html filter=lfs diff=lfs merge=lfs -text
51
+ IS424_Data_Mining/code/GIS_DataMining_G1/incident_map_refined.png filter=lfs diff=lfs merge=lfs -text
52
+ IS424_Data_Mining/code/GIS_DataMining_G1/my_spatial_plot.png filter=lfs diff=lfs merge=lfs -text
53
+ IS424_Data_Mining/code/GIS_DataMining_G1/my_spatial_plot1.png filter=lfs diff=lfs merge=lfs -text
54
+ IS424_Data_Mining/code/GIS_DataMining_G1/incident_map.png filter=lfs diff=lfs merge=lfs -text
IS424_Data_Mining/code/GIS_DataMining_G1/.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ .Rproj.user
2
+ .Rhistory
3
+ .RData
4
+ .Ruserdata
IS424_Data_Mining/code/GIS_DataMining_G1/Data_mining_G1.R ADDED
File without changes
IS424_Data_Mining/code/GIS_DataMining_G1/Datamining_qf.Rmd ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ```{r eval=FALSE}
2
+ # One-time setup: run manually. Not evaluated on knit (eval=FALSE), so rendering
3
+ # does not reinstall packages or stop because no CRAN mirror is configured.
4
+ install.packages(c("sf", "raster"))
5
+ ```
6
+
7
+ ```{r eval=FALSE}
8
+ install.packages(c("dplyr"))
9
+ ```
10
+
11
+ ```{r}
12
+ # Load necessary libraries
13
+ library(sf)
14
+ library(ggplot2)
15
+ library(rnaturalearth)
16
+ library(rnaturalearthdata)
17
+ # Read your CSV data
18
+ data <- read.csv("GIS_Purpose.csv")
19
+ ```
20
+
21
+ ```{r}
22
+ data_clean <- na.omit(data)
23
+
24
+ # Convert the cleaned data frame to an sf object, specifying the coordinates and CRS (Coordinate Reference System)
25
+ data_sf <- st_as_sf(data_clean, coords = c("lon", "lat"), crs = 4326)
26
+
27
+ # Get world map data
28
+ world <- ne_countries(scale = "medium", returnclass = "sf")
29
+
30
+ # Plot the world map with points from your data
31
+ my_plot <- ggplot(data = world) +
32
+ geom_sf() + # This plots the world map as a base layer
33
+ geom_sf(data = data_sf, aes(color = Severity), size = 0.4) + # This adds your points on top
34
+ theme_minimal() +
35
+ labs(title = "Spatial Distribution of Incidents with World Map Basemap") +
36
+ theme(legend.position = "right") # Adjust legend position if needed
37
+
38
+ # Save the plot to a file
39
+ ggsave("my_spatial_plot.png", plot = my_plot, width = 10, height = 8, dpi = 300)
40
+ ```
41
+
42
+ ```{r}
43
+ library(lubridate)
44
+ library(ggplot2)
45
+ library(forecast)
46
+
47
+ # Check for NA values and remove them
48
+ data <- na.omit(data)
49
+
50
+ # Aggregate data by month
51
+ data$Month <- floor_date(data$Datetime, "month")
52
+ monthly_incidents <- aggregate(Index ~ Month, data, length)
53
+
54
+ # Make sure that there are no NA values
55
+ monthly_incidents <- na.omit(monthly_incidents)
56
+
57
+ # Anchor the series on real dates: year and month are both read from the same earliest/latest Month value.
58
+ # (Taking min(year) and min(month) independently can give a start month that is not the first observation.)
59
+ start_year <- year(min(monthly_incidents$Month, na.rm = TRUE))
60
+ start_month <- month(min(monthly_incidents$Month, na.rm = TRUE))
61
+ end_year <- year(max(monthly_incidents$Month, na.rm = TRUE))
62
+ end_month <- month(max(monthly_incidents$Month, na.rm = TRUE))
63
+
64
+ # Check if start date is after end date
65
+ if (make_date(start_year, start_month) > make_date(end_year, end_month)) {
66
+ stop("'start' cannot be after 'end'")
67
+ }
68
+ # NOTE(review): aggregate() only returns months that have rows; a month with zero incidents would be absent and shift later values - confirm there are no gaps.
69
+ # Build the monthly series (frequency = 12) starting at the first observed month
70
+ ts_data <- ts(monthly_incidents$Index, frequency=12, start=c(start_year, start_month))
71
+
72
+ ```
73
+
74
+ ```{r}
75
+ plot(ts_data, main = "Monthly Incidents Time Series", xlab = "Time", ylab = "Number of Incidents", col = "blue")
76
+
77
+
78
+ ```
79
+
80
+ ```{r}
81
+ decomposed_data <- decompose(ts_data)
82
+ plot(decomposed_data)
83
+
84
+ ```
85
+
86
+ ```{r}
87
+ incidents_by_severity <- aggregate(Index ~ Severity, data = data, FUN = length)
88
+
89
+ # Visualize the number of incidents by Severity
90
+ ggplot(incidents_by_severity, aes(x = Severity, y = Index, fill = Severity)) +
91
+ geom_bar(stat = "identity") +
92
+ theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
93
+ labs(x = "Severity", y = "Frequency", title = "Frequency of Incidents by Severity")
94
+ ```
95
+
96
+ ```{r}
97
+ # Assuming 'data' is your dataframe and 'Severity' is the column with the severity level
98
+ # First, count the frequency of each severity level
99
+ severity_counts <- table(data$Severity)
100
+
101
+ # Convert the names of the table (the severity levels) to numeric ranks
102
+ severity_ranks <- as.numeric(factor(names(severity_counts),
103
+ levels = c("Minor", "Moderate", "Severe", "Extreme")))
104
+
105
+ # Perform Spearman's rank correlation test between severity ranks and their frequencies
106
+ cor.test(severity_ranks, severity_counts, method = "spearman")
107
+
108
+ ```
109
+
110
+ ```{r}
111
+ # Count incidents per Category and keep the most frequent ones (at most five; head() never pads with NA)
112
+ category_counts <- table(data$Category)
113
+ top_categories <- head(sort(category_counts, decreasing = TRUE), 5)
114
+ ```
115
+
116
+ ```{r}
117
+ # Convert table to data frame for filtering
118
+ top_categories_df <- as.data.frame(top_categories)
119
+
120
+ # Filter your original data for only top categories
121
+ top_data <- data[data$Category %in% names(top_categories), ]
122
+
123
+ ```
124
+
125
+ ```{r}
126
+ library(dplyr)
127
+ library(ggplot2)
128
+ library(maps)
129
+
130
+ # Assuming 'data' is your data frame, 'Category' is the column with incident types, and 'lon', 'lat' are your longitude and latitude columns
131
+
132
+ # Calculate counts of incidents for each category at each location
133
+ top_data <- data %>%
134
+ count(Category, lon, lat) %>%
135
+ filter(Category %in% names(top_categories))
136
+
137
+ # Get world map data
138
+ world_map <- map_data("world")
139
+
140
+ # Create the plot
141
+ plot <- ggplot(data = world_map, aes(x = long, y = lat)) +
142
+ geom_polygon(aes(group = group), fill = "gray80", color = "white") +
143
+ geom_point(data = top_data, aes(x = lon, y = lat, color = Category, size = n), alpha = 0.7) +
144
+ scale_size(range = c(4, 16)) + # Adjust the size range as needed
145
+ scale_color_brewer(palette = "Dark2") +
146
+ labs(title = "Top 5 Categories of Incidents on World Map",
147
+ subtitle = "Size of point represents frequency of incidents",
148
+ size = "Number of Incidents") +
149
+ theme_minimal() +
150
+ theme(legend.position = "bottom")
151
+
152
+ # Save the plot
153
+ ggsave("incident_map.png", plot = plot, width = 20, height = 10, dpi = 300)
154
+
155
+ ```
156
+
157
+ ```{r}
158
+ library(dplyr)
159
+ library(ggplot2)
160
+ library(maps)
161
+ library(scales) # For more refined control over point sizes
162
+
163
+ # Assuming 'data' is your data frame, 'Category' is the column with incident types, and 'lon', 'lat' are your longitude and latitude columns
164
+
165
+ # Calculate counts of incidents for each category at each location
166
+ top_data <- data %>%
167
+ count(Category, lon, lat) %>%
168
+ filter(Category %in% names(top_categories)) %>%
169
+ mutate(size = sqrt(n)) # Use square root scaling for point sizes
170
+
171
+ # Get world map data
172
+ world_map <- map_data("world")
173
+
174
+ # Build the refined map: world polygons as the base layer, incident points drawn on top
175
+ incident_map <- ggplot(data = world_map, aes(x = long, y = lat)) +
176
+ geom_polygon(aes(group = group), fill = "lightblue", color = "white") + # Fill applies to the map polygons (land), outlined in white
177
+ geom_point(data = top_data, aes(x = lon, y = lat, color = Category, size = size), alpha = 0.6) +
178
+ scale_size_continuous(trans = "identity", range = c(1, 12)) + # No extra transformation; point sizes are rescaled into the range 1-12
179
+ scale_color_brewer(palette = "Dark2", name = "Category") +
180
+ labs(title = "Top 5 Categories of Incidents on World Map",
181
+ subtitle = "Size of point represents frequency of incidents",
182
+ size = "Frequency (sqrt scale)") + # Legend title notes that the size column is sqrt-scaled
183
+ coord_quickmap() + # Quick approximation of a map aspect ratio rather than a full projection
184
+ theme_minimal() +
185
+ theme(legend.position = "bottom",
186
+ legend.key.size = unit(0.5, "cm")) # Adjust legend key size for better appearance
187
+
188
+ # Save the plot using the new variable name
189
+ ggsave("incident_map_refined.png", plot = incident_map, width = 12, height = 8, dpi = 300) # Adjusted dimensions for a better aspect ratio
190
+
191
+ ```
IS424_Data_Mining/code/GIS_DataMining_G1/GIS_DataMining_G1.Rproj ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Version: 1.0
2
+
3
+ RestoreWorkspace: Default
4
+ SaveWorkspace: Default
5
+ AlwaysSaveHistory: Default
6
+
7
+ EnableCodeIndexing: Yes
8
+ UseSpacesForTab: Yes
9
+ NumSpacesForTab: 2
10
+ Encoding: UTF-8
11
+
12
+ RnwWeave: Sweave
13
+ LaTeX: pdfLaTeX
IS424_Data_Mining/code/GIS_DataMining_G1/GIS_Purpose.csv ADDED
The diff for this file is too large to render. See raw diff
 
IS424_Data_Mining/code/GIS_DataMining_G1/incident_map.png ADDED

Git LFS Details

  • SHA256: 0ea179bcb63987a2ac177ff4029897b540329e1bf07ededb572415dd1b4368c5
  • Pointer size: 132 Bytes
  • Size of remote file: 1.91 MB
IS424_Data_Mining/code/GIS_DataMining_G1/incident_map_refined.png ADDED

Git LFS Details

  • SHA256: 8558b21eb46ede29793d1e4cab9b6825fd0dced1d3dcf80d437c3e575b4bf5d3
  • Pointer size: 132 Bytes
  • Size of remote file: 1.07 MB
IS424_Data_Mining/code/GIS_DataMining_G1/my_spatial_plot.png ADDED

Git LFS Details

  • SHA256: bc0ab41b70561e31beea4d1f00e5f03e18ac44d32eb1f44ca72615baf53559d1
  • Pointer size: 131 Bytes
  • Size of remote file: 820 kB
IS424_Data_Mining/code/GIS_DataMining_G1/my_spatial_plot1.png ADDED

Git LFS Details

  • SHA256: ea45cba662b990d95dd4d73801af2c5ef7995cdcb904bb48fc7c524ce3e1eb7a
  • Pointer size: 131 Bytes
  • Size of remote file: 414 kB