::p_load(sf, sfdep, GWmodel, SpatialML, tidyverse, tmap, ggpubr, olsrr, devtools, rsample) pacman
In-class exercise week 10
Imports
Import packages
Data
<- read_rds("data/aspatial/mdata.rds") mdata
set.seed(1234) #To do data sampling
<- initial_split(mdata, prop = 6.5/10,)
resale_split <- training(resale_split)
train_data <- testing(resale_split) test_data
write_rds(train_data, "data/model/train_data.rds")
write_rds(test_data, "data/model/test_data.rds")
<- read_rds("data/model/train_data.rds")
train_data <- read_rds("data/model/test_data.rds") test_data
Building non-spatial multiple linear regression
<- lm(resale_price ~ floor_area_sqm + storey_order +remaining_lease_mths + PROX_CBD +PROX_ELDERLYCARE + PROX_HAWKER + PROX_MRT + PROX_PARK + PROX_MALL + PROX_SUPERMARKET + WITHIN_350M_KINDERGARTEN + WITHIN_350M_CHILDCARE + WITHIN_350M_BUS +WITHIN_1KM_PRISCH,
price_mlr data=train_data)
summary(price_mlr)
Call:
lm(formula = resale_price ~ floor_area_sqm + storey_order + remaining_lease_mths +
PROX_CBD + PROX_ELDERLYCARE + PROX_HAWKER + PROX_MRT + PROX_PARK +
PROX_MALL + PROX_SUPERMARKET + WITHIN_350M_KINDERGARTEN +
WITHIN_350M_CHILDCARE + WITHIN_350M_BUS + WITHIN_1KM_PRISCH,
data = train_data)
Residuals:
Min 1Q Median 3Q Max
-205193 -39120 -1930 36545 472355
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 107601.073 10601.261 10.150 < 2e-16 ***
floor_area_sqm 2780.698 90.579 30.699 < 2e-16 ***
storey_order 14299.298 339.115 42.167 < 2e-16 ***
remaining_lease_mths 344.490 4.592 75.027 < 2e-16 ***
PROX_CBD -16930.196 201.254 -84.124 < 2e-16 ***
PROX_ELDERLYCARE -14441.025 994.867 -14.516 < 2e-16 ***
PROX_HAWKER -19265.648 1273.597 -15.127 < 2e-16 ***
PROX_MRT -32564.272 1744.232 -18.670 < 2e-16 ***
PROX_PARK -5712.625 1483.885 -3.850 0.000119 ***
PROX_MALL -14717.388 2007.818 -7.330 2.47e-13 ***
PROX_SUPERMARKET -26881.938 4189.624 -6.416 1.46e-10 ***
WITHIN_350M_KINDERGARTEN 8520.472 632.812 13.464 < 2e-16 ***
WITHIN_350M_CHILDCARE -4510.650 354.015 -12.741 < 2e-16 ***
WITHIN_350M_BUS 813.493 222.574 3.655 0.000259 ***
WITHIN_1KM_PRISCH -8010.834 491.512 -16.298 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 61650 on 10320 degrees of freedom
Multiple R-squared: 0.7373, Adjusted R-squared: 0.737
F-statistic: 2069 on 14 and 10320 DF, p-value: < 2.2e-16
write_rds(price_mlr, "data/model/price_mlr.rds")
GWR methods
GWR predictive model
<- as_Spatial(train_data)
train_data_sp train_data_sp
class : SpatialPointsDataFrame
features : 10335
extent : 11597.31, 42623.63, 28217.39, 48741.06 (xmin, xmax, ymin, ymax)
crs : +proj=tmerc +lat_0=1.36666666666667 +lon_0=103.833333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +ellps=WGS84 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs
variables : 17
names : resale_price, floor_area_sqm, storey_order, remaining_lease_mths, PROX_CBD, PROX_ELDERLYCARE, PROX_HAWKER, PROX_MRT, PROX_PARK, PROX_GOOD_PRISCH, PROX_MALL, PROX_CHAS, PROX_SUPERMARKET, WITHIN_350M_KINDERGARTEN, WITHIN_350M_CHILDCARE, ...
min values : 218000, 74, 1, 555, 0.999393538715878, 1.98943787433087e-08, 0.0333358643817954, 0.0220407324774434, 0.0441643212802781, 0.0652540365486641, 0, 6.20621206270077e-09, 1.21715176356525e-07, 0, 0, ...
max values : 1186888, 133, 17, 1164, 19.6500691667807, 3.30163731686804, 2.86763031236184, 2.13060636038504, 2.41313695915468, 10.6223726149914, 2.27100643784442, 0.808332738794272, 1.57131703651196, 7, 20, ...
Preparing coordinate data
Extracting coordinate data
<- st_coordinates(mdata)
coords <- st_coordinates(train_data)
coords_train <- st_coordinates(test_data) coords_test
write_rds(coords_train, "data/model/coords_train.rds")
write_rds(coords_test, "data/model/coords_test.rds")
<- read_rds("data/model/coords_train.rds")
coords_train <- read_rds("data/model/coords_test.rds") coords_test
Dropping geometry fields
<- train_data %>%
train_data st_drop_geometry()
Callibrating random forest
set.seed(1234)
<- ranger(resale_price ~ floor_area_sqm + storey_order +remaining_lease_mths + PROX_CBD +PROX_ELDERLYCARE + PROX_HAWKER + PROX_MRT + PROX_PARK + PROX_MALL + PROX_SUPERMARKET + WITHIN_350M_KINDERGARTEN + WITHIN_350M_CHILDCARE + WITHIN_350M_BUS +WITHIN_1KM_PRISCH,
rf data=train_data)
summary(rf)
Length Class Mode
predictions 10335 -none- numeric
num.trees 1 -none- numeric
num.independent.variables 1 -none- numeric
mtry 1 -none- numeric
min.node.size 1 -none- numeric
prediction.error 1 -none- numeric
forest 7 ranger.forest list
splitrule 1 -none- character
treetype 1 -none- character
r.squared 1 -none- numeric
call 3 -none- call
importance.mode 1 -none- character
num.samples 1 -none- numeric
replace 1 -none- logical
print(rf)
Ranger result
Call:
ranger(resale_price ~ floor_area_sqm + storey_order + remaining_lease_mths + PROX_CBD + PROX_ELDERLYCARE + PROX_HAWKER + PROX_MRT + PROX_PARK + PROX_MALL + PROX_SUPERMARKET + WITHIN_350M_KINDERGARTEN + WITHIN_350M_CHILDCARE + WITHIN_350M_BUS + WITHIN_1KM_PRISCH, data = train_data)
Type: Regression
Number of trees: 500
Sample size: 10335
Number of independent variables: 14
Mtry: 3
Target node size: 5
Variable importance mode: none
Splitrule: variance
OOB prediction error (MSE): 728602496
R squared (OOB): 0.9495728
minimum node size is 5 can be increased
Higher R square value in comparison shows random forest performs better.
Geographically weighted random forest
#set.seed(1234)
#gwRF_adaptive <- grf(formula = resale_price ~ floor_area_sqm + storey_order +remaining_lease_mths + PROX_CBD +PROX_ELDERLYCARE + PROX_HAWKER + PROX_MRT + PROX_PARK + PROX_MALL + PROX_SUPERMARKET + WITHIN_350M_KINDERGARTEN + WITHIN_350M_CHILDCARE + WITHIN_350M_BUS +WITHIN_1KM_PRISCH,
# dframe=train_data,
#bw =55,
#kernel = "adaptive",
#coords = coords_train)
#summary(gwRF_adaptive)
#write_rds(gwRF_adaptive,"data/model/gwRF.rds")
#vi_df <- as.data.frame(gwRF_adaptive$Global.Model$variable.importance)
#gwRF_adaptive <- read_rds("data/model/gwRF.rds")
<- cbind(test_data, coords_test) %>%
test_data st_drop_geometry()
#TAKES A LOT OF TIME TO RUN
#gwRD_pred <- predict.grf(gwRF_adaptive,
# test_data,
# x.var.name = "X",
# y.var.name = "Y",
# local.w = 1,
# global.w = 0)