Thank you for creating the package, it's easy to use and the examples are very useful. I want to perform stepwise linear regression using backward elimination to find the optimal model based on the r-squared. So far, I managed to do that using "brute-force" (please see the code below).
I have a sf object with two columns. The column I am interested in is called fclass. This columns has unique values which I want to use them as predictors. This means that, my baseline model will have all the classes (i.e., the sf object itself). The stepwise model will eliminate the unique values from the fclass column and eventually will print the remaining unique values which yielded the largest r-squared.
library(pacman)
pacman::p_load(terra, sf, dplyr)
ntl <- rast("path/ntl.tif") # response
v <- st_read("path/road.shp") # sf object
# baseline r2
vterra <- vect(v)
ref <- rast("path/pop.tif") # get ext and pixel size
r <- rast(v, res = res(ref), ext = ext(ref))
x <- rasterizeGeom(vterra, r, "length", "m")
x_res <- resample(x, ntl, "average")
s <- c(ntl, x_res)
names(s) <- c("ntl", "road")
# linear model containing all the unique values from the predictor variable
m <- lm(ntl ~ road, s, na.action = na.exclude)
baseline <- sqrt(summary(m)$adj.r.squared)
orig_baseline <- sqrt(summary(m)$adj.r.squared)
classes <- unique(v$fclass)
inclasses <- unique(v$fclass)
i <- 1
while (i <= length(classes)) {
class <- classes[i]
print(paste0("current class ", class))
print(paste0("orig baseline: ", orig_baseline, " - baseline: ", baseline))
print(classes)
v_filtered <- v[v$fclass != class, ]
vterra <- vect(v_filtered)
r <- rast(v, res = res(ref), ext = ext(ref))
x <- rasterizeGeom(vterra, r, "length", "m")
x_res <- resample(x, ntl, "average")
s <- c(ntl, x_res)
names(s) <- c("ntl", "road")
m <- lm(ntl ~ road, s, na.action = na.exclude)
class_r2 <- sqrt(summary(m)$adj.r.squared)
if (class_r2 > baseline) {
classes <- classes[-i]
baseline <- class_r2
} else {
print("class_r2 is less than baseline")
print(paste0("class_r2: ", class_r2, " - baseline: ", baseline))
i <- i + 1
}
}
but it's not efficient (it takes ~ 5 minutes).
head(v, 6)
Simple feature collection with 6 features and 1 field
Geometry type: MULTILINESTRING
Dimension: XY
Bounding box: xmin: 598675.9 ymin: 7111459 xmax: 609432.8 ymax: 7118729
Projected CRS: WGS 84 / UTM zone 35S
fclass geometry
1 secondary MULTILINESTRING ((598675.9 ...
2 secondary MULTILINESTRING ((600641.7 ...
3 residential MULTILINESTRING ((601734.8 ...
4 residential MULTILINESTRING ((601163.9 ...
5 residential MULTILINESTRING ((601104.2 ...
6 motorway_link MULTILINESTRING ((609432.8 ...
unique(v$fclass)
[1] "secondary" "residential" "motorway_link" "service" "primary" "unclassified" "motorway"
[8] "tertiary" "trunk" "primary_link" "footway" "track" "secondary_link" "unknown"
[15] "living_street" "pedestrian" "path" "bridleway" "steps" "trunk_link" "track_grade1"
[22] "track_grade3" "track_grade5" "cycleway" "track_grade4" "tertiary_link" "track_grade2"
Best wishes.