# Derive an `age_group` column for the Lancaster table: the four youngest
# age bands are pooled into "0-19". Any other value of `age` matches no case,
# so case_when() leaves it as NA.
list_of_csv[["Lancaster"]] %>%
  mutate(
    # Create categories; %in% replaces the chained `==` / `|` comparisons
    age_group = dplyr::case_when(
      age %in% c("00_04", "05_09", "10_14", "15_19") ~ "0-19"
    ),
    # Convert to factor; `levels` is spelled out in full instead of relying
    # on partial argument matching of `level`
    age_group = factor(
      age_group,
      levels = c("0-19")
    )
  )
# Bin integer ages into labelled bands. right = FALSE makes every interval
# closed on the left, i.e. [0,5), [5,10), [10,15), [15,20), [20,Inf), so that
# 0 falls in "0-4" (not NA) and 5, 10, 15 and 20 open the band they belong to.
# With right = TRUE the intervals would be (0,5], (5,10], ... and the labels
# would be off by one at every boundary.
cut(
  0:25,
  breaks = c(0, 5, 10, 15, 20, Inf),
  labels = c("0-4", "5-9", "10-14", "15-19", "20+"),
  right = FALSE
)
#>  [1] 0-4   0-4   0-4   0-4   0-4   5-9   5-9   5-9   5-9   5-9   10-14 10-14
#> [13] 10-14 10-14 10-14 15-19 15-19 15-19 15-19 15-19 20+   20+   20+   20+
#> [25] 20+   20+
#> Levels: 0-4 5-9 10-14 15-19 20+
# Derive an `age_group` column for the Lancaster table: the four youngest
# age bands are pooled into "0-19" and every remaining row is labelled
# "Other age bands".
list_of_csv[["Lancaster"]] %>%
  mutate(
    # Create categories; the TRUE branch is the catch-all for all other rows
    age_group = dplyr::case_when(
      age %in% c("00_04", "05_09", "10_14", "15_19") ~ "0-19",
      TRUE ~ "Other age bands"
    ),
    # Convert to factor. Both labels produced above must be listed as levels:
    # factor() turns any value that is not among `levels` into NA, which
    # would silently discard the catch-all category.
    age_group = factor(
      age_group,
      levels = c("0-19", "Other age bands")
    )
  )
# Lookup approach: a named character vector whose names are the original
# age bands and whose values are the collapsed bands they map to.
lookup <- c(
  "00_04" = "00_19",
  "05_09" = "00_19",
  "10_14" = "00_19",
  "15_19" = "00_19",
  "20_29" = "20_29",
  "30_99" = "30_99"
)
# Get some dummy data: 20 original bands drawn with replacement. No seed is
# set, so the values printed below are just one possible draw.
(dat <- sample(names(lookup), size = 20L, replace = TRUE))
#> [1] "15_19" "15_19" "10_14" "15_19" "30_99" "20_29" "05_09" "30_99" "10_14"
#> [10] "30_99" "10_14" "10_14" "05_09" "30_99" "05_09" "00_04" "20_29" "15_19"
#> [19] "20_29" "15_19"
# Use the lookup: indexing by name translates each old band to its new band.
# unname() drops the names carried over from `lookup`; otherwise data.frame()
# would adopt them as row names whenever the drawn values are all distinct.
data.frame(old = dat, new = unname(lookup[dat]))
#> old new
#> 1 15_19 00_19
#> 2 15_19 00_19
#> 3 10_14 00_19
#> 4 15_19 00_19
#> 5 30_99 30_99
#> 6 20_29 20_29
#> 7 05_09 00_19
#> 8 30_99 30_99
#> 9 10_14 00_19
#> 10 30_99 30_99
#> 11 10_14 00_19
#> 12 10_14 00_19
#> 13 05_09 00_19
#> 14 30_99 30_99
#> 15 05_09 00_19
#> 16 00_04 00_19
#> 17 20_29 20_29
#> 18 15_19 00_19
#> 19 20_29 20_29
#> 20 15_19 00_19
# Example data: ten age bands drawn, with a fixed seed, from the names of
# @TimTaylorUKHSA's `lookup` vector
set.seed(1)
dat <- data.frame(
  age = sample(names(lookup), size = 10L, replace = TRUE)
)
library(forcats)
library(dplyr)
# Two ways of collapsing the four youngest bands into "00_19":
#   ageForcats - forcats::fct_collapse() with the bands to merge listed
#   ageFactor  - base factor() with the same label repeated for those levels
mutate(
  dat,
  ageForcats = fct_collapse(
    age,
    `00_19` = c("00_04", "05_09", "10_14", "15_19")
  ),
  ageFactor = factor(
    age,
    levels = c("00_04", "05_09", "10_14", "15_19", "20_29", "30_99"),
    labels = c(rep("00_19", 4L), "20_29", "30_99")
  )
)
# age ageForcats ageFactor
# 1 00_04 00_19 00_19
# 2 15_19 00_19 00_19
# 3 00_04 00_19 00_19
# 4 05_09 00_19 00_19
# 5 20_29 20_29 20_29
# 6 10_14 00_19 00_19
# 7 30_99 30_99 30_99
# 8 05_09 00_19 00_19
# 9 10_14 00_19 00_19
# 10 10_14 00_19 00_19
# Collapse the four youngest age bands into "00_19" in place. The bands are
# matched by value: indexing on as.integer(factor(dat$age)) < 5 is only right
# when all four of those bands occur in the data, because factor() numbers
# just the values it actually sees. If one is absent the codes shift and
# "20_29" or "30_99" would be overwritten too.
dat$age[dat$age %in% c("00_04", "05_09", "10_14", "15_19")] <- "00_19"