library(tidyverse)
# Fix the RNG seed so the sampled demo data is reproducible.
set.seed(42)

# Demo data: 20 rows with an id, three integer-coded categorical columns
# (region, gender, sector) and four integer answer columns (col1..col4).
# In every col* column, values above 4 are masked to NA and the column is
# then turned into a factor. The enclosing parentheses print the result.
(input_df <- tibble(
  id = seq_len(20),
  region = sample.int(n = 5, size = 20, replace = TRUE),
  gender = sample.int(n = 2, size = 20, replace = TRUE),
  sector = sample.int(n = 3, size = 20, replace = TRUE),
  col1 = sample.int(n = 6, size = 20, replace = TRUE),
  col2 = sample.int(n = 7, size = 20, replace = TRUE),
  col3 = sample.int(n = 8, size = 20, replace = TRUE),
  col4 = sample.int(n = 16, size = 20, replace = TRUE)
) %>%
  mutate(
    across(
      starts_with("col"),
      function(v) forcats::as_factor(ifelse(v > 4, NA, v))
    )
  ))
# Collapse the numeric region code into a label column `newvar`:
#   1-2 -> "A", 4-5 -> "B", 6-7 -> "not seen", anything else -> "region3".
# Conditions are tried top to bottom; the first match wins. Bounds are
# inclusive on both ends.
(recoded_df <- input_df %>%
  mutate(
    newvar = case_when(
      region >= 1 & region <= 2 ~ "A",
      region >= 4 & region <= 5 ~ "B",
      region >= 6 & region <= 7 ~ "not seen",
      TRUE ~ "region3"
    )
  ))
# Number of rows in each (col1, newvar) cell. `.groups = "drop_last"` spells
# out summarise()'s default, so the result stays grouped by col1 exactly as
# before but without the "grouped output" message.
(long_counts <- recoded_df %>%
  group_by(col1, newvar) %>%
  summarise(n = n(), .groups = "drop_last"))

# Column totals: number of rows per newvar label, summed over all col1
# values (including the NA level).
(total_col_counts <- long_counts %>%
  group_by(newvar) %>%
  summarise(sum_n = sum(n)))

# Attach each label's total to its cells and express every cell count as a
# percentage of that total, formatted as text (e.g. "12.5%"). The join key
# is named explicitly so that an extra shared column can never silently
# change the join.
(long_counts_x <- long_counts %>%
  left_join(total_col_counts, by = "newvar") %>%
  mutate(col_pcnt = paste0(round(100 * n / sum_n, digits = 2), "%")))
# One row per col1 value, one column per newvar label, cells holding the
# percentage strings. Combinations that never occur come out as NA.
(tidied_df <- pivot_wider(
  long_counts_x,
  id_cols = col1,
  names_from = newvar,
  values_from = col_pcnt
))

# Single-row table of the per-label totals. The counts are converted to
# character so they can be stacked under the percentage strings.
(summary_row <- pivot_wider(
  total_col_counts,
  names_from = newvar,
  values_from = sum_n,
  values_fn = as.character
))

# Append the totals as a final row labelled "Totals:". The label is added
# with mutate() rather than base cbind() so the row stays a tibble with a
# plain character label; bind_rows() matches columns by name, so the
# position of col1 in the totals row does not matter.
(collated_df <- bind_rows(
  tidied_df,
  mutate(summary_row, col1 = "Totals:")
))

# Replace remaining NA cells with empty strings for display. `.cols` is
# given explicitly because calling across() without it is deprecated.
# NOTE(review): if collated_df is still grouped by col1 at this point,
# across() skips col1 and an NA label there is left as NA; confirm that
# this is intended.
(cleaned_df <- mutate(
  collated_df,
  across(everything(), ~ if_else(is.na(.x), "", .x))
))