Thanks for the guidance - I have now included sample data, and have been able to progress with the issue further by renaming Age categories as 1 - 7, and Gender as 1 - 2
- I now get the following NA issue in the tables that are produced. Any thoughts?
- I'd also like to not have to rename categorical variables into numbers for this to work. Not sure how?
library(readxl)
library(weights)
library(anesrake)
library(plyr)
library(dplyr)
library(reshape2)
library(reprex)
library(datapasta)
#Example data#
head(rtest2,10)
#> # A tibble: 10 x 4
#> caseid gender age Answer
#> <dbl> <dbl> <dbl> <dbl>
#> 1 1 1 1 1
#> 2 2 2 2 0
#> 3 3 1 1 1
#> 4 4 1 1 1
#> 5 5 1 4 1
#> 6 6 1 1 0
#> 7 7 2 2 0
#> 8 8 2 1 1
#> 9 9 1 1 0
#> 10 10 1 4 0
datapasta::df_paste(head(rtest2,10))
#> data.frame(
#> caseid = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
#> gender = c(1, 2, 1, 1, 1, 1, 2, 2, 1, 1),
#> age = c(1, 2, 1, 1, 4, 1, 2, 1, 1, 4),
#> Answer = c(1, 0, 1, 1, 1, 0, 0, 1, 0, 0)
#> )
data.frame(
caseid = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
gender = c(1, 2, 1, 1, 1, 1, 2, 2, 1, 1),
age = c(1, 2, 1, 1, 4, 1, 2, 1, 1, 4),
Answer = c(1, 0, 1, 1, 1, 0, 0, 1, 0, 0)
)#Proportions of rtest2 survey#
#> caseid gender age Answer
#> 1 1 1 1 1
#> 2 2 2 2 0
#> 3 3 1 1 1
#> 4 4 1 1 1
#> 5 5 1 4 1
#> 6 6 1 1 0
#> 7 7 2 2 0
#> 8 8 2 1 1
#> 9 9 1 1 0
#> 10 10 1 4 0
wpct(rtest2$gender)
#> 1 2
#> 0.65 0.35
wpct(rtest2$age)
#> 1 2 3 4 5 6 7
#> 0.274 0.246 0.114 0.096 0.076 0.102 0.092
wpct(rtest2$Answer)
#> 0 1
#> 0.516 0.484
# UK Census 2019
gender <- c(.51,.49)
age <- c(0.1,.17,.163,0.166,0.161,0.128,0.112)
# definitions of target list
targets <- list(gender, age)
#important to use same variable names of the dataset
names(targets) <- c("gender", "age")
#id variable
rtest2$caseid <- 1:length(rtest2$gender)
#label levels of targets#
names(targets$gender) <- levels(rtest2$gender)
names(targets$age) <- levels(rtest2$age)
# change table type
rtest2 <- as.data.frame(rtest2)
class(rtest2)
#> [1] "data.frame"
#measure variance in population vs sample
anesrakefinder(targets, rtest2, choosemethod = "total")
#> gender age
#> 0.28 0.50
#raking procedure
raking <- anesrake(targets, rtest2, caseid = rtest2$caseid, verbose = FALSE, cap = 5,choosemethod = "total", type = "pctlim",pctlim = 0.05, nlim = 5, iterate = TRUE, force1 = TRUE)
#> [1] "Raking converged in 11 iterations"
raking_summary <- summary(raking)
rtest2$weight <- raking$weightvec
#to find the unique weights#
rtest2 %>% select(gender, age) %>% unique()
#> gender age
#> 1 1 1
#> 2 2 2
#> 5 1 4
#> 8 2 1
#> 12 1 2
#> 14 2 6
#> 15 1 7
#> 16 2 7
#> 17 1 3
#> 22 1 6
#> 35 1 5
#> 39 2 5
#> 40 2 3
#> 62 2 4
wpct(rtest2$Answer)
#> 0 1
#> 0.516 0.484
wpct(rtest2$Answer, rtest2$weight)
#> 0 1
#> 0.5120287 0.4879713
#Raking summary statistics do not work "NA"#
raking_summary$raking.variables
#> [1] "gender" "age"
raking_summary$gender
#> Target Unweighted N Unweighted % Wtd N Wtd % Change in % Resid. Disc.
#> <NA> 0.51 NA NA NA NA NA NA
#> <NA> 0.49 NA NA NA NA NA NA
#> Total 1.00 0 0 0 0 0 0
#> Orig. Disc.
#> <NA> NA
#> <NA> NA
#> Total 0
raking_summary$age
#> Target Unweighted N Unweighted % Wtd N Wtd % Change in % Resid. Disc.
#> <NA> 0.100 NA NA NA NA NA NA
#> <NA> 0.170 NA NA NA NA NA NA
#> <NA> 0.163 NA NA NA NA NA NA
#> <NA> 0.166 NA NA NA NA NA NA
#> <NA> 0.161 NA NA NA NA NA NA
#> <NA> 0.128 NA NA NA NA NA NA
#> <NA> 0.112 NA NA NA NA NA NA
#> Total 1.000 0 0 0 0 0 0
#> Orig. Disc.
#> <NA> NA
#> <NA> NA
#> <NA> NA
#> <NA> NA
#> <NA> NA
#> <NA> NA
#> <NA> NA
#> Total 0
Created on 2021-02-08 by the reprex package (v1.0.0)