Thank you @nirgrahamuk for your quick feedback, and sorry for my delayed reply.
Actually, I have a dataset with several select-one (single-choice) and select-multiple (multiple-response) variables. First, I want to calculate the frequency of each option of each multiple-response variable,
and second, I want to cross-tabulate each multiple-response variable with a single-choice variable.
Here is a small example with a
script. However, this script does not produce the results I want.
- For multiple response variables: the frequencies reported for the options of the multiple response variables (X and Y) seem incorrect. The script treats them as if there were only one variable with 6 options.
- For the crosstab: I would like to have an output like this:
sex x1 x2 x3 y1 y2 y3
man 0.25 0.17 0.17 0.25 0.17 0.17
woman 0.17 0.42 0.42 0.25 0.42 0.25
### Packages ###
# The functions below rely on dplyr verbs and the %>% pipe, and on tidyr for
# reshaping to long format, so both packages must be attached before they run.
library(dplyr)
library(tidyr)
### Loading data ###
## Two multiple response variables: X and Y (with three options each),
## stored as one 0/1 indicator column per option (x1..x3, y1..y3)
## Two single-choice variables: sex and market
## One row per respondent
data <- tibble::tribble(
  ~sex, ~market, ~x1, ~x2, ~x3, ~y1, ~y2, ~y3,
  "woman", "mk", 0L, 0L, 1L, 0L, 0L, 0L,
  "woman", "mk", 1L, 1L, 1L, 0L, 0L, 1L,
  "woman", "mk", 1L, 1L, 0L, 0L, 1L, 1L,
  "man", "mk", 0L, 0L, 0L, 1L, 0L, 0L,
  "woman", "mk", 1L, 1L, 1L, 0L, 0L, 1L,
  "man", "st", 1L, 1L, 0L, 1L, 1L, 0L,
  "woman", "mk", 1L, 1L, 0L, 0L, 1L, 0L,
  "man", "st", 1L, 1L, 0L, 0L, 1L, 0L,
  "woman", "st", 0L, 0L, 0L, 0L, 1L, 1L,
  "man", "st", 0L, 0L, 1L, 0L, 1L, 0L,
  "man", "st", 0L, 0L, 1L, 1L, 1L, 0L,
  "woman", "st", 1L, 0L, 0L, 0L, 0L, 0L
)
###Frequency of multiple response variables###
# Frequency table for one multiple-response (select-multiple) variable.
#
# Arguments:
#   data  Data frame with one row per respondent and one indicator column per
#         response option (0 = not selected, any other value = selected).
#   v1    Character vector naming the option columns to tabulate. Only these
#         columns enter the table, so pass the options of ONE
#         multiple-response variable to get frequencies for that variable.
#
# Returns a tibble with one row per option (sorted by option name):
#   question          option (column) name
#   freq              number of respondents who selected the option
#   percent           freq as a percentage of all selections across `v1`
#   percent_of_cases  freq as a percentage of the respondents who selected at
#                     least one option in `v1`
#
# Missing answers (NA) are treated as "not selected".
mult_resp <- function(data, v1 = c("x1", "x2", "x3", "y1", "y2", "y3")) {
  # Long format: one row per respondent x option. Only the columns named in
  # `v1` are reshaped; reshaping "everything except sex and market" would pull
  # in options of other variables no matter what `v1` says.
  long <- data %>%
    mutate(id = row_number()) %>% # respondent id for counting cases
    select(id, all_of(v1)) %>%
    mutate(across(all_of(v1), ~ ifelse(.x != 0, 1, 0))) %>%
    pivot_longer(all_of(v1), names_to = "question", values_to = "resp")

  # Number of respondents with at least one selected option
  # ("all zeros" cases are excluded from the denominator)
  n_cases <- long %>%
    group_by(id) %>%
    summarise(n = sum(resp, na.rm = TRUE)) %>%
    summarise(n_cases = sum(n > 0)) %>%
    pull(n_cases)

  # Output table
  long %>%
    group_by(question) %>%
    summarise(freq = sum(resp, na.rm = TRUE)) %>%
    mutate(
      percent = freq / sum(freq) * 100,
      percent_of_cases = freq / n_cases * 100
    )
}
# All six options pooled together (X and Y treated as a single set)
mult_resp(data, v1 = c("x1", "x2", "x3", "y1", "y2", "y3"))
# One table per multiple response variable
mult_resp(data, v1 = c("x1", "x2", "x3"))
mult_resp(data, v1 = c("y1", "y2", "y3"))
### relationship between variables#####
# cross tabulation: variables select one and select multiple
# Cross-tabulate two vectors and return the table as proportions.
#
# Arguments:
#   x, y    Vectors of equal length (e.g. two columns of a data frame).
#           Pairs where either value is NA are dropped.
#   margin  Passed to prop.table():
#             NULL (default) - each cell as a share of the grand total
#             1              - shares within each level of x (rows sum to 1)
#             2              - shares within each level of y (columns sum to 1)
#
# Returns a two-way table of proportions with dimensions named "x" and "y".
tcd <- function(x, y, margin = NULL) {
  # Frequency table, dropping NA values
  tab <- table(x, y, useNA = "no")
  # Convert counts to proportions over the requested margin
  prop.table(tab, margin = margin)
}
# Option x1 against each single-choice variable
tcd(data[["sex"]], data[["x1"]])
tcd(data[["market"]], data[["x1"]])
# Option y1 against each single-choice variable
tcd(data[["sex"]], data[["y1"]])
tcd(data[["market"]], data[["y1"]])
Created on 2023-05-05 with reprex v2.0.2