multi-choice columns out do not equal the columns in

Hi all, I am struggling with running the script below, and please, I need help.

This is the error message "Error in run_json_parsing(data = jdata) : Your multi-choice columns out do not equal the columns in".

The script below is a part of the script I am working with (not written by me).

get_multi_choice_Qs <- function(with_choices_data, cols_in, cols_out) {
     
     if (!exists("multi_choice_Qs")) {
          print("You have not entered any Multiple Choice subquestions, such as 'what behaviors do you see?', 'what sides of the animal are visible?'")
          combined <- NULL
     } else {
          if(missing(cols_out)) cols_out <- cols_in
          print("Getting multi-choice questions.")
          print(paste("The subquestion", cols_in, "will be returned as", cols_out)) # paste returns a 1:1 (so three returns for three columns); cat returns all at once
          
          stopifnot(length(cols_in) == length(cols_out))
          combined <- with_choices_data
          
          for (m in 1:length(cols_in)) {
               #col_in    <- multi_choice_Qs[m]
               #col_out   <- multi_choice_colnames[m]
               
               col_in    <- cols_in[m]
               col_out   <- cols_out[m]
               array_ind <- paste(col_out, "ind", m, sep=".")
               prepend   <- as.character(col_out)
               
               multi_choice_m <- with_choices_data %>% 
                    enter_object("answers") %>%
                    enter_object(col_in) %>%
                    gather_array(column.name = array_ind) %>%
                    append_values_string(col_out) 
               
               # Spread columns
               midway <- multi_choice_m %>% data.frame %>%
                    select_(., paste0("-", array_ind)) %>% #have to paste these for standard evaluation
                    mutate(., value_present = 1, pre_col = prepend) %>%
                    unite_(., "out", c("pre_col", col_out)) 
               
               #holler if any weirdness with duplicate entries in the answers
               check_duplicate_answers <- midway %>% 
                    group_by(subject_ids, user_name, classification_id, submission_index, choice, out) %>% 
                    mutate(dups = n()) %>% 
                    filter(dups > 1) 
               check <- check_duplicate_answers %>%
                    nrow() %>% 
                    as.numeric()
               
               if(check > 0) {
                    print("These classifications have duplicate answers for given questions; these answers are being removed.")
                    print(check_duplicate_answers)
               }
               
               multi_choice_m_flat <- midway %>%
                    distinct(subject_ids, user_name, classification_id, task, total_submissions, submission_index, choice, out, value_present) %>%
                    spread_(., key = "out", value = "value_present", fill = 0)
               
               # Need to left_join after creation of each new columns because there might be multiple rows per classification, and this could vary.
               combined <- left_join(combined, multi_choice_m_flat)
          }
     
     return(combined)     
     }
}

My own work using the 'flattening script' above is copied below. I get the error message cited earlier when I run this 'WG_linuxtest_flattened <- run_json_parsing(data = jdata)'.
I reckon it is something to do with naming the 'multi_choice_Qs' variable. I am trying to flatten the data to run in R and to assign the different choice (shown below) to different columns, with their corresponding votes.

library(devtools)
devtools::install_github("sailthru/tidyjson")
library(tidyjson)
library(magrittr)
library(jsonlite)
library(dplyr)
library(stringr)
library(tidyr)
library(lubridate)

#### Set source of flattening script #### 

source(file = "C:\\Users\\Cemogor\\Downloads\\Dissertation\\R scripts\\flattening_script.R") 

#### Interactive cleaning of classification data and specifying fields ####

# define variables that will be used in the run_json_parsing function. They need the 

# REQUIRED VARIABLES: 
# jdata <- "character"
# survey_id <- "character"
# workflow_id_num <- numeric
# workflow_version_num <- numeric 

# OPTIONAL VARIABLES
#single_choice_Qs <- "character" or c("character", "character")
#single_choice_colnames  <- "character" or c("character", "character")
#multi_choice_Qs  <- "character" or c("character", "character")
#multi_choice_colnames <- "character" or c("character", "character")


# Specify Project
project_name <- "wildgabon"
classifications_file <- "C:\\Users\\Cemogor\\Downloads\\Dissertation\\Wild Gabon raw files\\wild-gabon-classifications.csv"

# Examine data
jdata <- read.csv(classifications_file, stringsAsFactors = F)

# Set project-specific details
check_workflow(jdata) %>% View
workflow_id_num <- 8924
workflow_version_num <- 148.18

# limit to relevant workflow id and version
jdata <- jdata %>% filter(., workflow_id == workflow_id_num, workflow_version == workflow_version_num)

# Identify task-specific details. 

View_json(jdata)
survey_id <- c("T0")

single_choice_Qs <-  c("HUMAN", "BUFFALO", "LEOPARD", "ELEPHANT", "GORILLA", 
                      "OTHER", "REDRIVERHOG", "CHIMPANZEE", 
                      "YELLOWBACKEDDUIKER", "BUSHBUCK", 
                                             "NOANIMAL")
single_choice_colnames  <- c("HUMAN", "BUFFALO", "LEOPARD", "ELEPHANT", "GORILLA", 
                       "OTHER", "REDRIVERHOG", "CHIMPANZEE", 
                      "YELLOWBACKEDDUIKER", "BUSHBUCK", 
                       "NOANIMAL") 


single_choice_Qs <-  c("HOWMANYINCLUDINGYOUNGS" )
single_choice_colnames <- c("How_many")
single_choice_colnames  <- c("1","2","3","4","5","6","7","8","9","10","11-50","51+") 

multi_choice_Qs <- c("WHATBEHAVIORSDOYOUSEE")
multi_choice_colnames <- c("Behaviour")
multi_choice_colnames <-  c("STANDING", "EATING","RESTING","MOVING","INTERACTING (PLAYING OR GROOMING)", "MATING","FIGHTING") 

# Flatten by calling the code from the flattening_functions file. 

WG_linuxtest_flattened <- run_json_parsing(data = jdata)

WG_multi_test <- get_multi_choice_Qs(jdata, "WHATBEHAVIORSDOYOUSEE", "bEHAVIOR")
View(WG_multi_test)

View(WG_linuxtest_flattened)

I am happy to provide more information. Thank you :slight_smile:

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.