Hi all, I am struggling with running the script below, and please, I need help.
This is the error message "Error in run_json_parsing(data = jdata) : Your multi-choice columns out do not equal the columns in".
The script below is a part of the script I am working with (not written by me).
get_multi_choice_Qs <- function(with_choices_data, cols_in, cols_out) {
if (!exists("multi_choice_Qs")) {
print("You have not entered any Multiple Choice subquestions, such as 'what behaviors do you see?', 'what sides of the animal are visible?'")
combined <- NULL
} else {
if(missing(cols_out)) cols_out <- cols_in
print("Getting multi-choice questions.")
print(paste("The subquestion", cols_in, "will be returned as", cols_out)) # paste returns a 1:1 (so three returns for three columns); cat returns all at once
stopifnot(length(cols_in) == length(cols_out))
combined <- with_choices_data
for (m in 1:length(cols_in)) {
#col_in <- multi_choice_Qs[m]
#col_out <- multi_choice_colnames[m]
col_in <- cols_in[m]
col_out <- cols_out[m]
array_ind <- paste(col_out, "ind", m, sep=".")
prepend <- as.character(col_out)
multi_choice_m <- with_choices_data %>%
enter_object("answers") %>%
enter_object(col_in) %>%
gather_array(column.name = array_ind) %>%
append_values_string(col_out)
# Spread columns
midway <- multi_choice_m %>% data.frame %>%
select_(., paste0("-", array_ind)) %>% #have to paste these for standard evaluation
mutate(., value_present = 1, pre_col = prepend) %>%
unite_(., "out", c("pre_col", col_out))
#holler if any weirdness with duplicate entries in the answers
check_duplicate_answers <- midway %>%
group_by(subject_ids, user_name, classification_id, submission_index, choice, out) %>%
mutate(dups = n()) %>%
filter(dups > 1)
check <- check_duplicate_answers %>%
nrow() %>%
as.numeric()
if(check > 0) {
print("These classifications have duplicate answers for given questions; these answers are being removed.")
print(check_duplicate_answers)
}
multi_choice_m_flat <- midway %>%
distinct(subject_ids, user_name, classification_id, task, total_submissions, submission_index, choice, out, value_present) %>%
spread_(., key = "out", value = "value_present", fill = 0)
# Need to left_join after creation of each new columns because there might be multiple rows per classification, and this could vary.
combined <- left_join(combined, multi_choice_m_flat)
}
return(combined)
}
}
My own work using the 'flattening script' above is copied below. I get the error message cited earlier when I run this 'WG_linuxtest_flattened <- run_json_parsing(data = jdata)'.
I reckon it is something to do with naming the 'multi_choice_Qs' variable. I am trying to flatten the data to run in R and to assign the different choice (shown below) to different columns, with their corresponding votes.
library(devtools)
devtools::install_github("sailthru/tidyjson")
library(tidyjson)
library(magrittr)
library(jsonlite)
library(dplyr)
library(stringr)
library(tidyr)
library(lubridate)
#### Set source of flattening script ####
source(file = "C:\\Users\\Cemogor\\Downloads\\Dissertation\\R scripts\\flattening_script.R")
#### Interactive cleaning of classification data and specifying fields ####
# define variables that will be used in the run_json_parsing function. They need the
# REQUIRED VARIABLES:
# jdata <- "character"
# survey_id <- "character"
# workflow_id_num <- numeric
# workflow_version_num <- numeric
# OPTIONAL VARIABLES
#single_choice_Qs <- "character" or c("character", "character")
#single_choice_colnames <- "character" or c("character", "character")
#multi_choice_Qs <- "character" or c("character", "character")
#multi_choice_colnames <- "character" or c("character", "character")
# Specify Project
project_name <- "wildgabon"
classifications_file <- "C:\\Users\\Cemogor\\Downloads\\Dissertation\\Wild Gabon raw files\\wild-gabon-classifications.csv"
# Examine data
jdata <- read.csv(classifications_file, stringsAsFactors = F)
# Set project-specific details
check_workflow(jdata) %>% View
workflow_id_num <- 8924
workflow_version_num <- 148.18
# limit to relevant workflow id and version
jdata <- jdata %>% filter(., workflow_id == workflow_id_num, workflow_version == workflow_version_num)
# Identify task-specific details.
View_json(jdata)
survey_id <- c("T0")
single_choice_Qs <- c("HUMAN", "BUFFALO", "LEOPARD", "ELEPHANT", "GORILLA",
"OTHER", "REDRIVERHOG", "CHIMPANZEE",
"YELLOWBACKEDDUIKER", "BUSHBUCK",
"NOANIMAL")
single_choice_colnames <- c("HUMAN", "BUFFALO", "LEOPARD", "ELEPHANT", "GORILLA",
"OTHER", "REDRIVERHOG", "CHIMPANZEE",
"YELLOWBACKEDDUIKER", "BUSHBUCK",
"NOANIMAL")
single_choice_Qs <- c("HOWMANYINCLUDINGYOUNGS" )
single_choice_colnames <- c("How_many")
single_choice_colnames <- c("1","2","3","4","5","6","7","8","9","10","11-50","51+")
multi_choice_Qs <- c("WHATBEHAVIORSDOYOUSEE")
multi_choice_colnames <- c("Behaviour")
multi_choice_colnames <- c("STANDING", "EATING","RESTING","MOVING","INTERACTING (PLAYING OR GROOMING)", "MATING","FIGHTING")
# Flatten by calling the code from the flattening_functions file.
WG_linuxtest_flattened <- run_json_parsing(data = jdata)
WG_multi_test <- get_multi_choice_Qs(jdata, "WHATBEHAVIORSDOYOUSEE", "bEHAVIOR")
View(WG_multi_test)
View(WG_linuxtest_flattened)
I am happy to provide more information. Thank you