Help tallying multiple variables to plot as columns on X axis

Hi there,

I am trying to plot the level of agreement with a variable by location. What has me stumped is the fact that respondents can select multiple locations (i.e. their place of business is located in several locations).

I've used summarise(sum) on one location, but this method (see below) does not work when I add new locations.

Here is a sample dataset

# Sample data: five 0/1 location columns for ten respondents, plus the
# agreement column, which here carries the same text string in every row.
data.frame(
  Brisbane_CBD = c(0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L),
  Greater_Bris = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L),
  Regional_City = c(1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L),
  Rural_Town = c(0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L),
  Virtual = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L),
  Equipped_for_future_changes = rep(
    "c(NA, NA, 4, 4, 3, 3, 4, 2, 3, 5)",
    times = 10L
  )
)

Here is the code I am using

library(ggplot2)
library(tidyr)
library(tibble)
library(dplyr)
library(likert)
library(RColorBrewer)
library(string)

# Text labels for the five agreement codes, in the order 1 to 5.
lbs_DF <- c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree")
# Recode the agreement column as a labelled factor.
# NOTE(review): the pipeline ends in as.data.frame(), so a one-column data
# frame (not a plain vector) is assigned into the column. That is why the
# result table further down is headed
# `Equipped_for_future_changes$Equipped_for_future_changes`.
DF$Equipped_for_future_changes <- DF %>%
  select("Equipped_for_future_changes")%>%
  # mutate_if() only converts columns for which is.numeric() is TRUE.
  dplyr::mutate_if(is.numeric, factor, levels = 1:5, labels = lbs_DF)%>%
  as.data.frame()
#> Error in DF %>% select("Equipped_for_future_changes") %>% dplyr::mutate_if(is.numeric, : could not find function "%>%"

# DF is never created in this snippet: the sample data.frame() call is not
# assigned to a name, hence the "object 'DF' not found" error.
head(DF)
#> Error in head(DF): object 'DF' not found


# Total number of Brisbane_CBD selections within each agreement level.
DF %>%
  group_by(Equipped_for_future_changes) %>%
  summarise(sum(Brisbane_CBD))
#> Error in DF %>% group_by(Equipped_for_future_changes) %>% summarise(sum(Brisbane_CBD)): could not find function "%>%"

Created on 2023-01-24 with reprex v2.0.2

This code produces the following result

Equipped_for_future_changes$Equipped_for_future_changes `sum(Brisbane_CBD)`
  <fct>                                                                 <int>
1 Strongly Disagree                                                         1
2 Disagree                                                                  3
3 Neutral                                                                   7
4 Agree                                                                    19
5 Strongly Agree                                                           11
6 NA                                                                       12

I need to do this (or something more elegant) for all locations simultaneously, so that I can plot all locations along the x-axis with Equipped_for_future_changes as the fill of a geom_bar or geom_col.

Thank you in advance for any help or advice you can provide.

Kind Regards

Aaron

I'm uncertain what you mean by this.

Here's a touched up reprex

# Reprex sample data bound to DF: five 0/1 location columns for ten
# respondents, and an agreement column holding the same text in every row.
DF <- data.frame(
  Brisbane_CBD = c(0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L),
  Greater_Bris = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L),
  Regional_City = c(1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L),
  Rural_Town = c(0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L),
  Virtual = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L),
  Equipped_for_future_changes = rep(
    "c(NA, NA, 4, 4, 3, 3, 4, 2, 3, 5)",
    times = 10L
  )
)
library(magrittr)
library(ggplot2)
library(tidyr)
#> 
#> Attaching package: 'tidyr'
#> The following object is masked from 'package:magrittr':
#> 
#>     extract
library(tibble)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(likert)
#> Loading required package: xtable
#> 
#> Attaching package: 'likert'
#> The following object is masked from 'package:dplyr':
#> 
#>     recode
library(RColorBrewer)
# library(string) DK

# Text labels for the five agreement codes, in the order 1 to 5.
lbs_DF <- c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree")
# Attempt to recode the agreement column as a labelled factor.
# mutate_if(is.numeric, ...) skips non-numeric columns; in this reprex the
# column is character (the summary below prints it as <chr>), so it is left
# unchanged. The result is still a one-column data frame (as.data.frame()),
# which is stored inside DF as a nested column.
DF$Equipped_for_future_changes <- DF %>%
  select("Equipped_for_future_changes") %>%
  dplyr::mutate_if(is.numeric, factor, levels = 1:5, labels = lbs_DF) %>%
  as.data.frame()
#> Error in DF %>% select("Equipped_for_future_changes") %>% dplyr::mutate_if(is.numeric, : could not find function "%>%"
# NOTE(review): the error line above looks like a leftover from the original
# post; the summary below shows the nested column name, which suggests the
# assignment succeeded in this run. Confirm before relying on it.

# Sum the Brisbane_CBD flags per distinct value of the agreement column;
# every row holds the same string, so there is a single group.
DF %>%
  group_by(Equipped_for_future_changes) %>%
  summarise(sum(Brisbane_CBD))
#> # A tibble: 1 × 2
#>   Equipped_for_future_changes$Equipped_for_future_changes `sum(Brisbane_CBD)`
#>   <chr>                                                                 <int>
#> 1 c(NA, NA, 4, 4, 3, 3, 4, 2, 3, 5)                                         3

Created on 2023-01-23 with reprex v2.0.2

Results can vary when the code is run outside reprex, because objects already in the global environment (and packages already attached) can affect it, while reprex runs everything in a fresh session.

Thanks @technocrat for looking into this.

I didn't realise reprex runs everything fresh. That is no doubt why I see errors in the reprex that I don't see in the console.

Does rm(list = ls()), which removes all objects in the global environment, wipe the slate clean in the way that reprex does?

Anyway, I've rerun the reprex, which shows what I mean by adding more locations.

library(ggplot2)
library(tidyr)
library(tibble)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(likert)
#> Loading required package: xtable
#> 
#> Attaching package: 'likert'
#> The following object is masked from 'package:dplyr':
#> 
#>     recode
library(RColorBrewer)
library(stringr)
library(reprex)

# Ten respondents: a 0/1 flag per location (runs of equal values written
# with rep()) and an integer agreement score that is NA where missing.
DF <- data.frame(
  Brisbane_CBD = c(0L, 1L, 1L, rep(0L, 6L), 1L),
  Greater_Bris = c(rep(0L, 9L), 1L),
  Regional_City = c(1L, 1L, 1L, 0L, rep(1L, 6L)),
  Rural_Town = c(rep(0L, 3L), 1L, rep(0L, 5L), 1L),
  Virtual = c(rep(0L, 9L), 1L),
  Equipped_for_future_changes = c(
    NA_integer_, NA_integer_, 4L, 4L, 3L, 3L, 4L, 2L, 3L, 5L
  )
)


# Text labels for the five agreement codes, in the order 1 to 5.
lbs_DF <- c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree")
# Recode the numeric agreement scores as a labelled factor. The column is
# numeric in this reprex, so mutate_if() converts it.
# NOTE(review): the right-hand side is a one-column data frame
# (as.data.frame()), so a data frame is nested inside the column rather
# than a plain factor vector being stored.
DF$Equipped_for_future_changes <- DF %>%
  select("Equipped_for_future_changes")%>%
  dplyr::mutate_if(is.numeric, factor, levels = 1:5, labels = lbs_DF)%>%
  as.data.frame()


# Attempt to total every location per agreement level by chaining
# summarise() calls. The first summarise() returns only the grouping column
# and `sum(Brisbane_CBD)`, so the next call no longer has a Regional_City
# column to sum. This is the "object 'Regional_City' not found" error below.
DF %>%
  group_by(Equipped_for_future_changes) %>%
  summarise(sum(Brisbane_CBD))%>%
  summarise(sum(Regional_City))%>%
  summarise(sum(Rural_Town))%>%
  summarise(sum(Virtual))
#> Error in `summarise()`:
#> ! Problem while computing `..1 = sum(Regional_City)`.
#> Caused by error in `mask$eval_all_summarise()`:
#> ! object 'Regional_City' not found

#> Backtrace:
#>      ▆
#>   1. ├─... %>% summarise(sum(Virtual))
#>   2. ├─dplyr::summarise(., sum(Virtual))
#>   3. ├─dplyr::summarise(., sum(Rural_Town))
#>   4. ├─dplyr::summarise(., sum(Regional_City))
#>   5. ├─dplyr:::summarise.data.frame(., sum(Regional_City))
#>   6. │ └─dplyr:::summarise_cols(.data, dplyr_quosures(...), caller_env = caller_env())
#>   7. │   ├─base::withCallingHandlers(...)
#>   8. │   └─dplyr:::map(quosures, summarise_eval_one, mask = mask)
#>   9. │     └─base::lapply(.x, .f, ...)
#>  10. │       └─dplyr (local) FUN(X[[i]], ...)
#>  11. │         └─mask$eval_all_summarise(quo)
#>  12. └─base::.handleSimpleError(...)
#>  13.   └─dplyr (local) h(simpleError(msg, call))
#>  14.     └─rlang::abort(bullets, call = error_call, parent = skip_internal_condition(e))

Created on 2023-01-24 with reprex v2.0.2

I think I need to get to a point where I have a tally or sum for each location that is grouped_by $Equipped_for_future_changes.

I hope this has clarified my question.

To get to the same place as reprex, use Session | Restart R in RStudio. The rm(list = ls()) route only removes objects from the global environment; loaded packages remain attached.

library(ggplot2)
library(tidyr)
library(tibble)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(likert)
#> Loading required package: xtable
#> 
#> Attaching package: 'likert'
#> The following object is masked from 'package:dplyr':
#> 
#>     recode
library(RColorBrewer)
library(stringr)
library(reprex)


library(magrittr)
#> 
#> Attaching package: 'magrittr'
#> The following object is masked from 'package:tidyr':
#> 
#>     extract
library(ggplot2)
library(tidyr)
library(tibble)
library(dplyr)
library(likert)
library(RColorBrewer)
# library(string) DK
# changed long variable name for ease of use; save
# long descriptive names for presentation tables
# Sample data: one row per respondent. The five location columns are 0/1
# flags (several may be set in the same row); Equip is the agreement code
# 1-5, NA where it is missing.
DF <- data.frame(
  Brisbane_CBD = c(0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L),
  Greater_Bris = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L),
  Regional_City = c(1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L),
  Rural_Town = c(0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L),
  Virtual = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L),
  Equip = c(NA, NA, 4L, 4L, 3L, 3L, 4L, 2L, 3L, 5L)
)

# The number of labels has to equal the number of levels, not the number of
# distinct values that occur in the data. A level that never appears (here
# code 1) is fine, so the data is left untouched and missing responses stay NA.
lbs_DF <- c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree")

#DF$Equip <- DF %>%
  # Equip is not quoted but usual way is
  # DF %>% mutate(Equip = SOMETHING)
  # select(Equip) %>%
  # factor is a variable property, can't be selectively applied
  # to rows & dplyr:: isn't needed because library(dplyr)
  # mutate(is.numeric, factor, levels = 1:5, labels = lbs_DF) %>%
  # DF is already a data frame, so this does nothing
  # as.data.frame()

# more directly
# Equip stores the codes 1-5, so `levels` must be those codes and the text
# goes in `labels`. Using the text as `levels` would match none of the codes
# and turn every value into NA.
DF$Equip <- factor(DF$Equip, levels = 1:5, labels = lbs_DF)

# Number of selections of each location within each agreement level.
# Rows with a missing Equip form their own <NA> group.
DF %>%
  group_by(Equip) %>%
  summarise(Brisbane_CBD = sum(Brisbane_CBD),
            Regional_City = sum(Regional_City),
            Rural_Town = sum(Rural_Town),
            Virtual = sum(Virtual))
#> # A tibble: 5 × 5
#>   Equip          Brisbane_CBD Regional_City Rural_Town Virtual
#>   <fct>                 <int>         <int>      <int>   <int>
#> 1 Disagree                  0             1          0       0
#> 2 Neutral                   0             3          0       0
#> 3 Agree                     1             2          1       0
#> 4 Strongly Agree            1             1          1       1
#> 5 <NA>                      1             2          0       0

Created on 2023-01-23 with reprex v2.0.2

2 Likes

After resting on it, I answered my own question. For some unknown reason, I skipped pivot_longer.

# Reshape to long form: the location columns Brisbane_CBD..Virtual are
# stacked, with the column name in Location and the 0/1 flag in Value.
DF_Long <- pivot_longer(
  DF,
  cols = Brisbane_CBD:Virtual,
  names_to = "Location",
  values_to = "Value"
)

DF_Long

# Total of the flags for every Location / agreement-level combination.
DF_Summ <- DF_Long %>%
  group_by(Location, Equipped_for_future_changes) %>%
  summarize(Total = sum(Value))
DF_Summ

Thank you @technocrat for your help.

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.