Aggregating to Allow for a Mean Across Several Columns

I'm trying to aggregate the values under the "ID Number" column down to four groups - CD-Female, CD-Male, SS-Female, SS-male so I can take a mean across column V2 to V17. From there I'd like to plot the four lines on a ggplot to show change over time (columns V2-V17 is just a unit of time). I can figure out the ggplot part though. The only headache I have right is the data management aspect of aggregating and taking a mean across more than one column.

I tried this:

aggregate(0:30 ~ ID Number, FUN = sum, data = baseline)

But that was embarrassingly ineffective.

I hope I'm explaining this correctly. Essentially aggregating to take a mean across each column so I can plot it (like a longitudinal plot). The X axis will still have V2, V3, V4, V5....... V17 but they'll just be a mean of, for example, CD-female.

Any information would extremely helpful. Thank you

Stress<-tibble::tribble(
          ~V1,  ~V2,  ~V3,  ~V4,  ~V5,  ~V6,  ~V7,  ~V8,  ~V9, ~V10, ~V11, ~V12, ~V13, ~V14, ~V15, ~V16, ~V17,
  "ID Number",   0L,   1L,   2L,   3L,   4L,   5L,   6L,   7L,   8L,   9L,  10L,  11L,  12L,  13L,  14L,  15L,
  "CD-Female",  66L,  66L,  64L,  64L,  58L,  58L,  58L,  57L,  56L,  56L,  57L,  57L,  57L,  57L,  57L,  57L,
  "CD-Female", 103L, 103L, 103L, 103L, 103L, 103L, 103L, 103L, 103L, 103L, 103L, 103L, 103L, 103L, 103L, 104L,
  "CD-Female", 115L, 115L, 114L, 114L, 114L, 114L, 112L, 111L, 110L, 109L, 109L, 108L, 109L, 110L, 111L, 112L,
  "CD-Female",  84L,  63L,  62L,  62L,  63L,  58L,  58L,  54L,  62L,  66L,  66L,  60L,  60L,  51L,  54L,  54L,
  "CD-Female",  83L,  84L,  84L,  84L,  84L,  84L,  82L,  82L,  82L,  82L,  81L,  82L,  83L,  82L,  82L,  82L,
  "CD-Female", 240L, 202L, 141L, 106L,  89L,  74L,  71L,  71L,  71L,  72L,  74L,  77L,  77L,  70L,  68L,  68L,
  "CD-Female",  66L,  66L,  76L,  76L,  76L,  77L,  75L,  75L,  75L,  75L,  76L,  76L,  77L,  79L,  79L,  77L,
  "CD-Female",  66L,  68L,  68L,  67L,  66L,  67L,  68L,  69L,  69L,  68L,  68L,  68L,  70L,  72L,  71L,  69L,
  "CD-Female",  65L,  65L,  65L,  65L,  65L,  65L,  66L,  66L,  65L,  65L,  66L,  66L,  67L,  67L,  68L,  69L,
    "CD-Male",  70L,  69L,  68L,  68L,  69L,  69L,  69L,  69L,  68L,  69L,  70L,  71L,  72L,  72L,  73L,  74L,
    "CD-Male",  66L,  66L,  66L,  66L,  66L,  66L,  67L,  67L,  67L,  67L,  68L,  68L,  68L,  67L,  67L,  67L,
  "SS-Female",  67L,  65L,  65L,  64L,  58L,  61L,  55L,  60L,  60L,  60L,  60L,  66L,  66L,  72L,  72L,  82L,
  "SS-Female",  78L,  78L,  77L,  76L,  75L,  75L,  75L,  75L,  76L,  76L,  76L,  76L,  77L,  76L,  76L,  77L,
  "SS-Female",  79L,  78L,  67L,  67L,  68L,  62L,  62L,  62L,  63L,  72L,  72L,  82L,  81L,  82L,  83L,  82L,
  "SS-Female",  85L,  84L,  83L,  81L,  80L,  79L,  78L,  78L,  79L,  80L,  80L,  81L,  81L,  81L,  80L,  80L,
    "SS-Male",  63L,  63L,  63L,  63L,  77L,  78L,  80L,  77L,  76L,  67L,  67L,  62L,  57L,  57L,  57L,  48L,
    "SS-Male",  49L,  48L,  46L,  46L,  45L,  46L,  45L,  45L,  45L,  45L,  46L,  46L,  47L,  48L,  48L,  50L
  )
head(Stress)

I think your data will be better stored with a column for date and columns for each participant (CD-Female1, CD-Femal2, etc.). This can be easily pivoted to a column for date, column for participant, column for value.

Here is a solution in the current form:

library(tidyverse)
#> Warning: package 'ggplot2' was built under R version 4.0.5

Stress<-tibble::tribble(
  ~V1,  ~V2,  ~V3,  ~V4,  ~V5,  ~V6,  ~V7,  ~V8,  ~V9, ~V10, ~V11, ~V12, ~V13, ~V14, ~V15, ~V16, ~V17,
  "ID Number",   0L,   1L,   2L,   3L,   4L,   5L,   6L,   7L,   8L,   9L,  10L,  11L,  12L,  13L,  14L,  15L,
  "CD-Female",  66L,  66L,  64L,  64L,  58L,  58L,  58L,  57L,  56L,  56L,  57L,  57L,  57L,  57L,  57L,  57L,
  "CD-Female", 103L, 103L, 103L, 103L, 103L, 103L, 103L, 103L, 103L, 103L, 103L, 103L, 103L, 103L, 103L, 104L,
  "CD-Female", 115L, 115L, 114L, 114L, 114L, 114L, 112L, 111L, 110L, 109L, 109L, 108L, 109L, 110L, 111L, 112L,
  "CD-Female",  84L,  63L,  62L,  62L,  63L,  58L,  58L,  54L,  62L,  66L,  66L,  60L,  60L,  51L,  54L,  54L,
  "CD-Female",  83L,  84L,  84L,  84L,  84L,  84L,  82L,  82L,  82L,  82L,  81L,  82L,  83L,  82L,  82L,  82L,
  "CD-Female", 240L, 202L, 141L, 106L,  89L,  74L,  71L,  71L,  71L,  72L,  74L,  77L,  77L,  70L,  68L,  68L,
  "CD-Female",  66L,  66L,  76L,  76L,  76L,  77L,  75L,  75L,  75L,  75L,  76L,  76L,  77L,  79L,  79L,  77L,
  "CD-Female",  66L,  68L,  68L,  67L,  66L,  67L,  68L,  69L,  69L,  68L,  68L,  68L,  70L,  72L,  71L,  69L,
  "CD-Female",  65L,  65L,  65L,  65L,  65L,  65L,  66L,  66L,  65L,  65L,  66L,  66L,  67L,  67L,  68L,  69L,
  "CD-Male",  70L,  69L,  68L,  68L,  69L,  69L,  69L,  69L,  68L,  69L,  70L,  71L,  72L,  72L,  73L,  74L,
  "CD-Male",  66L,  66L,  66L,  66L,  66L,  66L,  67L,  67L,  67L,  67L,  68L,  68L,  68L,  67L,  67L,  67L,
  "SS-Female",  67L,  65L,  65L,  64L,  58L,  61L,  55L,  60L,  60L,  60L,  60L,  66L,  66L,  72L,  72L,  82L,
  "SS-Female",  78L,  78L,  77L,  76L,  75L,  75L,  75L,  75L,  76L,  76L,  76L,  76L,  77L,  76L,  76L,  77L,
  "SS-Female",  79L,  78L,  67L,  67L,  68L,  62L,  62L,  62L,  63L,  72L,  72L,  82L,  81L,  82L,  83L,  82L,
  "SS-Female",  85L,  84L,  83L,  81L,  80L,  79L,  78L,  78L,  79L,  80L,  80L,  81L,  81L,  81L,  80L,  80L,
  "SS-Male",  63L,  63L,  63L,  63L,  77L,  78L,  80L,  77L,  76L,  67L,  67L,  62L,  57L,  57L,  57L,  48L,
  "SS-Male",  49L,  48L,  46L,  46L,  45L,  46L,  45L,  45L,  45L,  45L,  46L,  46L,  47L,  48L,  48L,  50L
)
head(Stress)
#> # A tibble: 6 x 17
#>   V1        V2    V3    V4    V5    V6    V7    V8    V9   V10   V11   V12   V13
#>   <chr>  <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
#> 1 ID Nu~     0     1     2     3     4     5     6     7     8     9    10    11
#> 2 CD-Fe~    66    66    64    64    58    58    58    57    56    56    57    57
#> 3 CD-Fe~   103   103   103   103   103   103   103   103   103   103   103   103
#> 4 CD-Fe~   115   115   114   114   114   114   112   111   110   109   109   108
#> 5 CD-Fe~    84    63    62    62    63    58    58    54    62    66    66    60
#> 6 CD-Fe~    83    84    84    84    84    84    82    82    82    82    81    82
#> # ... with 4 more variables: V14 <int>, V15 <int>, V16 <int>, V17 <int>

Stress %>%
  group_by(V1) %>%
  summarize_if(is.numeric, mean) %>% 
  pivot_longer(-V1) %>%
  filter(V1 != "ID Number") %>%
  mutate(time = str_sub(name, 2) %>% as.numeric()) %>%
  ggplot() + 
  aes(time, value, color = V1) +
  geom_line()

Created on 2021-09-23 by the reprex package (v1.0.0)

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.