Number of occurrences in column

I have a data set that contains the times at which participants checked a clock during an experiment. How can I calculate the total number of CheckTime observations for each participant? Or, to put it another way, how do I find out how many times each participant looked at the clock?

I tried:

# Attempted approach: grouping by both Participant and CheckTime makes n()
# count rows per (Participant, CheckTime) pair rather than per participant.
# NOTE(review): the data frame piped into group_by() is not shown in this snippet.
group_by(Participant, CheckTime) %>%
  summarize(count = n())

But this way I get the number of times each specific time appears in the data set, not the total number of clock checks per participant.

This is part of my data:

# Sample of the clock-check data, one row per Participant / CheckTime pair.
#   Participant: integer participant id (1 or 2 in this sample)
#   CheckTime:   time at which the clock was checked
#   Valence:     constant within each participant here (0 for 1, 1 for 2);
#                its meaning is not stated -- TODO confirm
#   n:           number of checks recorded at that CheckTime
tibble::tribble(
  ~Participant, ~CheckTime, ~Valence,  ~n,
            1L,         16,        0,  1L,
            1L,         18,        0,  1L,
            1L,         19,        0,  1L,
            1L,         21,        0,  2L,
            1L,         23,        0,  1L,
            1L,         26,        0,  2L,
            1L,         28,        0,  1L,
            1L,         30,        0,  2L,
            1L,         31,        0,  1L,
            1L,         34,        0,  1L,
            1L,         35,        0,  2L,
            1L,         36,        0,  2L,
            1L,         37,        0,  2L,
            1L,         39,        0,  1L,
            1L,         41,        0,  2L,
            1L,         44,        0,  1L,
            1L,         47,        0,  1L,
            1L,         49,        0,  1L,
            1L,         50,        0,  1L,
            1L,         52,        0,  2L,
            1L,         54,        0,  1L,
            1L,         57,        0,  2L,
            1L,         58,        0,  2L,
            1L,         59,        0,  3L,
            1L,         60,        0,  3L,
            1L,         61,        0,  3L,
            1L,         65,        0,  1L,
            1L,         82,        0,  1L,
            2L,          0,        1,  1L,
            2L,          1,        1, 10L,
            2L,          2,        1,  3L,
            2L,          7,        1,  1L,
            2L,          8,        1,  2L,
            2L,          9,        1,  3L,
            2L,         10,        1,  1L,
            2L,         11,        1,  1L,
            2L,         13,        1,  3L,
            2L,         17,        1,  2L,
            2L,         19,        1,  2L,
            2L,         20,        1,  1L,
            2L,         21,        1,  1L,
            2L,         22,        1,  1L,
            2L,         23,        1,  3L,
            2L,         24,        1,  2L,
            2L,         27,        1,  6L,
            2L,         30,        1,  1L,
            2L,         32,        1,  1L,
            2L,         33,        1,  4L,
            2L,         36,        1,  1L,
            2L,         37,        1,  1L,
            2L,         39,        1,  1L,
            2L,         41,        1,  3L,
            2L,         42,        1,  2L,
            2L,         43,        1,  2L,
            2L,         44,        1,  1L,
            2L,         45,        1,  4L,
            2L,         46,        1,  2L,
            2L,         47,        1,  2L,
            2L,         49,        1,  3L,
            2L,         50,        1,  4L,
            2L,         51,        1,  1L,
            2L,         52,        1,  2L,
            2L,         53,        1,  1L,
            2L,         54,        1,  5L,
            2L,         55,        1,  4L,
            2L,         56,        1,  1L,
            2L,         57,        1,  3L,
            2L,         59,        1,  2L,
            2L,         62,        1,  1L,
            2L,         69,        1,  1L,
            2L,         80,        1,  1L
  )

Right now, I'm on my phone and hence can't check for sure, but do you need to group by CheckTime?

As far as I can see, they are all distinct for each participant. So, I think counting observations per group will be enough, after grouping only by participants.

If `n` denotes the number of checks made at a particular CheckTime, you need to sum it within each participant, and that will give the answer. But I'm not sure, as you haven't provided details of the columns.

1 Like

@Yarnabrina thanks for your response. You're right, `n` means the number of times the clock was checked at a particular CheckTime. How do I sum `n` by participant?

If you just want to sum `n` by participant, you don't want to group by `CheckTime`; group by `Participant` only:

# Attach the tidyverse (supplies tibble, dplyr and the %>% pipe) while
# silencing its startup banner so the reprex output stays clean.
suppressPackageStartupMessages(library(tidyverse))
# Rebuild the sample data row by row, one row per Participant / CheckTime pair;
# `n` holds the number of clock checks recorded at that CheckTime.
df <- tibble::tribble(
  ~Participant, ~CheckTime, ~Valence,  ~n,
  1L,         16,        0,  1L,
  1L,         18,        0,  1L,
  1L,         19,        0,  1L,
  1L,         21,        0,  2L,
  1L,         23,        0,  1L,
  1L,         26,        0,  2L,
  1L,         28,        0,  1L,
  1L,         30,        0,  2L,
  1L,         31,        0,  1L,
  1L,         34,        0,  1L,
  1L,         35,        0,  2L,
  1L,         36,        0,  2L,
  1L,         37,        0,  2L,
  1L,         39,        0,  1L,
  1L,         41,        0,  2L,
  1L,         44,        0,  1L,
  1L,         47,        0,  1L,
  1L,         49,        0,  1L,
  1L,         50,        0,  1L,
  1L,         52,        0,  2L,
  1L,         54,        0,  1L,
  1L,         57,        0,  2L,
  1L,         58,        0,  2L,
  1L,         59,        0,  3L,
  1L,         60,        0,  3L,
  1L,         61,        0,  3L,
  1L,         65,        0,  1L,
  1L,         82,        0,  1L,
  2L,          0,        1,  1L,
  2L,          1,        1, 10L,
  2L,          2,        1,  3L,
  2L,          7,        1,  1L,
  2L,          8,        1,  2L,
  2L,          9,        1,  3L,
  2L,         10,        1,  1L,
  2L,         11,        1,  1L,
  2L,         13,        1,  3L,
  2L,         17,        1,  2L,
  2L,         19,        1,  2L,
  2L,         20,        1,  1L,
  2L,         21,        1,  1L,
  2L,         22,        1,  1L,
  2L,         23,        1,  3L,
  2L,         24,        1,  2L,
  2L,         27,        1,  6L,
  2L,         30,        1,  1L,
  2L,         32,        1,  1L,
  2L,         33,        1,  4L,
  2L,         36,        1,  1L,
  2L,         37,        1,  1L,
  2L,         39,        1,  1L,
  2L,         41,        1,  3L,
  2L,         42,        1,  2L,
  2L,         43,        1,  2L,
  2L,         44,        1,  1L,
  2L,         45,        1,  4L,
  2L,         46,        1,  2L,
  2L,         47,        1,  2L,
  2L,         49,        1,  3L,
  2L,         50,        1,  4L,
  2L,         51,        1,  1L,
  2L,         52,        1,  2L,
  2L,         53,        1,  1L,
  2L,         54,        1,  5L,
  2L,         55,        1,  4L,
  2L,         56,        1,  1L,
  2L,         57,        1,  3L,
  2L,         59,        1,  2L,
  2L,         62,        1,  1L,
  2L,         69,        1,  1L,
  2L,         80,        1,  1L
)

# Total clock checks per participant: within each Participant group, add up
# the per-CheckTime counts stored in `n` (grouping by Participant only).
summarise(
  group_by(df, Participant),
  total_clockchecks = sum(n)
)
#> # A tibble: 2 x 2
#>   Participant total_clockchecks
#>         <int>             <int>
#> 1           1                44
#> 2           2                97

Created on 2019-06-27 by the reprex package (v0.3.0)

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.