Question about calculating mean difference

# My data set: the datapasta::df_paste() call below, followed by what appears
# to be its pasted output (summary rows with treatment, mean, sd, n and month).
# NOTE(review): this snippet does not parse as written. The closing `)` of the
# first data.frame() is immediately followed by a second `data.frame(` on the
# same line, and neither result is assigned to `df.summary.pre`.
datapasta::df_paste(head(df.summary.pre, 24))
data.frame(
              treatment = c(0L,0L,0L,0L,0L,
                            0L,0L,0L,0L,0L,0L,0L,1L,1L,1L,1L,1L,1L,
                            1L,1L,1L,1L,1L,1L),
                   mean = c(2080.80524668034,
                            1894.47583081571,1645.53080824431,1287.96860164005,
                            1205.21353042728,1597.5904186448,2078.47593871385,
                            2038.90839447562,1686.11048769961,1212.36825636599,
                            1460.7180845314,1771.19468546638,
                            2074.22014135427,1888.73851360146,1643.06264236902,
                            1285.04594260428,1200.89990888976,1590.37771703762,
                            2070.3251659508,2034.21189639464,1680.32800754938,
                            1205.71369537631,1453.78814278256,1766.48370098841),
                     sd = c(882.836084693134,
                            820.410642991885,689.148421405028,647.125102287398,
                            464.250357867045,664.65512876134,800.929236387366,
                            774.277294321536,730.11311283474,484.567342986466,
                            588.017853816504,728.384746716548,
                            884.555478679759,820.317713198492,692.802742555817,
                            640.326840907904,459.636585613027,650.083749456554,
                            785.85824899789,769.739885544013,732.198317691098,
                            483.16075014882,588.055376335814,739.676252388324),
                      n = c(9263L,9268L,9267L,
                            9268L,9268L,9268L,9268L,9268L,9268L,9268L,
                            9251L,9220L,30703L,30732L,30730L,30732L,30732L,
                            30732L,30732L,30732L,30731L,30732L,30646L,
                            30546L),
                  month = as.factor(c("1","2","3","4","5","6","7","8","9",
                                      "10","11","12","1","2","3","4",
                                      "5","6","7","8","9","10","11","12"))
           )data.frame(
   treatment = c(0L, 0L, 0L, 0L),
        mean = c(2080.80524668034,1894.47583081571,
                 1645.53080824431,1287.96860164005),
          sd = c(882.836084693134,820.410642991885,
                 689.148421405028,647.125102287398),
           n = c(9263L, 9268L, 9267L, 9268L),
       month = as.factor(c("1", "2", "3", "4"))
)

# Now I want to calculate the mean difference for each month between the
# treatment group and the control group.
# NOTE(review): `%>%`, group_by() and summarise() need dplyr to be attached;
# no library() call is shown in this snippet.
df.2a <- df.summary.pre %>% group_by(month) %>% 
  summarise(tstats = 
              # NOTE(review): inside `[ ]`, `treatment=1` / `treatment=0` use a
              # single `=`, which is not the comparison operator `==`, so these
              # do not select the rows where treatment equals 1 or 0.
              mean[treatment=1]-mean[treatment=0]
  )
# this is wrong. what's the problem?

Hard to tell without a working reprex. This one throws errors. It's always a good idea to cut and paste your reprex into a fresh session to make sure that it has everything needed to work.

You still have to work on your reprex skills: the example is not reproducible because the library calls are missing and the sample data is not integrated with the code. Since you are very new to this, I'm going to help you with the reprex one more time. Is this what you are trying to do?

library(tidyverse)

# Sample data: one row per (treatment, month) pair. The 12 rows with
# treatment 0 come first (months 1-12), followed by the 12 rows with
# treatment 1 (months 1-12). `month` is a factor built from character labels.
df.summary.pre <- data.frame(
    treatment = rep(0:1, each = 12L),
    mean = c(
        # treatment 0, months 1-12
        2080.80524668034, 1894.47583081571, 1645.53080824431,
        1287.96860164005, 1205.21353042728, 1597.5904186448,
        2078.47593871385, 2038.90839447562, 1686.11048769961,
        1212.36825636599, 1460.7180845314, 1771.19468546638,
        # treatment 1, months 1-12
        2074.22014135427, 1888.73851360146, 1643.06264236902,
        1285.04594260428, 1200.89990888976, 1590.37771703762,
        2070.3251659508, 2034.21189639464, 1680.32800754938,
        1205.71369537631, 1453.78814278256, 1766.48370098841
    ),
    sd = c(
        # treatment 0, months 1-12
        882.836084693134, 820.410642991885, 689.148421405028,
        647.125102287398, 464.250357867045, 664.65512876134,
        800.929236387366, 774.277294321536, 730.11311283474,
        484.567342986466, 588.017853816504, 728.384746716548,
        # treatment 1, months 1-12
        884.555478679759, 820.317713198492, 692.802742555817,
        640.326840907904, 459.636585613027, 650.083749456554,
        785.85824899789, 769.739885544013, 732.198317691098,
        483.16075014882, 588.055376335814, 739.676252388324
    ),
    n = c(
        # treatment 0, months 1-12
        9263L, 9268L, 9267L, 9268L, 9268L, 9268L,
        9268L, 9268L, 9268L, 9268L, 9251L, 9220L,
        # treatment 1, months 1-12
        30703L, 30732L, 30730L, 30732L, 30732L, 30732L,
        30732L, 30732L, 30731L, 30732L, 30646L, 30546L
    ),
    month = factor(as.character(rep(1:12, times = 2L)))
)

# Mean difference (treatment 1 minus treatment 0) for each month.
# Only month, treatment and mean are kept, so `month` is the sole identifier
# when the per-treatment means are spread into `treatment_0` / `treatment_1`.
df.summary.pre %>%
    select(month, treatment, mean) %>%
    pivot_wider(
        names_from = treatment,
        values_from = mean,
        names_prefix = "treatment_"
    ) %>%
    mutate(tstats = treatment_1 - treatment_0)
#> # A tibble: 12 x 4
#>    month treatment_0 treatment_1 tstats
#>    <fct>       <dbl>       <dbl>  <dbl>
#>  1 1           2081.       2074.  -6.59
#>  2 2           1894.       1889.  -5.74
#>  3 3           1646.       1643.  -2.47
#>  4 4           1288.       1285.  -2.92
#>  5 5           1205.       1201.  -4.31
#>  6 6           1598.       1590.  -7.21
#>  7 7           2078.       2070.  -8.15
#>  8 8           2039.       2034.  -4.70
#>  9 9           1686.       1680.  -5.78
#> 10 10          1212.       1206.  -6.65
#> 11 11          1461.       1454.  -6.93
#> 12 12          1771.       1766.  -4.71

Created on 2020-02-09 by the reprex package (v0.3.0.9001)

I learned a lot from you. Thanks!

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.