Calculate the differences between one row and the rest of the data per group [Solved]

This is basically a sensitivity experiment for three id groups. Each group has 4 rows. The first group (batch1) has fixed dx and varied dy. The 2nd group (batch1) and 3rd group (batch2) have varied dx and fixed dy.

I want to calculate the differences between the row id ended in 1 and the other rows ended in 2, 3, and 4 for each group.

How do I go about doing this? I'm open to any dplyr or data.table solutions. Thanks!

library(data.table)
dt <- data.table(
  id = as.factor(c("batch1_dx0.0_dy-2.1_4", "batch1_dx0.0_dy0.155_3",
				   "batch1_dx0.0_dy1.23_2", "batch1_dx0.0_dy1_1", 
				   "batch1_dx-0.8_dy1.0_2", "batch1_dx0_dy1.0_1",
				   "batch1_dx1.321_dy1.0_3", "batch1_dx4.12_dy1.0_4",
				   "batch2_dx-0.8_dy1.0_2", "batch2_dx0_dy1.0_1",
				   "batch2_dx1.321_dy1.0_3", "batch2_dx4.12_dy1.0_4")),
  val1 = c(6, 2, 2, 0, 3, 1, 3, 3, 4, 5, 1),
  val2 = c(6, 4, 2, 1, 1, 1, 5, 3, 2, 8, 9),
  val3 = c(6, 3, 3, 0, 4, 2, 4, 1, 5, 7, 1))

						id val1 val2 val3
 1:  batch1_dx0.0_dy-2.1_4    6    6    6
 2: batch1_dx0.0_dy0.155_3    2    4    3
 3:  batch1_dx0.0_dy1.23_2    2    2    3
 4:     batch1_dx0.0_dy1_1    0    1    0
 5:  batch1_dx-0.8_dy1.0_2    3    1    4
 6:     batch1_dx0_dy1.0_1    1    1    2
 7: batch1_dx1.321_dy1.0_3    3    5    4
 8:  batch1_dx4.12_dy1.0_4    3    3    1
 9:  batch2_dx-0.8_dy1.0_2    4    2    5
10:     batch2_dx0_dy1.0_1    5    8    7
11: batch2_dx1.321_dy1.0_3    1    9    1
12:  batch2_dx4.12_dy1.0_4    6    6    6

Expected output

	id	val1	val2	val3	dval1	dval2	dval3
batch1_dx0.0_dy-2.1_4	6	6	6	6	5	6
batch1_dx0.0_dy0.155_3	2	4	3	2	3	3
batch1_dx0.0_dy1.23_2	2	2	3	2	1	3
batch1_dx0.0_dy1_1	0	1	0	0	0	0
batch1_dx-0.8_dy1.0_2	3	1	4	2	0	2
batch1_dx0_dy1.0_1	1	1	2	0	0	0
batch1_dx1.321_dy1.0_3	3	5	4	2	4	2
batch1_dx4.12_dy1.0_4	3	3	1	2	2	-1
batch2_dx-0.8_dy1.0_2	4	2	5	-1	-6	-2
batch2_dx0_dy1.0_1	5	8	7	0	0	0
batch2_dx1.321_dy1.0_3	1	9	1	-4	1	-6
batch2_dx4.12_dy1.0_4	6	6	6	1	-2	-1

Hi Reese,

Welcome to the R Community! :slight_smile:

Below you will find a possible solution based on your example data and the requested output design. It's may a good starting point for your solution...!

Best regards
Adam

library(tidyverse)
dt <- tibble(
  id = c("batch1_dx0.0_dy-2.1_4", "batch1_dx0.0_dy0.155_3",
                   "batch1_dx0.0_dy1.23_2", "batch1_dx0.0_dy1_1", 
                   "batch1_dx-0.8_dy1.0_2", "batch1_dx0_dy1.0_1",
                   "batch1_dx1.321_dy1.0_3", "batch1_dx4.12_dy1.0_4",
                   "batch2_dx-0.8_dy1.0_2", "batch2_dx0_dy1.0_1",
                   "batch2_dx1.321_dy1.0_3", "batch2_dx4.12_dy1.0_4"),
  val1 = c(6, 2, 2, 0, 3, 1, 3, 3, 4, 5, 1, 6),
  val2 = c(6, 4, 2, 1, 1, 1, 5, 3, 2, 8, 9, 6),
  val3 = c(6, 3, 3, 0, 4, 2, 4, 1, 5, 7, 1, 6))

#prepare input data and calculate differencs on the variables val1 to val3
dt %>% 
  #split id information in four parts
  separate(col = id, into = c('batch','dx','dy','bid'), sep='_') %>% 
  #save temporary row order 
  mutate(rowid=1:nrow(dt)) %>% 
  #check which of the dx and dy cols are fixed or varied per each batch group
  group_by(batch) %>% 
  #create temporary id as helping for batch grouping 
  mutate(tmpid=cumsum(!duplicated(dx) & !duplicated(dy))) %>% 
  #sort id cols of interest for calculation
  arrange(batch, tmpid, desc(bid)) %>% 
  group_by(batch, tmpid) %>% 
  #calculate differences based on requested definition 
  mutate(dval1=val1-last(val1),
         dval2=val2-last(val2),
         dval3=val3-last(val3)) %>% 
  #redesign data to requested output format 
  ungroup() %>% 
  arrange(rowid) %>% 
  select(-rowid,-tmpid) %>% 
  unite(batch, dx, dy, bid, col = 'id', sep='_')
#> # A tibble: 12 x 7
#>    id                      val1  val2  val3 dval1 dval2 dval3
#>    <chr>                  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1 batch1_dx0.0_dy-2.1_4      6     6     6     6     5     6
#>  2 batch1_dx0.0_dy0.155_3     2     4     3     2     3     3
#>  3 batch1_dx0.0_dy1.23_2      2     2     3     2     1     3
#>  4 batch1_dx0.0_dy1_1         0     1     0     0     0     0
#>  5 batch1_dx-0.8_dy1.0_2      3     1     4     2     0     2
#>  6 batch1_dx0_dy1.0_1         1     1     2     0     0     0
#>  7 batch1_dx1.321_dy1.0_3     3     5     4     2     4     2
#>  8 batch1_dx4.12_dy1.0_4      3     3     1     2     2    -1
#>  9 batch2_dx-0.8_dy1.0_2      4     2     5    -1    -6    -2
#> 10 batch2_dx0_dy1.0_1         5     8     7     0     0     0
#> 11 batch2_dx1.321_dy1.0_3     1     9     1    -4     1    -6
#> 12 batch2_dx4.12_dy1.0_4      6     6     6     1    -2    -1

Created on 2019-05-08 by the reprex package (v0.2.1)

2 Likes

Thanks Adam! Is it possible to make this calculation block dynamic? I have many more columns with different names in my real data.

 #calculate differences based on requested definition 
 mutate(dval1=val1-last(val1),
        dval2=val2-last(val2),
        dval3=val3-last(val3)) %>% 

You can use some of the mutate_* variants like this for example

mutate_if(is.numeric, list(d = ~. - last(.))) %>%
2 Likes

There was a warning if I used mutate_if:

mutate_if() ignored the following grouping variables:
Columns batch, tmpid

Does it matter?

It should work without warnings based on Andrés nice solution!

Are your results in your real data as expected...?!

Please compare and control also your variable formats between the data example and your real data.

library(tidyverse)
dt <- tibble(
  id = c("batch1_dx0.0_dy-2.1_4", "batch1_dx0.0_dy0.155_3",
                   "batch1_dx0.0_dy1.23_2", "batch1_dx0.0_dy1_1", 
                   "batch1_dx-0.8_dy1.0_2", "batch1_dx0_dy1.0_1",
                   "batch1_dx1.321_dy1.0_3", "batch1_dx4.12_dy1.0_4",
                   "batch2_dx-0.8_dy1.0_2", "batch2_dx0_dy1.0_1",
                   "batch2_dx1.321_dy1.0_3", "batch2_dx4.12_dy1.0_4"),
  val1 = c(6, 2, 2, 0, 3, 1, 3, 3, 4, 5, 1, 6),
  val2 = c(6, 4, 2, 1, 1, 1, 5, 3, 2, 8, 9, 6),
  val3 = c(6, 3, 3, 0, 4, 2, 4, 1, 5, 7, 1, 6))

#prepare input data and calculate differencs on the variables val1 to val3
dt %>% 
  #split id information in four parts
  separate(col = id, into = c('batch','dx','dy','bid'), sep='_') %>% 
  #save temporary row order 
  mutate(rowid=1:nrow(dt)) %>% 
  #check which of the dx and dy cols are fixed or varied per each batch group
  group_by(batch) %>% 
  #create temporary id as helping for batch grouping 
  mutate(tmpid=cumsum(!duplicated(dx) & !duplicated(dy))) %>% 
  #sort id cols of interest for calculation
  arrange(batch, tmpid, desc(bid)) %>% 
  group_by(batch, tmpid) %>% 
  #calculate differences based on requested definition 
  mutate_if(is.double, list(d = ~. - last(.))) %>% 
  #redesign data to requested output format 
  ungroup() %>% 
  arrange(rowid) %>% 
  select(-rowid,-tmpid) %>% 
  unite(batch, dx, dy, bid, col = 'id', sep='_')
#> # A tibble: 12 x 7
#>    id                      val1  val2  val3 val1_d val2_d val3_d
#>    <chr>                  <dbl> <dbl> <dbl>  <dbl>  <dbl>  <dbl>
#>  1 batch1_dx0.0_dy-2.1_4      6     6     6      6      5      6
#>  2 batch1_dx0.0_dy0.155_3     2     4     3      2      3      3
#>  3 batch1_dx0.0_dy1.23_2      2     2     3      2      1      3
#>  4 batch1_dx0.0_dy1_1         0     1     0      0      0      0
#>  5 batch1_dx-0.8_dy1.0_2      3     1     4      2      0      2
#>  6 batch1_dx0_dy1.0_1         1     1     2      0      0      0
#>  7 batch1_dx1.321_dy1.0_3     3     5     4      2      4      2
#>  8 batch1_dx4.12_dy1.0_4      3     3     1      2      2     -1
#>  9 batch2_dx-0.8_dy1.0_2      4     2     5     -1     -6     -2
#> 10 batch2_dx0_dy1.0_1         5     8     7      0      0      0
#> 11 batch2_dx1.321_dy1.0_3     1     9     1     -4      1     -6
#> 12 batch2_dx4.12_dy1.0_4      6     6     6      1     -2     -1

Created on 2019-05-09 by the reprex package (v0.2.1)

1 Like

Thanks. I get expected results with the real data.

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.