How to subtract one column of data from another?

Hi,

How do I subtract one column of data from another (Sample_778981 is my comparator) and save the result in a new *.csv file? I would like to obtain the difference between each sample and Sample_778981. I have tried this with one comparison (see below). I would like to do the same for all samples at once and save the result as a CSV file.

library(dplyr)

# Read the table; row.names = 1 turns the first CSV column into the row names,
# and check.names = FALSE keeps the column names exactly as written in the file.
Data_Input <- read.csv(
  file = "./Data_Input.csv",
  stringsAsFactors = FALSE, # TRUE/FALSE spelled out: T and F can be reassigned
  check.names = FALSE,
  row.names = 1
)

# Single comparison: Sample_778951 minus the comparator Sample_778981.
# The outer parentheses print the result in addition to assigning it.
(Data_Sample_778981_vs_Sample_778951 <- Data_Input %>%
  mutate(Diff_778981_vs_778951 = Sample_778951 - Sample_778981))

Input data

# Input table: 19 rows (Gene_1 .. Gene_19) by 6 sample columns.
# Sample_778981 is the comparator column.
dput(Data_Input)
structure(list(Sample_778981 = c(0L, 184L, 60L, 0L, 7L, 0L, 87L, 
0L, 0L, 21L, 193L, 29L, 0L, 0L, 3L, 50L, 0L, 325L, 442L), Sample_778951 = c(0, 
140.5, 64, 0, 4, 0, 83, 0, 1, 51.5, 199, 25, 0, 0, 5, 62, 0, 
525, 407), Sample_778961 = c(0, 169, 45, 1, 3, 0, 122, 0, 0, 
36.5, 179, 20, 0, 0, 1, 58, 0, 494, 570), Sample_778971 = c(0L, 
107L, 67L, 0L, 5L, 0L, 99L, 0L, 0L, 63L, 178L, 34L, 0L, 0L, 2L, 
60L, 0L, 467L, 283L), Sample_778991 = c(0L, 221L, 44L, 0L, 1L, 
0L, 139L, 0L, 0L, 48L, 222L, 24L, 1L, 0L, 5L, 67L, 0L, 612L, 
451L), Sample_779001 = c(0, 120.5, 45, 1, 1, 0, 100, 0, 0, 44.5, 
202, 39, 1, 0, 3, 76, 0, 719, 681)), class = "data.frame", row.names = c("Gene_1", 
"Gene_2", "Gene_3", "Gene_4", "Gene_5", "Gene_6", "Gene_7", "Gene_8", 
"Gene_9", "Gene_10", "Gene_11", "Gene_12", "Gene_13", "Gene_14", 
"Gene_15", "Gene_16", "Gene_17", "Gene_18", "Gene_19"))

Expected output

# Expected result: one Diff_* column per sample, each equal to that sample's
# column minus Sample_778981 (so Diff_778981_vs_778981 is all zeros).
# Row names are the same Gene_1 .. Gene_19 as in the input.
dput(Data_Output)
structure(list(Diff_778981_vs_778981 = c(0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Diff_778981_vs_778951 = c(0, 
-43.5, 4, 0, -3, 0, -4, 0, 1, 30.5, 6, -4, 0, 0, 2, 12, 0, 200, 
-35), Diff_778981_vs_778961 = c(0, -15, -15, 1, -4, 0, 35, 0, 
0, 15.5, -14, -9, 0, 0, -2, 8, 0, 169, 128), Diff_778981_vs_778971 = c(0L, 
-77L, 7L, 0L, -2L, 0L, 12L, 0L, 0L, 42L, -15L, 5L, 0L, 0L, -1L, 
10L, 0L, 142L, -159L), Diff_778981_vs_778991 = c(0L, 37L, -16L, 
0L, -6L, 0L, 52L, 0L, 0L, 27L, 29L, -5L, 1L, 0L, 2L, 17L, 0L, 
287L, 9L), Diff_778981_vs_779001 = c(0, -63.5, -15, 1, -6, 0, 
13, 0, 0, 23.5, 9, 10, 1, 0, 0, 26, 0, 394, 239)), class = "data.frame", row.names = c("Gene_1", 
"Gene_2", "Gene_3", "Gene_4", "Gene_5", "Gene_6", "Gene_7", "Gene_8", 
"Gene_9", "Gene_10", "Gene_11", "Gene_12", "Gene_13", "Gene_14", 
"Gene_15", "Gene_16", "Gene_17", "Gene_18", "Gene_19"))

Thank you,

Toufiq

This is not a standard variable name. R will think it's a number. If you want it to be treated as a variable name, you need to quote it with backticks:

`778981`

@nirgrahamuk, thank you for the suggestion. I just included the revised dput() of input data and expected output.

# Difference of Sample_778951 relative to the comparator Sample_778981:
# sample minus comparator, which is the sign convention of the expected output
# (e.g. Gene_2: 140.5 - 184 = -43.5).
# The outer parentheses print the result in addition to assigning it.
(Data_778981 <- Data_Input %>%
  mutate(Diff_778981_vs_778951 = Sample_778951 - Sample_778981))

@nirgrahamuk, apologies. I have now corrected the question again.

                                                                                                                                                                                                                                                                                                                       "Gene_15", "Gene_16", "Gene_17", "Gene_18", "Gene_19"))
library(tidyverse)

# Every unordered pair of column names of Data_Input, in column order.
combinations <- combn(names(Data_Input), 2, simplify = FALSE)

# Column label for a pair: "<first>_v_<second>".
pair_label <- function(pair) {
  paste0(pair[[1]], "_v_", pair[[2]])
}

# Element-wise difference for a pair: first column minus second column.
pair_diff <- function(pair) {
  Data_Input[[pair[[1]]]] - Data_Input[[pair[[2]]]]
}

# Name each pair with its label, compute the differences, and collect them as
# the columns of one tibble. The outer parentheses print the result.
(combination_values <- combinations %>%
  set_names(map_chr(combinations, pair_label)) %>%
  map(pair_diff) %>%
  as_tibble())

@nirgrahamuk, thank you very much. This is really helpful. One question: it seems that the actual row names (such as Gene_1, Gene_2, Gene_3, etc.) are missing. Should I assign them from the Data_Input data frame, as below:

# Copy the row names of Data_Input onto combination_values; the object being
# modified goes on the left-hand side of the assignment.
rownames(combination_values) <- rownames(Data_Input)

or is there a way to assign them within the code you provided?

# Pairwise result: 15 columns, one per pair of the 6 samples, each holding the
# first-named sample minus the second-named one.
# row.names = c(NA, -19L) shows that the Gene_* row names were not carried over.
dput(combination_values)
structure(list(Sample_778981_v_Sample_778951 = c(0, 43.5, -4, 
0, 3, 0, 4, 0, -1, -30.5, -6, 4, 0, 0, -2, -12, 0, -200, 35), 
    Sample_778981_v_Sample_778961 = c(0, 15, 15, -1, 4, 0, -35, 
    0, 0, -15.5, 14, 9, 0, 0, 2, -8, 0, -169, -128), Sample_778981_v_Sample_778971 = c(0L, 
    77L, -7L, 0L, 2L, 0L, -12L, 0L, 0L, -42L, 15L, -5L, 0L, 0L, 
    1L, -10L, 0L, -142L, 159L), Sample_778981_v_Sample_778991 = c(0L, 
    -37L, 16L, 0L, 6L, 0L, -52L, 0L, 0L, -27L, -29L, 5L, -1L, 
    0L, -2L, -17L, 0L, -287L, -9L), Sample_778981_v_Sample_779001 = c(0, 
    63.5, 15, -1, 6, 0, -13, 0, 0, -23.5, -9, -10, -1, 0, 0, 
    -26, 0, -394, -239), Sample_778951_v_Sample_778961 = c(0, 
    -28.5, 19, -1, 1, 0, -39, 0, 1, 15, 20, 5, 0, 0, 4, 4, 0, 
    31, -163), Sample_778951_v_Sample_778971 = c(0, 33.5, -3, 
    0, -1, 0, -16, 0, 1, -11.5, 21, -9, 0, 0, 3, 2, 0, 58, 124
    ), Sample_778951_v_Sample_778991 = c(0, -80.5, 20, 0, 3, 
    0, -56, 0, 1, 3.5, -23, 1, -1, 0, 0, -5, 0, -87, -44), Sample_778951_v_Sample_779001 = c(0, 
    20, 19, -1, 3, 0, -17, 0, 1, 7, -3, -14, -1, 0, 2, -14, 0, 
    -194, -274), Sample_778961_v_Sample_778971 = c(0, 62, -22, 
    1, -2, 0, 23, 0, 0, -26.5, 1, -14, 0, 0, -1, -2, 0, 27, 287
    ), Sample_778961_v_Sample_778991 = c(0, -52, 1, 1, 2, 0, 
    -17, 0, 0, -11.5, -43, -4, -1, 0, -4, -9, 0, -118, 119), 
    Sample_778961_v_Sample_779001 = c(0, 48.5, 0, 0, 2, 0, 22, 
    0, 0, -8, -23, -19, -1, 0, -2, -18, 0, -225, -111), Sample_778971_v_Sample_778991 = c(0L, 
    -114L, 23L, 0L, 4L, 0L, -40L, 0L, 0L, 15L, -44L, 10L, -1L, 
    0L, -3L, -7L, 0L, -145L, -168L), Sample_778971_v_Sample_779001 = c(0, 
    -13.5, 22, -1, 4, 0, -1, 0, 0, 18.5, -24, -5, -1, 0, -1, 
    -16, 0, -252, -398), Sample_778991_v_Sample_779001 = c(0, 
    100.5, -1, -1, 0, 0, 39, 0, 0, 3.5, 20, -15, 0, 0, 2, -9, 
    0, -107, -230)), row.names = c(NA, -19L), class = c("tbl_df", 
"tbl", "data.frame"))

Setting row.names on tibbles is deprecated.
It's generally best practice to have an explicit column that holds the row names.
For example:

# Store the row names of Data_Input in an ordinary column of the result.
combination_values$row_name_col <- row.names(Data_Input)
# if you want it to show as the first column ...
# relocate() returns a new tibble rather than modifying its input, so assign
# the result back; the outer parentheses still print it.
(combination_values <- relocate(combination_values, row_name_col))
1 Like

@nirgrahamuk, thank you very much.

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.