Operating list of dataframes

My data is composed by a list of dfs: 268 dfs contained in z the name of the list
Each df has the following str

df <- structure(list(id = structure(c(50109025, 60901029, 140103026, 
50705001, 110113007, 111201026, 50521001, 111201007, 60901030, 
50508026, 50508026, 110113003, 110113008, 111202009, 140103023, 
50203026, 110113005, 140102088, 110104030, 140102095), label = "Identificador", format.spss = "F10.0", display_width = 10L), 
    name = c("ABCA1", "ABCA1", "ABCA1", "ABCA1", "ABCA1", "ABCA1", 
    "ABCA1", "ABCA1", "ABCA1", "ABCA1", "ABCA1", "ABCA1", "ABCA1", 
    "ABCA1", "ABCA1", "ABCA1", "ABCA1", "ABCA1", "ABCA1", "ABCA1"
    ), `1` = c(1.063, 1.623, 1.078, 1.765, 1.596, 0.365, 0.53, 
    0.672, 1.77, 0.875, 0.875, 1.426, 0.569, 0.921, 0.988, 1.017, 
    1.208, 0.642, 0.703, 0.834), `3` = c(0.753, 2.05, 2.132, 
    1.961, 1.165, 0.764, 1.983, 1.59, 1.028, 1.488, 1.488, 2.226, 
    1.16, 0.967, 0.95, 1.761, 1.488, 0.871, 5.634, 0.898)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -20L))

# This dfs came from splitted df according to the variable **name**

My idea is to iterate through the 268 dfs, each one with differente name, and perform paired t.test between the columns 1 and 3, with the id column as grouped variable

#1st approach

names <- names(z)
rowvars <- c(names)
rowvars <- base::sort(rowvars)

for (v in 1:length(rowvars)) {
  
P1 <- t.test(x= z[,v[3]][, "1"], y= z[,v[4]][, "3"], paired = TRUE)$p.value

}


# 2nd approach

lapply(z, function(x){t.test(x = z[[x]][, "1"], y = z[[x]][, "3"], paired = TRUE)
})

# purrr approach

I have tried purrr drafts but enough consistent to post them

All I am getting is errors: "Enough observations of x"

Thank in advance

lapply(z, function(x){t.test(x = x[, "1"], y = x[, "3"], paired = TRUE)})

I am getting the following error
Error in t.test.default(x = x[, "1"], y = x[, "3"], paired = TRUE) :
not enough 'x' observations

I don't get it because when I do it individually df per df across the list of dfs individually, it works.
And somehow, here is not working

based on your reported error message its clear that there is some dataframe in z which has empty dimensions.
By what process have you combined your data.frames of interest together into z ?
Have you really manually evaluated whether all 268 are computable ?

you can use an apply to find the rows of each frame in z, I would try to diagnose from that ...

rows_in_each_frame_of_z <- sapply(z, nrow)

I combined 2 processes (I am working with both z and m, that are supposed to be equal):

# My original database is this way, and several transformations led me to get this:

# 1st approach
m <- lapply(k, function(x){ 
    x%>%
     group_by(id, name) %>%
     summarise_all(na.omit) 

#Error:
! Must subset columns with a valid subscript vector.
i Logical subscripts must match the size of the indexed input.
x Input has size 1 but subscript `yok` has size 136.

# 2nd approach 
z<-purrr::map(k, ~ .x %>% group_by(id, name) %>%dplyr::summarise(across(everything(), na.omit), .groups = 'drop'))

#Error: 
Error in t.test.default(x = x[, "1"], y = x[, "3"], paired = TRUE) : 
  not enough 'x' observations

You must be asking where k does it come from? Well, this is a small piece:

df <- structure(list(id = structure(c(70107034, 50203008, 120715021, 
130108009, 50203004, 130105045, 140103027, 120715022, 140102095, 
50203022, 110104029, 50203004, 120715033, 110104027, 110606056, 
140103029, 120715033, 60901031, 110104029, 60901023, 50521001, 
70204055, 50203009, 70802013, 110104030, 50203016, 110113008, 
130102013, 140101088, 60901030, 71801002, 111202007, 110110005, 
60901020, 120715034, 60901038, 71801002, 50203014, 140102089, 
70107034, 50203016, 110104028, 130102012, 50601001, 70802013, 
120715033, 60901020, 50203017, 60901021, 70111024, 120715037, 
111202015, 71801001, 110606061, 70713001, 50203011, 140102089, 
50203009, 60901034, 120715021), label = "Identificador", format.spss = "F10.0", display_width = 10L), 
    name = c("coltot_hd", "cec", "totalsfa", "iohtyru", "hdlox_max", 
    "ialbu", "c3_sr4", "ibnpt", "in_mohtyu", "IL8RB", "hdl_vd_pf", 
    "iantitri", "saa", "pon_r", "pon", "cetp_r", "totaln6pufa", 
    "CXCL8_IL8_", "ihomocis", "n6_n3pufa", "c22_4n6", "c3_sr4", 
    "iapoa1", "PTGS2", "ABCG1", "ldl_no", "i_lbp_1", "NAMPT", 
    "S1PR3", "hii", "lcat_r", "iapoa1_1", "c20_3n6", "hdl09", 
    "itbars", "c20_5n3", "idopac", "ilpa_co", "ityru", "hdl03", 
    "itau", "ialbu", "invhii_n", "ldlox_oxr_r", "hdl_vd_pf", 
    "gluco", "MCP1", "log10il6_IFM", "CHUK", "ihdl", "ldl_tox_hepg2_r", 
    "hdlox_oxr_r", "collib_hd", "i_vcam_1", "hdl_intcol_r", "c16_1n7cis", 
    "c20", "itau", "IL10", "iapoe_spinreact"), `1` = c(NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, 0.277, NA, 97.59, NA, 0.96141189172, 
    29361.254294, 1.10115321966132, NA, 1.657, NA, NA, NA, NA, 
    NA, NA, NA, 1.01808, NA, 0.989, NA, 0.0581779377498283, 1.0315708735, 
    0.92, NA, NA, NA, NA, NA, NA, NA, 8.1, NA, NA, NA, 0.72956479539, 
    NA, NA, 2.744, NA, NA, NA, NA, 0.53636161795, NA, NA, NA, 
    NA, NA, NA, 0.313, 0.24), `3` = c(71.56, NA, NA, NA, 4.42149758, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, 0.87, 1.353, 6.536, NA, NA, NA, 1.006, NA, NA, NA, 
    NA, 2.3, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.623477768337581, 
    NA, NA, NA, 1.014, NA, NA, NA, 27.39, NA, 1.3829923112, NA, 
    NA, NA, NA, NA)), row.names = c(NA, -60L), class = c("tbl_df", 
"tbl", "data.frame"))

Then splitting by name variable I obtain k

k <- df  %>%   split(.$name)

I checked manually 4 or 5, not 268 entries.

you can use an apply to find the rows of each frame in z, I would try to diagnose from that ... I don't understand what you mean
Thanks in advance

I meant the code that I gave you. Here it is again

rows_in_each_frame_of_z <- sapply(z, nrow)

right, so at least one is empty and thats your problem

Do you have any suggestion in how to find the empty one?

for the third time ....

Sorry I did not understand it. Thank you for the support

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.