get variables with regular expression in function

I have column names like cc32.1_1,cc32.1_2,cc32.1_3,cc32.1_4,cc32.1_text,cc33.1_1,cc33.1_2,cc33.1_3,cc33.1_Text,cc33.2_1,cc33.2_2,cc33.2_3,cc33.2_4,cc33.2_5,cc33.2_6,cc33.2_7,cc33.2_8,cc33.2_8,cc33.2_9,cc33.2_10,cc33.2_11,cc33.2_Text.

I want to pass a parameter like "cc32.1_" so that it selects the variables c(cc32.1_1,cc32.1_2,cc32.1_3,cc32.1_4) but not cc32.1_text, and likewise pass "cc33.2_" to select all of c(cc33.2_1,cc33.2_2,cc33.2_3,cc33.2_4,cc33.2_5,cc33.2_6,cc33.2_7,cc33.2_8,cc33.2_8,cc33.2_9,cc33.2_10,cc33.2_11) but not cc33.2_Text.

#' Select column names containing a pattern, skipping "_text" columns
#'
#' @param dataset Object whose `colnames()` are searched (e.g. a data frame).
#' @param pattern Regular expression the column name must contain, e.g.
#'   "cc32.1_". It is used as a regex, so "." matches any character.
#' @param banner,label Not used inside this function.
#' @return Character vector of the matching column names, `character(0)`
#'   when nothing matches.
Fun1 <- function(dataset, pattern, banner, label) {
  print(pattern)
  # The negative lookbehind rejects names that end in "_text" in any letter
  # case ("_text", "_Text", "_TEXT"). Lookbehind is a PCRE feature, so grep()
  # must be called with perl = TRUE.
  npatt <- paste0(pattern, ".*(?<!(?i:_text))$")
  vars <- grep(npatt, colnames(dataset), value = TRUE, perl = TRUE)
  vars
}

Fun1(data, "cc32.1_", banner = banner, label = "Table 1")

Does this do what you want?

# One-row example frame with the column names listed in the question.
# cc33.2_8 appears twice there; data.frame() renames the second occurrence
# to cc33.2_8.1.
DF <- data.frame(
  cc32.1_1 = 1, cc32.1_2 = 1, cc32.1_3 = 1, cc32.1_4 = 1, cc32.1_text = 1,
  cc33.1_1 = 1, cc33.1_2 = 1, cc33.1_3 = 1, cc33.1_Text = 1,
  cc33.2_1 = 1, cc33.2_2 = 1, cc33.2_3 = 1, cc33.2_4 = 1,
  cc33.2_5 = 1, cc33.2_6 = 1, cc33.2_7 = 1, cc33.2_8 = 1,
  cc33.2_8 = 1, cc33.2_9 = 1, cc33.2_10 = 1, cc33.2_11 = 1,
  cc33.2_Text = 1
)
DF
#>   cc32.1_1 cc32.1_2 cc32.1_3 cc32.1_4 cc32.1_text cc33.1_1 cc33.1_2 cc33.1_3
#> 1        1        1        1        1           1        1        1        1
#>   cc33.1_Text cc33.2_1 cc33.2_2 cc33.2_3 cc33.2_4 cc33.2_5 cc33.2_6 cc33.2_7
#> 1           1        1        1        1        1        1        1        1
#>   cc33.2_8 cc33.2_8.1 cc33.2_9 cc33.2_10 cc33.2_11 cc33.2_Text
#> 1        1          1        1         1         1           1

#' Column names made of a pattern followed by at least one digit
#'
#' @param dataset Object whose `colnames()` are searched (e.g. a data frame).
#' @param pattern Regular expression that must be followed by one or more
#'   digits somewhere in the column name, e.g. "cc32.1_".
#' @param banner,label Not used inside this function.
#' @return Character vector of the matching column names.
Fun1 <- function(dataset, pattern, banner, label) {
  print(pattern)
  # Requiring digits right after the pattern leaves out names such as
  # "cc32.1_text".
  digit_regex <- paste0(pattern, "\\d+")
  grep(digit_regex, colnames(dataset), value = TRUE)
}
banner <- "XXX"
Fun1(DF, "cc32.1_", banner = banner, label = "Table 1")
#> [1] "cc32.1_"
#> [1] "cc32.1_1" "cc32.1_2" "cc32.1_3" "cc32.1_4"

Created on 2022-06-01 by the reprex package (v2.0.1)

Maybe this isn't exactly what you need, but might I suggest dplyr::select() with the matches() helper here? If you are trying to pick columns from a data frame by regular expression, this would be a great way to do it:

library(dplyr)

# From FJCC's excellent response
# One-row example frame; the repeated cc33.2_8 argument ends up as a column
# named cc33.2_8.1 because data.frame() makes duplicate names unique.
df <- data.frame(
  cc32.1_1 = 1, cc32.1_2 = 1, cc32.1_3 = 1, cc32.1_4 = 1, cc32.1_text = 1,
  cc33.1_1 = 1, cc33.1_2 = 1, cc33.1_3 = 1, cc33.1_Text = 1,
  cc33.2_1 = 1, cc33.2_2 = 1, cc33.2_3 = 1, cc33.2_4 = 1,
  cc33.2_5 = 1, cc33.2_6 = 1, cc33.2_7 = 1, cc33.2_8 = 1,
  cc33.2_8 = 1, cc33.2_9 = 1, cc33.2_10 = 1, cc33.2_11 = 1,
  cc33.2_Text = 1
)

# Keep every column whose name matches the regular expression "cc32.1_".
selected_df <- select(df, matches("cc32.1_"))

selected_df
#>   cc32.1_1 cc32.1_2 cc32.1_3 cc32.1_4 cc32.1_text
#> 1        1        1        1        1           1

Then if you wanted just the column names, you could just do names(selected_df).

1 Like

To complement @dvetsch75's solution, you can omit the columns ending in "text" by narrowing the regular expression's scope:

library(dplyr)

# One-row example frame; the repeated cc33.2_8 argument ends up as a column
# named cc33.2_8.1 because data.frame() makes duplicate names unique.
df <- data.frame(
  cc32.1_1 = 1, cc32.1_2 = 1, cc32.1_3 = 1, cc32.1_4 = 1, cc32.1_text = 1,
  cc33.1_1 = 1, cc33.1_2 = 1, cc33.1_3 = 1, cc33.1_Text = 1,
  cc33.2_1 = 1, cc33.2_2 = 1, cc33.2_3 = 1, cc33.2_4 = 1,
  cc33.2_5 = 1, cc33.2_6 = 1, cc33.2_7 = 1, cc33.2_8 = 1,
  cc33.2_8 = 1, cc33.2_9 = 1, cc33.2_10 = 1, cc33.2_11 = 1,
  cc33.2_Text = 1
)

# "\\d+" requires digits after the underscore, so cc32.1_text is left out.
select(df, matches("cc32.1_\\d+"))
#>   cc32.1_1 cc32.1_2 cc32.1_3 cc32.1_4
#> 1        1        1        1        1

Created on 2022-06-01 by the reprex package (v2.0.1)

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.