You can also use set operations on the column names (see the base R and dplyr documentation for `intersect()` and `setdiff()`):
# Smaller example data frame: columns a-d, each holding 5 uniform random draws
data_short <- data.frame(
  a = runif(5),
  b = runif(5),
  c = runif(5),
  d = runif(5)
)
# Wider example data frame: the same columns a-d plus two extra ones (e, f),
# each holding 5 uniform random draws
data_long <- data.frame(
  a = runif(5),
  b = runif(5),
  c = runif(5),
  d = runif(5),
  e = runif(5),
  f = runif(5)
)
# Column names shared by the two data frames
intersect(colnames(data_short), colnames(data_long))
#> [1] "a" "b" "c" "d"
# Column names present in the longer data frame but missing from the shorter one
setdiff(colnames(data_long), colnames(data_short))
#> [1] "e" "f"
# Keep only the shared columns of the longer data frame; indexing with a
# character vector via `[` always returns a data frame
data_long[intersect(colnames(data_long), colnames(data_short))]
#>           a         b         c         d
#> 1 0.1540338 0.7234066 0.5916640 0.7219967
#> 2 0.5342947 0.7615840 0.4242600 0.5354202
#> 3 0.7874348 0.5796321 0.4673035 0.2321965
#> 4 0.7270508 0.5580596 0.8692353 0.8039400
#> 5 0.4221739 0.5960718 0.8802889 0.3052120
# Keep only the columns that exist solely in the longer data frame
data_long[setdiff(colnames(data_long), colnames(data_short))]
#>           e          f
#> 1 0.8467308 0.81951804
#> 2 0.7265840 0.45185584
#> 3 0.2096210 0.69614875
#> 4 0.9464625 0.90677953
#> 5 0.4143425 0.06293806
# Loaded only here, after the base R examples: dplyr masks the base set
# functions (see the attach message below)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#>     filter, lag
#> The following objects are masked from 'package:base':
#>
#>     intersect, setdiff, setequal, union
# The same shared-column selection, expressed with dplyr::select()
select(data_long, intersect(names(data_long), names(data_short)))
#>           a         b         c         d
#> 1 0.1540338 0.7234066 0.5916640 0.7219967
#> 2 0.5342947 0.7615840 0.4242600 0.5354202
#> 3 0.7874348 0.5796321 0.4673035 0.2321965
#> 4 0.7270508 0.5580596 0.8692353 0.8039400
#> 5 0.4221739 0.5960718 0.8802889 0.3052120
Created on 2018-10-01 by the reprex package (v0.2.1)