Merging 2 data frames in R

Hi,

Kindly help me with this, as I am unable to merge two data frames based on two keys - ID and Date. Please see the code below.

install.packages("dplyr")
#> Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.1'
#> (as 'lib' is unspecified)
install.packages("tidyverse")
#> Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.1'
#> (as 'lib' is unspecified)
install.packages("ggplot")
#> Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.1'
#> (as 'lib' is unspecified)
#> Warning: package 'ggplot' is not available for this version of R
#> 
#> A version of this package for your version of R might be available elsewhere,
#> see the ideas at
#> https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
install.packages("skimr")
#> Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.1'
#> (as 'lib' is unspecified)
install.packages("dlookr")
#> Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.1'
#> (as 'lib' is unspecified)
install.packages("reprex")
#> Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.1'
#> (as 'lib' is unspecified)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyverse)
library(ggplot2)
library(skimr)
library(dlookr)
#> Either Arial Narrow or Liberation Sans Narrow fonts are required to Viz.
#> Please use dlookr::import_liberation() to install Liberation Sans Narrow font.
#> 
#> Attaching package: 'dlookr'
#> The following object is masked from 'package:tidyr':
#> 
#>     extract
#> The following object is masked from 'package:base':
#> 
#>     transform
library(reprex)

activity <- data.frame(
  stringsAsFactors = FALSE,
  Id = c(1503960366,
         1503960366,1503960366,1503960366,1503960366,
         1503960366),
  ActivityDate = c("4/12/2016",
                   "4/13/2016","4/14/2016","4/15/2016","4/16/2016",
                   "4/17/2016"),
  TotalSteps = c(13162L, 10735L, 10460L, 9762L, 12669L, 9705L),
  TotalDistance = c(8.5,
                    6.96999979019165,6.73999977111816,6.28000020980835,
                    8.15999984741211,6.48000001907349),
  TrackerDistance = c(8.5,
                      6.96999979019165,6.73999977111816,6.28000020980835,
                      8.15999984741211,6.48000001907349),
  LoggedActivitiesDistance = c(0, 0, 0, 0, 0, 0),
  VeryActiveDistance = c(1.87999999523163,1.57000005245209,2.44000005722046,
                         2.14000010490417,2.71000003814697,3.19000005722046),
  ModeratelyActiveDistance = c(0.550000011920929,0.689999997615814,0.400000005960464,
                               1.25999999046326,0.409999996423721,0.779999971389771),
  LightActiveDistance = c(6.05999994277954,4.71000003814697,3.91000008583069,
                          2.82999992370605,5.03999996185303,2.50999999046326),
  SedentaryActiveDistance = c(0, 0, 0, 0, 0, 0),
  VeryActiveMinutes = c(25L, 21L, 30L, 29L, 36L, 38L),
  FairlyActiveMinutes = c(13L, 19L, 11L, 34L, 10L, 20L),
  LightlyActiveMinutes = c(328L, 217L, 181L, 209L, 221L, 164L),
  SedentaryMinutes = c(728L, 776L, 1218L, 726L, 773L, 539L),
  Calories = c(1985L, 1797L, 1776L, 1745L, 1863L, 1728L)
)

weight <- data.frame(
  stringsAsFactors = FALSE,
  Id = c(1503960366,1503960366,
         1927972279,2873212765,2873212765,4319703577),
  Date = c("5/2/2016","5/3/2016",
           "4/13/2016","4/21/2016","5/12/2016","4/17/2016"),
  WeightKg = c(52.59999847,52.59999847,133.5,
               56.70000076,57.29999924,72.40000153),
  WeightPounds = c(115.9631465,115.9631465,
                   294.31712,125.0021043,126.3248746,159.6146812),
  Fat = c(22L, NA, NA, NA, NA, 25L),
  BMI = c(22.64999962,22.64999962,
          47.54000092,21.45000076,21.69000053,27.45000076),
  IsManualReport = c(TRUE, TRUE, FALSE, TRUE, TRUE, TRUE),
  LogId = c(1.46223e+12,1.46232e+12,
            1.46051e+12,1.46128e+12,1.4631e+12,1.46094e+12)
)

activity_weight <- merge(activity,weight,by=c("Id","Date"))
#> Error in fix.by(by.x, x): 'by' must specify a uniquely valid column
head(activity_weight)
#> Error in head(activity_weight): object 'activity_weight' not found

Created on 2021-09-13 by the reprex package (v2.0.1)

Regards,
Camille

Hello @camille :wave:

You're getting an error in because merge can't find one of your columns in the x data.frame, which is 'activity' in this case. You want to specify 'ActivityDate' instead of 'Date'.

activity_weight <- merge(activity,weight,by.x=c("Id","ActivityDate"), by.y=c("Id","Date"))

When it comes to appending data frames, the rbind() and cbind() function comes to mind because they can concatenate the data frames horizontally and vertically. In this example, we will see how to use the rbind() function to append data frames.

To append data frames in R, use the rbind() function. The rbind() is a built-in R function that can combine several vectors, matrices, and/or data frames by rows.