Help with bind_rows to combine different data

Hi,

I have a tsibble with incomplete values (NAs) in some variables, as seen in the data df below. I worked out the correct values for the NAs in each of these variables, and the replacement values are now stored in data frames A, B, and D. I would now like to combine the new values in A, B, and D with data frame df in order to fill the NAs. However, when I try to do so with bind_rows(), nothing happens.

# Main data: one row per month of 2020; NA marks a value that is still missing
df <- data.frame(
  # "2020 Jan" ... "2020 Dec", built from base R's English month abbreviations
  month = paste("2020", month.abb),
  sales = c(
    2061292, 2087140, 2136628, 449335, 1105069, 1535344, 2130425,
    rep(NA_real_, 5)
  ),
  A = c(
    5067331.423, 4856897.658, 4175123.217,
    3494987.878, 3768201.526, 4058111.486,
    rep(NA_real_, 6)
  ),
  B = c(153, 146, 115, 108, 133, 150, 153, 152, rep(NA_real_, 4)),
  # C is the only measure with no missing values
  C = c(
    58.247345, 50.548263, 30.994029, 20.521175, 28.040035, 38.072555,
    40.682499, 39.97, 39.92, 40.24, 40.6, 40.92
  ),
  D = c(
    609026, 595426.8, 598968.2, 544902.2, 556805.2, 553345.6,
    rep(NA_real_, 6)
  )
)

I worked on getting the right values for NAs in each of these variables and now we have all the values as seen below:

# A, B, and D store the values to be used in place of the NAs in df.
# Replacement values for variable A (Jul-Dec 2020)
A <- data.frame(
  month = paste("2020", month.abb[7:12]),
  A = c(
    552674.432361771, 559871.048209667, 531612.90726691,
    557754.060786167, 570807.867236923, 593736.174013134
  )
)
# Replacement values for variable B (Sep-Dec 2020)
B <- data.frame(
  month = paste("2020", month.abb[9:12]),
  B = c(
    152.803823829201, 149.34086404708,
    166.594832524435, 168.13347140866
  )
)

# Replacement values for variable D (Jul-Dec 2020)
D <- data.frame(
  month = paste("2020", month.abb[7:12]),
  D = c(
    4087858.31971597, 4241141.57607399, 4099146.30714426,
    4399586.69570602, 4193662.44947109, 4770070.75713772
  )
)

Now, when I try to combine the new values of A, B, and D with data frame df in order to fill the NAs using the following code, nothing happens.

# Attempt to merge the replacement values for A into df.
# NOTE(review): `df` is supplied twice here (piped in and again as the first
# explicit argument), and bind_rows() stacks its inputs as additional rows; it
# does not match on `month`, so the NA cells in the original rows stay NA.
df <- df %>%
  bind_rows(df, A)

Can you please help? Thank you!

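That's not how bind_rows() works: it stacks data frames on top of each other as extra rows, rather than matching rows by a key and filling in values. For this specific use case I would recommend using the new rows_update() function in dplyr 1.0.0.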

library(dplyr, warn.conflicts = FALSE)

# Main data: one row per month of 2020; NA marks a value that is still missing
df <- data.frame(
  # "2020 Jan" ... "2020 Dec", built from base R's English month abbreviations
  month = paste("2020", month.abb),
  sales = c(
    2061292, 2087140, 2136628, 449335, 1105069, 1535344, 2130425,
    rep(NA_real_, 5)
  ),
  A = c(
    5067331.423, 4856897.658, 4175123.217,
    3494987.878, 3768201.526, 4058111.486,
    rep(NA_real_, 6)
  ),
  B = c(153, 146, 115, 108, 133, 150, 153, 152, rep(NA_real_, 4)),
  # C is the only measure with no missing values
  C = c(
    58.247345, 50.548263, 30.994029, 20.521175, 28.040035, 38.072555,
    40.682499, 39.97, 39.92, 40.24, 40.6, 40.92
  ),
  D = c(
    609026, 595426.8, 598968.2, 544902.2, 556805.2, 553345.6,
    rep(NA_real_, 6)
  )
)

# A, B, and D store the values to be used in place of the NAs in df.

# Replacement values for variable A (Jul-Dec 2020)
A <- data.frame(
  month = paste("2020", month.abb[7:12]),
  A = c(
    552674.432361771, 559871.048209667, 531612.90726691,
    557754.060786167, 570807.867236923, 593736.174013134
  )
)
# Replacement values for variable B (Sep-Dec 2020)
B <- data.frame(
  month = paste("2020", month.abb[9:12]),
  B = c(
    152.803823829201, 149.34086404708,
    166.594832524435, 168.13347140866
  )
)

# Replacement values for variable D (Jul-Dec 2020)
D <- data.frame(
  month = paste("2020", month.abb[7:12]),
  D = c(
    4087858.31971597, 4241141.57607399, 4099146.30714426,
    4399586.69570602, 4193662.44947109, 4770070.75713772
  )
)

# Fold the replacement tables into `df` one after another (A, then B, then D),
# matching rows on `month`. rows_update() overwrites the columns present in
# each table for the matched rows; dplyr's rows_patch() is the variant that
# only overwrites NA cells. The result is not assigned, so it auto-prints.
# NOTE(review): the replacement values in `A` (~5.5e5) are on the scale of the
# existing df$D values, and those in `D` (~4e6) on the scale of the existing
# df$A values -- the two tables may be swapped in the original post; confirm.
Reduce(
  function(filled, new_values) rows_update(filled, new_values, by = "month"),
  x = list(A, B, D),
  init = df
)
#>       month   sales         A        B        C         D
#> 1  2020 Jan 2061292 5067331.4 153.0000 58.24735  609026.0
#> 2  2020 Feb 2087140 4856897.7 146.0000 50.54826  595426.8
#> 3  2020 Mar 2136628 4175123.2 115.0000 30.99403  598968.2
#> 4  2020 Apr  449335 3494987.9 108.0000 20.52117  544902.2
#> 5  2020 May 1105069 3768201.5 133.0000 28.04003  556805.2
#> 6  2020 Jun 1535344 4058111.5 150.0000 38.07256  553345.6
#> 7  2020 Jul 2130425  552674.4 153.0000 40.68250 4087858.3
#> 8  2020 Aug      NA  559871.0 152.0000 39.97000 4241141.6
#> 9  2020 Sep      NA  531612.9 152.8038 39.92000 4099146.3
#> 10 2020 Oct      NA  557754.1 149.3409 40.24000 4399586.7
#> 11 2020 Nov      NA  570807.9 166.5948 40.60000 4193662.4
#> 12 2020 Dec      NA  593736.2 168.1335 40.92000 4770070.8

Created on 2020-09-11 by the reprex package (v0.3.0)

1 Like

Perfect! Thank you so much!

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.