Merge multiple dataframe

Hi everybody, I would like to merge many data.frames at one with two conditions "year" and "ticker". I try the function multi merge but I am still confused. I would appreciate any help.
My data is below. Thank you in advance.

age<-data.frame(
        year = c(2010L, 2009L, 2006L, 2009L, 2007L, 2015L, 2010L, 2016L, 2002L,
                 2012L),
         age = c(9L, 10L, 13L, 10L, 12L, 4L, 9L, 3L, 17L, 7L),
      ticker = as.factor(c("AAA", "AAM", "ABT", "ACC", "ACL", "ACM", "ADC",
                           "ADS", "AGF", "AGM"))
)
asset_year0<-data.frame(
          year = c(2010L, 2009L, 2006L, 2009L, 2007L, 2015L, 2010L, 2016L,
                   2002L, 2012L),
        ticker = as.factor(c("AAA", "AAM", "ABT", "ACC", "ACL", "ACM", "ADC",
                             "ADS", "AGF", "AGM")),
   asset_year0 = as.factor(c("6.45E+11", "3.64E+11", "1.18E+11", "2.22E+11",
                             "2.17E+11", "6.55E+11", "26240651800", "1.27E+12",
                             "#N/A", "1.17E+12"))
)
cash_ratio<-data.frame(
         year = c(2010L, 2009L, 2006L, 2009L, 2007L, 2015L, 2010L, 2016L,
                  2002L, 2012L),
       ticker = as.factor(c("AAA", "AAM", "ABT", "ACC", "ACL", "ACM", "ADC",
                            "ADS", "AGF", "AGM")),
   cash_ratio = as.factor(c("0.094730365", "2.544216679", "0.596136038",
                            "0.300024686", "0.01552257", "0.060122892",
                            "0.307371621", "0.396210329", "#N/A", "0.760222623"))
)
domestic<-data.frame(
        year = c(2010L, 2009L, 2006L, 2009L, 2007L, 2015L, 2010L, 2016L, 2002L,
                 2012L),
      ticker = as.factor(c("AAA", "AAM", "ABT", "ACC", "ACL", "ACM", "ADC",
                           "ADS", "AGF", "AGM")),
    domestic = as.factor(c("96.01%", "99.64%", NA, NA, NA, "99.97%", "100.00%",
                           "98.99%", NA, "70.08%"))
)
state<-data.frame(
        year = c(2010L, 2009L, 2006L, 2009L, 2007L, 2015L, 2010L, 2016L, 2002L,
                 2012L),
      ticker = as.factor(c("AAA", "AAM", "ABT", "ACC", "ACL", "ACM", "ADC",
                           "ADS", "AGF", "AGM")),
       state = as.factor(c("0.00%", "0.00%", NA, NA, NA, "0.00%", "0.00%",
                           "0.00%", NA, "28.17%"))
)
liabilities<-data.frame(
                year = c(2010L, 2009L, 2006L, 2009L, 2007L, 2015L, 2010L,
                         2016L, 2002L, 2012L),
              ticker = as.factor(c("AAA", "AAM", "ABT", "ACC", "ACL", "ACM",
                                   "ADC", "ADS", "AGF", "AGM")),
   liabilities_year0 = as.factor(c("3.21443E+11", "59602228739", "49508156053",
                                   "1.03858E+11", "87512287627", "97807701313",
                                   "13573732210", "1.00883E+12", "#N/A",
                                   "8.10514E+11"))
)
region <-data.frame(
        year = c(2010L, 2009L, 2006L, 2009L, 2007L, 2015L, 2010L, 2016L, NA,
                 2012L),
      ticker = as.factor(c("AAA", "AAM", "ABT", "ACC", "ACL", "ACM", "ADC",
                           "ADS", "AGF", "AGM")),
      region = as.factor(c("Red River Delta", "Mekong River Delta",
                           "Mekong River Delta", "Southeast",
                           "Mekong River Delta", "Northeast", "Red River Delta",
                           "Red River Delta", NA, "Mekong River Delta"))
)
revenue<-data.frame(
                year = c(2010L, 2009L, 2006L, 2009L, 2007L, 2015L, 2010L,
                         2016L, 2002L, 2012L),
              ticker = as.factor(c("AAA", "AAM", "ABT", "ACC", "ACL", "ACM",
                                   "ADC", "ADS", "AGF", "AGM")),
   revenue_pre_year0 = as.factor(c("3.86E+11", "4.01E+11", "2.89E+11",
                                   "1.49E+11", "3.11E+11", "86118427480",
                                   "13963674833", "1.30E+12", "#N/A", "2.64E+12"))
)
share_outstanding<-data.frame(
                      year = c(2010L, 2009L, 2006L, 2009L, 2007L, 2015L, 2010L,
                               2016L, 2002L, 2012L),
                    ticker = as.factor(c("AAA", "AAM", "ABT", "ACC", "ACL",
                                         "ACM", "ADC", "ADS", "AGF", "AGM")),
   share_outstanding_year0 = as.factor(c("9900000", "10062236.28", "3300000",
                                         "#N/A", "9000000", "51000000",
                                         "1000000", "16270870.25", "4029130",
                                         "18200000"))
)
liabilities<-data.frame(
           year = c(2010L, 2009L, 2006L, 2009L, 2007L, 2015L, 2010L, 2016L,
                    2002L, 2012L),
   liabilities0 = c(2.9e+11, 36203906202, 49508156053, NA, 52352950367,
                    1.3e+11, 12970829044, 8.81e+11, 83466002431, 8.1e+11),
         ticker = as.factor(c("AAA", "AAM", "ABT", "ACC", "ACL", "ACM", "ADC",
                              "ADS", "AGF", "AGM"))
)

What did you try exactly ?

You should achieve this with inner_join and friends from dplyr and a bit of functional programming using purrr with reduce function I guess

See these resources

1 Like

I search google with mutlimerge function but I can not match it with my data. :frowning:

What your problem is with multimerge()?, it works as expected for me, please provide a reprex for your issue.

library(mergeutils)

age <- data.frame(
  year = c(2010L, 2009L, 2006L, 2009L, 2007L, 2015L, 2010L, 2016L, 2002L,
           2012L),
  age = c(9L, 10L, 13L, 10L, 12L, 4L, 9L, 3L, 17L, 7L),
  ticker = as.factor(c("AAA", "AAM", "ABT", "ACC", "ACL", "ACM", "ADC",
                       "ADS", "AGF", "AGM"))
)
asset_year0 <- data.frame(
  year = c(2010L, 2009L, 2006L, 2009L, 2007L, 2015L, 2010L, 2016L,
           2002L, 2012L),
  ticker = as.factor(c("AAA", "AAM", "ABT", "ACC", "ACL", "ACM", "ADC",
                       "ADS", "AGF", "AGM")),
  asset_year0 = as.factor(c("6.45E+11", "3.64E+11", "1.18E+11", "2.22E+11",
                            "2.17E+11", "6.55E+11", "26240651800", "1.27E+12",
                            "#N/A", "1.17E+12"))
)
cash_ratio <- data.frame(
  year = c(2010L, 2009L, 2006L, 2009L, 2007L, 2015L, 2010L, 2016L,
           2002L, 2012L),
  ticker = as.factor(c("AAA", "AAM", "ABT", "ACC", "ACL", "ACM", "ADC",
                       "ADS", "AGF", "AGM")),
  cash_ratio = as.factor(c("0.094730365", "2.544216679", "0.596136038",
                           "0.300024686", "0.01552257", "0.060122892",
                           "0.307371621", "0.396210329", "#N/A", "0.760222623"))
)

multimerge(list(age, asset_year0, cash_ratio), by = list(c("year", "ticker")))
#>    year ticker age asset_year0  cash_ratio
#> 1  2002    AGF  17        #N/A        #N/A
#> 2  2006    ABT  13    1.18E+11 0.596136038
#> 3  2007    ACL  12    2.17E+11  0.01552257
#> 4  2009    AAM  10    3.64E+11 2.544216679
#> 5  2009    ACC  10    2.22E+11 0.300024686
#> 6  2010    AAA   9    6.45E+11 0.094730365
#> 7  2010    ADC   9 26240651800 0.307371621
#> 8  2012    AGM   7    1.17E+12 0.760222623
#> 9  2015    ACM   4    6.55E+11 0.060122892
#> 10 2016    ADS   3    1.27E+12 0.396210329

Created on 2019-11-29 by the reprex package (v0.3.0)

2 Likes

Your code looks so simple. But I made it too complicated. I found one suggestion in google as follows but I failed to replicate it.
multmerge = function(mypath){
filenames=list.files(path=mypath, full.names=TRUE)
datalist = lapply(filenames, function(x){read.csv(file=x,header=T)})
Reduce(function(x,y) {merge(x,y)}, datalist)

Thank you so much for your support these days. It supports my research a lot. Really appreciate.

That code is for reading multiple csv files into a single dataframe, not multiple dataframes, and they all must have the same column names.

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.