Match data by vertically and horizontally

Hi everybody,

I would like to look for help.

ownership<-data.frame(
      ticker = as.factor(c("AAA", "AAM", "AAT", "ABS", "ABT", "ACC", "ACL",
                           "ADS", "AGF", "AGM")),
       X2008 = as.factor(c("  ", "  ", "  ", "  ", "  ", "0.17%", "  ", "  ",
                           "19.87%", "  ")),
       X2009 = as.factor(c(NA, "0.36%", NA, NA, "19.22%", NA, "6.35%", NA,
                           "7.65%", NA)),
       X2010 = as.factor(c("3.99%", "1.44%", NA, NA, "15.60%", NA, "4.44%", NA,
                           "5.24%", NA)),
       X2011 = as.factor(c("10.42%", "1.50%", NA, NA, "9.20%", "1.64%",
                           "4.34%", NA, "4.10%", NA)),
       X2012 = as.factor(c("21.61%", "2.65%", NA, NA, "7.21%", "7.68%",
                           "5.78%", NA, "2.92%", "1.75%")),
       X2013 = as.factor(c("21.09%", "2.55%", NA, NA, "6.78%", "13.45%",
                           "4.98%", NA, "1.47%", "2.61%")),
       X2014 = as.factor(c("20.59%", "0.97%", NA, NA, "7.47%", "16.50%",
                           "3.79%", NA, "1.10%", "0.00%")),
       X2015 = as.factor(c("19.13%", "0.49%", NA, NA, "7.53%", "16.53%",
                           "3.69%", NA, "0.79%", "0.00%"))
)
df<-data.frame(
        year = c(2010L, 2009L, 2006L, 2009L, 2007L, 2015L, 2010L, 2016L, 2002L,
                 2012L),
      ticker = as.factor(c("AAA", "AAM", "ABT", "ACC", "ACL", "ACM", "ADC",
                           "ADS", "AGF", "AGM")),
        date = as.factor(c("15-07-10", "24-09-09", "25-12-06", "22-12-09",
                           "05-09-07", "23-07-15", "24-12-10", "29-06-16",
                           "03-05-02", "14-12-12"))
)

I would like to add a new variable named "percent" to data frame df.
This variable "percent" are extracted based on the variable year and ticker from the ownership data frame. In excel I use the index and match function for this. How would it run in R?

I appreciate any help
Thank you in advance.

Best regards,

You can do something like this

ownership <- data.frame(
    ticker = as.factor(c("AAA", "AAM", "AAT", "ABS", "ABT", "ACC", "ACL",
                         "ADS", "AGF", "AGM")),
    X2008 = as.factor(c("  ", "  ", "  ", "  ", "  ", "0.17%", "  ", "  ",
                        "19.87%", "  ")),
    X2009 = as.factor(c(NA, "0.36%", NA, NA, "19.22%", NA, "6.35%", NA,
                        "7.65%", NA)),
    X2010 = as.factor(c("3.99%", "1.44%", NA, NA, "15.60%", NA, "4.44%", NA,
                        "5.24%", NA)),
    X2011 = as.factor(c("10.42%", "1.50%", NA, NA, "9.20%", "1.64%",
                        "4.34%", NA, "4.10%", NA)),
    X2012 = as.factor(c("21.61%", "2.65%", NA, NA, "7.21%", "7.68%",
                        "5.78%", NA, "2.92%", "1.75%")),
    X2013 = as.factor(c("21.09%", "2.55%", NA, NA, "6.78%", "13.45%",
                        "4.98%", NA, "1.47%", "2.61%")),
    X2014 = as.factor(c("20.59%", "0.97%", NA, NA, "7.47%", "16.50%",
                        "3.79%", NA, "1.10%", "0.00%")),
    X2015 = as.factor(c("19.13%", "0.49%", NA, NA, "7.53%", "16.53%",
                        "3.69%", NA, "0.79%", "0.00%"))
)

df <- data.frame(
    year = c(2010L, 2009L, 2006L, 2009L, 2007L, 2015L, 2010L, 2016L, 2002L,
             2012L),
    ticker = as.factor(c("AAA", "AAM", "ABT", "ACC", "ACL", "ACM", "ADC",
                         "ADS", "AGF", "AGM")),
    date = as.factor(c("15-07-10", "24-09-09", "25-12-06", "22-12-09",
                       "05-09-07", "23-07-15", "24-12-10", "29-06-16",
                       "03-05-02", "14-12-12"))
)
library(tidyverse)
df %>% 
    left_join(ownership %>% 
                  mutate_all(as.character) %>% 
                  gather(year, percent, -ticker) %>% 
                  mutate(year = parse_number(year)),
              by = c("year", "ticker"))
#> Warning: Column `ticker` joining factor and character vector, coercing into
#> character vector
#>    year ticker     date percent
#> 1  2010    AAA 15-07-10   3.99%
#> 2  2009    AAM 24-09-09   0.36%
#> 3  2006    ABT 25-12-06    <NA>
#> 4  2009    ACC 22-12-09    <NA>
#> 5  2007    ACL 05-09-07    <NA>
#> 6  2015    ACM 23-07-15    <NA>
#> 7  2010    ADC 24-12-10    <NA>
#> 8  2016    ADS 29-06-16    <NA>
#> 9  2002    AGF 03-05-02    <NA>
#> 10 2012    AGM 14-12-12   1.75%

Created on 2019-11-28 by the reprex package (v0.3.0.9000)

4 Likes

Only two things to add to @andresrcs

  1. Percent is formatted as character. You’ll need to fix that before using it for calculating anything

  2. Making your data “tidy” — rows are observations and columns are variables— will vastly improve the easy of these kinds of transformations

1 Like

@ andresrcs

How would your code look like if you used pivot_longer() instead of gather() ?

kind regards

In the meantime I tried this:

df %>%
  dplyr::left_join(ownership %>%
   dplyr::mutate_all(as.character) %>%
   tidyr::pivot_longer(
   cols = starts_with("X20"),
   names_to = "year",
   values_to = "percent") %>%
   dplyr::mutate(year = parse_number(year)))
#> Error in df %>% dplyr::left_join(ownership %>% dplyr::mutate_all(as.character) %>% : could not find function "%>%"

Created on 2019-11-28 by the reprex package (v0.3.0)

I do not know why that reprex error is coming up, because in the console, output looks OK and magrittr is loaded as well.

The reprex() function runs the code on a fresh R session (with an empty environment) so you have to include library calls and data, like this

ownership <- data.frame(
    ticker = as.factor(c("AAA", "AAM", "AAT", "ABS", "ABT", "ACC", "ACL",
                         "ADS", "AGF", "AGM")),
    X2008 = as.factor(c("  ", "  ", "  ", "  ", "  ", "0.17%", "  ", "  ",
                        "19.87%", "  ")),
    X2009 = as.factor(c(NA, "0.36%", NA, NA, "19.22%", NA, "6.35%", NA,
                        "7.65%", NA)),
    X2010 = as.factor(c("3.99%", "1.44%", NA, NA, "15.60%", NA, "4.44%", NA,
                        "5.24%", NA)),
    X2011 = as.factor(c("10.42%", "1.50%", NA, NA, "9.20%", "1.64%",
                        "4.34%", NA, "4.10%", NA)),
    X2012 = as.factor(c("21.61%", "2.65%", NA, NA, "7.21%", "7.68%",
                        "5.78%", NA, "2.92%", "1.75%")),
    X2013 = as.factor(c("21.09%", "2.55%", NA, NA, "6.78%", "13.45%",
                        "4.98%", NA, "1.47%", "2.61%")),
    X2014 = as.factor(c("20.59%", "0.97%", NA, NA, "7.47%", "16.50%",
                        "3.79%", NA, "1.10%", "0.00%")),
    X2015 = as.factor(c("19.13%", "0.49%", NA, NA, "7.53%", "16.53%",
                        "3.69%", NA, "0.79%", "0.00%"))
)

df <- data.frame(
    year = c(2010L, 2009L, 2006L, 2009L, 2007L, 2015L, 2010L, 2016L, 2002L,
             2012L),
    ticker = as.factor(c("AAA", "AAM", "ABT", "ACC", "ACL", "ACM", "ADC",
                         "ADS", "AGF", "AGM")),
    date = as.factor(c("15-07-10", "24-09-09", "25-12-06", "22-12-09",
                       "05-09-07", "23-07-15", "24-12-10", "29-06-16",
                       "03-05-02", "14-12-12"))
)
library(tidyverse)
df %>% 
    left_join(ownership %>% 
                  mutate_all(as.character) %>%
                  pivot_longer(cols = -ticker, names_to = "year", values_to = "percent" ) %>% 
                  #gather(year, percent, -ticker) %>% 
                  mutate(year = parse_number(year)),
              by = c("year", "ticker"))
#> Warning: Column `ticker` joining factor and character vector, coercing into
#> character vector
#>    year ticker     date percent
#> 1  2010    AAA 15-07-10   3.99%
#> 2  2009    AAM 24-09-09   0.36%
#> 3  2006    ABT 25-12-06    <NA>
#> 4  2009    ACC 22-12-09    <NA>
#> 5  2007    ACL 05-09-07    <NA>
#> 6  2015    ACM 23-07-15    <NA>
#> 7  2010    ADC 24-12-10    <NA>
#> 8  2016    ADS 29-06-16    <NA>
#> 9  2002    AGF 03-05-02    <NA>
#> 10 2012    AGM 14-12-12   1.75%

Created on 2019-11-28 by the reprex package (v0.3.0.9000)

1 Like

add either

library(dplyr)

or

library(magrittr)

Either will bring in %>% operator

Hi,
Thank you @andresrcs and @technocrat,

It is working now. By the way I asked about pivot_longer() because it somehow is more intuitive to me than gather().

kind regards

1 Like

Great. Please mark the solution for the benefit of those to follow.

I agree with you about the tidyr methods.

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.