Pick up data by quarterly

Hello everybody,

I am studying about the IPO (Initial public offering) and dealing with data in 20 years with 897 stocks.
My data is like that,

data<-data.frame(
year = c(2008L, 2008L, 2008L, 2008L, 2009L, 2009L, 2009L, 2009L,
2010L, 2010L, 2010L, 2010L, 2011L, 2011L, 2011L, 2011L,
2008L, 2008L, 2008L, 2008L, 2009L, 2009L, 2009L, 2009L, 2010L,
2010L, 2010L, 2010L, 2011L, 2011L, 2011L, 2011L, 2008L, 2008L,
2008L, 2008L, 2009L, 2009L, 2009L, 2009L),
total_assets = c(NA, NA, NA, NA, NA, NA, NA, NA, 4.74e+11, 5.41e+11,
6.01e+11, 6.45e+11, 6.55e+11, 7.38e+11, 7.99e+11, 8.17e+11,
NA, NA, NA, NA, 3.43e+11, 3.25e+11, 3.37e+11, 3.64e+11,
3.6e+11, 3.48e+11, 3.55e+11, 3.32e+11, 3.27e+11, 3.35e+11,
3.59e+11, 3.29e+11, 4.97e+11, 4.27e+11, 3.87e+11, 4.07e+11,
3.93e+11, 4.14e+11, 5.17e+11, 5.31e+11),
quarter = as.factor(c("Q1", "Q2", "Q3", "Q4", "Q1", "Q2", "Q3", "Q4",
"Q1", "Q2", "Q3", "Q4", "Q1", "Q2", "Q3", "Q4",
"Q1", "Q2", "Q3", "Q4", "Q1", "Q2", "Q3", "Q4",
"Q1", "Q2", "Q3", "Q4", "Q1", "Q2", "Q3", "Q4", "Q1",
"Q2", "Q3", "Q4", "Q1", "Q2", "Q3", "Q4")),
ticker = as.factor(c("AAA", "AAA", "AAA", "AAA", "AAA", "AAA", "AAA",
"AAA", "AAA", "AAA", "AAA", "AAA", "AAA", "AAA",
"AAA", "AAA", "AAM", "AAM", "AAM", "AAM", "AAM",
"AAM", "AAM", "AAM", "AAM", "AAM", "AAM", "AAM",
"AAM", "AAM", "AAM", "AAM", "ABT", "ABT", "ABT", "ABT",
"ABT", "ABT", "ABT", "ABT")),
code = as.factor(c("AAAQ12008", "AAAQ22008", "AAAQ32008",
"AAAQ42008", "AAAQ12009", "AAAQ22009",
"AAAQ32009", "AAAQ42009", "AAAQ12010", "AAAQ22010",
"AAAQ32010", "AAAQ42010", "AAAQ12011", "AAAQ22011",
"AAAQ32011", "AAAQ42011", "AAMQ12008", "AAMQ22008",
"AAMQ32008", "AAMQ42008", "AAMQ12009", "AAMQ22009",
"AAMQ32009", "AAMQ42009", "AAMQ12010", "AAMQ22010",
"AAMQ32010", "AAMQ42010", "AAMQ12011", "AAMQ22011",
"AAMQ32011", "AAMQ42011", "ABTQ12008", "ABTQ22008",
"ABTQ32008", "ABTQ42008", "ABTQ12009", "ABTQ22009",
"ABTQ32009", "ABTQ42009"))
)
IPO_quarter<-data.frame(
year = c(2010L, 2009L, 2006L),
ticker = as.factor(c("AAA", "AAM", "ABT")),
date = as.factor(c("15-07-10", "24-09-09", "25-12-06")),
quarter = as.factor(c("Q3", "Q3", "Q4")),
code = as.factor(c("AAAQ32010", "AAMQ32009", "ABTQ42006"))
)

I would like to pick up data by quarterly, (one quarter, two quarter, three quarter, four quarter after IPO quarter)
For example: Stock AAA has an IPO quarter at Q32010, I would like to pick up data of total _assets at Q42010, Q12011, Q22011, Q32011.
My output is something like that


I tried some codes about the loop but failed. I would like to ask for help. I really appreciate any helps.
Kind regards,

Hi @sirius1170. You can use lead to get the next values in the vector. Hope the following code can help.

library(tidyverse)

data<-data.frame(
  year = c(2008L, 2008L, 2008L, 2008L, 2009L, 2009L, 2009L, 2009L,
           2010L, 2010L, 2010L, 2010L, 2011L, 2011L, 2011L, 2011L,
           2008L, 2008L, 2008L, 2008L, 2009L, 2009L, 2009L, 2009L, 2010L,
           2010L, 2010L, 2010L, 2011L, 2011L, 2011L, 2011L, 2008L, 2008L,
           2008L, 2008L, 2009L, 2009L, 2009L, 2009L),
  total_assets = c(NA, NA, NA, NA, NA, NA, NA, NA, 4.74e+11, 5.41e+11,
                   6.01e+11, 6.45e+11, 6.55e+11, 7.38e+11, 7.99e+11, 8.17e+11,
                   NA, NA, NA, NA, 3.43e+11, 3.25e+11, 3.37e+11, 3.64e+11,
                   3.6e+11, 3.48e+11, 3.55e+11, 3.32e+11, 3.27e+11, 3.35e+11,
                   3.59e+11, 3.29e+11, 4.97e+11, 4.27e+11, 3.87e+11, 4.07e+11,
                   3.93e+11, 4.14e+11, 5.17e+11, 5.31e+11),
  quarter = as.factor(c("Q1", "Q2", "Q3", "Q4", "Q1", "Q2", "Q3", "Q4",
                        "Q1", "Q2", "Q3", "Q4", "Q1", "Q2", "Q3", "Q4",
                        "Q1", "Q2", "Q3", "Q4", "Q1", "Q2", "Q3", "Q4",
                        "Q1", "Q2", "Q3", "Q4", "Q1", "Q2", "Q3", "Q4", "Q1",
                        "Q2", "Q3", "Q4", "Q1", "Q2", "Q3", "Q4")),
  ticker = as.factor(c("AAA", "AAA", "AAA", "AAA", "AAA", "AAA", "AAA",
                       "AAA", "AAA", "AAA", "AAA", "AAA", "AAA", "AAA",
                       "AAA", "AAA", "AAM", "AAM", "AAM", "AAM", "AAM",
                       "AAM", "AAM", "AAM", "AAM", "AAM", "AAM", "AAM",
                       "AAM", "AAM", "AAM", "AAM", "ABT", "ABT", "ABT", "ABT",
                       "ABT", "ABT", "ABT", "ABT")),
  code = as.factor(c("AAAQ12008", "AAAQ22008", "AAAQ32008",
                     "AAAQ42008", "AAAQ12009", "AAAQ22009",
                     "AAAQ32009", "AAAQ42009", "AAAQ12010", "AAAQ22010",
                     "AAAQ32010", "AAAQ42010", "AAAQ12011", "AAAQ22011",
                     "AAAQ32011", "AAAQ42011", "AAMQ12008", "AAMQ22008",
                     "AAMQ32008", "AAMQ42008", "AAMQ12009", "AAMQ22009",
                     "AAMQ32009", "AAMQ42009", "AAMQ12010", "AAMQ22010",
                     "AAMQ32010", "AAMQ42010", "AAMQ12011", "AAMQ22011",
                     "AAMQ32011", "AAMQ42011", "ABTQ12008", "ABTQ22008",
                     "ABTQ32008", "ABTQ42008", "ABTQ12009", "ABTQ22009",
                     "ABTQ32009", "ABTQ42009"))
)

data %>%
  select(ticker, year, quarter, code, total_assets) %>%
  arrange(year, quarter) %>%
  group_by(ticker) %>%
  mutate(total_assets_Q1 = lead(total_assets, 1),
         total_assets_Q2 = lead(total_assets, 2),
         total_assets_Q3 = lead(total_assets, 3),
         total_assets_Q4 = lead(total_assets, 4)) %>%
  arrange(ticker, year, quarter) %>%
  select(-total_assets) %>%
  as.data.frame()
#>    ticker year quarter      code total_assets_Q1 total_assets_Q2
#> 1     AAA 2008      Q1 AAAQ12008              NA              NA
#> 2     AAA 2008      Q2 AAAQ22008              NA              NA
#> 3     AAA 2008      Q3 AAAQ32008              NA              NA
#> 4     AAA 2008      Q4 AAAQ42008              NA              NA
#> 5     AAA 2009      Q1 AAAQ12009              NA              NA
#> 6     AAA 2009      Q2 AAAQ22009              NA              NA
#> 7     AAA 2009      Q3 AAAQ32009              NA        4.74e+11
#> 8     AAA 2009      Q4 AAAQ42009        4.74e+11        5.41e+11
#> 9     AAA 2010      Q1 AAAQ12010        5.41e+11        6.01e+11
#> 10    AAA 2010      Q2 AAAQ22010        6.01e+11        6.45e+11
#> 11    AAA 2010      Q3 AAAQ32010        6.45e+11        6.55e+11
#> 12    AAA 2010      Q4 AAAQ42010        6.55e+11        7.38e+11
#> 13    AAA 2011      Q1 AAAQ12011        7.38e+11        7.99e+11
#> 14    AAA 2011      Q2 AAAQ22011        7.99e+11        8.17e+11
#> 15    AAA 2011      Q3 AAAQ32011        8.17e+11              NA
#> 16    AAA 2011      Q4 AAAQ42011              NA              NA
#> 17    AAM 2008      Q1 AAMQ12008              NA              NA
#> 18    AAM 2008      Q2 AAMQ22008              NA              NA
#> 19    AAM 2008      Q3 AAMQ32008              NA        3.43e+11
#> 20    AAM 2008      Q4 AAMQ42008        3.43e+11        3.25e+11
#> 21    AAM 2009      Q1 AAMQ12009        3.25e+11        3.37e+11
#> 22    AAM 2009      Q2 AAMQ22009        3.37e+11        3.64e+11
#> 23    AAM 2009      Q3 AAMQ32009        3.64e+11        3.60e+11
#> 24    AAM 2009      Q4 AAMQ42009        3.60e+11        3.48e+11
#> 25    AAM 2010      Q1 AAMQ12010        3.48e+11        3.55e+11
#> 26    AAM 2010      Q2 AAMQ22010        3.55e+11        3.32e+11
#> 27    AAM 2010      Q3 AAMQ32010        3.32e+11        3.27e+11
#> 28    AAM 2010      Q4 AAMQ42010        3.27e+11        3.35e+11
#> 29    AAM 2011      Q1 AAMQ12011        3.35e+11        3.59e+11
#> 30    AAM 2011      Q2 AAMQ22011        3.59e+11        3.29e+11
#> 31    AAM 2011      Q3 AAMQ32011        3.29e+11              NA
#> 32    AAM 2011      Q4 AAMQ42011              NA              NA
#> 33    ABT 2008      Q1 ABTQ12008        4.27e+11        3.87e+11
#> 34    ABT 2008      Q2 ABTQ22008        3.87e+11        4.07e+11
#> 35    ABT 2008      Q3 ABTQ32008        4.07e+11        3.93e+11
#> 36    ABT 2008      Q4 ABTQ42008        3.93e+11        4.14e+11
#> 37    ABT 2009      Q1 ABTQ12009        4.14e+11        5.17e+11
#> 38    ABT 2009      Q2 ABTQ22009        5.17e+11        5.31e+11
#> 39    ABT 2009      Q3 ABTQ32009        5.31e+11              NA
#> 40    ABT 2009      Q4 ABTQ42009              NA              NA
#>    total_assets_Q3 total_assets_Q4
#> 1               NA              NA
#> 2               NA              NA
#> 3               NA              NA
#> 4               NA              NA
#> 5               NA        4.74e+11
#> 6         4.74e+11        5.41e+11
#> 7         5.41e+11        6.01e+11
#> 8         6.01e+11        6.45e+11
#> 9         6.45e+11        6.55e+11
#> 10        6.55e+11        7.38e+11
#> 11        7.38e+11        7.99e+11
#> 12        7.99e+11        8.17e+11
#> 13        8.17e+11              NA
#> 14              NA              NA
#> 15              NA              NA
#> 16              NA              NA
#> 17              NA        3.43e+11
#> 18        3.43e+11        3.25e+11
#> 19        3.25e+11        3.37e+11
#> 20        3.37e+11        3.64e+11
#> 21        3.64e+11        3.60e+11
#> 22        3.60e+11        3.48e+11
#> 23        3.48e+11        3.55e+11
#> 24        3.55e+11        3.32e+11
#> 25        3.32e+11        3.27e+11
#> 26        3.27e+11        3.35e+11
#> 27        3.35e+11        3.59e+11
#> 28        3.59e+11        3.29e+11
#> 29        3.29e+11              NA
#> 30              NA              NA
#> 31              NA              NA
#> 32              NA              NA
#> 33        4.07e+11        3.93e+11
#> 34        3.93e+11        4.14e+11
#> 35        4.14e+11        5.17e+11
#> 36        5.17e+11        5.31e+11
#> 37        5.31e+11              NA
#> 38              NA              NA
#> 39              NA              NA
#> 40              NA              NA

Created on 2019-11-21 by the reprex package (v0.3.0)

2 Likes

I understand your issue a little different than @raytong, I think this is what you are trying to achieve.

library(lubridate)
library(tidyverse)

# This will perform the filtering
filtered_df <- data %>% 
    mutate(date_data = yq(paste0(year, quarter))) %>% 
    left_join(IPO_quarter %>%
                  select(ticker, date) %>% 
                  mutate(date = dmy(date)),
              by = "ticker") %>% 
    filter(date_data > date) %>% 
    select(-date_data)

filtered_df
#>    year total_assets quarter ticker      code       date
#> 1  2010     6.45e+11      Q4    AAA AAAQ42010 2010-07-15
#> 2  2011     6.55e+11      Q1    AAA AAAQ12011 2010-07-15
#> 3  2011     7.38e+11      Q2    AAA AAAQ22011 2010-07-15
#> 4  2011     7.99e+11      Q3    AAA AAAQ32011 2010-07-15
#> 5  2011     8.17e+11      Q4    AAA AAAQ42011 2010-07-15
#> 6  2009     3.64e+11      Q4    AAM AAMQ42009 2009-09-24
#> 7  2010     3.60e+11      Q1    AAM AAMQ12010 2009-09-24
#> 8  2010     3.48e+11      Q2    AAM AAMQ22010 2009-09-24
#> 9  2010     3.55e+11      Q3    AAM AAMQ32010 2009-09-24
#> 10 2010     3.32e+11      Q4    AAM AAMQ42010 2009-09-24
#> 11 2011     3.27e+11      Q1    AAM AAMQ12011 2009-09-24
#> 12 2011     3.35e+11      Q2    AAM AAMQ22011 2009-09-24
#> 13 2011     3.59e+11      Q3    AAM AAMQ32011 2009-09-24
#> 14 2011     3.29e+11      Q4    AAM AAMQ42011 2009-09-24
#> 15 2008     4.97e+11      Q1    ABT ABTQ12008 2006-12-25
#> 16 2008     4.27e+11      Q2    ABT ABTQ22008 2006-12-25
#> 17 2008     3.87e+11      Q3    ABT ABTQ32008 2006-12-25
#> 18 2008     4.07e+11      Q4    ABT ABTQ42008 2006-12-25
#> 19 2009     3.93e+11      Q1    ABT ABTQ12009 2006-12-25
#> 20 2009     4.14e+11      Q2    ABT ABTQ22009 2006-12-25
#> 21 2009     5.17e+11      Q3    ABT ABTQ32009 2006-12-25
#> 22 2009     5.31e+11      Q4    ABT ABTQ42009 2006-12-25

# This will reshape your data similar to your desired output
filtered_df %>%
    pivot_wider(id_cols = c(ticker, year),
                names_from = quarter,
                values_from = total_assets,
                names_prefix = "total_assets_") %>% 
    select(-total_assets_Q4, total_assets_Q4)
#> # A tibble: 7 x 6
#>   ticker  year total_assets_Q1 total_assets_Q2 total_assets_Q3 total_assets_Q4
#>   <fct>  <int>           <dbl>           <dbl>           <dbl>           <dbl>
#> 1 AAA     2010              NA              NA              NA    645000000000
#> 2 AAA     2011    655000000000    738000000000    799000000000    817000000000
#> 3 AAM     2009              NA              NA              NA    364000000000
#> 4 AAM     2010    360000000000    348000000000    355000000000    332000000000
#> 5 AAM     2011    327000000000    335000000000    359000000000    329000000000
#> 6 ABT     2008    497000000000    427000000000    387000000000    407000000000
#> 7 ABT     2009    393000000000    414000000000    517000000000    531000000000
1 Like

Thank you so much for your support. @raytong @andresrcs
I just only add one more step: filter the stock to get the output.

all<-data %>%
select(ticker, year, quarter, code, total_assets) %>%
arrange(year, quarter) %>%
group_by(ticker) %>%
mutate(total_assets_Q1 = lead(total_assets, 1),
total_assets_Q2 = lead(total_assets, 2),
total_assets_Q3 = lead(total_assets, 3),
total_assets_Q4 = lead(total_assets, 4)) %>%
arrange(ticker, year, quarter) %>%
select(-total_assets) %>%
as.data.frame()

library(dplyr)
select_stocks<-IPO_quarter$code
result<-all %>% filter(code %in%select_stocks)

Once again, thank you so much.

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.