lead/lag for a time-series panel data with unequal timepoints

Hi,
I am trying to lead/lag a time-series panel data where subjects have unequal number of time points.
Example dataframe = dflong.
I am trying to lead/lag the two variables "Smoking_" and "Docvisit_" grouped by the column "Name".
Note that each individual has unequal number of datapoint.

This is what I tried (my first try at lag/lead), but this does not seems to work. Could anyone point out my mistake?
I want to make sure that the function runs across each individual and the data do not move across them.

# Example panel in wide format: one row per subject and one column per
# variable/year pair. NA marks a subject-year with no recorded value.
df <- data.frame(
  Name          = c("John", "Mary", "Alice", "Brown"),
  Smoking_2000  = c(0, 0, NA_real_, 1),
  Smoking_2001  = c(0, 1, 1, 1),
  Smoking_2002  = c(0, NA_real_, 2, 1),
  Smoking_2003  = c(0, 1, 1, NA_real_),
  Docvisit_2000 = c("yes", "no", NA_character_, "no"),
  Docvisit_2001 = c("yes", "yes", "no", "no"),
  Docvisit_2002 = c("yes", "no", "yes", "no"),
  Docvisit_2003 = c("yes", "no", "no", "no")
)

# NOTE: this is rendered reprex output. No library(tidyr) / library(dplyr)
# call appears anywhere in this snippet, which matches the
# "could not find function" errors reported on the `#>` lines below.

# Convert to long format. `names_pattern` splits each column name into a
# non-digit prefix and a digit suffix; ".value" turns the prefix into the
# output column name (`Smoking_`, `Docvisit_`) and the digits go to `year`.
dflong <- pivot_longer(data = df,cols = Smoking_2000:Docvisit_2003,
             names_pattern = "([^\\d]+)(\\d+)",
             names_to = c(".value","year"))
#> Error in pivot_longer(data = df, cols = Smoking_2000:Docvisit_2003, names_pattern = "([^\\d]+)(\\d+)", : could not find function "pivot_longer"

# Delete rows with NA (to show the time points are different for individuals).
# na.omit() drops every row that has an NA in any column.
dflong <- na.omit(dflong)
#> Error in na.omit(dflong): object 'dflong' not found

# Trying to lag-lead.
# NOTE(review): plyr also exports mutate(); if plyr is attached after dplyr,
# an unqualified mutate() resolves to plyr's version, which does not honour
# group_by(). Confirm the attach order or call dplyr::mutate() explicitly.
library(plyr)
# NOTE(review): in each call below the trailing `1` sits outside the
# lead()/lag() parentheses, so it is handed to mutate() as a separate unnamed
# argument instead of being the shift size of lead()/lag().
dflong_lead <- dflong %>% group_by(Name) %>% mutate(smoking_lead = lead(Smoking_),1)
#> Error in dflong %>% group_by(Name) %>% mutate(smoking_lead = lead(Smoking_), : could not find function "%>%"
dflong_lead1 <- dflong %>% group_by(Name) %>% mutate(Docvisit_lead = lead(Docvisit_),1)
#> Error in dflong %>% group_by(Name) %>% mutate(Docvisit_lead = lead(Docvisit_), : could not find function "%>%"

dflong_lag <- dflong %>% group_by(Name) %>% mutate(smoking_lag = lag(Smoking_),1)
#> Error in dflong %>% group_by(Name) %>% mutate(smoking_lag = lag(Smoking_), : could not find function "%>%"
dflong_lag1 <- dflong %>% group_by(Name) %>% mutate(Docvisit_lag = lag(Docvisit_),1)
#> Error in dflong %>% group_by(Name) %>% mutate(Docvisit_lag = lag(Docvisit_), : could not find function "%>%"
Created on 2022-01-20 by the reprex package (v2.0.1)

Is it because of missing libraries?

library(dplyr)
library(tidyr)

G.

Don't think so. I had loaded both of them above the rendered selection.
If you run the code, it seems to work (at least to the point of creating the df). Here is the new one where I have rendered loading those two libraries too. Thank you !

library(plyr)
library(tidyverse)
#> Warning: package 'ggplot2' was built under R version 3.6.2
#> Warning: package 'tibble' was built under R version 3.6.2
#> Warning: package 'tidyr' was built under R version 3.6.2
#> Warning: package 'purrr' was built under R version 3.6.2
#> Warning: package 'dplyr' was built under R version 3.6.2
library(dplyr)


# Wide-format example data: each row is one subject, each remaining column is
# one variable measured in one year. NA means the value is missing that year.
df <- data.frame(
  Name          = c("John", "Mary", "Alice", "Brown"),
  Smoking_2000  = c(0, 0, NA_real_, 1),
  Smoking_2001  = c(0, 1, 1, 1),
  Smoking_2002  = c(0, NA_real_, 2, 1),
  Smoking_2003  = c(0, 1, 1, NA_real_),
  Docvisit_2000 = c("yes", "no", NA_character_, "no"),
  Docvisit_2001 = c("yes", "yes", "no", "no"),
  Docvisit_2002 = c("yes", "no", "yes", "no"),
  Docvisit_2003 = c("yes", "no", "no", "no")
)

# Convert to long format: one row per subject-year. `names_pattern` splits
# each column name into a non-digit prefix and a digit suffix; ".value" makes
# the prefix the output column (`Smoking_`, `Docvisit_`) and the digits are
# stored in `year`.
dflong <- pivot_longer(
  data = df,
  cols = Smoking_2000:Docvisit_2003,
  names_pattern = "([^\\d]+)(\\d+)",
  names_to = c(".value", "year")
)

# Delete rows with NA (to show the time points are different for individuals).
dflong <- na.omit(dflong)

# Lead/lag within each subject.
# * The shift size is passed to lead()/lag() as `n`. Previously the `1` sat
#   outside the call, so mutate() treated it as an extra unnamed column.
# * dplyr:: prefixes keep the calls correct even if plyr is attached later
#   (plyr::mutate() ignores grouping) and avoid picking up stats::lag().
# * order_by = year shifts along time rather than along row order; the
#   four-digit year strings sort chronologically.
# * ungroup() returns a plain tibble so later steps are not silently grouped.
dflong_lead <- dflong %>%
  dplyr::group_by(Name) %>%
  dplyr::mutate(
    smoking_lead = dplyr::lead(Smoking_, n = 1L, order_by = year)
  ) %>%
  dplyr::ungroup()
dflong_lead1 <- dflong %>%
  dplyr::group_by(Name) %>%
  dplyr::mutate(
    Docvisit_lead = dplyr::lead(Docvisit_, n = 1L, order_by = year)
  ) %>%
  dplyr::ungroup()

dflong_lag <- dflong %>%
  dplyr::group_by(Name) %>%
  dplyr::mutate(
    smoking_lag = dplyr::lag(Smoking_, n = 1L, order_by = year)
  ) %>%
  dplyr::ungroup()
dflong_lag1 <- dflong %>%
  dplyr::group_by(Name) %>%
  dplyr::mutate(
    Docvisit_lag = dplyr::lag(Docvisit_, n = 1L, order_by = year)
  ) %>%
  dplyr::ungroup()
Created on 2022-01-20 by the reprex package (v2.0.1)

Found the right way to do this and sharing it

library(dplyr)
library(data.table)

# Lag and lead of the smoking variable within each subject, using data.table.
# Fixes relative to the first draft of this snippet:
# * `setDT(df), [` had a stray comma and did not parse.
# * The long table built above is `dflong` and its column is `Smoking_`;
#   there is no `smoking` column in the example data.
# * as.data.table() returns a new table, so `dflong` is left untouched and
#   df_lag / df_lead are independent objects. setDT() plus `:=` would update
#   one shared table by reference.
# shift() moves values by n rows inside each `by` group, so nothing is
# carried from one subject into another; rows must already be in time order.

# Lag for variable "Smoking_"
df_lag <- as.data.table(dflong)
df_lag[, lag_smoking := shift(Smoking_, n = 1L, type = "lag"), by = Name]

# Lead for variable "Smoking_"
df_lead <- as.data.table(dflong)
df_lead[, lead_smoking := shift(Smoking_, n = 1L, type = "lead"), by = Name]

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.