Loop to replace with condition

Hey everyone, unfortunately I cannot find a solution to write the following chunk of code in a loop. The problem is that for every year (2018-2022) the January data is named "MONTH01" , the February data "MONTH02" and so on. I would like to be more efficient. Any ideas?

data$MONTH[big_data$MONTH== "MONTH01" & data$YEAR == "2018"] = "201801"
data$MONTH[big_data$MONTH== "MONTH01" & data$YEAR == "2019"] = "201901"
data$MONTH[big_data$MONTH== "MONTH01" & data$YEAR == "2020"] = "202001"
data$MONTH[big_data$MONTH== "MONTH01" & data$YEAR == "2021"] = "202101"
data$MONTh[big_data$MONTH== "MONTH01" & data$YEAR == "2022"] = "202201"

Perhaps this will point you in the right direction?

library("tidyverse")
my_data <- tibble(
  MONTH = rep("MONTH01", 5),
  YEAR = 2018:2022)

my_data %>% 
  mutate(MONTH_formatted = str_c(YEAR, str_remove(MONTH, "MONTH")))

Hey Leon,

thank you for your reply. Unfortunately, I don't really see how this may help me. I am searching for a "R-version" of the Stata forvalues command or something similar so that I can simply replace 18-22 with a variable and loop over this one. E.g.,:

forvalues i= 18/22 {
    replace MONTH=20`i' 01 if YEAR == 20`i'
}

In R we don't use loops that often because most functions are vectorized. The example solution @Leon is giving you will work on the entire MONTH column without the need for a loop.

If you need more specific help, please provide a proper REPRoducible EXample (reprex) illustrating your issue.

1 Like

Thank you for your reply! Please find a reproducable example below:

df1 <- data.frame(ID = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
                         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2),
                  MONTH = c('MONTH01', 'MONTH02', 'MONTH03', 'MONTH04', 'MONTH05','MONTH06',
                            'MONTH07','MONTH08','MONTH09','MONTH10','MONTH11','MONTH12',
                            'MONTH01', 'MONTH02', 'MONTH03', 'MONTH04', 'MONTH05','MONTH06',
                            'MONTH07','MONTH08','MONTH09','MONTH10','MONTH11','MONTH12',
                            'MONTH01', 'MONTH02', 'MONTH03', 'MONTH04', 'MONTH05','MONTH06',
                            'MONTH07','MONTH08','MONTH09','MONTH10','MONTH11','MONTH12',
                            'MONTH01', 'MONTH02', 'MONTH03', 'MONTH04', 'MONTH05','MONTH06',
                            'MONTH07','MONTH08','MONTH09','MONTH10','MONTH11','MONTH12'),
                  YEAR = c(2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018,
                           2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019,
                           2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018,
                           2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019))


#the following code would do the job, but I am looking for a more efficient solution:


df1$MONTH[df1$MONTH == "MONTH01" & df1$YEAR == "2018"] = "201801"
df1$MONTH[df1$MONTH == "MONTH01" & df1$YEAR == "2019"] = "201901"

df1$MONTH[df1$MONTH == "MONTH02" & df1$YEAR == "2018"] = "201802"
df1$MONTH[df1$MONTH == "MONTH02" & df1$YEAR == "2019"] = "201902"

df1$MONTH[df1$MONTH == "MONTH03" & df1$YEAR == "2018"] = "201803"
df1$MONTH[df1$MONTH == "MONTH03" & df1$YEAR == "2019"] = "201903"

df1$MONTH[df1$MONTH == "MONTH04" & df1$YEAR == "2018"] = "201804"
df1$MONTH[df1$MONTH == "MONTH04" & df1$YEAR == "2019"] = "201904"

df1$MONTH[df1$MONTH == "MONTH05" & df1$YEAR == "2018"] = "201805"
df1$MONTH[df1$MONTH == "MONTH05" & df1$YEAR == "2019"] = "201905"

df1$MONTH[df1$MONTH == "MONTH06" & df1$YEAR == "2018"] = "201806"
df1$MONTH[df1$MONTH == "MONTH06" & df1$YEAR == "2019"] = "201906"

df1$MONTH[df1$MONTH == "MONTH07" & df1$YEAR == "2018"] = "201807"
df1$MONTH[df1$MONTH == "MONTH07" & df1$YEAR == "2019"] = "201907"

df1$MONTH[df1$MONTH == "MONTH08" & df1$YEAR == "2018"] = "201808"
df1$MONTH[df1$MONTH == "MONTH08" & df1$YEAR == "2019"] = "201908"

df1$MONTH[df1$MONTH == "MONTH09" & df1$YEAR == "2018"] = "201809"
df1$MONTH[df1$MONTH == "MONTH09" & df1$YEAR == "2019"] = "201909"

df1$MONTH[df1$MONTH == "MONTH10" & df1$YEAR == "2018"] = "201810"
df1$MONTH[df1$MONTH == "MONTH10" & df1$YEAR == "2019"] = "201910"

df1$MONTH[df1$MONTH == "MONTH11" & df1$YEAR == "2018"] = "201811"
df1$MONTH[df1$MONTH == "MONTH11" & df1$YEAR == "2019"] = "201911"

df1$MONTH[df1$MONTH == "MONTH12" & df1$YEAR == "2018"] = "201812"
df1$MONTH[df1$MONTH == "MONTH12" & df1$YEAR == "2019"] = "201912"

your code is equivalent to

df1 |> mutate(MONTH=paste0(YEAR,substr(MONTH,6,7)))

edit to say:
this is also equivalent to the answer Leon recommended to you

df1 |> mutate(MONTH=str_c(YEAR, str_remove(MONTH, "MONTH")))
1 Like

The dplyr based solutions are more human-readable but if you want to keep using base R, this would be an alternative solution.

df1 <- data.frame(ID = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
                         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2),
                  MONTH = c('MONTH01', 'MONTH02', 'MONTH03', 'MONTH04', 'MONTH05','MONTH06',
                            'MONTH07','MONTH08','MONTH09','MONTH10','MONTH11','MONTH12',
                            'MONTH01', 'MONTH02', 'MONTH03', 'MONTH04', 'MONTH05','MONTH06',
                            'MONTH07','MONTH08','MONTH09','MONTH10','MONTH11','MONTH12',
                            'MONTH01', 'MONTH02', 'MONTH03', 'MONTH04', 'MONTH05','MONTH06',
                            'MONTH07','MONTH08','MONTH09','MONTH10','MONTH11','MONTH12',
                            'MONTH01', 'MONTH02', 'MONTH03', 'MONTH04', 'MONTH05','MONTH06',
                            'MONTH07','MONTH08','MONTH09','MONTH10','MONTH11','MONTH12'),
                  YEAR = c(2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018,
                           2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019,
                           2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018,
                           2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019))


(df1$MONTH <- paste0(df1$YEAR,substr(df1$MONTH,6,7)))
#>  [1] "201801" "201802" "201803" "201804" "201805" "201806" "201807" "201808"
#>  [9] "201809" "201810" "201811" "201812" "201901" "201902" "201903" "201904"
#> [17] "201905" "201906" "201907" "201908" "201909" "201910" "201911" "201912"
#> [25] "201801" "201802" "201803" "201804" "201805" "201806" "201807" "201808"
#> [33] "201809" "201810" "201811" "201812" "201901" "201902" "201903" "201904"
#> [41] "201905" "201906" "201907" "201908" "201909" "201910" "201911" "201912"

Created on 2023-01-16 with reprex v2.0.2

Thanks everyone! That's exactly what I was looking for!

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.