Hi everyone,
I am trying to clean another table. This one is with more subgroups.
I need to pivot_longer and pivot_wider. See below the code that @FactOREO gave me (thanks again )for another example, the dataframe which i would like to clean (start_17) and the outcome that i would like to get (outcome_17)
Thanks very much in advance
start_17 dataframe:
structure(list(Department = c("London", "Male", "2017", "2017,Quartal1",
"2017,Quartal 2", "2017,Quartal 3", "2017,Quartal4", "Female",
"2017", "2017,Quartal1", "2017,Quartal 2", "2017,Quartal 3",
"2017,Quartal4", "Child", "2017", "2017,Quartal4", "Home", "2017",
"2017,Quartal 2", "2017,Quartal 3", "2017,Quartal4", "Garden",
"2017", "2017,Quartal1", "2017,Quartal 2", "2017,Quartal 3",
"2017,Quartal4", "Paris", "Male", "2017", "2017,Quartal1", "2017,Quartal4",
"Female", "2017", "2017,Quartal 2", "2017,Quartal 3", "2017,Quartal4",
"Child", "2017", "2017,Quartal4", "Garden", "2017", "2017,Quartal1",
"2017,Quartal 2", "2017,Quartal 3", "2017,Quartal4", "Rome",
"Male", "2017", "2017,Quartal1", "2017,Quartal 2", "2017,Quartal 3",
"2017,Quartal4", "Female", "2017", "2017,Quartal1", "2017,Quartal 2",
"2017,Quartal 3", "2017,Quartal4", "Child", "2017", "2017,Quartal4",
"Total"), TotalSales = c(NA, NA, NA, 56, 81, 103, 88, NA, NA,
368, 296, 369, 324, NA, NA, 103, NA, NA, 230, 225, 334, NA, NA,
787, 806, 925, 918, NA, NA, NA, 242, 287, NA, NA, 156, 161, 169,
153, NA, NA, 2691, NA, 76, 62, 66, 70, NA, NA, NA, 1176, 1185,
1432, 1408, NA, NA, 1376, 1492, 1830, 1628, NA, NA, 289, 21962
), TotalDamage = c(NA, NA, NA, "2", "2", "1", "2", NA, NA, "9",
"4", "4", "3", NA, NA, "1", NA, NA, "7", "6", "11", NA, NA, "2",
"4", "5", "4", NA, NA, NA, "NA", "NA", NA, NA, "NA", "NA", "NA",
"NA", NA, NA, "2", NA, "NA", "NA", "NA", "NA", NA, NA, NA, "NA",
"NA", "4", "1", NA, NA, "34", "19", "40", "37", NA, NA, "6",
"210"), TotalDamagePercent = c(NA, NA, NA, 3.57, 2.46, 9.7, 2.72,
NA, NA, 2.44, 1.35, 1.08, 9.25, NA, NA, 9.7, NA, NA, 3.04, 2.66,
3.29, NA, NA, 2.54, 4.96, 5.4, 4.35, NA, NA, NA, 0, 0, NA, NA,
0, 0, 0, 0, NA, NA, 7.43, NA, 0, 0, 0, 0, NA, NA, NA, 0, 0, 2.79,
7.1, NA, NA, 2.47, 1.27, 2.18, 2.27, NA, NA, 2.07, 0.96)), class = "data.frame", row.names = c(NA,
63L))
this is the outcome_17 that i would like to get:
structure(list(Year = c(2017, 2017, 2017, 2017, 2017, 2017, 2017,
2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017,
2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017,
2017, 2017, 2017, 2017, 2017, 2017, 2017), Quartal = c("2017-Q1",
"2017-Q2", "2017-Q3", "2017-Q4", "2017-Q1", "2017-Q2", "2017-Q3",
"2017-Q4", "2017-Q4", "2017-Q2", "2017-Q3", "2017-Q4", "2017-Q1",
"2017-Q2", "2017-Q3", "2017-Q4", "2017-Q1", "2017-Q4", "2017-Q2",
"2017-Q3", "2017-Q4", "2017-Q4", "2017-Q1", "2017-Q2", "2017-Q3",
"2017-Q4", "2017-Q1", "2017-Q2", "2017-Q3", "2017-Q4", "2017-Q1",
"2017-Q2", "2017-Q3", "2017-Q4", "2017-Q4", NA), City = c("London",
"London", "London", "London", "London", "London", "London", "London",
"London", "London", "London", "London", "London", "London", "London",
"London", "Paris", "Paris", "Paris", "Paris", "Paris", "Paris",
"Paris", "Paris", "Paris", "Paris", "Rome", "Rome", "Rome", "Rome",
"Rome", "Rome", "Rome", "Rome", "Rome", "Rome"), Department = c("Male",
"Male", "Male", "Male", "Female", "Female", "Female", "Female",
"Child", "Home", "Home", "Home", "Garden", "Garden", "Garden",
"Garden", "Male", "Male", "Female", "Female", "Female", "Child",
"Garden", "Garden", "Garden", "Garden", "Male", "Male", "Male",
"Male", "Female", "Female", "Female", "Female", "Child", "Total"
), TotalSales = c(56, 81, 103, 88, 368, 296, 369, 324, 103, 230,
225, 334, 787, 806, 925, 918, 242, 287, 156, 161, 169, 2691,
76, 62, 66, 70, 1176, 1185, 1432, 1408, 1376, 1492, 1830, 1628,
289, 21809), TotalDamage = c("2", "2", "1", "2", "9", "4", "4",
"3", "1", "7", "6", "11", "2", "4", "5", "4", "NA", "NA", "NA",
"NA", "NA", "2", "NA", "NA", "NA", "NA", "NA", "NA", "4", "1",
"34", "19", "40", "37", "6", "210"), TotalDamagePercent = c(3.57,
2.46, 9.7, 2.72, 2.44, 1.35, 1.08, 9.25, 9.7, 3.04, 2.66, 3.29,
2.54, 4.96, 5.4, 4.35, 0, 0, 0, 0, 0, 7.43, 0, 0, 0, 0, 0, 0,
2.79, 7.1, 2.47, 1.27, 2.18, 2.27, 2.07, 0.96)), class = "data.frame", row.names = c(NA,
36L))
and the code is this one:
city <- c("London", "Paris", "Rome")
department <-c("Male", "Female","Child", "Home", "Garden")
quartal <- c("2017-Q1", "2017-Q2","2017-Q3","2017-Q4")
d17<-start_17|>
mutate(
Group = if_else(Department %in% department, Department, NA_character_)
) |>
mutate(TotalDamagePercent = ifelse(is.na(TotalDamagePercent), 0, TotalDamagePercent)) |>
fill(
Group, .direction = 'down'
) |>
pivot_longer(
cols = TotalSales:TotalDamagePercent, names_to = 'category', values_to = 'value'
) |>
dplyr::filter(
Department != Group
) |>
pivot_wider(
values_from = 'value', names_from = 'category'
)|>
rename(
'Department' = Group,
'Quartal' = Department
)|>
select(Department,Quartal, everything()
)|>
add_column(year=2017
)