Hello.
I have a data processing question:
-Data are water quality samples collected from many different locations and many different dates
- Data are in wide format and I want to get them into long format
- Each parameter (column) has a qualifier code ("_q")
I need to adjust the data based on the code:
- If "U" , take 1/2 the value
- If "V" or "K", remove the row of data
I could do this for each paramater using my script below, but I am thinking there has to be a more efficient method?
Thank you.
Some of the data:
test <-tibble::tribble(
~stream, ~site, ~date, ~DO, ~DO_q, ~Cond, ~Cond_q, ~Sal, ~Sal_q, ~TKN, ~TKN_q, ~NH3, ~NH3_q, ~NOX, ~NOX_q,
10L, 2L, 37635L, 9.34, NA, 492.4, NA, 0.25, NA, 0.8, NA, 0.05, NA, 0.66, NA,
19L, 10L, 37637L, 6.06, NA, 723.2, NA, 0.37, NA, 0.93, NA, 0.12, NA, 0.34, NA,
19L, 2L, 37637L, 8.93, NA, 16450, NA, 9.62, NA, 0.87, "U", 0.005, "U", 0.01, "U",
19L, 9L, 37637L, 9.22, "VU", 477.4, NA, 0.24, NA, 0.61, NA, 0.005, "UI", 0.32, "U",
19L, 5L, 37637L, 7.7, NA, 546.3, NA, 0.28, NA, 0.56, NA, 0.05, NA, 1.39, NA,
19L, 8L, 37637L, 8.86, "VU", 525, "V", 0.27, NA, 0.81, NA, 0.04, "I", 1.49, "U",
19L, 7L, 37637L, 9.82, NA, 479.2, NA, 0.24, NA, 0.56, "VU", 0.01, "I", 0.41, NA,
27L, 9L, 37637L, 9.12, NA, 427.4, NA, 0.21, "U", 0.95, NA, 0.1, NA, 0.27, NA,
27L, 8L, 37637L, 10.24, "UL", 622.9, NA, 0.32, NA, 1.1, NA, 0.005, "UV", 1.12, NA,
27L, 10L, 37637L, 10.03, NA, 353.4, NA, 0.17, NA, 0.71, NA, 0.005, "U", 0.13, NA,
27L, 3L, 37637L, 7.02, NA, 382, NA, 0.19, NA, 0.94, NA, 0.1, NA, 0.15, NA,
10L, 2L, 37670L, 8.48, NA, 435.3, NA, 0.22, NA, 0.84, "U", 0.04, "I", 0.27, NA,
19L, 10L, 37676L, 6.25, "KU", 625.7, "U", 0.32, NA, 0.81, NA, 0.08, NA, 0.15, NA,
19L, 2L, 37676L, 5.84, NA, 14230, NA, 8.23, "K", 0.85, NA, 0.1, NA, 0.07, "I",
19L, 9L, 37676L, 7.38, "U", 312.1, NA, 0.15, NA, 0.51, NA, 0.06, NA, 0.12, NA,
19L, 8L, 37676L, 7.93, "U", 512.6, NA, 0.26, NA, 0.56, NA, 0.09, NA, 1.02, NA,
19L, 7L, 37676L, 8.62, NA, 440.1, NA, 0.22, NA, 0.62, NA, 0.04, "I", 0.17, NA,
27L, 9L, 37676L, 8.35, NA, 449.5, NA, 0.23, NA, 0.82, NA, 0.04, "I", 0.08, "I",
27L, 8L, 37676L, 8.44, NA, 506, NA, 0.26, NA, 0.89, NA, 0.08, NA, 0.47, NA,
27L, 3L, 37676L, 6.96, NA, 416.8, NA, 0.21, NA, 0.9, NA, 0.09, NA, 0.01, "U",
27L, 10L, 37676L, 9.68, NA, 391.5, NA, 0.19, NA, 0.58, NA, 0.005, "U", 0.01, "U",
10L, 2L, 37705L, 7.78, "KU", 487.8, NA, 0.25, NA, 0.71, NA, 0.04, "I", 0.35, NA,
19L, 7L, 37711L, 8.39, NA, 526, NA, 0.3, NA, 0.53, NA, 0.05, NA, 0.21, NA,
19L, 8L, 37711L, 7.31, NA, 591, NA, 0.3, "U", 0.61, NA, 0.07, NA, 0.88, NA,
19L, 9L, 37711L, 7.49, NA, 468, NA, 0.2, NA, 0.58, NA, 0.03, "I", 0.13, NA,
19L, 10L, 37711L, 6.82, NA, 707, NA, 0.4, NA, 0.76, NA, 0.1, NA, 0.19, NA,
19L, 2L, 37711L, 9.56, NA, 125, NA, 0.1, NA, 1.12, NA, 0.005, "U", 0.01, "U",
27L, 9L, 37711L, 8.16, NA, 498, NA, 0.3, NA, 0.89, NA, 0.11, NA, 0.2, NA,
27L, 3L, 37711L, 6.3, NA, 163, NA, 0.1, NA, 0.8, NA, 0.11, NA, 0.04, "I",
27L, 10L, 37711L, 8.7, NA, 415, NA, 0.2, NA, 0.56, NA, 0.005, "U", 0.01, "UX",
27L, 8L, 37711L, 8.36, NA, 639, NA, 0.3, NA, 0.71, NA, 0.07, NA, 0.81, NA,
10L, 2L, 37748L, 6.5, NA, 561.6, NA, 0.29, NA, 0.79, NA, 0.22, NA, 0.33, NA,
19L, 7L, 37753L, 6.85, NA, 563.9, NA, 0.29, NA, 0.4, "I", 0.005, "U", 0.09, NA,
19L, 8L, 37753L, 6.02, NA, 760.8, NA, 0.39, NA, 0.4, "I", 0.005, "U", 0.73, NA,
19L, 9L, 37753L, 4.95, NA, 513, NA, 0.26, NA, 0.56, NA, 0.005, "U", 0.16, NA,
19L, 2L, 37753L, 4.16, NA, 26710, NA, 16.33, NA, 0.57, NA, 0.005, "U", 0.01, "U",
27L, 9L, 37753L, 6.85, NA, 460.8, NA, 0.23, NA, 0.64, NA, 0.06, NA, 0.23, NA
)
head(test)
#> # A tibble: 6 × 15
#> stream site date DO DO_q Cond Cond_q Sal Sal_q TKN TKN_q NH3
#> <int> <int> <int> <dbl> <chr> <dbl> <chr> <dbl> <chr> <dbl> <chr> <dbl>
#> 1 10 2 37635 9.34 <NA> 492. <NA> 0.25 <NA> 0.8 <NA> 0.05
#> 2 19 10 37637 6.06 <NA> 723. <NA> 0.37 <NA> 0.93 <NA> 0.12
#> 3 19 2 37637 8.93 <NA> 16450 <NA> 9.62 <NA> 0.87 U 0.005
#> 4 19 9 37637 9.22 VU 477. <NA> 0.24 <NA> 0.61 <NA> 0.005
#> 5 19 5 37637 7.7 <NA> 546. <NA> 0.28 <NA> 0.56 <NA> 0.05
#> 6 19 8 37637 8.86 VU 525 V 0.27 <NA> 0.81 <NA> 0.04
#> # … with 3 more variables: NH3_q <chr>, NOX <dbl>, NOX_q <chr>
Created on 2023-01-24 by the reprex package (v2.0.1)
My super short script, based on my data being long with one column of values, and another column with the qualifiers:
## Based on data being in long format:
test1 <-test %>%
#select(lake, date, QACode, parm, adj) %>%
filter(str_detect(QACode, "K|V")== FALSE) %>% ### Add qualifiers here ("?"does not work)
## mutate(adj2 = if_else(QACode == "Q", adj/2, adj)) Works!!!
mutate(adj2 = if_else(str_detect(QACode, "Q"), adj/2, adj)) ### Works!!!
#> Error in test %>% filter(str_detect(QACode, "K|V") == FALSE) %>% mutate(adj2 = if_else(str_detect(QACode, : could not find function "%>%"
Created on 2023-01-24 by the reprex package (v2.0.1)