Create function to organize data frame

Hi there!

I am trying to create a function that reproduces the result I get below, but for different site names. The dput() output below is a subset of a much larger data frame that contains many other sites.

# Example data as dput() output: 15 rows (row.names = c(NA, -15L)), all with
# SITE_ID "GLNS15-2508", stored with class tbl_df.
# The final row (LINE = 0) carries a STATION_DEPTH value but is NA in CAST,
# COL_LOC, DEPTH and the measurement columns.
glns2508 <- structure(list(PUBLICATION_DATE = c(42933, 42933, 42933, 42933, 
42933, 42933, 42933, 42933, 42933, 42933, 42933, 42933, 42933, 
42933, 42933), UID = c(175411, 175411, 175411, 175411, 175411, 
175411, 175411, 175411, 175411, 175411, 175411, 175411, 175411, 
175411, 175411), SITE_ID = c("GLNS15-2508", "GLNS15-2508", "GLNS15-2508", 
"GLNS15-2508", "GLNS15-2508", "GLNS15-2508", "GLNS15-2508", "GLNS15-2508", 
"GLNS15-2508", "GLNS15-2508", "GLNS15-2508", "GLNS15-2508", "GLNS15-2508", 
"GLNS15-2508", "GLNS15-2508"), DATE_COL = c(42220, 42220, 42220, 
42220, 42220, 42220, 42220, 42220, 42220, 42220, 42220, 42220, 
42220, 42220, 42220), VISIT_NO = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1), STUDY = c("LK ERIE ENHANCE", "LK ERIE ENHANCE", 
"LK ERIE ENHANCE", "LK ERIE ENHANCE", "LK ERIE ENHANCE", "LK ERIE ENHANCE", 
"LK ERIE ENHANCE", "LK ERIE ENHANCE", "LK ERIE ENHANCE", "LK ERIE ENHANCE", 
"LK ERIE ENHANCE", "LK ERIE ENHANCE", "LK ERIE ENHANCE", "LK ERIE ENHANCE", 
"LK ERIE ENHANCE"), PSTL_CODE = c("MI", "MI", "MI", "MI", "MI", 
"MI", "MI", "MI", "MI", "MI", "MI", "MI", "MI", "MI", "MI"), 
    SAMPLE_TYPE = c("HYDRO", "HYDRO", "HYDRO", "HYDRO", "HYDRO", 
    "HYDRO", "HYDRO", "HYDRO", "HYDRO", "HYDRO", "HYDRO", "HYDRO", 
    "HYDRO", "HYDRO", "HYDRO"), LINE = c(1, 2, 3, 4, 5, 6, 7, 
    23, 22, 21, 20, 19, 18, 17, 0), CAST = c("DOWNCAST", "DOWNCAST", 
    "DOWNCAST", "DOWNCAST", "DOWNCAST", "DOWNCAST", "DOWNCAST", 
    "UPCAST", "UPCAST", "UPCAST", "UPCAST", "UPCAST", "UPCAST", 
    "UPCAST", NA), COL_LOC = c("Surface", "Mid-water", "Mid-water", 
    "Mid-water", "Mid-water", "Mid-water", "Bottom", "Surface", 
    "Mid-water", "Mid-water", "Mid-water", "Mid-water", "Mid-water", 
    "Bottom", NA), DEPTH = c(0.1, 0.5, 1, 2, 3, 4, 4.5, 0.1, 
    0.5, 1, 2, 3, 4, 4.5, NA), CONDUCTIVITY = c(305.4, 305.4, 
    305.5, 305.6, 305.7, 305.6, 305.7, 305.4, 305.4, 305.4, 305.4, 
    305.6, 305.6, 305.7, NA), DO = c(10.53, 10.71, 10.65, 10.47, 
    10.36, 10.34, 10.28, 10.72, 10.77, 10.74, 10.75, 10.61, 10.56, 
    10.3, NA), LIGHT_AMB = c(1172, 1064, 1059, 953.2, 1178, 1249, 
    1204, 1180, 1160, 1146, 1154, 1129, 1203, 1169, NA), LIGHT_UW = c(321.5, 
    167.2, 93.3, 11.74, 1.5, 0.3, 0.1, 195.4, 113.8, 156.5, 19.6, 
    2.6, 0.3, 0.07, NA), NCCA_FLAG = c(NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_), PH = c(9.1, 9.1, 9.1, 9, 9, 9, 9, 9.1, 9.1, 
    9.1, 9.1, 9, 9, 9, NA), SALINITY = c(NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), STATION_DEPTH = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5), TEMPERATURE = c(24.4, 
    24.4, 24.4, 24.4, 24.4, 24.4, 24.4, 24.4, 24.4, 24.4, 24.4, 
    24.4, 24.4, 24.4, NA), NCCA_COMMENT = c(NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_), WATERBODY = c("Lake_Erie", "Lake_Erie", "Lake_Erie", 
    "Lake_Erie", "Lake_Erie", "Lake_Erie", "Lake_Erie", "Lake_Erie", 
    "Lake_Erie", "Lake_Erie", "Lake_Erie", "Lake_Erie", "Lake_Erie", 
    "Lake_Erie", "Lake_Erie")), row.names = c(NA, -15L), class = c("tbl_df", 
"tbl", "data.frame"))
# Pull the six columns of interest out of the site table as plain vectors.
cast.2508 <- glns2508[["CAST"]]
depth.2508 <- glns2508[["DEPTH"]]
do.2508 <- glns2508[["DO"]]
cond.2508 <- glns2508[["CONDUCTIVITY"]]
ph.2508 <- glns2508[["PH"]]
temp.2508 <- glns2508[["TEMPERATURE"]]

# Rebuild the table from those vectors and give the columns their final names
# in the same step.
glns2508 <- setNames(
  data.frame(cast.2508, depth.2508, do.2508, cond.2508, ph.2508, temp.2508),
  c("CAST", "DEPTH_METERS", "DO_MG.L", "COND_US", "PH", "TEMP_CELSIUS")
)

# Show the table without the rows that contain any NA.
na.omit(glns2508)

Here is the function I am writing to try and get R to repeat this process in my larger dataframe, but with different site names. I have successfully created a function to subset the sites themselves:

#' Subset the water-quality table to a single site
#'
#' @param x Site identifier; rows whose `SITE_ID` equals this value are kept.
#' @param data Data frame to filter. Defaults to the `wq` object visible from
#'   where `filt()` is defined, so existing one-argument calls are unchanged.
#' @return The rows of `data` whose `SITE_ID` equals `x`.
filt <- function(x, data = wq) {
  # `.env$x` makes filter() read the function argument `x`; a bare `x` would be
  # looked up among the columns of `data` first and silently use a column
  # named `x` if one existed.
  filter(data, SITE_ID == .env$x)
}

But when I try to create a function to reorganize the data frame, it returns a character vector of the column names:

# org(): selects six columns from `y` by name, combines them into a data
# frame, renames the columns, and returns the result of na.omit().
#   x: not referenced anywhere in the body; the ".x" suffixes below are
#      literal parts of the local variable names, not substitutions of `x`.
#   y: indexed by column name below, so presumably expected to be a data
#      frame -- TODO confirm against the intended usage.
org <- function(x,y) {
  # Single-bracket indexing by name on `y`.
  depth.x <- y["DEPTH"]
  do.x <- y["DO"]
  cond.x <- y["CONDUCTIVITY"]
  ph.x <- y["PH"]
  temp.x <- y["TEMPERATURE"]
  cast.x <- y["CAST"]
  # The argument `y` is overwritten with the newly assembled data frame.
  y <- data.frame(cast.x, depth.x, do.x, cond.x, ph.x, temp.x)
  colnames(y) <- c('CAST', "DEPTH_METERS", "DO_MG.L", "COND_US", "PH", "TEMP_CELSIUS")
  na.omit(y)
}
# NOTE(review): `y` is passed here as the character string "glns3235", not as
# a data frame object, so the name-based indexing above operates on a string.
glns3235 <- org(x="3235", y="glns3235")

Can anyone help me figure out how to do this? I initially tried using the $ operator for the columns, but I received an error message telling me that the operator is invalid for atomic vectors.

Thank you so much!

Assuming that wq is your data.frame with many sites (only one is included here),
you could use something like:

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

# Example input as dput() output: 15 rows (row.names = c(NA, -15L)), all with
# SITE_ID "GLNS15-2508", stored with class tbl_df.
# The final row (LINE = 0) carries a STATION_DEPTH value but is NA in CAST,
# COL_LOC, DEPTH and the measurement columns.
wq <- structure(list(PUBLICATION_DATE = c(42933, 42933, 42933, 42933, 
42933, 42933, 42933, 42933, 42933, 42933, 42933, 42933, 42933, 
42933, 42933), UID = c(175411, 175411, 175411, 175411, 175411, 
175411, 175411, 175411, 175411, 175411, 175411, 175411, 175411, 
175411, 175411), SITE_ID = c("GLNS15-2508", "GLNS15-2508", "GLNS15-2508", 
"GLNS15-2508", "GLNS15-2508", "GLNS15-2508", "GLNS15-2508", "GLNS15-2508", 
"GLNS15-2508", "GLNS15-2508", "GLNS15-2508", "GLNS15-2508", "GLNS15-2508", 
"GLNS15-2508", "GLNS15-2508"), DATE_COL = c(42220, 42220, 42220, 
42220, 42220, 42220, 42220, 42220, 42220, 42220, 42220, 42220, 
42220, 42220, 42220), VISIT_NO = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1), STUDY = c("LK ERIE ENHANCE", "LK ERIE ENHANCE", 
"LK ERIE ENHANCE", "LK ERIE ENHANCE", "LK ERIE ENHANCE", "LK ERIE ENHANCE", 
"LK ERIE ENHANCE", "LK ERIE ENHANCE", "LK ERIE ENHANCE", "LK ERIE ENHANCE", 
"LK ERIE ENHANCE", "LK ERIE ENHANCE", "LK ERIE ENHANCE", "LK ERIE ENHANCE", 
"LK ERIE ENHANCE"), PSTL_CODE = c("MI", "MI", "MI", "MI", "MI", 
"MI", "MI", "MI", "MI", "MI", "MI", "MI", "MI", "MI", "MI"), 
    SAMPLE_TYPE = c("HYDRO", "HYDRO", "HYDRO", "HYDRO", "HYDRO", 
    "HYDRO", "HYDRO", "HYDRO", "HYDRO", "HYDRO", "HYDRO", "HYDRO", 
    "HYDRO", "HYDRO", "HYDRO"), LINE = c(1, 2, 3, 4, 5, 6, 7, 
    23, 22, 21, 20, 19, 18, 17, 0), CAST = c("DOWNCAST", "DOWNCAST", 
    "DOWNCAST", "DOWNCAST", "DOWNCAST", "DOWNCAST", "DOWNCAST", 
    "UPCAST", "UPCAST", "UPCAST", "UPCAST", "UPCAST", "UPCAST", 
    "UPCAST", NA), COL_LOC = c("Surface", "Mid-water", "Mid-water", 
    "Mid-water", "Mid-water", "Mid-water", "Bottom", "Surface", 
    "Mid-water", "Mid-water", "Mid-water", "Mid-water", "Mid-water", 
    "Bottom", NA), DEPTH = c(0.1, 0.5, 1, 2, 3, 4, 4.5, 0.1, 
    0.5, 1, 2, 3, 4, 4.5, NA), CONDUCTIVITY = c(305.4, 305.4, 
    305.5, 305.6, 305.7, 305.6, 305.7, 305.4, 305.4, 305.4, 305.4, 
    305.6, 305.6, 305.7, NA), DO = c(10.53, 10.71, 10.65, 10.47, 
    10.36, 10.34, 10.28, 10.72, 10.77, 10.74, 10.75, 10.61, 10.56, 
    10.3, NA), LIGHT_AMB = c(1172, 1064, 1059, 953.2, 1178, 1249, 
    1204, 1180, 1160, 1146, 1154, 1129, 1203, 1169, NA), LIGHT_UW = c(321.5, 
    167.2, 93.3, 11.74, 1.5, 0.3, 0.1, 195.4, 113.8, 156.5, 19.6, 
    2.6, 0.3, 0.07, NA), NCCA_FLAG = c(NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_), PH = c(9.1, 9.1, 9.1, 9, 9, 9, 9, 9.1, 9.1, 
    9.1, 9.1, 9, 9, 9, NA), SALINITY = c(NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), STATION_DEPTH = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5), TEMPERATURE = c(24.4, 
    24.4, 24.4, 24.4, 24.4, 24.4, 24.4, 24.4, 24.4, 24.4, 24.4, 
    24.4, 24.4, 24.4, NA), NCCA_COMMENT = c(NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_), WATERBODY = c("Lake_Erie", "Lake_Erie", "Lake_Erie", 
    "Lake_Erie", "Lake_Erie", "Lake_Erie", "Lake_Erie", "Lake_Erie", 
    "Lake_Erie", "Lake_Erie", "Lake_Erie", "Lake_Erie", "Lake_Erie", 
    "Lake_Erie", "Lake_Erie")), row.names = c(NA, -15L), class = c("tbl_df", 
"tbl", "data.frame"))

#' Extract the cleaned profile table for one site
#'
#' @param wq Data frame containing the columns `SITE_ID`, `CAST`, `DEPTH`,
#'   `DO`, `CONDUCTIVITY`, `PH` and `TEMPERATURE`.
#' @param x Site identifier to keep, compared against `SITE_ID`
#'   (for example "GLNS15-2508").
#' @return The rows of `wq` for site `x`, reduced to the six columns `CAST`,
#'   `DEPTH_METERS`, `DO_MG.L`, `COND_US`, `PH` and `TEMP_CELSIUS`, with every
#'   row that contains an `NA` removed.
org <- function(wq, x) {
  wq %>%
    # `x` is a value, not a column, so it is read through `.env` rather than
    # embraced with `{{ }}`: an embraced argument is evaluated inside the data
    # mask, so a caller passing a variable that shares a name with a column
    # (e.g. `SITE_ID`) would compare the column with itself.
    filter(SITE_ID == .env$x) %>%
    # Keep the profile columns and rename them in the same step.
    select(
      CAST,
      DEPTH_METERS = DEPTH,
      DO_MG.L = DO,
      COND_US = CONDUCTIVITY,
      PH,
      TEMP_CELSIUS = TEMPERATURE
    ) %>%
    na.omit()
}

# Site that is present in wq: per the recorded output, 14 rows are returned.
org(wq,'GLNS15-2508')
#> # A tibble: 14 x 6
#>    CAST     DEPTH_METERS DO_MG.L COND_US    PH TEMP_CELSIUS
#>    <chr>           <dbl>   <dbl>   <dbl> <dbl>        <dbl>
#>  1 DOWNCAST          0.1    10.5    305.   9.1         24.4
#>  2 DOWNCAST          0.5    10.7    305.   9.1         24.4
#>  3 DOWNCAST          1      10.6    306.   9.1         24.4
#>  4 DOWNCAST          2      10.5    306.   9           24.4
#>  5 DOWNCAST          3      10.4    306.   9           24.4
#>  6 DOWNCAST          4      10.3    306.   9           24.4
#>  7 DOWNCAST          4.5    10.3    306.   9           24.4
#>  8 UPCAST            0.1    10.7    305.   9.1         24.4
#>  9 UPCAST            0.5    10.8    305.   9.1         24.4
#> 10 UPCAST            1      10.7    305.   9.1         24.4
#> 11 UPCAST            2      10.8    305.   9.1         24.4
#> 12 UPCAST            3      10.6    306.   9           24.4
#> 13 UPCAST            4      10.6    306.   9           24.4
#> 14 UPCAST            4.5    10.3    306.   9           24.4
# Site that does not occur in wq: the result is an empty tibble that still has
# the six renamed columns.
org(wq,'GLNS15-2509')
#> # A tibble: 0 x 6
#> # ... with 6 variables: CAST <chr>, DEPTH_METERS <dbl>, DO_MG.L <dbl>,
#> #   COND_US <dbl>, PH <dbl>, TEMP_CELSIUS <dbl>
Created on 2021-07-30 by the reprex package (v2.0.0)

Thank you so much for helping me with this!

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.