Thanks @gueyenono , I have been messing around with the approach and come up with two trains of thought. Neither one seems to work yet.
Using two df's via the summarize statement:
# install.packages(c("zoo", "peakPick"))
# install.packages("tidyr")
# install.packages("forcats")
# install.packages("tidyverse")
library(forcats)
library(tidyverse)
library(dplyr)
library(zoo)
library(peakPick)
library(tidyr)
# Data
transect <- readr::read_csv("https://raw.githubusercontent.com/Bryanrt-geophys/Thesis/Thesis/r_code/transect.csv")
transect = transect[-1]
# Pivot to long format
long_transect <- transect %>%
pivot_longer(-Y,
names_to = "timestep",
names_prefix="Z_",
values_to = "elevation") %>%
mutate(elevation = if_else(abs(elevation) < 10, 0, elevation)) # say that <0 is = 0
long_transect$timestep = fct_inorder(long_transect$timestep)
# used to find the mountains
find_mountain <- function(elevation_smooth, elevation){
peaks_high <- which(peakPick::peakpick(elevation_smooth,
neighlim = 20,
deriv.lim = 100,
peak.min.sd = 5))
peaks_high <- peaks_high[peaks_high > 200 & peaks_high < 800]
if(length(peaks_high) == 0){
warning("Too many peaks")
mountain <- NA
} else if(length(peaks_high) == 1){
mountain <- peaks_high
} else if(length(peaks_high) == 2 && elevation[peaks_high[2]] < 0){
# second mountain
mountain <- peaks_high[1]
} else if(length(peaks_high) == 2 && elevation[peaks_high[2]] > 0){
# second mountain
mountain <- peaks_high[2]
} else{
warning("Too many peaks")
mountain <- NA
}
return(c("mountain" = mountain))
}
# used to find the valleys
find_valley <- function(elevation_smooth, elevation){
peaks_high <- which(peakPick::peakpick(elevation_smooth,
neighlim = 20,
deriv.lim = 100,
peak.min.sd = 5))
peaks_high <- peaks_high[peaks_high > 200 & peaks_high < 800]
if(length(peaks_high) == 0){
warning("Too many peaks")
valley <- NA
} else if(length(peaks_high) == 1){
valley <- peaks_high
valley <- peaks_high + which.min(elevation[peaks_high:length(elevation)])
} else if(length(peaks_high) == 2 && elevation[peaks_high[2]] < 0){
# second valley
valley <- peaks_high[1]
#second valley
valley <- peaks_high[1]+which.min(elevation[peaks_high[1]:length(elevation)])
} else if(length(peaks_high) == 2 && elevation[peaks_high[2]] > 0){
# second valley
valley <- peaks_high[2]
#second valley
valley <- peaks_high[2]+which.min(elevation[peaks_high[2]:length(elevation)])
} else{
warning("Too many peaks")
valley <- NA
}
return(c("valley" = valley))
}
# used to find valley-side of basin - possibly replace variables with data.frames
find_first_non_zero <- function(long_transect_vector, valley){
print("long_transect_vector:")
print(long_transect_vector$elevation)
print("valley:")
print(valley)
start_at <- long_transect_vector$Y[valley] * 2 # the position of the valley - multiply by two based on if grouping works or not
print("start_at:")
print(start_at)
if(long_transect_vector$elevation[start_at] > 0){
warning("The lowest point is positive! Returning NA.")
return(NA)
}
subset_positions <- seq(start_at, length(long_transect_vector$elevation))
pos <- first(which(long_transect_vector$elevation[subset_positions] >= 0))
pos + start_at
}
# used to find mountain-side of basin - possibly replace variables with data.frames
find_first_zero <- function(long_transect_vector, mountain){
print("long_transect_vector:")
print(long_transect_vector$elevation)
print("mountain:")
print(mountain)
start_at <- long_transect_vector$Y[mountain] * 2 # the position of the mountain - multiply by two based on if grouping works or not
print("start_at:")
print(start_at)
if(long_transect_vector$elevation[start_at] > 0){
warning("The lowest point is positive! Returning NA.")
return(NA)
}
subset_positions <- seq(start_at, length(long_transect_vector$elevation))
pos <- first(which(long_transect_vector$elevation[subset_positions] <= 0))
pos + start_at
}
# adds the smoothed curves to the long data, aids in visualization of process
long_transect <- long_transect %>%
group_by(timestep) %>%
mutate(elevation_smooth = smooth.spline(elevation, spar=.5)$y)
# can only use what is being piped into summarize
basin_geometry <- long_transect %>%
group_by(timestep) %>%
summarize(mountain = find_mountain(elevation_smooth, elevation),
valley = find_valley(elevation_smooth, elevation),
i_lowest_point = which.min(elevation),
Z_lowest_point = elevation[i_lowest_point],
Y_lowest_point = Y[i_lowest_point])
# must pass long_transect and basin_geometry components independantly to find_first_x()
basin_geometry <- basin_geometry %>%
mutate(i_mountain_shore = find_first_zero(group_by(long_transect, timestep), mountain),
i_valley_shore = find_first_non_zero(group_by(long_transect, timestep), valley),
Z_mountian_shore = elevation[i_mountain_shore], # not sure if these lines will need long_transect grouped
Z_valley_level = elevation[i_valley_shore],
Y_mountain_shore = Y[i_mountain_shore],
Y_valley_shore = Y[i_valley_shore],
basin_width = Y_mountain_shore - Y_valley_shore,
My = c(1:(ncol(transect)-1)))
or keeping a single dataframe and then drawing from that once the functions have been successfully ran:
# install.packages(c("zoo", "peakPick"))
# install.packages("tidyr")
# install.packages("forcats")
# install.packages("tidyverse")
library(forcats)
library(tidyverse)
library(dplyr)
library(zoo)
library(peakPick)
library(tidyr)
# Data
transect <- readr::read_csv("https://raw.githubusercontent.com/Bryanrt-geophys/Thesis/Thesis/r_code/transect.csv")
transect = transect[-1]
# Pivot to long format
long_transect <- transect %>%
pivot_longer(-Y,
names_to = "timestep",
names_prefix="Z_",
values_to = "elevation") %>%
mutate(elevation = if_else(abs(elevation) < 10, 0, elevation)) # say that <0 is = 0
long_transect$timestep = fct_inorder(long_transect$timestep)
# used to find the mountains
find_mountain <- function(elevation_smooth, elevation){
peaks_high <- which(peakPick::peakpick(elevation_smooth,
neighlim = 20,
deriv.lim = 100,
peak.min.sd = 5))
peaks_high <- peaks_high[peaks_high > 200 & peaks_high < 800]
if(length(peaks_high) == 0){
warning("Too many peaks")
mountain <- NA
} else if(length(peaks_high) == 1){
mountain <- peaks_high
} else if(length(peaks_high) == 2 && elevation[peaks_high[2]] < 0){
# second mountain
mountain <- peaks_high[1]
} else if(length(peaks_high) == 2 && elevation[peaks_high[2]] > 0){
# second mountain
mountain <- peaks_high[2]
} else{
warning("Too many peaks")
mountain <- NA
}
return(c("mountain" = mountain))
}
# used to find the valleys
find_valley <- function(elevation_smooth, elevation){
peaks_high <- which(peakPick::peakpick(elevation_smooth,
neighlim = 20,
deriv.lim = 100,
peak.min.sd = 5))
peaks_high <- peaks_high[peaks_high > 200 & peaks_high < 800]
if(length(peaks_high) == 0){
warning("Too many peaks")
valley <- NA
} else if(length(peaks_high) == 1){
valley <- peaks_high
valley <- peaks_high + which.min(elevation[peaks_high:length(elevation)])
} else if(length(peaks_high) == 2 && elevation[peaks_high[2]] < 0){
# second valley
valley <- peaks_high[1]
#second valley
valley <- peaks_high[1]+which.min(elevation[peaks_high[1]:length(elevation)])
} else if(length(peaks_high) == 2 && elevation[peaks_high[2]] > 0){
# second valley
valley <- peaks_high[2]
#second valley
valley <- peaks_high[2]+which.min(elevation[peaks_high[2]:length(elevation)])
} else{
warning("Too many peaks")
valley <- NA
}
return(c("valley" = valley))
}
# used to find valley-side of basin - possibly replace variables with data.frames
find_first_non_zero <- function(elevation_vector, valley){
print("elevation_vector:")
print(elevation_vector)
print("valley:")
print(valley)
start_at <- elevation_vector[valley] * 2 # the position of the valley - multiply by two based on if grouping works or not
print("start_at:")
print(start_at)
if(elevation_vector[start_at] > 0){
warning("The lowest point is positive! Returning NA.")
return(NA)
}
subset_positions <- seq(start_at, length(elevation_vector))
pos <- first(which(elevation_vector[subset_positions] >= 0))
pos + start_at
}
# used to find mountain-side of basin - possibly replace variables with data.frames
find_first_zero <- function(elevation_vector, mountain_vector){
print("elevation_vector:")
print(elevation_vector)
print("mountain:")
print(mountain_vector)
start_at <- elevation_vector[mountain_vector] * 2 # the position of the mountain - multiply by two based on if grouping works or not
print("start_at:")
print(start_at)
if(elevation_vector[start_at] > 0){
warning("The lowest point is positive! Returning NA.")
return(NA)
}
subset_positions <- seq(start_at, length(elevation_vector))
pos <- first(which(elevation_vector[subset_positions] <= 0))
pos + start_at
}
# adds the smoothed curves to the long data, aids in visualization of process
long_transect <- long_transect %>%
group_by(timestep) %>%
mutate(elevation_smooth = smooth.spline(elevation, spar=.5)$y)
# can only use what is being piped into summarize
basin_geometry <- long_transect %>%
group_by(timestep) %>%
mutate(mountain = find_mountain(elevation_smooth, elevation),
valley = find_valley(elevation_smooth, elevation),
i_lowest_point = which.min(elevation),
Z_lowest_point = elevation[i_lowest_point],
Y_lowest_point = Y[i_lowest_point],
i_mountain_shore = find_first_zero(elevation, mountain),
i_valley_shore = find_first_non_zero(group_by(long_transect, timestep), valley),
Z_mountian_shore = elevation[i_mountain_shore], # not sure if these lines will need long_transect grouped
Z_valley_level = elevation[i_valley_shore],
Y_mountain_shore = Y[i_mountain_shore],
Y_valley_shore = Y[i_valley_shore],
basin_width = Y_mountain_shore - Y_valley_shore,
My = c(1:(ncol(transect)-1)))```