How to filter a data frame using a threshold value?

Hi All,
I have two data frames, i.e. observed and synthetic.
How can I replace the values in synthetic$rain so that any value < threshold becomes 0, and the number of non-zero rows is equal or close to the number of non-zero rows in observed?

Thank you

library(gsheet)
library(dplyr, warn.conflicts = FALSE)
# Download the observed table from its Google Sheet and preview the first rows.
observed <- gsheet2tbl(
  "docs.google.com/spreadsheets/d/14dXMkK_h-tlA4jQg9nn0VraXD39H6bWtjBJ7HCFi0f0/edit?usp=sharing"
)
head(observed)

# Download the synthetic table from its Google Sheet and preview the first rows.
synthetic <- gsheet2tbl(
  "docs.google.com/spreadsheets/d/1vmCMwFhrBTEimhfgQEBfsdZl56tohfoC_pLhu8d2QCM/edit#gid=2075705027"
)
head(synthetic)

Your current way of sharing data requires forum users to authenticate in a browser, which I find burdensome.
I recommend you share minimal yet representative data in the most forum-friendly way, i.e. with base::dput() or the datapasta package.

I have edited it. Thanks

A threshold of 2.18 seems to be a fine answer.

library(gsheet)
library(tidyverse)
# Download the observed table from its Google Sheet and preview the first rows.
observed <- gsheet2tbl(
  "docs.google.com/spreadsheets/d/14dXMkK_h-tlA4jQg9nn0VraXD39H6bWtjBJ7HCFi0f0/edit?usp=sharing"
)
head(observed)

# Download the synthetic table from its Google Sheet and preview the first rows.
synthetic <- gsheet2tbl(
  "docs.google.com/spreadsheets/d/1vmCMwFhrBTEimhfgQEBfsdZl56tohfoC_pLhu8d2QCM/edit#gid=2075705027"
)
head(synthetic)

# Target count: how many observed rows have a non-zero rain value.
observed_non_zero <- sum(observed$rain != 0)
print(observed_non_zero)

#' Mismatch between a thresholded count and a target count
#'
#' Counts how many `rain` values in `data` are at or above `threshold` and
#' returns the absolute difference between that count and `onz`.
#'
#' @param threshold Numeric scalar; `rain` values below it are not counted.
#' @param onz Target count to match, e.g. the number of non-zero rows in the
#'   observed data.
#' @param data Data frame with a numeric `rain` column. Defaults to the
#'   `synthetic` object found from the environment where this function is
#'   defined, so existing two-argument calls keep working unchanged.
#' @return Non-negative double scalar; 0 means the counts match exactly.
myfunc <- function(threshold, onz, data = synthetic) {
  # A vectorised base-R count replaces the mutate/pull/sum pipeline. Missing
  # rain values are skipped so a single NA cannot turn the result into NA.
  n_kept <- sum(data[["rain"]] >= threshold, na.rm = TRUE)
  # Keep the result a double, as before (the count used to be summed as 1 * lgl).
  abs(as.numeric(n_kept) - onz) # ideally this should be zero
}

# Search the range of synthetic rain values for the threshold that minimises
# the mismatch returned by myfunc().
result1 <- optimise(
  f = myfunc,
  interval = range(synthetic$rain),
  onz = observed_non_zero
)
print(result1)

# Bias printing towards fixed notation before showing the threshold found.
options(scipen = 99999)
result1$minimum

# Zoomed out: mismatch for 100 thresholds between 0 and 14.
seq_to_plot <- seq(from = 0, to = 14, length.out = 100)
excess_non_zeros <- map_dbl(
  seq_to_plot,
  function(th) myfunc(th, observed_non_zero)
)
plot(seq_to_plot, excess_non_zeros)
abline(v = result1$minimum)

# Zoomed in: mismatch for 100 thresholds between 2.16 and 2.19.
seq_to_plot <- seq(from = 2.16, to = 2.19, length.out = 100)
excess_non_zeros <- map_dbl(
  seq_to_plot,
  function(th) myfunc(th, observed_non_zero)
)
plot(seq_to_plot, excess_non_zeros)
abline(v = result1$minimum)

# Confirm the mismatch at a threshold of 2.18.
myfunc(2.18, observed_non_zero)

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.