Help with removing products only with all zeros

Hello,

Looking to remove products that have all zeros in any given row. For example data below, BW and FX should be removed as they have all zeros.
My dataset contains several of these products and thus, looking to remove them efficiently.

Thanks for the help!

df <- data.frame(
  stringsAsFactors = FALSE,
                                               Date = c("2017-01-01",
                                                        "2017-02-01",
                                                        "2017-03-01","2017-04-01",
                                                        "2017-05-01","2017-06-01",
                                                        "2017-07-01",
                                                        "2017-08-01","2017-09-01",
                                                        "2017-10-01","2017-11-01",
                                                        "2017-12-01","2018-01-01",
                                                        "2018-02-01",
                                                        "2018-03-01","2018-04-01",
                                                        "2018-05-01","2018-06-01",
                                                        "2018-07-01",
                                                        "2018-08-01","2018-09-01",
                                                        "2018-10-01","2018-11-01",
                                                        "2018-12-01","2017-01-01",
                                                        "2017-02-01",
                                                        "2017-03-01","2017-04-01",
                                                        "2017-05-01","2017-06-01",
                                                        "2017-07-01",
                                                        "2017-08-01","2017-09-01",
                                                        "2017-10-01","2017-11-01",
                                                        "2017-12-01","2018-01-01",
                                                        "2018-02-01",
                                                        "2018-03-01","2018-04-01",
                                                        "2018-05-01","2018-06-01",
                                                        "2018-07-01",
                                                        "2018-08-01","2018-09-01",
                                                        "2018-10-01","2018-11-01",
                                                        "2018-12-01","2017-01-01",
                                                        "2017-02-01",
                                                        "2017-03-01","2017-04-01",
                                                        "2017-05-01","2017-06-01",
                                                        "2017-07-01",
                                                        "2017-08-01","2017-09-01",
                                                        "2017-10-01","2017-11-01",
                                                        "2017-12-01","2018-01-01",
                                                        "2018-02-01",
                                                        "2018-03-01","2018-04-01",
                                                        "2018-05-01","2018-06-01",
                                                        "2018-07-01",
                                                        "2018-08-01","2018-09-01",
                                                        "2018-10-01","2018-11-01",
                                                        "2018-12-01"),
                                            Product = c("TS","TS",
                                                        "TS","TS","TS","TS",
                                                        "TS","TS","TS","TS",
                                                        "TS","TS","TS","TS",
                                                        "TS","TS","TS","TS",
                                                        "TS","TS","TS","TS",
                                                        "TS","TS","BW","BW",
                                                        "BW","BW","BW","BW",
                                                        "BW","BW","BW","BW",
                                                        "BW","BW","BW","BW",
                                                        "BW","BW","BW","BW",
                                                        "BW","BW","BW","BW",
                                                        "BW","BW","FX","FX",
                                                        "FX","FX","FX","FX",
                                                        "FX","FX","FX","FX",
                                                        "FX","FX","FX","FX",
                                                        "FX","FX","FX","FX",
                                                        "FX","FX","FX","FX",
                                                        "FX","FX"),
                                              Sales = c(0,0,0,0,0,
                                                        596,3476,4457,4666,
                                                        5967,10177,9475,6056,
                                                        4302,6134,6676,8440,
                                                        10171,9182,8154,
                                                        6006,5992,9028,7330,0,
                                                        0,0,0,0,0,0,0,0,
                                                        0,0,0,0,0,0,0,0,
                                                        0,0,0,0,0,0,0,0,
                                                        0,0,0,0,0,0,0,0,
                                                        0,0,0,0,0,0,0,0,
                                                        0,0,0,0,0,0,0)
                                 )

df <- df%>% 
  mutate(Date = ymd(Date))

You can do

df |>
  group_by(Product) |>
  filter(
    !all(Sales == 0)
  )
1 Like

Thanks a bunch @nirgrahamuk !

I noticed that it does a wonderful job eliminating products with zeros in all rows. However, I do want to retain all rows for products that have non-zero values, including previous years.

For above example, I extended the data from 2016 onwards and BW & FX should be removed as earlier because they have no values throughout the dataset, but I do want to have all rows for TS product as it contains non-zero values. So, we should still see rows for TS in 2016 even though it has zero values. As of now, it retains all values from 2017 but excludes 2016 values for TS

These are 2 equivalent statements.
By a matter of logic, if a product is zero in all rows, then there are no non-zero rows that might be retained

If you are manipulating the data out of order, first excluding products, then adding more records that would have meant you wouldnt have wanted to exclude the products, then you simply excluded too soon, and should exclude later.

Well! We do want to make sure we consider all products we sold in the past 10 years. This is refinement in Initial problem that was solved with the support of community (Link Here). So, if we sold product in 2012 and none thereafter. We would need to consider it when we look at Dec 2020 data because it was sold within last 10 years frame.

In the attached sample data, I have recreated reprex with modification to include values of BW in 2016 where we have sold a few of BW and none thereafter. So, if we are looking at 2018 and even if we have not sold any of BW in 2018, we would still like to retain all values of BW as we have sold a few in last 10 years.

We would like to delete the ones which we sold none in any of the years . This will help reducing the data before we can apply logic on addition of last 10 years sales (Link Here).

df <- data.frame(
  stringsAsFactors = FALSE,
              Date = c("2016-01-01","2016-02-01",
                       "2016-03-01","2016-04-01","2016-05-01","2016-06-01",
                       "2016-07-01","2016-08-01","2016-09-01","2016-10-01",
                       "2016-11-01","2016-12-01","2016-01-01","2016-02-01",
                       "2016-03-01","2016-04-01","2016-05-01","2016-06-01",
                       "2016-07-01","2016-08-01","2016-09-01","2016-10-01",
                       "2016-11-01","2016-12-01","2016-01-01","2016-02-01",
                       "2016-03-01","2016-04-01","2016-05-01","2016-06-01",
                       "2016-07-01","2016-08-01","2016-09-01","2016-10-01",
                       "2016-11-01","2016-12-01","2017-01-01","2017-02-01","2017-03-01",
                       "2017-04-01","2017-05-01","2017-06-01","2017-07-01",
                       "2017-08-01","2017-09-01","2017-10-01","2017-11-01",
                       "2017-12-01","2018-01-01","2018-02-01","2018-03-01",
                       "2018-04-01","2018-05-01","2018-06-01","2018-07-01",
                       "2018-08-01","2018-09-01","2018-10-01","2018-11-01",
                       "2018-12-01","2017-01-01","2017-02-01","2017-03-01",
                       "2017-04-01","2017-05-01","2017-06-01","2017-07-01",
                       "2017-08-01","2017-09-01","2017-10-01","2017-11-01",
                       "2017-12-01","2018-01-01","2018-02-01","2018-03-01",
                       "2018-04-01","2018-05-01","2018-06-01","2018-07-01",
                       "2018-08-01","2018-09-01","2018-10-01","2018-11-01",
                       "2018-12-01","2017-01-01","2017-02-01","2017-03-01",
                       "2017-04-01","2017-05-01","2017-06-01","2017-07-01",
                       "2017-08-01","2017-09-01","2017-10-01","2017-11-01","2017-12-01",
                       "2018-01-01","2018-02-01","2018-03-01","2018-04-01",
                       "2018-05-01","2018-06-01","2018-07-01","2018-08-01",
                       "2018-09-01","2018-10-01","2018-11-01","2018-12-01"),
           Product = c("TS","TS","TS","TS","TS",
                       "TS","TS","TS","TS","TS","TS","TS","BW","BW","BW",
                       "BW","BW","BW","BW","BW","BW","BW","BW","BW",
                       "FX","FX","FX","FX","FX","FX","FX","FX","FX","FX",
                       "FX","FX","TS","TS","TS","TS","TS","TS","TS",
                       "TS","TS","TS","TS","TS","TS","TS","TS","TS","TS",
                       "TS","TS","TS","TS","TS","TS","TS","BW","BW",
                       "BW","BW","BW","BW","BW","BW","BW","BW","BW","BW",
                       "BW","BW","BW","BW","BW","BW","BW","BW","BW",
                       "BW","BW","BW","FX","FX","FX","FX","FX","FX","FX",
                       "FX","FX","FX","FX","FX","FX","FX","FX","FX",
                       "FX","FX","FX","FX","FX","FX","FX","FX"),
             Sales = c(0,0,0,0,0,0,0,0,0,0,0,
                       0,1200,0,0,0,0,0,1800,0,0,0,0,0,0,0,0,0,
                       0,0,0,0,0,0,0,0,0,0,0,0,0,596,3476,4457,
                       4666,5967,10177,9475,6056,4302,6134,6676,8440,
                       10171,9182,8154,6006,5992,9028,7330,0,0,0,0,0,
                       0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                       0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                       0,0,0,0,0)
)

df <- df%>% 
  mutate(Date = ymd(Date))

So...

df |>
  group_by(Product) |>
  filter(
    !all(Sales == 0)
  )

?

Yeah, it does work. I guess, some of the data points in my large dataset confused me a lot. Thanks again!

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.