Select and group observations

This dataframe:

data.frame(
  stringsAsFactors = FALSE,
       check.names = FALSE,
          unic = c("ARG","BAL",
                        "BEM","BEN","BER","CAB","CAC",
                        "CAM","CAR","DON"),
            `2022` = c(1, 1, 4, 1, 0, 1, 1, 5, 4, 0),
            `2021` = c(1, 0, 7, 0, 0, 4, 1, 3, 4, 2),
            `2020` = c(1, 0, 12, 0, 1, 4, 1, 8, 5, 0),
            `2019` = c(0, 0, 12, 0, 0, 5, 3, 4, 4, 4),
            `2018` = c(0, 0, 10, 1, 1, 0, 6, 6, 1, 0),
            `2017` = c(0, 0, 8, 1, 0, 1, 6, 8, 1, 1),
            `2016` = c(1, 0, 4, 0, 0, 1, 10, 1, 5, 1),
            `2015` = c(1, 1, 7, 2, 0, 1, 1, 8, 0, 0)
 )

I want to get the three 'munic' with more total ocurrences and group the remaining in an new row called 'Others'. In this example something similar to:

munic	2022	2021	2020	2019	2018	2017	2016	2015
BEM	4	7	12	12	10	8	4	7
CAC	1	1	1	3	6	6	10	1
CAM	5	3	8	4	6	8	1	8
Others 7	10	10	13	3	4	7	4

Any idea?

Here is one solution.

DF <- data.frame(
  stringsAsFactors = FALSE,
  check.names = FALSE,
  unic = c("ARG","BAL",
           "BEM","BEN","BER","CAB","CAC",
           "CAM","CAR","DON"),
  `2022` = c(1, 1, 4, 1, 0, 1, 1, 5, 4, 0),
  `2021` = c(1, 0, 7, 0, 0, 4, 1, 3, 4, 2),
  `2020` = c(1, 0, 12, 0, 1, 4, 1, 8, 5, 0),
  `2019` = c(0, 0, 12, 0, 0, 5, 3, 4, 4, 4),
  `2018` = c(0, 0, 10, 1, 1, 0, 6, 6, 1, 0),
  `2017` = c(0, 0, 8, 1, 0, 1, 6, 8, 1, 1),
  `2016` = c(1, 0, 4, 0, 0, 1, 10, 1, 5, 1),
  `2015` = c(1, 1, 7, 2, 0, 1, 1, 8, 0, 0)
)
library(dplyr)

DF <- DF |> rowwise() |> mutate(Total = sum(c_across(`2022`:`2015`)))
DF
#> # A tibble: 10 × 10
#> # Rowwise: 
#>    unic  `2022` `2021` `2020` `2019` `2018` `2017` `2016` `2015` Total
#>    <chr>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl> <dbl>
#>  1 ARG        1      1      1      0      0      0      1      1     5
#>  2 BAL        1      0      0      0      0      0      0      1     2
#>  3 BEM        4      7     12     12     10      8      4      7    64
#>  4 BEN        1      0      0      0      1      1      0      2     5
#>  5 BER        0      0      1      0      1      0      0      0     2
#>  6 CAB        1      4      4      5      0      1      1      1    17
#>  7 CAC        1      1      1      3      6      6     10      1    29
#>  8 CAM        5      3      8      4      6      8      1      8    43
#>  9 CAR        4      4      5      4      1      1      5      0    24
#> 10 DON        0      2      0      4      0      1      1      0     8
DF <- DF |> arrange(desc(Total)) 
OrderedUnic <- DF |> pull(unic) 
DF$unic <- c(OrderedUnic[1:3],rep("Other",7))
DF
#> # A tibble: 10 × 10
#> # Rowwise: 
#>    unic  `2022` `2021` `2020` `2019` `2018` `2017` `2016` `2015` Total
#>    <chr>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl> <dbl>
#>  1 BEM        4      7     12     12     10      8      4      7    64
#>  2 CAM        5      3      8      4      6      8      1      8    43
#>  3 CAC        1      1      1      3      6      6     10      1    29
#>  4 Other      4      4      5      4      1      1      5      0    24
#>  5 Other      1      4      4      5      0      1      1      1    17
#>  6 Other      0      2      0      4      0      1      1      0     8
#>  7 Other      1      1      1      0      0      0      1      1     5
#>  8 Other      1      0      0      0      1      1      0      2     5
#>  9 Other      1      0      0      0      0      0      0      1     2
#> 10 Other      0      0      1      0      1      0      0      0     2
DF <- DF |> group_by(unic) |> summarize(across(`2022`:`2015`, sum))
DF
#> # A tibble: 4 × 9
#>   unic  `2022` `2021` `2020` `2019` `2018` `2017` `2016` `2015`
#>   <chr>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
#> 1 BEM        4      7     12     12     10      8      4      7
#> 2 CAC        1      1      1      3      6      6     10      1
#> 3 CAM        5      3      8      4      6      8      1      8
#> 4 Other      8     11     11     13      3      4      8      5

Created on 2023-03-02 with reprex v2.0.2

1 Like

This seems to work. I thought I'd posted this earlier but I don't see it.


dat1 <- data.frame(
  stringsAsFactors = FALSE,
  check.names = FALSE,
  unic = c("ARG","BAL",
           "BEM","BEN","BER","CAB","CAC",
           "CAM","CAR","DON"),
  `2022` = c(1, 1, 4, 1, 0, 1, 1, 5, 4, 0),
  `2021` = c(1, 0, 7, 0, 0, 4, 1, 3, 4, 2),
  `2020` = c(1, 0, 12, 0, 1, 4, 1, 8, 5, 0),
  `2019` = c(0, 0, 12, 0, 0, 5, 3, 4, 4, 4),
  `2018` = c(0, 0, 10, 1, 1, 0, 6, 6, 1, 0),
  `2017` = c(0, 0, 8, 1, 0, 1, 6, 8, 1, 1),
  `2016` = c(1, 0, 4, 0, 0, 1, 10, 1, 5, 1),
  `2015` = c(1, 1, 7, 2, 0, 1, 1, 8, 0, 0)
)

library(data.table)

setDT(dat1) # convert data.frame to data.table
dat2  <- dat1[c(1, 3, 7, 8),]
xx <- data.table( nums = colSums(dat1[-c(3, 7, 8), -1])            )
dat3 <-  data.table(unic = "Others", t(xx))
names(dat3) <- names(dat1)
new_data <- rbind(dat2, dat3)
setnames(new_data, "unic", "munic")
new_data 

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.