Keep only numeric values

I am trying to keep only the numerical values in the SURFACE column: 0,01, 0,10, 0,02, 0,17, and so on until the end. Any ideas? Thanks.

# Example input: 15 records with two character columns.
#   DATE    - dates written as "YYYY-MM-DD" strings.
#   SURFACE - free text shaped like "<TYPE>:<n,nn> HA.[ <description>];",
#             where the number uses a comma as the decimal mark.
df <- data.frame(
  DATE = c(
    "2021-08-07",
    "2021-08-01",
    "2021-07-31",
    "2021-07-29",
    "2021-07-22",
    "2021-08-07",
    "2021-08-06",
    "2021-08-01",
    "2021-08-01",
    "2021-07-26",
    "2021-07-21",
    "2021-07-09",
    "2021-07-05",
    "2021-08-01",
    "2021-07-25"
  ),
  SURFACE = c(
    "OTRAS:0,01 HA.;",
    "MATORRAL:0,10 HA.;",
    "MATORRAL:0,02 HA.;",
    "PASTO:0,17 HA. PASTOS Y MALEZA;",
    "AGRICOLA:0,01 HA.;",
    "MATORRAL:0,01 HA. MATORRAL;",
    "MATORRAL:0,11 HA.;",
    "PASTO:0,04 HA. PASTO;",
    "ARBOLADO:0,01 HA. PINOS;",
    "ARBOLADO:1,91 HA. ROBLE, PINO;",
    "MATORRAL:0,01 HA. BREZO;",
    "MATORRAL:0,50 HA.;",
    "MATORRAL:0,05 HA. ZARZAS;",
    "MATORRAL:1,50 HA.;",
    "PASTO:0,70 HA. PASTO Y ZARZAS;"
  ),
  stringsAsFactors = FALSE
)
suppressPackageStartupMessages({
  library(dplyr)
  library(stringr)
})
# Working copy of the example data used by the cleaning pipeline below:
# 15 records, DATE as "YYYY-MM-DD" strings and SURFACE as free text of the
# form "<TYPE>:<n,nn> HA.[ <description>];" with a comma decimal mark.
df1 <- data.frame(
  DATE = c(
    "2021-08-07",
    "2021-08-01",
    "2021-07-31",
    "2021-07-29",
    "2021-07-22",
    "2021-08-07",
    "2021-08-06",
    "2021-08-01",
    "2021-08-01",
    "2021-07-26",
    "2021-07-21",
    "2021-07-09",
    "2021-07-05",
    "2021-08-01",
    "2021-07-25"
  ),
  SURFACE = c(
    "OTRAS:0,01 HA.;",
    "MATORRAL:0,10 HA.;",
    "MATORRAL:0,02 HA.;",
    "PASTO:0,17 HA. PASTOS Y MALEZA;",
    "AGRICOLA:0,01 HA.;",
    "MATORRAL:0,01 HA. MATORRAL;",
    "MATORRAL:0,11 HA.;",
    "PASTO:0,04 HA. PASTO;",
    "ARBOLADO:0,01 HA. PINOS;",
    "ARBOLADO:1,91 HA. ROBLE, PINO;",
    "MATORRAL:0,01 HA. BREZO;",
    "MATORRAL:0,50 HA.;",
    "MATORRAL:0,05 HA. ZARZAS;",
    "MATORRAL:1,50 HA.;",
    "PASTO:0,70 HA. PASTO Y ZARZAS;"
  ),
  stringsAsFactors = FALSE
)

# Convert the free-text SURFACE column of df1 into a numeric column.
#
# The number is pulled out directly instead of deleting everything around it.
# Deleting letters/punctuation breaks as soon as the trailing description
# contains a digit or a comma that is not preceded by a space: the leftovers
# make as.numeric() return NA with a warning.
#
# surface_number matches the first number written in Spanish notation:
# digits, optional "." thousands groups, optional "," decimal part.
surface_number <- "\\d+(?:\\.\\d{3})*(?:,\\d+)?"
comma_radix <- ","
point_radix <- "."
periods <- "[.]"

df1 %>%
  mutate(
    # First number in the string; NA when the string holds no digits.
    SURFACE = str_extract(SURFACE, surface_number),
    # Drop "." thousands separators so only the decimal comma remains.
    SURFACE = str_remove_all(SURFACE, periods),
    # Swap the decimal comma for the point that as.numeric() expects.
    SURFACE = str_replace(SURFACE, comma_radix, point_radix),
    SURFACE = as.numeric(SURFACE)
  )
#>          DATE SURFACE
#> 1  2021-08-07    0.01
#> 2  2021-08-01    0.10
#> 3  2021-07-31    0.02
#> 4  2021-07-29    0.17
#> 5  2021-07-22    0.01
#> 6  2021-08-07    0.01
#> 7  2021-08-06    0.11
#> 8  2021-08-01    0.04
#> 9  2021-08-01    0.01
#> 10 2021-07-26    1.91
#> 11 2021-07-21    0.01
#> 12 2021-07-09    0.50
#> 13 2021-07-05    0.05
#> 14 2021-08-01    1.50
#> 15 2021-07-25    0.70

Great, works like a charm. Thanks technocrat :slight_smile:

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.