How do I use mutate() with str_extract() and str_remove()?

Hi community

I'm using list.files() to get the .jpg files from many folders. I want to obtain the folder name, the file name and the Accesion. I tried str_remove() and str_extract(), but the result is only correct for the first and last rows.

I also tried a separate mutate() for each path, but that didn't work well either.

library(tidyverse)
library(stringr)
# Sample input: relative paths ("<delivery folder>/<image file>") as returned
# by list.files(). Note that some file names contain runs of spaces.
list_file3 <- data.frame(
  Folder_path = c(
    "1ra ENTREGA 2022 100 Acc/G   24.jpg",
    "1ra ENTREGA 2022 100 Acc/G  114.jpg",
    "2da ENTREGA 2022 100 Acc/G 1678.jpg",
    "2da ENTREGA 2022 100 Acc/G 2220.jpg",
    "3ra ENTREGA 2022 27 Acc/G24888.jpg",
    "3ra ENTREGA 2022 27 Acc/G35109.jpg",
    "4ta ENTREGA 2022 100 Acc/G 1653.jpg",
    "4ta ENTREGA 2022 100 Acc/G 1767.jpg",
    "5ta ENTREGA 2022 100 Acc/G10703.jpg",
    "5ta ENTREGA 2022 100 Acc/G10705.jpg",
    "6ta ENTREGA 2022 100 Acc/G13418.jpg",
    "6ta ENTREGA 2022 100 Acc/G13425.jpg",
    "7a ENTEGA 2022 100 Acc/G11134.jpg",
    "7a ENTEGA 2022 100 Acc/G11158.jpg",
    "8va ENTREGA 2022 119 Acc/G23816C.jpg",
    "8va ENTREGA 2022 119 Acc/G23834E.jpg"
  )
)

# The eight distinct folder prefixes (each with its trailing "/").
path <- c(
  '1ra ENTREGA 2022 100 Acc/',
  '2da ENTREGA 2022 100 Acc/',
  '3ra ENTREGA 2022 27 Acc/',
  '4ta ENTREGA 2022 100 Acc/',
  '5ta ENTREGA 2022 100 Acc/',
  '6ta ENTREGA 2022 100 Acc/',
  '7a ENTEGA 2022 100 Acc/',
  '8va ENTREGA 2022 119 Acc/'
)

# Attempt that reproduces the problem described above (kept as posted).
# `path` holds 8 patterns while `Folder_path` has 16 rows. stringr pairs
# string and pattern element by element instead of trying every pattern on
# every row, so the 8 patterns are presumably recycled twice over the 16 rows
# (pattern 1 with rows 1 and 9, pattern 2 with rows 2 and 10, ...). With this
# data the folder and the pattern only line up for row 1 and row 16, which
# matches the "only the first and last rows are good" symptom.
# NOTE(review): newer stringr releases may raise a recycling error here
# instead of recycling silently - confirm against the installed version.
list_file3 |> 
  mutate(Entrega=str_extract(Folder_path,path)) |> 
  mutate(file= str_remove(Folder_path,path)) |> 
  # Drop the ".jpg" extension (anchored at the end of the string).
  mutate(Accesion=str_remove(file,'\\.jpg$'))

Thanks!

Here is one approach for extracting the various file path elements.

# Split every path at its first "/" into the folder (Entrega) and the file
# name (file), then strip the ".jpg" extension to get the accession code
# (Accesion). The stringr functions are vectorised over the whole column, so
# no rowwise() / ungroup() round trip is needed; as_tibble() keeps the result
# a tibble.
list_file3 |>
  as_tibble() |>
  mutate(
    # str_locate() returns a matrix with "start" and "end" columns; "start" is
    # the position of the first "/" in each path (NA when there is none).
    Entrega = str_sub(
      Folder_path, 1, str_locate(Folder_path, '/')[, "start"] - 1
    ),
    # Everything after the first "/"; str_sub() defaults to the last character
    # as the end position.
    file = str_sub(Folder_path, str_locate(Folder_path, '/')[, "start"] + 1),
    # Remove the extension only when it is at the very end of the name.
    Accesion = str_remove(file, '\\.jpg$')
  )
#> # A tibble: 16 × 4
#>    Folder_path                          Entrega                  file    Acces…¹
#>    <chr>                                <chr>                    <chr>   <chr>  
#>  1 1ra ENTREGA 2022 100 Acc/G   24.jpg  1ra ENTREGA 2022 100 Acc G   24… G   24 
#>  2 1ra ENTREGA 2022 100 Acc/G  114.jpg  1ra ENTREGA 2022 100 Acc G  114… G  114 
#>  3 2da ENTREGA 2022 100 Acc/G 1678.jpg  2da ENTREGA 2022 100 Acc G 1678… G 1678 
#>  4 2da ENTREGA 2022 100 Acc/G 2220.jpg  2da ENTREGA 2022 100 Acc G 2220… G 2220 
#>  5 3ra ENTREGA 2022 27 Acc/G24888.jpg   3ra ENTREGA 2022 27 Acc  G24888… G24888 
#>  6 3ra ENTREGA 2022 27 Acc/G35109.jpg   3ra ENTREGA 2022 27 Acc  G35109… G35109 
#>  7 4ta ENTREGA 2022 100 Acc/G 1653.jpg  4ta ENTREGA 2022 100 Acc G 1653… G 1653 
#>  8 4ta ENTREGA 2022 100 Acc/G 1767.jpg  4ta ENTREGA 2022 100 Acc G 1767… G 1767 
#>  9 5ta ENTREGA 2022 100 Acc/G10703.jpg  5ta ENTREGA 2022 100 Acc G10703… G10703 
#> 10 5ta ENTREGA 2022 100 Acc/G10705.jpg  5ta ENTREGA 2022 100 Acc G10705… G10705 
#> 11 6ta ENTREGA 2022 100 Acc/G13418.jpg  6ta ENTREGA 2022 100 Acc G13418… G13418 
#> 12 6ta ENTREGA 2022 100 Acc/G13425.jpg  6ta ENTREGA 2022 100 Acc G13425… G13425 
#> 13 7a ENTEGA 2022 100 Acc/G11134.jpg    7a ENTEGA 2022 100 Acc   G11134… G11134 
#> 14 7a ENTEGA 2022 100 Acc/G11158.jpg    7a ENTEGA 2022 100 Acc   G11158… G11158 
#> 15 8va ENTREGA 2022 119 Acc/G23816C.jpg 8va ENTREGA 2022 119 Acc G23816… G23816C
#> 16 8va ENTREGA 2022 119 Acc/G23834E.jpg 8va ENTREGA 2022 119 Acc G23834… G23834E
#> # … with abbreviated variable name ¹​Accesion

Created on 2023-01-07 with reprex v2.0.2.9000

1 Like

Hi @M_AcostaCH,
So many ways to "skin this cat"....

suppressPackageStartupMessages(library(tidyverse))
library(stringr)
# Sample input: one row per image, stored as "<delivery folder>/<file name>".
# Several file names contain runs of spaces, which must be preserved.
list_file3 <- data.frame(
  Folder_path = c(
    "1ra ENTREGA 2022 100 Acc/G   24.jpg",
    "1ra ENTREGA 2022 100 Acc/G  114.jpg",
    "2da ENTREGA 2022 100 Acc/G 1678.jpg",
    "2da ENTREGA 2022 100 Acc/G 2220.jpg",
    "3ra ENTREGA 2022 27 Acc/G24888.jpg",
    "3ra ENTREGA 2022 27 Acc/G35109.jpg",
    "4ta ENTREGA 2022 100 Acc/G 1653.jpg",
    "4ta ENTREGA 2022 100 Acc/G 1767.jpg",
    "5ta ENTREGA 2022 100 Acc/G10703.jpg",
    "5ta ENTREGA 2022 100 Acc/G10705.jpg",
    "6ta ENTREGA 2022 100 Acc/G13418.jpg",
    "6ta ENTREGA 2022 100 Acc/G13425.jpg",
    "7a ENTEGA 2022 100 Acc/G11134.jpg",
    "7a ENTEGA 2022 100 Acc/G11158.jpg",
    "8va ENTREGA 2022 119 Acc/G23816C.jpg",
    "8va ENTREGA 2022 119 Acc/G23834E.jpg"
  )
)

# path <- c('1ra ENTREGA 2022 100 Acc/', '2da ENTREGA 2022 100 Acc/',
#           '3ra ENTREGA 2022 27 Acc/', '4ta ENTREGA 2022 100 Acc/',
#           '5ta ENTREGA 2022 100 Acc/','6ta ENTREGA 2022 100 Acc/',
#           '7a ENTEGA 2022 100 Acc/', '8va ENTREGA 2022 119 Acc/')

# See various options at:
# https://stackoverflow.com/questions/10617702/remove-part-of-string-after

# Derive the folder (Entrega), the file name (file) and the accession code
# (Accesion) from each path, and show the first 10 rows.
# str_split_i() requires stringr >= 1.5.0.
list_file3 |>
  mutate(
    # Folder = the piece before the first "/", with the separator added back.
    Entrega = paste0(str_split_i(Folder_path, "/", i = 1), "/"),
    # fixed() makes stringr treat the folder name as literal text rather than
    # a regular expression, so names containing ".", "(", "+", ... still work.
    file = str_remove(Folder_path, fixed(Entrega)),
    # Remove the extension only when it is at the very end of the name.
    Accesion = str_remove(file, '\\.jpg$')
  ) |>
  # The native pipe already supplies the data as the first argument; the
  # magrittr `.` placeholder does not exist for `|>`.
  head(n = 10)
#>                            Folder_path                   Entrega       file
#> 1  1ra ENTREGA 2022 100 Acc/G   24.jpg 1ra ENTREGA 2022 100 Acc/ G   24.jpg
#> 2  1ra ENTREGA 2022 100 Acc/G  114.jpg 1ra ENTREGA 2022 100 Acc/ G  114.jpg
#> 3  2da ENTREGA 2022 100 Acc/G 1678.jpg 2da ENTREGA 2022 100 Acc/ G 1678.jpg
#> 4  2da ENTREGA 2022 100 Acc/G 2220.jpg 2da ENTREGA 2022 100 Acc/ G 2220.jpg
#> 5   3ra ENTREGA 2022 27 Acc/G24888.jpg  3ra ENTREGA 2022 27 Acc/ G24888.jpg
#> 6   3ra ENTREGA 2022 27 Acc/G35109.jpg  3ra ENTREGA 2022 27 Acc/ G35109.jpg
#> 7  4ta ENTREGA 2022 100 Acc/G 1653.jpg 4ta ENTREGA 2022 100 Acc/ G 1653.jpg
#> 8  4ta ENTREGA 2022 100 Acc/G 1767.jpg 4ta ENTREGA 2022 100 Acc/ G 1767.jpg
#> 9  5ta ENTREGA 2022 100 Acc/G10703.jpg 5ta ENTREGA 2022 100 Acc/ G10703.jpg
#> 10 5ta ENTREGA 2022 100 Acc/G10705.jpg 5ta ENTREGA 2022 100 Acc/ G10705.jpg
#>    Accesion
#> 1    G   24
#> 2    G  114
#> 3    G 1678
#> 4    G 2220
#> 5    G24888
#> 6    G35109
#> 7    G 1653
#> 8    G 1767
#> 9    G10703
#> 10   G10705

# You can then do a left_join() if you want to check that the elements of 'path' match
# one of the extracted values of 'Entrega'. 

Created on 2023-01-08 with reprex v2.0.2

1 Like

Hi, I get this error on this line:
Maybe it is the version of library(stringr)? :thinking:

# I have loaded all the libraries
Error in `mutate()`:
! Problem while computing `Entrega =
  paste0(str_split_i(Folder_path, "/", i = 1),
  "/")`.
Caused by error in `str_split_i()`:
! could not find function "str_split_i"

Hi @M_AcostaCH,
This is my set-up:

> sessionInfo()
R version 4.2.2 (2022-10-31 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 22621)

Matrix products: default

locale:
[1] LC_COLLATE=English_Australia.utf8  LC_CTYPE=English_Australia.utf8   
[3] LC_MONETARY=English_Australia.utf8 LC_NUMERIC=C                      
[5] LC_TIME=English_Australia.utf8    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] forcats_0.5.2   stringr_1.5.0   dplyr_1.0.10    purrr_1.0.0     readr_2.1.3    
[6] tidyr_1.2.1     tibble_3.1.8    ggplot2_3.4.0   tidyverse_1.3.2

loaded via a namespace (and not attached):
 [1] lubridate_1.9.0     ps_1.7.2            assertthat_0.2.1    digest_0.6.31      
 [5] utf8_1.2.2          R6_2.5.1            cellranger_1.1.0    backports_1.4.1    
 [9] reprex_2.0.2        evaluate_0.19       httr_1.4.4          highr_0.10         
[13] pillar_1.8.1        rlang_1.0.6         googlesheets4_1.0.1 readxl_1.4.1       
[17] rstudioapi_0.14     callr_3.7.3         R.utils_2.12.2      R.oo_1.25.0        
[21] rmarkdown_2.19      styler_1.8.1        googledrive_2.0.0   munsell_0.5.0      
[25] broom_1.0.2         compiler_4.2.2      modelr_0.1.10       xfun_0.36          
[29] pkgconfig_2.0.3     clipr_0.8.0         htmltools_0.5.4     tidyselect_1.2.0   
[33] fansi_1.0.3         crayon_1.5.2        tzdb_0.3.0          dbplyr_2.2.1       
[37] withr_2.5.0         R.methodsS3_1.8.2   grid_4.2.2          jsonlite_1.8.4     
[41] gtable_0.3.1        lifecycle_1.0.3     DBI_1.1.3           magrittr_2.0.3     
[45] scales_1.2.1        cli_3.5.0           stringi_1.7.8       fs_1.5.2           
[49] xml2_1.3.3          ellipsis_0.3.2      generics_0.1.3      vctrs_0.5.1        
[53] tools_4.2.2         R.cache_0.16.0      glue_1.6.2          hms_1.1.2          
[57] processx_3.8.0      fastmap_1.1.0       yaml_2.3.6          timechange_0.1.1   
[61] colorspace_2.0-3    gargle_1.2.1        rvest_1.0.3         knitr_1.41         
[65] haven_2.5.1        
> 

OK, I updated stringr and now it runs well. Thanks for everything.

As you suggested, I looked at other help resources and found these options.

# Folder: remove everything after the last "/" (i.e. the file name) and keep
# the trailing slash. Anchoring on the separator rather than on the digits
# avoids leaving a stray "G" behind for names such as "G   24.jpg".
list_file3$Entrega <- str_remove(list_file3$Folder_path, "[^/]*$")
# File name: the text after the last "/". str_extract() returns a character
# vector, whereas str_extract_all() would create a list column.
list_file3$file <- str_extract(list_file3$Folder_path, "[^/]+$")
# Accession code: the file name without its ".jpg" extension; the pattern is
# anchored so only an extension at the end of the name is removed.
list_file3$Accesion <- str_remove(list_file3$file, "\\.jpg$")


#                             Folder_path                   Entrega        file Accesion
# 1   1ra ENTREGA 2022 100 Acc/G   24.jpg 1ra ENTREGA 2022 100 Acc/  G   24.jpg   G   24
# 2   1ra ENTREGA 2022 100 Acc/G  114.jpg 1ra ENTREGA 2022 100 Acc/  G  114.jpg   G  114
# 3   2da ENTREGA 2022 100 Acc/G 1678.jpg 2da ENTREGA 2022 100 Acc/  G 1678.jpg   G 1678
# 4   2da ENTREGA 2022 100 Acc/G 2220.jpg 2da ENTREGA 2022 100 Acc/  G 2220.jpg   G 2220
# 5    3ra ENTREGA 2022 27 Acc/G24888.jpg  3ra ENTREGA 2022 27 Acc/  G24888.jpg   G24888
# 6    3ra ENTREGA 2022 27 Acc/G35109.jpg  3ra ENTREGA 2022 27 Acc/  G35109.jpg   G35109
# 7   4ta ENTREGA 2022 100 Acc/G 1653.jpg 4ta ENTREGA 2022 100 Acc/  G 1653.jpg   G 1653
# 8   4ta ENTREGA 2022 100 Acc/G 1767.jpg 4ta ENTREGA 2022 100 Acc/  G 1767.jpg   G 1767
# 9   5ta ENTREGA 2022 100 Acc/G10703.jpg 5ta ENTREGA 2022 100 Acc/  G10703.jpg   G10703
# 10  5ta ENTREGA 2022 100 Acc/G10705.jpg 5ta ENTREGA 2022 100 Acc/  G10705.jpg   G10705
# 11  6ta ENTREGA 2022 100 Acc/G13418.jpg 6ta ENTREGA 2022 100 Acc/  G13418.jpg   G13418
# 12  6ta ENTREGA 2022 100 Acc/G13425.jpg 6ta ENTREGA 2022 100 Acc/  G13425.jpg   G13425
# 13    7a ENTEGA 2022 100 Acc/G11134.jpg   7a ENTEGA 2022 100 Acc/  G11134.jpg   G11134
# 14    7a ENTEGA 2022 100 Acc/G11158.jpg   7a ENTEGA 2022 100 Acc/  G11158.jpg   G11158
# 15 8va ENTREGA 2022 119 Acc/G23816C.jpg 8va ENTREGA 2022 119 Acc/ G23816C.jpg  G23816C
# 16 8va ENTREGA 2022 119 Acc/G23834E.jpg 8va ENTREGA 2022 119 Acc/ G23834E.jpg  G23834E

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.