Hi community
I'm using list.files()
to get .jpg files from many folders. I want to obtain the folder name, the file name, and the Accesion. I tried to extract and remove the folder part, but the result is only correct for the first and last rows.
I also tried an example with mutate()
for each path, but it doesn't run well.
library(tidyverse)
library(stringr)
# Example data: 16 relative paths of the form "<folder>/<file>.jpg",
# two files for each of 8 folders.
list_file3 <- data.frame(Folder_path=c("1ra ENTREGA 2022 100 Acc/G 24.jpg", "1ra ENTREGA 2022 100 Acc/G 114.jpg",
"2da ENTREGA 2022 100 Acc/G 1678.jpg", "2da ENTREGA 2022 100 Acc/G 2220.jpg",
"3ra ENTREGA 2022 27 Acc/G24888.jpg", "3ra ENTREGA 2022 27 Acc/G35109.jpg",
"4ta ENTREGA 2022 100 Acc/G 1653.jpg", "4ta ENTREGA 2022 100 Acc/G 1767.jpg",
"5ta ENTREGA 2022 100 Acc/G10703.jpg", "5ta ENTREGA 2022 100 Acc/G10705.jpg",
"6ta ENTREGA 2022 100 Acc/G13418.jpg", "6ta ENTREGA 2022 100 Acc/G13425.jpg",
"7a ENTEGA 2022 100 Acc/G11134.jpg", "7a ENTEGA 2022 100 Acc/G11158.jpg",
"8va ENTREGA 2022 119 Acc/G23816C.jpg", "8va ENTREGA 2022 119 Acc/G23834E.jpg"))
# The 8 distinct folder prefixes (including the trailing "/") found in Folder_path.
path <- c('1ra ENTREGA 2022 100 Acc/', '2da ENTREGA 2022 100 Acc/',
'3ra ENTREGA 2022 27 Acc/', '4ta ENTREGA 2022 100 Acc/',
'5ta ENTREGA 2022 100 Acc/','6ta ENTREGA 2022 100 Acc/',
'7a ENTEGA 2022 100 Acc/', '8va ENTREGA 2022 119 Acc/')
# Attempt to split each path into the folder (Entrega), the file name (file)
# and the file name without its ".jpg" extension (Accesion).
# NOTE(review): `path` has 8 elements but Folder_path has 16 rows, so the
# pattern in position k is not the folder of row k for most rows. This is the
# attempt reported as only giving correct results for the first and last rows.
list_file3 |>
mutate(Entrega=str_extract(Folder_path,path)) |>
mutate(file= str_remove(Folder_path,path)) |>
mutate(Accesion=str_remove(file,'\\.jpg$'))
Thanks!
Here is one approach for extracting the various file path elements.
# Split each path at its first "/" into the folder (Entrega) and the file name
# (file), then strip the ".jpg" extension from the file name to get Accesion.
#
# str_locate() and str_sub() are vectorised over Folder_path, so the whole
# column is processed at once and no rowwise() grouping is needed.
# str_locate() returns a matrix with "start" and "end" columns; the "start"
# column is the position of the first "/" in each path (NA if there is none).
# as_tibble() keeps the result a tibble, as rowwise() |> ungroup() did.
list_file3 |>
  as_tibble() |>
  mutate(
    Entrega = str_sub(
      Folder_path,
      1,
      str_locate(Folder_path, "/")[, "start"] - 1
    ),
    # The default `end` of str_sub() is the last character of the string.
    file = str_sub(Folder_path, str_locate(Folder_path, "/")[, "start"] + 1),
    Accesion = str_remove(file, "\\.jpg$")
  )
#> # A tibble: 16 × 4
#> Folder_path Entrega file Acces…¹
#> <chr> <chr> <chr> <chr>
#> 1 1ra ENTREGA 2022 100 Acc/G 24.jpg 1ra ENTREGA 2022 100 Acc G 24… G 24
#> 2 1ra ENTREGA 2022 100 Acc/G 114.jpg 1ra ENTREGA 2022 100 Acc G 114… G 114
#> 3 2da ENTREGA 2022 100 Acc/G 1678.jpg 2da ENTREGA 2022 100 Acc G 1678… G 1678
#> 4 2da ENTREGA 2022 100 Acc/G 2220.jpg 2da ENTREGA 2022 100 Acc G 2220… G 2220
#> 5 3ra ENTREGA 2022 27 Acc/G24888.jpg 3ra ENTREGA 2022 27 Acc G24888… G24888
#> 6 3ra ENTREGA 2022 27 Acc/G35109.jpg 3ra ENTREGA 2022 27 Acc G35109… G35109
#> 7 4ta ENTREGA 2022 100 Acc/G 1653.jpg 4ta ENTREGA 2022 100 Acc G 1653… G 1653
#> 8 4ta ENTREGA 2022 100 Acc/G 1767.jpg 4ta ENTREGA 2022 100 Acc G 1767… G 1767
#> 9 5ta ENTREGA 2022 100 Acc/G10703.jpg 5ta ENTREGA 2022 100 Acc G10703… G10703
#> 10 5ta ENTREGA 2022 100 Acc/G10705.jpg 5ta ENTREGA 2022 100 Acc G10705… G10705
#> 11 6ta ENTREGA 2022 100 Acc/G13418.jpg 6ta ENTREGA 2022 100 Acc G13418… G13418
#> 12 6ta ENTREGA 2022 100 Acc/G13425.jpg 6ta ENTREGA 2022 100 Acc G13425… G13425
#> 13 7a ENTEGA 2022 100 Acc/G11134.jpg 7a ENTEGA 2022 100 Acc G11134… G11134
#> 14 7a ENTEGA 2022 100 Acc/G11158.jpg 7a ENTEGA 2022 100 Acc G11158… G11158
#> 15 8va ENTREGA 2022 119 Acc/G23816C.jpg 8va ENTREGA 2022 119 Acc G23816… G23816C
#> 16 8va ENTREGA 2022 119 Acc/G23834E.jpg 8va ENTREGA 2022 119 Acc G23834… G23834E
#> # … with abbreviated variable name ¹Accesion
Created on 2023-01-07 with reprex v2.0.2.9000
1 Like
DavoWW
January 8, 2023, 2:39am
3
Hi @M_AcostaCH ,
So many ways to "skin this cat"....
suppressPackageStartupMessages(library(tidyverse))
library(stringr)
# Example data: 16 relative paths of the form "<folder>/<file>.jpg".
list_file3 <- data.frame(Folder_path = c(
  "1ra ENTREGA 2022 100 Acc/G 24.jpg", "1ra ENTREGA 2022 100 Acc/G 114.jpg",
  "2da ENTREGA 2022 100 Acc/G 1678.jpg", "2da ENTREGA 2022 100 Acc/G 2220.jpg",
  "3ra ENTREGA 2022 27 Acc/G24888.jpg", "3ra ENTREGA 2022 27 Acc/G35109.jpg",
  "4ta ENTREGA 2022 100 Acc/G 1653.jpg", "4ta ENTREGA 2022 100 Acc/G 1767.jpg",
  "5ta ENTREGA 2022 100 Acc/G10703.jpg", "5ta ENTREGA 2022 100 Acc/G10705.jpg",
  "6ta ENTREGA 2022 100 Acc/G13418.jpg", "6ta ENTREGA 2022 100 Acc/G13425.jpg",
  "7a ENTEGA 2022 100 Acc/G11134.jpg", "7a ENTEGA 2022 100 Acc/G11158.jpg",
  "8va ENTREGA 2022 119 Acc/G23816C.jpg", "8va ENTREGA 2022 119 Acc/G23834E.jpg"
))

# The hand-written folder vector is not needed; the folder is derived from
# each path instead.
# path <- c('1ra ENTREGA 2022 100 Acc/', '2da ENTREGA 2022 100 Acc/',
# '3ra ENTREGA 2022 27 Acc/', '4ta ENTREGA 2022 100 Acc/',
# '5ta ENTREGA 2022 100 Acc/','6ta ENTREGA 2022 100 Acc/',
# '7a ENTEGA 2022 100 Acc/', '8va ENTREGA 2022 119 Acc/')

# See various options at:
# https://stackoverflow.com/questions/10617702/remove-part-of-string-after

# Split each path into the folder (Entrega, with trailing "/"), the file name
# (file) and the file name without its ".jpg" extension (Accesion), then show
# the first 10 rows.
# str_split_i() requires stringr >= 1.5.0.
list_file3 |>
  mutate(
    Entrega = paste0(str_split_i(Folder_path, "/", i = 1), "/"),
    # fixed() makes the folder name match as literal text rather than as a
    # regular expression, so folder names containing characters such as
    # "(", "." or "+" are still removed correctly.
    file = str_remove(Folder_path, fixed(Entrega)),
    Accesion = str_remove(file, "\\.jpg$")
  ) |>
  # The native pipe has no `.` placeholder; the data is passed as the first
  # argument automatically.
  head(n = 10)
#> Folder_path Entrega file
#> 1 1ra ENTREGA 2022 100 Acc/G 24.jpg 1ra ENTREGA 2022 100 Acc/ G 24.jpg
#> 2 1ra ENTREGA 2022 100 Acc/G 114.jpg 1ra ENTREGA 2022 100 Acc/ G 114.jpg
#> 3 2da ENTREGA 2022 100 Acc/G 1678.jpg 2da ENTREGA 2022 100 Acc/ G 1678.jpg
#> 4 2da ENTREGA 2022 100 Acc/G 2220.jpg 2da ENTREGA 2022 100 Acc/ G 2220.jpg
#> 5 3ra ENTREGA 2022 27 Acc/G24888.jpg 3ra ENTREGA 2022 27 Acc/ G24888.jpg
#> 6 3ra ENTREGA 2022 27 Acc/G35109.jpg 3ra ENTREGA 2022 27 Acc/ G35109.jpg
#> 7 4ta ENTREGA 2022 100 Acc/G 1653.jpg 4ta ENTREGA 2022 100 Acc/ G 1653.jpg
#> 8 4ta ENTREGA 2022 100 Acc/G 1767.jpg 4ta ENTREGA 2022 100 Acc/ G 1767.jpg
#> 9 5ta ENTREGA 2022 100 Acc/G10703.jpg 5ta ENTREGA 2022 100 Acc/ G10703.jpg
#> 10 5ta ENTREGA 2022 100 Acc/G10705.jpg 5ta ENTREGA 2022 100 Acc/ G10705.jpg
#> Accesion
#> 1 G 24
#> 2 G 114
#> 3 G 1678
#> 4 G 2220
#> 5 G24888
#> 6 G35109
#> 7 G 1653
#> 8 G 1767
#> 9 G10703
#> 10 G10705
# You can then do a left_join() if you want to check that the elements of 'path' match
# one of the extracted values of 'Entrega'.
Created on 2023-01-08 with reprex v2.0.2
1 Like
Hi, I get this error on this line:
Maybe it is the version of library(stringr)
?
# I have loaded all the libraries
Error in `mutate()`:
! Problem while computing `Entrega =
paste0(str_split_i(Folder_path, "/", i = 1),
"/")`.
Caused by error in `str_split_i()`:
! could not find function "str_split_i"
DavoWW
January 8, 2023, 4:03am
5
Hi @M_AcostaCH ,
This is my set-up:
> sessionInfo()
R version 4.2.2 (2022-10-31 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 22621)
Matrix products: default
locale:
[1] LC_COLLATE=English_Australia.utf8 LC_CTYPE=English_Australia.utf8
[3] LC_MONETARY=English_Australia.utf8 LC_NUMERIC=C
[5] LC_TIME=English_Australia.utf8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] forcats_0.5.2 stringr_1.5.0 dplyr_1.0.10 purrr_1.0.0 readr_2.1.3
[6] tidyr_1.2.1 tibble_3.1.8 ggplot2_3.4.0 tidyverse_1.3.2
loaded via a namespace (and not attached):
[1] lubridate_1.9.0 ps_1.7.2 assertthat_0.2.1 digest_0.6.31
[5] utf8_1.2.2 R6_2.5.1 cellranger_1.1.0 backports_1.4.1
[9] reprex_2.0.2 evaluate_0.19 httr_1.4.4 highr_0.10
[13] pillar_1.8.1 rlang_1.0.6 googlesheets4_1.0.1 readxl_1.4.1
[17] rstudioapi_0.14 callr_3.7.3 R.utils_2.12.2 R.oo_1.25.0
[21] rmarkdown_2.19 styler_1.8.1 googledrive_2.0.0 munsell_0.5.0
[25] broom_1.0.2 compiler_4.2.2 modelr_0.1.10 xfun_0.36
[29] pkgconfig_2.0.3 clipr_0.8.0 htmltools_0.5.4 tidyselect_1.2.0
[33] fansi_1.0.3 crayon_1.5.2 tzdb_0.3.0 dbplyr_2.2.1
[37] withr_2.5.0 R.methodsS3_1.8.2 grid_4.2.2 jsonlite_1.8.4
[41] gtable_0.3.1 lifecycle_1.0.3 DBI_1.1.3 magrittr_2.0.3
[45] scales_1.2.1 cli_3.5.0 stringi_1.7.8 fs_1.5.2
[49] xml2_1.3.3 ellipsis_0.3.2 generics_0.1.3 vctrs_0.5.1
[53] tools_4.2.2 R.cache_0.16.0 glue_1.6.2 hms_1.1.2
[57] processx_3.8.0 fastmap_1.1.0 yaml_2.3.6 timechange_0.1.1
[61] colorspace_2.0-3 gargle_1.2.1 rvest_1.0.3 knitr_1.41
[65] haven_2.5.1
>
OK, I updated stringr
and it runs well. Thanks for everything.
As you said, I tried other help resources and found these options.
# Split each path at its last "/": everything up to and including the slash is
# the folder (Entrega), the remainder is the file name (file).
# Anchoring on the slash instead of guessing the shape of the file name avoids
# leaving a stray "G" in Entrega for names such as "G 24.jpg".
list_file3$Entrega <- str_extract(list_file3$Folder_path, "^.*/")
# str_remove() returns a plain character vector (str_extract_all() would
# create a list column).
list_file3$file <- str_remove(list_file3$Folder_path, "^.*/")
# Strip the extension only at the end of the file name.
list_file3$Accesion <- str_remove(list_file3$file, "\\.jpg$")
# Folder_path Entrega file Accesion
# 1 1ra ENTREGA 2022 100 Acc/G 24.jpg 1ra ENTREGA 2022 100 Acc/ G 24.jpg G 24
# 2 1ra ENTREGA 2022 100 Acc/G 114.jpg 1ra ENTREGA 2022 100 Acc/ G 114.jpg G 114
# 3 2da ENTREGA 2022 100 Acc/G 1678.jpg 2da ENTREGA 2022 100 Acc/ G 1678.jpg G 1678
# 4 2da ENTREGA 2022 100 Acc/G 2220.jpg 2da ENTREGA 2022 100 Acc/ G 2220.jpg G 2220
# 5 3ra ENTREGA 2022 27 Acc/G24888.jpg 3ra ENTREGA 2022 27 Acc/ G24888.jpg G24888
# 6 3ra ENTREGA 2022 27 Acc/G35109.jpg 3ra ENTREGA 2022 27 Acc/ G35109.jpg G35109
# 7 4ta ENTREGA 2022 100 Acc/G 1653.jpg 4ta ENTREGA 2022 100 Acc/ G 1653.jpg G 1653
# 8 4ta ENTREGA 2022 100 Acc/G 1767.jpg 4ta ENTREGA 2022 100 Acc/ G 1767.jpg G 1767
# 9 5ta ENTREGA 2022 100 Acc/G10703.jpg 5ta ENTREGA 2022 100 Acc/ G10703.jpg G10703
# 10 5ta ENTREGA 2022 100 Acc/G10705.jpg 5ta ENTREGA 2022 100 Acc/ G10705.jpg G10705
# 11 6ta ENTREGA 2022 100 Acc/G13418.jpg 6ta ENTREGA 2022 100 Acc/ G13418.jpg G13418
# 12 6ta ENTREGA 2022 100 Acc/G13425.jpg 6ta ENTREGA 2022 100 Acc/ G13425.jpg G13425
# 13 7a ENTEGA 2022 100 Acc/G11134.jpg 7a ENTEGA 2022 100 Acc/ G11134.jpg G11134
# 14 7a ENTEGA 2022 100 Acc/G11158.jpg 7a ENTEGA 2022 100 Acc/ G11158.jpg G11158
# 15 8va ENTREGA 2022 119 Acc/G23816C.jpg 8va ENTREGA 2022 119 Acc/ G23816C.jpg G23816C
# 16 8va ENTREGA 2022 119 Acc/G23834E.jpg 8va ENTREGA 2022 119 Acc/ G23834E.jpg G23834E
system
Closed
January 15, 2023, 5:09am
8
This topic was automatically closed 7 days after the last reply. New replies are no longer allowed. If you have a query related to it or one of the replies, start a new topic and refer back with a link.