Thanks @mara! Do you have any idea of the release cycle of readr? As in, do you know when this bug fix would hit CRAN?
@technocrat, it turns out that compression was the reason my earlier reprex was poor. Have a look here:
library(readr)
library(lobstr)
#> Warning: package 'lobstr' was built under R version 4.0.5
suppressPackageStartupMessages(library(gdata, warn.conflicts = FALSE))
#> Warning in system(cmd, intern = intern, wait = wait | intern,
#> show.output.on.console = wait, : running command 'C:\WINDOWS\system32\cmd.exe /c
#> ftype perl' had status 2
#> Warning in system(cmd, intern = intern, wait = wait | intern,
#> show.output.on.console = wait, : running command 'C:\WINDOWS\system32\cmd.exe /c
#> ftype perl' had status 2
mem_used()
#> 51,673,568 B
# Write `nrows` rows of two random uniform columns (`x`, `y`) to `file` as a
# fixed-width file without a header line, then gzip that file.
# Returns whatever R.utils::gzip() returns; the caller uses that value as the
# path of the compressed file.
make_fwf <- function(nrows, file) {
  dat <- data.frame(
    x = runif(nrows),
    y = runif(nrows)
  )
  gdata::write.fwf(dat, file, colnames = FALSE)
  # Drop the data frame and collect garbage -- presumably so the generated
  # data does not inflate a memory baseline measured after this call.
  rm(dat)
  gc()
  # overwrite = TRUE: with the default (FALSE) gzip() stops with
  # "File already exists" when the .gz left by a previous run is still
  # present, so the example could only be run once per directory.
  R.utils::gzip(file, overwrite = TRUE)
}
fwf_sample <- make_fwf(1E6, "fwf-eg.fwf")
(start <- mem_used())
#> 61,211,352 B
# Chunk callback: ignores the chunk (`x`) and its position (`pos`) and instead
# re-reads the whole of `fwf_sample` with read_fwf() on every call, then
# discards the parsed result and runs gc().
f <- function(x, pos) {
  col_positions <- fwf_empty(fwf_sample, col_names = c("x", "y"))
  parsed <- read_fwf(fwf_sample, col_positions, col_types = "dd")
  rm(parsed)
  gc()
}
# Stream `fwf_sample` in chunks of 50,000 lines, invoking f() once per chunk
# purely for its side effects (no progress bar).
read_lines_chunked(
  fwf_sample,
  SideEffectChunkCallback$new(f),
  chunk_size = 50000,
  progress = FALSE
)
#> NULL
## Memory taken up
mem_used()
#> 1,713,272,496 B
## Memory added
mem_used() - start
#> 1,652,060,232 B
## Size of file
file.info(fwf_sample)$size
#> [1] 9579982
Session info
sessioninfo::session_info()
#> - Session info ---------------------------------------------------------------
#> setting value
#> version R version 4.0.4 (2021-02-15)
#> os Windows 10 x64
#> system x86_64, mingw32
#> ui RTerm
#> language (EN)
#> collate English_Canada.1252
#> ctype English_Canada.1252
#> tz America/Los_Angeles
#> date 2021-04-14
#>
#> - Packages -------------------------------------------------------------------
#> package * version date lib source
#> backports 1.2.1 2020-12-09 [1] CRAN (R 4.0.3)
#> cli 2.4.0 2021-04-05 [1] CRAN (R 4.0.4)
#> crayon 1.4.1 2021-02-08 [1] CRAN (R 4.0.3)
#> debugme 1.1.0 2017-10-22 [1] CRAN (R 4.0.2)
#> digest 0.6.27 2020-10-24 [1] CRAN (R 4.0.3)
#> ellipsis 0.3.1 2020-05-15 [1] CRAN (R 4.0.0)
#> evaluate 0.14 2019-05-28 [1] CRAN (R 4.0.0)
#> fansi 0.4.2 2021-01-15 [1] CRAN (R 4.0.3)
#> fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.2)
#> gdata * 2.18.0 2017-06-06 [1] CRAN (R 4.0.4)
#> glue 1.4.2 2020-08-27 [1] CRAN (R 4.0.2)
#> gtools 3.8.2 2020-03-31 [1] CRAN (R 4.0.3)
#> highr 0.8 2019-03-20 [1] CRAN (R 4.0.0)
#> hms 1.0.0 2021-01-13 [1] CRAN (R 4.0.3)
#> htmltools 0.5.1.1 2021-01-22 [1] CRAN (R 4.0.3)
#> knitr 1.31 2021-01-27 [1] CRAN (R 4.0.3)
#> lifecycle 1.0.0 2021-02-15 [1] CRAN (R 4.0.4)
#> lobstr * 1.1.1 2019-07-02 [1] CRAN (R 4.0.5)
#> magrittr 2.0.1 2020-11-17 [1] CRAN (R 4.0.3)
#> pillar 1.5.1 2021-03-05 [1] CRAN (R 4.0.4)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.0.0)
#> purrr 0.3.4 2020-04-17 [1] CRAN (R 4.0.0)
#> R.cache 0.14.0 2019-12-06 [1] CRAN (R 4.0.0)
#> R.methodsS3 1.8.1 2020-08-26 [1] CRAN (R 4.0.2)
#> R.oo 1.24.0 2020-08-26 [1] CRAN (R 4.0.2)
#> R.utils 2.10.1 2020-08-26 [1] CRAN (R 4.0.2)
#> R6 2.5.0 2020-10-28 [1] CRAN (R 4.0.3)
#> Rcpp 1.0.6 2021-01-15 [1] CRAN (R 4.0.3)
#> readr * 1.4.0 2020-10-05 [1] CRAN (R 4.0.2)
#> rematch2 2.1.2 2020-05-01 [1] CRAN (R 4.0.0)
#> reprex 2.0.0 2021-04-02 [1] CRAN (R 4.0.5)
#> rlang 0.4.10 2020-12-30 [1] CRAN (R 4.0.3)
#> rmarkdown 2.7 2021-02-19 [1] CRAN (R 4.0.4)
#> sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 4.0.0)
#> stringi 1.5.3 2020-09-09 [1] CRAN (R 4.0.2)
#> stringr 1.4.0 2019-02-10 [1] CRAN (R 4.0.2)
#> styler 1.4.1 2021-03-30 [1] CRAN (R 4.0.4)
#> tibble 3.1.0 2021-02-25 [1] CRAN (R 4.0.4)
#> utf8 1.2.1 2021-03-12 [1] CRAN (R 4.0.5)
#> vctrs 0.3.7 2021-03-29 [1] CRAN (R 4.0.5)
#> withr 2.4.1 2021-01-26 [1] CRAN (R 4.0.3)
#> xfun 0.22 2021-03-11 [1] CRAN (R 4.0.4)
#> yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.0)
#>
#> [1] C:/Users/salbers/R/win-library/4.0
#> [2] C:/Program Files/R/R-4.0.4/library