Dear Kevin,
Please find below the code. Althought it works properly, if we try to View(dt_all_cells.nest) RStudio will hang severelly, forcing me to kiil the task via Windows.
Please let me know if you need something else.
Regards,
Fabio.
library(unpivotr)
library(tidyverse)
library(tidyxl)
library(data.table)
library(stringi)
library(purrr)
library(ggplot2)
library(RColorBrewer)
source("C://Users//fabio//OneDrive//Machine Learning Datasets//textPreProcess//cleanText.R")
############## Leitura dos ddos
read.file <- function(x) {
out <- xlsx_cells(path = x)
out$file <- x
return(out)
}
pattern_file <- "agosto18.xl?"
fpath <- "C://Users//fabio//OneDrive//Tecknowledge//ABSOLAR//Financiamento//CCEE-Res-Cons"
xlsx.list <- list.files(path = fpath, pattern = pattern_file, full.names = T)
dt_all_cells<-lapply(xlsx.list,read.file)
dt_all_cells<-rbindlist(dt_all_cells)
dt_all_cells <-
dt_all_cells[, .(sheet, row, col, data_type, numeric, date, character,file)
][data_type != "blank"
][, date := strftime(date, "%Y-%m", tz = "GMT")
]
conv.utf8 <- function(x) {
Encoding(x) <- "UTF-8"
return(x)
}
dt_all_cells[, sheet := conv.utf8(sheet)][, character := conv.utf8(character)]
################ Escolha das tabelas
dt_all_cells <- dt_all_cells[sheet %in% c("Resultado Consolidado"
, "Resultado Comprador"
, "REH 1.410_2013"
, "Resultado Vendedor"
,"MDER")
,]
pattern_corner<- c("ID ")
corners <- dt_all_cells[grepl(pattern_corner, character)
][,c("numeric","date"):=NULL
]
########## Funcao que desconsidera radical repetido do nome das tabelas
# gen.sheet.short.name <- function(x) {
# a<-substring(x, 1,4)
# anum <- as.numeric(a)*1
# if (!is.na(anum)) {
# out <- substr(x, 5, 1000)
# }
# else {
# out <- x
# }
# return(out)
# }
sn_function_sheet <- function (x){return (x)} ###### Antes era gen.sheet.short.name ()
sn_function_tabela <- function (x){return (x)} ###### substr(character, 1,10)
corners <- corners %>%
mutate(sheet_sn = sn_function_sheet(sheet)) %>%
mutate (tabela_sn = sn_function_tabela(character))
dt_all_cells <- dt_all_cells %>%
mutate(sheet_sn = sn_function_sheet(sheet))
dt_all_cells.nest <- dt_all_cells %>%
group_by(file, sheet_sn) %>%
nest()