stemming, stopword, delete space, and save to csv error on pre-processing

hello im a newbie and im trying to get to work with Rstudio for my thesis and i got some error while doing stemming, stopword, delete space, and save to csv on pre-processing. here is the code

setwd("F:/SKRIPSI/R pre-processing/data_preprocessing_test1")
rm(list = ls())

#install.packages("remotes")
#remotes::install_github("nurandi/katadasaR")

library(tm)
library(NLP)
library(stringr)
library(caret)
library(dplyr)
library(tau)
library(parallel)
library(readxl)
library(katadasaR)
library(tokenizers)

#memcaca dokumen excel
dok <- read_excel(path="data_preprocessing_test1.xlsx" ,
                  sheet="Sheet1",
                  col_names= TRUE)
View(dok)

#merubah file excel kedalam corpus dan select text, tulisan sesudah $ disesuaikan dengan nama tabel di data yang digunakan
corpusdok <- Corpus(VectorSource(dok$text))
inspect(corpusdok[1:10])

#mengubah semua huruf kapital menjadi huruf kecil pada dokumen sebelumnya yaitu corpusdok dengan memanfaatkan bantuan package tm dan tm_map.
dok_casefolding <- tm_map(corpusdok, content_transformer(tolower))
inspect(dok_casefolding[1:10])

#menghapus url pada dokumen sebelumnya yaitu dok_casefolding
removeURL <- function(x) gsub("http[^[:space:]]*", "", x)
dok_URL <- tm_map(dok_casefolding, content_transformer(removeURL))
inspect(dok_URL[1:10])

#menghapus mention pada dokumen sebelumnya yaitu dok_URL
remove.mention <- function(x) gsub("@\\S+", "", x)
dok_mention <- tm_map(dok_URL, remove.mention)
inspect(dok_mention[1:10])

#menghapus hastag
remove.hashtag <- function(x) gsub("#\\S+", "", x)
dok_hashtag <- tm_map(dok_mention, remove.hashtag)
inspect(dok_hashtag[1:10])

#menghapus tanda baca
dok_punctuation<-tm_map(dok_hashtag,content_transformer(removePunctuation))
inspect(dok_punctuation[1:10])

#menghapus angka
dok_nonumber<-tm_map(dok_punctuation, content_transformer(removeNumbers))
inspect(dok_nonumber[1:10])

#normalisasi perbaikan kata ejaan atau slang
slang <- read.csv("slangword_list.csv", header=T)
old_slang <- as.character(slang$old)
new_slang <- as.character(slang$new)
slangword<-function(x)Reduce(function(x,r)gsub(slang$old[r],slang$new[r],x,fixed=T),seq_len(nrow(slang)),x)
dok_slangword <- tm_map(dok_nonumber,slangword)
inspect(dok_slangword[1:10])

#stemming penguraian kata dasar
stem_text<-function(text,mc.cores=1)
{
  stem_string<-function(str)
  {
    str<-tokenize(x=str)
    str<-sapply(str,katadasaR)
    str<-paste(str,collapse = "")
    return(str)
  }
  x<-mclapply(X=text,FUN=stem_string,mc.cores=mc.cores)
  return(unlist(x))
}
dok_stemming<-tm_map(dok_slangword,stem_text)
inspect(dok_stemming[1:10])

#filtering atau stopword penghapusan kata tidak berpengaruh
cStopwordID<-readLines("stopwords.csv")
dok_stopword <- tm_map(dok_stemming, removeWords, cStopwordID)
inspect(dok_stopword[1:10])

#menghapus spasi berlebihan
dok_whitespace <- tm_map(dok_stopword,stripWhitespace)
inspect(dok_whitespace[1:10])

#menyimpan file ke csv
databersih <- data.frame(text=unlist(sapply(dok_whitespace,`[`)), tringsAsFactors=F)
write.csv(databersih,file="datasesudah.csv")

and the following error

#stemming penguraian kata dasar
> stem_text<-function(text,mc.cores=1)
+ {
+   stem_string<-function(str)
+   {
+     str<-tokenize(x=str)
+     str<-sapply(str,katadasaR)
+     str<-paste(str,collapse = "")
+     return(str)
+   }
+   x<-mclapply(X=text,FUN=stem_string,mc.cores=mc.cores)
+   return(unlist(x))
+ }
> dok_stemming<-tm_map(dok_slangword,stem_text)
 Error in tokenize(x = str) : unused argument (x = str) > inspect(dok_stemming[1:10])
Error in inspect(dok_stemming[1:10]) : object 'dok_stemming' not found
> 
> #filtering atau stopword penghapusan kata tidak berpengaruh
> cStopwordID<-readLines("stopwords.csv")
> dok_stopword <- tm_map(dok_stemming, removeWords, cStopwordID)
Error in tm_map(dok_stemming, removeWords, cStopwordID) : 
  object 'dok_stemming' not found
> inspect(dok_stopword[1:10])
Error in inspect(dok_stopword[1:10]) : object 'dok_stopword' not found
> 
> #menghapus spasi berlebihan
> dok_whitespace <- tm_map(dok_stopword,stripWhitespace)
Error in tm_map(dok_stopword, stripWhitespace) : 
  object 'dok_stopword' not found
> inspect(dok_whitespace[1:10])
Error in inspect(dok_whitespace[1:10]) : 
  object 'dok_whitespace' not found
> 
> #menyimpan file ke csv
> databersih <- data.frame(text=unlist(sapply(dok_whitespace,`[`)), tringsAsFactors=F)
Error in lapply(X = X, FUN = FUN, ...) : 
  object 'dok_whitespace' not found
> write.csv(databersih,file="datasesudah.csv")
Error in is.data.frame(x) : object 'databersih' not found

i hope everybody can help, thank you in advance!

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.