Convert txt file to dataframe in R

vinayprakash808 · May 10, 2022, 10:41am

Is there a way to convert the data in a txt file into a data frame in R?

For example, I have a df.txt file in my project folder

df.csv

"a", "e", "b", "c" ,"d", "f"
a 1, e 1, b1 ,c1 ,d1, f1
"a", "e", "b" ,"c" ,"d" ,"f"
a1, e1, sdf, c1 ,d1, f1
"a" ,"e" ,"b", "c", "d" ,"f"
a1 ,e1, sdf, sdf,d1 ,f1
"a", "e", "b" ,"c" ,"d", "f", "z"
a2, e1 ,b1, c1, d1, f1, z1

Expected output

     a    e    b    c    d    f    z   ### column names

    a 1  e 1  b1   c1   d1   f1   NA
    a1   e1   sdf  c1   d1   f1   NA
    a1   e1   sdf  sdf  d1   f1   NA
    a2   e1   b1   c1   d1   f1   z1

melih_guven · May 10, 2022, 12:36pm

library(tidyverse)

# Read the file as plain comma-separated text. quote = "" turns quote
# handling off, so the double quotes in the header line stay in the names.
df <- read_delim("df.csv", delim = ",", quote = "")

# Clean the column names: drop the literal double quotes, then the spaces.
names(df) <- names(df) %>%
  str_remove_all('"') %>%
  str_remove_all(" ")

# The first line was consumed as the header, so the data lines are now the
# odd-numbered rows; keep only those.
filter(df, row_number() %% 2 == 1)
# 
# # A tibble: 4 x 6
# a     e      b      c      d     f    
# <chr> <chr>  <chr>  <chr>  <chr> <chr>
# 1 "a 1" " e 1" " b1 " "c1 "  "d1"  " f1"
# 2 "a1"  " e1"  " sdf" " c1 " "d1"  " f1"
# 3 "a1 " "e1"   " sdf" " sdf" "d1 " "f1" 
# 4 "a2"  " e1 " "b1"   " c1"  " d1" " f1"

vinayprakash808 · May 10, 2022, 1:33pm

Thanks, but there is another column called z, and it is missing from this output.

nirgrahamuk · May 10, 2022, 2:15pm

#' Read a file made of repeated header-line / data-line pairs
#'
#' Every odd non-blank line is treated as a CSV header and the line that
#' follows it as the single data row belonging to that header. Each pair is
#' parsed on its own and the one-row results are stacked by column name, so
#' a column that only appears in some pairs is filled with `NA` elsewhere.
#'
#' @param filepath Path to the text file.
#' @return A data frame with one row per header/data pair.
process_funky_file <- function(filepath) {
  # Passing the path (not a connection object) lets readLines() open and
  # close the file itself, so no connection is left dangling.
  content <- readLines(filepath)

  # Blank lines (e.g. a trailing empty line) would shift the pairing.
  content <- content[nzchar(trimws(content))]

  if (length(content) %% 2L != 0L) {
    stop(
      "Expected header/data line pairs, but '", filepath, "' has ",
      length(content), " non-blank lines.",
      call. = FALSE
    )
  }

  # Index of the header line of every pair: 1, 3, 5, ...
  starts <- seq(1L, by = 2L, length.out = length(content) %/% 2L)

  parts <- lapply(starts, function(i) {
    # Parse the pair straight from memory; strip.white trims the padding
    # around the commas from the data fields as well as the header names.
    utils::read.csv(text = content[c(i, i + 1L)], strip.white = TRUE)
  })

  # NOTE: column types are guessed per pair; bind_rows() raises an error if
  # the same column ends up with incompatible types in different pairs.
  dplyr::bind_rows(parts)
}

# Run the parser on the example file from the question.
process_funky_file("df.txt")

melih_guven · May 10, 2022, 3:00pm



library(tidyverse)

# Header and data lines alternate in the file, and a later header may have
# more fields than an earlier one, so the widest line decides how many
# columns to allocate.
num_of_cols <- max(count.fields("df.csv", sep = ","))

# Placeholder names, so that every line (header lines included) is read in
# as an ordinary row.
col_names_temp <- paste0("X", seq_len(num_of_cols))

# quote = "" turns quote handling off; the double quotes around the header
# fields are kept as literal characters and removed further down.
df <- read_delim(
  "df.csv",
  delim = ",",
  quote = "",
  col_names = col_names_temp
)

# Header lines sit on the odd rows.
header_rows <- df %>%
  filter(row_number() %% 2 == 1)

# The real column names come from the header row with the most non-missing
# fields (the first such row on ties), with quotes and spaces stripped.
col_names_org <- header_rows %>%
  slice(which.max(rowSums(!is.na(header_rows)))) %>%
  unlist(use.names = FALSE) %>%
  str_remove_all('"') %>%
  str_remove_all(" ")

# Data lines sit on the even rows; give them the recovered column names.
df %>%
  filter(row_number() %% 2 == 0) %>%
  rename_with(~ col_names_org, .cols = everything())
  
    
# # A tibble: 4 x 7
# a     e      b      c      d     f     z    
# <chr> <chr>  <chr>  <chr>  <chr> <chr> <chr>
# 1 "a 1" " e 1" " b1 " "c1 "  "d1"  " f1"  NA  
# 2 "a1"  " e1"  " sdf" " c1 " "d1"  " f1"  NA  
# 3 "a1 " "e1"   " sdf" " sdf" "d1 " "f1"   NA  
# 4 "a2"  " e1 " "b1"   " c1"  " d1" " f1" " z1"

system · May 31, 2022, 3:01pm

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.