Hi @andrewjmdata,
Might it be easier to find where the errors are in the raw data file, and fix those?
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(stringr))
# A CSV-type "file" with some commas missing.
# Goal: detect the 'problem' rows, i.e. lines with the wrong number of fields.
# Valid fields that are 'empty' or contain NA are OK.
in.txt <- "aa,bb,cc,dd,ee,ff
1,2,3,4,5,6
7,8,910,11,12
1314,15,16,1718
19,20,NA,,23,24"

# Read the text line by line, then close the connection as soon as the lines
# are in memory so it does not stay open for the rest of the session.
in.con <- textConnection(in.txt)
in.lines <- readLines(in.con)
close(in.con)

# seq_along() is also correct for zero lines, where 1:length() gives c(1, 0).
in.df <- data.frame(index = seq_along(in.lines), xx = in.lines)
in.df
#>   index                xx
#> 1     1 aa,bb,cc,dd,ee,ff
#> 2     2       1,2,3,4,5,6
#> 3     3     7,8,910,11,12
#> 4     4   1314,15,16,1718
#> 5     5   19,20,NA,,23,24

# We expect SIX fields per line, hence FIVE "," separators.
expected.fields <- 6L

# Count the separators once and reuse the result below.
# NOTE: a plain comma count is only valid while no field contains a quoted
# comma; utils::count.fields() is the quote-aware alternative.
sep.count <- str_count(in.df$xx, fixed(","))
sep.count
#> [1] 5 5 4 3 5
in.df$field_freq <- sep.count + 1L

# Which lines do not have the expected number of fields?
bad.rows <- which(in.df$field_freq != expected.fields)
bad.rows
#> [1] 3 4

# Show which lines need editing.
in.df[bad.rows, ]
#>   index              xx field_freq
#> 3     3   7,8,910,11,12          5
#> 4     4 1314,15,16,1718          4
Created on 2021-06-17 by the reprex package (v2.0.0)