I managed to get what I wanted by wrapping the logic in a function.
The answer is below in case it helps anyone in the future.
library(tokenizers)
library(stringi)
library(tidyverse)
library(glue)
#>
#> Attaching package: 'glue'
#> The following object is masked from 'package:dplyr':
#>
#> collapse
# Build five random IDs: five upper-case letters, then four digits, then one
# upper-case letter. The tibble is constructed directly with an explicit
# `value` column instead of calling as_tibble() on a bare character vector,
# which tibble discourages; `random_letters$value` keeps working as before.
random_letters <- tibble(
  value = sprintf(
    "%s%s%s",
    stri_rand_strings(5, 5, '[A-Z]'),
    stri_rand_strings(5, 4, '[0-9]'),
    stri_rand_strings(5, 1, '[A-Z]')
  )
)
# Take the first ID on its own so the tokenizing can be tried on one value
one_item_example <- random_letters$value[1]
#' Build a comma-separated, single-quoted list of 4-character shingles
#'
#' @param mycol Character input to split into character shingles.
#' @return The collapsed string produced by `glue_collapse()`, for example
#'   `'qypx','ypxo','pxo7'`.
generate_or_statement <- function(mycol) {
  # n = n_min = 4 restricts the tokenizer to shingles of exactly four
  # characters; unlist() flattens its result into a single vector.
  test_col <- unlist(tokenize_character_shingles(mycol, n = 4, n_min = 4))
  # Quote each shingle, then join the quoted pieces with commas.
  glue_collapse(glue("'{test_col}'"), sep = ',')
}
# Single-ID check: build the quoted shingle list for the first ID only
result <- one_item_example %>% generate_or_statement()
result
#> 'qypx','ypxo','pxo7','xo71','o718','7186','186k'
# Apply the same function to every ID; map_chr() yields one string per ID
result <- random_letters$value %>%
  map_chr(generate_or_statement)
result
#> [1] "'qypx','ypxo','pxo7','xo71','o718','7186','186k'"
#> [2] "'aqoa','qoae','oae8','ae89','e899','8993','993c'"
#> [3] "'javg','avgc','vgc2','gc20','c201','2019','019h'"
#> [4] "'fptl','ptlf','tlf2','lf29','f295','2951','951p'"
#> [5] "'sgjo','gjop','jop9','op97','p971','9715','715n'"