True, but keyword-in-context (KWIC) search is a well-known natural language processing problem, and existing solutions, such as the quanteda package used below, already handle large text collections efficiently.
library(quanteda)
#> Package version: 3.1.0
#> Unicode version: 13.0
#> ICU version: 67.1
#> Parallel computing: 12 of 12 threads used.
#> See https://quanteda.io for tutorials and examples.
# Example corpus as a named character vector: doc1 and doc2 contain
# ID-like tokens ("a1", "b2"); doc3 contains none.
txt <- c(
  doc1 = "This a sentence containing the a1 keyword.",
  doc2 = "This sentence has both a1 and b2.",
  doc3 = "Nothing to see here, folks."
)
txt
#> doc1
#> "This a sentence containing the a1 keyword."
#> doc2
#> "This sentence has both a1 and b2."
#> doc3
#> "Nothing to see here, folks."
# Identifiers to search for; "c3" and "yx" do not occur in any document.
article_ids <- c("a1", "b2", "c3", "yx")

# Tokenize the documents once so the search operates on tokens.
toks <- tokens(txt)

# Keyword-in-context lookup for all identifiers at once. The patterns are
# interpreted as glob expressions (none of these contain wildcards), and
# `window` sets how many tokens of context are shown around each match.
kwic(
  toks,
  pattern = article_ids,
  valuetype = "glob",
  window = 10
)
#> Keyword-in-context with 3 matches.
#> [doc1, 6] This a sentence containing the | a1 | keyword.
#> [doc2, 5] This sentence has both | a1 | and b2.
#> [doc2, 7] This sentence has both a1 and | b2 | .