Not sure I'm following your logic. Is the output what you expect?
suppressPackageStartupMessages(library(dplyr))
# Example input `a`: a data frame declared with 200 rows and two character
# columns.
#   Text - the surface tokens of the sample text, one token per row.
#   col2 - one "<token>_<TAG>" string per row; this looks like a lower-cased
#          lemma joined to a part-of-speech tag (e.g. "alien_NNS") -- confirm
#          against whatever tagger produced it.
# NOTE(review): the structure(...) form looks like dput() output; keep it
# verbatim so the reproduced object matches the original one exactly.
a <- structure(list(Text = c("Aliens", "love", "underpants", "Of",
"every", "shape", "and", "size", "But", "there", "are", "no",
"underpants", "in", "space", "So", "here", "'s", "the", "big",
"surprise", "When", "aliens", "fly", "down", "to", "Earth", "They",
"do", "n't", "come", "to", "meet", "YOU", "They", "simply", "want",
"your", "underpants", "I", "'ll", "bet", "you", "never", "knew",
"Their", "spaceships", "'s", "radar", "bleeps", "and", "blinks",
"The", "moment", "that", "it", "sees", "A", "washing", "line",
"of", "underpants", "All", "flapping", "in", "the", "breeze",
"They", "land", "in", "your", "back", "garden", "Though", "they",
"have", "n't", "been", "invited", "Oooooh", "UNDERPANTS", "they",
"chant", "And", "dance", "around", "delighted", "They", "like",
"them", "red", "they", "like", "them", "green", "Or", "orange",
"like", "satsumas", "But", "best", "of", "all", "they", "love",
"the", "sight", "Of", "Granny", "'s", "spotted", "bloomers",
"Mum", "'s", "pink", "frilly", "knickers", "Are", "a", "perfect",
"place", "to", "hide", "And", "Grandpa", "'s", "woolly", "long",
"johns", "Make", "a", "super-whizzy", "slide", "In", "daring",
"competitions", "Held", "up", "by", "just", "one", "peg", "They",
"count", "how", "many", "aliens", "Can", "squeeze", "into", "each",
"leg", "They", "wear", "pants", "on", "their", "feet", "and",
"heads", "And", "other", "silly", "places", "They", "fly", "pants",
"from", "their", "spaceships", "and", "Hold", "Upside-Down-Pant",
"Races", "As", "they", "go", "zinging", "through", "the", "air",
"It", "really", "is", "pants-tastic", "What", "fun", "the", "aliens",
"can", "have", "With", "pingy", "pants", "elastic", "It", "'s",
"not", "your", "neighbour"), col2 = c("alien_NNS", "love_VBP",
"underpants_NNS", "of_IN", "every_DT", "shape_NN", "and_CC",
"size_NN", "but_CC", "there_EX", "be_VBP", "no_DT", "underpants_NNS",
"in_IN", "space_NN", "so_RB", "here_RB", "be_VBZ", "the_DT",
"big_JJ", "surprise_NN", "when_WRB", "alien_NNS", "fly_VBP",
"down_RB", "to_TO", "earth_NNP", "they_PRP", "do_VBP", "not_RB",
"come_VB", "to_TO", "meet_VB", "you_PRP", "they_PRP", "simply_RB",
"want_VB", "you_PRP$", "underpants_NNS", "i_PRP", "will_MD",
"bet_VB", "you_PRP", "never_RB", "know_VBD", "they_PRP$", "spaceship_NNS",
"'s_POS", "radar_NN", "bleep_NNS", "and_CC", "blink_VBZ", "the_DT",
"moment_NN", "that_IN", "it_PRP", "see_VBZ", "a_DT", "wash_VBG",
"line_NN", "of_IN", "underpants_NNS", "all_DT", "flap_VBG", "in_IN",
"the_DT", "breeze_NN", "they_PRP", "land_VBP", "in_IN", "you_PRP$",
"back_JJ", "garden_NN", "though_IN", "they_PRP", "have_VBP",
"not_RB", "be_VBN", "invite_VBN", "oooooh_NNP", "underpants_NNP",
"they_PRP", "chant_VBP", "and_CC", "dance_NN", "around_RB", "delighted_JJ",
"they_PRP", "like_VBP", "they_PRP", "red_JJ", "they_PRP", "like_VBP",
"they_PRP", "green_JJ", "or_CC", "orange_NN", "like_IN", "satsuma_NNS",
"but_CC", "best_JJS", "of_IN", "all_DT", "they_PRP", "love_VBP",
"the_DT", "sight_NN", "of_IN", "granny_NNP", "'s_POS", "spotted_JJ",
"bloomers_NNS", "mum_NNP", "'s_POS", "pink_JJ", "frilly_JJ",
"knickers_NNS", "be_VBP", "a_DT", "perfect_JJ", "place_NN", "to_TO",
"hide_VB", "and_CC", "grandpa_NNP", "'s_POS", "woolly_JJ", "long_JJ",
"john_NNS", "make_VBP", "a_DT", "super-whizzy_JJ", "slide_NN",
"in_IN", "daring_JJ", "competition_NNS", "hold_VBN", "up_RP",
"by_IN", "just_RB", "one_CD", "peg_VB", "they_PRP", "count_VBP",
"how_WRB", "many_JJ", "alien_NNS", "can_MD", "squeeze_VB", "into_IN",
"each_DT", "leg_NN", "they_PRP", "wear_VBP", "pants_NNS", "on_IN",
"they_PRP$", "foot_NNS", "and_CC", "head_NNS", "and_CC", "other_JJ",
"silly_JJ", "place_NNS", "they_PRP", "fly_VBP", "pants_NNS",
"from_IN", "they_PRP$", "spaceship_NNS", "and_CC", "hold_VB",
"upside-down-pant_NNP", "races_NN", "as_IN", "they_PRP", "go_VBP",
"zing_VBG", "through_IN", "the_DT", "air_NN", "it_PRP", "really_RB",
"be_VBZ", "pants-tastic_JJ", "what_WDT", "fun_NN", "the_DT",
"alien_NNS", "can_MD", "have_VB", "with_IN", "pingy_NN", "pants_NNS",
"elastic_JJ", "it_PRP", "be_VBZ", "not_RB", "you_PRP$", "neighbour_NN"
)), class = "data.frame", row.names = c(NA, 200L))
# Drop the Text column; the rest of the script works on col2 only.
a <- select(a, -Text)
# Example input `b`: a data frame declared with 50 rows.
#   col1 - "<token>_<TAG>" strings in the same format as the tagged column
#          of `a`; the values listed here are all distinct.
#   n    - an integer per entry, listed in decreasing order; presumably a
#          frequency count from a reference corpus -- confirm with the source.
# NOTE(review): the structure(...) form looks like dput() output; keep it
# verbatim so the reproduced object matches the original one exactly.
b <- structure(list(col1 = c("the_DT", "and_CC", "a_DT", "i_PRP",
"to_TO", "he_PRP", "be_VBD", "say_VBD", "it_PRP", "you_PRP",
"be_VBZ", "not_RB", "of_IN", "in_IN", "she_PRP", "be_VBP", "they_PRP",
"on_IN", "he_PRP$", "for_IN", "but_CC", "with_IN", "have_VBD",
"at_IN", "'s_POS", "she_PRP$", "be_VB", "what_WP", "my_PRP$",
"we_PRP", "that_DT", "as_IN", "that_IN", "do_VBP", "can_MD",
"henry_NNP", "would_MD", "then_RB", "this_DT", "all_DT", "will_MD",
"up_RP", "no_DT", "have_VBP", "one_CD", "very_RB", "so_RB", "could_MD",
"when_WRB", "there_EX"), n = c(16254L, 10381L, 8936L, 7168L,
6907L, 5393L, 5014L, 4554L, 4460L, 4431L, 4008L, 3941L, 3746L,
3594L, 3301L, 2691L, 2686L, 2522L, 2513L, 2021L, 1984L, 1859L,
1732L, 1701L, 1667L, 1659L, 1396L, 1376L, 1371L, 1363L, 1277L,
1260L, 1228L, 1201L, 1197L, 1190L, 1166L, 1162L, 1152L, 1125L,
1119L, 1111L, 1039L, 1028L, 985L, 934L, 917L, 905L, 894L, 886L
)), class = "data.frame", row.names = c(NA, 50L))
# Compute the share of tokens in `a` that also occur in the list `b`.
#
# Give both tables the same key column name, `x`, so the tagged tokens can be
# compared directly.
a <- rename(a, x = col2)
b <- rename(b, x = col1)

# Flag every row of `a`: 1 when its token is present in `b`, 0 otherwise.
# Membership is tested with %in%, which yields exactly one flag per row of
# `a`. An inner_join()/anti_join() pair would give the same counts only while
# the keys in `b` are unique; a repeated key in `b` would duplicate the
# matching rows of `a` and inflate the share of 1s. This form also avoids
# naming a data frame `c`, which would mask base::c for variable lookup.
# The column is called rf_ rather than rf because rf() is a function in
# stats, which is always loaded.
rf_df <- a %>%
  mutate(rf_ = as.numeric(x %in% b$x)) %>%
  select(rf_)

# Proportion of tokens of `a` found in `b`
mean(rf_df$rf_)
#> [1] 0.39
Created on 2020-03-22 by the reprex package (v0.3.0)