Here is the reprex code. It matches two data frames based on the nearest distance. What is required is that it should match only the nearest locations within a distance of 5000 m (5 km). If no match is found within that distance, it should run again and again until a match within 5 km is found: the data frame df is fixed once its lat and long are generated in the first run, and it should be matched against the other data frame ref, whose lat and long may be regenerated until they fall within 5 km of the points in df. I hope this is clear now.
library(data.table)
# Query points: five IDs with random lat/lon drawn uniformly from the box
# 33.45-33.75 (lat) x 72.83-73.17 (lon).
# The compact row.names entry has to agree with the real number of rows (5);
# column names come from the list itself, so no extra naming attribute is set.
# The object is finalised as a data.table by the setDT() call further down.
df <- structure(
  list(
    ID = 1:5,
    lat = runif(5, 33.45, 33.75),
    lon = runif(5, 72.83, 73.17)
  ),
  row.names = c(NA, -5L),
  class = c("data.table", "data.frame")
)
df
#> ID lat lon
#> 1: 1 33.54173 72.88869
#> 2: 2 33.62414 73.03707
#> 3: 3 33.55003 73.03129
#> 4: 4 33.73126 73.14129
#> 5: 5 33.66748 72.91560
# Reference points: five lettered IDs with random lat/lon drawn uniformly from
# the same box as df (33.45-33.75 lat, 72.83-73.17 lon).
# The compact row.names entry has to agree with the real number of rows (5);
# column names come from the list itself, so no extra naming attribute is set.
# The object is finalised as a data.table by the setDT() call further down.
ref <- structure(
  list(
    ID = letters[1:5],
    lat = runif(5, 33.45, 33.75),
    lon = runif(5, 72.83, 73.17)
  ),
  row.names = c(NA, -5L),
  class = c("data.table", "data.frame")
)
ref
#> ID lat lon
#> 1: a 33.67757 72.83289
#> 2: b 33.52380 73.00690
#> 3: c 33.63362 72.83984
#> 4: d 33.63149 72.90808
#> 5: e 33.47954 73.05804
# Make sure both inputs are proper data.tables (converted by reference).
setDT(df)
setDT(ref)
# Cross join: give both tables the same constant helper column k and join on
# it, so every row of df is paired with every row of ref. ref's columns appear
# in the result with an "i." prefix. The helper column is dropped afterwards.
# The trailing [] is deliberate: `:=` flags the table so that its next print
# is skipped, and because this chain is assigned (never auto-printed) that
# flag would otherwise swallow the `df1` print on the following line.
df1 <- setkey(df[, c(k = 1, .SD)], k)[
  ref[, c(k = 1, .SD)],
  allow.cartesian = TRUE
][, k := NULL][]
df1
# For every df/ref pair, compute the straight-line (Euclidean) distance
# between the two coordinate pairs, then rank the pairs within each df ID from
# nearest (rank 1) to farthest, breaking ties at random.
# `ties.method` is spelled out in full rather than relying on partial
# argument matching of `ties`.
# NOTE(review): EuDist is expressed in the units of lat/lon (apparently
# degrees), not metres, so it cannot be compared with a 5000 m threshold
# as-is - confirm and convert (e.g. haversine) before adding a cut-off.
df1[, EuDist := sqrt((lat - i.lat)^2 + (lon - i.lon)^2)]
df1[, distRank := rank(EuDist, ties.method = "random"), by = .(ID)]
df1
#> ID lat lon i.ID i.lat i.lon EuDist distRank
#> 1: 1 33.54173 72.88869 a 33.67757 72.83289 0.14684973 4
#> 2: 2 33.62414 73.03707 a 33.67757 72.83289 0.21105457 5
#> 3: 3 33.55003 73.03129 a 33.67757 72.83289 0.23584679 5
#> 4: 4 33.73126 73.14129 a 33.67757 72.83289 0.31303955 4
#> 5: 5 33.66748 72.91560 a 33.67757 72.83289 0.08332276 3
#> 6: 1 33.54173 72.88869 b 33.52380 73.00690 0.11956520 3
#> 7: 2 33.62414 73.03707 b 33.52380 73.00690 0.10477654 1
#> 8: 3 33.55003 73.03129 b 33.52380 73.00690 0.03581683 1
#> 9: 4 33.73126 73.14129 b 33.52380 73.00690 0.24718507 1
#> 10: 5 33.66748 72.91560 b 33.52380 73.00690 0.17022953 4
#> 11: 1 33.54173 72.88869 c 33.63362 72.83984 0.10407033 2
#> 12: 2 33.62414 73.03707 c 33.63362 72.83984 0.19745978 4
#> 13: 3 33.55003 73.03129 c 33.63362 72.83984 0.20889675 4
#> 14: 4 33.73126 73.14129 c 33.63362 72.83984 0.31686899 5
#> 15: 5 33.66748 72.91560 c 33.63362 72.83984 0.08298131 2
#> 16: 1 33.54173 72.88869 d 33.63149 72.90808 0.09183140 1
#> 17: 2 33.62414 73.03707 d 33.63149 72.90808 0.12919979 2
#> 18: 3 33.55003 73.03129 d 33.63149 72.90808 0.14769367 3
#> 19: 4 33.73126 73.14129 d 33.63149 72.90808 0.25365657 2
#> 20: 5 33.66748 72.91560 d 33.63149 72.90808 0.03676788 1
#> 21: 1 33.54173 72.88869 e 33.47954 73.05804 0.18040661 5
#> 22: 2 33.62414 73.03707 e 33.47954 73.05804 0.14610514 3
#> 23: 3 33.55003 73.03129 e 33.47954 73.05804 0.07539468 2
#> 24: 4 33.73126 73.14129 e 33.47954 73.05804 0.26512483 3
#> 25: 5 33.66748 72.91560 e 33.47954 73.05804 0.23580952 5
#> ID lat lon i.ID i.lat i.lon EuDist distRank
# Keep, for each df point, only the pair ranked first, i.e. its closest ref
# point.
df1 <- df1[which(distRank == 1)]
df1
#> ID lat lon i.ID i.lat i.lon EuDist distRank
#> 1: 2 33.62414 73.03707 b 33.52380 73.00690 0.10477654 1
#> 2: 3 33.55003 73.03129 b 33.52380 73.00690 0.03581683 1
#> 3: 4 33.73126 73.14129 b 33.52380 73.00690 0.24718507 1
#> 4: 1 33.54173 72.88869 d 33.63149 72.90808 0.09183140 1
#> 5: 5 33.66748 72.91560 d 33.63149 72.90808 0.03676788 1
Created on 2020-11-22 by the reprex package (v0.3.0)