Calculate the distance between coordinates using the distm function considering that they are in clusters

Friends, I would like to calculate the distances between the data frame df and the data frame df1 using the distm function. Note that the industries are divided into clusters, so I would like each distance to be calculated according to the cluster the industry belongs to. For example, industries 1, 2 and 5 are in cluster 1, so their distances should be measured to the cluster 1 coordinates in df1. The executable code is below.

Thank you!!

library(ggplot2)
library(rdist)
library(geosphere)

# Sample data: six industries, each with a latitude, a longitude and a
# Waste value (Waste is used further down as the weight of each industry).
df <- data.frame(
  Industries = c(1, 2, 3, 4, 5, 6),
  Latitude = c(-23.8, -23.8, -23.9, -23.7, -23.7, -23.7),
  Longitude = c(-49.5, -49.6, -49.7, -49.8, -49.6, -49.9),
  Waste = c(526, 350, 526, 469, 534, 346)
)

# Number of clusters the dendrogram is cut into
k <- 3

# Clusters: average-linkage hierarchical clustering on the pairwise distance
# matrix that distm() builds from the industries' coordinates
coordinates <- df[, c("Latitude", "Longitude")]
# distm() is handed the columns in (Longitude, Latitude) order
d <- as.dist(distm(coordinates[, c("Longitude", "Latitude")]))
fit.average <- hclust(d, method = "average")
clusters <- cutree(fit.average, k = k)
# Number of industries per cluster, stored as a one-column matrix
nclusters <- matrix(table(clusters))
# Record the cluster each industry was assigned to
df$cluster <- clusters

# Center of mass: Waste-weighted mean latitude and longitude of the industries
# in each cluster, one row per cluster (column 1 = latitude, 2 = longitude).
# seq_len(k) is used instead of 1:k so the iteration is well defined for any
# k, and each cluster's rows are selected once instead of four times.
center_mass <- t(vapply(
  seq_len(k),
  function(i) {
    members <- df[which(df$cluster == i), ]
    c(
      weighted.mean(members$Latitude, members$Waste),
      weighted.mean(members$Longitude, members$Waste)
    )
  },
  numeric(2)
))
# Keep the cluster assignment next to the raw coordinates as well
coordinates$cluster <- clusters
# Third column: the cluster id that each center of mass belongs to
center_mass <- cbind(center_mass, matrix(seq_len(k), ncol = 1))

# database df1: the center-of-mass matrix as a data frame, one row per
# cluster, with descriptive column names
df1 <- setNames(
  as.data.frame(center_mass),
  c("Latitude", "Longitude", "cluster")
)

> df
  Industries Latitude Longitude Waste cluster
1          1    -23.8     -49.5   526       1
2          2    -23.8     -49.6   350       1
3          3    -23.9     -49.7   526       2
4          4    -23.7     -49.8   469       3
5          5    -23.7     -49.6   534       1
6          6    -23.7     -49.9   346       3

> df1
   Latitude Longitude cluster
1 -23.76213 -49.56270       1
2 -23.90000 -49.70000       2
3 -23.70000 -49.84245       3
# Join each industry in df to the center of mass of its own cluster in df1,
# matching on the shared `cluster` column. Latitude/Longitude exist in both
# inputs, so they are disambiguated with the _df / _df1 suffixes.
# The result is stored as `mydf` because the distance step further down reads
# it; the outer parentheses still print the merged table.
(mydf <- merge(df, df1, by = "cluster", suffixes = c("_df", "_df1")))
# cluster Industries Latitude_df Longitude_df Waste Latitude_df1 Longitude_df1
# 1       1          1       -23.8        -49.5   526    -23.76213     -49.56270
# 2       1          2       -23.8        -49.6   350    -23.76213     -49.56270
# 3       1          5       -23.7        -49.6   534    -23.76213     -49.56270
# 4       2          3       -23.9        -49.7   526    -23.90000     -49.70000
# 5       3          4       -23.7        -49.8   469    -23.70000     -49.84245
# 6       3          6       -23.7        -49.9   346    -23.70000     -49.84245

Thanks for the answer, but how do I calculate the distances with the distm function using this new data frame that you created?

# Distance from every industry to the center of mass of its own cluster,
# computed one row of mydf at a time.
# distm() returns a 1 x 1 matrix for a single pair of points, so the result is
# flattened with as.numeric() to the scalar double that pmap_dbl() collects.
# The outer parentheses print the new column right after assigning it.
(mydf$distances <- purrr::pmap_dbl(
  .l = list(
    lon = mydf$Longitude_df,
    lat = mydf$Latitude_df,
    center_lon = mydf$Longitude_df1,
    center_lat = mydf$Latitude_df1
  ),
  .f = function(lon, lat, center_lon, center_lat) {
    # Points are passed to distm() as c(longitude, latitude)
    as.numeric(distm(c(lon, lat), c(center_lon, center_lat)))
  }
))
mydf

  cluster Industries Latitude_df Longitude_df Waste Latitude_df1 Longitude_df1 distances
1       1          1       -23.8        -49.5   526    -23.76213     -49.56270  7643.779
2       1          2       -23.8        -49.6   350    -23.76213     -49.56270  5661.388
3       1          5       -23.7        -49.6   534    -23.76213     -49.56270  7862.250
4       2          3       -23.9        -49.7   526    -23.90000     -49.70000     0.000
5       3          4       -23.7        -49.8   469    -23.70000     -49.84245  4329.723
6       3          6       -23.7        -49.9   346    -23.70000     -49.84245  5868.903

Thanks for the reply, my friend!

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.