Friends, I would like to calculate the distances between the data frame df and the data frame df1 using the distm function. Note that the industries are divided into clusters, so I would like each distance to be calculated according to the cluster the industry belongs to. For example, industries 1, 2 and 5 are in cluster 1, so their distances should be measured to the cluster 1 coordinates in df1. The executable code is below.
Thank you!!
library(ggplot2)
library(rdist)
library(geosphere)
# Sample data: one row per industry, with its Latitude/Longitude coordinates
# and a Waste value (used further down as the weight for the cluster centers).
df <- data.frame(
  Industries = c(1, 2, 3, 4, 5, 6),
  Latitude   = c(-23.8, -23.8, -23.9, -23.7, -23.7, -23.7),
  Longitude  = c(-49.5, -49.6, -49.7, -49.8, -49.6, -49.9),
  Waste      = c(526, 350, 526, 469, 534, 346)
)
# Number of groups the dendrogram is cut into.
k <- 3

# clusters
# Pairwise distances between all industries, then average-linkage
# hierarchical clustering cut into k groups.
coordinates <- df[c("Latitude", "Longitude")]
# distm() takes points as (longitude, latitude), so the columns are passed
# in swapped order.
d <- as.dist(distm(coordinates[, c("Longitude", "Latitude")]))
fit.average <- hclust(d, method = "average")
clusters <- cutree(fit.average, k)

# Cluster sizes as a one-column matrix.
nclusters <- matrix(table(clusters))

# Record each industry's cluster label alongside its data.
df$cluster <- clusters
# Center of mass
# One row per cluster: Waste-weighted mean Latitude and Longitude of the
# industries assigned to that cluster.
center_mass <- matrix(nrow = k, ncol = 2)
# seq_len(k) instead of 1:k so that the loop is skipped (rather than
# iterating over c(1, 0)) should k ever be 0.
for (i in seq_len(k)) {
  # Subset once per cluster instead of once per weighted.mean() argument.
  members <- subset(df, cluster == i)
  center_mass[i, ] <- c(
    weighted.mean(members$Latitude, members$Waste),
    weighted.mean(members$Longitude, members$Waste)
  )
}
coordinates$cluster <- clusters
# Append the cluster id as a third column so each center can be matched
# back to its cluster.
center_mass <- cbind(center_mass, matrix(seq_len(k), ncol = 1))
# database df1
# Cluster centers as a data frame: one row per cluster with its center
# coordinates and the cluster id.
df1 <- setNames(
  as.data.frame(center_mass),
  c("Latitude", "Longitude", "cluster")
)
> df
Industries Latitude Longitude Waste cluster
1 1 -23.8 -49.5 526 1
2 2 -23.8 -49.6 350 1
3 3 -23.9 -49.7 526 2
4 4 -23.7 -49.8 469 3
5 5 -23.7 -49.6 534 1
6 6 -23.7 -49.9 346 3
> df1
Latitude Longitude cluster
1 -23.76213 -49.56270 1
2 -23.90000 -49.70000 2
3 -23.70000 -49.84245 3