Number of possible solutions for generating clusters.

I have data on 29 properties (Points_properties), and I want to find the number of possible solutions for generating clusters in which every cluster contains at least 2 properties, i.e. I don't want any cluster with only 1 property. I wrote code showing that there are 12 possible solutions, but I only know this because I passed a specific value of k to the cutree function. Notice that nclusters has 13 rows: the first one, with only 1 property, is not counted; only the others, which have at least 2 properties, are counted, so there are 12 possible solutions in total. So how do I find that there are 12 possible solutions without hard-coding a number in cutree? If you know an easier way to find this out, feel free to share it.

library(geosphere)

# Sample data: 29 properties, each with an id ("Propertie") and its
# latitude/longitude. Built with structure() so the object carries the tibble
# class vector without needing the tibble package to construct it.
Points_properties <- structure(
  list(
    Propertie = as.numeric(1:29),
    Latitude = c(
      -24.781624, -24.775017, -24.769196, -24.761741, -24.752019,
      -24.748008, -24.737312, -24.744718, -24.751996, -24.724589,
      -24.8004, -24.796899, -24.795041, -24.780501, -24.763376,
      -24.801715, -24.728005, -24.737845, -24.743485, -24.742601,
      -24.766422, -24.767525, -24.775631, -24.792703, -24.790994,
      -24.787275, -24.795902, -24.785587, -24.787558
    ),
    Longitude = c(
      -49.937369, -49.950576, -49.927608, -49.92762, -49.920608,
      -49.927707, -49.922095, -49.915438, -49.910843, -49.899478,
      -49.901775, -49.89364, -49.925657, -49.893193, -49.94081,
      -49.911967, -49.893358, -49.903904, -49.906435, -49.927951,
      -49.939603, -49.941541, -49.94455, -49.929797, -49.92141,
      -49.915141, -49.91042, -49.904772, -49.894034
    )
  ),
  row.names = c(NA, -29L),
  class = c("tbl_df", "tbl", "data.frame")
)



# Keep just the two coordinate columns of each property.
coordinates <- subset(Points_properties, select = c("Latitude", "Longitude"))

# Pairwise distances between all properties. The columns are reversed so that
# longitude comes before latitude when handed to distm().
d <- as.dist(distm(coordinates[, 2:1]))

# Average-linkage hierarchical clustering on those distances.
fit.average <- hclust(d, method = "average")

# Cut the tree into 13 groups and tabulate how many properties fall in each.
clusters <- cutree(fit.average, k = 13)
nclusters <- matrix(table(clusters))
nclusters
> nclusters
      [,1]
 [1,]    1
 [2,]    2
 [3,]    2
 [4,]    3
 [5,]    3
 [6,]    2
 [7,]    2
 [8,]    3
 [9,]    2
[10,]    3
[11,]    2
[12,]    2
[13,]    2
# Sample data: one row per property, with its id ("Propertie") and its
# latitude/longitude.
Points_properties <- data.frame(
  Propertie = as.numeric(1:29),
  Latitude = c(
    -24.781624, -24.775017, -24.769196, -24.761741, -24.752019,
    -24.748008, -24.737312, -24.744718, -24.751996, -24.724589,
    -24.8004, -24.796899, -24.795041, -24.780501, -24.763376,
    -24.801715, -24.728005, -24.737845, -24.743485, -24.742601,
    -24.766422, -24.767525, -24.775631, -24.792703, -24.790994,
    -24.787275, -24.795902, -24.785587, -24.787558
  ),
  Longitude = c(
    -49.937369, -49.950576, -49.927608, -49.92762, -49.920608,
    -49.927707, -49.922095, -49.915438, -49.910843, -49.899478,
    -49.901775, -49.89364, -49.925657, -49.893193, -49.94081,
    -49.911967, -49.893358, -49.903904, -49.906435, -49.927951,
    -49.939603, -49.941541, -49.94455, -49.929797, -49.92141,
    -49.915141, -49.91042, -49.904772, -49.894034
  )
)
library(geosphere)
library(tidyverse)
# Coordinate columns only (Latitude, Longitude) for every property.
coordinates <- subset(Points_properties, select = c("Latitude", "Longitude"))

# Distance object for hclust(): columns are swapped to Longitude, Latitude
# before being passed to distm(), then the square matrix is converted to a
# "dist" object.
d <- coordinates[, 2:1] %>%
  distm() %>%
  as.dist()

# Average-linkage hierarchical clustering of the properties.
fit.average <- hclust(d, method = "average")

# Upper bound on the number of clusters worth trying: if every cluster must
# hold at least 2 properties, n properties can form at most floor(n / 2)
# clusters (14 for the 29 properties here). The count is taken from the data
# rather than hard-coded so it stays correct if rows are added or removed.
(max_feasible_cut_groups <- floor(nrow(Points_properties) / 2))

# Cut the dendrogram once for every candidate k = 1, ..., max_feasible_cut_groups
# and collect one summary row per cut:
#   k                           - number of clusters requested from cutree()
#   cluster_label_for_each_node - list-column: cluster id given to each property
#   coverage_table              - list-column: number of properties per cluster
#   problematic_unique_clusters - how many clusters hold fewer than 2 properties
full_results <- map_dfr(
  seq_len(max_feasible_cut_groups),
  function(k) {
    r1 <- cutree(fit.average, k = k)
    ct <- table(r1)
    tibble(
      k = k,
      cluster_label_for_each_node = list(r1),
      coverage_table = list(ct),
      # Counting TRUEs is equivalent to measuring the length of the
      # undersized subset of the table.
      problematic_unique_clusters = sum(ct < 2)
    )
  }
)

# Show every row, not just the default first few.
print(full_results, n = Inf)

# Keep only the cuts in which no cluster is undersized. Rows are ordered by
# increasing k, so the last remaining row is the largest k for which every
# cluster contains at least 2 properties.
(last_valid_run <- full_results %>%
  filter(problematic_unique_clusters == 0) %>%
  slice_tail(n = 1))

# Cluster assignment of each property, and the cluster sizes, for that cut.
last_valid_run$cluster_label_for_each_node
last_valid_run$coverage_table
1 Like

That's it, @nirgrahamuk, thank you so much for that. Could you please add a comment explaining exactly what you are doing in max_feasible_cut_groups, full_results and last_valid_run, and what your idea was, to make it more understandable to me? Thank you again!

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.