Sorry for my bad English.
I'm trying to find 'k' for k-means clustering.
I found a way to choose 'k' with the elbow method using 'sse_ratio', as shown in the code below.
#########################
# Tune the number of clusters for k-means on mtcars and draw an elbow plot of
# the sse_ratio metric (labelled "WSS/TSS ratio" on the plot), averaged over
# the cross-validation folds.

# Attach every package whose functions are called below so the script runs in
# a fresh session.
library(tidyclust) # k_means(), tune_cluster(), cluster_metric_set(), sse_ratio
library(recipes)   # recipe(), step_normalize(), step_pca()
library(rsample)   # vfold_cv()
library(workflows) # workflow(), add_recipe(), add_model()
library(tune)      # tune(), collect_metrics()
library(tibble)    # tibble()
library(dplyr)     # filter(), %>%
library(ggplot2)   # ggplot() and friends

# Preprocessing: normalize all numeric predictors, then apply PCA to them.
rec_spec <- recipe(~., data = mtcars) %>%
  step_normalize(all_numeric_predictors()) %>%
  step_pca(all_numeric_predictors())

# k-means specification with the number of clusters marked for tuning.
kmeans_spec <- k_means(num_clusters = tune())

wflow <- workflow() %>%
  add_recipe(rec_spec) %>%
  add_model(kmeans_spec)

# Candidate values of k to evaluate.
grid <- tibble(num_clusters = 1:10)

# Fix the seed so the fold assignment is reproducible.
set.seed(4400)
folds <- vfold_cv(mtcars, v = 5)

res <- tune_cluster(
  wflow,
  resamples = folds,
  grid = grid,
  # Only sse_ratio is computed here. Open question from the author: how can
  # silhouette_avg be computed in this metric set as well?
  metrics = cluster_metric_set(sse_ratio)
)

# Plain `<-` assignment: the Unicode arrow used previously is not valid R.
res_metrics <- collect_metrics(res)

# Elbow plot: mean sse_ratio for each candidate number of clusters.
res_metrics %>%
  filter(.metric == "sse_ratio") %>%
  ggplot(aes(x = num_clusters, y = mean)) +
  geom_point() +
  geom_line() +
  theme_minimal() +
  ylab("mean WSS/TSS ratio, over 5 folds") +
  xlab("Number of clusters") +
  scale_x_continuous(breaks = 1:10)
#####################
I've tried to get 'k' with silhouette_avg, but it failed.
I tried passing "silhouette_avg", "silhouette_avg(dists = dists)", "silhouette_ave, dists=dists", and so on,
but they all failed.
How can I get a table of silhouette values instead of the SSE ratio?
Thanks for your help.