R function daisy() from package cluster

I used the R function daisy() from package cluster to compute a Gower dissimilarity:

 # Library call
library(cluster)
# daisy() signature, for reference:
#daisy(crx, metric = "gower", stand = FALSE, type = list(), weights = rep.int(1, p), warnBin = warnType, warnAsym = warnType, warnConst = warnType, warnType = TRUE)

# Gower dissimilarity between every pair of rows of crx (mixed-type columns).
Dist <- daisy(crx, metric = "gower")

# Convert the Gower dissimilarity object into a square distance matrix
Dist <- as.matrix(Dist)

# Using the new distance matrix, what is the Gower measure between the 10th
# and the 60th observation (row)?
# Index the matrix that already exists; do not call daisy() again. The original
# line here, `Dist <- daisy(crx, metric = "gower", crx_frame (10,60))`,
# replaced the matrix with a fresh "dissimilarity" object. That object has no
# columns, so ncol(Dist) was NULL and `1:dim` failed with
# "argument of length 0".
dissim_10_60 <- Dist[10, 60]
dissim_10_60      # Gower dissimilarity (distance)
1 - dissim_10_60  # corresponding Gower similarity

# Visualise the distance matrix
n_obs <- ncol(Dist)  # number of observations; defines both axes in image()
image(seq_len(n_obs), seq_len(n_obs), Dist,
      axes = FALSE, xlab = "", ylab = "", col = rainbow(100))

This code works fine until the final line:

 Error in 1:dim : argument of length 0

I don't understand why I am receiving this error. How do I get around this?

What are crx and crx_frame? Can you please provide a REPRoducible EXample of your problem? It provides more specifics of your problem, and it helps others to understand what problem you are facing.

If you don't know how to do it, take a look at this thread:

I tried with iris, and the results seem to be fine.

library(cluster)

# Gower dissimilarity between every pair of rows of the built-in iris data
# (numeric measurements plus the Species factor).
Dist <- daisy(x = iris,
              metric = "gower")

# Expand the "dissimilarity" object into a full square matrix so that it has
# rows and columns for image() to plot.
Dist_mat <- as.matrix(x = Dist)

# Quick look using image()'s default axes and colours.
image(x = Dist_mat)


# Same matrix with observation indices on both axes and a rainbow palette.
# seq_len() is used instead of 1:n so an empty matrix cannot produce c(1, 0).
image(x = seq_len(nrow(x = Dist_mat)),
      y = seq_len(ncol(x = Dist_mat)),
      z = Dist_mat,
      axes = FALSE,
      xlab = "",
      ylab = "",
      col = rainbow(n = 100))

Created on 2019-05-23 by the reprex package (v0.3.0)

1 Like

Here's the reprex. It seems it can't locate the files, yet both crx and the data frame are in the environment:

# Import the data  
# Read the data set
# NOTE(review): the file is looked up by a bare relative name and was not
# found (see the warning below) -- presumably because the reprex was rendered
# from a different working directory than the interactive session; confirm.
# NOTE(review): the result is stored in `Data`, but every later line uses
# `crx`, which this script never creates. Even with the file present, the
# lines below would still fail with "object 'crx' not found".
Data <- read.table ('crx.data', header=FALSE, sep=',', na.strings=c('?'))
#> Warning in file(file, "rt"): cannot open file 'crx.data': No such file or
#> directory
#> Error in file(file, "rt"): cannot open the connection

# Create dataframe
# NOTE(review): this reads `crx` (undefined here) rather than `Data`, and the
# resulting `crx.data` is not referenced again in the visible script.
crx.data <- data.frame(crx)
#> Error in data.frame(crx): object 'crx' not found

# Add names to the dataset
# NOTE(review): every error in this section is a knock-on effect of `crx`
# never having been created above; none of these statements actually ran.
names(crx) <- c("Gender", "Age", "MonthlyExpenses", "MaritalStatus", "HomeStatus", "Occupation", "BankingInstitution", "YearsEmployed", "NoPriorDefault", "Employed", "CreditScore", "DriversLicense", "AccountType", "MonthlyIncome", "AccountBalance", "Approved")
#> Error in names(crx) <- c("Gender", "Age", "MonthlyExpenses", "MaritalStatus", : object 'crx' not found

# Manually define the variables
# Each column is coerced to the type it should have for the analysis: factor
# for categorical fields, numeric/integer for measured ones.
# NOTE(review): if any numeric column was read in as a factor, as.numeric() /
# as.integer() would return level codes rather than the values -- confirm the
# column types after import.
crx$Gender <- as.factor(crx$Gender) 
#> Error in is.factor(x): object 'crx' not found
crx$Age <- as.numeric(crx$Age)
#> Error in eval(expr, envir, enclos): object 'crx' not found
crx$MonthlyExpenses <- as.integer(crx$MonthlyExpenses) 
#> Error in eval(expr, envir, enclos): object 'crx' not found
crx$MaritalStatus <- as.factor(crx$MaritalStatus) 
#> Error in is.factor(x): object 'crx' not found
crx$HomeStatus <- as.factor(crx$HomeStatus) 
#> Error in is.factor(x): object 'crx' not found
crx$Occupation <- as.factor(crx$Occupation) 
#> Error in is.factor(x): object 'crx' not found
crx$BankingInstitution <- as.factor(crx$BankingInstitution) 
#> Error in is.factor(x): object 'crx' not found
crx$YearsEmployed <- as.numeric(crx$YearsEmployed) 
#> Error in eval(expr, envir, enclos): object 'crx' not found
crx$NoPriorDefault <- as.factor(crx$NoPriorDefault) 
#> Error in is.factor(x): object 'crx' not found
crx$Employed <- as.factor(crx$Employed) 
#> Error in is.factor(x): object 'crx' not found
crx$CreditScore <- as.numeric(crx$CreditScore) 
#> Error in eval(expr, envir, enclos): object 'crx' not found
crx$DriversLicense <- as.factor(crx$DriversLicense)
#> Error in is.factor(x): object 'crx' not found
crx$AccountType <- as.factor(crx$AccountType) 
#> Error in is.factor(x): object 'crx' not found
crx$MonthlyIncome <- as.integer(crx$MonthlyIncome) 
#> Error in eval(expr, envir, enclos): object 'crx' not found
crx$AccountBalance <- as.numeric(crx$AccountBalance) 
#> Error in eval(expr, envir, enclos): object 'crx' not found
crx$Approved <- as.factor(crx$Approved)
#> Error in is.factor(x): object 'crx' not found

# Use the function summary() to determine total number of missing values
summary(crx)
#> Error in summary(crx): object 'crx' not found

# Remove the records with missing values from the dataset
# na.omit() drops every row that contains at least one NA.
crx <- na.omit(crx)
#> Error in na.omit(crx): object 'crx' not found

# How many missing values in total are there? 0
# NOTE(review): the "0" answers in this section cannot be confirmed from this
# run, because both summary() calls failed.
# Use the function summary()
summary(crx)
#> Error in summary(crx): object 'crx' not found
# How many records are removed by using the function na.omit()? 0

# The dataset contains variables with mixed types. 
# Use R function daisy() from package cluster to compute a Gower dissimilarity (distance) matrix between the data records, and refer to the result as “Dist”
# Library call
library(cluster)
#daisy(crx, metric = "gower", stand = FALSE, type = list(), weights = rep.int(1, p), warnBin = warnType, warnAsym = warnType, warnConst = warnType, warnType = TRUE)
Dist <- daisy(crx, metric = c("gower"))
#> Error in daisy(crx, metric = c("gower")): object 'crx' not found

# Convert the Gower dissimilarity object into a distance matrix
Dist <- as.matrix(Dist)
#> Error in as.matrix(Dist): object 'Dist' not found

# Using the new distance matrix, what is the Gower similarity measure between the 10th and the 60th observation (row)?
# NOTE(review): this does not look up a value in the matrix. It calls daisy()
# again and overwrites `Dist`, so the matrix built above is lost.
# `crx_frame (10,60)` is passed as the third positional argument, which is
# `stand` in the signature quoted above, and `crx_frame` is not defined
# anywhere in the visible script.
# To read the pairwise value from the matrix, index it: Dist[10, 60]
# (1 - Dist[10, 60] for the similarity).
Dist <- daisy(crx, metric = "gower", crx_frame (10,60))
#> Error in daisy(crx, metric = "gower", crx_frame(10, 60)): object 'crx' not found

# Visualise the distance matrix
# NOTE(review): with `Dist` reassigned to a daisy() result instead of a
# matrix, ncol(Dist) would be NULL and `1:dim` would fail with
# "argument of length 0", the error reported in the original post.
dim <- ncol(Dist)  # used to define axis in image
#> Error in ncol(Dist): object 'Dist' not found
# NOTE(review): the message differs in this run -- presumably because the
# assignment above failed, so `dim` resolves to base R's dim() function
# rather than a number; confirm.
image(1:dim, 1:dim, Dist, axes = FALSE, xlab="", ylab="", col = rainbow(100))
#> Error in 1:dim: NA/NaN argument

Created on 2019-05-24 by the reprex package (v0.3.0)

Without the data, I can't run your code, and it's impossible to guess what's wrong.

Can you please share a small part of the data set in a copy-paste friendly format?

In case you don't know how to do it, there are many options, which include:

  1. If you have stored the data set in some R object, the dput function is very handy.

  2. In case the data set is in a spreadsheet, check out the datapasta package. Take a look at this link.

1 Like

I have managed to produce the image as per the above example. All I had to do was ignore the setup code that imports the data and creates the data frame; the rest of the code works. Thanks for your assistance.

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.