I have an issue with the code below. I changed it to read the data from a CSV file instead of generating it, but I am getting errors. The first field of the CSV file contains strings and numbers, while the other fields are real numbers.
# Emit a form-feed character (clears the console in front ends that honour it).
cat("\014")
# Systematic sample with wrap-around.
#
# Adds an `id` column holding the row number (1..nrow(df)), picks a random
# starting row, then steps forward `n` rows at a time, `n` times, wrapping
# back to the top of the table when a step runs past the last row. The
# starting row itself is only part of the result if a later step lands on it.
#
# Args:
#   df: data frame to sample from; must have at least one row.
#   n:  single number >= 1, used both as the step size and as the number of
#       steps taken.
#
# Returns:
#   `df` with a leading `id` column, restricted to the visited rows via
#   merge() on `id` (merge() orders the result by `id`; an id visited more
#   than once yields a repeated row).
rw.sample <- function(df = NULL, n = NULL) {
  if (!is.data.frame(df) || nrow(df) == 0L) {
    stop("`df` must be a data frame with at least one row", call. = FALSE)
  }
  if (!is.numeric(n) || length(n) != 1L || is.na(n) || n < 1) {
    stop("`n` must be a single number >= 1", call. = FALSE)
  }

  # Creating an `id` column.
  id.creator <- function(df = NULL) {
    # seq_len() yields exactly 1..nrow(df). The previous rbind() loop followed
    # by `a[1:nrow(a)-1, ]` (which parses as `(1:nrow(a)) - 1`) produced
    # 1, 1, 2, ..., nrow(df) - 1, so ids were shifted against their rows.
    cbind(id = as.numeric(seq_len(nrow(df))), df)
  }
  df <- id.creator(df = df)

  # Function for sampling.
  sampler <- function(df = NULL) {
    total <- nrow(df)
    start <- as.numeric(df$id[sample(total, 1)])
    # k-th visited id = start + k * n, wrapped into 1..total. Exactly one
    # step is taken per draw; the former pair of independent `if`s could
    # advance and then wrap in the same iteration, skipping a row.
    ids <- ((start + n * seq_len(n) - 1) %% total) + 1
    merge(df, data.frame(id = ids))
  }

  sampler(df = df)
}
# Read the CSV and draw the sample from it.
data1 <- read_csv("Data.csv")
# `optional` is an argument of as.data.frame(), not data.frame(); passing it
# to data.frame() added a spurious column literally named `optional`.
df <- data.frame(data1, row.names = NULL)
# Call the sampler directly. sapply(rw.sample, df, 10) handed the function to
# sapply() as the object to iterate over and `df` as the function to apply.
sampledData <- rw.sample(df, 10)
@ nirgrahamuk
Thank you. I am trying to do systematic sampling on a CSV file made up of 3 columns: the first column holds strings and numbers, while the others hold numbers. The ID should be included in the sampling!
library( TeachingSampling )
set.seed(42) # for reproducibility when the data to be sampled
# isn't perfectly divisible by the chosen step size
# iris has 150 rows
k <- 10 # step size, passed to S.SY() as its second argument
(chosen_indexes <- S.SY(nrow(iris), k))
(sampled_iris <- iris[chosen_indexes, ])
If you'd rather not load the package, the function is defined as:
# Systematic sampling: select every `a`-th position out of `N`, starting from
# a random position drawn with sample(a, 1).
#
# Args:
#   N: population size as a single number, e.g. nrow(df) -- not the data
#      frame or matrix itself.
#   a: step between selected positions, a single number >= 1.
#
# Returns:
#   A one-column numeric matrix of the selected positions. It has
#   floor(N / a) rows, plus one more when the random start does not exceed
#   the remainder N - a * floor(N / a).
S.SY <- function(N, a) {
  # Fail early with a readable message; a data frame or matrix passed as `N`
  # otherwise surfaces as "non-numeric matrix extent" / "condition has
  # length > 1".
  if (!is.numeric(N) || length(N) != 1L || is.na(N) || N < 0) {
    stop("`N` must be a single non-negative number such as nrow(df), ",
         "not a data frame or matrix", call. = FALSE)
  }
  if (!is.numeric(a) || length(a) != 1L || is.na(a) || a < 1) {
    stop("`a` must be a single number >= 1", call. = FALSE)
  }

  start <- sample(a, 1)
  remainder <- N - a * floor(N / a)
  n <- floor(N / a)
  if (start <= remainder) {
    n <- n + 1
  }
  # Positions start, start + a, ..., start + a * (n - 1).
  matrix(start + a * (seq_len(n) - 1), nrow = n, ncol = 1)
}
I passed the CSV file as a matrix, since as.data.frame is not working?
Error in matrix(0, n, 1) : non-numeric matrix extent
In addition: Warning message:
In if (r <= c) n <- floor((N/a)) + 1 else n <- floor(N/a) :
the condition has length > 1 and only the first element will be used
df<- as.matrix(data1, row.names = NULL, optional = FALSE)
S.SY(df,10)
And when I use as.matrix, the results are:
Warning messages:
1: In if (r <= c) n <- floor((N/a)) + 1 else n <- floor(N/a) :
the condition has length > 1 and only the first element will be used
data.frame works; my example is an example of that.
Be aware that iris is a data frame.
If you put your data frame's name where iris appears in my code, it will work...
# Systematic sampling: select every `a`-th position out of `N`, starting from
# a random position drawn with sample(a, 1).
#
# Args:
#   N: population size as a single number, e.g. nrow(df) -- not the data
#      frame or matrix itself.
#   a: step between selected positions, a single number >= 1.
#
# Returns:
#   A one-column numeric matrix of the selected positions. It has
#   floor(N / a) rows, plus one more when the random start does not exceed
#   the remainder N - a * floor(N / a).
S.SY <- function(N, a) {
  # Fail early with a readable message; a data frame or matrix passed as `N`
  # otherwise surfaces as "non-numeric matrix extent" / "condition has
  # length > 1".
  if (!is.numeric(N) || length(N) != 1L || is.na(N) || N < 0) {
    stop("`N` must be a single non-negative number such as nrow(df), ",
         "not a data frame or matrix", call. = FALSE)
  }
  if (!is.numeric(a) || length(a) != 1L || is.na(a) || a < 1) {
    stop("`a` must be a single number >= 1", call. = FALSE)
  }

  start <- sample(a, 1)
  remainder <- N - a * floor(N / a)
  n <- floor(N / a)
  if (start <= remainder) {
    n <- n + 1
  }
  # Positions start, start + a, ..., start + a * (n - 1).
  matrix(start + a * (seq_len(n) - 1), nrow = n, ncol = 1)
}
# Load the CSV that is to be sampled.
data1 <- read_csv("data.csv")
error : Error in matrix(0, n, 1) : non-numeric matrix extent
In addition: Warning message:
In if (r <= c) n <- floor((N/a)) + 1 else n <- floor(N/a) :
the condition has length > 1 and only the first element will be used
# Systematic sampling: select every `a`-th position out of `N`, starting from
# a random position drawn with sample(a, 1).
#
# Args:
#   N: population size as a single number, e.g. nrow(df) -- not the data
#      frame or matrix itself.
#   a: step between selected positions, a single number >= 1.
#
# Returns:
#   A one-column numeric matrix of the selected positions. It has
#   floor(N / a) rows, plus one more when the random start does not exceed
#   the remainder N - a * floor(N / a).
S.SY <- function(N, a) {
  # Fail early with a readable message; a data frame or matrix passed as `N`
  # otherwise surfaces as "non-numeric matrix extent" / "condition has
  # length > 1".
  if (!is.numeric(N) || length(N) != 1L || is.na(N) || N < 0) {
    stop("`N` must be a single non-negative number such as nrow(df), ",
         "not a data frame or matrix", call. = FALSE)
  }
  if (!is.numeric(a) || length(a) != 1L || is.na(a) || a < 1) {
    stop("`a` must be a single number >= 1", call. = FALSE)
  }

  start <- sample(a, 1)
  remainder <- N - a * floor(N / a)
  n <- floor(N / a)
  if (start <= remainder) {
    n <- n + 1
  }
  # Positions start, start + a, ..., start + a * (n - 1).
  matrix(start + a * (seq_len(n) - 1), nrow = n, ncol = 1)
}
# Read the CSV, convert it to a plain data frame, then keep only the rows
# whose positions S.SY() selects with a step of 10.
data1 <- read_csv("data.csv")
df <- as.data.frame(data1, row.names = NULL, optional = FALSE)

chosen_indexes <- S.SY(nrow(df), 10)
chosen_indexes

sampled_df <- df[chosen_indexes, ]
sampled_df