The error message clearly points to the source of the problem in the clvdata() call on df_fail: it's the estimation.split argument given, 6.
The estimation.split parameter indicates the length of the estimation period. By default, it's NULL.
estimation.split: May be specified as either the number of periods since the first transaction or the timepoint (either as character, Date, or POSIXct) at which the estimation period ends. The indicated timepoint itself will be part of the estimation sample. If no value is provided or set to NULL, the whole dataset will be used for fitting the model (no holdout sample).
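So, the short answer is that df_fail just had too few data points: its transactions only run from 2020-10-04 to 2020-11-08 (5 weeks), so a 6-week estimation period cannot end at least 2 periods before the last transaction.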
suppressPackageStartupMessages({
library(CLVTools)
})
# Example transactions for which the 6-week split fails:
# 17 rows across customers 1, 2 and 3. Dates are given as day counts
# since 1970-01-01 and converted to class "Date".
df_fail <- data.frame(
  ID = rep(c(1, 2, 3), times = c(1, 3, 13)),
  TRANSACTION_DT = as.Date(
    c(
      18539, 18544, 18539, 18551, 18539, 18555,
      18555, 18555, 18574, 18541, 18541, 18542,
      18574, 18543, 18543, 18543, 18543
    ),
    origin = "1970-01-01"
  ),
  AMOUNT = c(
    1999, 199, 799, 499, 499, 299,
    199, 199, 299, 999, 199, 199,
    299, 199, 199, 299, 299
  )
)
# Example transactions for which the split succeeds:
# 20 rows across customers 1 to 5. Dates are given as day counts
# since 1970-01-01 and converted to class "Date".
df_succeed <- data.frame(
  ID = rep(c(1, 2, 3, 4, 5), times = c(1, 2, 11, 4, 2)),
  TRANSACTION_DT = as.Date(
    c(
      18555, 18540, 18608, 18574, 18550, 18549, 18566,
      18579, 18573, 18573, 18551, 18540, 18540, 18543,
      18540, 18576, 18576, 18546, 18540, 18550
    ),
    origin = "1970-01-01"
  ),
  AMOUNT = c(
    999, 199, 499, 199, 199, 299, 999,
    999, 299, 299, 999, 499, 1999, 299,
    199, 299, 199, 199, 199, 199
  )
)
# Preview the first six rows of the data set that works.
head(df_succeed, n = 6L)
#> ID TRANSACTION_DT AMOUNT
#> 1 1 2020-10-20 999
#> 2 2 2020-10-05 199
#> 3 2 2020-12-12 499
#> 4 3 2020-11-08 199
#> 5 3 2020-10-15 199
#> 6 3 2020-10-14 299
# Preview the first six rows of the data set that triggers the error.
head(df_fail, n = 6L)
#> ID TRANSACTION_DT AMOUNT
#> 1 1 2020-10-04 1999
#> 2 2 2020-10-09 199
#> 3 2 2020-10-04 799
#> 4 2 2020-10-16 499
#> 5 3 2020-10-04 499
#> 6 3 2020-10-20 299
# Build the CLV transaction-data object from df_succeed with weekly periods.
# estimation.split = 6 is a number of periods (weeks) since the first
# transaction; the printed summary below shows a 6-week estimation period
# followed by a holdout period running up to the last transaction.
clvdata(
data.transactions = df_succeed, date.format = "ymd",
time.unit = "w", estimation.split = 6, name.id = "ID", name.date = "TRANSACTION_DT",
name.price = "AMOUNT"
)
#> CLV Transaction Data
#>
#> Call:
#> clvdata(data.transactions = df_succeed, date.format = "ymd",
#> time.unit = "w", estimation.split = 6, name.id = "ID", name.date = "TRANSACTION_DT",
#> name.price = "AMOUNT")
#>
#> Total # customers 5
#> Total # transactions 17
#> Spending information TRUE
#>
#>
#> Time unit Weeks
#>
#> Estimation start 2020-10-05
#> Estimation end 2020-11-16
#> Estimation length 6.0000 Weeks
#>
#> Holdout start 2020-11-17
#> Holdout end 2020-12-12
#> Holdout length 3.571429 Weeks
# Same call, but on df_fail. This reproduces the error: df_fail's dates span
# day 18539 to day 18574 (35 days, i.e. 5 weeks), so a 6-week estimation
# period does not end at least 2 periods before the last transaction.
clvdata(
data.transactions = df_fail, date.format = "ymd",
time.unit = "w", estimation.split = 6, name.id = "ID", name.date = "TRANSACTION_DT",
name.price = "AMOUNT"
)
#> Error: Parameter estimation.split needs to indicate a point at least 2 periods before the last transaction!
# df_succeed again, now with a shorter estimation period of 3 weeks.
# The printed summary shows the estimation period ending earlier and the
# holdout period growing accordingly.
clvdata(
data.transactions = df_succeed, date.format = "ymd",
time.unit = "w", estimation.split = 3, name.id = "ID", name.date = "TRANSACTION_DT",
name.price = "AMOUNT"
)
#> CLV Transaction Data
#>
#> Call:
#> clvdata(data.transactions = df_succeed, date.format = "ymd",
#> time.unit = "w", estimation.split = 3, name.id = "ID", name.date = "TRANSACTION_DT",
#> name.price = "AMOUNT")
#>
#> Total # customers 5
#> Total # transactions 17
#> Spending information TRUE
#>
#>
#> Time unit Weeks
#>
#> Estimation start 2020-10-05
#> Estimation end 2020-10-26
#> Estimation length 3.0000 Weeks
#>
#> Holdout start 2020-10-27
#> Holdout end 2020-12-12
#> Holdout length 6.571429 Weeks