Hello Mark,
I found out where the problem was.
Unfortunately, the apriori algorithm did not give me any rule at the end as the result. I assume it is telling me that it cannot find a rule that is applied to all customer's shopping habits. I am happy to provide a reprex, but I am not sure whether this is a direction I want to pursue.
My goal right now is to track customer shopping habits and see which Private Label items most customers tend to come back and buy them again.
For instance, I go to Target and only buy Colgate. Why? It is cheap, tastes fine and I am not picky between Crest or Colgate.
When I buy Cereals, I do not always buy Honey Crisp. One week I buy Honey Crisp, next week I buy Fruit Cereal. Why? New flavor and less boring breakfast.
library(tidyverse)
library(plyr)
#> -------------------------------------------------------------------------
#> You have loaded plyr after dplyr - this is likely to cause problems.
#> If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
#> library(plyr); library(dplyr)
#> -------------------------------------------------------------------------
#>
#> Attaching package: 'plyr'
#> The following objects are masked from 'package:dplyr':
#>
#> arrange, count, desc, failwith, id, mutate, rename, summarise,
#> summarize
#> The following object is masked from 'package:purrr':
#>
#> compact
library(dplyr)
library(ggplot2)
library(data.table)
#>
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#>
#> between, first, last
#> The following object is masked from 'package:purrr':
#>
#> transpose
library(reprex)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:data.table':
#>
#> hour, isoweek, mday, minute, month, quarter, second, wday,
#> week, yday, year
#> The following object is masked from 'package:plyr':
#>
#> here
#> The following object is masked from 'package:base':
#>
#> date
library(arules)
#> Loading required package: Matrix
#>
#> Attaching package: 'Matrix'
#> The following object is masked from 'package:tidyr':
#>
#> expand
#>
#> Attaching package: 'arules'
#> The following object is masked from 'package:dplyr':
#>
#> recode
#> The following objects are masked from 'package:base':
#>
#> abbreviate, write
library(arulesViz)
#> Loading required package: grid
my_data = structure(list(CUSTOMER_NUMBER = c(728201203L, 708905131L, 328327414L,
728355208L, 642200163L, 448454848L, 728355058L, 642468597L, 240676888L,
743200092L, 743392231L, 246200031L, 444727581L, 745816224L, 708182262L,
460353085L, 345471941L, 240194612L, 410200452L, 220498367L, 236200184L,
454645907L, 648200105L, 746762841L, 438512958L, 422115117L, 703447690L,
212291876L, 236200679L, 768573620L, 454518547L, 304549826L, 456742984L,
301661670L, 708519502L, 768459921L, 743109851L, 710772787L, 760879226L,
304675232L, 760149113L, 760157540L, 458381419L, 408813812L, 760214270L,
746200117L, 240464705L, 728434515L, 744725326L, 422883774L, 454465220L,
610314186L, 458361470L, 209815610L, 301242607L, 768254908L, 703208008L,
704699496L, 301214101L, 438100987L, 304322244L, 220601153L, 373281903L,
745128862L, 209479386L, 342200066L, 642200457L, 373200493L, 454651099L,
444200033L, 410141015L, 345153647L, 636378463L, 642217673L, 460684786L,
703369698L, 710651438L, 768121770L, 458192413L, 246635049L, 610587675L,
341647049L, 458330743L, 373200851L, 328201052L, 438321480L, 344703417L,
768471268L, 743200008L, 220200213L, 408200193L, 462766451L, 708535530L,
745200161L, 704202137L, 328168003L, 304673648L, 710238739L, 744200201L,
328614333L), Accounting_Date = c("2017-04-24", "2018-04-23",
"2016-11-29", "2017-08-25", "2017-09-29", "2017-02-20", "2017-06-01",
"2017-11-29", "2017-10-03", "2017-02-01", "2016-08-04", "2017-04-27",
"2016-06-02", "2017-03-02", "2018-03-06", "2017-09-06", "2016-06-08",
"2016-09-08", "2017-10-02", "2016-10-26", "2017-01-05", "2016-11-03",
"2017-01-30", "2017-12-06", "2017-01-04", "2017-06-20", "2017-06-15",
"2016-08-24", "2017-04-17", "2017-10-25", "2018-04-02", "2017-05-24",
"2017-01-12", "2018-03-16", "2017-01-11", "2017-04-04", "2016-11-29",
"2018-02-06", "2018-04-12", "2018-02-05", "2018-02-19", "2016-08-09",
"2017-03-09", "2018-04-16", "2016-05-26", "2017-01-26", "2016-06-02",
"2016-11-16", "2016-09-07", "2016-12-02", "2017-10-11", "2017-01-17",
"2016-07-22", "2016-07-06", "2017-05-03", "2017-11-07", "2016-10-03",
"2016-05-11", "2016-07-19", "2016-11-21", "2017-06-01", "2016-11-23",
"2017-06-12", "2017-08-29", "2016-10-19", "2016-09-19", "2017-03-06",
"2018-01-03", "2016-08-05", "2016-06-27", "2018-02-06", "2017-01-05",
"2017-10-10", "2017-03-10", "2017-01-27", "2017-06-29", "2018-02-19",
"2018-02-06", "2016-05-11", "2016-05-26", "2016-06-28", "2017-08-03",
"2017-02-16", "2018-04-23", "2017-08-07", "2018-01-04", "2018-03-21",
"2017-10-16", "2016-08-25", "2018-01-05", "2017-06-19", "2017-09-05",
"2018-04-17", "2017-08-23", "2017-11-20", "2016-11-29", "2017-08-25",
"2017-01-25", "2016-08-09", "2016-05-16"), PRODUCT_SUB_LINE_DESCR = c("SUNDRY",
"SUNDRY", "SUNDRY", "PRIVATE LABEL", "PRIVATE LABEL", "SUNDRY",
"SUNDRY", "SUNDRY", "PRIVATE LABEL", "SUNDRY", "SUNDRY", "SUNDRY",
"SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY",
"SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "PRIVATE LABEL",
"SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY",
"SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY",
"SUNDRY", "PRIVATE LABEL", "PRIVATE LABEL", "SUNDRY", "SUNDRY",
"SUNDRY", "PRIVATE LABEL", "PRIVATE LABEL", "SUNDRY", "SUNDRY",
"SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "PRIVATE LABEL", "SUNDRY",
"PRIVATE LABEL", "SUNDRY", "SUNDRY", "SUNDRY", "PRIVATE LABEL",
"SUNDRY", "PRIVATE LABEL", "SUNDRY", "PRIVATE LABEL", "PRIVATE LABEL",
"SUNDRY", "PRIVATE LABEL", "PRIVATE LABEL", "SUNDRY", "SUNDRY",
"SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "PRIVATE LABEL", "SUNDRY",
"SUNDRY", "PRIVATE LABEL", "SUNDRY", "SUNDRY", "PRIVATE LABEL",
"PRIVATE LABEL", "SUNDRY", "PRIVATE LABEL", "SUNDRY", "PRIVATE LABEL",
"SUNDRY", "PRIVATE LABEL", "SUNDRY", "SUNDRY", "SUNDRY", "PRIVATE LABEL",
"SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "PRIVATE LABEL",
"PRIVATE LABEL", "SUNDRY"), ITEM_CATEGORY_DESCR = c("PLASTER",
"INTERNAL USE ONLY (PROMOS, FEES...) ", "AIR/WATER SYRINGE TIPS DISPOSABLE",
"ANESTHETIC INJECTION MEPIVACAINE", "TABLE COVERS", "CEMENT TEMPORARY",
"INTERNAL USE ONLY (PROMOS, FEES...) ", "WORKWEAR DISPOSABLE GOWNS",
"GLOVES NITRILE", "ENDODONTIC MEDICAMENTS", "RETRACTORS - GINGIVAL, CHEEK AND TISSUE ",
"SCALERS AND CURETTES", "CAD/CAM, OPERATORY ",
"BURS CARBIDE STANDARD", "CEMENT GLASS IONOMER LUTING", "DENTAL FLOSS",
"CEMENT TEMPORARY", "TRASH BAGS (REGULAR - NOT HAZARDOUS)", "IV SOLUTIONS",
"AMALGAM CAPSULES", "INTERNAL USE ONLY (PROMOS, FEES...) ",
"MIXING BOWLS", "X-RAY FILM INTRAORAL", "INTERDENTAL CLEANERS",
"HVE TIPS NON DISPOSABLE", "ULTRASONIC CLEANER SOLUTIONS", "GLOVES NITRILE",
"BURS CARBIDE TRIMMING AND FINISHING", "TOOTHBRUSHES", "CAD/CAM, OPERATORY ",
"SURFACE DISINFECTANT WIPES", "MIXING AND INTRAORAL TIPS", "GLOVES MISCELLANEOUS",
"STERILANTS", "DENTURE LINER AND CONDITIONER", "RETAINER AND DENTURE BOXES",
"RETRACTION PASTE", "GLOVES VINYL", "X-RAY HOLDING DEVICES - DISPOSABLE",
"COMPOSITE UNIVERSAL", "ULTRASONIC CLEANER SOLUTIONS", "ULTRASONIC CLEANER TABLETS AND POWDERS",
"ENDODONTIC MEDICAMENTS", "MATRIX BANDS METAL", "FINISHING DISCS",
"MULTIPURPOSE DISPENSING & APPLICATOR TIPS", "IMPRESSION MATERIAL VINYL POLYSILOXANE",
"BONE REGENERATION MATERIAL", "FLUORIDE VARNISHES", "TRAYS, PLASTIC ",
"GLOVES NITRILE", "COTTON ROLL ALTERNATIVES", "ANGLES PROPHY DISPOSABLE",
"BURS CARBIDE STANDARD", "ANESTHETIC TOPICAL", "SALIVA EJECTORS",
"INTERNAL USE ONLY (PROMOS, FEES...) ", "CUPS DRINKING",
"COMPOSITE UNIVERSAL", "SALIVA EJECTORS", "INTERNAL USE ONLY (PROMOS, FEES...) ",
"SALIVA EJECTORS", "IMPRESSION MATERIAL ALGINATE", "MATRIX RETAINER",
"AUTOCLAVE TUBING", "GYPSUM", "AUTOCLAVE POUCHES", "ANESTHETIC INJECTION LIDOCAINE",
"MOUTH MIRRORS METAL", "AUTOCLAVE ACCESSORIES", "DIAMOND STANDARD",
"X-RAY SOLUTION AUTOMATIC", "CAD/CAM, OPERATORY ",
"GLOVES NITRILE", "ANESTHETIC INJECTION LIDOCAINE", "BURS CARBIDE STANDARD",
"MIRROR DEFOGGERS", "TABLE COVERS", "APPLICATOR BRUSHES AND BRUSH TIPS",
"CROWNS METAL", "ENDODONTIC HAND FILES", "IMPRESSION MATERIAL ALGINATE SUBSTITUTE",
"DIAMOND STANDARD", "NON-ABSORBABLE SUTURE AND NEEDLE COMBINATION",
"X-RAY HOLDING DEVICES - REUSABLE", "TABLE COVERS", "GLOVES CHLOROPRENE",
"TRAYS, PLASTIC ", "WARNING LABELS AND SIGNAGE",
"CAD/CAM, LABORATORY ", "CAD/CAM MISCELLANEOUS",
"AUTOCLAVE POUCHES", "BARRIER COVERS", "CLEANER LUBRICANT FOR HANDPIECE",
"ANESTHETIC INJECTION ARTICAINE", "DIAMOND DISPOSABLE", "NAPKIN HOLDERS - BIB CLIPS",
"MOUTH MIRRORS METAL", "ANGLES PROPHY DISPOSABLE", "RETAINER AND DENTURE BOXES"
)), row.names = c(15366552L,
14796145L, 3526202L, 15438185L, 12537551L, 9233990L, 15437652L,
12693204L, 2162651L, 15771545L, 15841206L, 2291430L, 8960191L,
16701166L, 14623148L, 11009336L, 4744798L, 1745653L, 7304511L,
808395L, 1298878L, 9898829L, 12881259L, 16915304L, 8533598L,
7579822L, 14026314L, 462018L, 1360821L, 19288278L, 9838498L,
3207798L, 10350263L, 2919804L, 14733881L, 19234252L, 15731330L,
15235887L, 17455100L, 3262267L, 16964692L, 16973133L, 10611556L,
7192617L, 17068445L, 16769976L, 2035124L, 15468536L, 16430164L,
8048463L, 9808625L, 11490043L, 10603483L, 286899L, 2702936L,
19143836L, 13927074L, 14556731L, 2670644L, 8215921L, 3120251L,
822435L, 5623177L, 16505375L, 189243L, 4187970L, 12563874L, 5513606L,
9902979L, 8786437L, 7242190L, 4610847L, 12392057L, 12586397L,
11094108L, 13993948L, 15174426L, 18998352L, 10434363L, 2469914L,
11658521L, 4136942L, 10590667L, 5545580L, 3467749L, 8420183L,
4549132L, 19239796L, 15758141L, 737950L, 6871346L, 11226119L,
14737209L, 16545145L, 14312967L, 3340450L, 3261447L, 14966204L,
16089645L, 3675093L), class = c("data.table", "data.frame"))
df_itemList <- ddply(my_data,c("CUSTOMER_NUMBER", "Accounting_Date"),
function(df1)paste(df1$ITEM_CATEGORY_DESCR,
collapse = ","))
View(df_itemList)
df_itemList$CUSTOMER_NUMBER <- NULL
df_itemList$Accounting_Date <- NULL
#Rename column headers for ease of use
colnames(df_itemList) <- c("itemList")
write.csv(df_itemList,"ItemList.csv", row.names = TRUE)
txn = read.transactions(file="ItemList.csv", rm.duplicates= TRUE, format="basket",sep=",",cols=1)
txn@itemInfo$labels <- gsub("\"","",txn@itemInfo$labels)
basket_rules <- apriori(txn,parameter = list(sup = 0.01, conf = 0.5))
#> Apriori
#>
#> Parameter specification:
#> confidence minval smax arem aval originalSupport maxtime support minlen
#> 0.5 0.1 1 none FALSE TRUE 5 0.01 1
#> maxlen target ext
#> 10 rules FALSE
#>
#> Algorithmic control:
#> filter tree heap memopt load sort verbose
#> 0.1 TRUE TRUE FALSE TRUE 2 TRUE
#>
#> Absolute minimum support count: 1
#>
#> set item appearances ...[0 item(s)] done [0.00s].
#> set transactions ...[75 item(s), 101 transaction(s)] done [0.00s].
#> sorting and recoding items ... [17 item(s)] done [0.00s].
#> creating transaction tree ... done [0.00s].
#> checking subsets of size 1 done [0.00s].
#> writing ... [0 rule(s)] done [0.00s].
#> creating S4 object ... done [0.00s].
inspect(basket_rules)
plot(basket_rules)
#> Error in plot.rules(basket_rules): x contains 0 rules!
plot(basket_rules, method = "grouped", control = list(k = 5))
#> Error in plot.rules(basket_rules, method = "grouped", control = list(k = 5)): x contains 0 rules!
plot(basket_rules, method="graph", control=list(type="items"))
#> Error in plot.rules(basket_rules, method = "graph", control = list(type = "items")): x contains 0 rules!
plot(basket_rules, method="paracoord", control=list(alpha=.5, reorder=TRUE))
#> Error in plot.rules(basket_rules, method = "paracoord", control = list(alpha = 0.5, : x contains 0 rules!
plot(basket_rules,measure=c("support","lift"),shading="confidence",interactive=T)
#> Error in plot.rules(basket_rules, measure = c("support", "lift"), shading = "confidence", : x contains 0 rules!
#Alternative to inspect() is to convert rules to a dataframe and then use View()
df_basket <- as(basket_rules,"data.frame")
View(df_basket)
#> Error in View(df_basket): invalid 'x' argument
Created on 2018-07-30 by the reprex
package (v0.2.0).