How to track each unique customer's purchases over time

Hello,

I do not know how Amazon does a great job of keeping track of EACH customer's shopping habits and gives out each individual suggestion for them. I do not think I am there yet to do that kind of task. If you know a method/system to do this, please recommend me a source or a book for me to read up.

library(tidyverse) 
library(dplyr)
library(data.table)
#> 
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#> 
#>     between, first, last
#> The following object is masked from 'package:purrr':
#> 
#>     transpose
library(reprex)
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:data.table':
#> 
#>     hour, isoweek, mday, minute, month, quarter, second, wday,
#>     week, yday, year
#> The following object is masked from 'package:base':
#> 
#>     date

data = structure(list(CUSTOMER_NUMBER = c(0L, 0L, 0L, 0L, 0L, 138990000L, 
                                          138990000L, 138990000L, 138990000L, 138990000L, 138990000L, 138990000L, 
                                          138990000L, 138990000L, 138990000L, 138990000L, 138990000L, 138990000L, 
                                          138990000L, 138990000L, 138990000L, 138990000L, 138990000L, 138990000L, 
                                          138990000L, 138990000L, 138990000L, 138990000L, 138990000L, 138990000L, 
                                          138990000L, 138990000L, 138990000L, 209020998L, 209020998L, 209020998L, 
                                          209020998L, 209020998L, 209020998L, 209020998L, 209020998L, 209020998L, 
                                          209020998L, 209020998L, 209100072L, 209100072L, 209100072L, 209100072L, 
                                          209100072L, 209100072L), ITEM_CATEGORY_DESCR = c("BARRIER COVERS", 
                                                                                           "FACEMASKS", "FACEMASKS", "GLOVES LATEX", "GLOVES NITRILE", "GLOVES LATEX", 
                                                                                           "GLOVES NITRILE", "GLOVES NITRILE", "GLOVES NITRILE", "GLOVES NITRILE", 
                                                                                           "GLOVES NITRILE", "GLOVES NITRILE", "GLOVES NITRILE", "GLOVES NITRILE", 
                                                                                           "GLOVES NITRILE", "GLOVES NITRILE", "GLOVES NITRILE", "GLOVES NITRILE", 
                                                                                           "GLOVES NITRILE", "GLOVES NITRILE", "GLOVES NITRILE", "GLOVES NITRILE", 
                                                                                           "GLOVES NITRILE", "GLOVES NITRILE", "SURFACE DISINFECTANT WIPES", 
                                                                                           "SURFACE DISINFECTANT WIPES", "SURFACE DISINFECTANT WIPES", "SURFACE DISINFECTANT WIPES", 
                                                                                           "SURFACE DISINFECTANT WIPES", "SURFACE DISINFECTANT WIPES", "SURFACE DISINFECTANT WIPES", 
                                                                                           "SURFACE DISINFECTANT WIPES", "SURFACE DISINFECTANT WIPES", "BITE REGISTRATION MATERIAL", 
                                                                                           "ENDODONTIC HAND FILES", "ENDODONTIC HAND FILES", "FACEMASKS", 
                                                                                           "FACEMASKS", "GLOVES LATEX", "GLOVES LATEX", "GLOVES NITRILE", 
                                                                                           "IMPRESSION MATERIAL VINYL POLYSILOXANE", "IMPRESSION MATERIAL VINYL POLYSILOXANE", 
                                                                                           "SPONGES", "ANGLES PROPHY DISPOSABLE", "ANGLES PROPHY DISPOSABLE", 
                                                                                           "ANGLES PROPHY DISPOSABLE", "ANGLES PROPHY DISPOSABLE", "ANGLES PROPHY DISPOSABLE", 
                                                                                           "ANGLES PROPHY DISPOSABLE"), month = structure(c(17471, 17410, 
                                                                                                                                            17622, 17198, 17410, 17106, 16922, 17045, 17075, 17106, 17198, 
                                                                                                                                            17226, 17257, 17257, 17287, 17318, 17379, 17410, 17440, 17471, 
                                                                                                                                            17501, 17532, 17591, 17622, 16953, 17136, 17198, 17379, 17410, 
                                                                                                                                            17440, 17471, 17501, 17563, 16922, 17106, 17226, 17106, 17410, 
                                                                                                                                            17014, 17410, 17106, 17318, 17379, 17106, 16983, 17106, 17198, 
                                                                                                                                            17287, 17379, 17440), class = "Date"), Count = c(1L, 1L, 1L, 
                                                                                                                                                                                             1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 4L, 1L, 1L, 1L, 2L, 1L, 
                                                                                                                                                                                             3L, 3L, 4L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                                                                                                                                                                             3L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), 
                      `PRIVATE LABEL` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 11, 
                                          0, 7, 1, 1, 1, 2, 1, 12, 5, 5, 8, 4, 5, 2, 3, 0, 0, 5, 0, 
                                          2, 3, 0, 0, 0, 5, 0, 0, 0, 10, 0, 3, 2, 0, 0, 0, 0, 0, 0), 
                      SUNDRY = c(3, 5, 24, 30, 1, 2, 1, 2, 2, 0, 0, 0, 1, 0, 0, 
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 0, 1, 0, 0, 1, 
                                 2, 3, 0, 1, 10, 10, 0, 1, 0, 0, 3, 3, 3, 3, 2, 3)), row.names = c(NA, 
                                                                                                   -50L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), vars = c("CUSTOMER_NUMBER", 
                                                                                                                                                                           "ITEM_CATEGORY_DESCR"), drop = TRUE, indices = list(0L, 1:2, 
                                                                                                                                                                                                                               3L, 4L, 5L, 6:23, 24:32, 33L, 34:35, 36:37, 38:39, 40L, 41:42, 
                                                                                                                                                                                                                               43L, 44:49), group_sizes = c(1L, 2L, 1L, 1L, 1L, 18L, 9L, 
                                                                                                                                                                                                                                                            1L, 2L, 2L, 2L, 1L, 2L, 1L, 6L), biggest_group_size = 18L, labels = structure(list(
                                                                                                                                                                                                                                                              CUSTOMER_NUMBER = c(0L, 0L, 0L, 0L, 138990000L, 138990000L, 
                                                                                                                                                                                                                                                                                  138990000L, 209020998L, 209020998L, 209020998L, 209020998L, 
                                                                                                                                                                                                                                                                                  209020998L, 209020998L, 209020998L, 209100072L), ITEM_CATEGORY_DESCR = c("BARRIER COVERS", 
                                                                                                                                                                                                                                                                                                                                                           "FACEMASKS", "GLOVES LATEX", "GLOVES NITRILE", "GLOVES LATEX", 
                                                                                                                                                                                                                                                                                                                                                           "GLOVES NITRILE", "SURFACE DISINFECTANT WIPES", "BITE REGISTRATION MATERIAL", 
                                                                                                                                                                                                                                                                                                                                                           "ENDODONTIC HAND FILES", "FACEMASKS", "GLOVES LATEX", "GLOVES NITRILE", 
                                                                                                                                                                                                                                                                                                                                                           "IMPRESSION MATERIAL VINYL POLYSILOXANE", "SPONGES", "ANGLES PROPHY DISPOSABLE"
                                                                                                                                                                                                                                                                                  )), row.names = c(NA, -15L), class = "data.frame", vars = c("CUSTOMER_NUMBER", 
                                                                                                                                                                                                                                                                                                                                              "ITEM_CATEGORY_DESCR"), drop = TRUE))

Created on 2018-07-26 by the reprex
package
(v0.2.0).

My goal is simple: I want to see whether a unique CUSTOMER_NUMBER keeps buying
(or repeat the purchase of) the same item over time or whether he or she tries my house brand this month and switches to the branded one next month. The scope here is to focus on ITEM_CATEGORY_DESCRIPTION that has a Private Label and I want to see if customers keep buying this over months or at some point they just stop!

This may seem a lot but if you have any suggestion on how to do this, that would be great.

Thanks, all!

One approach could be to use broom to summarize the trend in Private Label use for each customer - item combination. Perhaps normalize the time axis to the first month they try Private Label. Then you could look at each item to look for aggregate trends across customers.

Jon:
I looked up the package broom online and it seems like I may have not searched the right thing or I have not found the right article that shows me how to do this.

Can you point out some resources beside CRAN-R please?

Thank you.

I wonder if the arules package could be of use? arules:

Provides the infrastructure for representing, manipulating and analyzing transaction data and patterns

There are plenty of blogs available about arules and the apriori algorithm, which will likely provide more concrete ideas, but my initial thoughts could be to use the apriori algorithm and instead of looking at a transaction date and basket level, look over a longer timeframe and at the customer level?

Or once you have a list of a customer's transactions in a date order, you could use the apriori algorithm in someway to look at associations between transactions (rather than associations within a transaction), thus seeing if

he or she tries my house brand this month and switches to the branded one next month [or switches between transactions].

2 Likes

The general approach I’m describing is that you could

  1. abstract your data into a simpler model that captures the change in Private Label use for a single user and a single product group. You could try a metric like “percent of monthly purchases that are private label.”
  2. apply that model to all your users and review the output.

Broom helps handle step 2 using the tidyverse principles.

Here are a few examples I’ve seen of using broom this way:

2 Likes

Jon:
Those are amazing videos.
I just watched one of them.

I am having an issue. I assume since my working data is so small, one of the blogs showing me how to use apriori algorithm does not work out well. I have waited for the code to run almost 15 minutes...
Maybe, I do not want to take into account what people usually buy together with the Private Label products.
All I care is whether they buy Private Label A today and they will come back next time and buy it again.

1 Like

15 minutes for the code to run does seem like a long time. It's difficult to say why without seeing it; maybe you could produce a reprex?

Hello Mark,
I found out where the problem was.
Unfortunately, the apriori algorithm did not give me any rule at the end as the result. I assume it is telling me that it cannot find a rule that is applied to all customer's shopping habits. I am happy to provide a reprex, but I am not sure whether this is a direction I want to pursue.
My goal right now is to track customer shopping habits and see which Private Label items most customers tend to come back and buy them again.
For instance, I go to Target and only buy Colgate. Why? It is cheap, tastes fine and I am not picky between Crest or Colgate.
When I buy Cereals, I do not always buy Honey Crisp. One week I buy Honey Crisp, next week I buy Fruit Cereal. Why? New flavor and less boring breakfast.

library(tidyverse) 
library(plyr)
#> -------------------------------------------------------------------------
#> You have loaded plyr after dplyr - this is likely to cause problems.
#> If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
#> library(plyr); library(dplyr)
#> -------------------------------------------------------------------------
#> 
#> Attaching package: 'plyr'
#> The following objects are masked from 'package:dplyr':
#> 
#>     arrange, count, desc, failwith, id, mutate, rename, summarise,
#>     summarize
#> The following object is masked from 'package:purrr':
#> 
#>     compact
library(dplyr)
library(ggplot2)
library(data.table)
#> 
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#> 
#>     between, first, last
#> The following object is masked from 'package:purrr':
#> 
#>     transpose
library(reprex)
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:data.table':
#> 
#>     hour, isoweek, mday, minute, month, quarter, second, wday,
#>     week, yday, year
#> The following object is masked from 'package:plyr':
#> 
#>     here
#> The following object is masked from 'package:base':
#> 
#>     date
library(arules)
#> Loading required package: Matrix
#> 
#> Attaching package: 'Matrix'
#> The following object is masked from 'package:tidyr':
#> 
#>     expand
#> 
#> Attaching package: 'arules'
#> The following object is masked from 'package:dplyr':
#> 
#>     recode
#> The following objects are masked from 'package:base':
#> 
#>     abbreviate, write
library(arulesViz)
#> Loading required package: grid


my_data = structure(list(CUSTOMER_NUMBER = c(728201203L, 708905131L, 328327414L, 
                                             728355208L, 642200163L, 448454848L, 728355058L, 642468597L, 240676888L, 
                                             743200092L, 743392231L, 246200031L, 444727581L, 745816224L, 708182262L, 
                                             460353085L, 345471941L, 240194612L, 410200452L, 220498367L, 236200184L, 
                                             454645907L, 648200105L, 746762841L, 438512958L, 422115117L, 703447690L, 
                                             212291876L, 236200679L, 768573620L, 454518547L, 304549826L, 456742984L, 
                                             301661670L, 708519502L, 768459921L, 743109851L, 710772787L, 760879226L, 
                                             304675232L, 760149113L, 760157540L, 458381419L, 408813812L, 760214270L, 
                                             746200117L, 240464705L, 728434515L, 744725326L, 422883774L, 454465220L, 
                                             610314186L, 458361470L, 209815610L, 301242607L, 768254908L, 703208008L, 
                                             704699496L, 301214101L, 438100987L, 304322244L, 220601153L, 373281903L, 
                                             745128862L, 209479386L, 342200066L, 642200457L, 373200493L, 454651099L, 
                                             444200033L, 410141015L, 345153647L, 636378463L, 642217673L, 460684786L, 
                                             703369698L, 710651438L, 768121770L, 458192413L, 246635049L, 610587675L, 
                                             341647049L, 458330743L, 373200851L, 328201052L, 438321480L, 344703417L, 
                                             768471268L, 743200008L, 220200213L, 408200193L, 462766451L, 708535530L, 
                                             745200161L, 704202137L, 328168003L, 304673648L, 710238739L, 744200201L, 
                                             328614333L), Accounting_Date = c("2017-04-24", "2018-04-23", 
                                                                              "2016-11-29", "2017-08-25", "2017-09-29", "2017-02-20", "2017-06-01", 
                                                                              "2017-11-29", "2017-10-03", "2017-02-01", "2016-08-04", "2017-04-27", 
                                                                              "2016-06-02", "2017-03-02", "2018-03-06", "2017-09-06", "2016-06-08", 
                                                                              "2016-09-08", "2017-10-02", "2016-10-26", "2017-01-05", "2016-11-03", 
                                                                              "2017-01-30", "2017-12-06", "2017-01-04", "2017-06-20", "2017-06-15", 
                                                                              "2016-08-24", "2017-04-17", "2017-10-25", "2018-04-02", "2017-05-24", 
                                                                              "2017-01-12", "2018-03-16", "2017-01-11", "2017-04-04", "2016-11-29", 
                                                                              "2018-02-06", "2018-04-12", "2018-02-05", "2018-02-19", "2016-08-09", 
                                                                              "2017-03-09", "2018-04-16", "2016-05-26", "2017-01-26", "2016-06-02", 
                                                                              "2016-11-16", "2016-09-07", "2016-12-02", "2017-10-11", "2017-01-17", 
                                                                              "2016-07-22", "2016-07-06", "2017-05-03", "2017-11-07", "2016-10-03", 
                                                                              "2016-05-11", "2016-07-19", "2016-11-21", "2017-06-01", "2016-11-23", 
                                                                              "2017-06-12", "2017-08-29", "2016-10-19", "2016-09-19", "2017-03-06", 
                                                                              "2018-01-03", "2016-08-05", "2016-06-27", "2018-02-06", "2017-01-05", 
                                                                              "2017-10-10", "2017-03-10", "2017-01-27", "2017-06-29", "2018-02-19", 
                                                                              "2018-02-06", "2016-05-11", "2016-05-26", "2016-06-28", "2017-08-03", 
                                                                              "2017-02-16", "2018-04-23", "2017-08-07", "2018-01-04", "2018-03-21", 
                                                                              "2017-10-16", "2016-08-25", "2018-01-05", "2017-06-19", "2017-09-05", 
                                                                              "2018-04-17", "2017-08-23", "2017-11-20", "2016-11-29", "2017-08-25", 
                                                                              "2017-01-25", "2016-08-09", "2016-05-16"), PRODUCT_SUB_LINE_DESCR = c("SUNDRY", 
                                                                                                                                                    "SUNDRY", "SUNDRY", "PRIVATE LABEL", "PRIVATE LABEL", "SUNDRY", 
                                                                                                                                                    "SUNDRY", "SUNDRY", "PRIVATE LABEL", "SUNDRY", "SUNDRY", "SUNDRY", 
                                                                                                                                                    "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", 
                                                                                                                                                    "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "PRIVATE LABEL", 
                                                                                                                                                    "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", 
                                                                                                                                                    "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", 
                                                                                                                                                    "SUNDRY", "PRIVATE LABEL", "PRIVATE LABEL", "SUNDRY", "SUNDRY", 
                                                                                                                                                    "SUNDRY", "PRIVATE LABEL", "PRIVATE LABEL", "SUNDRY", "SUNDRY", 
                                                                                                                                                    "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "PRIVATE LABEL", "SUNDRY", 
                                                                                                                                                    "PRIVATE LABEL", "SUNDRY", "SUNDRY", "SUNDRY", "PRIVATE LABEL", 
                                                                                                                                                    "SUNDRY", "PRIVATE LABEL", "SUNDRY", "PRIVATE LABEL", "PRIVATE LABEL", 
                                                                                                                                                    "SUNDRY", "PRIVATE LABEL", "PRIVATE LABEL", "SUNDRY", "SUNDRY", 
                                                                                                                                                    "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "PRIVATE LABEL", "SUNDRY", 
                                                                                                                                                    "SUNDRY", "PRIVATE LABEL", "SUNDRY", "SUNDRY", "PRIVATE LABEL", 
                                                                                                                                                    "PRIVATE LABEL", "SUNDRY", "PRIVATE LABEL", "SUNDRY", "PRIVATE LABEL", 
                                                                                                                                                    "SUNDRY", "PRIVATE LABEL", "SUNDRY", "SUNDRY", "SUNDRY", "PRIVATE LABEL", 
                                                                                                                                                    "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "SUNDRY", "PRIVATE LABEL", 
                                                                                                                                                    "PRIVATE LABEL", "SUNDRY"), ITEM_CATEGORY_DESCR = c("PLASTER", 
                                                                                                                                                                                                        "INTERNAL USE ONLY (PROMOS, FEES...)                    ", "AIR/WATER SYRINGE TIPS DISPOSABLE", 
                                                                                                                                                                                                        "ANESTHETIC INJECTION MEPIVACAINE", "TABLE COVERS", "CEMENT TEMPORARY", 
                                                                                                                                                                                                        "INTERNAL USE ONLY (PROMOS, FEES...)                    ", "WORKWEAR DISPOSABLE GOWNS", 
                                                                                                                                                                                                        "GLOVES NITRILE", "ENDODONTIC MEDICAMENTS", "RETRACTORS - GINGIVAL, CHEEK AND TISSUE                ", 
                                                                                                                                                                                                        "SCALERS AND CURETTES", "CAD/CAM, OPERATORY                                     ", 
                                                                                                                                                                                                        "BURS CARBIDE STANDARD", "CEMENT GLASS IONOMER LUTING", "DENTAL FLOSS", 
                                                                                                                                                                                                        "CEMENT TEMPORARY", "TRASH BAGS (REGULAR - NOT HAZARDOUS)", "IV SOLUTIONS", 
                                                                                                                                                                                                        "AMALGAM CAPSULES", "INTERNAL USE ONLY (PROMOS, FEES...)                    ", 
                                                                                                                                                                                                        "MIXING BOWLS", "X-RAY FILM INTRAORAL", "INTERDENTAL CLEANERS", 
                                                                                                                                                                                                        "HVE TIPS NON DISPOSABLE", "ULTRASONIC CLEANER SOLUTIONS", "GLOVES NITRILE", 
                                                                                                                                                                                                        "BURS CARBIDE TRIMMING AND FINISHING", "TOOTHBRUSHES", "CAD/CAM, OPERATORY                                     ", 
                                                                                                                                                                                                        "SURFACE DISINFECTANT WIPES", "MIXING AND INTRAORAL TIPS", "GLOVES MISCELLANEOUS", 
                                                                                                                                                                                                        "STERILANTS", "DENTURE LINER AND CONDITIONER", "RETAINER AND DENTURE BOXES", 
                                                                                                                                                                                                        "RETRACTION PASTE", "GLOVES VINYL", "X-RAY HOLDING DEVICES - DISPOSABLE", 
                                                                                                                                                                                                        "COMPOSITE UNIVERSAL", "ULTRASONIC CLEANER SOLUTIONS", "ULTRASONIC CLEANER TABLETS AND POWDERS", 
                                                                                                                                                                                                        "ENDODONTIC MEDICAMENTS", "MATRIX BANDS METAL", "FINISHING DISCS", 
                                                                                                                                                                                                        "MULTIPURPOSE DISPENSING & APPLICATOR TIPS", "IMPRESSION MATERIAL VINYL POLYSILOXANE", 
                                                                                                                                                                                                        "BONE REGENERATION MATERIAL", "FLUORIDE VARNISHES", "TRAYS, PLASTIC                                         ", 
                                                                                                                                                                                                        "GLOVES NITRILE", "COTTON ROLL ALTERNATIVES", "ANGLES PROPHY DISPOSABLE", 
                                                                                                                                                                                                        "BURS CARBIDE STANDARD", "ANESTHETIC TOPICAL", "SALIVA EJECTORS", 
                                                                                                                                                                                                        "INTERNAL USE ONLY (PROMOS, FEES...)                    ", "CUPS DRINKING", 
                                                                                                                                                                                                        "COMPOSITE UNIVERSAL", "SALIVA EJECTORS", "INTERNAL USE ONLY (PROMOS, FEES...)                    ", 
                                                                                                                                                                                                        "SALIVA EJECTORS", "IMPRESSION MATERIAL ALGINATE", "MATRIX RETAINER", 
                                                                                                                                                                                                        "AUTOCLAVE TUBING", "GYPSUM", "AUTOCLAVE POUCHES", "ANESTHETIC INJECTION LIDOCAINE", 
                                                                                                                                                                                                        "MOUTH MIRRORS METAL", "AUTOCLAVE ACCESSORIES", "DIAMOND STANDARD", 
                                                                                                                                                                                                        "X-RAY SOLUTION AUTOMATIC", "CAD/CAM, OPERATORY                                     ", 
                                                                                                                                                                                                        "GLOVES NITRILE", "ANESTHETIC INJECTION LIDOCAINE", "BURS CARBIDE STANDARD", 
                                                                                                                                                                                                        "MIRROR DEFOGGERS", "TABLE COVERS", "APPLICATOR BRUSHES AND BRUSH TIPS", 
                                                                                                                                                                                                        "CROWNS METAL", "ENDODONTIC HAND FILES", "IMPRESSION MATERIAL ALGINATE SUBSTITUTE", 
                                                                                                                                                                                                        "DIAMOND STANDARD", "NON-ABSORBABLE SUTURE AND NEEDLE COMBINATION", 
                                                                                                                                                                                                        "X-RAY HOLDING DEVICES - REUSABLE", "TABLE COVERS", "GLOVES CHLOROPRENE", 
                                                                                                                                                                                                        "TRAYS, PLASTIC                                         ", "WARNING LABELS AND SIGNAGE", 
                                                                                                                                                                                                        "CAD/CAM, LABORATORY                                    ", "CAD/CAM MISCELLANEOUS", 
                                                                                                                                                                                                        "AUTOCLAVE POUCHES", "BARRIER COVERS", "CLEANER LUBRICANT FOR HANDPIECE", 
                                                                                                                                                                                                        "ANESTHETIC INJECTION ARTICAINE", "DIAMOND DISPOSABLE", "NAPKIN HOLDERS - BIB CLIPS", 
                                                                                                                                                                                                        "MOUTH MIRRORS METAL", "ANGLES PROPHY DISPOSABLE", "RETAINER AND DENTURE BOXES"
                                                                                                                                                    )), row.names = c(15366552L, 
                                                                                                                                                                                                                  14796145L, 3526202L, 15438185L, 12537551L, 9233990L, 15437652L, 
                                                                                                                                                                                                                  12693204L, 2162651L, 15771545L, 15841206L, 2291430L, 8960191L, 
                                                                                                                                                                                                                  16701166L, 14623148L, 11009336L, 4744798L, 1745653L, 7304511L, 
                                                                                                                                                                                                                  808395L, 1298878L, 9898829L, 12881259L, 16915304L, 8533598L, 
                                                                                                                                                                                                                  7579822L, 14026314L, 462018L, 1360821L, 19288278L, 9838498L, 
                                                                                                                                                                                                                  3207798L, 10350263L, 2919804L, 14733881L, 19234252L, 15731330L, 
                                                                                                                                                                                                                  15235887L, 17455100L, 3262267L, 16964692L, 16973133L, 10611556L, 
                                                                                                                                                                                                                  7192617L, 17068445L, 16769976L, 2035124L, 15468536L, 16430164L, 
                                                                                                                                                                                                                  8048463L, 9808625L, 11490043L, 10603483L, 286899L, 2702936L, 
                                                                                                                                                                                                                  19143836L, 13927074L, 14556731L, 2670644L, 8215921L, 3120251L, 
                                                                                                                                                                                                                  822435L, 5623177L, 16505375L, 189243L, 4187970L, 12563874L, 5513606L, 
                                                                                                                                                                                                                  9902979L, 8786437L, 7242190L, 4610847L, 12392057L, 12586397L, 
                                                                                                                                                                                                                  11094108L, 13993948L, 15174426L, 18998352L, 10434363L, 2469914L, 
                                                                                                                                                                                                                  11658521L, 4136942L, 10590667L, 5545580L, 3467749L, 8420183L, 
                                                                                                                                                                                                                  4549132L, 19239796L, 15758141L, 737950L, 6871346L, 11226119L, 
                                                                                                                                                                                                                  14737209L, 16545145L, 14312967L, 3340450L, 3261447L, 14966204L, 
                                                                                                                                                                                                                  16089645L, 3675093L), class = c("data.table", "data.frame"))

df_itemList <- ddply(my_data,c("CUSTOMER_NUMBER", "Accounting_Date"), 
                       function(df1)paste(df1$ITEM_CATEGORY_DESCR, 
                                          collapse = ","))
View(df_itemList)
df_itemList$CUSTOMER_NUMBER <- NULL
df_itemList$Accounting_Date <- NULL

#Rename column headers for ease of use
colnames(df_itemList) <- c("itemList")
write.csv(df_itemList,"ItemList.csv", row.names = TRUE)
txn = read.transactions(file="ItemList.csv", rm.duplicates= TRUE, format="basket",sep=",",cols=1)
txn@itemInfo$labels <- gsub("\"","",txn@itemInfo$labels)
basket_rules <- apriori(txn,parameter = list(sup = 0.01, conf = 0.5))
#> Apriori
#> 
#> Parameter specification:
#>  confidence minval smax arem  aval originalSupport maxtime support minlen
#>         0.5    0.1    1 none FALSE            TRUE       5    0.01      1
#>  maxlen target   ext
#>      10  rules FALSE
#> 
#> Algorithmic control:
#>  filter tree heap memopt load sort verbose
#>     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
#> 
#> Absolute minimum support count: 1 
#> 
#> set item appearances ...[0 item(s)] done [0.00s].
#> set transactions ...[75 item(s), 101 transaction(s)] done [0.00s].
#> sorting and recoding items ... [17 item(s)] done [0.00s].
#> creating transaction tree ... done [0.00s].
#> checking subsets of size 1 done [0.00s].
#> writing ... [0 rule(s)] done [0.00s].
#> creating S4 object  ... done [0.00s].
inspect(basket_rules)
plot(basket_rules)
#> Error in plot.rules(basket_rules): x contains 0 rules!
plot(basket_rules, method = "grouped", control = list(k = 5))
#> Error in plot.rules(basket_rules, method = "grouped", control = list(k = 5)): x contains 0 rules!
plot(basket_rules, method="graph", control=list(type="items"))
#> Error in plot.rules(basket_rules, method = "graph", control = list(type = "items")): x contains 0 rules!
plot(basket_rules, method="paracoord",  control=list(alpha=.5, reorder=TRUE))
#> Error in plot.rules(basket_rules, method = "paracoord", control = list(alpha = 0.5, : x contains 0 rules!
plot(basket_rules,measure=c("support","lift"),shading="confidence",interactive=T)
#> Error in plot.rules(basket_rules, measure = c("support", "lift"), shading = "confidence", : x contains 0 rules!

#Alternative to inspect() is to convert rules to a dataframe and then use View()
df_basket <- as(basket_rules,"data.frame")
View(df_basket)
#> Error in View(df_basket): invalid 'x' argument

Created on 2018-07-30 by the reprex
package
(v0.2.0).

I thought this was an interesting problem so I experimented with it a bit. I looked at the share of purchases that were Private Label in periods following the first Private Label purchase, and looked to see if the trend was increasing or decreasing.

There's probably a better measure out there, but hopefully this gives you some ideas.

# First, group the data by product, customer, and time period.
data_group_sums <- 
  data %>%
  group_by(ITEM_CATEGORY_DESCR, CUSTOMER_NUMBER, 
           period = floor_date(month, "1 month")) %>%
  summarise_if(is.numeric, sum) %>%   # Assuming this is meaningful...?
  mutate(Private_share = `PRIVATE LABEL` / sum(`PRIVATE LABEL` + SUNDRY)) 

# This table captures the month of first Private Label purchase 
# for each cust-item combo.
data_first_PrivLabel <-
  data_group_sums %>%
  filter(`PRIVATE LABEL` > 0) %>%
  group_by(ITEM_CATEGORY_DESCR, CUSTOMER_NUMBER) %>%
  summarise(First_PL = min(period))

# This joins the two prior tables, but only including months after first PL
data_normalized <-
  data_group_sums %>%
  left_join(data_first_PrivLabel) %>%
  mutate(norm_period = interval(First_PL, period) %/% months(1)) %>%
  filter(norm_period >= 0)


# Faceted by cust-item combo, which is a manageably short set here. 
# The slope of the trend line in each facet tells us whether Private Label 
# sales share is increasing or decreasing after starting. I don't know if 
# that's a useful measure for your business case, but it should be possible 
# to use a similar approach for whatever measures fit your question.
ggplot(data_normalized, aes(norm_period, Private_share)) + 
  geom_point() + 
  geom_smooth(method = "lm") +
  scale_y_continuous(labels = scales::percent) +
  facet_grid(CUSTOMER_NUMBER ~ ITEM_CATEGORY_DESCR, 
             scales = "free_y")
# # OR, could just show the combos that exist in the data:
# facet_wrap(~interaction(ITEM_CATEGORY_DESCR, CUSTOMER_NUMBER))

PL_plot

# The steps below extract the slope of the regression lines.
# You could use this data in aggregate to see if there are different trends
# of increasing or decreasing PL use between product lines.
data_grouped <-
  data_normalized %>%
  group_by(ITEM_CATEGORY_DESCR, CUSTOMER_NUMBER) %>%
  nest()

linear_model <- function(df) { lm(Private_share ~ norm_period, data = df) }

slopes <- data_grouped %>% 
  mutate(model = map(data, linear_model)) %>%
  unnest(model %>% purrr::map(broom::tidy)
  ) %>%  filter(term == "norm_period")
1 Like

Jon:

library(tidyverse) 
library(dplyr)
library(data.table)
#> 
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#> 
#>     between, first, last
#> The following object is masked from 'package:purrr':
#> 
#>     transpose
#> 
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#> 
#>     between, first, last
#> The following object is masked from 'package:purrr':
#> 
#>     transpose
library(reprex)
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:data.table':
#> 
#>     hour, isoweek, mday, minute, month, quarter, second, wday,
#>     week, yday, year
#> The following object is masked from 'package:base':
#> 
#>     date
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:data.table':
#> 
#>     hour, isoweek, mday, minute, month, quarter, second, wday,
#>     week, yday, year
#> The following object is masked from 'package:base':
#> 
#>     date

data = structure(list(CUSTOMER_NUMBER = c(0L, 0L, 0L, 0L, 0L, 138990000L, 
                                          138990000L, 138990000L, 138990000L, 138990000L, 138990000L, 138990000L, 
                                          138990000L, 138990000L, 138990000L, 138990000L, 138990000L, 138990000L, 
                                          138990000L, 138990000L, 138990000L, 138990000L, 138990000L, 138990000L, 
                                          138990000L, 138990000L, 138990000L, 138990000L, 138990000L, 138990000L, 
                                          138990000L, 138990000L, 138990000L, 209020998L, 209020998L, 209020998L, 
                                          209020998L, 209020998L, 209020998L, 209020998L, 209020998L, 209020998L, 
                                          209020998L, 209020998L, 209100072L, 209100072L, 209100072L, 209100072L, 
                                          209100072L, 209100072L), ITEM_CATEGORY_DESCR = c("BARRIER COVERS", 
                                                                                           "FACEMASKS", "FACEMASKS", "GLOVES LATEX", "GLOVES NITRILE", "GLOVES LATEX", 
                                                                                           "GLOVES NITRILE", "GLOVES NITRILE", "GLOVES NITRILE", "GLOVES NITRILE", 
                                                                                           "GLOVES NITRILE", "GLOVES NITRILE", "GLOVES NITRILE", "GLOVES NITRILE", 
                                                                                           "GLOVES NITRILE", "GLOVES NITRILE", "GLOVES NITRILE", "GLOVES NITRILE", 
                                                                                           "GLOVES NITRILE", "GLOVES NITRILE", "GLOVES NITRILE", "GLOVES NITRILE", 
                                                                                           "GLOVES NITRILE", "GLOVES NITRILE", "SURFACE DISINFECTANT WIPES", 
                                                                                           "SURFACE DISINFECTANT WIPES", "SURFACE DISINFECTANT WIPES", "SURFACE DISINFECTANT WIPES", 
                                                                                           "SURFACE DISINFECTANT WIPES", "SURFACE DISINFECTANT WIPES", "SURFACE DISINFECTANT WIPES", 
                                                                                           "SURFACE DISINFECTANT WIPES", "SURFACE DISINFECTANT WIPES", "BITE REGISTRATION MATERIAL", 
                                                                                           "ENDODONTIC HAND FILES", "ENDODONTIC HAND FILES", "FACEMASKS", 
                                                                                           "FACEMASKS", "GLOVES LATEX", "GLOVES LATEX", "GLOVES NITRILE", 
                                                                                           "IMPRESSION MATERIAL VINYL POLYSILOXANE", "IMPRESSION MATERIAL VINYL POLYSILOXANE", 
                                                                                           "SPONGES", "ANGLES PROPHY DISPOSABLE", "ANGLES PROPHY DISPOSABLE", 
                                                                                           "ANGLES PROPHY DISPOSABLE", "ANGLES PROPHY DISPOSABLE", "ANGLES PROPHY DISPOSABLE", 
                                                                                           "ANGLES PROPHY DISPOSABLE"), month = structure(c(17471, 17410, 
                                                                                                                                            17622, 17198, 17410, 17106, 16922, 17045, 17075, 17106, 17198, 
                                                                                                                                            17226, 17257, 17257, 17287, 17318, 17379, 17410, 17440, 17471, 
                                                                                                                                            17501, 17532, 17591, 17622, 16953, 17136, 17198, 17379, 17410, 
                                                                                                                                            17440, 17471, 17501, 17563, 16922, 17106, 17226, 17106, 17410, 
                                                                                                                                            17014, 17410, 17106, 17318, 17379, 17106, 16983, 17106, 17198, 
                                                                                                                                            17287, 17379, 17440), class = "Date"), Count = c(1L, 1L, 1L, 
                                                                                                                                                                                             1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 4L, 1L, 1L, 1L, 2L, 1L, 
                                                                                                                                                                                             3L, 3L, 4L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                                                                                                                                                                             3L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), 
                      `PRIVATE LABEL` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 11, 
                                          0, 7, 1, 1, 1, 2, 1, 12, 5, 5, 8, 4, 5, 2, 3, 0, 0, 5, 0, 
                                          2, 3, 0, 0, 0, 5, 0, 0, 0, 10, 0, 3, 2, 0, 0, 0, 0, 0, 0), 
                      SUNDRY = c(3, 5, 24, 30, 1, 2, 1, 2, 2, 0, 0, 0, 1, 0, 0, 
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 0, 1, 0, 0, 1, 
                                 2, 3, 0, 1, 10, 10, 0, 1, 0, 0, 3, 3, 3, 3, 2, 3)), row.names = c(NA, 
                                                                                                   -50L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), vars = c("CUSTOMER_NUMBER", 
                                                                                                                                                                           "ITEM_CATEGORY_DESCR"), drop = TRUE, indices = list(0L, 1:2, 
                                                                                                                                                                                                                               3L, 4L, 5L, 6:23, 24:32, 33L, 34:35, 36:37, 38:39, 40L, 41:42, 
                                                                                                                                                                                                                               43L, 44:49), group_sizes = c(1L, 2L, 1L, 1L, 1L, 18L, 9L, 
                                                                                                                                                                                                                                                            1L, 2L, 2L, 2L, 1L, 2L, 1L, 6L), biggest_group_size = 18L, labels = structure(list(
                                                                                                                                                                                                                                                              CUSTOMER_NUMBER = c(0L, 0L, 0L, 0L, 138990000L, 138990000L, 
                                                                                                                                                                                                                                                                                  138990000L, 209020998L, 209020998L, 209020998L, 209020998L, 
                                                                                                                                                                                                                                                                                  209020998L, 209020998L, 209020998L, 209100072L), ITEM_CATEGORY_DESCR = c("BARRIER COVERS", 
                                                                                                                                                                                                                                                                                                                                                           "FACEMASKS", "GLOVES LATEX", "GLOVES NITRILE", "GLOVES LATEX", 
                                                                                                                                                                                                                                                                                                                                                           "GLOVES NITRILE", "SURFACE DISINFECTANT WIPES", "BITE REGISTRATION MATERIAL", 
                                                                                                                                                                                                                                                                                                                                                           "ENDODONTIC HAND FILES", "FACEMASKS", "GLOVES LATEX", "GLOVES NITRILE", 
                                                                                                                                                                                                                                                                                                                                                           "IMPRESSION MATERIAL VINYL POLYSILOXANE", "SPONGES", "ANGLES PROPHY DISPOSABLE"
                                                                                                                                                                                                                                                                                  )), row.names = c(NA, -15L), class = "data.frame", vars = c("CUSTOMER_NUMBER", 
                                                                                                                                                                                                                                                                                                                                              "ITEM_CATEGORY_DESCR"), drop = TRUE))
data_group_sums <- 
  data %>%
  group_by(ITEM_CATEGORY_DESCR, CUSTOMER_NUMBER, 
           period = floor_date(month, "1 month")) %>%
  summarise_if(is.numeric, sum) %>%   # Assuming this is meaningful...?
  mutate(Private_share = `PRIVATE LABEL` / sum(`PRIVATE LABEL` + SUNDRY)) 

# This table captures the month of first Private Label purchase 
# for each cust-item combo.
data_first_PrivLabel <-
  data_group_sums %>%
  filter(`PRIVATE LABEL` > 0) %>%
  group_by(ITEM_CATEGORY_DESCR, CUSTOMER_NUMBER) %>%
  summarise(First_PL = min(period))

# This joins the two prior tables, but only including months after first PL
data_normalized <-
  data_group_sums %>%
  left_join(data_first_PrivLabel) %>%
  mutate(norm_period = interval(First_PL, period) %/% months(1)) %>%
  filter(norm_period >= 0)
#> Joining, by = c("ITEM_CATEGORY_DESCR", "CUSTOMER_NUMBER")
#> Note: method with signature 'Timespan#Timespan' chosen for function '%/%',
#>  target signature 'Interval#Period'.
#>  "Interval#ANY", "ANY#Period" would also be valid


# Faceted by cust-item combo, which is a manageably short set here. 
# The slope of the trend line in each facet tells us whether Private Label 
# sales share is increasing or decreasing after starting. I don't know if 
# that's a useful measure for your business case, but it should be possible 
# to use a similar approach for whatever measures fit your question.
ggplot(data_normalized, aes(norm_period, Private_share)) + 
  geom_point() + 
  geom_smooth(method = "lm") +
  scale_y_continuous(labels = scales::percent) +
  facet_grid(CUSTOMER_NUMBER ~ ITEM_CATEGORY_DESCR, 
             scales = "free_y")
#> Warning in qt((1 - level)/2, df): NaNs produced

# # OR, could just show the combos that exist in the data:
# facet_wrap(~interaction(ITEM_CATEGORY_DESCR, CUSTOMER_NUMBER))
ggplot(data_normalized, aes(norm_period, Private_share)) + 
  geom_point() + 
  geom_smooth(method = "lm") +
  scale_y_continuous(labels = scales::percent) +
  facet_wrap(~interaction(ITEM_CATEGORY_DESCR, CUSTOMER_NUMBER))
#> Warning in qt((1 - level)/2, df): NaNs produced

Created on 2018-08-05 by the reprex
package
(v0.2.0).

  • There is a warning about Nan produced.
  • Also, since I have about 84,000 customers and 20 products, would facet_wrap() be able to plot everything on the plot?

Thank you, Jon!