I have done some group_by in tidyverse from my data set. So I have a few different tasks that need your assistance.
Summary
> dput(final_df[1:50,])
structure(list(ACCTG_YEAR_KEY = c(2018L, 2018L, 2018L, 2018L,
2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L,
2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L,
2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L,
2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L,
2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L,
2018L), CUSTOMER_NUMBER = c(220200080L, 240548132L, 301360595L,
328452940L, 341200277L, 230428210L, 240202617L, 344714103L, 345200417L,
371200194L, 373315788L, 374419508L, 374677593L, 402200310L, 220722498L,
230287664L, 230553268L, 230652420L, 240202617L, 240202621L, 240206733L,
240825287L, 209198530L, 304143540L, 328200348L, 344200453L, 344431705L,
344714103L, 345200446L, 304639741L, 344200453L, 344608751L, 370776672L,
373200163L, 373200603L, 374216239L, 341702398L, 373201131L, 373201151L,
373201195L, 373201195L, 374419508L, 371522452L, 375724769L, 408200760L,
438126238L, 438496803L, 448200151L, 374745489L, 410200452L),
Sales = c(96.4, 74.96, 89, 94.85, 81.94, 75, 87.72, 94.85,
94.85, 79.27, 87.99, 71.31, 77.78, 94.85, 94.85, 80.62, 85,
85.37, 80.65, 80.62, 94.85, 85.36, 89.95, 75.88, 85.36, 70.28,
94.85, 94.85, 75, 85, 70.28, 75, 94.85, 81.94, 70.99, 91.58,
85.36, 72.3, 87.5, 69.99, 71.99, 71.31, 94.85, 86.76, 73.27,
91.58, 82.95, 86.76, 71.31, 86.76), QtySold = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), MFGCOST = c(36.05, 36.05, 36.05, 36.05, 36.05, 36.05,
36.05, 36.05, 36.05, 36.05, 36.05, 36.05, 36.05, 36.05, 36.05,
36.05, 36.05, 36.05, 36.05, 36.05, 36.05, 36.05, 36.05, 36.05,
36.05, 36.05, 36.05, 36.05, 36.05, 36.05, 36.05, 36.05, 36.05,
36.05, 36.05, 36.05, 36.05, 36.05, 36.05, 36.05, 36.05, 36.05,
36.05, 36.05, 36.05, 36.05, 36.05, 36.05, 36.05, 36.05),
MarginDollars = c(48.98, 27.54, 41.58, 47.43, 34.52, 27.58,
40.3, 47.43, 47.43, 31.85, 40.57, 23.89, 30.36, 47.43, 47.43,
33.2, 37.58, 37.95, 33.23, 33.2, 47.43, 37.94, 42.53, 28.46,
37.94, 22.86, 47.43, 47.43, 27.58, 37.58, 22.86, 27.58, 47.43,
34.52, 23.57, 44.16, 37.94, 24.88, 40.08, 22.57, 24.57, 23.89,
47.43, 39.34, 25.85, 44.16, 35.53, 39.34, 23.89, 39.34),
PRODUCT_SUB_LINE_DESCR = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("PRIVATE LABEL", "SUNDRY"), class = "factor"),
Document_Key = c(109728177L, 105927185L, 109524825L, 107340834L,
109326548L, 105217321L, 109036278L, 106887077L, 106829163L,
108487569L, 105511403L, 109328000L, 106756016L, 107475976L,
106799857L, 105507436L, 107338996L, 107550135L, 106423749L,
106942047L, 105543940L, 107866146L, 108541653L, 107119996L,
107037597L, 106467534L, 107096390L, 108129873L, 107154419L,
109656355L, 109710707L, 105752057L, 107609854L, 109083555L,
106787295L, 109700398L, 107096083L, 109658183L, 105420733L,
105855786L, 108145877L, 108946083L, 105164588L, 108929791L,
105374455L, 109952072L, 108067902L, 110043209L, 106803795L,
109345189L), Gross_MarginDollars = c(58.422, 37.4108, 51.17,
56.903, 44.2512, 37.45, 49.9156, 56.903, 56.903, 41.6346,
50.1802, 33.8338, 40.1744, 56.903, 56.903, 42.9576, 47.25,
47.6126, 42.987, 42.9576, 56.903, 47.6028, 52.101, 38.3124,
47.6028, 32.8244, 56.903, 56.903, 37.45, 47.25, 32.8244,
37.45, 56.903, 44.2512, 33.5202, 53.6984, 47.6028, 34.804,
49.7, 32.5402, 34.5002, 33.8338, 56.903, 48.9748, 35.7546,
53.6984, 45.241, 48.9748, 33.8338, 48.9748), Gross_MarginDollars_Percentage = c(60.603734439834,
49.9076840981857, 57.4943820224719, 59.9926199261993, 54.0043934586283,
49.9333333333333, 56.9033287733698, 59.9926199261993, 59.9926199261993,
52.5225179765359, 57.0294351630867, 47.4460804936194, 51.6513242478786,
59.9926199261993, 59.9926199261993, 53.2840486231704, 55.5882352941177,
55.7720510718051, 53.3006819590825, 53.2840486231704, 59.9926199261993,
55.7671040299906, 57.9221789883269, 50.4907749077491, 55.7671040299906,
46.7051792828685, 59.9926199261993, 59.9926199261993, 49.9333333333333,
55.5882352941177, 46.7051792828685, 49.9333333333333, 59.9926199261993,
54.0043934586283, 47.2181997464432, 58.6355099366674, 55.7671040299906,
48.1383125864454, 56.8, 46.4926418059723, 47.9236005000695,
47.4460804936194, 59.9926199261993, 56.4485938220378, 48.7984168145216,
58.6355099366674, 54.5400843881857, 56.4485938220378, 47.4460804936194,
56.4485938220378)), row.names = c(NA, -50L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), vars = c("Document_Key", "CUSTOMER_NUMBER"
), drop = TRUE, indices = list(42L, 5L, 44L, 38L, 15L, 10L, 20L,
31L, 39L, 1L, 18L, 25L, 12L, 34L, 14L, 48L, 8L, 7L, 19L,
24L, 36L, 26L, 23L, 28L, 16L, 3L, 13L, 17L, 32L, 21L, 46L,
27L, 40L, 9L, 22L, 43L, 41L, 6L, 33L, 4L, 11L, 49L, 2L, 29L,
37L, 35L, 30L, 0L, 45L, 47L), group_sizes = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
Document_Key = c(105164588L, 105217321L, 105374455L, 105420733L,
105507436L, 105511403L, 105543940L, 105752057L, 105855786L,
105927185L, 106423749L, 106467534L, 106756016L, 106787295L,
106799857L, 106803795L, 106829163L, 106887077L, 106942047L,
107037597L, 107096083L, 107096390L, 107119996L, 107154419L,
107338996L, 107340834L, 107475976L, 107550135L, 107609854L,
107866146L, 108067902L, 108129873L, 108145877L, 108487569L,
108541653L, 108929791L, 108946083L, 109036278L, 109083555L,
109326548L, 109328000L, 109345189L, 109524825L, 109656355L,
109658183L, 109700398L, 109710707L, 109728177L, 109952072L,
110043209L), CUSTOMER_NUMBER = c(371522452L, 230428210L,
408200760L, 373201151L, 230287664L, 373315788L, 240206733L,
344608751L, 373201195L, 240548132L, 240202617L, 344200453L,
374677593L, 373200603L, 220722498L, 374745489L, 345200417L,
344714103L, 240202621L, 328200348L, 341702398L, 344431705L,
304143540L, 345200446L, 230553268L, 328452940L, 402200310L,
230652420L, 370776672L, 240825287L, 438496803L, 344714103L,
373201195L, 371200194L, 209198530L, 375724769L, 374419508L,
240202617L, 373200163L, 341200277L, 374419508L, 410200452L,
301360595L, 304639741L, 373201131L, 374216239L, 344200453L,
220200080L, 438126238L, 448200151L)), row.names = c(NA, -50L
), class = "data.frame", vars = c("Document_Key", "CUSTOMER_NUMBER"
), drop = TRUE))
So what I want to do is to group or create a new data frame that shows me the number of invoices belong to one Unique customer number. For instance,
Customer A has invoices A1, A2
Customer B has invoices B1, B3
The other columns that are attached to these entities will be changed as well but I hope there is a way for me to accomplish this.
I will follow with other tasks soon. Let's focus on this one first.
Thank you!