OK thank you for your guidance. I have cleaned up and simplified my data, and improved the request best to my ability and use of reprex.
reprex::reprex(> #Read in updated version of business analysis individual roof fan asset data
>
>
> library(dplyr)
> library(tidyverse)
> library(readxl)
> library(writexl)
> library(purrr)
> library(readxl)
>
>
>
>
>
>
> IndividualFanData<- read_excel("ProgrammingData/NYCHA GIS PM Ventilation Tracking2022/SpreadsheetData/Copy of Roof Fan Ventilation Tracking.xlsx",
+ sheet = "Individual Fan Data",
col_types = c("text",
"numeric", "text", "text", "text",
+ "text", "text", "numeric", "numeric",
+ "numeric", "text", "text", "text",
+ "text", "numeric", "numeric", "numeric",
+ "text", "numeric", "numeric", "numeric",
+ "numeric", "numeric", "text", "numeric",
+ "text", "numeric", "numeric", "text",
+ "text", "text", "numeric", "text",
+ "text", "text", "text", "text", "numeric",
+ "text", "text", "date", "numeric",
+ "text", "date", "text", "text", "date",
+ "text", "text", "text", "text", "numeric",
+ "numeric", "numeric", "numeric",
+ "numeric", "numeric", "numeric",
+ "numeric", "numeric", "numeric",
+ "numeric", "numeric", "numeric",
+ "numeric", "numeric", "numeric",
+ "text", "text", "text", "text", "text",
+ "text", "text"))
There were 50 or more warnings (use warnings() to see the first 50)
> #Print out extensive list of column names
> colnames(IndividualFanData)
[1] "Roof Fan Asset ID" "Dummy" "Borough Grouping"
[4] "Managed By" "Development" "Managed By (Proper)"
[7] "Development (Proper)" "Capital Flag" "RAD Flag"
[10] "Private Flag" "Location Bypassed" "Reason for Bypassing"
[13] "Reconciled" "Building ID" "Building Text Check"
[16] "Building Number" "Building Fan Count" "Building Address"
[19] "Development Fan Count" "Unique Consolidation Count" "Unique Development Count"
[22] "Unique Development Building Count" "TDS_num" "TDS_text"
[25] "TDS Text Check" "Asset Verified" "Time Clock Present"
[28] "Clock Bypassed" "Engineering Labelling" "Engineering Rank"
[31] "Replacement Roof Fan Model" "Attachment/Extension Required?" "Fan to Be Retained"
[34] "Retention Type" "Roll-Out Order" "Phase"
[37] "Phase Detail" "Development Status Rank" "Fan Status"
[40] "Fan Status_Full" "Installation Date" "Install Age"
[43] "Install Quarter" "Fan Installation Month" "Engineer Assigned"
[46] "Asbestos Filing Status" "Fan Delivery Date" "Contractor Name"
[49] "Secondary Contractor" "Asbestos Flag" "Asbestos Tested"
[52] "Asbestos Tested #" "Asbestos Flag #" "In Procurement"
[55] "Bypass #" "Not Yet Assigned" "Asbestos"
[58] "Initial Asbestos" "Non-Residential" "To Be Retained"
[61] "Preliminary Preparation" "Order Initiated" "Fans Ordered"
[64] "Fans Installed" "Outside Initiative" "Not on Manufacturer List"
[67] "Validation" "Assigned to Engineer" "Site Visit Completed"
[70] "Engineering Docs Received" "Contractor Assigned" "Fan Order Initiated"
[73] "Fan Order Confirmed" "Fans Installed2"
>
> unique(IndividualFanData$Phase)
[1] "Phase 2" "Not Assigned" "Phase 1" "Phase 3" NA
> unique(IndividualFanData$`Phase Detail`)
[1] "Phase 2 - Batch 3" "-" "Phase 2 - Batch 1" "Phase 2 - Batch 4" "Phase 2 - Batch 2"
[6] "Phase 1" "Phase 3" "Phase 1 - Start Up" "Phase 1 - Pilot" NA
>
> #Create subset data of individual roof fan records that are part of specific batch
> ReprexSample1 <- IndividualFanData[IndividualFanData$`Phase Detail`== "Phase 2 - Batch 3",]
>
> #Cut into a smaller subset that incorporates only 6 developments
> ReprexSample1A <- ReprexSample1[1:319,]
>
> dim(ReprexSample1A)
[1] 319 74
> View(ReprexSample1A)
>
> #Group rooftop asset subset data based on unique development
> DvlmtDataGroupingREPREX<- ReprexSample1A%>%
+ group_by(Development, `Managed By`, `Development (Proper)`,`Borough Grouping`,
+ `Development Fan Count`, TDS_text, Phase, `Phase Detail`)%>%
+ summarise(n())
`summarise()` has grouped output by 'Development', 'Managed By', 'Development (Proper)', 'Borough Grouping',
'Development Fan Count', 'TDS_text', 'Phase'. You can override using the `.groups` argument.
>
> dim(DvlmtDataGroupingREPREX)
[1] 6 9
> View(DvlmtDataGroupingREPREX)
>
> #Group rooftop asset subset data based on unique development building
> BldgDataGroupingREPREX <- ReprexSample1A%>%
+ group_by(Development, `Managed By`, `Building ID`, `Building Number`,
+ `Building Address`, `Building Fan Count`, Phase, `Phase Detail`,
+ TDS_text, `Engineer Assigned`,`Contractor Name`)%>%
+ summarise(n())
`summarise()` has grouped output by 'Development', 'Managed By', 'Building ID', 'Building Number', 'Building
Address', 'Building Fan Count', 'Phase', 'Phase Detail', 'TDS_text', 'Engineer Assigned'. You can override using the
`.groups` argument.
>
> View(BldgDataGroupingREPREX)
>
> #Perform a nesting operation of development bldg information based on development
> BldgDataGroupingNestREPREX <- BldgDataGroupingREPREX%>%
+ group_by(Development, Phase, `Phase Detail`, TDS_text)%>%
+ nest()
>
> View(BldgDataGroupingNestREPREX)
> dim(BldgDataGroupingNestREPREX)
[1] 6 5
>
> #Perform a similar nesting operation of rooftop asset data based on development
> IndivVentNestReprex <- ReprexSample1A%>%
+ group_by(Development, `Managed By`, `Development (Proper)`,`Borough Grouping`, `Development Fan Count`, TDS_num, TDS_text, Phase, `Phase Detail`)%>%
+ nest()
>
> ###This data arrangement workflow approach is as follows: for the first data-set:
>
> #Every record will be based on keys unique to a Development: each unique
> #development is matched to its building information in a list which is then matched to
> #observations for unique building roof fan information in a list
>
>
> #For the second data set, every record will be based on keys unique to each building:
> #each unique building, regardless of respective development, will be matched to
> #observations for unique building roof fan information in a list
>
> #Starting with the tibble of grouped development info, I select
> #the first development and list all it's elements
> DvlmtDataGroupingREPREX[1,]
# A tibble: 1 x 9
# Groups: Development, Managed By, Development (Proper), Borough Grouping, Development Fan Count, TDS_text, Phase
# [1]
Development `Managed By` `Development (~` `Borough Group~` `Development F~` TDS_text Phase `Phase Detail` `n()`
<chr> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> <int>
1 1010 EAST 178T~ 1010 EAST 1~ 1010 East 178th~ Bronx 13 180 Phas~ Phase 2 - Bat~ 13
>
> #I move on to the grouped list of Bldg Data and list all the elements for the first development
> BldgDataGroupingNestREPREX[1,]
# A tibble: 1 x 5
# Groups: Development, Phase, Phase Detail, TDS_text [1]
Development Phase `Phase Detail` TDS_text data
<chr> <chr> <chr> <chr> <list>
1 1010 EAST 178TH STREET Phase 2 Phase 2 - Batch 3 180 <tibble [1 x 8]>
>
> #I list the elements of the list within that contains specific building information for the first development
> BldgDataGroupingNestREPREX[[1,5]]
[[1]]
# A tibble: 1 x 8
`Managed By` `Building ID` `Building Numb~` `Building Addr~` `Building Fan ~` `Engineer Assi~` `Contractor Na~`
<chr> <chr> <dbl> <chr> <dbl> <chr> <chr>
1 1010 EAST 178TH~ 180.01 1 1010 EAST 178TH~ 13 LiRo Sahara
# ... with 1 more variable: `n()` <int>
>
> #I move on to the tibble of nested individual roof fan data listing the elements
> #of roof fan information for the first development
> IndivVentNestReprex[1,]
# A tibble: 1 x 10
# Groups: Development, Managed By, Development (Proper), Borough Grouping, Development Fan Count, TDS_num,
# TDS_text, Phase, Phase Detail [1]
`Borough Grouping` `Managed By` Development `Development (~` `Development F~` TDS_num TDS_text Phase `Phase Detail`
<chr> <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
1 Bronx 1010 EAST 1~ 1010 EAST ~ 1010 East 178th~ 13 180 180 Phas~ Phase 2 - Bat~
# ... with 1 more variable: data <list>
>
> #List the list of individual roof fan information for the first development
> IndivVentNestReprex[[1,10]]
[[1]]
# A tibble: 13 x 65
`Roof Fan Asse~` Dummy `Managed By (P~` `Capital Flag` `RAD Flag` `Private Flag` `Location Bypa~` `Reason for By~`
<chr> <dbl> <chr> <dbl> <dbl> <dbl> <chr> <chr>
1 180.01.01 1 1010 East 178th~ 0 0 0 No -
2 180.01.03 1 1010 East 178th~ 0 0 0 No -
3 180.01.02 1 1010 East 178th~ 0 0 0 No -
4 180.01.04 1 1010 East 178th~ 0 0 0 No -
5 180.01.05 1 1010 East 178th~ 0 0 0 No -
6 180.01.06 1 1010 East 178th~ 0 0 0 No -
7 180.01.07 1 1010 East 178th~ 0 0 0 No -
8 180.01.12 1 1010 East 178th~ 0 0 0 No -
9 180.01.08 1 1010 East 178th~ 0 0 0 No -
10 180.01.16 1 1010 East 178th~ 0 0 0 No -
11 180.01.09 1 1010 East 178th~ 0 0 0 No -
12 180.01.11 1 1010 East 178th~ 0 0 0 No -
13 180.01.10 1 1010 East 178th~ 0 0 0 No -
# ... with 57 more variables: Reconciled <chr>, `Building ID` <chr>, `Building Text Check` <dbl>,
# `Building Number` <dbl>, `Building Fan Count` <dbl>, `Building Address` <chr>,
# `Unique Consolidation Count` <dbl>, `Unique Development Count` <dbl>, `Unique Development Building Count` <dbl>,
# `TDS Text Check` <dbl>, `Asset Verified` <chr>, `Time Clock Present` <dbl>, `Clock Bypassed` <dbl>,
# `Engineering Labelling` <chr>, `Engineering Rank` <chr>, `Replacement Roof Fan Model` <chr>,
# `Attachment/Extension Required?` <dbl>, `Fan to Be Retained` <chr>, `Retention Type` <chr>,
# `Roll-Out Order` <chr>, `Development Status Rank` <dbl>, `Fan Status` <chr>, `Fan Status_Full` <chr>, ..)