Help converting dataset

Hello, I am trying to format my data in Tidyverse. I have multiple observations of species in columns. I am hoping to find some code to create a single observation with all the species as columns. Help is greatly appreciated. Thank you.

Hello @AleG ,

this (pivoting data) can be done with the tidyr package.
See https://tidyr.tidyverse.org/articles/pivot.html for details and examples.

Hello, the only problem is I have multiple repeated observations as rows. There are multiple rows that all belong to a single day for example.

Hi!

To help us help you, could you please prepare a reproducible example (reprex) illustrating your issue? Please have a look at this guide, to see how to create one:

data.frame(
Count = c(1L,14L,1L,1L,1L,9L,1L,8L,1L,3L,
2L,1L,4L,1L,22L,2L,2L,1L,1L,1L,11L,1L,1L,1L,2L,
2L,1L,1L,4L,4L,4L,1L),
date = as.factor(c("02Jul21LFO411-413",
"02Jul21LFO411-413","02Jul21LFO411-413",
"02Jul21LFO411-413","02Jul21LFO411-413","02Jul21LFO411-413",
"02Jul21LFO411-413","02Jul21LFO411-413",
"02Jul21LFO441-47","02Jul21LFO441-47","02Jul21LFO441-47",
"02Jul21LFO441-47","02Jul21LFO441-47",
"02Jul21LFO441-47","02Jul21LFO441-47","02Jul21WGF42",
"02Jul21WGF42","02Jul21WGF42","02Jul21WGF42",
"02Jul21WGF42","02Jul21WGF42","02Jul21WGF42","05Jul21LFC32",
"05Jul21LFC32","05Jul21LFC32","05Jul21LFC32",
"05Jul21LFC32","05Jul21LFC32","05Jul21LFC32",
"05Jul21MO","05Jul21MO","05Jul21MO")),
farm = as.factor(c("LFO","LFO","LFO",
"LFO","LFO","LFO","LFO","LFO","LFO","LFO",
"LFO","LFO","LFO","LFO","LFO","WGF","WGF","WGF",
"WGF","WGF","WGF","WGF","LFC","LFC","LFC",
"LFC","LFC","LFC","LFC","MO","MO","MO")),
field = as.factor(c("411-413","411-413",
"411-413","411-413","411-413","411-413",
"411-413","411-413","441-47","441-47","441-47",
"441-47","441-47","441-47","441-47","42","42","42",
"42","42","42","42","32","32","32","32","32",
"32","32","1","1","1")),
trap_method = as.factor(c("SN","SN","SN",
"SN","SN","SN","SN","SN","SN","SN","SN","SN",
"SN","SN","SN","SN","SN","SN","SN","SN","SN",
"SN","SN","SN","SN","SN","SN","SN","SN",
"SN","SN","SN")),
field_type = as.factor(c("Organic","Organic",
"Organic","Organic","Organic","Organic",
"Organic","Organic","Organic","Organic","Organic",
"Organic","Organic","Organic","Organic","Organic",
"Organic","Organic","Organic","Organic","Organic",
"Organic","Conventional","Conventional",
"Conventional","Conventional","Conventional",
"Conventional","Conventional","Organic","Organic","Organic")),
Class = as.factor(c("Insecta","Insecta",
"Insecta","Insecta","Insecta","Insecta",
"Insecta","Insecta","Insecta","Insecta","Insecta",
"Insecta","Insecta","Arachnida","Insecta","Insecta",
"Insecta","Insecta","Arachnida","Insecta",
"Insecta","Insecta","Insecta","Insecta","Arachnida",
"Arachnida","Arachnida","Insecta","Insecta",
"Insecta","Insecta","Insecta")),
Order = as.factor(c("Hemiptera",
"Diptera","Hemiptera","Hemiptera","Diptera","Diptera",
"Diptera","Odonata","Hemiptera","Diptera",
"Hemiptera","Diptera","Odonata","Araneae","Diptera",
"Diptera","Diptera","Lepidoptera","Araneae",
"Odonata","Odonata","Hymenoptera","Diptera","Diptera",
"Araneae","Araneae","Ixodida","Odonata",
"Odonata","Odonata","Diptera","Diptera")),
Family = as.factor(c("Cicadellidae",
"Syrphidae","Pentatomidae","Miridae","Chironomidae",
"Calliphoridae","Stratiomyidae","Coenagrionidae",
"Cicadellidae","Chironomidae","Delphacidae",
"Syrphidae","Coenagrionidae","Salticidae ",
"Calliphoridae","Chironomidae","Calliphoridae","Noctuidae",
"Philodromidae","Lestidae","Coenagrionidae",
"Eulophidae","Syrphidae","Calliphoridae",
"Tetragnathidae","Araneidae","Ixodidae","Coenagrionidae",
"Coenagrionidae","Coenagrionidae","Calliphoridae",
"Culicidae")),
Genus = as.factor(c("Helochara ",
"Toxomerus ","Halyomorpha ","Lygus ","Chironomus ",
"Phormia ","Microchrysa ","Enallagma ",
"Helochara ","Cricotopus ","Delphacidae ","Toxomerus ",
"Enallagma ","Peckhamia ","Phormia ","Cricotopus ",
"Phormia ","Spodoptera ","Tibellus ","Lestes ",
"Enallagma ","Eulophidae ","Toxomerus ",
"Phormia ","Tetragnatha ","Araneus ","Dermacentor ",
"Ischnura ","Enallagma ","Enallagma ","Phormia ",
"Culex ")),
function. = as.factor(c("pest","pollinator",
"pest","pest","pest","neutral","pollinator",
"pest predator","pest","pest","pest","pollinator",
"pest predator","pest predator","pollinator",
"pest ","pollinator","pest ","pest predator",
"pest predator","pest predator","pollinator",
"pollinator","pollinator","pest predator","pest predator",
"disease vector","pest predator","pest predator",
"pest predator","pollinator","disease vector"))

Does this work for you?

Hi @AleG,
Thanks to @andresrcs 's prompting we have a good dataset from you to work with. Still not sure exactly what output you require but here are some possibilities:

# Removed lots of trailing spaces in the strings; renamed one column
df <- data.frame(
Count = c(1L,14L,1L,1L,1L,9L,1L,8L,1L,3L,
2L,1L,4L,1L,22L,2L,2L,1L,1L,1L,11L,1L,1L,1L,2L,
2L,1L,1L,4L,4L,4L,1L),
date = as.factor(c("02Jul21LFO411-413",
"02Jul21LFO411-413","02Jul21LFO411-413",
"02Jul21LFO411-413","02Jul21LFO411-413","02Jul21LFO411-413",
"02Jul21LFO411-413","02Jul21LFO411-413",
"02Jul21LFO441-47","02Jul21LFO441-47","02Jul21LFO441-47",
"02Jul21LFO441-47","02Jul21LFO441-47",
"02Jul21LFO441-47","02Jul21LFO441-47","02Jul21WGF42",
"02Jul21WGF42","02Jul21WGF42","02Jul21WGF42",
"02Jul21WGF42","02Jul21WGF42","02Jul21WGF42","05Jul21LFC32",
"05Jul21LFC32","05Jul21LFC32","05Jul21LFC32",
"05Jul21LFC32","05Jul21LFC32","05Jul21LFC32",
"05Jul21MO","05Jul21MO","05Jul21MO")),
farm = as.factor(c("LFO","LFO","LFO",
"LFO","LFO","LFO","LFO","LFO","LFO","LFO",
"LFO","LFO","LFO","LFO","LFO","WGF","WGF","WGF",
"WGF","WGF","WGF","WGF","LFC","LFC","LFC",
"LFC","LFC","LFC","LFC","MO","MO","MO")),
field = as.factor(c("411-413","411-413",
"411-413","411-413","411-413","411-413",
"411-413","411-413","441-47","441-47","441-47",
"441-47","441-47","441-47","441-47","42","42","42",
"42","42","42","42","32","32","32","32","32",
"32","32","1","1","1")),
trap_method = as.factor(c("SN","SN","SN",
"SN","SN","SN","SN","SN","SN","SN","SN","SN",
"SN","SN","SN","SN","SN","SN","SN","SN","SN",
"SN","SN","SN","SN","SN","SN","SN","SN",
"SN","SN","SN")),
field_type = as.factor(c("Organic","Organic",
"Organic","Organic","Organic","Organic",
"Organic","Organic","Organic","Organic","Organic",
"Organic","Organic","Organic","Organic","Organic",
"Organic","Organic","Organic","Organic","Organic",
"Organic","Conventional","Conventional",
"Conventional","Conventional","Conventional",
"Conventional","Conventional","Organic","Organic","Organic")),
Class = as.factor(c("Insecta","Insecta",
"Insecta","Insecta","Insecta","Insecta",
"Insecta","Insecta","Insecta","Insecta","Insecta",
"Insecta","Insecta","Arachnida","Insecta","Insecta",
"Insecta","Insecta","Arachnida","Insecta",
"Insecta","Insecta","Insecta","Insecta","Arachnida",
"Arachnida","Arachnida","Insecta","Insecta",
"Insecta","Insecta","Insecta")),
Order = as.factor(c("Hemiptera",
"Diptera","Hemiptera","Hemiptera","Diptera","Diptera",
"Diptera","Odonata","Hemiptera","Diptera",
"Hemiptera","Diptera","Odonata","Araneae","Diptera",
"Diptera","Diptera","Lepidoptera","Araneae",
"Odonata","Odonata","Hymenoptera","Diptera","Diptera",
"Araneae","Araneae","Ixodida","Odonata",
"Odonata","Odonata","Diptera","Diptera")),
Family = as.factor(c("Cicadellidae",
"Syrphidae","Pentatomidae","Miridae","Chironomidae",
"Calliphoridae","Stratiomyidae","Coenagrionidae",
"Cicadellidae","Chironomidae","Delphacidae",
"Syrphidae","Coenagrionidae","Salticidae ",
"Calliphoridae","Chironomidae","Calliphoridae","Noctuidae",
"Philodromidae","Lestidae","Coenagrionidae",
"Eulophidae","Syrphidae","Calliphoridae",
"Tetragnathidae","Araneidae","Ixodidae","Coenagrionidae",
"Coenagrionidae","Coenagrionidae","Calliphoridae",
"Culicidae")),
Genus = as.factor(c("Helochara",
"Toxomerus","Halyomorpha","Lygus","Chironomus",
"Phormia","Microchrysa","Enallagma",
"Helochara","Cricotopus","Delphacidae","Toxomerus",
"Enallagma","Peckhamia","Phormia","Cricotopus",
"Phormia","Spodoptera","Tibellus","Lestes",
"Enallagma","Eulophidae","Toxomerus",
"Phormia","Tetragnatha","Araneus","Dermacentor",
"Ischnura","Enallagma","Enallagma","Phormia",
"Culex")),
Function = as.factor(c("pest","pollinator",
"pest","pest","pest","neutral","pollinator",
"pest predator","pest","pest","pest","pollinator",
"pest predator","pest predator","pollinator",
"pest","pollinator","pest","pest predator",
"pest predator","pest predator","pollinator",
"pollinator","pollinator","pest predator","pest predator",
"disease vector","pest predator","pest predator",
"pest predator","pollinator","disease vector")))

suppressPackageStartupMessages(library(tidyverse))

# Get the "real" date (if required later)
df <- df %>% 
  mutate(real_date = as.Date(str_sub(date, start=1L, end=7L), format="%d%b%y"))

# Simple 2-way incidence tables
table(df$date, df$Genus)
#>                    
#>                     Araneus Chironomus Cricotopus Culex Delphacidae Dermacentor
#>   02Jul21LFO411-413       0          1          0     0           0           0
#>   02Jul21LFO441-47        0          0          1     0           1           0
#>   02Jul21WGF42            0          0          1     0           0           0
#>   05Jul21LFC32            1          0          0     0           0           1
#>   05Jul21MO               0          0          0     1           0           0
#>                    
#>                     Enallagma Eulophidae Halyomorpha Helochara Ischnura Lestes
#>   02Jul21LFO411-413         1          0           1         1        0      0
#>   02Jul21LFO441-47          1          0           0         1        0      0
#>   02Jul21WGF42              1          1           0         0        0      1
#>   05Jul21LFC32              1          0           0         0        1      0
#>   05Jul21MO                 1          0           0         0        0      0
#>                    
#>                     Lygus Microchrysa Peckhamia Phormia Spodoptera Tetragnatha
#>   02Jul21LFO411-413     1           1         0       1          0           0
#>   02Jul21LFO441-47      0           0         1       1          0           0
#>   02Jul21WGF42          0           0         0       1          1           0
#>   05Jul21LFC32          0           0         0       1          0           1
#>   05Jul21MO             0           0         0       1          0           0
#>                    
#>                     Tibellus Toxomerus
#>   02Jul21LFO411-413        0         1
#>   02Jul21LFO441-47         0         1
#>   02Jul21WGF42             1         0
#>   05Jul21LFC32             0         1
#>   05Jul21MO                0         0
table(df$date, df$Function)
#>                    
#>                     disease vector neutral pest pest predator pollinator
#>   02Jul21LFO411-413              0       1    4             1          2
#>   02Jul21LFO441-47               0       0    3             2          2
#>   02Jul21WGF42                   0       0    2             3          2
#>   05Jul21LFC32                   1       0    0             4          2
#>   05Jul21MO                      1       0    0             1          1
table(df$field_type, df$Function)  # Much more diversity in the organic fields!
#>               
#>                disease vector neutral pest pest predator pollinator
#>   Conventional              1       0    0             4          2
#>   Organic                   1       1    9             7          7

# pivot the data to sum the Count data
pivot_wider(df, names_from=Function, values_from=Count) %>% 
  group_by(field_type) %>% 
  summarise_at(vars(10:14), sum, na.rm=TRUE)
#> # A tibble: 2 × 6
#>   field_type    pest pollinator neutral `pest predator` `disease vector`
#>   <fct>        <int>      <int>   <int>           <int>            <int>
#> 1 Conventional     0          2       0               9                1
#> 2 Organic         13         45       9              30                1

Created on 2022-11-02 with reprex v2.0.2

1 Like

This is super helpful. I am trying to interpret the code. What does the "start = 1L , end= 7L" mean?

Thank you.

Hi @AleG,
That is just extracting the first seven characters of each string as part of creating an actual R-format date which may be needed later for graphing, etc.

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.