Recode column in the gapminder wide dataset

blacng · September 5, 2020, 10:02pm

I want to replicate what was done here with tidyr::pivot_longer instead of tidyr::gather

# load libraries
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyr)
library(readr)

# Read in gapminder (wide format)
file_url <- "https://bit.ly/2Z406vV"
(gapminder_wide <- read_csv(file_url))
#> Parsed with column specification:
#> cols(
#>   .default = col_double(),
#>   continent = col_character(),
#>   country = col_character()
#> )
#> See spec(...) for full column specifications.
#> # A tibble: 142 x 38
#>    continent country gdpPercap_1952 gdpPercap_1957 gdpPercap_1962 gdpPercap_1967
#>    <chr>     <chr>            <dbl>          <dbl>          <dbl>          <dbl>
#>  1 Africa    Algeria          2449.          3014.          2551.          3247.
#>  2 Africa    Angola           3521.          3828.          4269.          5523.
#>  3 Africa    Benin            1063.           960.           949.          1036.
#>  4 Africa    Botswa…           851.           918.           984.          1215.
#>  5 Africa    Burkin…           543.           617.           723.           795.
#>  6 Africa    Burundi           339.           380.           355.           413.
#>  7 Africa    Camero…          1173.          1313.          1400.          1508.
#>  8 Africa    Centra…          1071.          1191.          1193.          1136.
#>  9 Africa    Chad             1179.          1308.          1390.          1197.
#> 10 Africa    Comoros          1103.          1211.          1407.          1876.
#> # … with 132 more rows, and 32 more variables: gdpPercap_1972 <dbl>,
#> #   gdpPercap_1977 <dbl>, gdpPercap_1982 <dbl>, gdpPercap_1987 <dbl>,
#> #   gdpPercap_1992 <dbl>, gdpPercap_1997 <dbl>, gdpPercap_2002 <dbl>,
#> #   gdpPercap_2007 <dbl>, lifeExp_1952 <dbl>, lifeExp_1957 <dbl>,
#> #   lifeExp_1962 <dbl>, lifeExp_1967 <dbl>, lifeExp_1972 <dbl>,
#> #   lifeExp_1977 <dbl>, lifeExp_1982 <dbl>, lifeExp_1987 <dbl>,
#> #   lifeExp_1992 <dbl>, lifeExp_1997 <dbl>, lifeExp_2002 <dbl>,
#> #   lifeExp_2007 <dbl>, pop_1952 <dbl>, pop_1957 <dbl>, pop_1962 <dbl>,
#> #   pop_1967 <dbl>, pop_1972 <dbl>, pop_1977 <dbl>, pop_1982 <dbl>,
#> #   pop_1987 <dbl>, pop_1992 <dbl>, pop_1997 <dbl>, pop_2002 <dbl>,
#> #   pop_2007 <dbl>

# Attempt converting wide format to long format
gapminder_wide %>%
  pivot_longer(
    cols = gdpPercap_1952:gdpPercap_2007, 
    names_to = "gdp_year", 
    values_to = "gdpPercap") %>% 
  pivot_longer(
    cols = lifeExp_1952:lifeExp_2007,
    names_to = "lifeExp_year",
    values_to = "lifeExp"
  ) %>% 
    pivot_longer(
    cols = pop_1952:pop_2007,
    names_to = "pop_year",
    values_to = "pop"
  ) %>% 
  select(-everything(), 
         starts_with("c"),
         "gdpPercap",
         "pop_year",
         "lifeExp",
         "pop") %>% 
  rename(year = pop_year) %>% 
  mutate_at(vars(matches("[a-z_0-9]*")),   recode(pop_1952 = 1952,
         pop_1957 = 1957,
         pop_1962 = 1962,
         pop_1967 = 1967,
         pop_1972 = 1972,
         pop_1977 = 1977,
         pop_1982 = 1982,
         pop_1987 = 1987,
         pop_1992 = 1992,
         pop_1997 = 1997,
         pop_2002 = 2002,
         pop_2007 = 2007))
#> Warning in recode.numeric(pop_1952 = 1952, pop_1957 = 1957, pop_1962 = 1962, :
#> NAs introduced by coercion
#> Error in recode.numeric(pop_1952 = 1952, pop_1957 = 1957, pop_1962 = 1962, : argument ".x" is missing, with no default

elmstedt · September 5, 2020, 10:34pm

library(tidyverse) # install.packages("tidyverse")

gap_wide <- read.csv('https://raw.githubusercontent.com/OHI-Science/data-science-training/master/data/gapminder_wide.csv')

gap_g <- gap_wide %>% 
  gather(key   = obstype_year,
         value = obs_values,
         -continent, -country)%>%
  as_tibble() # convert to a tibble object to match the value from pivot_longer()
# fix some sorting issues.
gap_g_ord <- order(gap_g$continent,gap_g$country, gap_g$obstype_year )
gap_g <- gap_g[gap_g_ord, ]

gap_pl <- gap_wide %>%
  pivot_longer(cols = -c(continent, country),
               names_to = "obstype_year",
               values_to = "obs_values")

identical(gap_g, gap_pl)
#> [1] TRUE

^{Created on 2020-09-05 by the reprex package (v0.3.0)}

system · September 12, 2020, 10:34pm

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.