Pivot wider then reverse back to tall data

library(gapminder)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

1 Read in and view data

data(gapminder)
head(gapminder)
# A tibble: 6 × 6
  country     continent  year lifeExp      pop gdpPercap
  <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
1 Afghanistan Asia       1952    28.8  8425333      779.
2 Afghanistan Asia       1957    30.3  9240934      821.
3 Afghanistan Asia       1962    32.0 10267083      853.
4 Afghanistan Asia       1967    34.0 11537966      836.
5 Afghanistan Asia       1972    36.1 13079460      740.
6 Afghanistan Asia       1977    38.4 14880372      786.

2 Full wide dataset with pivot_wider()

gap_wide <- gapminder %>% pivot_wider(names_from = year, 
                                values_from = c(lifeExp, pop, gdpPercap))

head(gap_wide)
# A tibble: 6 × 38
  country     continent lifeExp_1952 lifeExp_1957 lifeExp_1962 lifeExp_1967
  <fct>       <fct>            <dbl>        <dbl>        <dbl>        <dbl>
1 Afghanistan Asia              28.8         30.3         32.0         34.0
2 Albania     Europe            55.2         59.3         64.8         66.2
3 Algeria     Africa            43.1         45.7         48.3         51.4
4 Angola      Africa            30.0         32.0         34           36.0
5 Argentina   Americas          62.5         64.4         65.1         65.6
6 Australia   Oceania           69.1         70.3         70.9         71.1
# ℹ 32 more variables: lifeExp_1972 <dbl>, lifeExp_1977 <dbl>,
#   lifeExp_1982 <dbl>, lifeExp_1987 <dbl>, lifeExp_1992 <dbl>,
#   lifeExp_1997 <dbl>, lifeExp_2002 <dbl>, lifeExp_2007 <dbl>, pop_1952 <int>,
#   pop_1957 <int>, pop_1962 <int>, pop_1967 <int>, pop_1972 <int>,
#   pop_1977 <int>, pop_1982 <int>, pop_1987 <int>, pop_1992 <int>,
#   pop_1997 <int>, pop_2002 <int>, pop_2007 <int>, gdpPercap_1952 <dbl>,
#   gdpPercap_1957 <dbl>, gdpPercap_1962 <dbl>, gdpPercap_1967 <dbl>, …
  • One column per variable (lifeExp, pop, gdpPercap) per year

3 Step 1: Reverse part way with pivot_longer()

gap_tall <- gap_wide %>% pivot_longer(cols = 3:38, 
                                      names_to = "var_year", 
                                      values_to = "value")

head(gap_tall) 
# A tibble: 6 × 4
  country     continent var_year     value
  <fct>       <fct>     <chr>        <dbl>
1 Afghanistan Asia      lifeExp_1952  28.8
2 Afghanistan Asia      lifeExp_1957  30.3
3 Afghanistan Asia      lifeExp_1962  32.0
4 Afghanistan Asia      lifeExp_1967  34.0
5 Afghanistan Asia      lifeExp_1972  36.1
6 Afghanistan Asia      lifeExp_1977  38.4
  • var_year contains both variable and year information
  • value has the actual number value
  • Many rows!

4 Step 2: Split var_year into two variables with separate()

gap_tall2 <- gap_tall %>% separate(var_year, 
                                   sep = "_", 
                                   into = c("variable", "year"))

head(gap_tall2)
# A tibble: 6 × 5
  country     continent variable year  value
  <fct>       <fct>     <chr>    <chr> <dbl>
1 Afghanistan Asia      lifeExp  1952   28.8
2 Afghanistan Asia      lifeExp  1957   30.3
3 Afghanistan Asia      lifeExp  1962   32.0
4 Afghanistan Asia      lifeExp  1967   34.0
5 Afghanistan Asia      lifeExp  1972   36.1
6 Afghanistan Asia      lifeExp  1977   38.4
  • Still many rows, but the variable name and year are split into 2 variables now

5 Step 3: Finish converting back to tall data with another pivot_wider()

  • Weird to have to use pivot_wider() to get back to the tall dataset…
gap_tall3 <- gap_tall2 %>% pivot_wider(names_from = variable,
                                       values_from = value)

head(gap_tall3)
# A tibble: 6 × 6
  country     continent year  lifeExp      pop gdpPercap
  <fct>       <fct>     <chr>   <dbl>    <dbl>     <dbl>
1 Afghanistan Asia      1952     28.8  8425333      779.
2 Afghanistan Asia      1957     30.3  9240934      821.
3 Afghanistan Asia      1962     32.0 10267083      853.
4 Afghanistan Asia      1967     34.0 11537966      836.
5 Afghanistan Asia      1972     36.1 13079460      740.
6 Afghanistan Asia      1977     38.4 14880372      786.

6 Compare to original data

head(gapminder) 
# A tibble: 6 × 6
  country     continent  year lifeExp      pop gdpPercap
  <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
1 Afghanistan Asia       1952    28.8  8425333      779.
2 Afghanistan Asia       1957    30.3  9240934      821.
3 Afghanistan Asia       1962    32.0 10267083      853.
4 Afghanistan Asia       1967    34.0 11537966      836.
5 Afghanistan Asia       1972    36.1 13079460      740.
6 Afghanistan Asia       1977    38.4 14880372      786.
head(gap_tall3)
# A tibble: 6 × 6
  country     continent year  lifeExp      pop gdpPercap
  <fct>       <fct>     <chr>   <dbl>    <dbl>     <dbl>
1 Afghanistan Asia      1952     28.8  8425333      779.
2 Afghanistan Asia      1957     30.3  9240934      821.
3 Afghanistan Asia      1962     32.0 10267083      853.
4 Afghanistan Asia      1967     34.0 11537966      836.
5 Afghanistan Asia      1972     36.1 13079460      740.
6 Afghanistan Asia      1977     38.4 14880372      786.