14
18

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

More than 3 years have passed since last update.

Rのtidyrのpivot_longerとpivot_widerの使い方メモ

Last updated at Posted at 2021-03-07

はじめに

Rのtidyrpivot_longerpivot_widerには、いろいろ引数が指定できます。Rのサンプルデータを用いて、pivot_longerpivot_widerの使い方を練習してみました。

目次

pivot_longer(), pivot_wider()

Rのtidyrpivot_longerpivot_widerの使い方をRのサンプルデータを用いて練習します。
tidyr
Pivoting
Pivot data from wide to long
Pivot data from long to wide

参考:
https://r4ds.had.co.nz/tidy-data.html#pivoting
https://heavywatal.github.io/rstats/tidyr.html

練習に用いるデータ

Rのサンプルデータirisanscombeを用います。
行番号を表示した方が分かりやすいため、tibble::rowid_to_column()で行番号を列名にしています。

R
library(dplyr)
library(tibble)
library(tidyr)

iris

# 行番号を列名に
iris_tbl <- iris %>% as_tibble() %>% rowid_to_column("id")
iris_tbl
# # A tibble: 150 x 6
#       id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#    <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
#  1     1          5.1         3.5          1.4         0.2 setosa 
#  2     2          4.9         3            1.4         0.2 setosa 
#  3     3          4.7         3.2          1.3         0.2 setosa 
#  4     4          4.6         3.1          1.5         0.2 setosa 
#  5     5          5           3.6          1.4         0.2 setosa 
#  6     6          5.4         3.9          1.7         0.4 setosa 
#  7     7          4.6         3.4          1.4         0.3 setosa 
#  8     8          5           3.4          1.5         0.2 setosa 
#  9     9          4.4         2.9          1.4         0.2 setosa 
# 10    10          4.9         3.1          1.5         0.1 setosa 
# # ... with 140 more rows

iris_tbl <- iris %>%
  as_tibble() %>% print() %>% # 途中経過をprint
  rowid_to_column("id") %>% print() # 最終結果をprint
# # A tibble: 150 x 5
#    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#           <dbl>       <dbl>        <dbl>       <dbl> <fct>  
#  1          5.1         3.5          1.4         0.2 setosa 
#  2          4.9         3            1.4         0.2 setosa 
#  3          4.7         3.2          1.3         0.2 setosa 
#  4          4.6         3.1          1.5         0.2 setosa 
#  5          5           3.6          1.4         0.2 setosa 
#  6          5.4         3.9          1.7         0.4 setosa 
#  7          4.6         3.4          1.4         0.3 setosa 
#  8          5           3.4          1.5         0.2 setosa 
#  9          4.4         2.9          1.4         0.2 setosa 
# 10          4.9         3.1          1.5         0.1 setosa 
# # ... with 140 more rows
# # A tibble: 150 x 6
#       id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#    <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
#  1     1          5.1         3.5          1.4         0.2 setosa 
#  2     2          4.9         3            1.4         0.2 setosa 
#  3     3          4.7         3.2          1.3         0.2 setosa 
#  4     4          4.6         3.1          1.5         0.2 setosa 
#  5     5          5           3.6          1.4         0.2 setosa 
#  6     6          5.4         3.9          1.7         0.4 setosa 
#  7     7          4.6         3.4          1.4         0.3 setosa 
#  8     8          5           3.4          1.5         0.2 setosa 
#  9     9          4.4         2.9          1.4         0.2 setosa 
# 10    10          4.9         3.1          1.5         0.1 setosa 
# # ... with 140 more rows

anscombe

anscombe_tbl <- anscombe %>% as_tibble() %>% rowid_to_column("id")
anscombe_tbl
# # A tibble: 11 x 9
#       id    x1    x2    x3    x4    y1    y2    y3    y4
#    <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#  1     1    10    10    10     8  8.04  9.14  7.46  6.58
#  2     2     8     8     8     8  6.95  8.14  6.77  5.76
#  3     3    13    13    13     8  7.58  8.74 12.7   7.71
#  4     4     9     9     9     8  8.81  8.77  7.11  8.84
#  5     5    11    11    11     8  8.33  9.26  7.81  8.47
#  6     6    14    14    14     8  9.96  8.1   8.84  7.04
#  7     7     6     6     6     8  7.24  6.13  6.08  5.25
#  8     8     4     4     4    19  4.26  3.1   5.39 12.5 
#  9     9    12    12    12     8 10.8   9.13  8.15  5.56
# 10    10     7     7     7     8  4.82  7.26  6.42  7.91
# 11    11     5     5     5     8  5.68  4.74  5.73  6.89

pivot_longer()

引数colsに指定した列を縦持ちデータに変換します。

R
# Sepal.Length~Petal.Width列を縦長に
iris_tbl %>%
  pivot_longer(cols = c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"))
# # A tibble: 600 x 4
#       id Species name         value
#    <int> <fct>   <chr>        <dbl>
#  1     1 setosa  Sepal.Length   5.1
#  2     1 setosa  Sepal.Width    3.5
#  3     1 setosa  Petal.Length   1.4
#  4     1 setosa  Petal.Width    0.2
#  5     2 setosa  Sepal.Length   4.9
#  6     2 setosa  Sepal.Width    3  
#  7     2 setosa  Petal.Length   1.4
#  8     2 setosa  Petal.Width    0.2
#  9     3 setosa  Sepal.Length   4.7
# 10     3 setosa  Sepal.Width    3.2
# # ... with 590 more rows

# id,Species列以外の列(Sepal.Length~Petal.Width列)を縦長に
iris_tbl %>%
  pivot_longer(cols = -c(id, Species))
iris_tbl %>%
  pivot_longer(cols = c(-id, -Species))
iris_tbl %>%
  pivot_longer(cols = !c(id, Species))

# thで終わる列(Sepal.Length~Petal.Width列)を縦長に
iris_tbl %>%
  pivot_longer(cols = ends_with("th"))

anscombe_tbl %>%
  pivot_longer(cols = -id)
# # A tibble: 88 x 3
#       id name  value
#    <int> <chr> <dbl>
#  1     1 x1    10   
#  2     1 x2    10   
#  3     1 x3    10   
#  4     1 x4     8   
#  5     1 y1     8.04
#  6     1 y2     9.14
#  7     1 y3     7.46
#  8     1 y4     6.58
#  9     2 x1     8   
# 10     2 x2     8   
# # ... with 78 more rows

引数names_to, values_to

引数names_tovalues_toで、変換後の列名を指定できます。

R
iris_tbl %>%
  pivot_longer(cols = c(-id, -Species), names_to = "item", values_to = "val")
# # A tibble: 600 x 4
#       id Species item           val
#    <int> <fct>   <chr>        <dbl>
#  1     1 setosa  Sepal.Length   5.1
#  2     1 setosa  Sepal.Width    3.5
#  3     1 setosa  Petal.Length   1.4
#  4     1 setosa  Petal.Width    0.2
#  5     2 setosa  Sepal.Length   4.9
#  6     2 setosa  Sepal.Width    3  
#  7     2 setosa  Petal.Length   1.4
#  8     2 setosa  Petal.Width    0.2
#  9     3 setosa  Sepal.Length   4.7
# 10     3 setosa  Sepal.Width    3.2
# # ... with 590 more rows

anscombe_tbl %>%
  pivot_longer(cols = -id, names_to = "name", values_to = "val")
# # A tibble: 88 x 3
#       id name    val
#    <int> <chr> <dbl>
#  1     1 x1    10   
#  2     1 x2    10   
#  3     1 x3    10   
#  4     1 x4     8   
#  5     1 y1     8.04
#  6     1 y2     9.14
#  7     1 y3     7.46
#  8     1 y4     6.58
#  9     2 x1     8   
# 10     2 x2     8   
# # ... with 78 more rows

anscombe_tbl %>%
  select(id, starts_with("x")) %>% print() %>%
  pivot_longer(cols = -id, names_to = "xn", values_to = "val")
# # A tibble: 11 x 5
#       id    x1    x2    x3    x4
#    <int> <dbl> <dbl> <dbl> <dbl>
#  1     1    10    10    10     8
#  2     2     8     8     8     8
#  3     3    13    13    13     8
#  4     4     9     9     9     8
#  5     5    11    11    11     8
#  6     6    14    14    14     8
#  7     7     6     6     6     8
#  8     8     4     4     4    19
#  9     9    12    12    12     8
# 10    10     7     7     7     8
# 11    11     5     5     5     8
# # A tibble: 44 x 3
#       id xn      val
#    <int> <chr> <dbl>
#  1     1 x1       10
#  2     1 x2       10
#  3     1 x3       10
#  4     1 x4        8
#  5     2 x1        8
#  6     2 x2        8
#  7     2 x3        8
#  8     2 x4        8
#  9     3 x1       13
# 10     3 x2       13
# # ... with 34 more rows

引数names_prefix

縦持ちデータに変換する列名のうち、引数names_prefixに指定した文字列を接頭辞(prefix)として変換後のデータに入れないようにできます。

R
# 列x1~x4("xn")の"x"を接頭辞(prefix)として"n"部分のみ取り出す
anscombe_tbl %>%
  select(id, starts_with("x")) %>% print() %>%
  pivot_longer(cols = -id, names_to = "xn", names_prefix = "x", values_to = "val")
# # A tibble: 11 x 5
#       id    x1    x2    x3    x4
#    <int> <dbl> <dbl> <dbl> <dbl>
#  1     1    10    10    10     8
#  2     2     8     8     8     8
#  3     3    13    13    13     8
#  4     4     9     9     9     8
#  5     5    11    11    11     8
#  6     6    14    14    14     8
#  7     7     6     6     6     8
#  8     8     4     4     4    19
#  9     9    12    12    12     8
# 10    10     7     7     7     8
# 11    11     5     5     5     8
# # A tibble: 44 x 3
#       id xn      val
#    <int> <chr> <dbl>
#  1     1 1        10
#  2     1 2        10
#  3     1 3        10
#  4     1 4         8
#  5     2 1         8
#  6     2 2         8
#  7     2 3         8
#  8     2 4         8
#  9     3 1        13
# 10     3 2        13
# # ... with 34 more rows

# 接頭辞の文字列がない場合は無視
anscombe_tbl %>%
  pivot_longer(cols = -id, names_to = "xn", names_prefix = "x", values_to = "val")
# # A tibble: 88 x 3
#       id xn      val
#    <int> <chr> <dbl>
#  1     1 1     10   
#  2     1 2     10   
#  3     1 3     10   
#  4     1 4      8   
#  5     1 y1     8.04
#  6     1 y2     9.14
#  7     1 y3     7.46
#  8     1 y4     6.58
#  9     2 1      8   
# 10     2 2      8   
# # ... with 78 more rows

iris_tbl %>%
  select(id, starts_with("Sepal"), Species) %>% print() %>%
  pivot_longer(cols = -c(id, Species), names_to = "name", names_prefix = "Sepal.",
               values_to = "val")
# # A tibble: 150 x 4
#       id Sepal.Length Sepal.Width Species
#    <int>        <dbl>       <dbl> <fct>  
#  1     1          5.1         3.5 setosa 
#  2     2          4.9         3   setosa 
#  3     3          4.7         3.2 setosa 
#  4     4          4.6         3.1 setosa 
#  5     5          5           3.6 setosa 
#  6     6          5.4         3.9 setosa 
#  7     7          4.6         3.4 setosa 
#  8     8          5           3.4 setosa 
#  9     9          4.4         2.9 setosa 
# 10    10          4.9         3.1 setosa 
# # ... with 140 more rows
# # A tibble: 300 x 4
#       id Species name     val
#    <int> <fct>   <chr>  <dbl>
#  1     1 setosa  Length   5.1
#  2     1 setosa  Width    3.5
#  3     2 setosa  Length   4.9
#  4     2 setosa  Width    3  
#  5     3 setosa  Length   4.7
#  6     3 setosa  Width    3.2
#  7     4 setosa  Length   4.6
#  8     4 setosa  Width    3.1
#  9     5 setosa  Length   5  
# 10     5 setosa  Width    3.6
# # ... with 290 more rows

引数names_sep

引数names_sepを指定すると、pivot_longerしてseparateするのを一度にできます。

R
# "xn"を1桁目で分ける(1Lは整数1のこと)
anscombe_tbl %>%
  select(id, starts_with("x")) %>% print() %>%
  pivot_longer(cols = -id, names_to = c("x", "n"), names_sep = 1L, values_to = "val")
# # A tibble: 11 x 5
#       id    x1    x2    x3    x4
#    <int> <dbl> <dbl> <dbl> <dbl>
#  1     1    10    10    10     8
#  2     2     8     8     8     8
#  3     3    13    13    13     8
#  4     4     9     9     9     8
#  5     5    11    11    11     8
#  6     6    14    14    14     8
#  7     7     6     6     6     8
#  8     8     4     4     4    19
#  9     9    12    12    12     8
# 10    10     7     7     7     8
# 11    11     5     5     5     8
# # A tibble: 44 x 4
#       id x     n       val
#    <int> <chr> <chr> <dbl>
#  1     1 x     1        10
#  2     1 x     2        10
#  3     1 x     3        10
#  4     1 x     4         8
#  5     2 x     1         8
#  6     2 x     2         8
#  7     2 x     3         8
#  8     2 x     4         8
#  9     3 x     1        13
# 10     3 x     2        13
# # ... with 34 more rows
# こうするのと同じ
anscombe_tbl %>%
  select(id, starts_with("x")) %>% print() %>%
  pivot_longer(cols = -id, names_to = "xn", values_to = "val") %>% print() %>%
  separate(col = xn, into = c("x", "n"), sep = 1L)
# # A tibble: 11 x 5
#       id    x1    x2    x3    x4
#    <int> <dbl> <dbl> <dbl> <dbl>
#  1     1    10    10    10     8
#  2     2     8     8     8     8
#  3     3    13    13    13     8
#  4     4     9     9     9     8
#  5     5    11    11    11     8
#  6     6    14    14    14     8
#  7     7     6     6     6     8
#  8     8     4     4     4    19
#  9     9    12    12    12     8
# 10    10     7     7     7     8
# 11    11     5     5     5     8
# # A tibble: 44 x 3
#    id    xn      val
#       id xn      val
#    <int> <chr> <dbl>
#  1     1 x1       10
#  2     1 x2       10
#  3     1 x3       10
#  4     1 x4        8
#  5     2 x1        8
#  6     2 x2        8
#  7     2 x3        8
#  8     2 x4        8
#  9     3 x1       13
# 10     3 x2       13
# # ... with 34 more rows
# # A tibble: 44 x 4
#       id x     n       val
#    <int> <chr> <chr> <dbl>
#  1     1 x     1        10
#  2     1 x     2        10
#  3     1 x     3        10
#  4     1 x     4         8
#  5     2 x     1         8
#  6     2 x     2         8
#  7     2 x     3         8
#  8     2 x     4         8
#  9     3 x     1        13
# 10     3 x     2        13
# # ... with 34 more rows

# 軸(x,y)とセット番号(1,2,3,4)を分けてpivot
anscombe_tbl %>%
  pivot_longer(cols = -id, names_to = c("axis", "set"), names_sep = 1L,
               values_to = "val")
# # A tibble: 88 x 4
#       id axis  set     val
#    <int> <chr> <chr> <dbl>
#  1     1 x     1     10   
#  2     1 x     2     10   
#  3     1 x     3     10   
#  4     1 x     4      8   
#  5     1 y     1      8.04
#  6     1 y     2      9.14
#  7     1 y     3      7.46
#  8     1 y     4      6.58
#  9     2 x     1      8   
# 10     2 x     2      8   
# # ... with 78 more rows
# こうするのと同じ
anscombe_tbl %>%
  pivot_longer(cols = -id, names_to = "name", values_to = "val") %>%
  separate(col = name, into = c("axis", "set"), sep = 1L)

# "Sepal.Length"の"."で"Sepal"と"Length"に分ける。ただし、"."は"\\."で表す(正規表現)
iris_tbl %>%
  pivot_longer(cols = -c(id, Species), names_to = c("part", "measure"),
               names_sep = "\\.", values_to = "val")
# # A tibble: 600 x 5
#       id Species part  mesure   val
#    <int> <fct>   <chr> <chr>  <dbl>
#  1     1 setosa  Sepal Length   5.1
#  2     1 setosa  Sepal Width    3.5
#  3     1 setosa  Petal Length   1.4
#  4     1 setosa  Petal Width    0.2
#  5     2 setosa  Sepal Length   4.9
#  6     2 setosa  Sepal Width    3  
#  7     2 setosa  Petal Length   1.4
#  8     2 setosa  Petal Width    0.2
#  9     3 setosa  Sepal Length   4.7
# 10     3 setosa  Sepal Width    3.2
# # ... with 590 more rows
# こうするのと同じ
iris_tbl %>%
  pivot_longer(cols = -c(id, Species), names_to = "name",
               values_to = "val") %>% print() %>%
  separate(col = name, into = c("part", "mesure"), sep = "\\.")

# 6桁目で分ける
iris_tbl %>%
  pivot_longer(cols = -c(id, Species), names_to = c("part", "measure"),
               names_sep = 6L, values_to = "val")
# # A tibble: 600 x 5
#       id Species part   measure   val
#    <int> <fct>   <chr>  <chr>   <dbl>
#  1     1 setosa  Sepal. Length    5.1
#  2     1 setosa  Sepal. Width     3.5
#  3     1 setosa  Petal. Length    1.4
#  4     1 setosa  Petal. Width     0.2
#  5     2 setosa  Sepal. Length    4.9
#  6     2 setosa  Sepal. Width     3  
#  7     2 setosa  Petal. Length    1.4
#  8     2 setosa  Petal. Width     0.2
#  9     3 setosa  Sepal. Length    4.7
# 10     3 setosa  Sepal. Width     3.2
# # ... with 590 more rows

注意)正規表現(ICU正規表現)について:

引数names_pattern

引数names_sepの代わりに、引数names_patternで分け方のパターンを指定できます。

R
# 正規表現"(.)(.)"に従って、1桁の何かと1桁の何かに分ける
anscombe_tbl %>%
  pivot_longer(cols = -id, names_to = c("axis", "set"),
               names_pattern = "(.)(.)", values_to = "val")
# # A tibble: 88 x 4
#       id axis  set     val
#    <int> <chr> <chr> <dbl>
#  1     1 x     1     10   
#  2     1 x     2     10   
#  3     1 x     3     10   
#  4     1 x     4      8   
#  5     1 y     1      8.04
#  6     1 y     2      9.14
#  7     1 y     3      7.46
#  8     1 y     4      6.58
#  9     2 x     1      8   
# 10     2 x     2      8   
# # ... with 78 more rows

# 正規表現"(.+)\\.(.+)"に従って、1桁以上の何かと"."をはさんで1桁以上の何かに分ける
iris_tbl %>%
  pivot_longer(cols = -c(id, Species), names_to = c("part", "measure"),
               names_pattern = "(.+)\\.(.+)", values_to = "val")
# # A tibble: 600 x 5
#       id Species part  measure   val
#    <int> <fct>   <chr> <chr>   <dbl>
#  1     1 setosa  Sepal Length    5.1
#  2     1 setosa  Sepal Width     3.5
#  3     1 setosa  Petal Length    1.4
#  4     1 setosa  Petal Width     0.2
#  5     2 setosa  Sepal Length    4.9
#  6     2 setosa  Sepal Width     3  
#  7     2 setosa  Petal Length    1.4
#  8     2 setosa  Petal Width     0.2
#  9     3 setosa  Sepal Length    4.7
# 10     3 setosa  Sepal Width     3.2
# # ... with 590 more rows

".value"

列名を、縦持ちに変換する部分と列名に残す部分とに分けて、変換できます。

R
# 軸とセット番号を分けてセット番号のみ縦持ちに(軸は列名に残す)
anscombe_tbl %>%
  pivot_longer(cols = -id, names_to = c(".value", "set"), names_sep = 1L) 
# # A tibble: 44 x 4
#       id set       x     y
#    <int> <chr> <dbl> <dbl>
#  1     1 1        10  8.04
#  2     1 2        10  9.14
#  3     1 3        10  7.46
#  4     1 4         8  6.58
#  5     2 1         8  6.95
#  6     2 2         8  8.14
#  7     2 3         8  6.77
#  8     2 4         8  5.76
#  9     3 1        13  7.58
# 10     3 2        13  8.74
# # ... with 34 more rows
# pivot_longerして、軸とセット番号を分けて、軸だけpivot_widerで列に戻したのと同じ
anscombe_tbl %>%
  pivot_longer(cols = -id, names_to = "name", values_to = "val") %>% print() %>%
  separate(col = name, into = c("axis", "set"), sep = 1L) %>% print() %>%
  pivot_wider(names_from = axis, values_from = val)
# # A tibble: 88 x 3
#       id name    val
#    <int> <chr> <dbl>
#  1     1 x1    10   
#  2     1 x2    10   
#  3     1 x3    10   
#  4     1 x4     8   
#  5     1 y1     8.04
#  6     1 y2     9.14
#  7     1 y3     7.46
#  8     1 y4     6.58
#  9     2 x1     8   
# 10     2 x2     8   
# # ... with 78 more rows
# # A tibble: 88 x 4
#       id axis  set     val
#    <int> <chr> <chr> <dbl>
#  1     1 x     1     10   
#  2     1 x     2     10   
#  3     1 x     3     10   
#  4     1 x     4      8   
#  5     1 y     1      8.04
#  6     1 y     2      9.14
#  7     1 y     3      7.46
#  8     1 y     4      6.58
#  9     2 x     1      8   
# 10     2 x     2      8   
# # ... with 78 more rows
# # A tibble: 44 x 4
#       id set       x     y
#    <int> <chr> <dbl> <dbl>
#  1     1 1        10  8.04
#  2     1 2        10  9.14
#  3     1 3        10  7.46
#  4     1 4         8  6.58
#  5     2 1         8  6.95
#  6     2 2         8  8.14
#  7     2 3         8  6.77
#  8     2 4         8  5.76
#  9     3 1        13  7.58
# 10     3 2        13  8.74
# # ... with 34 more rows

# names_sepの代わりにnames_patternで指定しても同じ
anscombe_tbl %>%
  pivot_longer(cols = -id, names_to = c(".value", "set"), names_pattern = "(.)(.)")

# 軸とセット番号を分けて軸のみ縦持ち(セット番号は列名に残す)
anscombe_tbl %>%
  pivot_longer(cols = -id, names_to = c("axis", ".value"), names_sep = 1L)
# # A tibble: 22 x 6
#       id axis    `1`   `2`   `3`   `4`
#    <int> <chr> <dbl> <dbl> <dbl> <dbl>
#  1     1 x     10    10    10     8   
#  2     1 y      8.04  9.14  7.46  6.58
#  3     2 x      8     8     8     8   
#  4     2 y      6.95  8.14  6.77  5.76
#  5     3 x     13    13    13     8   
#  6     3 y      7.58  8.74 12.7   7.71
#  7     4 x      9     9     9     8   
#  8     4 y      8.81  8.77  7.11  8.84
#  9     5 x     11    11    11     8   
# 10     5 y      8.33  9.26  7.81  8.47
# # ... with 12 more rows
# pivot_longerして、軸とセット番号を分けて、セット番号だけpivot_widerで列に戻したのと同じ
anscombe_tbl %>%
  pivot_longer(cols = -id, names_to = "name", values_to = "val") %>% print() %>%
  separate(col = name, into = c("axis", "set"), sep = 1L) %>% print() %>%
  pivot_wider(names_from = set, values_from = val)

# names_sepの代わりにnames_patternで指定しても同じ
anscombe_tbl %>%
  pivot_longer(cols = -id, names_to = c("axis", ".value"), names_pattern = "(.)(.)")

iris_tbl %>%
  pivot_longer(cols = -c(id, Species), names_to = c(".value", "measure"),
               names_sep = "\\.")
# # A tibble: 300 x 5
#       id Species measure Sepal Petal
#    <int> <fct>   <chr>   <dbl> <dbl>
#  1     1 setosa  Length    5.1   1.4
#  2     1 setosa  Width     3.5   0.2
#  3     2 setosa  Length    4.9   1.4
#  4     2 setosa  Width     3     0.2
#  5     3 setosa  Length    4.7   1.3
#  6     3 setosa  Width     3.2   0.2
#  7     4 setosa  Length    4.6   1.5
#  8     4 setosa  Width     3.1   0.2
#  9     5 setosa  Length    5     1.4
# 10     5 setosa  Width     3.6   0.2
# # ... with 290 more rows
# pivot_longerして、separateして、partだけpivot_widerで列に戻したのと同じ
iris_tbl %>%
  pivot_longer(cols = -c(id, Species), names_to = "name", values_to = "val") %>% print() %>%
  separate(col = name, into = c("part", "measure"), sep = "\\.") %>% print() %>%
  pivot_wider(names_from = part, values_from = val)

iris_tbl %>%
  pivot_longer(cols = -c(id, Species), names_to = c("part", ".value"),
               names_sep = "\\.")
# # A tibble: 300 x 5
#       id Species part  Length Width
#    <int> <fct>   <chr>  <dbl> <dbl>
#  1     1 setosa  Sepal    5.1   3.5
#  2     1 setosa  Petal    1.4   0.2
#  3     2 setosa  Sepal    4.9   3  
#  4     2 setosa  Petal    1.4   0.2
#  5     3 setosa  Sepal    4.7   3.2
#  6     3 setosa  Petal    1.3   0.2
#  7     4 setosa  Sepal    4.6   3.1
#  8     4 setosa  Petal    1.5   0.2
#  9     5 setosa  Sepal    5     3.6
# 10     5 setosa  Petal    1.4   0.2
# # ... with 290 more rows
# pivot_longerして、separateして、partだけpivot_widerで列に戻したのと同じ
iris_tbl %>%
  pivot_longer(cols = -c(id, Species), names_to = "name", values_to = "val") %>% print() %>%
  separate(col = name, into = c("part", "measure"), sep = "\\.") %>% print() %>%
  pivot_wider(names_from = measure, values_from = val)

separate()

列をある規則的に分けます。

R
# データの準備
iris_2 <- iris_tbl %>%
  pivot_longer(cols = -c(id, Species), names_to = "name", values_to = "value")
iris_2
# # A tibble: 600 x 4
#       id Species name         value
#    <int> <fct>   <chr>        <dbl>
#  1     1 setosa  Sepal.Length   5.1
#  2     1 setosa  Sepal.Width    3.5
#  3     1 setosa  Petal.Length   1.4
#  4     1 setosa  Petal.Width    0.2
#  5     2 setosa  Sepal.Length   4.9
#  6     2 setosa  Sepal.Width    3  
#  7     2 setosa  Petal.Length   1.4
#  8     2 setosa  Petal.Width    0.2
#  9     3 setosa  Sepal.Length   4.7
# 10     3 setosa  Sepal.Width    3.2
# # ... with 590 more rows

# name列を"."でpart列とmeasure列に分ける
iris_2 %>%
  separate(col = name, into = c("part", "measure"), sep = "\\.")
# # A tibble: 600 x 5
#       id Species part  measure value
#    <int> <fct>   <chr> <chr>   <dbl>
#  1     1 setosa  Sepal Length    5.1
#  2     1 setosa  Sepal Width     3.5
#  3     1 setosa  Petal Length    1.4
#  4     1 setosa  Petal Width     0.2
#  5     2 setosa  Sepal Length    4.9
#  6     2 setosa  Sepal Width     3  
#  7     2 setosa  Petal Length    1.4
#  8     2 setosa  Petal Width     0.2
#  9     3 setosa  Sepal Length    4.7
# 10     3 setosa  Sepal Width     3.2
# # ... with 590 more rows

# sepを指定しなくても明かなときは自動で判断
iris_2 %>%
  separate(col = name, into = c("part", "measure"))
# # A tibble: 600 x 5
#       id Species part  measure value
#    <int> <fct>   <chr> <chr>   <dbl>
#  1     1 setosa  Sepal Length    5.1
#  2     1 setosa  Sepal Width     3.5
#  3     1 setosa  Petal Length    1.4
#  4     1 setosa  Petal Width     0.2
#  5     2 setosa  Sepal Length    4.9
#  6     2 setosa  Sepal Width     3  
#  7     2 setosa  Petal Length    1.4
#  8     2 setosa  Petal Width     0.2
#  9     3 setosa  Sepal Length    4.7
# 10     3 setosa  Sepal Width     3.2
# # ... with 590 more rows

# データの準備
anscombe2 <- anscombe_tbl %>%
  pivot_longer(cols = -id)
anscombe2
# # A tibble: 88 x 3
#       id name  value
#    <int> <chr> <dbl>
#  1     1 x1    10   
#  2     1 x2    10   
#  3     1 x3    10   
#  4     1 x4     8   
#  5     1 y1     8.04
#  6     1 y2     9.14
#  7     1 y3     7.46
#  8     1 y4     6.58
#  9     2 x1     8   
# 10     2 x2     8   
# # ... with 78 more rows

# name列を1桁目でaxis列とset列に分ける
anscombe2 %>%
  separate(col = name, into = c("axis", "set"), sep = 1L)
# # A tibble: 88 x 4
#       id axis  set   value
#    <int> <chr> <chr> <dbl>
#  1     1 x     1     10   
#  2     1 x     2     10   
#  3     1 x     3     10   
#  4     1 x     4      8   
#  5     1 y     1      8.04
#  6     1 y     2      9.14
#  7     1 y     3      7.46
#  8     1 y     4      6.58
#  9     2 x     1      8   
# 10     2 x     2      8   
# # ... with 78 more rows

引数convert

引数convertで分けた列の型変換も同時にできます。

R
# 整数に変換も同時に行う
anscombe2 %>%
  separate(col = name, into = c("axis", "set"), sep = 1L, convert = TRUE)
# # A tibble: 88 x 4
#       id axis    set value
#    <int> <chr> <int> <dbl>
#  1     1 x         1 10   
#  2     1 x         2 10   
#  3     1 x         3 10   
#  4     1 x         4  8   
#  5     1 y         1  8.04
#  6     1 y         2  9.14
#  7     1 y         3  7.46
#  8     1 y         4  6.58
#  9     2 x         1  8   
# 10     2 x         2  8   
# # ... with 78 more rows
# separateしてsetを整数に変換するのと同じ
anscombe2 %>%
  separate(col = name, into = c("axis", "set"), sep = 1L) %>% print() %>%
  mutate(set = as.integer(set))
# # A tibble: 88 x 4
#  id axis  set   value
#    <int> <chr> <chr> <dbl>
#  1     1 x     1     10   
#  2     1 x     2     10   
#  3     1 x     3     10   
#  4     1 x     4      8   
#  5     1 y     1      8.04
#  6     1 y     2      9.14
#  7     1 y     3      7.46
#  8     1 y     4      6.58
#  9     2 x     1      8   
# 10     2 x     2      8   
# # ... with 78 more rows
# # A tibble: 88 x 4
#  id axis    set value
#    <int> <chr> <int> <dbl>
#  1     1 x         1 10   
#  2     1 x         2 10   
#  3     1 x         3 10   
#  4     1 x         4  8   
#  5     1 y         1  8.04
#  6     1 y         2  9.14
#  7     1 y         3  7.46
#  8     1 y         4  6.58
#  9     2 x         1  8   
# 10     2 x         2  8   
# # ... with 78 more rows

pivot_wider()

pivot_longerの逆の操作で、縦持ちデータを横持データに変換します。

R
# データの準備
iris_long <- iris_tbl %>%
  pivot_longer(cols = -c(id, Species), names_to = c("part", "measure"), names_sep = "\\.", values_to = "value")
iris_long
# # A tibble: 600 x 5
#       id Species part  measure value
#    <int> <fct>   <chr> <chr>   <dbl>
#  1     1 setosa  Sepal Length    5.1
#  2     1 setosa  Sepal Width     3.5
#  3     1 setosa  Petal Length    1.4
#  4     1 setosa  Petal Width     0.2
#  5     2 setosa  Sepal Length    4.9
#  6     2 setosa  Sepal Width     3  
#  7     2 setosa  Petal Length    1.4
#  8     2 setosa  Petal Width     0.2
#  9     3 setosa  Sepal Length    4.7
# 10     3 setosa  Sepal Width     3.2
# # ... with 590 more rows

iris_long %>%
  pivot_wider(names_from = part, values_from = value)
# # A tibble: 300 x 5
#       id Species measure Sepal Petal
#    <int> <fct>   <chr>   <dbl> <dbl>
#  1     1 setosa  Length    5.1   1.4
#  2     1 setosa  Width     3.5   0.2
#  3     2 setosa  Length    4.9   1.4
#  4     2 setosa  Width     3     0.2
#  5     3 setosa  Length    4.7   1.3
#  6     3 setosa  Width     3.2   0.2
#  7     4 setosa  Length    4.6   1.5
#  8     4 setosa  Width     3.1   0.2
#  9     5 setosa  Length    5     1.4
# 10     5 setosa  Width     3.6   0.2
# # ... with 290 more rows

iris_long %>%
  pivot_wider(names_from = measure, values_from = value)
# # A tibble: 300 x 5
#       id Species part  Length Width
#    <int> <fct>   <chr>  <dbl> <dbl>
#  1     1 setosa  Sepal    5.1   3.5
#  2     1 setosa  Petal    1.4   0.2
#  3     2 setosa  Sepal    4.9   3  
#  4     2 setosa  Petal    1.4   0.2
#  5     3 setosa  Sepal    4.7   3.2
#  6     3 setosa  Petal    1.3   0.2
#  7     4 setosa  Sepal    4.6   3.1
#  8     4 setosa  Petal    1.5   0.2
#  9     5 setosa  Sepal    5     3.6
# 10     5 setosa  Petal    1.4   0.2
# # ... with 290 more rows

# データの準備
anscombe_long <- anscombe_tbl %>%
  pivot_longer(cols = -id, names_to = c("axis", "set"), names_sep = 1L, values_to = "value")
anscombe_long
# # A tibble: 88 x 4
#       id axis  set   value
#    <int> <chr> <chr> <dbl>
#  1     1 x     1     10   
#  2     1 x     2     10   
#  3     1 x     3     10   
#  4     1 x     4      8   
#  5     1 y     1      8.04
#  6     1 y     2      9.14
#  7     1 y     3      7.46
#  8     1 y     4      6.58
#  9     2 x     1      8   
# 10     2 x     2      8   
# # ... with 78 more rows

anscombe_long %>%
  pivot_wider(names_from = axis, values_from = value)
# # A tibble: 44 x 4
#       id set       x     y
#    <int> <chr> <dbl> <dbl>
#  1     1 1        10  8.04
#  2     1 2        10  9.14
#  3     1 3        10  7.46
#  4     1 4         8  6.58
#  5     2 1         8  6.95
#  6     2 2         8  8.14
#  7     2 3         8  6.77
#  8     2 4         8  5.76
#  9     3 1        13  7.58
# 10     3 2        13  8.74
# # ... with 34 more rows

anscombe_long %>%
  pivot_wider(names_from = set, values_from = value)
# # A tibble: 22 x 6
#       id axis    `1`   `2`   `3`   `4`
#    <int> <chr> <dbl> <dbl> <dbl> <dbl>
#  1     1 x     10    10    10     8   
#  2     1 y      8.04  9.14  7.46  6.58
#  3     2 x      8     8     8     8   
#  4     2 y      6.95  8.14  6.77  5.76
#  5     3 x     13    13    13     8   
#  6     3 y      7.58  8.74 12.7   7.71
#  7     4 x      9     9     9     8   
#  8     4 y      8.81  8.77  7.11  8.84
#  9     5 x     11    11    11     8   
# 10     5 y      8.33  9.26  7.81  8.47
# # ... with 12 more rows

引数names_fromに複数列を指定できます。

R
iris_long %>%
  pivot_wider(names_from = c(part, measure), values_from = value)
# A tibble: 150 x 6
#       id Species Sepal_Length Sepal_Width Petal_Length Petal_Width
#    <int> <fct>          <dbl>       <dbl>        <dbl>       <dbl>
#  1     1 setosa           5.1         3.5          1.4         0.2
#  2     2 setosa           4.9         3            1.4         0.2
#  3     3 setosa           4.7         3.2          1.3         0.2
#  4     4 setosa           4.6         3.1          1.5         0.2
#  5     5 setosa           5           3.6          1.4         0.2
#  6     6 setosa           5.4         3.9          1.7         0.4
#  7     7 setosa           4.6         3.4          1.4         0.3
#  8     8 setosa           5           3.4          1.5         0.2
#  9     9 setosa           4.4         2.9          1.4         0.2
# 10    10 setosa           4.9         3.1          1.5         0.1
# # ... with 140 more rows

# 順にpivot_widerするのと同じ
iris_long %>% print() %>%
  pivot_wider(names_from = part, values_from = value) %>% print() %>%
  pivot_wider(names_from = measure, values_from = c(Sepal, Petal))
# # A tibble: 600 x 5
#       id Species part  measure value
#    <int> <fct>   <chr> <chr>   <dbl>
#  1     1 setosa  Sepal Length    5.1
#  2     1 setosa  Sepal Width     3.5
#  3     1 setosa  Petal Length    1.4
#  4     1 setosa  Petal Width     0.2
#  5     2 setosa  Sepal Length    4.9
#  6     2 setosa  Sepal Width     3  
#  7     2 setosa  Petal Length    1.4
#  8     2 setosa  Petal Width     0.2
#  9     3 setosa  Sepal Length    4.7
# 10     3 setosa  Sepal Width     3.2
# # ... with 590 more rows
# # A tibble: 300 x 5
#       id Species measure Sepal Petal
#    <int> <fct>   <chr>   <dbl> <dbl>
#  1     1 setosa  Length    5.1   1.4
#  2     1 setosa  Width     3.5   0.2
#  3     2 setosa  Length    4.9   1.4
#  4     2 setosa  Width     3     0.2
#  5     3 setosa  Length    4.7   1.3
#  6     3 setosa  Width     3.2   0.2
#  7     4 setosa  Length    4.6   1.5
#  8     4 setosa  Width     3.1   0.2
#  9     5 setosa  Length    5     1.4
# 10     5 setosa  Width     3.6   0.2
# # ... with 290 more rows
# # A tibble: 150 x 6
#       id Species Sepal_Length Sepal_Width Petal_Length Petal_Width
#    <int> <fct>          <dbl>       <dbl>        <dbl>       <dbl>
#  1     1 setosa           5.1         3.5          1.4         0.2
#  2     2 setosa           4.9         3            1.4         0.2
#  3     3 setosa           4.7         3.2          1.3         0.2
#  4     4 setosa           4.6         3.1          1.5         0.2
#  5     5 setosa           5           3.6          1.4         0.2
#  6     6 setosa           5.4         3.9          1.7         0.4
#  7     7 setosa           4.6         3.4          1.4         0.3
#  8     8 setosa           5           3.4          1.5         0.2
#  9     9 setosa           4.4         2.9          1.4         0.2
# 10    10 setosa           4.9         3.1          1.5         0.1
# # ... with 140 more rows

anscombe_long %>%
  pivot_wider(names_from = c(axis, set), values_from = value)
# # A tibble: 11 x 9
#       id   x_1   x_2   x_3   x_4   y_1   y_2   y_3   y_4
#    <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#  1     1    10    10    10     8  8.04  9.14  7.46  6.58
#  2     2     8     8     8     8  6.95  8.14  6.77  5.76
#  3     3    13    13    13     8  7.58  8.74 12.7   7.71
#  4     4     9     9     9     8  8.81  8.77  7.11  8.84
#  5     5    11    11    11     8  8.33  9.26  7.81  8.47
#  6     6    14    14    14     8  9.96  8.1   8.84  7.04
#  7     7     6     6     6     8  7.24  6.13  6.08  5.25
#  8     8     4     4     4    19  4.26  3.1   5.39 12.5 
#  9     9    12    12    12     8 10.8   9.13  8.15  5.56
# 10    10     7     7     7     8  4.82  7.26  6.42  7.91
# 11    11     5     5     5     8  5.68  4.74  5.73  6.89

# 順にpivot_widerするのと同じ
anscombe_long %>%
  pivot_wider(names_from = axis, values_from = value) %>% print() %>%
  pivot_wider(names_from = set, values_from = c(x, y))
# # A tibble: 44 x 4
#       id set       x     y
#    <int> <chr> <dbl> <dbl>
#  1     1 1        10  8.04
#  2     1 2        10  9.14
#  3     1 3        10  7.46
#  4     1 4         8  6.58
#  5     2 1         8  6.95
#  6     2 2         8  8.14
#  7     2 3         8  6.77
#  8     2 4         8  5.76
#  9     3 1        13  7.58
# 10     3 2        13  8.74
# # ... with 34 more rows
# # A tibble: 11 x 9
#       id   x_1   x_2   x_3   x_4   y_1   y_2   y_3   y_4
#    <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#  1     1    10    10    10     8  8.04  9.14  7.46  6.58
#  2     2     8     8     8     8  6.95  8.14  6.77  5.76
#  3     3    13    13    13     8  7.58  8.74 12.7   7.71
#  4     4     9     9     9     8  8.81  8.77  7.11  8.84
#  5     5    11    11    11     8  8.33  9.26  7.81  8.47
#  6     6    14    14    14     8  9.96  8.1   8.84  7.04
#  7     7     6     6     6     8  7.24  6.13  6.08  5.25
#  8     8     4     4     4    19  4.26  3.1   5.39 12.5 
#  9     9    12    12    12     8 10.8   9.13  8.15  5.56
# 10    10     7     7     7     8  4.82  7.26  6.42  7.91
# 11    11     5     5     5     8  5.68  4.74  5.73  6.89

anscombe_long %>%
  pivot_wider(names_from = c(set, axis), values_from = value)
# # A tibble: 11 x 9
#       id `1_x` `2_x` `3_x` `4_x` `1_y` `2_y` `3_y` `4_y`
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#  1     1    10    10    10     8  8.04  9.14  7.46  6.58
#  2     2     8     8     8     8  6.95  8.14  6.77  5.76
#  3     3    13    13    13     8  7.58  8.74 12.7   7.71
#  4     4     9     9     9     8  8.81  8.77  7.11  8.84
#  5     5    11    11    11     8  8.33  9.26  7.81  8.47
#  6     6    14    14    14     8  9.96  8.1   8.84  7.04
#  7     7     6     6     6     8  7.24  6.13  6.08  5.25
#  8     8     4     4     4    19  4.26  3.1   5.39 12.5 
#  9     9    12    12    12     8 10.8   9.13  8.15  5.56
# 10    10     7     7     7     8  4.82  7.26  6.42  7.91
# 11    11     5     5     5     8  5.68  4.74  5.73  6.89

values_fromに複数列を指定できます。

R
# データの準備
anscombe_xy <- anscombe_long %>%
  pivot_wider(names_from = axis, values_from = value)
anscombe_xy
# # A tibble: 44 x 4
#       id set       x     y
#    <int> <chr> <dbl> <dbl>
#  1     1 1        10  8.04
#  2     1 2        10  9.14
#  3     1 3        10  7.46
#  4     1 4         8  6.58
#  5     2 1         8  6.95
#  6     2 2         8  8.14
#  7     2 3         8  6.77
#  8     2 4         8  5.76
#  9     3 1        13  7.58
# 10     3 2        13  8.74
# # ... with 34 more rows

anscombe_xy %>%
  pivot_wider(names_from = set, values_from = c(x, y))
# # A tibble: 11 x 9
#       id   x_1   x_2   x_3   x_4   y_1   y_2   y_3   y_4
#    <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#  1     1    10    10    10     8  8.04  9.14  7.46  6.58
#  2     2     8     8     8     8  6.95  8.14  6.77  5.76
#  3     3    13    13    13     8  7.58  8.74 12.7   7.71
#  4     4     9     9     9     8  8.81  8.77  7.11  8.84
#  5     5    11    11    11     8  8.33  9.26  7.81  8.47
#  6     6    14    14    14     8  9.96  8.1   8.84  7.04
#  7     7     6     6     6     8  7.24  6.13  6.08  5.25
#  8     8     4     4     4    19  4.26  3.1   5.39 12.5 
#  9     9    12    12    12     8 10.8   9.13  8.15  5.56
# 10    10     7     7     7     8  4.82  7.26  6.42  7.91
# 11    11     5     5     5     8  5.68  4.74  5.73  6.89

# データの準備
iris_part <- iris_long %>%
  pivot_wider(names_from = part, values_from = value)
iris_part
# # A tibble: 300 x 5
#       id Species measure Sepal Petal
#    <int> <fct>   <chr>   <dbl> <dbl>
#  1     1 setosa  Length    5.1   1.4
#  2     1 setosa  Width     3.5   0.2
#  3     2 setosa  Length    4.9   1.4
#  4     2 setosa  Width     3     0.2
#  5     3 setosa  Length    4.7   1.3
#  6     3 setosa  Width     3.2   0.2
#  7     4 setosa  Length    4.6   1.5
#  8     4 setosa  Width     3.1   0.2
#  9     5 setosa  Length    5     1.4
# 10     5 setosa  Width     3.6   0.2
# # ... with 290 more rows

iris_part %>%
  pivot_wider(names_from = measure, values_from = c(Sepal, Petal))
# # A tibble: 150 x 6
#       id Species Sepal_Length Sepal_Width Petal_Length Petal_Width
#    <int> <fct>          <dbl>       <dbl>        <dbl>       <dbl>
#  1     1 setosa           5.1         3.5          1.4         0.2
#  2     2 setosa           4.9         3            1.4         0.2
#  3     3 setosa           4.7         3.2          1.3         0.2
#  4     4 setosa           4.6         3.1          1.5         0.2
#  5     5 setosa           5           3.6          1.4         0.2
#  6     6 setosa           5.4         3.9          1.7         0.4
#  7     7 setosa           4.6         3.4          1.4         0.3
#  8     8 setosa           5           3.4          1.5         0.2
#  9     9 setosa           4.4         2.9          1.4         0.2
# 10    10 setosa           4.9         3.1          1.5         0.1
# # ... with 140 more rows

引数names_sep

引数names_fromに複数列を指定したとき、列名の間にデフォルトでは"_"が入りますが、引数names_sepで列名の区切りを指定できます。

R
iris_long %>%
  pivot_wider(names_from = c(part, measure), names_sep = ":", values_from = value)
# # A tibble: 150 x 6
#       id Species `Sepal:Length` `Sepal:Width` `Petal:Length` `Petal:Width`
#    <int> <fct>            <dbl>         <dbl>          <dbl>         <dbl>
#  1     1 setosa             5.1           3.5            1.4           0.2
#  2     2 setosa             4.9           3              1.4           0.2
#  3     3 setosa             4.7           3.2            1.3           0.2
#  4     4 setosa             4.6           3.1            1.5           0.2
#  5     5 setosa             5             3.6            1.4           0.2
#  6     6 setosa             5.4           3.9            1.7           0.4
#  7     7 setosa             4.6           3.4            1.4           0.3
#  8     8 setosa             5             3.4            1.5           0.2
#  9     9 setosa             4.4           2.9            1.4           0.2
# 10    10 setosa             4.9           3.1            1.5           0.1
# # ... with 140 more rows

iris_long %>%
  pivot_wider(names_from = c(part, measure), names_sep = "", values_from = value)
# # A tibble: 150 x 6
#       id Species SepalLength SepalWidth PetalLength PetalWidth
#    <int> <fct>         <dbl>      <dbl>       <dbl>      <dbl>
#  1     1 setosa          5.1        3.5         1.4        0.2
#  2     2 setosa          4.9        3           1.4        0.2
#  3     3 setosa          4.7        3.2         1.3        0.2
#  4     4 setosa          4.6        3.1         1.5        0.2
#  5     5 setosa          5          3.6         1.4        0.2
#  6     6 setosa          5.4        3.9         1.7        0.4
#  7     7 setosa          4.6        3.4         1.4        0.3
#  8     8 setosa          5          3.4         1.5        0.2
#  9     9 setosa          4.4        2.9         1.4        0.2
# 10    10 setosa          4.9        3.1         1.5        0.1
# # ... with 140 more rows

anscombe_long %>%
  pivot_wider(names_from = c(axis, set), names_sep = "", values_from = value)
# # A tibble: 11 x 9
#       id    x1    x2    x3    x4    y1    y2    y3    y4
#    <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#  1     1    10    10    10     8  8.04  9.14  7.46  6.58
#  2     2     8     8     8     8  6.95  8.14  6.77  5.76
#  3     3    13    13    13     8  7.58  8.74 12.7   7.71
#  4     4     9     9     9     8  8.81  8.77  7.11  8.84
#  5     5    11    11    11     8  8.33  9.26  7.81  8.47
#  6     6    14    14    14     8  9.96  8.1   8.84  7.04
#  7     7     6     6     6     8  7.24  6.13  6.08  5.25
#  8     8     4     4     4    19  4.26  3.1   5.39 12.5 
#  9     9    12    12    12     8 10.8   9.13  8.15  5.56
# 10    10     7     7     7     8  4.82  7.26  6.42  7.91
# 11    11     5     5     5     8  5.68  4.74  5.73  6.89

引数names_prefix

引数names_prefixで横持ちデータに変換後の列名に接頭辞(prefix)を付けられます。

R
iris_long %>%
  pivot_wider(names_from = part, names_prefix = "part_", values_from = value)
# # A tibble: 300 x 5
#       id Species measure part_Sepal part_Petal
#    <int> <fct>   <chr>        <dbl>      <dbl>
#  1     1 setosa  Length         5.1        1.4
#  2     1 setosa  Width          3.5        0.2
#  3     2 setosa  Length         4.9        1.4
#  4     2 setosa  Width          3          0.2
#  5     3 setosa  Length         4.7        1.3
#  6     3 setosa  Width          3.2        0.2
#  7     4 setosa  Length         4.6        1.5
#  8     4 setosa  Width          3.1        0.2
#  9     5 setosa  Length         5          1.4
# 10     5 setosa  Width          3.6        0.2
# # ... with 290 more rows

iris_long %>% 
  filter(part == "Sepal") %>% print() %>%
  pivot_wider(names_from = measure, names_prefix = "Sepal_", values_from = value)
# # A tibble: 300 x 5
#       id Species part  measure value
#    <int> <fct>   <chr> <chr>   <dbl>
#  1     1 setosa  Sepal Length    5.1
#  2     1 setosa  Sepal Width     3.5
#  3     2 setosa  Sepal Length    4.9
#  4     2 setosa  Sepal Width     3  
#  5     3 setosa  Sepal Length    4.7
#  6     3 setosa  Sepal Width     3.2
#  7     4 setosa  Sepal Length    4.6
#  8     4 setosa  Sepal Width     3.1
#  9     5 setosa  Sepal Length    5  
# 10     5 setosa  Sepal Width     3.6
# # ... with 290 more rows
# # A tibble: 150 x 5
#       id Species part  Sepal_Length Sepal_Width
#    <int> <fct>   <chr>        <dbl>       <dbl>
#  1     1 setosa  Sepal          5.1         3.5
#  2     2 setosa  Sepal          4.9         3  
#  3     3 setosa  Sepal          4.7         3.2
#  4     4 setosa  Sepal          4.6         3.1
#  5     5 setosa  Sepal          5           3.6
#  6     6 setosa  Sepal          5.4         3.9
#  7     7 setosa  Sepal          4.6         3.4
#  8     8 setosa  Sepal          5           3.4
#  9     9 setosa  Sepal          4.4         2.9
# 10    10 setosa  Sepal          4.9         3.1
# # ... with 140 more rows

iris_long %>% 
  filter(part == "Sepal") %>%
  pivot_wider(names_from = measure, names_prefix = "Sepal.", values_from = value)
# # A tibble: 150 x 5
#       id Species part  Sepal.Length Sepal.Width
#    <int> <fct>   <chr>        <dbl>       <dbl>
#  1     1 setosa  Sepal          5.1         3.5
#  2     2 setosa  Sepal          4.9         3  
#  3     3 setosa  Sepal          4.7         3.2
#  4     4 setosa  Sepal          4.6         3.1
#  5     5 setosa  Sepal          5           3.6
#  6     6 setosa  Sepal          5.4         3.9
#  7     7 setosa  Sepal          4.6         3.4
#  8     8 setosa  Sepal          5           3.4
#  9     9 setosa  Sepal          4.4         2.9
# 10    10 setosa  Sepal          4.9         3.1
# # ... with 140 more rows

anscombe_long %>%
  filter(axis == "x") %>% print() %>%
  pivot_wider(names_from = set, names_prefix = "x_", values_from = value)
# # A tibble: 44 x 4
#       id axis  set   value
#    <int> <chr> <chr> <dbl>
#  1     1 x     1        10
#  2     1 x     2        10
#  3     1 x     3        10
#  4     1 x     4         8
#  5     2 x     1         8
#  6     2 x     2         8
#  7     2 x     3         8
#  8     2 x     4         8
#  9     3 x     1        13
# 10     3 x     2        13
# # ... with 34 more rows
# # A tibble: 11 x 6
#       id axis    x_1   x_2   x_3   x_4
#    <int> <chr> <dbl> <dbl> <dbl> <dbl>
#  1     1 x        10    10    10     8
#  2     2 x         8     8     8     8
#  3     3 x        13    13    13     8
#  4     4 x         9     9     9     8
#  5     5 x        11    11    11     8
#  6     6 x        14    14    14     8
#  7     7 x         6     6     6     8
#  8     8 x         4     4     4    19
#  9     9 x        12    12    12     8
# 10    10 x         7     7     7     8
# 11    11 x         5     5     5     8

引数values_fill

縦持ちデータから横持ちデータに変換した後のデータにNAができるとき引数values_fillNAを変換できます。リストの形で指定します。

R
# NAを含むデータを準備
anscombe_long %>%
  filter(!(axis == "y" & set == "4")) %>% print() %>% # y4のみ除く
  pivot_wider(names_from = axis, values_from = value) # y4がないので、NAができる
# # A tibble: 77 x 4
#       id axis  set   value
#    <int> <chr> <chr> <dbl>
#  1     1 x     1     10   
#  2     1 x     2     10   
#  3     1 x     3     10   
#  4     1 x     4      8   
#  5     1 y     1      8.04
#  6     1 y     2      9.14
#  7     1 y     3      7.46
#  8     2 x     1      8   
#  9     2 x     2      8   
# 10     2 x     3      8   
# # ... with 67 more rows
# # A tibble: 44 x 4
#       id set       x     y
#    <int> <chr> <dbl> <dbl>
#  1     1 1        10  8.04
#  2     1 2        10  9.14
#  3     1 3        10  7.46
#  4     1 4         8 NA   
#  5     2 1         8  6.95
#  6     2 2         8  8.14
#  7     2 3         8  6.77
#  8     2 4         8 NA   
#  9     3 1        13  7.58
# 10     3 2        13  8.74
# # ... with 34 more rows
# このNAを0で埋める
anscombe_long %>%
  filter(!(axis == "y" & set == "4")) %>% print() %>% # y4のみ除く
  pivot_wider(names_from = axis, values_from = value, values_fill = list(value = 0)) # NAを0で埋める
# A tibble: 44 x 4
#       id set       x     y
#    <int> <chr> <dbl> <dbl>
#  1     1 1        10  8.04
#  2     1 2        10  9.14
#  3     1 3        10  7.46
#  4     1 4         8  0   
#  5     2 1         8  6.95
#  6     2 2         8  8.14
#  7     2 3         8  6.77
#  8     2 4         8  0   
#  9     3 1        13  7.58
# 10     3 2        13  8.74
# # ... with 34 more rows

# NAを含むデータを準備
anscombe_tbl %>%
  select(x1, y1) %>% print() %>%
  pivot_wider(names_from = y1, values_from = y1)
# # A tibble: 11 x 2
#       x1    y1
#    <dbl> <dbl>
#  1    10  8.04
#  2     8  6.95
#  3    13  7.58
#  4     9  8.81
#  5    11  8.33
#  6    14  9.96
#  7     6  7.24
#  8     4  4.26
#  9    12 10.8 
# 10     7  4.82
# 11     5  5.68
# # A tibble: 11 x 12
#       x1 `8.04` `6.95` `7.58` `8.81` `8.33` `9.96` `7.24` `4.26` `10.84` `4.82` `5.68`
#    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>   <dbl>  <dbl>  <dbl>
#  1    10   8.04  NA     NA     NA     NA     NA     NA     NA       NA    NA     NA   
#  2     8  NA      6.95  NA     NA     NA     NA     NA     NA       NA    NA     NA   
#  3    13  NA     NA      7.58  NA     NA     NA     NA     NA       NA    NA     NA   
#  4     9  NA     NA     NA      8.81  NA     NA     NA     NA       NA    NA     NA   
#  5    11  NA     NA     NA     NA      8.33  NA     NA     NA       NA    NA     NA   
#  6    14  NA     NA     NA     NA     NA      9.96  NA     NA       NA    NA     NA   
#  7     6  NA     NA     NA     NA     NA     NA      7.24  NA       NA    NA     NA   
#  8     4  NA     NA     NA     NA     NA     NA     NA      4.26    NA    NA     NA   
#  9    12  NA     NA     NA     NA     NA     NA     NA     NA       10.8  NA     NA   
# 10     7  NA     NA     NA     NA     NA     NA     NA     NA       NA     4.82  NA   
# 11     5  NA     NA     NA     NA     NA     NA     NA     NA       NA    NA      5.68
# このNAを0で埋める
anscombe_tbl %>%
  select(x1, y1) %>% print() %>%
  pivot_wider(names_from = y1, values_from = y1, values_fill = list(y1 = 0))
# # A tibble: 11 x 12
#       x1 `8.04` `6.95` `7.58` `8.81` `8.33` `9.96` `7.24` `4.26` `10.84` `4.82` `5.68`
#    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>   <dbl>  <dbl>  <dbl>
#  1    10   8.04   0      0      0      0      0      0      0        0     0      0   
#  2     8   0      6.95   0      0      0      0      0      0        0     0      0   
#  3    13   0      0      7.58   0      0      0      0      0        0     0      0   
#  4     9   0      0      0      8.81   0      0      0      0        0     0      0   
#  5    11   0      0      0      0      8.33   0      0      0        0     0      0   
#  6    14   0      0      0      0      0      9.96   0      0        0     0      0   
#  7     6   0      0      0      0      0      0      7.24   0        0     0      0   
#  8     4   0      0      0      0      0      0      0      4.26     0     0      0   
#  9    12   0      0      0      0      0      0      0      0       10.8   0      0   
# 10     7   0      0      0      0      0      0      0      0        0     4.82   0   
# 11     5   0      0      0      0      0      0      0      0        0     0      5.68

# NAを含むデータを準備
iris_tbl %>%
  mutate(Len = floor(Sepal.Length), Wid = floor(Sepal.Width)) %>% print() %>%
  count(Len, Wid, name = "count") %>% print() %>%
  pivot_wider(names_from = Wid, values_from = count) # LenとWidのクロス表(NAがある)
# # A tibble: 150 x 8
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species   Len   Wid
#    <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>   <dbl> <dbl>
#  1     1          5.1         3.5          1.4         0.2 setosa      5     3
#  2     2          4.9         3            1.4         0.2 setosa      4     3
#  3     3          4.7         3.2          1.3         0.2 setosa      4     3
#  4     4          4.6         3.1          1.5         0.2 setosa      4     3
#  5     5          5           3.6          1.4         0.2 setosa      5     3
#  6     6          5.4         3.9          1.7         0.4 setosa      5     3
#  7     7          4.6         3.4          1.4         0.3 setosa      4     3
#  8     8          5           3.4          1.5         0.2 setosa      5     3
#  9     9          4.4         2.9          1.4         0.2 setosa      4     2
# 10    10          4.9         3.1          1.5         0.1 setosa      4     3
# # ... with 140 more rows
# # A tibble: 9 x 3
#     Len   Wid count
#   <dbl> <dbl> <int>
# 1     4     2     4
# 2     4     3    18
# 3     5     2    24
# 4     5     3    33
# 5     5     4     4
# 6     6     2    25
# 7     6     3    29
# 8     7     2     4
# 9     7     3     9
# # A tibble: 4 x 4
#     Len   `2`   `3`   `4`
#   <dbl> <int> <int> <int>
# 1     4     4    18    NA
# 2     5    24    33     4
# 3     6    25    29    NA
# 4     7     4     9    NA
# このNAを0で埋める
iris_tbl %>%
  mutate(Len = floor(Sepal.Length), Wid = floor(Sepal.Width)) %>% print() %>%
  select(Len, Wid) %>%
  count(Len, Wid, name = "count") %>%
  pivot_wider(names_from = Wid, values_from = count, values_fill = list(count = 0))
# # A tibble: 4 x 4
#     Len   `2`   `3`   `4`
#   <dbl> <int> <int> <int>
# 1     4     4    18     0
# 2     5    24    33     4
# 3     6    25    29     0
# 4     7     4     9     0

# NAを含むデータを準備
iris_tbl %>%
  group_by(Species, Sepal.Length) %>%
  count(name = "count") %>% print() %>%
  pivot_wider(names_from = Species, values_from = count) # NAになるところがある
# # A tibble: 57 x 3
# # Groups:   Species, Sepal.Length [57]
#    Species Sepal.Length count
#    <fct>          <dbl> <int>
#  1 setosa           4.3     1
#  2 setosa           4.4     3
#  3 setosa           4.5     1
#  4 setosa           4.6     4
#  5 setosa           4.7     2
#  6 setosa           4.8     5
#  7 setosa           4.9     4
#  8 setosa           5       8
#  9 setosa           5.1     8
# 10 setosa           5.2     3
# # ... with 47 more rows
# # A tibble: 35 x 4
# # Groups:   Sepal.Length [35]
#    Sepal.Length setosa versicolor virginica
#           <dbl>  <int>      <int>     <int>
#  1          4.3      1         NA        NA
#  2          4.4      3         NA        NA
#  3          4.5      1         NA        NA
#  4          4.6      4         NA        NA
#  5          4.7      2         NA        NA
#  6          4.8      5         NA        NA
#  7          4.9      4          1         1
#  8          5        8          2        NA
#  9          5.1      8          1        NA
# 10          5.2      3          1        NA
# # ... with 25 more rows
# このNAを0で埋める
iris_tbl %>%
  group_by(Species, Sepal.Length) %>%
  count(name = "count") %>% print() %>%
  pivot_wider(names_from = Species, values_from = count,
              values_fill = list(count = 0))
# # A tibble: 35 x 4
# # Groups:   Sepal.Length [35]
#    Sepal.Length setosa versicolor virginica
#           <dbl>  <int>      <int>     <int>
#  1          4.3      1          0         0
#  2          4.4      3          0         0
#  3          4.5      1          0         0
#  4          4.6      4          0         0
#  5          4.7      2          0         0
#  6          4.8      5          0         0
#  7          4.9      4          1         1
#  8          5        8          2         0
#  9          5.1      8          1         0
# 10          5.2      3          1         0
# # ... with 25 more rows

これを用いると、カテゴリー変数の列を0,1の2値変数に変換できます。

R
iris_tbl %>%
  mutate(value = 1) %>% print() %>%
  pivot_wider(names_from = Species, values_from = value)
# # A tibble: 150 x 7
#       id Sepal.Length Sepal.Width Petal.Length Petal.Width Species value
#    <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>   <dbl>
#  1     1          5.1         3.5          1.4         0.2 setosa      1
#  2     2          4.9         3            1.4         0.2 setosa      1
#  3     3          4.7         3.2          1.3         0.2 setosa      1
#  4     4          4.6         3.1          1.5         0.2 setosa      1
#  5     5          5           3.6          1.4         0.2 setosa      1
#  6     6          5.4         3.9          1.7         0.4 setosa      1
#  7     7          4.6         3.4          1.4         0.3 setosa      1
#  8     8          5           3.4          1.5         0.2 setosa      1
#  9     9          4.4         2.9          1.4         0.2 setosa      1
# 10    10          4.9         3.1          1.5         0.1 setosa      1
# # ... with 140 more rows
# # A tibble: 150 x 8
#       id Sepal.Length Sepal.Width Petal.Length Petal.Width setosa versicolor virginica
#    <int>        <dbl>       <dbl>        <dbl>       <dbl>  <dbl>      <dbl>     <dbl>
#  1     1          5.1         3.5          1.4         0.2      1         NA        NA
#  2     2          4.9         3            1.4         0.2      1         NA        NA
#  3     3          4.7         3.2          1.3         0.2      1         NA        NA
#  4     4          4.6         3.1          1.5         0.2      1         NA        NA
#  5     5          5           3.6          1.4         0.2      1         NA        NA
#  6     6          5.4         3.9          1.7         0.4      1         NA        NA
#  7     7          4.6         3.4          1.4         0.3      1         NA        NA
#  8     8          5           3.4          1.5         0.2      1         NA        NA
#  9     9          4.4         2.9          1.4         0.2      1         NA        NA
# 10    10          4.9         3.1          1.5         0.1      1         NA        NA
# # ... with 140 more rows
iris_tbl %>%
  mutate(value = 1) %>% print() %>%
  pivot_wider(names_from = Species, values_from = value,
              values_fill = list(value = 0))
# # A tibble: 150 x 8
#       id Sepal.Length Sepal.Width Petal.Length Petal.Width setosa versicolor virginica
#    <int>        <dbl>       <dbl>        <dbl>       <dbl>  <dbl>      <dbl>     <dbl>
#  1     1          5.1         3.5          1.4         0.2      1          0         0
#  2     2          4.9         3            1.4         0.2      1          0         0
#  3     3          4.7         3.2          1.3         0.2      1          0         0
#  4     4          4.6         3.1          1.5         0.2      1          0         0
#  5     5          5           3.6          1.4         0.2      1          0         0
#  6     6          5.4         3.9          1.7         0.4      1          0         0
#  7     7          4.6         3.4          1.4         0.3      1          0         0
#  8     8          5           3.4          1.5         0.2      1          0         0
#  9     9          4.4         2.9          1.4         0.2      1          0         0
# 10    10          4.9         3.1          1.5         0.1      1          0         0
# # ... with 140 more rows
# Species列を0,1の2値変数にできる

引数values_fn

引数values_fnを使うとエクセルのピボット集計のような集計も同時にできます。

R
# ピボット集計で合計
iris_long %>%
  pivot_wider(id_cols = c(part, measure),
              names_from = Species, values_from = value,
              values_fn = list(value = sum))
# # A tibble: 4 x 5
#   part  measure setosa versicolor virginica
#   <chr> <chr>    <dbl>      <dbl>     <dbl>
# 1 Sepal Length   250.       297.       329.
# 2 Sepal Width    171.       138.       149.
# 3 Petal Length    73.1      213        278.
# 4 Petal Width     12.3       66.3      101.
# グループ化して集計してpivot_widerするのと同じ(順序は異なるが)
iris_long %>%
  group_by(Species, part, measure) %>%
  summarise(Sum = sum(value)) %>% print() %>%
  pivot_wider(names_from = Species, values_from = Sum)
# # A tibble: 12 x 4
# # Groups:   Species, part [6]
#    Species    part  measure   Sum
#    <fct>      <chr> <chr>   <dbl>
#  1 setosa     Petal Length   73.1
#  2 setosa     Petal Width    12.3
#  3 setosa     Sepal Length  250. 
#  4 setosa     Sepal Width   171. 
#  5 versicolor Petal Length  213  
#  6 versicolor Petal Width    66.3
#  7 versicolor Sepal Length  297. 
#  8 versicolor Sepal Width   138. 
#  9 virginica  Petal Length  278. 
# 10 virginica  Petal Width   101. 
# 11 virginica  Sepal Length  329. 
# 12 virginica  Sepal Width   149. 
# # A tibble: 4 x 5
# # Groups:   part [2]
#   part  measure setosa versicolor virginica
#   <chr> <chr>    <dbl>      <dbl>     <dbl>
# 1 Petal Length    73.1      213        278.
# 2 Petal Width     12.3       66.3      101.
# 3 Sepal Length   250.       297.       329.
# 4 Sepal Width    171.       138.       149.

# ピボット集計で平均
iris_long %>%
  pivot_wider(id_cols = c(part,measure),
              names_from = Species, values_from = value,
              values_fn = list(value = mean))
# # A tibble: 4 x 5
#   part  measure setosa versicolor virginica
#   <chr> <chr>    <dbl>      <dbl>     <dbl>
# 1 Sepal Length   5.01        5.94      6.59
# 2 Sepal Width    3.43        2.77      2.97
# 3 Petal Length   1.46        4.26      5.55
# 4 Petal Width    0.246       1.33      2.03
# グループ化して集計してpivot_widerするのと同じ(順序は異なるが)
iris_long %>%
  group_by(Species, part, measure) %>%
  summarise(Mean = mean(value)) %>% print() %>%
  pivot_wider(names_from = Species, values_from = Mean)
# # A tibble: 12 x 4
# # Groups:   Species, part [6]
#    Species    part  measure  Mean
#    <fct>      <chr> <chr>   <dbl>
#  1 setosa     Petal Length  1.46 
#  2 setosa     Petal Width   0.246
#  3 setosa     Sepal Length  5.01 
#  4 setosa     Sepal Width   3.43 
#  5 versicolor Petal Length  4.26 
#  6 versicolor Petal Width   1.33 
#  7 versicolor Sepal Length  5.94 
#  8 versicolor Sepal Width   2.77 
#  9 virginica  Petal Length  5.55 
# 10 virginica  Petal Width   2.03 
# 11 virginica  Sepal Length  6.59 
# 12 virginica  Sepal Width   2.97 
# # A tibble: 4 x 5
# # Groups:   part [2]
#   part  measure setosa versicolor virginica
#   <chr> <chr>    <dbl>      <dbl>     <dbl>
# 1 Petal Length   1.46        4.26      5.55
# 2 Petal Width    0.246       1.33      2.03
# 3 Sepal Length   5.01        5.94      6.59
# 4 Sepal Width    3.43        2.77      2.97

参考

14
18
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
14
18

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?