はじめに
Rのtidyr
のpivot_longer
とpivot_wider
には、いろいろ引数が指定できます。Rのサンプルデータを用いて、pivot_longer
とpivot_wider
の使い方を練習してみました。
目次
pivot_longer(), pivot_wider()
Rのtidyr
のpivot_longer
とpivot_wider
の使い方をRのサンプルデータを用いて練習します。
tidyr
Pivoting
Pivot data from wide to long
Pivot data from long to wide
参考:
https://r4ds.had.co.nz/tidy-data.html#pivoting
https://heavywatal.github.io/rstats/tidyr.html
練習に用いるデータ
Rのサンプルデータiris
とanscombe
を用います。
行番号を表示した方が分かりやすいため、tibble::rowid_to_column()
で行番号を列名にしています。
library(dplyr)
library(tibble)
library(tidyr)
iris
# 行番号を列名に
iris_tbl <- iris %>% as_tibble() %>% rowid_to_column("id")
iris_tbl
# # A tibble: 150 x 6
# id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# <int> <dbl> <dbl> <dbl> <dbl> <fct>
# 1 1 5.1 3.5 1.4 0.2 setosa
# 2 2 4.9 3 1.4 0.2 setosa
# 3 3 4.7 3.2 1.3 0.2 setosa
# 4 4 4.6 3.1 1.5 0.2 setosa
# 5 5 5 3.6 1.4 0.2 setosa
# 6 6 5.4 3.9 1.7 0.4 setosa
# 7 7 4.6 3.4 1.4 0.3 setosa
# 8 8 5 3.4 1.5 0.2 setosa
# 9 9 4.4 2.9 1.4 0.2 setosa
# 10 10 4.9 3.1 1.5 0.1 setosa
# # ... with 140 more rows
iris_tbl <- iris %>%
as_tibble() %>% print() %>% # 途中経過をprint
rowid_to_column("id") %>% print() # 最終結果をprint
# # A tibble: 150 x 5
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# <dbl> <dbl> <dbl> <dbl> <fct>
# 1 5.1 3.5 1.4 0.2 setosa
# 2 4.9 3 1.4 0.2 setosa
# 3 4.7 3.2 1.3 0.2 setosa
# 4 4.6 3.1 1.5 0.2 setosa
# 5 5 3.6 1.4 0.2 setosa
# 6 5.4 3.9 1.7 0.4 setosa
# 7 4.6 3.4 1.4 0.3 setosa
# 8 5 3.4 1.5 0.2 setosa
# 9 4.4 2.9 1.4 0.2 setosa
# 10 4.9 3.1 1.5 0.1 setosa
# # ... with 140 more rows
# # A tibble: 150 x 6
# id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# <int> <dbl> <dbl> <dbl> <dbl> <fct>
# 1 1 5.1 3.5 1.4 0.2 setosa
# 2 2 4.9 3 1.4 0.2 setosa
# 3 3 4.7 3.2 1.3 0.2 setosa
# 4 4 4.6 3.1 1.5 0.2 setosa
# 5 5 5 3.6 1.4 0.2 setosa
# 6 6 5.4 3.9 1.7 0.4 setosa
# 7 7 4.6 3.4 1.4 0.3 setosa
# 8 8 5 3.4 1.5 0.2 setosa
# 9 9 4.4 2.9 1.4 0.2 setosa
# 10 10 4.9 3.1 1.5 0.1 setosa
# # ... with 140 more rows
anscombe
anscombe_tbl <- anscombe %>% as_tibble() %>% rowid_to_column("id")
anscombe_tbl
# # A tibble: 11 x 9
# id x1 x2 x3 x4 y1 y2 y3 y4
# <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 10 10 10 8 8.04 9.14 7.46 6.58
# 2 2 8 8 8 8 6.95 8.14 6.77 5.76
# 3 3 13 13 13 8 7.58 8.74 12.7 7.71
# 4 4 9 9 9 8 8.81 8.77 7.11 8.84
# 5 5 11 11 11 8 8.33 9.26 7.81 8.47
# 6 6 14 14 14 8 9.96 8.1 8.84 7.04
# 7 7 6 6 6 8 7.24 6.13 6.08 5.25
# 8 8 4 4 4 19 4.26 3.1 5.39 12.5
# 9 9 12 12 12 8 10.8 9.13 8.15 5.56
# 10 10 7 7 7 8 4.82 7.26 6.42 7.91
# 11 11 5 5 5 8 5.68 4.74 5.73 6.89
pivot_longer()
引数cols
に指定した列を縦持ちデータに変換します。
# Sepal.Length~Petal.Width列を縦長に
iris_tbl %>%
pivot_longer(cols = c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"))
# # A tibble: 600 x 4
# id Species name value
# <int> <fct> <chr> <dbl>
# 1 1 setosa Sepal.Length 5.1
# 2 1 setosa Sepal.Width 3.5
# 3 1 setosa Petal.Length 1.4
# 4 1 setosa Petal.Width 0.2
# 5 2 setosa Sepal.Length 4.9
# 6 2 setosa Sepal.Width 3
# 7 2 setosa Petal.Length 1.4
# 8 2 setosa Petal.Width 0.2
# 9 3 setosa Sepal.Length 4.7
# 10 3 setosa Sepal.Width 3.2
# # ... with 590 more rows
# id,Species列以外の列(Sepal.Length~Petal.Width列)を縦長に
iris_tbl %>%
pivot_longer(cols = -c(id, Species))
iris_tbl %>%
pivot_longer(cols = c(-id, -Species))
iris_tbl %>%
pivot_longer(cols = !c(id, Species))
# thで終わる列(Sepal.Length~Petal.Width列)を縦長に
iris_tbl %>%
pivot_longer(cols = ends_with("th"))
anscombe_tbl %>%
pivot_longer(cols = -id)
# # A tibble: 88 x 3
# id name value
# <int> <chr> <dbl>
# 1 1 x1 10
# 2 1 x2 10
# 3 1 x3 10
# 4 1 x4 8
# 5 1 y1 8.04
# 6 1 y2 9.14
# 7 1 y3 7.46
# 8 1 y4 6.58
# 9 2 x1 8
# 10 2 x2 8
# # ... with 78 more rows
引数names_to, values_to
引数names_to
・values_to
で、変換後の列名を指定できます。
iris_tbl %>%
pivot_longer(cols = c(-id, -Species), names_to = "item", values_to = "val")
# # A tibble: 600 x 4
# id Species item val
# <int> <fct> <chr> <dbl>
# 1 1 setosa Sepal.Length 5.1
# 2 1 setosa Sepal.Width 3.5
# 3 1 setosa Petal.Length 1.4
# 4 1 setosa Petal.Width 0.2
# 5 2 setosa Sepal.Length 4.9
# 6 2 setosa Sepal.Width 3
# 7 2 setosa Petal.Length 1.4
# 8 2 setosa Petal.Width 0.2
# 9 3 setosa Sepal.Length 4.7
# 10 3 setosa Sepal.Width 3.2
# # ... with 590 more rows
anscombe_tbl %>%
pivot_longer(cols = -id, names_to = "name", values_to = "val")
# # A tibble: 88 x 3
# id name val
# <int> <chr> <dbl>
# 1 1 x1 10
# 2 1 x2 10
# 3 1 x3 10
# 4 1 x4 8
# 5 1 y1 8.04
# 6 1 y2 9.14
# 7 1 y3 7.46
# 8 1 y4 6.58
# 9 2 x1 8
# 10 2 x2 8
# # ... with 78 more rows
anscombe_tbl %>%
select(id, starts_with("x")) %>% print() %>%
pivot_longer(cols = -id, names_to = "xn", values_to = "val")
# # A tibble: 11 x 5
# id x1 x2 x3 x4
# <int> <dbl> <dbl> <dbl> <dbl>
# 1 1 10 10 10 8
# 2 2 8 8 8 8
# 3 3 13 13 13 8
# 4 4 9 9 9 8
# 5 5 11 11 11 8
# 6 6 14 14 14 8
# 7 7 6 6 6 8
# 8 8 4 4 4 19
# 9 9 12 12 12 8
# 10 10 7 7 7 8
# 11 11 5 5 5 8
# # A tibble: 44 x 3
# id xn val
# <int> <chr> <dbl>
# 1 1 x1 10
# 2 1 x2 10
# 3 1 x3 10
# 4 1 x4 8
# 5 2 x1 8
# 6 2 x2 8
# 7 2 x3 8
# 8 2 x4 8
# 9 3 x1 13
# 10 3 x2 13
# # ... with 34 more rows
引数names_prefix
縦持ちデータに変換する列名のうち、引数names_prefix
に指定した文字列を接頭辞(prefix)として変換後のデータに入れないようにできます。
# 列x1~x4("xn")の"x"を接頭辞(prefix)として"n"部分のみ取り出す
anscombe_tbl %>%
select(id, starts_with("x")) %>% print() %>%
pivot_longer(cols = -id, names_to = "xn", names_prefix = "x", values_to = "val")
# # A tibble: 11 x 5
# id x1 x2 x3 x4
# <int> <dbl> <dbl> <dbl> <dbl>
# 1 1 10 10 10 8
# 2 2 8 8 8 8
# 3 3 13 13 13 8
# 4 4 9 9 9 8
# 5 5 11 11 11 8
# 6 6 14 14 14 8
# 7 7 6 6 6 8
# 8 8 4 4 4 19
# 9 9 12 12 12 8
# 10 10 7 7 7 8
# 11 11 5 5 5 8
# # A tibble: 44 x 3
# id xn val
# <int> <chr> <dbl>
# 1 1 1 10
# 2 1 2 10
# 3 1 3 10
# 4 1 4 8
# 5 2 1 8
# 6 2 2 8
# 7 2 3 8
# 8 2 4 8
# 9 3 1 13
# 10 3 2 13
# # ... with 34 more rows
# 接頭辞の文字列がない場合は無視
anscombe_tbl %>%
pivot_longer(cols = -id, names_to = "xn", names_prefix = "x", values_to = "val")
# # A tibble: 88 x 3
# id xn val
# <int> <chr> <dbl>
# 1 1 1 10
# 2 1 2 10
# 3 1 3 10
# 4 1 4 8
# 5 1 y1 8.04
# 6 1 y2 9.14
# 7 1 y3 7.46
# 8 1 y4 6.58
# 9 2 1 8
# 10 2 2 8
# # ... with 78 more rows
iris_tbl %>%
select(id, starts_with("Sepal"), Species) %>% print() %>%
pivot_longer(cols = -c(id, Species), names_to = "name", names_prefix = "Sepal.",
values_to = "val")
# # A tibble: 150 x 4
# id Sepal.Length Sepal.Width Species
# <int> <dbl> <dbl> <fct>
# 1 1 5.1 3.5 setosa
# 2 2 4.9 3 setosa
# 3 3 4.7 3.2 setosa
# 4 4 4.6 3.1 setosa
# 5 5 5 3.6 setosa
# 6 6 5.4 3.9 setosa
# 7 7 4.6 3.4 setosa
# 8 8 5 3.4 setosa
# 9 9 4.4 2.9 setosa
# 10 10 4.9 3.1 setosa
# # ... with 140 more rows
# # A tibble: 300 x 4
# id Species name val
# <int> <fct> <chr> <dbl>
# 1 1 setosa Length 5.1
# 2 1 setosa Width 3.5
# 3 2 setosa Length 4.9
# 4 2 setosa Width 3
# 5 3 setosa Length 4.7
# 6 3 setosa Width 3.2
# 7 4 setosa Length 4.6
# 8 4 setosa Width 3.1
# 9 5 setosa Length 5
# 10 5 setosa Width 3.6
# # ... with 290 more rows
引数names_sep
引数names_sep
を指定すると、pivot_longer
してseparate
するのを一度にできます。
# "xn"を1桁目で分ける(1Lは整数1のこと)
anscombe_tbl %>%
select(id, starts_with("x")) %>% print() %>%
pivot_longer(cols = -id, names_to = c("x", "n"), names_sep = 1L, values_to = "val")
# # A tibble: 11 x 5
# id x1 x2 x3 x4
# <int> <dbl> <dbl> <dbl> <dbl>
# 1 1 10 10 10 8
# 2 2 8 8 8 8
# 3 3 13 13 13 8
# 4 4 9 9 9 8
# 5 5 11 11 11 8
# 6 6 14 14 14 8
# 7 7 6 6 6 8
# 8 8 4 4 4 19
# 9 9 12 12 12 8
# 10 10 7 7 7 8
# 11 11 5 5 5 8
# # A tibble: 44 x 4
# id x n val
# <int> <chr> <chr> <dbl>
# 1 1 x 1 10
# 2 1 x 2 10
# 3 1 x 3 10
# 4 1 x 4 8
# 5 2 x 1 8
# 6 2 x 2 8
# 7 2 x 3 8
# 8 2 x 4 8
# 9 3 x 1 13
# 10 3 x 2 13
# # ... with 34 more rows
# こうするのと同じ
anscombe_tbl %>%
select(id, starts_with("x")) %>% print() %>%
pivot_longer(cols = -id, names_to = "xn", values_to = "val") %>% print() %>%
separate(col = xn, into = c("x", "n"), sep = 1L)
# # A tibble: 11 x 5
# id x1 x2 x3 x4
# <int> <dbl> <dbl> <dbl> <dbl>
# 1 1 10 10 10 8
# 2 2 8 8 8 8
# 3 3 13 13 13 8
# 4 4 9 9 9 8
# 5 5 11 11 11 8
# 6 6 14 14 14 8
# 7 7 6 6 6 8
# 8 8 4 4 4 19
# 9 9 12 12 12 8
# 10 10 7 7 7 8
# 11 11 5 5 5 8
# # A tibble: 44 x 3
# id xn val
# id xn val
# <int> <chr> <dbl>
# 1 1 x1 10
# 2 1 x2 10
# 3 1 x3 10
# 4 1 x4 8
# 5 2 x1 8
# 6 2 x2 8
# 7 2 x3 8
# 8 2 x4 8
# 9 3 x1 13
# 10 3 x2 13
# # ... with 34 more rows
# # A tibble: 44 x 4
# id x n val
# <int> <chr> <chr> <dbl>
# 1 1 x 1 10
# 2 1 x 2 10
# 3 1 x 3 10
# 4 1 x 4 8
# 5 2 x 1 8
# 6 2 x 2 8
# 7 2 x 3 8
# 8 2 x 4 8
# 9 3 x 1 13
# 10 3 x 2 13
# # ... with 34 more rows
# 軸(x,y)とセット番号(1,2,3,4)を分けてpivot
anscombe_tbl %>%
pivot_longer(cols = -id, names_to = c("axis", "set"), names_sep = 1L,
values_to = "val")
# # A tibble: 88 x 4
# id axis set val
# <int> <chr> <chr> <dbl>
# 1 1 x 1 10
# 2 1 x 2 10
# 3 1 x 3 10
# 4 1 x 4 8
# 5 1 y 1 8.04
# 6 1 y 2 9.14
# 7 1 y 3 7.46
# 8 1 y 4 6.58
# 9 2 x 1 8
# 10 2 x 2 8
# # ... with 78 more rows
# こうするのと同じ
anscombe_tbl %>%
pivot_longer(cols = -id, names_to = "name", values_to = "val") %>%
separate(col = name, into = c("axis", "set"), sep = 1L)
# "Sepal.Length"の"."で"Sepal"と"Length"に分ける。ただし、"."は"\\."で表す(正規表現)
iris_tbl %>%
pivot_longer(cols = -c(id, Species), names_to = c("part", "measure"),
names_sep = "\\.", values_to = "val")
# # A tibble: 600 x 5
# id Species part mesure val
# <int> <fct> <chr> <chr> <dbl>
# 1 1 setosa Sepal Length 5.1
# 2 1 setosa Sepal Width 3.5
# 3 1 setosa Petal Length 1.4
# 4 1 setosa Petal Width 0.2
# 5 2 setosa Sepal Length 4.9
# 6 2 setosa Sepal Width 3
# 7 2 setosa Petal Length 1.4
# 8 2 setosa Petal Width 0.2
# 9 3 setosa Sepal Length 4.7
# 10 3 setosa Sepal Width 3.2
# # ... with 590 more rows
# こうするのと同じ
iris_tbl %>%
pivot_longer(cols = -c(id, Species), names_to = "name",
values_to = "val") %>% print() %>%
separate(col = name, into = c("part", "mesure"), sep = "\\.")
# 6桁目で分ける
iris_tbl %>%
pivot_longer(cols = -c(id, Species), names_to = c("part", "measure"),
names_sep = 6L, values_to = "val")
# # A tibble: 600 x 5
# id Species part measure val
# <int> <fct> <chr> <chr> <dbl>
# 1 1 setosa Sepal. Length 5.1
# 2 1 setosa Sepal. Width 3.5
# 3 1 setosa Petal. Length 1.4
# 4 1 setosa Petal. Width 0.2
# 5 2 setosa Sepal. Length 4.9
# 6 2 setosa Sepal. Width 3
# 7 2 setosa Petal. Length 1.4
# 8 2 setosa Petal. Width 0.2
# 9 3 setosa Sepal. Length 4.7
# 10 3 setosa Sepal. Width 3.2
# # ... with 590 more rows
注意)正規表現(ICU正規表現)について:
- http://userguide.icu-project.org/strings/regexp
- https://heavywatal.github.io/rstats/stringr.html#正規表現
引数names_pattern
引数names_sep
の代わりに、引数names_pattern
で分け方のパターンを指定できます。
# 正規表現"(.)(.)"に従って、1桁の何かと1桁の何かに分ける
anscombe_tbl %>%
pivot_longer(cols = -id, names_to = c("axis", "set"),
names_pattern = "(.)(.)", values_to = "val")
# # A tibble: 88 x 4
# id axis set val
# <int> <chr> <chr> <dbl>
# 1 1 x 1 10
# 2 1 x 2 10
# 3 1 x 3 10
# 4 1 x 4 8
# 5 1 y 1 8.04
# 6 1 y 2 9.14
# 7 1 y 3 7.46
# 8 1 y 4 6.58
# 9 2 x 1 8
# 10 2 x 2 8
# # ... with 78 more rows
# 正規表現"(.+)\\.(.+)"に従って、1桁以上の何かと"."をはさんで1桁以上の何かに分ける
iris_tbl %>%
pivot_longer(cols = -c(id, Species), names_to = c("part", "measure"),
names_pattern = "(.+)\\.(.+)", values_to = "val")
# # A tibble: 600 x 5
# id Species part measure val
# <int> <fct> <chr> <chr> <dbl>
# 1 1 setosa Sepal Length 5.1
# 2 1 setosa Sepal Width 3.5
# 3 1 setosa Petal Length 1.4
# 4 1 setosa Petal Width 0.2
# 5 2 setosa Sepal Length 4.9
# 6 2 setosa Sepal Width 3
# 7 2 setosa Petal Length 1.4
# 8 2 setosa Petal Width 0.2
# 9 3 setosa Sepal Length 4.7
# 10 3 setosa Sepal Width 3.2
# # ... with 590 more rows
".value"
列名を、縦持ちに変換する部分と列名に残す部分とに分けて、変換できます。
# 軸とセット番号を分けてセット番号のみ縦持ちに(軸は列名に残す)
anscombe_tbl %>%
pivot_longer(cols = -id, names_to = c(".value", "set"), names_sep = 1L)
# # A tibble: 44 x 4
# id set x y
# <int> <chr> <dbl> <dbl>
# 1 1 1 10 8.04
# 2 1 2 10 9.14
# 3 1 3 10 7.46
# 4 1 4 8 6.58
# 5 2 1 8 6.95
# 6 2 2 8 8.14
# 7 2 3 8 6.77
# 8 2 4 8 5.76
# 9 3 1 13 7.58
# 10 3 2 13 8.74
# # ... with 34 more rows
# pivot_longerして、軸とセット番号を分けて、軸だけpivot_widerで列に戻したのと同じ
anscombe_tbl %>%
pivot_longer(cols = -id, names_to = "name", values_to = "val") %>% print() %>%
separate(col = name, into = c("axis", "set"), sep = 1L) %>% print() %>%
pivot_wider(names_from = axis, values_from = val)
# # A tibble: 88 x 3
# id name val
# <int> <chr> <dbl>
# 1 1 x1 10
# 2 1 x2 10
# 3 1 x3 10
# 4 1 x4 8
# 5 1 y1 8.04
# 6 1 y2 9.14
# 7 1 y3 7.46
# 8 1 y4 6.58
# 9 2 x1 8
# 10 2 x2 8
# # ... with 78 more rows
# # A tibble: 88 x 4
# id axis set val
# <int> <chr> <chr> <dbl>
# 1 1 x 1 10
# 2 1 x 2 10
# 3 1 x 3 10
# 4 1 x 4 8
# 5 1 y 1 8.04
# 6 1 y 2 9.14
# 7 1 y 3 7.46
# 8 1 y 4 6.58
# 9 2 x 1 8
# 10 2 x 2 8
# # ... with 78 more rows
# # A tibble: 44 x 4
# id set x y
# <int> <chr> <dbl> <dbl>
# 1 1 1 10 8.04
# 2 1 2 10 9.14
# 3 1 3 10 7.46
# 4 1 4 8 6.58
# 5 2 1 8 6.95
# 6 2 2 8 8.14
# 7 2 3 8 6.77
# 8 2 4 8 5.76
# 9 3 1 13 7.58
# 10 3 2 13 8.74
# # ... with 34 more rows
# names_sepの代わりにnames_patternで指定しても同じ
anscombe_tbl %>%
pivot_longer(cols = -id, names_to = c(".value", "set"), names_pattern = "(.)(.)")
# 軸とセット番号を分けて軸のみ縦持ち(セット番号は列名に残す)
anscombe_tbl %>%
pivot_longer(cols = -id, names_to = c("axis", ".value"), names_sep = 1L)
# # A tibble: 22 x 6
# id axis `1` `2` `3` `4`
# <int> <chr> <dbl> <dbl> <dbl> <dbl>
# 1 1 x 10 10 10 8
# 2 1 y 8.04 9.14 7.46 6.58
# 3 2 x 8 8 8 8
# 4 2 y 6.95 8.14 6.77 5.76
# 5 3 x 13 13 13 8
# 6 3 y 7.58 8.74 12.7 7.71
# 7 4 x 9 9 9 8
# 8 4 y 8.81 8.77 7.11 8.84
# 9 5 x 11 11 11 8
# 10 5 y 8.33 9.26 7.81 8.47
# # ... with 12 more rows
# pivot_longerして、軸とセット番号を分けて、セット番号だけpivot_widerで列に戻したのと同じ
anscombe_tbl %>%
pivot_longer(cols = -id, names_to = "name", values_to = "val") %>% print() %>%
separate(col = name, into = c("axis", "set"), sep = 1L) %>% print() %>%
pivot_wider(names_from = set, values_from = val)
# names_sepの代わりにnames_patternで指定しても同じ
anscombe_tbl %>%
pivot_longer(cols = -id, names_to = c("axis", ".value"), names_pattern = "(.)(.)")
iris_tbl %>%
pivot_longer(cols = -c(id, Species), names_to = c(".value", "measure"),
names_sep = "\\.")
# # A tibble: 300 x 5
# id Species measure Sepal Petal
# <int> <fct> <chr> <dbl> <dbl>
# 1 1 setosa Length 5.1 1.4
# 2 1 setosa Width 3.5 0.2
# 3 2 setosa Length 4.9 1.4
# 4 2 setosa Width 3 0.2
# 5 3 setosa Length 4.7 1.3
# 6 3 setosa Width 3.2 0.2
# 7 4 setosa Length 4.6 1.5
# 8 4 setosa Width 3.1 0.2
# 9 5 setosa Length 5 1.4
# 10 5 setosa Width 3.6 0.2
# # ... with 290 more rows
# pivot_longerして、separateして、partだけpivot_widerで列に戻したのと同じ
iris_tbl %>%
pivot_longer(cols = -c(id, Species), names_to = "name", values_to = "val") %>% print() %>%
separate(col = name, into = c("part", "measure"), sep = "\\.") %>% print() %>%
pivot_wider(names_from = part, values_from = val)
iris_tbl %>%
pivot_longer(cols = -c(id, Species), names_to = c("part", ".value"),
names_sep = "\\.")
# # A tibble: 300 x 5
# id Species part Length Width
# <int> <fct> <chr> <dbl> <dbl>
# 1 1 setosa Sepal 5.1 3.5
# 2 1 setosa Petal 1.4 0.2
# 3 2 setosa Sepal 4.9 3
# 4 2 setosa Petal 1.4 0.2
# 5 3 setosa Sepal 4.7 3.2
# 6 3 setosa Petal 1.3 0.2
# 7 4 setosa Sepal 4.6 3.1
# 8 4 setosa Petal 1.5 0.2
# 9 5 setosa Sepal 5 3.6
# 10 5 setosa Petal 1.4 0.2
# # ... with 290 more rows
# pivot_longerして、separateして、partだけpivot_widerで列に戻したのと同じ
iris_tbl %>%
pivot_longer(cols = -c(id, Species), names_to = "name", values_to = "val") %>% print() %>%
separate(col = name, into = c("part", "measure"), sep = "\\.") %>% print() %>%
pivot_wider(names_from = measure, values_from = val)
separate()
列をある規則的に分けます。
# データの準備
iris_2 <- iris_tbl %>%
pivot_longer(cols = -c(id, Species), names_to = "name", values_to = "value")
iris_2
# # A tibble: 600 x 4
# id Species name value
# <int> <fct> <chr> <dbl>
# 1 1 setosa Sepal.Length 5.1
# 2 1 setosa Sepal.Width 3.5
# 3 1 setosa Petal.Length 1.4
# 4 1 setosa Petal.Width 0.2
# 5 2 setosa Sepal.Length 4.9
# 6 2 setosa Sepal.Width 3
# 7 2 setosa Petal.Length 1.4
# 8 2 setosa Petal.Width 0.2
# 9 3 setosa Sepal.Length 4.7
# 10 3 setosa Sepal.Width 3.2
# # ... with 590 more rows
# name列を"."でpart列とmeasure列に分ける
iris_2 %>%
separate(col = name, into = c("part", "measure"), sep = "\\.")
# # A tibble: 600 x 5
# id Species part measure value
# <int> <fct> <chr> <chr> <dbl>
# 1 1 setosa Sepal Length 5.1
# 2 1 setosa Sepal Width 3.5
# 3 1 setosa Petal Length 1.4
# 4 1 setosa Petal Width 0.2
# 5 2 setosa Sepal Length 4.9
# 6 2 setosa Sepal Width 3
# 7 2 setosa Petal Length 1.4
# 8 2 setosa Petal Width 0.2
# 9 3 setosa Sepal Length 4.7
# 10 3 setosa Sepal Width 3.2
# # ... with 590 more rows
# sepを指定しなくても明かなときは自動で判断
iris_2 %>%
separate(col = name, into = c("part", "measure"))
# # A tibble: 600 x 5
# id Species part measure value
# <int> <fct> <chr> <chr> <dbl>
# 1 1 setosa Sepal Length 5.1
# 2 1 setosa Sepal Width 3.5
# 3 1 setosa Petal Length 1.4
# 4 1 setosa Petal Width 0.2
# 5 2 setosa Sepal Length 4.9
# 6 2 setosa Sepal Width 3
# 7 2 setosa Petal Length 1.4
# 8 2 setosa Petal Width 0.2
# 9 3 setosa Sepal Length 4.7
# 10 3 setosa Sepal Width 3.2
# # ... with 590 more rows
# データの準備
anscombe2 <- anscombe_tbl %>%
pivot_longer(cols = -id)
anscombe2
# # A tibble: 88 x 3
# id name value
# <int> <chr> <dbl>
# 1 1 x1 10
# 2 1 x2 10
# 3 1 x3 10
# 4 1 x4 8
# 5 1 y1 8.04
# 6 1 y2 9.14
# 7 1 y3 7.46
# 8 1 y4 6.58
# 9 2 x1 8
# 10 2 x2 8
# # ... with 78 more rows
# name列を1桁目でaxis列とset列に分ける
anscombe2 %>%
separate(col = name, into = c("axis", "set"), sep = 1L)
# # A tibble: 88 x 4
# id axis set value
# <int> <chr> <chr> <dbl>
# 1 1 x 1 10
# 2 1 x 2 10
# 3 1 x 3 10
# 4 1 x 4 8
# 5 1 y 1 8.04
# 6 1 y 2 9.14
# 7 1 y 3 7.46
# 8 1 y 4 6.58
# 9 2 x 1 8
# 10 2 x 2 8
# # ... with 78 more rows
引数convert
引数convert
で分けた列の型変換も同時にできます。
# 整数に変換も同時に行う
anscombe2 %>%
separate(col = name, into = c("axis", "set"), sep = 1L, convert = TRUE)
# # A tibble: 88 x 4
# id axis set value
# <int> <chr> <int> <dbl>
# 1 1 x 1 10
# 2 1 x 2 10
# 3 1 x 3 10
# 4 1 x 4 8
# 5 1 y 1 8.04
# 6 1 y 2 9.14
# 7 1 y 3 7.46
# 8 1 y 4 6.58
# 9 2 x 1 8
# 10 2 x 2 8
# # ... with 78 more rows
# separateしてsetを整数に変換するのと同じ
anscombe2 %>%
separate(col = name, into = c("axis", "set"), sep = 1L) %>% print() %>%
mutate(set = as.integer(set))
# # A tibble: 88 x 4
# id axis set value
# <int> <chr> <chr> <dbl>
# 1 1 x 1 10
# 2 1 x 2 10
# 3 1 x 3 10
# 4 1 x 4 8
# 5 1 y 1 8.04
# 6 1 y 2 9.14
# 7 1 y 3 7.46
# 8 1 y 4 6.58
# 9 2 x 1 8
# 10 2 x 2 8
# # ... with 78 more rows
# # A tibble: 88 x 4
# id axis set value
# <int> <chr> <int> <dbl>
# 1 1 x 1 10
# 2 1 x 2 10
# 3 1 x 3 10
# 4 1 x 4 8
# 5 1 y 1 8.04
# 6 1 y 2 9.14
# 7 1 y 3 7.46
# 8 1 y 4 6.58
# 9 2 x 1 8
# 10 2 x 2 8
# # ... with 78 more rows
pivot_wider()
pivot_longer
の逆の操作で、縦持ちデータを横持データに変換します。
# データの準備
iris_long <- iris_tbl %>%
pivot_longer(cols = -c(id, Species), names_to = c("part", "measure"), names_sep = "\\.", values_to = "value")
iris_long
# # A tibble: 600 x 5
# id Species part measure value
# <int> <fct> <chr> <chr> <dbl>
# 1 1 setosa Sepal Length 5.1
# 2 1 setosa Sepal Width 3.5
# 3 1 setosa Petal Length 1.4
# 4 1 setosa Petal Width 0.2
# 5 2 setosa Sepal Length 4.9
# 6 2 setosa Sepal Width 3
# 7 2 setosa Petal Length 1.4
# 8 2 setosa Petal Width 0.2
# 9 3 setosa Sepal Length 4.7
# 10 3 setosa Sepal Width 3.2
# # ... with 590 more rows
iris_long %>%
pivot_wider(names_from = part, values_from = value)
# # A tibble: 300 x 5
# id Species measure Sepal Petal
# <int> <fct> <chr> <dbl> <dbl>
# 1 1 setosa Length 5.1 1.4
# 2 1 setosa Width 3.5 0.2
# 3 2 setosa Length 4.9 1.4
# 4 2 setosa Width 3 0.2
# 5 3 setosa Length 4.7 1.3
# 6 3 setosa Width 3.2 0.2
# 7 4 setosa Length 4.6 1.5
# 8 4 setosa Width 3.1 0.2
# 9 5 setosa Length 5 1.4
# 10 5 setosa Width 3.6 0.2
# # ... with 290 more rows
iris_long %>%
pivot_wider(names_from = measure, values_from = value)
# # A tibble: 300 x 5
# id Species part Length Width
# <int> <fct> <chr> <dbl> <dbl>
# 1 1 setosa Sepal 5.1 3.5
# 2 1 setosa Petal 1.4 0.2
# 3 2 setosa Sepal 4.9 3
# 4 2 setosa Petal 1.4 0.2
# 5 3 setosa Sepal 4.7 3.2
# 6 3 setosa Petal 1.3 0.2
# 7 4 setosa Sepal 4.6 3.1
# 8 4 setosa Petal 1.5 0.2
# 9 5 setosa Sepal 5 3.6
# 10 5 setosa Petal 1.4 0.2
# # ... with 290 more rows
# データの準備
anscombe_long <- anscombe_tbl %>%
pivot_longer(cols = -id, names_to = c("axis", "set"), names_sep = 1L, values_to = "value")
anscombe_long
# # A tibble: 88 x 4
# id axis set value
# <int> <chr> <chr> <dbl>
# 1 1 x 1 10
# 2 1 x 2 10
# 3 1 x 3 10
# 4 1 x 4 8
# 5 1 y 1 8.04
# 6 1 y 2 9.14
# 7 1 y 3 7.46
# 8 1 y 4 6.58
# 9 2 x 1 8
# 10 2 x 2 8
# # ... with 78 more rows
anscombe_long %>%
pivot_wider(names_from = axis, values_from = value)
# # A tibble: 44 x 4
# id set x y
# <int> <chr> <dbl> <dbl>
# 1 1 1 10 8.04
# 2 1 2 10 9.14
# 3 1 3 10 7.46
# 4 1 4 8 6.58
# 5 2 1 8 6.95
# 6 2 2 8 8.14
# 7 2 3 8 6.77
# 8 2 4 8 5.76
# 9 3 1 13 7.58
# 10 3 2 13 8.74
# # ... with 34 more rows
anscombe_long %>%
pivot_wider(names_from = set, values_from = value)
# # A tibble: 22 x 6
# id axis `1` `2` `3` `4`
# <int> <chr> <dbl> <dbl> <dbl> <dbl>
# 1 1 x 10 10 10 8
# 2 1 y 8.04 9.14 7.46 6.58
# 3 2 x 8 8 8 8
# 4 2 y 6.95 8.14 6.77 5.76
# 5 3 x 13 13 13 8
# 6 3 y 7.58 8.74 12.7 7.71
# 7 4 x 9 9 9 8
# 8 4 y 8.81 8.77 7.11 8.84
# 9 5 x 11 11 11 8
# 10 5 y 8.33 9.26 7.81 8.47
# # ... with 12 more rows
引数names_from
に複数列を指定できます。
iris_long %>%
pivot_wider(names_from = c(part, measure), values_from = value)
# A tibble: 150 x 6
# id Species Sepal_Length Sepal_Width Petal_Length Petal_Width
# <int> <fct> <dbl> <dbl> <dbl> <dbl>
# 1 1 setosa 5.1 3.5 1.4 0.2
# 2 2 setosa 4.9 3 1.4 0.2
# 3 3 setosa 4.7 3.2 1.3 0.2
# 4 4 setosa 4.6 3.1 1.5 0.2
# 5 5 setosa 5 3.6 1.4 0.2
# 6 6 setosa 5.4 3.9 1.7 0.4
# 7 7 setosa 4.6 3.4 1.4 0.3
# 8 8 setosa 5 3.4 1.5 0.2
# 9 9 setosa 4.4 2.9 1.4 0.2
# 10 10 setosa 4.9 3.1 1.5 0.1
# # ... with 140 more rows
# 順にpivot_widerするのと同じ
iris_long %>% print() %>%
pivot_wider(names_from = part, values_from = value) %>% print() %>%
pivot_wider(names_from = measure, values_from = c(Sepal, Petal))
# # A tibble: 600 x 5
# id Species part measure value
# <int> <fct> <chr> <chr> <dbl>
# 1 1 setosa Sepal Length 5.1
# 2 1 setosa Sepal Width 3.5
# 3 1 setosa Petal Length 1.4
# 4 1 setosa Petal Width 0.2
# 5 2 setosa Sepal Length 4.9
# 6 2 setosa Sepal Width 3
# 7 2 setosa Petal Length 1.4
# 8 2 setosa Petal Width 0.2
# 9 3 setosa Sepal Length 4.7
# 10 3 setosa Sepal Width 3.2
# # ... with 590 more rows
# # A tibble: 300 x 5
# id Species measure Sepal Petal
# <int> <fct> <chr> <dbl> <dbl>
# 1 1 setosa Length 5.1 1.4
# 2 1 setosa Width 3.5 0.2
# 3 2 setosa Length 4.9 1.4
# 4 2 setosa Width 3 0.2
# 5 3 setosa Length 4.7 1.3
# 6 3 setosa Width 3.2 0.2
# 7 4 setosa Length 4.6 1.5
# 8 4 setosa Width 3.1 0.2
# 9 5 setosa Length 5 1.4
# 10 5 setosa Width 3.6 0.2
# # ... with 290 more rows
# # A tibble: 150 x 6
# id Species Sepal_Length Sepal_Width Petal_Length Petal_Width
# <int> <fct> <dbl> <dbl> <dbl> <dbl>
# 1 1 setosa 5.1 3.5 1.4 0.2
# 2 2 setosa 4.9 3 1.4 0.2
# 3 3 setosa 4.7 3.2 1.3 0.2
# 4 4 setosa 4.6 3.1 1.5 0.2
# 5 5 setosa 5 3.6 1.4 0.2
# 6 6 setosa 5.4 3.9 1.7 0.4
# 7 7 setosa 4.6 3.4 1.4 0.3
# 8 8 setosa 5 3.4 1.5 0.2
# 9 9 setosa 4.4 2.9 1.4 0.2
# 10 10 setosa 4.9 3.1 1.5 0.1
# # ... with 140 more rows
anscombe_long %>%
pivot_wider(names_from = c(axis, set), values_from = value)
# # A tibble: 11 x 9
# id x_1 x_2 x_3 x_4 y_1 y_2 y_3 y_4
# <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 10 10 10 8 8.04 9.14 7.46 6.58
# 2 2 8 8 8 8 6.95 8.14 6.77 5.76
# 3 3 13 13 13 8 7.58 8.74 12.7 7.71
# 4 4 9 9 9 8 8.81 8.77 7.11 8.84
# 5 5 11 11 11 8 8.33 9.26 7.81 8.47
# 6 6 14 14 14 8 9.96 8.1 8.84 7.04
# 7 7 6 6 6 8 7.24 6.13 6.08 5.25
# 8 8 4 4 4 19 4.26 3.1 5.39 12.5
# 9 9 12 12 12 8 10.8 9.13 8.15 5.56
# 10 10 7 7 7 8 4.82 7.26 6.42 7.91
# 11 11 5 5 5 8 5.68 4.74 5.73 6.89
# 順にpivot_widerするのと同じ
anscombe_long %>%
pivot_wider(names_from = axis, values_from = value) %>% print() %>%
pivot_wider(names_from = set, values_from = c(x, y))
# # A tibble: 44 x 4
# id set x y
# <int> <chr> <dbl> <dbl>
# 1 1 1 10 8.04
# 2 1 2 10 9.14
# 3 1 3 10 7.46
# 4 1 4 8 6.58
# 5 2 1 8 6.95
# 6 2 2 8 8.14
# 7 2 3 8 6.77
# 8 2 4 8 5.76
# 9 3 1 13 7.58
# 10 3 2 13 8.74
# # ... with 34 more rows
# # A tibble: 11 x 9
# id x_1 x_2 x_3 x_4 y_1 y_2 y_3 y_4
# <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 10 10 10 8 8.04 9.14 7.46 6.58
# 2 2 8 8 8 8 6.95 8.14 6.77 5.76
# 3 3 13 13 13 8 7.58 8.74 12.7 7.71
# 4 4 9 9 9 8 8.81 8.77 7.11 8.84
# 5 5 11 11 11 8 8.33 9.26 7.81 8.47
# 6 6 14 14 14 8 9.96 8.1 8.84 7.04
# 7 7 6 6 6 8 7.24 6.13 6.08 5.25
# 8 8 4 4 4 19 4.26 3.1 5.39 12.5
# 9 9 12 12 12 8 10.8 9.13 8.15 5.56
# 10 10 7 7 7 8 4.82 7.26 6.42 7.91
# 11 11 5 5 5 8 5.68 4.74 5.73 6.89
anscombe_long %>%
pivot_wider(names_from = c(set, axis), values_from = value)
# # A tibble: 11 x 9
# id `1_x` `2_x` `3_x` `4_x` `1_y` `2_y` `3_y` `4_y`
# <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 10 10 10 8 8.04 9.14 7.46 6.58
# 2 2 8 8 8 8 6.95 8.14 6.77 5.76
# 3 3 13 13 13 8 7.58 8.74 12.7 7.71
# 4 4 9 9 9 8 8.81 8.77 7.11 8.84
# 5 5 11 11 11 8 8.33 9.26 7.81 8.47
# 6 6 14 14 14 8 9.96 8.1 8.84 7.04
# 7 7 6 6 6 8 7.24 6.13 6.08 5.25
# 8 8 4 4 4 19 4.26 3.1 5.39 12.5
# 9 9 12 12 12 8 10.8 9.13 8.15 5.56
# 10 10 7 7 7 8 4.82 7.26 6.42 7.91
# 11 11 5 5 5 8 5.68 4.74 5.73 6.89
values_from
に複数列を指定できます。
# データの準備
anscombe_xy <- anscombe_long %>%
pivot_wider(names_from = axis, values_from = value)
anscombe_xy
# # A tibble: 44 x 4
# id set x y
# <int> <chr> <dbl> <dbl>
# 1 1 1 10 8.04
# 2 1 2 10 9.14
# 3 1 3 10 7.46
# 4 1 4 8 6.58
# 5 2 1 8 6.95
# 6 2 2 8 8.14
# 7 2 3 8 6.77
# 8 2 4 8 5.76
# 9 3 1 13 7.58
# 10 3 2 13 8.74
# # ... with 34 more rows
anscombe_xy %>%
pivot_wider(names_from = set, values_from = c(x, y))
# # A tibble: 11 x 9
# id x_1 x_2 x_3 x_4 y_1 y_2 y_3 y_4
# <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 10 10 10 8 8.04 9.14 7.46 6.58
# 2 2 8 8 8 8 6.95 8.14 6.77 5.76
# 3 3 13 13 13 8 7.58 8.74 12.7 7.71
# 4 4 9 9 9 8 8.81 8.77 7.11 8.84
# 5 5 11 11 11 8 8.33 9.26 7.81 8.47
# 6 6 14 14 14 8 9.96 8.1 8.84 7.04
# 7 7 6 6 6 8 7.24 6.13 6.08 5.25
# 8 8 4 4 4 19 4.26 3.1 5.39 12.5
# 9 9 12 12 12 8 10.8 9.13 8.15 5.56
# 10 10 7 7 7 8 4.82 7.26 6.42 7.91
# 11 11 5 5 5 8 5.68 4.74 5.73 6.89
# データの準備
iris_part <- iris_long %>%
pivot_wider(names_from = part, values_from = value)
iris_part
# # A tibble: 300 x 5
# id Species measure Sepal Petal
# <int> <fct> <chr> <dbl> <dbl>
# 1 1 setosa Length 5.1 1.4
# 2 1 setosa Width 3.5 0.2
# 3 2 setosa Length 4.9 1.4
# 4 2 setosa Width 3 0.2
# 5 3 setosa Length 4.7 1.3
# 6 3 setosa Width 3.2 0.2
# 7 4 setosa Length 4.6 1.5
# 8 4 setosa Width 3.1 0.2
# 9 5 setosa Length 5 1.4
# 10 5 setosa Width 3.6 0.2
# # ... with 290 more rows
iris_part %>%
pivot_wider(names_from = measure, values_from = c(Sepal, Petal))
# # A tibble: 150 x 6
# id Species Sepal_Length Sepal_Width Petal_Length Petal_Width
# <int> <fct> <dbl> <dbl> <dbl> <dbl>
# 1 1 setosa 5.1 3.5 1.4 0.2
# 2 2 setosa 4.9 3 1.4 0.2
# 3 3 setosa 4.7 3.2 1.3 0.2
# 4 4 setosa 4.6 3.1 1.5 0.2
# 5 5 setosa 5 3.6 1.4 0.2
# 6 6 setosa 5.4 3.9 1.7 0.4
# 7 7 setosa 4.6 3.4 1.4 0.3
# 8 8 setosa 5 3.4 1.5 0.2
# 9 9 setosa 4.4 2.9 1.4 0.2
# 10 10 setosa 4.9 3.1 1.5 0.1
# # ... with 140 more rows
引数names_sep
引数names_from
に複数列を指定したとき、列名の間にデフォルトでは"_"が入りますが、引数names_sep
で列名の区切りを指定できます。
iris_long %>%
pivot_wider(names_from = c(part, measure), names_sep = ":", values_from = value)
# # A tibble: 150 x 6
# id Species `Sepal:Length` `Sepal:Width` `Petal:Length` `Petal:Width`
# <int> <fct> <dbl> <dbl> <dbl> <dbl>
# 1 1 setosa 5.1 3.5 1.4 0.2
# 2 2 setosa 4.9 3 1.4 0.2
# 3 3 setosa 4.7 3.2 1.3 0.2
# 4 4 setosa 4.6 3.1 1.5 0.2
# 5 5 setosa 5 3.6 1.4 0.2
# 6 6 setosa 5.4 3.9 1.7 0.4
# 7 7 setosa 4.6 3.4 1.4 0.3
# 8 8 setosa 5 3.4 1.5 0.2
# 9 9 setosa 4.4 2.9 1.4 0.2
# 10 10 setosa 4.9 3.1 1.5 0.1
# # ... with 140 more rows
iris_long %>%
pivot_wider(names_from = c(part, measure), names_sep = "", values_from = value)
# # A tibble: 150 x 6
# id Species SepalLength SepalWidth PetalLength PetalWidth
# <int> <fct> <dbl> <dbl> <dbl> <dbl>
# 1 1 setosa 5.1 3.5 1.4 0.2
# 2 2 setosa 4.9 3 1.4 0.2
# 3 3 setosa 4.7 3.2 1.3 0.2
# 4 4 setosa 4.6 3.1 1.5 0.2
# 5 5 setosa 5 3.6 1.4 0.2
# 6 6 setosa 5.4 3.9 1.7 0.4
# 7 7 setosa 4.6 3.4 1.4 0.3
# 8 8 setosa 5 3.4 1.5 0.2
# 9 9 setosa 4.4 2.9 1.4 0.2
# 10 10 setosa 4.9 3.1 1.5 0.1
# # ... with 140 more rows
anscombe_long %>%
pivot_wider(names_from = c(axis, set), names_sep = "", values_from = value)
# # A tibble: 11 x 9
# id x1 x2 x3 x4 y1 y2 y3 y4
# <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 10 10 10 8 8.04 9.14 7.46 6.58
# 2 2 8 8 8 8 6.95 8.14 6.77 5.76
# 3 3 13 13 13 8 7.58 8.74 12.7 7.71
# 4 4 9 9 9 8 8.81 8.77 7.11 8.84
# 5 5 11 11 11 8 8.33 9.26 7.81 8.47
# 6 6 14 14 14 8 9.96 8.1 8.84 7.04
# 7 7 6 6 6 8 7.24 6.13 6.08 5.25
# 8 8 4 4 4 19 4.26 3.1 5.39 12.5
# 9 9 12 12 12 8 10.8 9.13 8.15 5.56
# 10 10 7 7 7 8 4.82 7.26 6.42 7.91
# 11 11 5 5 5 8 5.68 4.74 5.73 6.89
引数names_prefix
引数names_prefix
で横持ちデータに変換後の列名に接頭辞(prefix)を付けられます。
iris_long %>%
pivot_wider(names_from = part, names_prefix = "part_", values_from = value)
# # A tibble: 300 x 5
# id Species measure part_Sepal part_Petal
# <int> <fct> <chr> <dbl> <dbl>
# 1 1 setosa Length 5.1 1.4
# 2 1 setosa Width 3.5 0.2
# 3 2 setosa Length 4.9 1.4
# 4 2 setosa Width 3 0.2
# 5 3 setosa Length 4.7 1.3
# 6 3 setosa Width 3.2 0.2
# 7 4 setosa Length 4.6 1.5
# 8 4 setosa Width 3.1 0.2
# 9 5 setosa Length 5 1.4
# 10 5 setosa Width 3.6 0.2
# # ... with 290 more rows
iris_long %>%
filter(part == "Sepal") %>% print() %>%
pivot_wider(names_from = measure, names_prefix = "Sepal_", values_from = value)
# # A tibble: 300 x 5
# id Species part measure value
# <int> <fct> <chr> <chr> <dbl>
# 1 1 setosa Sepal Length 5.1
# 2 1 setosa Sepal Width 3.5
# 3 2 setosa Sepal Length 4.9
# 4 2 setosa Sepal Width 3
# 5 3 setosa Sepal Length 4.7
# 6 3 setosa Sepal Width 3.2
# 7 4 setosa Sepal Length 4.6
# 8 4 setosa Sepal Width 3.1
# 9 5 setosa Sepal Length 5
# 10 5 setosa Sepal Width 3.6
# # ... with 290 more rows
# # A tibble: 150 x 5
# id Species part Sepal_Length Sepal_Width
# <int> <fct> <chr> <dbl> <dbl>
# 1 1 setosa Sepal 5.1 3.5
# 2 2 setosa Sepal 4.9 3
# 3 3 setosa Sepal 4.7 3.2
# 4 4 setosa Sepal 4.6 3.1
# 5 5 setosa Sepal 5 3.6
# 6 6 setosa Sepal 5.4 3.9
# 7 7 setosa Sepal 4.6 3.4
# 8 8 setosa Sepal 5 3.4
# 9 9 setosa Sepal 4.4 2.9
# 10 10 setosa Sepal 4.9 3.1
# # ... with 140 more rows
iris_long %>%
filter(part == "Sepal") %>%
pivot_wider(names_from = measure, names_prefix = "Sepal.", values_from = value)
# # A tibble: 150 x 5
# id Species part Sepal.Length Sepal.Width
# <int> <fct> <chr> <dbl> <dbl>
# 1 1 setosa Sepal 5.1 3.5
# 2 2 setosa Sepal 4.9 3
# 3 3 setosa Sepal 4.7 3.2
# 4 4 setosa Sepal 4.6 3.1
# 5 5 setosa Sepal 5 3.6
# 6 6 setosa Sepal 5.4 3.9
# 7 7 setosa Sepal 4.6 3.4
# 8 8 setosa Sepal 5 3.4
# 9 9 setosa Sepal 4.4 2.9
# 10 10 setosa Sepal 4.9 3.1
# # ... with 140 more rows
anscombe_long %>%
filter(axis == "x") %>% print() %>%
pivot_wider(names_from = set, names_prefix = "x_", values_from = value)
# # A tibble: 44 x 4
# id axis set value
# <int> <chr> <chr> <dbl>
# 1 1 x 1 10
# 2 1 x 2 10
# 3 1 x 3 10
# 4 1 x 4 8
# 5 2 x 1 8
# 6 2 x 2 8
# 7 2 x 3 8
# 8 2 x 4 8
# 9 3 x 1 13
# 10 3 x 2 13
# # ... with 34 more rows
# # A tibble: 11 x 6
# id axis x_1 x_2 x_3 x_4
# <int> <chr> <dbl> <dbl> <dbl> <dbl>
# 1 1 x 10 10 10 8
# 2 2 x 8 8 8 8
# 3 3 x 13 13 13 8
# 4 4 x 9 9 9 8
# 5 5 x 11 11 11 8
# 6 6 x 14 14 14 8
# 7 7 x 6 6 6 8
# 8 8 x 4 4 4 19
# 9 9 x 12 12 12 8
# 10 10 x 7 7 7 8
# 11 11 x 5 5 5 8
引数values_fill
縦持ちデータから横持ちデータに変換した後のデータにNA
ができるとき引数values_fill
でNA
を変換できます。リストの形で指定します。
# NAを含むデータを準備
anscombe_long %>%
filter(!(axis == "y" & set == "4")) %>% print() %>% # y4のみ除く
pivot_wider(names_from = axis, values_from = value) # y4がないので、NAができる
# # A tibble: 77 x 4
# id axis set value
# <int> <chr> <chr> <dbl>
# 1 1 x 1 10
# 2 1 x 2 10
# 3 1 x 3 10
# 4 1 x 4 8
# 5 1 y 1 8.04
# 6 1 y 2 9.14
# 7 1 y 3 7.46
# 8 2 x 1 8
# 9 2 x 2 8
# 10 2 x 3 8
# # ... with 67 more rows
# # A tibble: 44 x 4
# id set x y
# <int> <chr> <dbl> <dbl>
# 1 1 1 10 8.04
# 2 1 2 10 9.14
# 3 1 3 10 7.46
# 4 1 4 8 NA
# 5 2 1 8 6.95
# 6 2 2 8 8.14
# 7 2 3 8 6.77
# 8 2 4 8 NA
# 9 3 1 13 7.58
# 10 3 2 13 8.74
# # ... with 34 more rows
# このNAを0で埋める
anscombe_long %>%
filter(!(axis == "y" & set == "4")) %>% print() %>% # y4のみ除く
pivot_wider(names_from = axis, values_from = value, values_fill = list(value = 0)) # NAを0で埋める
# A tibble: 44 x 4
# id set x y
# <int> <chr> <dbl> <dbl>
# 1 1 1 10 8.04
# 2 1 2 10 9.14
# 3 1 3 10 7.46
# 4 1 4 8 0
# 5 2 1 8 6.95
# 6 2 2 8 8.14
# 7 2 3 8 6.77
# 8 2 4 8 0
# 9 3 1 13 7.58
# 10 3 2 13 8.74
# # ... with 34 more rows
# NAを含むデータを準備
anscombe_tbl %>%
select(x1, y1) %>% print() %>%
pivot_wider(names_from = y1, values_from = y1)
# # A tibble: 11 x 2
# x1 y1
# <dbl> <dbl>
# 1 10 8.04
# 2 8 6.95
# 3 13 7.58
# 4 9 8.81
# 5 11 8.33
# 6 14 9.96
# 7 6 7.24
# 8 4 4.26
# 9 12 10.8
# 10 7 4.82
# 11 5 5.68
# # A tibble: 11 x 12
# x1 `8.04` `6.95` `7.58` `8.81` `8.33` `9.96` `7.24` `4.26` `10.84` `4.82` `5.68`
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 10 8.04 NA NA NA NA NA NA NA NA NA NA
# 2 8 NA 6.95 NA NA NA NA NA NA NA NA NA
# 3 13 NA NA 7.58 NA NA NA NA NA NA NA NA
# 4 9 NA NA NA 8.81 NA NA NA NA NA NA NA
# 5 11 NA NA NA NA 8.33 NA NA NA NA NA NA
# 6 14 NA NA NA NA NA 9.96 NA NA NA NA NA
# 7 6 NA NA NA NA NA NA 7.24 NA NA NA NA
# 8 4 NA NA NA NA NA NA NA 4.26 NA NA NA
# 9 12 NA NA NA NA NA NA NA NA 10.8 NA NA
# 10 7 NA NA NA NA NA NA NA NA NA 4.82 NA
# 11 5 NA NA NA NA NA NA NA NA NA NA 5.68
# このNAを0で埋める
anscombe_tbl %>%
select(x1, y1) %>% print() %>%
pivot_wider(names_from = y1, values_from = y1, values_fill = list(y1 = 0))
# # A tibble: 11 x 12
# x1 `8.04` `6.95` `7.58` `8.81` `8.33` `9.96` `7.24` `4.26` `10.84` `4.82` `5.68`
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 10 8.04 0 0 0 0 0 0 0 0 0 0
# 2 8 0 6.95 0 0 0 0 0 0 0 0 0
# 3 13 0 0 7.58 0 0 0 0 0 0 0 0
# 4 9 0 0 0 8.81 0 0 0 0 0 0 0
# 5 11 0 0 0 0 8.33 0 0 0 0 0 0
# 6 14 0 0 0 0 0 9.96 0 0 0 0 0
# 7 6 0 0 0 0 0 0 7.24 0 0 0 0
# 8 4 0 0 0 0 0 0 0 4.26 0 0 0
# 9 12 0 0 0 0 0 0 0 0 10.8 0 0
# 10 7 0 0 0 0 0 0 0 0 0 4.82 0
# 11 5 0 0 0 0 0 0 0 0 0 0 5.68
# NAを含むデータを準備
iris_tbl %>%
mutate(Len = floor(Sepal.Length), Wid = floor(Sepal.Width)) %>% print() %>%
count(Len, Wid, name = "count") %>% print() %>%
pivot_wider(names_from = Wid, values_from = count) # LenとWidのクロス表(NAがある)
# # A tibble: 150 x 8
# id Sepal.Length Sepal.Width Petal.Length Petal.Width Species Len Wid
# <int> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl>
# 1 1 5.1 3.5 1.4 0.2 setosa 5 3
# 2 2 4.9 3 1.4 0.2 setosa 4 3
# 3 3 4.7 3.2 1.3 0.2 setosa 4 3
# 4 4 4.6 3.1 1.5 0.2 setosa 4 3
# 5 5 5 3.6 1.4 0.2 setosa 5 3
# 6 6 5.4 3.9 1.7 0.4 setosa 5 3
# 7 7 4.6 3.4 1.4 0.3 setosa 4 3
# 8 8 5 3.4 1.5 0.2 setosa 5 3
# 9 9 4.4 2.9 1.4 0.2 setosa 4 2
# 10 10 4.9 3.1 1.5 0.1 setosa 4 3
# # ... with 140 more rows
# # A tibble: 9 x 3
# Len Wid count
# <dbl> <dbl> <int>
# 1 4 2 4
# 2 4 3 18
# 3 5 2 24
# 4 5 3 33
# 5 5 4 4
# 6 6 2 25
# 7 6 3 29
# 8 7 2 4
# 9 7 3 9
# # A tibble: 4 x 4
# Len `2` `3` `4`
# <dbl> <int> <int> <int>
# 1 4 4 18 NA
# 2 5 24 33 4
# 3 6 25 29 NA
# 4 7 4 9 NA
# このNAを0で埋める
iris_tbl %>%
mutate(Len = floor(Sepal.Length), Wid = floor(Sepal.Width)) %>% print() %>%
select(Len, Wid) %>%
count(Len, Wid, name = "count") %>%
pivot_wider(names_from = Wid, values_from = count, values_fill = list(count = 0))
# # A tibble: 4 x 4
# Len `2` `3` `4`
# <dbl> <int> <int> <int>
# 1 4 4 18 0
# 2 5 24 33 4
# 3 6 25 29 0
# 4 7 4 9 0
# NAを含むデータを準備
iris_tbl %>%
group_by(Species, Sepal.Length) %>%
count(name = "count") %>% print() %>%
pivot_wider(names_from = Species, values_from = count) # NAになるところがある
# # A tibble: 57 x 3
# # Groups: Species, Sepal.Length [57]
# Species Sepal.Length count
# <fct> <dbl> <int>
# 1 setosa 4.3 1
# 2 setosa 4.4 3
# 3 setosa 4.5 1
# 4 setosa 4.6 4
# 5 setosa 4.7 2
# 6 setosa 4.8 5
# 7 setosa 4.9 4
# 8 setosa 5 8
# 9 setosa 5.1 8
# 10 setosa 5.2 3
# # ... with 47 more rows
# # A tibble: 35 x 4
# # Groups: Sepal.Length [35]
# Sepal.Length setosa versicolor virginica
# <dbl> <int> <int> <int>
# 1 4.3 1 NA NA
# 2 4.4 3 NA NA
# 3 4.5 1 NA NA
# 4 4.6 4 NA NA
# 5 4.7 2 NA NA
# 6 4.8 5 NA NA
# 7 4.9 4 1 1
# 8 5 8 2 NA
# 9 5.1 8 1 NA
# 10 5.2 3 1 NA
# # ... with 25 more rows
# このNAを0で埋める
iris_tbl %>%
group_by(Species, Sepal.Length) %>%
count(name = "count") %>% print() %>%
pivot_wider(names_from = Species, values_from = count,
values_fill = list(count = 0))
# # A tibble: 35 x 4
# # Groups: Sepal.Length [35]
# Sepal.Length setosa versicolor virginica
# <dbl> <int> <int> <int>
# 1 4.3 1 0 0
# 2 4.4 3 0 0
# 3 4.5 1 0 0
# 4 4.6 4 0 0
# 5 4.7 2 0 0
# 6 4.8 5 0 0
# 7 4.9 4 1 1
# 8 5 8 2 0
# 9 5.1 8 1 0
# 10 5.2 3 1 0
# # ... with 25 more rows
これを用いると、カテゴリー変数の列を0,1の2値変数に変換できます。
iris_tbl %>%
mutate(value = 1) %>% print() %>%
pivot_wider(names_from = Species, values_from = value)
# # A tibble: 150 x 7
# id Sepal.Length Sepal.Width Petal.Length Petal.Width Species value
# <int> <dbl> <dbl> <dbl> <dbl> <fct> <dbl>
# 1 1 5.1 3.5 1.4 0.2 setosa 1
# 2 2 4.9 3 1.4 0.2 setosa 1
# 3 3 4.7 3.2 1.3 0.2 setosa 1
# 4 4 4.6 3.1 1.5 0.2 setosa 1
# 5 5 5 3.6 1.4 0.2 setosa 1
# 6 6 5.4 3.9 1.7 0.4 setosa 1
# 7 7 4.6 3.4 1.4 0.3 setosa 1
# 8 8 5 3.4 1.5 0.2 setosa 1
# 9 9 4.4 2.9 1.4 0.2 setosa 1
# 10 10 4.9 3.1 1.5 0.1 setosa 1
# # ... with 140 more rows
# # A tibble: 150 x 8
# id Sepal.Length Sepal.Width Petal.Length Petal.Width setosa versicolor virginica
# <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 5.1 3.5 1.4 0.2 1 NA NA
# 2 2 4.9 3 1.4 0.2 1 NA NA
# 3 3 4.7 3.2 1.3 0.2 1 NA NA
# 4 4 4.6 3.1 1.5 0.2 1 NA NA
# 5 5 5 3.6 1.4 0.2 1 NA NA
# 6 6 5.4 3.9 1.7 0.4 1 NA NA
# 7 7 4.6 3.4 1.4 0.3 1 NA NA
# 8 8 5 3.4 1.5 0.2 1 NA NA
# 9 9 4.4 2.9 1.4 0.2 1 NA NA
# 10 10 4.9 3.1 1.5 0.1 1 NA NA
# # ... with 140 more rows
iris_tbl %>%
mutate(value = 1) %>% print() %>%
pivot_wider(names_from = Species, values_from = value,
values_fill = list(value = 0))
# # A tibble: 150 x 8
# id Sepal.Length Sepal.Width Petal.Length Petal.Width setosa versicolor virginica
# <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 5.1 3.5 1.4 0.2 1 0 0
# 2 2 4.9 3 1.4 0.2 1 0 0
# 3 3 4.7 3.2 1.3 0.2 1 0 0
# 4 4 4.6 3.1 1.5 0.2 1 0 0
# 5 5 5 3.6 1.4 0.2 1 0 0
# 6 6 5.4 3.9 1.7 0.4 1 0 0
# 7 7 4.6 3.4 1.4 0.3 1 0 0
# 8 8 5 3.4 1.5 0.2 1 0 0
# 9 9 4.4 2.9 1.4 0.2 1 0 0
# 10 10 4.9 3.1 1.5 0.1 1 0 0
# # ... with 140 more rows
# Species列を0,1の2値変数にできる
引数values_fn
引数values_fn
を使うとエクセルのピボット集計のような集計も同時にできます。
# ピボット集計で合計
iris_long %>%
pivot_wider(id_cols = c(part, measure),
names_from = Species, values_from = value,
values_fn = list(value = sum))
# # A tibble: 4 x 5
# part measure setosa versicolor virginica
# <chr> <chr> <dbl> <dbl> <dbl>
# 1 Sepal Length 250. 297. 329.
# 2 Sepal Width 171. 138. 149.
# 3 Petal Length 73.1 213 278.
# 4 Petal Width 12.3 66.3 101.
# グループ化して集計してpivot_widerするのと同じ(順序は異なるが)
iris_long %>%
group_by(Species, part, measure) %>%
summarise(Sum = sum(value)) %>% print() %>%
pivot_wider(names_from = Species, values_from = Sum)
# # A tibble: 12 x 4
# # Groups: Species, part [6]
# Species part measure Sum
# <fct> <chr> <chr> <dbl>
# 1 setosa Petal Length 73.1
# 2 setosa Petal Width 12.3
# 3 setosa Sepal Length 250.
# 4 setosa Sepal Width 171.
# 5 versicolor Petal Length 213
# 6 versicolor Petal Width 66.3
# 7 versicolor Sepal Length 297.
# 8 versicolor Sepal Width 138.
# 9 virginica Petal Length 278.
# 10 virginica Petal Width 101.
# 11 virginica Sepal Length 329.
# 12 virginica Sepal Width 149.
# # A tibble: 4 x 5
# # Groups: part [2]
# part measure setosa versicolor virginica
# <chr> <chr> <dbl> <dbl> <dbl>
# 1 Petal Length 73.1 213 278.
# 2 Petal Width 12.3 66.3 101.
# 3 Sepal Length 250. 297. 329.
# 4 Sepal Width 171. 138. 149.
# ピボット集計で平均
iris_long %>%
pivot_wider(id_cols = c(part,measure),
names_from = Species, values_from = value,
values_fn = list(value = mean))
# # A tibble: 4 x 5
# part measure setosa versicolor virginica
# <chr> <chr> <dbl> <dbl> <dbl>
# 1 Sepal Length 5.01 5.94 6.59
# 2 Sepal Width 3.43 2.77 2.97
# 3 Petal Length 1.46 4.26 5.55
# 4 Petal Width 0.246 1.33 2.03
# グループ化して集計してpivot_widerするのと同じ(順序は異なるが)
iris_long %>%
group_by(Species, part, measure) %>%
summarise(Mean = mean(value)) %>% print() %>%
pivot_wider(names_from = Species, values_from = Mean)
# # A tibble: 12 x 4
# # Groups: Species, part [6]
# Species part measure Mean
# <fct> <chr> <chr> <dbl>
# 1 setosa Petal Length 1.46
# 2 setosa Petal Width 0.246
# 3 setosa Sepal Length 5.01
# 4 setosa Sepal Width 3.43
# 5 versicolor Petal Length 4.26
# 6 versicolor Petal Width 1.33
# 7 versicolor Sepal Length 5.94
# 8 versicolor Sepal Width 2.77
# 9 virginica Petal Length 5.55
# 10 virginica Petal Width 2.03
# 11 virginica Sepal Length 6.59
# 12 virginica Sepal Width 2.97
# # A tibble: 4 x 5
# # Groups: part [2]
# part measure setosa versicolor virginica
# <chr> <chr> <dbl> <dbl> <dbl>
# 1 Petal Length 1.46 4.26 5.55
# 2 Petal Width 0.246 1.33 2.03
# 3 Sepal Length 5.01 5.94 6.59
# 4 Sepal Width 3.43 2.77 2.97
参考
- tidyr
- R for Data Science
- Heavy Watal - R stats
- Rのデータベースの操作についての自前のまとめ