LoginSignup
8
5

More than 3 years have passed since last update.

Rのデータフレームの列操作についてのメモ

Last updated at Posted at 2021-03-11

はじめに

Rのデータフレームの列の操作について、サンプルデータを用いて具体的に練習してみました。

目次

Rのデータフレームの列についての操作

Rのデータフレームの列の操作について、サンプルデータを用いて具体的に練習します。
なお、以下については別記事でまとめています。
- 行の操作について:こちら
- 集計操作について:こちら
- pivot操作について:こちら
- データフレームの関数について:こちら

参考:
https://r4ds.had.co.nz/transform.html
https://github.com/tidyverse/dplyr
https://heavywatal.github.io/rstats/dplyr.html

練習に用いるデータ

Rのサンプルデータirisanscombeを用います。
行番号を表示した方が分かりやすいため、tibble::rowid_to_column()で行番号を列名にしています。
こちらの記事と同じデータを用います。)

R
library(dplyr)
library(tibble)
library(tidyr)

iris

# 行番号を列名に
iris_tbl <- iris %>% as_tibble() %>% rowid_to_column("id")
iris_tbl
# # A tibble: 150 x 6
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1          5.1         3.5          1.4         0.2 setosa 
# 2     2          4.9         3            1.4         0.2 setosa 

anscombe

anscombe_tbl <- anscombe %>% as_tibble() %>% rowid_to_column("id")
anscombe_tbl
# # A tibble: 11 x 9
#      id    x1    x2    x3    x4    y1    y2    y3    y4
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1    10    10    10     8  8.04  9.14  7.46  6.58
# 2     2     8     8     8     8  6.95  8.14  6.77  5.76

select():列の選択・並び替え

列を選択します。

R
# select()
iris_tbl %>%
  select(c(1, 6, 2))
iris_tbl %>%
  select(c(id, Species, Sepal.Length))
iris_tbl %>%
  select(id, Species, Sepal.Length)
# # A tibble: 150 x 3
#      id Species Sepal.Length
#   <int> <fct>          <dbl>
# 1     1 setosa           5.1
# 2     2 setosa           4.9

iris_tbl %>%
  select(2:5)
iris_tbl %>%
  select(Sepal.Length:Petal.Width)
# # A tibble: 150 x 4
#   Sepal.Length Sepal.Width Petal.Length Petal.Width
#          <dbl>       <dbl>        <dbl>       <dbl>
# 1          5.1         3.5          1.4         0.2
# 2          4.9         3            1.4         0.2

anscombe_tbl %>%
  select(2:5)
anscombe_tbl %>%
  select(x1:y4)
# # A tibble: 11 x 4
#      x1    x2    x3    x4
#   <dbl> <dbl> <dbl> <dbl>
# 1    10    10    10     8
# 2     8     8     8     8

everything():すべての列

R
# everything()
iris_tbl %>%
  select(everything())
# # A tibble: 150 x 4
#   Sepal.Length Sepal.Width Petal.Length Petal.Width
#          <dbl>       <dbl>        <dbl>       <dbl>
# 1          5.1         3.5          1.4         0.2
# 2          4.9         3            1.4         0.2

anscombe_tbl %>%
  select(everything())
# # A tibble: 11 x 9
#      id    x1    x2    x3    x4    y1    y2    y3    y4
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1    10    10    10     8  8.04  9.14  7.46  6.58
# 2     2     8     8     8     8  6.95  8.14  6.77  5.76

last_col():最後の列

R
# last_col()
iris_tbl %>%
  select(last_col())  # 最後の列
iris_tbl %>%
  select(last_col(0)) # 最後の列
# # A tibble: 150 x 1
#  Species
#    <fct>  
# 1 setosa 
# 2 setosa 

iris_tbl %>%
  select(last_col(1)) # 最後の1つ前の列(後ろから2番目の列)
# # A tibble: 150 x 1
#   Petal.Width
#         <dbl>
# 1         0.2
# 2         0.2

iris_tbl %>%
  select(last_col(2)) # 最後の2つ前の列(後ろから3番目の列)
# # A tibble: 150 x 1
#   Petal.Length
#          <dbl>
# 1          1.4
# 2          1.4

iris_tbl %>%
  select(2:last_col(2)) # 2列目から最後の2つ前の列(後ろから3番目の列)まで
# # A tibble: 150 x 3
#   Sepal.Length Sepal.Width Petal.Length
#          <dbl>       <dbl>        <dbl>
# 1          5.1         3.5          1.4
# 2          4.9         3            1.4

iris_tbl %>%
  select(!(2:last_col(2)))
# # A tibble: 150 x 3
#      id Petal.Width Species
#   <int>       <dbl> <fct>  
# 1     1         0.2 setosa 
# 2     2         0.2 setosa 

&, |:列の条件選択 and・or

R
# &, |
iris_tbl %>%
  select(1:3 & 2:5)
# # A tibble: 150 x 2
#   Sepal.Length Sepal.Width
#          <dbl>       <dbl>
# 1          5.1         3.5
# 2          4.9         3  

iris_tbl %>%
  select(1:3 | 2:5)
# # A tibble: 150 x 5
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width
#   <int>        <dbl>       <dbl>        <dbl>       <dbl>
# 1     1          5.1         3.5          1.4         0.2
# 2     2          4.9         3            1.4         0.2

iris_tbl %>%
  select(starts_with("Sepal") & ends_with("Width"))
# # A tibble: 150 x 1
#   Sepal.Width
#         <dbl>
# 1         3.5
# 2         3  

iris_tbl %>%
  select(starts_with("Sepal") | ends_with("Width"))
# # A tibble: 150 x 3
#   Sepal.Length Sepal.Width Petal.Width
#          <dbl>       <dbl>       <dbl>
# 1          5.1         3.5         0.2
# 2          4.9         3           0.2

-, !:列の条件選択 not

R
# -, !
iris_tbl %>%
  select(-id)
iris_tbl %>%
  select(!id)
# # A tibble: 150 x 5
#   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#          <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1          5.1         3.5          1.4         0.2 setosa 
# 2          4.9         3            1.4         0.2 setosa 

iris_tbl %>%
  select(!2:5)
iris_tbl %>%
  select(-(2:5))
# # A tibble: 150 x 2
#      id Species
#   <int> <fct>  
# 1     1 setosa 
# 2     2 setosa 

iris_tbl %>%
  select(-c(1, 6, 2))
iris_tbl %>%
  select(-id, -Species, -Sepal.Length)
# # A tibble: 150 x 3
#   Sepal.Width Petal.Length Petal.Width
#         <dbl>        <dbl>       <dbl>
# 1         3.5          1.4         0.2
# 2         3            1.4         0.2

iris_tbl %>%
  select(-starts_with("Sepal"))
iris_tbl %>%
  select(!starts_with("Sepal"))
# # A tibble: 150 x 4
#      id Petal.Length Petal.Width Species
#   <int>        <dbl>       <dbl> <fct>  
# 1     1          1.4         0.2 setosa 
# 2     2          1.4         0.2 setosa 

starts_with(), ends_with():指定した文字列で始まる・終わる列名

R
# starts_with(), ends_with()
iris_tbl %>%
  select(id, starts_with("S")) # "S"で始まる列
# # A tibble: 150 x 4
#      id Sepal.Length Sepal.Width Species
#   <int>        <dbl>       <dbl> <fct>  
# 1     1          5.1         3.5 setosa 
# 2     2          4.9         3   setosa 

iris_tbl %>%
  select(id, ends_with("th")) # "th"で終わる列
# # A tibble: 150 x 5
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width
#   <int>        <dbl>       <dbl>        <dbl>       <dbl>
# 1     1          5.1         3.5          1.4         0.2
# 2     2          4.9         3            1.4         0.2

contains():指定した文字列を含む列名

R
# contains()
iris_tbl %>%
  select(contains("al")) # "al"を含む列
iris_tbl %>%
  select(contains(".")) # "."を含む列
# # A tibble: 150 x 4
#   Sepal.Length Sepal.Width Petal.Length Petal.Width
#          <dbl>       <dbl>        <dbl>       <dbl>
# 1          5.1         3.5          1.4         0.2
# 2          4.9         3            1.4         0.2

iris_tbl %>%
  select(-contains("al"))
# # A tibble: 150 x 2
#      id Species
#   <int> <fct>  
# 1     1 setosa 
# 2     2 setosa 

matches():指定した正規表現にマッチする列名

R
# matches()
iris_tbl %>%
  select(matches("al")) # "al"を含む列名
# # A tibble: 150 x 4
#   Sepal.Length Sepal.Width Petal.Length Petal.Width
#          <dbl>       <dbl>        <dbl>       <dbl>
# 1          5.1         3.5          1.4         0.2
# 2          4.9         3            1.4         0.2

iris_tbl %>%
  select(matches(".")) # 「"."を含む列名」ではない
# contains()に指定するのは文字列だが、matches()に指定するのは正規表現
# 正規表現で"."は「何でも」の意味
# # A tibble: 150 x 6
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1          5.1         3.5          1.4         0.2 setosa 
# 2     2          4.9         3            1.4         0.2 setosa 

iris_tbl %>%
  select(matches("\\.")) # "."を含む列名
# 正規表現で"\\."は"."を含む文字列
# # A tibble: 150 x 4
#   Sepal.Length Sepal.Width Petal.Length Petal.Width
#          <dbl>       <dbl>        <dbl>       <dbl>
# 1          5.1         3.5          1.4         0.2
# 2          4.9         3            1.4         0.2

iris_tbl %>%
  select(matches("^..$")) # 2桁の列名
# # A tibble: 150 x 1
#      id
#   <int>
# 1     1
# 2     2

iris_tbl %>%
  select(matches("^S......$")) # "S"で始まる7桁の列名
iris_tbl %>%
  select(matches("^S.{6}$")) # "S"で始まる7桁の列名
# # A tibble: 150 x 1
#  Species
#    <fct>  
# 1 setosa 
# 2 setosa 

iris_tbl %>%
  select(matches("^S.+$")) # "S"で始まる2桁以上の列名
iris_tbl %>%
  select(matches("^S.*$")) # "S"で始まる1桁以上の列名
# # A tibble: 150 x 3
#   Sepal.Length Sepal.Width Species
#          <dbl>       <dbl> <fct>  
# 1          5.1         3.5 setosa 
# 2          4.9         3   setosa 

iris_tbl %>%
  select(matches("p")) # "p"を含む列名
# # A tibble: 150 x 5
#   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#          <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1          5.1         3.5          1.4         0.2 setosa 
# 2          4.9         3            1.4         0.2 setosa 

iris_tbl %>%
  select(matches("^p")) # "p"で始まる列名(1桁目が"p")
# # A tibble: 150 x 2
#   Petal.Length Petal.Width
#          <dbl>       <dbl>
# 1          1.4         0.2
# 2          1.4         0.2

iris_tbl %>%
  select(matches("^.p")) # 2桁目が"p"の列名
# # A tibble: 150 x 1
#  Species
#    <fct>  
# 1 setosa 
# 2 setosa 

iris_tbl %>%
  select(matches("\\W")) # 英数字以外を含む列名
# # A tibble: 150 x 4
#   Sepal.Length Sepal.Width Petal.Length Petal.Width
#          <dbl>       <dbl>        <dbl>       <dbl>
# 1          5.1         3.5          1.4         0.2
# 2          4.9         3            1.4         0.2

iris_tbl %>%
  select(matches("^\\w+$")) # 先頭から末尾まですべて英数字の列名(英数字以外を含まない列名)
iris_tbl %>%
  select(-matches("\\W")) # 英数字以外を含まない列名
# # A tibble: 150 x 2
#      id Species
#   <int> <fct>  
# 1     1 setosa 
# 2     2 setosa 

anscombe_tbl %>%
  select(matches("^x|y")) # "x"か"y"で始まる列名
anscombe_tbl %>%
  select(matches("[1-4]$")) # "1"-"4"で終わる列名
anscombe_tbl %>%
  select(matches("^\\w\\d")) # 1桁目が英数字、2桁目が数字の列名
# # A tibble: 11 x 8
#      x1    x2    x3    x4    y1    y2    y3    y4
#   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1    10    10    10     8  8.04  9.14  7.46  6.58
# 2     8     8     8     8  6.95  8.14  6.77  5.76

anscombe_tbl %>%
  rename(`x 1` = x1) %>% print() %>%
  select(matches("\\s")) # 空白を含む列名
# # A tibble: 11 x 1
#   `x 1`
#   <dbl>
# 1    10
# 2     8

anscombe_tbl %>%
  rename(`x 1` = x1) %>% print() %>%
  select(matches("^\\S+$")) # 空白を含まない列名
anscombe_tbl %>%
  rename(`x 1` = x1) %>% print() %>%
  select(!matches("\\s")) # 空白を含まない列名
# # A tibble: 11 x 8
#      id    x2    x3    x4    y1    y2    y3    y4
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1    10    10     8  8.04  9.14  7.46  6.58
# 2     2     8     8     8  6.95  8.14  6.77  5.76

anscombe_tbl %>%
  select(matches("^[a-z]{2}$")) # 英字小文字2桁の列名
# # A tibble: 11 x 1
#      id
#   <int>
# 1     1
# 2     2

iris_tbl %>%
  select(matches("^[a-z|A-Z]{7}$")) # 英字7桁の列名
# # A tibble: 150 x 1
#  Species
#    <fct>  
# 1 setosa 
# 2 setosa 

num_range():数字の連番の列名

R
# num_range()
anscombe_tbl %>%
  select(num_range("x", 2:3))
# # A tibble: 11 x 2
#      x2    x3
#   <dbl> <dbl>
# 1    10    10
# 2     8     8

anscombe_tbl %>%
  select(-num_range("x", 2:3))
# # A tibble: 11 x 7
#      id    x1    x4    y1    y2    y3    y4
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1    10     8  8.04  9.14  7.46  6.58
# 2     2     8     8  6.95  8.14  6.77  5.76

anscombe_tbl %>%
  select(num_range("x", 3:10))
# # A tibble: 11 x 2
#      x3    x4
#   <dbl> <dbl>
# 1    10     8
# 2     8     8

anscombe_tbl %>%
  select(-num_range("x", 3:10))
# # A tibble: 11 x 7
#      id    x1    x2    y1    y2    y3    y4
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1    10    10  8.04  9.14  7.46  6.58
# 2     2     8     8  6.95  8.14  6.77  5.76

all_of(), any_of():文字列ベクトルによる列名の選択

R
# all_of(), any_of()
vars <- names(iris_tbl)
vars
# [1] "id"           "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width"  "Species" 
vars <- vars[2:5]
vars
# [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width" 
iris_tbl %>%
  select(all_of(vars))
# # A tibble: 150 x 4
#   Sepal.Length Sepal.Width Petal.Length Petal.Width
#          <dbl>       <dbl>        <dbl>       <dbl>
# 1          5.1         3.5          1.4         0.2
# 2          4.9         3            1.4         0.2

library(stringr)
vars <- str_c("x", 1:10)
vars
# [1] "x1"  "x2"  "x3"  "x4"  "x5"  "x6"  "x7"  "x8"  "x9"  "x10"
anscombe_tbl %>%
  select(any_of(vars))
# # A tibble: 11 x 4
#      x1    x2    x3    x4
#   <dbl> <dbl> <dbl> <dbl>
# 1    10    10    10     8
# 2     8     8     8     8

vars <- str_c(letters, 1)
vars
#  [1] "a1" "b1" "c1" "d1" "e1" "f1" "g1" "h1" "i1" "j1" "k1" "l1" "m1"
# [14] "n1" "o1" "p1" "q1" "r1" "s1" "t1" "u1" "v1" "w1" "x1" "y1" "z1"
anscombe_tbl %>%
  select(any_of(vars))
# # A tibble: 11 x 2
#      x1    y1
#   <dbl> <dbl>
# 1    10  8.04
# 2     8  6.95

anscombe_tbl %>%
  select(-any_of(vars))
# # A tibble: 11 x 7
#      id    x2    x3    x4    y2    y3    y4
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1    10    10     8  9.14  7.46  6.58
# 2     2     8     8     8  8.14  6.77  5.76

where():列の属性による列の選択

R
# where()
iris_tbl %>%
  select(where(is.numeric))
# # A tibble: 150 x 5
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width
#   <int>        <dbl>       <dbl>        <dbl>       <dbl>
# 1     1          5.1         3.5          1.4         0.2
# 2     2          4.9         3            1.4         0.2

iris_tbl %>%
  select(where(is.double))
# # A tibble: 150 x 4
#   Sepal.Length Sepal.Width Petal.Length Petal.Width
#          <dbl>       <dbl>        <dbl>       <dbl>
# 1          5.1         3.5          1.4         0.2
# 2          4.9         3            1.4         0.2

iris_tbl %>%
  select(where(is.integer))
# # A tibble: 150 x 1
#      id
#   <int>
# 1     1
# 2     2

iris_tbl %>%
  select(where(is.character))
# # A tibble: 150 x 0

iris_tbl %>%
  select(where(is.factor))
# # A tibble: 150 x 1
#   Species
#   <fct>  
# 1 setosa 
# 2 setosa 

参考:
- Select helpers
- Selection language
- Argument type: tidy-select

rename():列名の変更

列名を変更できます。

R
# rename()
iris_tbl %>%
  rename(ID = id)
# # A tibble: 150 x 6
#      ID Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1          5.1         3.5          1.4         0.2 setosa 
# 2     2          4.9         3            1.4         0.2 setosa 

iris_tbl %>%
  rename(sl = Sepal.Length, sw = Sepal.Width,
         pl = Petal.Length, pw = Petal.Width)
# # A tibble: 150 x 6
#      id    sl    sw    pl    pw Species
#   <int> <dbl> <dbl> <dbl> <dbl> <fct>  
# 1     1   5.1   3.5   1.4   0.2 setosa 
# 2     2   4.9   3     1.4   0.2 setosa 

var <- "Species"      # 変更前の列名
var_2 <- toupper(var) # 変更後の列名
iris_tbl %>%
  rename(!!var_2 := !!var)
# # A tibble: 150 x 6
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width SPECIES
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1          5.1         3.5          1.4         0.2 setosa 
# 2     2          4.9         3            1.4         0.2 setosa 

vars <- names(iris_tbl) # 変更前の列名
vars_2 <- toupper(vars) # 変更後の列名
named_vars <- setNames(vars, vars_2) # 名前付きベクトル
iris_tbl %>%
  rename(!!!named_vars)
# # A tibble: 150 x 6
#      ID SEPAL.LENGTH SEPAL.WIDTH PETAL.LENGTH PETAL.WIDTH SPECIES
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1          5.1         3.5          1.4         0.2 setosa 
# 2     2          4.9         3            1.4         0.2 setosa 

vars <- str_c("x", 1:4) # 変更前の列名
vars_2 <- toupper(vars) # 変更後の列名
named_vars <- setNames(vars, vars_2)
anscombe_tbl %>%
  rename(!!!named_vars)
# # A tibble: 11 x 9
#      id    X1    X2    X3    X4    y1    y2    y3    y4
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1    10    10    10     8  8.04  9.14  7.46  6.58
# 2     2     8     8     8     8  6.95  8.14  6.77  5.76

rename_with():複数列にわたる列名の一括変更

R
# rename_with()
iris_tbl %>%
  rename_with(toupper)
iris_tbl %>%
  rename_with(toupper, everything())
# # A tibble: 150 x 6
#      ID SEPAL.LENGTH SEPAL.WIDTH PETAL.LENGTH PETAL.WIDTH SPECIES
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1          5.1         3.5          1.4         0.2 setosa 
# 2     2          4.9         3            1.4         0.2 setosa 

anscombe_tbl %>%
  rename_with(toupper, x1:x4)
anscombe_tbl %>%
  rename_with(.fn = toupper, .cols = x1:x4)
anscombe_tbl %>%
  rename_with(.fn = ~toupper(.), .cols = x1:x4)
# # A tibble: 11 x 9
#      id    X1    X2    X3    X4    y1    y2    y3    y4
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1    10    10    10     8  8.04  9.14  7.46  6.58
# 2     2     8     8     8     8  6.95  8.14  6.77  5.76

複数列に一括適用する関数がf(x)の形の関数なら、引数.fnf~f(.)の形で指定します。後者の形を用いれば、複雑な関数や合成関数でも適用できます。

R
anscombe_tbl %>%
  rename_with(~str_c("var_", .), x1:x4)
anscombe_tbl %>%
  rename_with(.fn = ~str_c("var_", .), .cols = x1:x4)
# # A tibble: 11 x 9
#      id var_x1 var_x2 var_x3 var_x4    y1    y2    y3    y4
#   <int>  <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1     10     10     10      8  8.04  9.14  7.46  6.58
# 2     2      8      8      8      8  6.95  8.14  6.77  5.76

anscombe_tbl %>%
  rename_with(~str_c("var_", toupper(.)), x1:x4)
anscombe_tbl %>%
  rename_with(.fn = ~str_c("var_", toupper(.)), .cols = x1:x4)
# # A tibble: 11 x 9
#      id var_X1 var_X2 var_X3 var_X4    y1    y2    y3    y4
#   <int>  <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1     10     10     10      8  8.04  9.14  7.46  6.58
# 2     2      8      8      8      8  6.95  8.14  6.77  5.76

iris_tbl %>%
  rename_with(~str_to_lower(str_c(str_replace(., "\\.", "_"), "(cm)")), 2:5)
iris_tbl %>%
  rename_with(~str_replace(., "\\.", "_"), 2:5) %>%
  rename_with(~str_c(., "(cm)"), 2:5) %>%
  rename_with(~str_to_lower(.), 2:5)
# # A tibble: 150 x 6
#      id `sepal_length(cm)` `sepal_width(cm)` `petal_length(cm)` `petal_width(cm)` Species
#   <int>              <dbl>             <dbl>              <dbl>             <dbl> <fct>  
# 1     1                5.1               3.5                1.4               0.2 setosa 
# 2     2                4.9               3                  1.4               0.2 setosa 

vars <- names(iris_tbl)
vars <- vars[2:5]
vars
# [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width"
str_replace(vars, "\\.", "_") # "\\."は正規表現で"."の意味
# [1] "Sepal_Length" "Sepal_Width"  "Petal_Length" "Petal_Width" 
str_c(str_replace(vars, "\\.", "_"), "(cm)")
# [1] "Sepal_Length(cm)" "Sepal_Width(cm)"  "Petal_Length(cm)" "Petal_Width(cm)" 
str_to_lower(str_c(str_replace(vars, "\\.", "_"), "(cm)"))
# [1] "sepal_length(cm)" "sepal_width(cm)"  "petal_length(cm)" "petal_width(cm)" 

relocate():列の並び替え

R
# relocate()
iris_tbl %>%
  relocate(Species) # Species列を最初に
# # A tibble: 150 x 6
#  Species    id Sepal.Length Sepal.Width Petal.Length Petal.Width
#    <fct>   <int>        <dbl>       <dbl>        <dbl>       <dbl>
# 1 setosa      1          5.1         3.5          1.4         0.2
# 2 setosa      2          4.9         3            1.4         0.2

iris_tbl %>%
  relocate(starts_with("Sepal")) # Petalで始まる列を最初に
# A tibble: 150 x 6
#   Sepal.Length Sepal.Width    id Petal.Length Petal.Width Species
#          <dbl>       <dbl> <int>        <dbl>       <dbl> <fct>  
# 1          5.1         3.5     1          1.4         0.2 setosa 
# 2          4.9         3       2          1.4         0.2 setosa 

iris_tbl %>%
  relocate(id, .after = Species) # id列をSpecies列の後に
iris_tbl %>%
  relocate(id, .after = last_col()) # id列を最後の列の後に
iris_tbl %>%
  relocate(id, .after = everything()) # id列を全列の後に
# # A tibble: 150 x 6
#   Sepal.Length Sepal.Width Petal.Length Petal.Width Species    id
#          <dbl>       <dbl>        <dbl>       <dbl> <fct>   <int>
# 1          5.1         3.5          1.4         0.2 setosa      1
# 2          4.9         3            1.4         0.2 setosa      2

iris_tbl %>%
  relocate(id, .before = Species) # id列をSpecies列の前に
iris_tbl %>%
  relocate(id, .before = last_col(0)) # id列をSpecies列の前に
iris_tbl %>%
  relocate(id, .after = last_col(1)) # id列を後ろから2番目の列の後に
# # A tibble: 150 x 6
#   Sepal.Length Sepal.Width Petal.Length Petal.Width    id Species
#          <dbl>       <dbl>        <dbl>       <dbl> <int> <fct>  
# 1          5.1         3.5          1.4         0.2     1 setosa 
# 2          4.9         3            1.4         0.2     2 setosa 

iris_tbl %>%
  relocate(starts_with("Sepal"), .after = starts_with("Petal")) # Sepalで始まる列をPetalで始まる列の後に
# # A tibble: 150 x 6
#      id Petal.Length Petal.Width Sepal.Length Sepal.Width Species
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1          1.4         0.2          5.1         3.5 setosa 
# 2     2          1.4         0.2          4.9         3   setosa 

iris_tbl %>%
  relocate(where(is.factor)) # factor型の列を最初に
# # A tibble: 150 x 6
#   Species    id Sepal.Length Sepal.Width Petal.Length Petal.Width
#   <fct>   <int>        <dbl>       <dbl>        <dbl>       <dbl>
# 1 setosa      1          5.1         3.5          1.4         0.2
# 2 setosa      2          4.9         3            1.4         0.2

iris_tbl %>%
  relocate(-where(is.integer)) # integer型の列以外を最初に
# # A tibble: 150 x 6
#   Sepal.Length Sepal.Width Petal.Length Petal.Width Species    id
#          <dbl>       <dbl>        <dbl>       <dbl> <fct>   <int>
# 1          5.1         3.5          1.4         0.2 setosa      1
# 2          4.9         3            1.4         0.2 setosa      2

anscombe_tbl %>%
  relocate(contains("x"), .after = contains("y")) # "x"を含む列を"y"を含む列の後に
# # A tibble: 11 x 9
#      id    y1    y2    y3    y4    x1    x2    x3    x4
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1  8.04  9.14  7.46  6.58    10    10    10     8
# 2     2  6.95  8.14  6.77  5.76     8     8     8     8

vars <- str_c(letters, "1")
vars
anscombe_tbl %>%
  relocate(any_of(vars))
# # A tibble: 11 x 9
#      x1    y1    id    x2    x3    x4    y2    y3    y4
#   <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1    10  8.04     1    10    10     8  9.14  7.46  6.58
# 2     8  6.95     2     8     8     8  8.14  6.77  5.76

mutate():列の計算

R
# mutate()
iris_tbl %>%
  mutate(SP = toupper(Species))
# # A tibble: 150 x 7
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species SP    
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>   <chr> 
# 1     1          5.1         3.5          1.4         0.2 setosa  SETOSA
# 2     2          4.9         3            1.4         0.2 setosa  SETOSA

iris_tbl %>%
  mutate(sp = as.character(Species))
# # A tibble: 150 x 7
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species sp    
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>   <chr> 
# 1     1          5.1         3.5          1.4         0.2 setosa  setosa
# 2     2          4.9         3            1.4         0.2 setosa  setosa

iris_tbl %>%
  mutate(sl_centralized = Sepal.Length - mean(Sepal.Length))
# 中心化
# # A tibble: 150 x 7
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species sl_centralized
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>            <dbl>
# 1     1          5.1         3.5          1.4         0.2 setosa          -0.743
# 2     2          4.9         3            1.4         0.2 setosa          -0.943

iris_tbl %>%
  mutate(sl_normalized = (Sepal.Length - mean(Sepal.Length)) / sd(Sepal.Length))
iris_tbl %>%
  mutate(sl_normalized = as.vector(scale(Sepal.Length)))
# 平均=0、標準偏差=1に正規化 (z-score normalization)
# # A tibble: 150 x 7
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species sl_normalized
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>           <dbl>
# 1     1          5.1         3.5          1.4         0.2 setosa         -0.898
# 2     2          4.9         3            1.4         0.2 setosa         -1.14 
# scale()はmatrixを返すためas.vector()でベクトル化している
scale(iris$Sepal.Length)
class(scale(iris$Sepal.Length)) #[1] "matrix"

iris_tbl %>%
  mutate(sl_normalized = (Sepal.Length - min(Sepal.Length)) / (max(Sepal.Length) - min(Sepal.Length)))
# 最小=0、最大=1に正規化 (min-max normalization)
# A tibble: 150 x 7
#     id Sepal.Length Sepal.Width Petal.Length Petal.Width Species sl_normalized
#  <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>           <dbl>
# 1     1          5.1         3.5          1.4         0.2 setosa         0.222 
# 2     2          4.9         3            1.4         0.2 setosa         0.167 

引数.before, .after:計算後の列の場所の指定

R
# 引数.before, .after
iris_tbl %>%
  mutate(SP = toupper(Species), .before = Species)
# A tibble: 150 x 7
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width SP     Species
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <chr>  <fct>  
# 1     1          5.1         3.5          1.4         0.2 SETOSA setosa 
# 2     2          4.9         3            1.4         0.2 SETOSA setosa 

library(stringr)
iris_tbl %>%
  mutate(ID = str_pad(id, 3L, pad = "0"), .after = id)
# # A tibble: 150 x 7
#      id ID    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int> <chr>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1 001            5.1         3.5          1.4         0.2 setosa 
# 2     2 002            4.9         3            1.4         0.2 setosa 

anscombe_tbl %>%
  mutate(log_x1 = log(x1), .after = x1)
# # A tibble: 11 x 10
#      id    x1 log_x1    x2    x3    x4    y1    y2    y3    y4
#   <int> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1    10   2.30    10    10     8  8.04  9.14  7.46  6.58
# 2     2     8   2.08     8     8     8  6.95  8.14  6.77  5.76

引数.keep:計算に用いた列を残すかどうか

R
# 引数.keep
anscombe_tbl %>%
  mutate(z1 = x1 + y1, .keep = "all")    # すべての列を残す
# # A tibble: 11 x 10
#      id    x1    x2    x3    x4    y1    y2    y3    y4    z1
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1    10    10    10     8  8.04  9.14  7.46  6.58 18.0 
# 2     2     8     8     8     8  6.95  8.14  6.77  5.76 15.0 

anscombe_tbl %>% 
  mutate(z1 = x1 + y1, .keep = "used")   # 計算に用いた列を残す
# # A tibble: 11 x 3
#      x1    y1    z1
#   <dbl> <dbl> <dbl>
# 1    10  8.04 18.0 
# 2     8  6.95 15.0 

anscombe_tbl %>%
  mutate(z1 = x1 + y1, .keep = "unused") # 計算に用いなかった列を残す
# # A tibble: 11 x 8
#      id    x2    x3    x4    y2    y3    y4    z1
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1    10    10     8  9.14  7.46  6.58 18.0 
# 2     2     8     8     8  8.14  6.77  5.76 15.0 

anscombe_tbl %>%
  mutate(z1 = x1 + y1, .keep = "none")   # どの列も残さない
# # A tibble: 11 x 1
#      z1
#   <dbl>
# 1 18.0 
# 2 15.0 

mutate(across()):複数列にわたる計算

R
# mutate(across())
iris_tbl %>%
  mutate(across(Sepal.Length:Petal.Width, round))
iris_tbl %>%
  mutate(across(2:5, round))
# # A tibble: 150 x 6
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1            5           4            1           0 setosa 
# 2     2            5           3            1           0 setosa 

iris_tbl %>%
  mutate(across(Sepal.Length:Petal.Width, log))
# # A tibble: 150 x 6
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1         1.63        1.25        0.336      -1.61  setosa 
# 2     2         1.59        1.10        0.336      -1.61  setosa 

scale2 <- function(x, na.rm = FALSE) (x - mean(x, na.rm = na.rm)) / sd(x, na.rm)
iris_tbl %>%
  mutate(across(Sepal.Length:Petal.Width, scale2))
# A tibble: 150 x 6
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1       -0.898      1.02          -1.34       -1.31 setosa 
# 2     2       -1.14      -0.132         -1.34       -1.31 setosa 

where():複数列の条件指定

R
# where()
iris_tbl %>%
  mutate(across(where(is.double), round))
# # A tibble: 150 x 6
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1            5           4            1           0 setosa 
# 2     2            5           3            1           0 setosa

iris_tbl %>%
  mutate(across(where(is.factor), as.character))
# # A tibble: 150 x 6
# id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <chr>  
# 1     1          5.1         3.5          1.4         0.2 setosa 
# 2     2          4.9         3            1.4         0.2 setosa

iris_tbl %>%
  mutate(across(where(is.factor), as.integer))
# # A tibble: 150 x 6
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int>        <dbl>       <dbl>        <dbl>       <dbl>   <int>
# 1     1          5.1         3.5          1.4         0.2       1
# 2     2          4.9         3            1.4         0.2       1

iris_tbl %>%
  mutate(across(where(is.double) & !starts_with("Petal"), round))
# # A tibble: 150 x 6
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1            5           4          1.4         0.2 setosa 
# 2     2            5           3          1.4         0.2 setosa 

引数.fns:複数列にわたる計算に用いる関数の指定

R
# 引数.fns
anscombe_tbl %>%
  mutate(across(x1:y4, log10))
anscombe_tbl %>%
  mutate(across(x1:y4, .fns = log10))
anscombe_tbl %>%
  mutate(across(x1:y4, ~log(., 10)))
anscombe_tbl %>%
  mutate(across(x1:y4, .fns = ~log(., 10)))
# # A tibble: 11 x 9
#      id    x1    x2    x3    x4    y1    y2    y3    y4
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1 1     1     1     0.903 0.905 0.961 0.873 0.818
# 2     2 0.903 0.903 0.903 0.903 0.842 0.911 0.831 0.760

anscombe_tbl %>%
  mutate(across(-id, ~round(log(., 10), 2)))
anscombe_tbl %>%
  mutate(across(-id, ~log(., 10))) %>% print() %>%
  mutate(across(-id, ~round(., 2)))
# # A tibble: 11 x 9
#      id    x1    x2    x3    x4    y1    y2    y3    y4
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1  1     1     1     0.9   0.91  0.96  0.87  0.82
# 2     2  0.9   0.9   0.9   0.9   0.84  0.91  0.83  0.76

iris_tbl %>%
  mutate(across(2:5, ~str_c(sprintf("%.2f", .), "cm")))
iris_tbl %>%
  mutate(across(2:5, ~sprintf("%.2fcm", .)))
iris_tbl %>%
  mutate(across(2:5, ~str_c(format(., nsmall = 2), "cm")))
# # A tibble: 150 x 6
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int> <chr>        <chr>       <chr>        <chr>       <fct>  
# 1     1 5.10cm       3.50cm      1.40cm       0.20cm      setosa 
# 2     2 4.90cm       3.00cm      1.40cm       0.20cm      setosa 

iris_tbl %>%
  mutate(across(2:3, ~round(.))) %>%
  mutate(across(4:5, ~round(., 1))) %>%
  mutate(across(2:5, ~str_c(as.character(.), "cm")))
# # A tibble: 150 x 6
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int> <chr>        <chr>       <chr>        <chr>       <fct>  
# 1     1 5cm          4cm         1.4cm        0.2cm       setosa 
# 2     2 5cm          3cm         1.4cm        0.2cm       setosa 

anscombe_tbl %>%
  select(id, x1:x2) %>%
  mutate(across(x1:x2, .fns = list(sqrt, exp, log)))
# # A tibble: 11 x 9
#      id    x1    x2  x1_1      x1_2  x1_3  x2_1      x2_2  x2_3
#   <int> <dbl> <dbl> <dbl>     <dbl> <dbl> <dbl>     <dbl> <dbl>
# 1     1    10    10  3.16   22026.   2.30  3.16   22026.   2.30
# 2     2     8     8  2.83    2981.   2.08  2.83    2981.   2.08

引数.names:複数列にわたる計算後の列名の指定

R
# 引数.names
anscombe_tbl %>%
  select(id, x1:x4) %>%
  mutate(across(x1:x4, log, .names = "log_{col}"))
anscombe_tbl %>%
  select(id, x1:x4) %>%
  mutate(across(.cols = x1:x4, .fns = log, .names = "log_{col}"))
# # A tibble: 11 x 9
#      id    x1    x2    x3    x4 log_x1 log_x2 log_x3 log_x4
#   <int> <dbl> <dbl> <dbl> <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
# 1     1    10    10    10     8   2.30   2.30   2.30   2.08
# 2     2     8     8     8     8   2.08   2.08   2.08   2.08

anscombe_tbl %>%
  select(id, x1:x4) %>%
  mutate(across(.cols = x1:x4, .fns = list(Log = log), .names = "{fn}_{col}"))
# # A tibble: 11 x 9
#      id    x1    x2    x3    x4 Log_x1 Log_x2 Log_x3 Log_x4
#   <int> <dbl> <dbl> <dbl> <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
# 1     1    10    10    10     8   2.30   2.30   2.30   2.08
# 2     2     8     8     8     8   2.08   2.08   2.08   2.08

anscombe_tbl %>%
  select(id, x1:x2) %>%
  mutate(across(.cols = x1:x2,
                .fns = list(Sqrt = sqrt, Exp = exp, Log = log),
                .names = "{fn}_{col}"))
anscombe_tbl %>%
  select(id, x1:x2) %>%
  mutate(across(x1:x2, list(Sqrt = sqrt, Exp = exp, Log = log), 
                .names = "{fn}_{col}"))
# # A tibble: 11 x 9
#      id    x1    x2 Sqrt_x1    Exp_x1 Log_x1 Sqrt_x2    Exp_x2 Log_x2
#   <int> <dbl> <dbl>   <dbl>     <dbl>  <dbl>   <dbl>     <dbl>  <dbl>
# 1     1    10    10    3.16   22026.    2.30    3.16   22026.    2.30
# 2     2     8     8    2.83    2981.    2.08    2.83    2981.    2.08

引数.namesには、{col}で(引数.colsに指定した)列名、{fn}で(引数.fnsに指定した)関数名を含む列名を指定できます。ただし、関数名は名前付きリストの形(list(名前 = 要素, 名前 = 要素, ...)の形)で渡す必要があります。列名や関数名を指定しなかった場合の例を次に示します。

R
anscombe_tbl %>%
  select(id, x1:x4) %>%
  mutate(across(.cols = x1:x4, .fns = log, .names = "{fn}_{col}"))
# # A tibble: 11 x 9
#      id    x1    x2    x3    x4 `1_x1` `1_x2` `1_x3` `1_x4`
#   <int> <dbl> <dbl> <dbl> <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
# 1     1    10    10    10     8   2.30   2.30   2.30   2.08
# 2     2     8     8     8     8   2.08   2.08   2.08   2.08
# 名前付きリストの形で関数を指定していないため、{fn}のところが数字になっている。

anscombe_tbl %>%
  select(id, x1:x2) %>%
  mutate(across(.cols = x1:x2,
                .fns = list(sqrt, exp, log),
                .names = "{fn}_{col}"))
# # A tibble: 11 x 9
#      id    x1    x2 `1_x1`    `2_x1` `3_x1` `1_x2`    `2_x2` `3_x2`
#   <int> <dbl> <dbl>  <dbl>     <dbl>  <dbl>  <dbl>     <dbl>  <dbl>
# 1     1    10    10   3.16   22026.    2.30   3.16   22026.    2.30
# 2     2     8     8   2.83    2981.    2.08   2.83    2981.    2.08
# 名前付きリストの形で関数を指定していないため、{fn}のところが数字になっている。

anscombe_tbl %>%
  select(id, x1:x2) %>%
  mutate(across(.cols = x1:x2,
                .fns = list(sqrt, exp, log)))
# # A tibble: 11 x 9
#      id    x1    x2  x1_1      x1_2  x1_3  x2_1      x2_2  x2_3
#   <int> <dbl> <dbl> <dbl>     <dbl> <dbl> <dbl>     <dbl> <dbl>
# 1     1    10    10  3.16   22026.   2.30  3.16   22026.   2.30
# 2     2     8     8  2.83    2981.   2.08  2.83    2981.   2.08
# 計算後の列名の形を指定していないため、"計算前の列名_1"のような列名になっている。

参考:
- Create, modify, and delete columns

列計算に使う関数

四則演算や数学関数、文字列関数の他にもいろいろな関数が用意されていますが、長くなりましたので、これについては別記事にします。

参考

8
5
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
8
5