More than 3 years have passed since last update.

Rのデータフレームの列操作についてのメモ

Last updated at 2021-04-29Posted at 2021-03-11

はじめに

Rのデータフレームの列の操作について、サンプルデータを用いて具体的に練習してみました。

Rのデータフレームの列についての操作
参考

Rのデータフレームの列についての操作

Rのデータフレームの列の操作について、サンプルデータを用いて具体的に練習します。
なお、以下については別記事でまとめています。

行の操作について：こちら
集計操作について：こちら
pivot操作について：こちら
データフレームの関数について：こちら

参考：
https://r4ds.had.co.nz/transform.html
https://github.com/tidyverse/dplyr
https://heavywatal.github.io/rstats/dplyr.html

練習に用いるデータ

Rのサンプルデータirisとanscombeを用います。
行番号を表示した方が分かりやすいため、tibble::rowid_to_column()で行番号を列名にしています。
（こちらの記事と同じデータを用います。）

library(dplyr)
library(tibble)
library(tidyr)

iris

# 行番号を列名に
iris_tbl <- iris %>% as_tibble() %>% rowid_to_column("id")
iris_tbl
# # A tibble: 150 x 6
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1          5.1         3.5          1.4         0.2 setosa 
# 2     2          4.9         3            1.4         0.2 setosa 

anscombe

anscombe_tbl <- anscombe %>% as_tibble() %>% rowid_to_column("id")
anscombe_tbl
# # A tibble: 11 x 9
#      id    x1    x2    x3    x4    y1    y2    y3    y4
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1    10    10    10     8  8.04  9.14  7.46  6.58
# 2     2     8     8     8     8  6.95  8.14  6.77  5.76

select()：列の選択・並び替え

列を選択します。

# select()
iris_tbl %>%
  select(c(1, 6, 2))
iris_tbl %>%
  select(c(id, Species, Sepal.Length))
iris_tbl %>%
  select(id, Species, Sepal.Length)
# # A tibble: 150 x 3
#      id Species Sepal.Length
#   <int> <fct>          <dbl>
# 1     1 setosa           5.1
# 2     2 setosa           4.9

iris_tbl %>%
  select(2:5)
iris_tbl %>%
  select(Sepal.Length:Petal.Width)
# # A tibble: 150 x 4
#   Sepal.Length Sepal.Width Petal.Length Petal.Width
#          <dbl>       <dbl>        <dbl>       <dbl>
# 1          5.1         3.5          1.4         0.2
# 2          4.9         3            1.4         0.2

anscombe_tbl %>%
  select(2:5)
anscombe_tbl %>%
  select(x1:y4)
# # A tibble: 11 x 4
#      x1    x2    x3    x4
#   <dbl> <dbl> <dbl> <dbl>
# 1    10    10    10     8
# 2     8     8     8     8

everything()：すべての列

# everything()
iris_tbl %>%
  select(everything())
# # A tibble: 150 x 4
#   Sepal.Length Sepal.Width Petal.Length Petal.Width
#          <dbl>       <dbl>        <dbl>       <dbl>
# 1          5.1         3.5          1.4         0.2
# 2          4.9         3            1.4         0.2

anscombe_tbl %>%
  select(everything())
# # A tibble: 11 x 9
#      id    x1    x2    x3    x4    y1    y2    y3    y4
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1    10    10    10     8  8.04  9.14  7.46  6.58
# 2     2     8     8     8     8  6.95  8.14  6.77  5.76

last_col()：最後の列

# last_col()
iris_tbl %>%
  select(last_col())  # 最後の列
iris_tbl %>%
  select(last_col(0)) # 最後の列
# # A tibble: 150 x 1
#  Species
#    <fct>  
# 1 setosa 
# 2 setosa 

iris_tbl %>%
  select(last_col(1)) # 最後の1つ前の列（後ろから2番目の列）
# # A tibble: 150 x 1
#   Petal.Width
#         <dbl>
# 1         0.2
# 2         0.2

iris_tbl %>%
  select(last_col(2)) # 最後の2つ前の列（後ろから3番目の列）
# # A tibble: 150 x 1
#   Petal.Length
#          <dbl>
# 1          1.4
# 2          1.4

iris_tbl %>%
  select(2:last_col(2)) # 2列目から最後の2つ前の列（後ろから3番目の列）まで
# # A tibble: 150 x 3
#   Sepal.Length Sepal.Width Petal.Length
#          <dbl>       <dbl>        <dbl>
# 1          5.1         3.5          1.4
# 2          4.9         3            1.4

iris_tbl %>%
  select(!(2:last_col(2)))
# # A tibble: 150 x 3
#      id Petal.Width Species
#   <int>       <dbl> <fct>  
# 1     1         0.2 setosa 
# 2     2         0.2 setosa

&, |：列の条件選択 and・or

# &, |
iris_tbl %>%
  select(1:3 & 2:5)
# # A tibble: 150 x 2
#   Sepal.Length Sepal.Width
#          <dbl>       <dbl>
# 1          5.1         3.5
# 2          4.9         3  

iris_tbl %>%
  select(1:3 | 2:5)
# # A tibble: 150 x 5
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width
#   <int>        <dbl>       <dbl>        <dbl>       <dbl>
# 1     1          5.1         3.5          1.4         0.2
# 2     2          4.9         3            1.4         0.2

iris_tbl %>%
  select(starts_with("Sepal") & ends_with("Width"))
# # A tibble: 150 x 1
#   Sepal.Width
#         <dbl>
# 1         3.5
# 2         3  

iris_tbl %>%
  select(starts_with("Sepal") | ends_with("Width"))
# # A tibble: 150 x 3
#   Sepal.Length Sepal.Width Petal.Width
#          <dbl>       <dbl>       <dbl>
# 1          5.1         3.5         0.2
# 2          4.9         3           0.2

-, !：列の条件選択 not

# -, !
iris_tbl %>%
  select(-id)
iris_tbl %>%
  select(!id)
# # A tibble: 150 x 5
#   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#          <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1          5.1         3.5          1.4         0.2 setosa 
# 2          4.9         3            1.4         0.2 setosa 

iris_tbl %>%
  select(!2:5)
iris_tbl %>%
  select(-(2:5))
# # A tibble: 150 x 2
#      id Species
#   <int> <fct>  
# 1     1 setosa 
# 2     2 setosa 

iris_tbl %>%
  select(-c(1, 6, 2))
iris_tbl %>%
  select(-id, -Species, -Sepal.Length)
# # A tibble: 150 x 3
#   Sepal.Width Petal.Length Petal.Width
#         <dbl>        <dbl>       <dbl>
# 1         3.5          1.4         0.2
# 2         3            1.4         0.2

iris_tbl %>%
  select(-starts_with("Sepal"))
iris_tbl %>%
  select(!starts_with("Sepal"))
# # A tibble: 150 x 4
#      id Petal.Length Petal.Width Species
#   <int>        <dbl>       <dbl> <fct>  
# 1     1          1.4         0.2 setosa 
# 2     2          1.4         0.2 setosa

starts_with(), ends_with()：指定した文字列で始まる・終わる列名

# starts_with(), ends_with()
iris_tbl %>%
  select(id, starts_with("S")) # "S"で始まる列
# # A tibble: 150 x 4
#      id Sepal.Length Sepal.Width Species
#   <int>        <dbl>       <dbl> <fct>  
# 1     1          5.1         3.5 setosa 
# 2     2          4.9         3   setosa 

iris_tbl %>%
  select(id, ends_with("th")) # "th"で終わる列
# # A tibble: 150 x 5
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width
#   <int>        <dbl>       <dbl>        <dbl>       <dbl>
# 1     1          5.1         3.5          1.4         0.2
# 2     2          4.9         3            1.4         0.2

contains()：指定した文字列を含む列名

# contains()
iris_tbl %>%
  select(contains("al")) # "al"を含む列
iris_tbl %>%
  select(contains(".")) # "."を含む列
# # A tibble: 150 x 4
#   Sepal.Length Sepal.Width Petal.Length Petal.Width
#          <dbl>       <dbl>        <dbl>       <dbl>
# 1          5.1         3.5          1.4         0.2
# 2          4.9         3            1.4         0.2

iris_tbl %>%
  select(-contains("al"))
# # A tibble: 150 x 2
#      id Species
#   <int> <fct>  
# 1     1 setosa 
# 2     2 setosa

matches()：指定した正規表現にマッチする列名

# matches()
iris_tbl %>%
  select(matches("al")) # "al"を含む列名
# # A tibble: 150 x 4
#   Sepal.Length Sepal.Width Petal.Length Petal.Width
#          <dbl>       <dbl>        <dbl>       <dbl>
# 1          5.1         3.5          1.4         0.2
# 2          4.9         3            1.4         0.2

iris_tbl %>%
  select(matches(".")) # 「"."を含む列名」ではない
# contains()に指定するのは文字列だが、matches()に指定するのは正規表現
# 正規表現で"."は「何でも」の意味
# # A tibble: 150 x 6
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1          5.1         3.5          1.4         0.2 setosa 
# 2     2          4.9         3            1.4         0.2 setosa 

iris_tbl %>%
  select(matches("\\.")) # "."を含む列名
# 正規表現で"\\."は"."を含む文字列
# # A tibble: 150 x 4
#   Sepal.Length Sepal.Width Petal.Length Petal.Width
#          <dbl>       <dbl>        <dbl>       <dbl>
# 1          5.1         3.5          1.4         0.2
# 2          4.9         3            1.4         0.2

iris_tbl %>%
  select(matches("^..$")) # 2桁の列名
# # A tibble: 150 x 1
#      id
#   <int>
# 1     1
# 2     2

iris_tbl %>%
  select(matches("^S......$")) # "S"で始まる7桁の列名
iris_tbl %>%
  select(matches("^S.{6}$")) # "S"で始まる7桁の列名
# # A tibble: 150 x 1
#  Species
#    <fct>  
# 1 setosa 
# 2 setosa 

iris_tbl %>%
  select(matches("^S.+$")) # "S"で始まる2桁以上の列名
iris_tbl %>%
  select(matches("^S.*$")) # "S"で始まる1桁以上の列名
# # A tibble: 150 x 3
#   Sepal.Length Sepal.Width Species
#          <dbl>       <dbl> <fct>  
# 1          5.1         3.5 setosa 
# 2          4.9         3   setosa 

iris_tbl %>%
  select(matches("p")) # "p"を含む列名
# # A tibble: 150 x 5
#   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#          <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1          5.1         3.5          1.4         0.2 setosa 
# 2          4.9         3            1.4         0.2 setosa 

iris_tbl %>%
  select(matches("^p")) # "p"で始まる列名（1桁目が"p"）
# # A tibble: 150 x 2
#   Petal.Length Petal.Width
#          <dbl>       <dbl>
# 1          1.4         0.2
# 2          1.4         0.2

iris_tbl %>%
  select(matches("^.p")) # 2桁目が"p"の列名
# # A tibble: 150 x 1
#  Species
#    <fct>  
# 1 setosa 
# 2 setosa 

iris_tbl %>%
  select(matches("\\W")) # 英数字以外を含む列名
# # A tibble: 150 x 4
#   Sepal.Length Sepal.Width Petal.Length Petal.Width
#          <dbl>       <dbl>        <dbl>       <dbl>
# 1          5.1         3.5          1.4         0.2
# 2          4.9         3            1.4         0.2

iris_tbl %>%
  select(matches("^\\w+$")) # 先頭から末尾まですべて英数字の列名（英数字以外を含まない列名）
iris_tbl %>%
  select(-matches("\\W")) # 英数字以外を含まない列名
# # A tibble: 150 x 2
#      id Species
#   <int> <fct>  
# 1     1 setosa 
# 2     2 setosa 

anscombe_tbl %>%
  select(matches("^x|y")) # "x"か"y"で始まる列名
anscombe_tbl %>%
  select(matches("[1-4]$")) # "1"-"4"で終わる列名
anscombe_tbl %>%
  select(matches("^\\w\\d")) # 1桁目が英数字、2桁目が数字の列名
# # A tibble: 11 x 8
#      x1    x2    x3    x4    y1    y2    y3    y4
#   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1    10    10    10     8  8.04  9.14  7.46  6.58
# 2     8     8     8     8  6.95  8.14  6.77  5.76

anscombe_tbl %>%
  rename(`x 1` = x1) %>% print() %>%
  select(matches("\\s")) # 空白を含む列名
# # A tibble: 11 x 1
#   `x 1`
#   <dbl>
# 1    10
# 2     8

anscombe_tbl %>%
  rename(`x 1` = x1) %>% print() %>%
  select(matches("^\\S+$")) # 空白を含まない列名
anscombe_tbl %>%
  rename(`x 1` = x1) %>% print() %>%
  select(!matches("\\s")) # 空白を含まない列名
# # A tibble: 11 x 8
#      id    x2    x3    x4    y1    y2    y3    y4
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1    10    10     8  8.04  9.14  7.46  6.58
# 2     2     8     8     8  6.95  8.14  6.77  5.76

anscombe_tbl %>%
  select(matches("^[a-z]{2}$")) # 英字小文字2桁の列名
# # A tibble: 11 x 1
#      id
#   <int>
# 1     1
# 2     2

iris_tbl %>%
  select(matches("^[a-z|A-Z]{7}$")) # 英字7桁の列名
# # A tibble: 150 x 1
#  Species
#    <fct>  
# 1 setosa 
# 2 setosa

num_range()：数字の連番の列名

# num_range()
anscombe_tbl %>%
  select(num_range("x", 2:3))
# # A tibble: 11 x 2
#      x2    x3
#   <dbl> <dbl>
# 1    10    10
# 2     8     8

anscombe_tbl %>%
  select(-num_range("x", 2:3))
# # A tibble: 11 x 7
#      id    x1    x4    y1    y2    y3    y4
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1    10     8  8.04  9.14  7.46  6.58
# 2     2     8     8  6.95  8.14  6.77  5.76

anscombe_tbl %>%
  select(num_range("x", 3:10))
# # A tibble: 11 x 2
#      x3    x4
#   <dbl> <dbl>
# 1    10     8
# 2     8     8

anscombe_tbl %>%
  select(-num_range("x", 3:10))
# # A tibble: 11 x 7
#      id    x1    x2    y1    y2    y3    y4
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1    10    10  8.04  9.14  7.46  6.58
# 2     2     8     8  6.95  8.14  6.77  5.76

all_of(), any_of()：文字列ベクトルによる列名の選択

# all_of(), any_of()
vars <- names(iris_tbl)
vars
# [1] "id"           "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width"  "Species" 
vars <- vars[2:5]
vars
# [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width" 
iris_tbl %>%
  select(all_of(vars))
# # A tibble: 150 x 4
#   Sepal.Length Sepal.Width Petal.Length Petal.Width
#          <dbl>       <dbl>        <dbl>       <dbl>
# 1          5.1         3.5          1.4         0.2
# 2          4.9         3            1.4         0.2

library(stringr)
vars <- str_c("x", 1:10)
vars
# [1] "x1"  "x2"  "x3"  "x4"  "x5"  "x6"  "x7"  "x8"  "x9"  "x10"
anscombe_tbl %>%
  select(any_of(vars))
# # A tibble: 11 x 4
#      x1    x2    x3    x4
#   <dbl> <dbl> <dbl> <dbl>
# 1    10    10    10     8
# 2     8     8     8     8

vars <- str_c(letters, 1)
vars
#  [1] "a1" "b1" "c1" "d1" "e1" "f1" "g1" "h1" "i1" "j1" "k1" "l1" "m1"
# [14] "n1" "o1" "p1" "q1" "r1" "s1" "t1" "u1" "v1" "w1" "x1" "y1" "z1"
anscombe_tbl %>%
  select(any_of(vars))
# # A tibble: 11 x 2
#      x1    y1
#   <dbl> <dbl>
# 1    10  8.04
# 2     8  6.95

anscombe_tbl %>%
  select(-any_of(vars))
# # A tibble: 11 x 7
#      id    x2    x3    x4    y2    y3    y4
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1    10    10     8  9.14  7.46  6.58
# 2     2     8     8     8  8.14  6.77  5.76

where()：列の属性による列の選択

# where()
iris_tbl %>%
  select(where(is.numeric))
# # A tibble: 150 x 5
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width
#   <int>        <dbl>       <dbl>        <dbl>       <dbl>
# 1     1          5.1         3.5          1.4         0.2
# 2     2          4.9         3            1.4         0.2

iris_tbl %>%
  select(where(is.double))
# # A tibble: 150 x 4
#   Sepal.Length Sepal.Width Petal.Length Petal.Width
#          <dbl>       <dbl>        <dbl>       <dbl>
# 1          5.1         3.5          1.4         0.2
# 2          4.9         3            1.4         0.2

iris_tbl %>%
  select(where(is.integer))
# # A tibble: 150 x 1
#      id
#   <int>
# 1     1
# 2     2

iris_tbl %>%
  select(where(is.character))
# # A tibble: 150 x 0

iris_tbl %>%
  select(where(is.factor))
# # A tibble: 150 x 1
#   Species
#   <fct>  
# 1 setosa 
# 2 setosa

参考：

rename()：列名の変更

列名を変更できます。

# rename()
iris_tbl %>%
  rename(ID = id)
# # A tibble: 150 x 6
#      ID Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1          5.1         3.5          1.4         0.2 setosa 
# 2     2          4.9         3            1.4         0.2 setosa 

iris_tbl %>%
  rename(sl = Sepal.Length, sw = Sepal.Width,
         pl = Petal.Length, pw = Petal.Width)
# # A tibble: 150 x 6
#      id    sl    sw    pl    pw Species
#   <int> <dbl> <dbl> <dbl> <dbl> <fct>  
# 1     1   5.1   3.5   1.4   0.2 setosa 
# 2     2   4.9   3     1.4   0.2 setosa 

var <- "Species"      # 変更前の列名
var_2 <- toupper(var) # 変更後の列名
iris_tbl %>%
  rename(!!var_2 := !!var)
# # A tibble: 150 x 6
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width SPECIES
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1          5.1         3.5          1.4         0.2 setosa 
# 2     2          4.9         3            1.4         0.2 setosa 

vars <- names(iris_tbl) # 変更前の列名
vars_2 <- toupper(vars) # 変更後の列名
named_vars <- setNames(vars, vars_2) # 名前付きベクトル
iris_tbl %>%
  rename(!!!named_vars)
# # A tibble: 150 x 6
#      ID SEPAL.LENGTH SEPAL.WIDTH PETAL.LENGTH PETAL.WIDTH SPECIES
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1          5.1         3.5          1.4         0.2 setosa 
# 2     2          4.9         3            1.4         0.2 setosa 

vars <- str_c("x", 1:4) # 変更前の列名
vars_2 <- toupper(vars) # 変更後の列名
named_vars <- setNames(vars, vars_2)
anscombe_tbl %>%
  rename(!!!named_vars)
# # A tibble: 11 x 9
#      id    X1    X2    X3    X4    y1    y2    y3    y4
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1    10    10    10     8  8.04  9.14  7.46  6.58
# 2     2     8     8     8     8  6.95  8.14  6.77  5.76

rename_with()：複数列にわたる列名の一括変更

# rename_with()
iris_tbl %>%
  rename_with(toupper)
iris_tbl %>%
  rename_with(toupper, everything())
# # A tibble: 150 x 6
#      ID SEPAL.LENGTH SEPAL.WIDTH PETAL.LENGTH PETAL.WIDTH SPECIES
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1          5.1         3.5          1.4         0.2 setosa 
# 2     2          4.9         3            1.4         0.2 setosa 

anscombe_tbl %>%
  rename_with(toupper, x1:x4)
anscombe_tbl %>%
  rename_with(.fn = toupper, .cols = x1:x4)
anscombe_tbl %>%
  rename_with(.fn = ~toupper(.), .cols = x1:x4)
# # A tibble: 11 x 9
#      id    X1    X2    X3    X4    y1    y2    y3    y4
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1    10    10    10     8  8.04  9.14  7.46  6.58
# 2     2     8     8     8     8  6.95  8.14  6.77  5.76

複数列に一括適用する関数がf(x)の形の関数なら、引数.fnにfか~f(.)の形で指定します。後者の形を用いれば、複雑な関数や合成関数でも適用できます。

anscombe_tbl %>%
  rename_with(~str_c("var_", .), x1:x4)
anscombe_tbl %>%
  rename_with(.fn = ~str_c("var_", .), .cols = x1:x4)
# # A tibble: 11 x 9
#      id var_x1 var_x2 var_x3 var_x4    y1    y2    y3    y4
#   <int>  <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1     10     10     10      8  8.04  9.14  7.46  6.58
# 2     2      8      8      8      8  6.95  8.14  6.77  5.76

anscombe_tbl %>%
  rename_with(~str_c("var_", toupper(.)), x1:x4)
anscombe_tbl %>%
  rename_with(.fn = ~str_c("var_", toupper(.)), .cols = x1:x4)
# # A tibble: 11 x 9
#      id var_X1 var_X2 var_X3 var_X4    y1    y2    y3    y4
#   <int>  <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1     10     10     10      8  8.04  9.14  7.46  6.58
# 2     2      8      8      8      8  6.95  8.14  6.77  5.76

iris_tbl %>%
  rename_with(~str_to_lower(str_c(str_replace(., "\\.", "_"), "(cm)")), 2:5)
iris_tbl %>%
  rename_with(~str_replace(., "\\.", "_"), 2:5) %>%
  rename_with(~str_c(., "(cm)"), 2:5) %>%
  rename_with(~str_to_lower(.), 2:5)
# # A tibble: 150 x 6
#      id `sepal_length(cm)` `sepal_width(cm)` `petal_length(cm)` `petal_width(cm)` Species
#   <int>              <dbl>             <dbl>              <dbl>             <dbl> <fct>  
# 1     1                5.1               3.5                1.4               0.2 setosa 
# 2     2                4.9               3                  1.4               0.2 setosa 

vars <- names(iris_tbl)
vars <- vars[2:5]
vars
# [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width"
str_replace(vars, "\\.", "_") # "\\."は正規表現で"."の意味
# [1] "Sepal_Length" "Sepal_Width"  "Petal_Length" "Petal_Width" 
str_c(str_replace(vars, "\\.", "_"), "(cm)")
# [1] "Sepal_Length(cm)" "Sepal_Width(cm)"  "Petal_Length(cm)" "Petal_Width(cm)" 
str_to_lower(str_c(str_replace(vars, "\\.", "_"), "(cm)"))
# [1] "sepal_length(cm)" "sepal_width(cm)"  "petal_length(cm)" "petal_width(cm)"

relocate()：列の並び替え

# relocate()
iris_tbl %>%
  relocate(Species) # Species列を最初に
# # A tibble: 150 x 6
#  Species    id Sepal.Length Sepal.Width Petal.Length Petal.Width
#    <fct>   <int>        <dbl>       <dbl>        <dbl>       <dbl>
# 1 setosa      1          5.1         3.5          1.4         0.2
# 2 setosa      2          4.9         3            1.4         0.2

iris_tbl %>%
  relocate(starts_with("Sepal")) # Petalで始まる列を最初に
# A tibble: 150 x 6
#   Sepal.Length Sepal.Width    id Petal.Length Petal.Width Species
#          <dbl>       <dbl> <int>        <dbl>       <dbl> <fct>  
# 1          5.1         3.5     1          1.4         0.2 setosa 
# 2          4.9         3       2          1.4         0.2 setosa 

iris_tbl %>%
  relocate(id, .after = Species) # id列をSpecies列の後に
iris_tbl %>%
  relocate(id, .after = last_col()) # id列を最後の列の後に
iris_tbl %>%
  relocate(id, .after = everything()) # id列を全列の後に
# # A tibble: 150 x 6
#   Sepal.Length Sepal.Width Petal.Length Petal.Width Species    id
#          <dbl>       <dbl>        <dbl>       <dbl> <fct>   <int>
# 1          5.1         3.5          1.4         0.2 setosa      1
# 2          4.9         3            1.4         0.2 setosa      2

iris_tbl %>%
  relocate(id, .before = Species) # id列をSpecies列の前に
iris_tbl %>%
  relocate(id, .before = last_col(0)) # id列をSpecies列の前に
iris_tbl %>%
  relocate(id, .after = last_col(1)) # id列を後ろから2番目の列の後に
# # A tibble: 150 x 6
#   Sepal.Length Sepal.Width Petal.Length Petal.Width    id Species
#          <dbl>       <dbl>        <dbl>       <dbl> <int> <fct>  
# 1          5.1         3.5          1.4         0.2     1 setosa 
# 2          4.9         3            1.4         0.2     2 setosa 

iris_tbl %>%
  relocate(starts_with("Sepal"), .after = starts_with("Petal")) # Sepalで始まる列をPetalで始まる列の後に
# # A tibble: 150 x 6
#      id Petal.Length Petal.Width Sepal.Length Sepal.Width Species
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1          1.4         0.2          5.1         3.5 setosa 
# 2     2          1.4         0.2          4.9         3   setosa 

iris_tbl %>%
  relocate(where(is.factor)) # factor型の列を最初に
# # A tibble: 150 x 6
#   Species    id Sepal.Length Sepal.Width Petal.Length Petal.Width
#   <fct>   <int>        <dbl>       <dbl>        <dbl>       <dbl>
# 1 setosa      1          5.1         3.5          1.4         0.2
# 2 setosa      2          4.9         3            1.4         0.2

iris_tbl %>%
  relocate(-where(is.integer)) # integer型の列以外を最初に
# # A tibble: 150 x 6
#   Sepal.Length Sepal.Width Petal.Length Petal.Width Species    id
#          <dbl>       <dbl>        <dbl>       <dbl> <fct>   <int>
# 1          5.1         3.5          1.4         0.2 setosa      1
# 2          4.9         3            1.4         0.2 setosa      2

anscombe_tbl %>%
  relocate(contains("x"), .after = contains("y")) # "x"を含む列を"y"を含む列の後に
# # A tibble: 11 x 9
#      id    y1    y2    y3    y4    x1    x2    x3    x4
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1  8.04  9.14  7.46  6.58    10    10    10     8
# 2     2  6.95  8.14  6.77  5.76     8     8     8     8

vars <- str_c(letters, "1")
vars
anscombe_tbl %>%
  relocate(any_of(vars))
# # A tibble: 11 x 9
#      x1    y1    id    x2    x3    x4    y2    y3    y4
#   <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1    10  8.04     1    10    10     8  9.14  7.46  6.58
# 2     8  6.95     2     8     8     8  8.14  6.77  5.76

mutate()：列の計算

# mutate()
iris_tbl %>%
  mutate(SP = toupper(Species))
# # A tibble: 150 x 7
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species SP    
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>   <chr> 
# 1     1          5.1         3.5          1.4         0.2 setosa  SETOSA
# 2     2          4.9         3            1.4         0.2 setosa  SETOSA

iris_tbl %>%
  mutate(sp = as.character(Species))
# # A tibble: 150 x 7
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species sp    
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>   <chr> 
# 1     1          5.1         3.5          1.4         0.2 setosa  setosa
# 2     2          4.9         3            1.4         0.2 setosa  setosa

iris_tbl %>%
  mutate(sl_centralized = Sepal.Length - mean(Sepal.Length))
# 中心化
# # A tibble: 150 x 7
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species sl_centralized
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>            <dbl>
# 1     1          5.1         3.5          1.4         0.2 setosa          -0.743
# 2     2          4.9         3            1.4         0.2 setosa          -0.943

iris_tbl %>%
  mutate(sl_normalized = (Sepal.Length - mean(Sepal.Length)) / sd(Sepal.Length))
iris_tbl %>%
  mutate(sl_normalized = as.vector(scale(Sepal.Length)))
# 平均=0、標準偏差=1に正規化 (z-score normalization)
# # A tibble: 150 x 7
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species sl_normalized
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>           <dbl>
# 1     1          5.1         3.5          1.4         0.2 setosa         -0.898
# 2     2          4.9         3            1.4         0.2 setosa         -1.14 
# scale()はmatrixを返すためas.vector()でベクトル化している
scale(iris$Sepal.Length)
class(scale(iris$Sepal.Length)) #[1] "matrix"

iris_tbl %>%
  mutate(sl_normalized = (Sepal.Length - min(Sepal.Length)) / (max(Sepal.Length) - min(Sepal.Length)))
# 最小=0、最大=1に正規化 (min-max normalization)
# A tibble: 150 x 7
#     id Sepal.Length Sepal.Width Petal.Length Petal.Width Species sl_normalized
#  <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>           <dbl>
# 1     1          5.1         3.5          1.4         0.2 setosa         0.222 
# 2     2          4.9         3            1.4         0.2 setosa         0.167

引数.before, .after：計算後の列の場所の指定

# 引数.before, .after
iris_tbl %>%
  mutate(SP = toupper(Species), .before = Species)
# A tibble: 150 x 7
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width SP     Species
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <chr>  <fct>  
# 1     1          5.1         3.5          1.4         0.2 SETOSA setosa 
# 2     2          4.9         3            1.4         0.2 SETOSA setosa 

library(stringr)
iris_tbl %>%
  mutate(ID = str_pad(id, 3L, pad = "0"), .after = id)
# # A tibble: 150 x 7
#      id ID    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int> <chr>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1 001            5.1         3.5          1.4         0.2 setosa 
# 2     2 002            4.9         3            1.4         0.2 setosa 

anscombe_tbl %>%
  mutate(log_x1 = log(x1), .after = x1)
# # A tibble: 11 x 10
#      id    x1 log_x1    x2    x3    x4    y1    y2    y3    y4
#   <int> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1    10   2.30    10    10     8  8.04  9.14  7.46  6.58
# 2     2     8   2.08     8     8     8  6.95  8.14  6.77  5.76

引数.keep：計算に用いた列を残すかどうか

# 引数.keep
anscombe_tbl %>%
  mutate(z1 = x1 + y1, .keep = "all")    # すべての列を残す
# # A tibble: 11 x 10
#      id    x1    x2    x3    x4    y1    y2    y3    y4    z1
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1    10    10    10     8  8.04  9.14  7.46  6.58 18.0 
# 2     2     8     8     8     8  6.95  8.14  6.77  5.76 15.0 

anscombe_tbl %>% 
  mutate(z1 = x1 + y1, .keep = "used")   # 計算に用いた列を残す
# # A tibble: 11 x 3
#      x1    y1    z1
#   <dbl> <dbl> <dbl>
# 1    10  8.04 18.0 
# 2     8  6.95 15.0 

anscombe_tbl %>%
  mutate(z1 = x1 + y1, .keep = "unused") # 計算に用いなかった列を残す
# # A tibble: 11 x 8
#      id    x2    x3    x4    y2    y3    y4    z1
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1    10    10     8  9.14  7.46  6.58 18.0 
# 2     2     8     8     8  8.14  6.77  5.76 15.0 

anscombe_tbl %>%
  mutate(z1 = x1 + y1, .keep = "none")   # どの列も残さない
# # A tibble: 11 x 1
#      z1
#   <dbl>
# 1 18.0 
# 2 15.0

mutate(across())：複数列にわたる計算

# mutate(across())
iris_tbl %>%
  mutate(across(Sepal.Length:Petal.Width, round))
iris_tbl %>%
  mutate(across(2:5, round))
# # A tibble: 150 x 6
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1            5           4            1           0 setosa 
# 2     2            5           3            1           0 setosa 

iris_tbl %>%
  mutate(across(Sepal.Length:Petal.Width, log))
# # A tibble: 150 x 6
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1         1.63        1.25        0.336      -1.61  setosa 
# 2     2         1.59        1.10        0.336      -1.61  setosa 

scale2 <- function(x, na.rm = FALSE) (x - mean(x, na.rm = na.rm)) / sd(x, na.rm)
iris_tbl %>%
  mutate(across(Sepal.Length:Petal.Width, scale2))
# A tibble: 150 x 6
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1       -0.898      1.02          -1.34       -1.31 setosa 
# 2     2       -1.14      -0.132         -1.34       -1.31 setosa

where()：複数列の条件指定

# where()
iris_tbl %>%
  mutate(across(where(is.double), round))
# # A tibble: 150 x 6
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1            5           4            1           0 setosa 
# 2     2            5           3            1           0 setosa

iris_tbl %>%
  mutate(across(where(is.factor), as.character))
# # A tibble: 150 x 6
# id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <chr>  
# 1     1          5.1         3.5          1.4         0.2 setosa 
# 2     2          4.9         3            1.4         0.2 setosa

iris_tbl %>%
  mutate(across(where(is.factor), as.integer))
# # A tibble: 150 x 6
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int>        <dbl>       <dbl>        <dbl>       <dbl>   <int>
# 1     1          5.1         3.5          1.4         0.2       1
# 2     2          4.9         3            1.4         0.2       1

iris_tbl %>%
  mutate(across(where(is.double) & !starts_with("Petal"), round))
# # A tibble: 150 x 6
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1     1            5           4          1.4         0.2 setosa 
# 2     2            5           3          1.4         0.2 setosa

引数.fns：複数列にわたる計算に用いる関数の指定

# 引数.fns
anscombe_tbl %>%
  mutate(across(x1:y4, log10))
anscombe_tbl %>%
  mutate(across(x1:y4, .fns = log10))
anscombe_tbl %>%
  mutate(across(x1:y4, ~log(., 10)))
anscombe_tbl %>%
  mutate(across(x1:y4, .fns = ~log(., 10)))
# # A tibble: 11 x 9
#      id    x1    x2    x3    x4    y1    y2    y3    y4
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1 1     1     1     0.903 0.905 0.961 0.873 0.818
# 2     2 0.903 0.903 0.903 0.903 0.842 0.911 0.831 0.760

anscombe_tbl %>%
  mutate(across(-id, ~round(log(., 10), 2)))
anscombe_tbl %>%
  mutate(across(-id, ~log(., 10))) %>% print() %>%
  mutate(across(-id, ~round(., 2)))
# # A tibble: 11 x 9
#      id    x1    x2    x3    x4    y1    y2    y3    y4
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1     1  1     1     1     0.9   0.91  0.96  0.87  0.82
# 2     2  0.9   0.9   0.9   0.9   0.84  0.91  0.83  0.76

iris_tbl %>%
  mutate(across(2:5, ~str_c(sprintf("%.2f", .), "cm")))
iris_tbl %>%
  mutate(across(2:5, ~sprintf("%.2fcm", .)))
iris_tbl %>%
  mutate(across(2:5, ~str_c(format(., nsmall = 2), "cm")))
# # A tibble: 150 x 6
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int> <chr>        <chr>       <chr>        <chr>       <fct>  
# 1     1 5.10cm       3.50cm      1.40cm       0.20cm      setosa 
# 2     2 4.90cm       3.00cm      1.40cm       0.20cm      setosa 

iris_tbl %>%
  mutate(across(2:3, ~round(.))) %>%
  mutate(across(4:5, ~round(., 1))) %>%
  mutate(across(2:5, ~str_c(as.character(.), "cm")))
# # A tibble: 150 x 6
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int> <chr>        <chr>       <chr>        <chr>       <fct>  
# 1     1 5cm          4cm         1.4cm        0.2cm       setosa 
# 2     2 5cm          3cm         1.4cm        0.2cm       setosa 

anscombe_tbl %>%
  select(id, x1:x2) %>%
  mutate(across(x1:x2, .fns = list(sqrt, exp, log)))
# # A tibble: 11 x 9
#      id    x1    x2  x1_1      x1_2  x1_3  x2_1      x2_2  x2_3
#   <int> <dbl> <dbl> <dbl>     <dbl> <dbl> <dbl>     <dbl> <dbl>
# 1     1    10    10  3.16   22026.   2.30  3.16   22026.   2.30
# 2     2     8     8  2.83    2981.   2.08  2.83    2981.   2.08

引数.names：複数列にわたる計算後の列名の指定

# 引数.names
anscombe_tbl %>%
  select(id, x1:x4) %>%
  mutate(across(x1:x4, log, .names = "log_{col}"))
anscombe_tbl %>%
  select(id, x1:x4) %>%
  mutate(across(.cols = x1:x4, .fns = log, .names = "log_{col}"))
# # A tibble: 11 x 9
#      id    x1    x2    x3    x4 log_x1 log_x2 log_x3 log_x4
#   <int> <dbl> <dbl> <dbl> <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
# 1     1    10    10    10     8   2.30   2.30   2.30   2.08
# 2     2     8     8     8     8   2.08   2.08   2.08   2.08

anscombe_tbl %>%
  select(id, x1:x4) %>%
  mutate(across(.cols = x1:x4, .fns = list(Log = log), .names = "{fn}_{col}"))
# # A tibble: 11 x 9
#      id    x1    x2    x3    x4 Log_x1 Log_x2 Log_x3 Log_x4
#   <int> <dbl> <dbl> <dbl> <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
# 1     1    10    10    10     8   2.30   2.30   2.30   2.08
# 2     2     8     8     8     8   2.08   2.08   2.08   2.08

anscombe_tbl %>%
  select(id, x1:x2) %>%
  mutate(across(.cols = x1:x2,
                .fns = list(Sqrt = sqrt, Exp = exp, Log = log),
                .names = "{fn}_{col}"))
anscombe_tbl %>%
  select(id, x1:x2) %>%
  mutate(across(x1:x2, list(Sqrt = sqrt, Exp = exp, Log = log), 
                .names = "{fn}_{col}"))
# # A tibble: 11 x 9
#      id    x1    x2 Sqrt_x1    Exp_x1 Log_x1 Sqrt_x2    Exp_x2 Log_x2
#   <int> <dbl> <dbl>   <dbl>     <dbl>  <dbl>   <dbl>     <dbl>  <dbl>
# 1     1    10    10    3.16   22026.    2.30    3.16   22026.    2.30
# 2     2     8     8    2.83    2981.    2.08    2.83    2981.    2.08

引数.namesには、{col}で（引数.colsに指定した）列名、{fn}で（引数.fnsに指定した）関数名を含む列名を指定できます。ただし、関数名は名前付きリストの形（list(名前 = 要素, 名前 = 要素, ...)の形）で渡す必要があります。列名や関数名を指定しなかった場合の例を次に示します。

anscombe_tbl %>%
  select(id, x1:x4) %>%
  mutate(across(.cols = x1:x4, .fns = log, .names = "{fn}_{col}"))
# # A tibble: 11 x 9
#      id    x1    x2    x3    x4 `1_x1` `1_x2` `1_x3` `1_x4`
#   <int> <dbl> <dbl> <dbl> <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
# 1     1    10    10    10     8   2.30   2.30   2.30   2.08
# 2     2     8     8     8     8   2.08   2.08   2.08   2.08
# 名前付きリストの形で関数を指定していないため、{fn}のところが数字になっている。

anscombe_tbl %>%
  select(id, x1:x2) %>%
  mutate(across(.cols = x1:x2,
                .fns = list(sqrt, exp, log),
                .names = "{fn}_{col}"))
# # A tibble: 11 x 9
#      id    x1    x2 `1_x1`    `2_x1` `3_x1` `1_x2`    `2_x2` `3_x2`
#   <int> <dbl> <dbl>  <dbl>     <dbl>  <dbl>  <dbl>     <dbl>  <dbl>
# 1     1    10    10   3.16   22026.    2.30   3.16   22026.    2.30
# 2     2     8     8   2.83    2981.    2.08   2.83    2981.    2.08
# 名前付きリストの形で関数を指定していないため、{fn}のところが数字になっている。

anscombe_tbl %>%
  select(id, x1:x2) %>%
  mutate(across(.cols = x1:x2,
                .fns = list(sqrt, exp, log)))
# # A tibble: 11 x 9
#      id    x1    x2  x1_1      x1_2  x1_3  x2_1      x2_2  x2_3
#   <int> <dbl> <dbl> <dbl>     <dbl> <dbl> <dbl>     <dbl> <dbl>
# 1     1    10    10  3.16   22026.   2.30  3.16   22026.   2.30
# 2     2     8     8  2.83    2981.   2.08  2.83    2981.   2.08
# 計算後の列名の形を指定していないため、"計算前の列名_1"のような列名になっている。

参考：

Create, modify, and delete columns

列計算に使う関数

四則演算や数学関数、文字列関数の他にもいろいろな関数が用意されていますが、長くなりましたので、これについては別記事にします。

参考

tidyverse
- dplyr
- tidyr
R for Data Science
- 5 Data transformation
- 12 Tidy data
Heavy Watal - R stats
- dplyr — 高速data.frame処理
- tidyr — シンプルなデータ変形ツール
Rのデータベースの操作についての自前のまとめ

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up