More than 3 years have passed since last update.

Rの標準パッケージによるデータフレームの集計操作

Last updated at 2021-04-04Posted at 2021-04-04

はじめに

機械学習の勉強を始めたVBAユーザです。
備忘録としてRの使い方についてまとめています。

こちらの記事では、Rのパッケージdplyr, tidyrを使ったデータフレーム操作についてまとめました。ここではそれと対比して、Rの標準パッケージを用いて同じことをしてみます。

Rの標準パッケージによるデータフレームの集計操作
参考

Rの標準パッケージによるデータフレームの集計操作

カウント集計

列のカウントにはnrow()関数を使うか、rownames()（行名）のlength()（長さ）を計算することでもできます。

# カウント集計
nrow(iris)
# [1] 150

length(rownames(iris))
# [1] 150

グループ別カウント集計

グループ別にカウント集計するにはtable()関数が使えます。結果はtableクラスのオブジェクトが返りますが、data.frame()でデータフレームにできます。

# グループ別カウント集計
table(iris$Species)
# 
#     setosa versicolor  virginica 
#         50         50         50 

table(iris$Species)
# 
#     setosa versicolor  virginica 
#         50         50         50 

table_iris <- table(iris$Species)
class(table_iris)
# [1] "table"
data.frame(table_iris)
#         Var1 Freq
# 1     setosa   50
# 2 versicolor   50
# 3  virginica   50
as.data.frame(table_iris)
#         Var1 Freq
# 1     setosa   50
# 2 versicolor   50
# 3  virginica   50

as.data.frame(table_iris, responseName = "count")
#         Var1 count
# 1     setosa    50
# 2 versicolor    50
# 3  virginica    50

基本統計量の集計

# 基本統計量の集計

# 最大値
max(iris$Sepal.Length)
# [1] 7.9

# 最小値
min(iris$Sepal.Length)
# [1] 4.3

# ユニークな値の数
length(unique(iris$Sepal.Length))
# [1] 35

# 平均値
mean(iris$Sepal.Length)
# [1] 5.843333

# 中央値
median(iris$Sepal.Length)
# [1] 5.8

# 不偏分散
var(iris$Sepal.Length)
sum((iris$Sepal.Length - mean(iris$Sepal.Length))^2) / (length(iris$Sepal.Length) - 1)
# [1] 0.6856935

# 分散
var(iris$Sepal.Length)*((length(iris$Sepal.Length)-1) / length(iris$Sepal.Length))
sum((iris$Sepal.Length - mean(iris$Sepal.Length))^2) / length(iris$Sepal.Length)
# [1] 0.6811222

# 不偏標準偏差
sd(iris$Sepal.Length)
sqrt(sum((iris$Sepal.Length - mean(iris$Sepal.Length))^2) / (length(iris$Sepal.Length) - 1))
# [1] 0.8280661

# 標準偏差
sd(iris$Sepal.Length)*sqrt((length(iris$Sepal.Length)-1) / length(iris$Sepal.Length))
sqrt(sum((iris$Sepal.Length - mean(iris$Sepal.Length))^2) / length(iris$Sepal.Length))
# [1] 0.8253013

# クォンタイル点（最小値・第１四分位・中央値・第３四分位・最大値）
quantile(iris$Sepal.Length)
#   0%  25%  50%  75% 100% 
#  4.3  5.1  5.8  6.4  7.9 

quantile(iris$Sepal.Length, 0.25)
# 25% 
# 5.1

quantile(iris$Sepal.Length, c(0.25, 0.75))
# 25% 75% 
# 5.1 6.4 

# 範囲（最小値・最大値）
range(iris$Sepal.Length)
# [1] 4.3 7.9
range(iris$Sepal.Length)[2] - range(iris$Sepal.Length)[1]
# [1] 3.6

# 四分位偏差（第３四分位 - 第１四分位）
IQR(iris$Sepal.Length)
quantile(iris$Sepal.Length, 0.75, names=F) - quantile(iris$Sepal.Length, 0.25, names=F) 
# [1] 1.3

# 平均絶対偏差（Mean Absolute Deviation）
mad(iris$Sepal.Length)
# [1] 1.03782

# ５数要約（最小値・下側ヒンジ・中央値・上側ヒンジ・最大値）
fivenum(iris$Sepal.Length)
# [1] 4.3 5.1 5.8 6.4 7.9

# 要約統計量
summary(iris$Sepal.Length)
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#   4.300   5.100   5.800   5.843   6.400   7.900 

# 和
sum(iris$Sepal.Length)
# [1] 876.5

# 積
prod(iris$Sepal.Length)
# [1] 2.25744e+114

summary()関数を使うと、各列の要約統計量（最小値、第１四分位数、中央値、平均値、第３四分位数、最大値）（因子型ついては各因子の度数カウント）を表示できます。

summary(iris)
#   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width          Species  
#  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100   setosa    :50  
#  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300   versicolor:50  
#  Median :5.800   Median :3.000   Median :4.350   Median :1.300   virginica :50  
#  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199                  
#  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800                  
#  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500                  

summary(anscombe)
#        x1             x2             x3             x4           y1               y2              y3              y4        
#  Min.   : 4.0   Min.   : 4.0   Min.   : 4.0   Min.   : 8   Min.   : 4.260   Min.   :3.100   Min.   : 5.39   Min.   : 5.250  
#  1st Qu.: 6.5   1st Qu.: 6.5   1st Qu.: 6.5   1st Qu.: 8   1st Qu.: 6.315   1st Qu.:6.695   1st Qu.: 6.25   1st Qu.: 6.170  
#  Median : 9.0   Median : 9.0   Median : 9.0   Median : 8   Median : 7.580   Median :8.140   Median : 7.11   Median : 7.040  
#  Mean   : 9.0   Mean   : 9.0   Mean   : 9.0   Mean   : 9   Mean   : 7.501   Mean   :7.501   Mean   : 7.50   Mean   : 7.501  
#  3rd Qu.:11.5   3rd Qu.:11.5   3rd Qu.:11.5   3rd Qu.: 8   3rd Qu.: 8.570   3rd Qu.:8.950   3rd Qu.: 7.98   3rd Qu.: 8.190  
#  Max.   :14.0   Max.   :14.0   Max.   :14.0   Max.   :19   Max.   :10.840   Max.   :9.260   Max.   :12.74   Max.   :12.500

基本統計量の集計結果をデータフレームで返します。

data.frame(n = length(row.names(iris)),
           max = max(iris$Sepal.Length),
           min = min(iris$Sepal.Length))
#     n max min
# 1 150 7.9 4.3

data.frame(length(row.names(iris)),
           mean(iris$Sepal.Length), mean(iris$Sepal.Width),
           mean(iris$Petal.Length), mean(iris$Petal.Width))
#   length.row.names.iris.. mean.iris.Sepal.Length. mean.iris.Sepal.Width. mean.iris.Petal.Length.
# 1                     150                5.843333               3.057333                   3.758

data.frame(count = length(row.names(iris)),
           mean_sl = mean(iris$Sepal.Length), mean_sw = mean(iris$Sepal.Width),
           mean_pl = mean(iris$Petal.Length), mean_pw = mean(iris$Petal.Width))
#   count  mean_sl  mean_sw mean_pl  mean_pw
# 1   150 5.843333 3.057333   3.758 1.199333

グループ別の基本統計量の集計

グループ別に基本統計量を集計するには、aggregate()関数を使います。
attach()しておくと、データフレーム名を毎回書かなくても列名だけでデータフレームの列にアクセスできるようになります。detach()で元に戻ります。

# グループ別の基本統計量の集計
aggregate(iris$Sepal.Length, by = list(iris$Species), FUN = length)
aggregate(iris$Sepal.Length, by = list(iris$Species), FUN = function(x) length(x))
#      Group.1  x
# 1     setosa 50
# 2 versicolor 50
# 3  virginica 50
attach(iris)
aggregate(Sepal.Length, by = list(Species), FUN = length)
detach(iris)
#      Group.1  x
# 1     setosa 50
# 2 versicolor 50
# 3  virginica 50

aggregate(iris$Sepal.Length, by = list(species = iris$Species), FUN = length)
aggregate(iris$Sepal.Length, by = list(species = iris$Species), FUN = function(x) length(x))
#      species  x
# 1     setosa 50
# 2 versicolor 50
# 3  virginica 50

aggregate(Sepal.Length ~ Species, data = iris, FUN = length)
aggregate(Sepal.Length ~ Species, data = iris, FUN = function(x) length(x))
#      Species Sepal.Length
# 1     setosa           50
# 2 versicolor           50
# 3  virginica           50

aggregate(iris$Sepal.Length, by = list(iris$Species, round(iris$Sepal.Length)), FUN = length)
#       Group.1 Group.2  x
# 1      setosa       4  5
# 2      setosa       5 40
# 3  versicolor       5  6
# 4   virginica       5  1
# 5      setosa       6  5
# 6  versicolor       6 36
# 7   virginica       6 27
# 8  versicolor       7  8
# 9   virginica       7 16
# 10  virginica       8  6

aggregate(iris$Sepal.Length, by = list(species = iris$Species, sl = round(iris$Sepal.Length)), FUN = length)
#       species sl  x
# 1      setosa  4  5
# 2      setosa  5 40
# 3  versicolor  5  6
# 4   virginica  5  1
# 5      setosa  6  5
# 6  versicolor  6 36
# 7   virginica  6 27
# 8  versicolor  7  8
# 9   virginica  7 16
# 10  virginica  8  6

aggregate(Sepal.Length ~ Species + round(Sepal.Length), data = iris, FUN = length)
#       Species round(Sepal.Length) Sepal.Length
# 1      setosa                   4            5
# 2      setosa                   5           40
# 3  versicolor                   5            6
# 4   virginica                   5            1
# 5      setosa                   6            5
# 6  versicolor                   6           36
# 7   virginica                   6           27
# 8  versicolor                   7            8
# 9   virginica                   7           16
# 10  virginica                   8            6


aggregate(iris$Sepal.Length, by = list(species = iris$Species), FUN = mean)
#      species     x
# 1     setosa 5.006
# 2 versicolor 5.936
# 3  virginica 6.588

aggregate(Sepal.Length ~ Species, data = iris, FUN = mean)
#      Species Sepal.Length
# 1     setosa        5.006
# 2 versicolor        5.936
# 3  virginica        6.588


aggregate(iris[,1:4], by = list(species = iris$Species), FUN = mean)
#      species Sepal.Length Sepal.Width Petal.Length Petal.Width
# 1     setosa        5.006       3.428        1.462       0.246
# 2 versicolor        5.936       2.770        4.260       1.326
# 3  virginica        6.588       2.974        5.552       2.026

df <- cbind(
  aggregate(iris$Sepal.Length, by = list(iris$Species), FUN = length),
  aggregate(iris$Sepal.Length, by = list(iris$Species), FUN = mean)[,2],
  aggregate(iris$Sepal.Width,  by = list(iris$Species), FUN = mean)[,2],
  aggregate(iris$Petal.Length, by = list(iris$Species), FUN = mean)[,2],
  aggregate(iris$Petal.Width,  by = list(iris$Species), FUN = mean)[,2])
colnames(df) <- c("Species", "count", "mean_sl", "mean_sw", "mean_pl", "mean_pw")
df
#      Species count mean_sl mean_sw mean_pl mean_pw
# 1     setosa    50   5.006   3.428   1.462   0.246
# 2 versicolor    50   5.936   2.770   4.260   1.326
# 3  virginica    50   6.588   2.974   5.552   2.026
df <- cbind(
  aggregate(iris$Sepal.Length, by = list(iris$Species), FUN = length),
  aggregate(iris[,1:4], by = list(iris$Species), FUN = mean)[,2:5])
colnames(df) <- c("Species", "count", "mean_sl", "mean_sw", "mean_pl", "mean_pw")
df
#      Species count mean_sl mean_sw mean_pl mean_pw
# 1     setosa    50   5.006   3.428   1.462   0.246
# 2 versicolor    50   5.936   2.770   4.260   1.326
# 3  virginica    50   6.588   2.974   5.552   2.026


aggregate(iris$Sepal.Length, by = list(iris$Species), FUN = function(x) quantile(x, 0.25))
aggregate(Sepal.Length ~ Species, data = iris, FUN = function(x) quantile(x, 0.25))
#      Species Sepal.Length
# 1     setosa        4.800
# 2 versicolor        5.600
# 3  virginica        6.225

aggregate(iris$Sepal.Length, by = list(iris$Species), FUN = function(x) quantile(x, c(0.25, 0.75)))
aggregate(Sepal.Length ~ Species, data = iris, FUN = function(x) quantile(x, c(0.25, 0.75)))
#      Species Sepal.Length.25% Sepal.Length.75%
# 1     setosa            4.800            5.200
# 2 versicolor            5.600            6.300
# 3  virginica            6.225            6.900

df <- cbind(
  aggregate(Sepal.Length ~ Species, data = iris, FUN = length),
  aggregate(Sepal.Length ~ Species, data = iris, FUN = function(x) length(unique(x)))[,2],
  aggregate(Sepal.Length ~ Species, data = iris, FUN = mean)[,2],
  aggregate(Sepal.Length ~ Species, data = iris, FUN = median)[,2],
  aggregate(Sepal.Length ~ Species, data = iris, FUN = sd)[,2],
  aggregate(Sepal.Length ~ Species, data = iris, FUN = IQR)[,2],
  aggregate(Sepal.Length ~ Species, data = iris, FUN = mad)[,2],
  aggregate(Sepal.Length ~ Species, data = iris, FUN = min)[,2],
  aggregate(Sepal.Length ~ Species, data = iris, FUN = max)[,2],
  aggregate(Sepal.Length ~ Species, data = iris, FUN = function(x) quantile(x, 0.25))[,2],
  aggregate(Sepal.Length ~ Species, data = iris, FUN = function(x) quantile(x, 0.75))[,2],
  aggregate(Sepal.Length ~ Species, data = iris, FUN = sum)[,2],
  aggregate(Sepal.Length ~ Species, data = iris, FUN = prod)[,2])
colnames(df) <- c("Species", "n", "n_distinct",
                  "mean", "median", "sd", "IQR", "max", "min", "max", "Q1", "Q2",
                  "sum", "prod")
df
#      Species  n n_distinct  mean median        sd   IQR     max min max    Q1  Q2   sum         prod
# 1     setosa 50         15 5.006    5.0 0.3524897 0.400 0.29652 4.3 5.8 4.800 5.2 250.3 8.350948e+34
# 2 versicolor 50         21 5.936    5.9 0.5161711 0.700 0.51891 4.9 7.0 5.600 6.3 296.8 3.926361e+38
# 3  virginica 50         21 6.588    6.5 0.6358796 0.675 0.59304 4.9 7.9 6.225 6.9 329.4 6.884782e+40

統計量を集計する関数の結果が長さ1でない場合について

# 統計量を集計する関数の結果が長さ1でない場合
aggregate(Sepal.Length ~ 1, data = iris, FUN = range)
#   Sepal.Length.1 Sepal.Length.2
# 1            4.3            7.9

aggregate(Sepal.Length ~ 1, data = iris, FUN = summary)
#   Sepal.Length.Min. Sepal.Length.1st Qu. Sepal.Length.Median Sepal.Length.Mean Sepal.Length.3rd Qu. Sepal.Length.Max.
# 1          4.300000             5.100000            5.800000          5.843333             6.400000          7.900000

aggregate(Sepal.Length ~ 1, data = iris, FUN = function(x) quantile(x, c(0.0, 0.25, 0.5, 0.75, 1.0)))
#   Sepal.Length.0% Sepal.Length.25% Sepal.Length.50% Sepal.Length.75% Sepal.Length.100%
# 1             4.3              5.1              5.8              6.4               7.9

aggregate(Sepal.Length ~ Species, data = iris, FUN = summary)
#      Species Sepal.Length.Min. Sepal.Length.1st Qu. Sepal.Length.Median Sepal.Length.Mean Sepal.Length.3rd Qu. Sepal.Length.Max.
# 1     setosa             4.300                4.800               5.000             5.006                5.200             5.800
# 2 versicolor             4.900                5.600               5.900             5.936                6.300             7.000
# 3  virginica             4.900                6.225               6.500             6.588                6.900             7.900

複数列にわたる集計

# 複数列にわたる集計
aggregate(iris[,1:4], by = list(species = iris$Species), FUN = mean)
#      species Sepal.Length Sepal.Width Petal.Length Petal.Width
# 1     setosa        5.006       3.428        1.462       0.246
# 2 versicolor        5.936       2.770        4.260       1.326
# 3  virginica        6.588       2.974        5.552       2.026

aggregate(cbind(Sepal.Length, Sepal.Width, Petal.Length, Petal.Width) ~ Species, data = iris, FUN = mean)
aggregate(. ~ Species, data = iris, FUN = mean)
#      Species Sepal.Length Sepal.Width Petal.Length Petal.Width
# 1     setosa        5.006       3.428        1.462       0.246
# 2 versicolor        5.936       2.770        4.260       1.326
# 3  virginica        6.588       2.974        5.552       2.026

aggregate(iris[,1:4], by = list(species = iris$Species), FUN = function(x) mean(x) * 50)
#      species Sepal.Length Sepal.Width Petal.Length Petal.Width
# 1     setosa        250.3       171.4         73.1        12.3
# 2 versicolor        296.8       138.5        213.0        66.3
# 3  virginica        329.4       148.7        277.6       101.3

aggregate(cbind(Sepal.Length, Sepal.Width, Petal.Length, Petal.Width) ~ Species, data = iris, FUN = function(x) mean(x) * 50)
aggregate(. ~ Species, data = iris, FUN = function(x) mean(x) * 50)
#      Species Sepal.Length Sepal.Width Petal.Length Petal.Width
# 1     setosa        250.3       171.4         73.1        12.3
# 2 versicolor        296.8       138.5        213.0        66.3
# 3  virginica        329.4       148.7        277.6       101.3

aggregate(iris[,1:4], by = list(species = iris$Species), FUN = sum)
#      species Sepal.Length Sepal.Width Petal.Length Petal.Width
# 1     setosa        250.3       171.4         73.1        12.3
# 2 versicolor        296.8       138.5        213.0        66.3
# 3  virginica        329.4       148.7        277.6       101.3

aggregate(cbind(Sepal.Length, Sepal.Width, Petal.Length, Petal.Width) ~ Species, data = iris, FUN = sum)
aggregate(. ~ Species, data = iris, FUN = sum)
#      Species Sepal.Length Sepal.Width Petal.Length Petal.Width
# 1     setosa        250.3       171.4         73.1        12.3
# 2 versicolor        296.8       138.5        213.0        66.3
# 3  virginica        329.4       148.7        277.6       101.3

aggregate(cbind(x1, x2, x3, x4, y1, y2, y3, y4) ~ 1, data = anscombe, FUN = mean)
aggregate(. ~ 1, data = anscombe, FUN = mean)
#   x1 x2 x3 x4       y1       y2  y3       y4
# 1  9  9  9  9 7.500909 7.500909 7.5 7.500909

aggregate(anscombe, by = list(rep(1, 11)), FUN = mean)
#   Group.1 x1 x2 x3 x4       y1       y2  y3       y4
# 1       1  9  9  9  9 7.500909 7.500909 7.5 7.500909

複数の集計関数

複数の集計関数を適用することはできない？

おまけ

# summary関数のような出力
df <- rbind(
  aggregate(. ~ 1, data = iris[,1:4], FUN = min),
  aggregate(. ~ 1, data = iris[,1:4], FUN = function(x) quantile(x, 0.25)),
  aggregate(. ~ 1, data = iris[,1:4], FUN = median),
  aggregate(. ~ 1, data = iris[,1:4], FUN = mean),
  aggregate(. ~ 1, data = iris[,1:4], FUN = function(x) quantile(x, 0.75)),
  aggregate(. ~ 1, data = iris[,1:4], FUN = max))
rownames(df) <- c("Min", "Q1", "Median", "Mean", "Q3", "Max")
df
#        Sepal.Length Sepal.Width Petal.Length Petal.Width
# Min        4.300000    2.000000        1.000    0.100000
# Q1         5.100000    2.800000        1.600    0.300000
# Median     5.800000    3.000000        4.350    1.300000
# Mean       5.843333    3.057333        3.758    1.199333
# Q3         6.400000    3.300000        5.100    1.800000
# Max        7.900000    4.400000        6.900    2.500000

df <- rbind(
  aggregate(. ~ 1, data = anscombe, FUN = min),
  aggregate(. ~ 1, data = anscombe, FUN = function(x) quantile(x, 0.25)),
  aggregate(. ~ 1, data = anscombe, FUN = median),
  aggregate(. ~ 1, data = anscombe, FUN = mean),
  aggregate(. ~ 1, data = anscombe, FUN = function(x) quantile(x, 0.75)),
  aggregate(. ~ 1, data = anscombe, FUN = max))
rownames(df) <- c("Min.", "1st Qu.", "Median", "Mean", "3rd Qu.", "Max.")
df
#           x1   x2   x3 x4        y1       y2    y3        y4
# Min.     4.0  4.0  4.0  8  4.260000 3.100000  5.39  5.250000
# 1st Qu.  6.5  6.5  6.5  8  6.315000 6.695000  6.25  6.170000
# Median   9.0  9.0  9.0  8  7.580000 8.140000  7.11  7.040000
# Mean     9.0  9.0  9.0  9  7.500909 7.500909  7.50  7.500909
# 3rd Qu. 11.5 11.5 11.5  8  8.570000 8.950000  7.98  8.190000
# Max.    14.0 14.0 14.0 19 10.840000 9.260000 12.74 12.500000

ピボット集計

まず、使用するデータを作成しておきます。

# データの準備
iris_tbl <- iris
iris_tbl$id <- 1:nrow(iris)
iris_sl <- data.frame(iris_tbl[, c("id", "Species")], part = "Sepal", measure = "Length", value = iris_tbl$Sepal.Length)
iris_sw <- data.frame(iris_tbl[, c("id", "Species")], part = "Sepal", measure = "Width" , value = iris_tbl$Sepal.Width)
iris_pl <- data.frame(iris_tbl[, c("id", "Species")], part = "Petal", measure = "Length", value = iris_tbl$Petal.Length)
iris_pw <- data.frame(iris_tbl[, c("id", "Species")], part = "Petal", measure = "Width" , value = iris_tbl$Petal.Width)
iris_long <- rbind(iris_sl, iris_sw, iris_pl, iris_pw)
iris_long
#      id    Species  part measure value
# 1     1     setosa Sepal  Length   5.1
# 2     2     setosa Sepal  Length   4.9
# 3     3     setosa Sepal  Length   4.7
# 4     4     setosa Sepal  Length   4.6
# 5     5     setosa Sepal  Length   5.0
# ...

anscombe_tbl <- anscombe
anscombe_tbl$id <- 1:nrow(anscombe)
anscombe_x1 <- data.frame(id = anscombe_tbl$id, axis = "x", set = "1", value = anscombe_tbl$x1)
anscombe_x2 <- data.frame(id = anscombe_tbl$id, axis = "x", set = "2", value = anscombe_tbl$x2)
anscombe_x3 <- data.frame(id = anscombe_tbl$id, axis = "x", set = "3", value = anscombe_tbl$x3)
anscombe_x4 <- data.frame(id = anscombe_tbl$id, axis = "x", set = "4", value = anscombe_tbl$x4)
anscombe_y1 <- data.frame(id = anscombe_tbl$id, axis = "y", set = "1", value = anscombe_tbl$y1)
anscombe_y2 <- data.frame(id = anscombe_tbl$id, axis = "y", set = "2", value = anscombe_tbl$y2)
anscombe_y3 <- data.frame(id = anscombe_tbl$id, axis = "y", set = "3", value = anscombe_tbl$y3)
anscombe_y4 <- data.frame(id = anscombe_tbl$id, axis = "y", set = "4", value = anscombe_tbl$y4)
anscombe_long <- rbind(anscombe_x1, anscombe_x2, anscombe_x3, anscombe_x4,
                       anscombe_y1, anscombe_y2, anscombe_y3, anscombe_y4)
anscombe_long
#    id axis set value
# 1   1    x   1 10.00
# 2   2    x   1  8.00
# 3   3    x   1 13.00
# 4   4    x   1  9.00
# 5   5    x   1 11.00
# ...

ピボット集計でカウント

table()関数は2次元でも使えます。

# ピボット集計でカウント
table(iris_long$part, iris_long$measure)
#        
#         Length Width
#   Petal    150   150
#   Sepal    150   150

table(paste(iris_long$part, iris_long$measure), iris_long$Species)
#               
#                setosa versicolor virginica
#   Petal Length     50         50        50
#   Petal Width      50         50        50
#   Sepal Length     50         50        50
#   Sepal Width      50         50        50

table(anscombe_long$set, anscombe_long$axis)
#    
#      x  y
#   1 11 11
#   2 11 11
#   3 11 11
#   4 11 11

2次元以上になる場合は、ftable()関数も使えます。

ftable(anscombe_long[c("set", "axis")])
#     axis  x  y
# set           
# 1        11 11
# 2        11 11
# 3        11 11
# 4        11 11

ftable(iris_long[c("part", "measure", "Species")])
#               Species setosa versicolor virginica
# part  measure                                    
# Sepal Length              50         50        50
#       Width               50         50        50
# Petal Length              50         50        50
#       Width               50         50        50

ftable(iris_long[c("Species", "measure", "part")], row.vars = c(3, 2))
#               Species setosa versicolor virginica
# part  measure                                    
# Sepal Length              50         50        50
#       Width               50         50        50
# Petal Length              50         50        50
#       Width               50         50        50

ftable(iris_long[c("Species", "part", "measure")], row.vars = c(1))
#            part     Sepal        Petal      
#            measure Length Width Length Width
# Species                                     
# setosa                 50    50     50    50
# versicolor             50    50     50    50
# virginica              50    50     50    50

ftable(anscombe_long[c("set", "axis")], row.vars = c(2, 1))
# axis set    
# x    1    11
#      2    11
#      3    11
#      4    11
# y    1    11
#      2    11
#      3    11
#      4    11

xtabs()関数も使えます。

xtabs(~ set + axis, data = anscombe_long)
#    axis
# set  x  y
#   1 11 11
#   2 11 11
#   3 11 11
#   4 11 11

xtabs(~ paste(part, measure) + Species, data = iris_long)
#                     Species
# paste(part, measure) setosa versicolor virginica
#         Petal Length     50         50        50
#         Petal Width      50         50        50
#         Sepal Length     50         50        50
#         Sepal Width      50         50        50

anscombe_ag <- aggregate(value ~ set + axis, data = anscombe_long, FUN = length)
anscombe_ag
#   set axis value
# 1   1    x    11
# 2   2    x    11
# 3   3    x    11
# 4   4    x    11
# 5   1    y    11
# 6   2    y    11
# 7   3    y    11
# 8   4    y    11
xtabs(value ~ set + axis, data = anscombe_ag)
#    axis
# set  x  y
#   1 11 11
#   2 11 11
#   3 11 11
#   4 11 11

iris_ag <- aggregate(value ~ part + measure + Species, data = iris_long, FUN = length)
iris_ag
#     part measure    Species value
# 1  Petal  Length     setosa    50
# 2  Sepal  Length     setosa    50
# 3  Petal   Width     setosa    50
# 4  Sepal   Width     setosa    50
# 5  Petal  Length versicolor    50
# 6  Sepal  Length versicolor    50
# 7  Petal   Width versicolor    50
# 8  Sepal   Width versicolor    50
# 9  Petal  Length  virginica    50
# 10 Sepal  Length  virginica    50
# 11 Petal   Width  virginica    50
# 12 Sepal   Width  virginica    50
xtabs(value ~ paste(part, measure) + Species, data = iris_ag)
#                     Species
# paste(part, measure) setosa versicolor virginica
#         Petal Length     50         50        50
#         Petal Width      50         50        50
#         Sepal Length     50         50        50
#         Sepal Width      50         50        50

ピボット集計で合計

xtabs()関数でピボット集計（合計）ができます。

# ピボット集計で合計
xtabs(value ~ set + axis, data = anscombe_long)
#    axis
# set     x     y
#   1 99.00 82.51
#   2 99.00 82.51
#   3 99.00 82.50
#   4 99.00 82.51

xtabs(value ~ paste(part, measure) + Species, data = iris_long)
#                     Species
# paste(part, measure) setosa versicolor virginica
#         Petal Length   73.1      213.0     277.6
#         Petal Width    12.3       66.3     101.3
#         Sepal Length  250.3      296.8     329.4
#         Sepal Width   171.4      138.5     148.7

anscombe_ag <- aggregate(value ~ set + axis, data = anscombe_long, FUN = sum)
anscombe_ag
#   set axis value
# 1   1    x 99.00
# 2   2    x 99.00
# 3   3    x 99.00
# 4   4    x 99.00
# 5   1    y 82.51
# 6   2    y 82.51
# 7   3    y 82.50
# 8   4    y 82.51
xtabs(value ~ set + axis, data = anscombe_ag)
#    axis
# set     x     y
#   1 99.00 82.51
#   2 99.00 82.51
#   3 99.00 82.50
#   4 99.00 82.51

iris_ag <- aggregate(value ~ part + measure + Species, data = iris_long, FUN = sum)
iris_ag
#     part measure    Species value
# 1  Petal  Length     setosa  73.1
# 2  Sepal  Length     setosa 250.3
# 3  Petal   Width     setosa  12.3
# 4  Sepal   Width     setosa 171.4
# 5  Petal  Length versicolor 213.0
# 6  Sepal  Length versicolor 296.8
# 7  Petal   Width versicolor  66.3
# 8  Sepal   Width versicolor 138.5
# 9  Petal  Length  virginica 277.6
# 10 Sepal  Length  virginica 329.4
# 11 Petal   Width  virginica 101.3
# 12 Sepal   Width  virginica 148.7
xtabs(value ~ paste(part, measure) + Species, data = iris_ag)
#                     Species
# paste(part, measure) setosa versicolor virginica
#         Petal Length   73.1      213.0     277.6
#         Petal Width    12.3       66.3     101.3
#         Sepal Length  250.3      296.8     329.4
#         Sepal Width   171.4      138.5     148.7

ピボット集計で平均

ピボット集計で平均を取る場合は、まずaggregate()関数で平均を計算しておいて、xtabs()関数でピボットすればできます。

# ピボット集計で平均
anscombe_ag <- aggregate(value ~ set + axis, data = anscombe_long, FUN = mean)
anscombe_ag
#   set axis    value
# 1   1    x 9.000000
# 2   2    x 9.000000
# 3   3    x 9.000000
# 4   4    x 9.000000
# 5   1    y 7.500909
# 6   2    y 7.500909
# 7   3    y 7.500000
# 8   4    y 7.500909
xtabs(value ~ set + axis, data = anscombe_ag)
#    axis
# set        x        y
#   1 9.000000 7.500909
#   2 9.000000 7.500909
#   3 9.000000 7.500000
#   4 9.000000 7.500909

iris_ag <- aggregate(value ~ part + measure + Species, data = iris_long, FUN = mean)
iris_ag
#     part measure    Species value
# 1  Petal  Length     setosa 1.462
# 2  Sepal  Length     setosa 5.006
# 3  Petal   Width     setosa 0.246
# 4  Sepal   Width     setosa 3.428
# 5  Petal  Length versicolor 4.260
# 6  Sepal  Length versicolor 5.936
# 7  Petal   Width versicolor 1.326
# 8  Sepal   Width versicolor 2.770
# 9  Petal  Length  virginica 5.552
# 10 Sepal  Length  virginica 6.588
# 11 Petal   Width  virginica 2.026
# 12 Sepal   Width  virginica 2.974
xtabs(value ~ paste(part, measure) + Species, data = iris_ag)
#                     Species
# paste(part, measure) setosa versicolor virginica
#         Petal Length  1.462      4.260     5.552
#         Petal Width   0.246      1.326     2.026
#         Sepal Length  5.006      5.936     6.588
#         Sepal Width   3.428      2.770     2.974

参考

Rの標準パッケージ
- base
  - table()
  - summary()
- stats

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up