ykishi@dezembro SD % ./SD_test.py
# ==================================================
# 標準偏差を求めてバラツキの大小を確認する
# ==================================================
ymd foo bar baz qux fred
0 2021-05-10 3.0 NaN 2.0 1.0 6.0
1 2021-05-11 NaN 5.0 2.0 NaN NaN
2 2021-05-12 NaN NaN 2.0 NaN 4.0
3 2021-05-13 10.0 NaN 2.0 2.0 7.0
4 2021-05-14 NaN 5.0 2.0 NaN NaN
5 2021-05-15 NaN NaN 2.0 NaN NaN
6 2021-05-16 NaN NaN 2.0 3.0 7.0
7 2021-05-17 3.0 5.0 NaN NaN 7.0
8 2021-05-18 NaN NaN 2.0 NaN 7.0
9 2021-05-19 NaN NaN NaN NaN 7.0
10 2021-05-20 NaN 5.0 NaN 10.0 7.0
11 2021-05-21 5.0 NaN 2.0 NaN NaN
12 2021-05-22 NaN NaN 2.0 NaN 15.0
13 2021-05-23 NaN 7.0 NaN NaN NaN
14 2021-05-24 100.0 NaN 3.0 9.0 NaN
15 2021-05-25 NaN NaN NaN NaN NaN
16 2021-05-26 NaN 20.0 4.0 NaN 3.0
17 2021-05-27 6.0 NaN 7.0 2.0 7.0
18 2021-05-28 NaN NaN NaN 1.0 8.0
19 2021-05-29 1.0 30.0 NaN 1.0 NaN
20 2021-05-30 NaN NaN 8.0 1.0 8.0
21 2021-05-31 5.0 NaN 1.0 1.0 9.0
22 2021-06-01 NaN 2.0 2.0 3.0 3.0
23 2021-06-02 20.0 8.0 3.0 4.0 5.0
#
# 要約統計量を出力する
#
foo bar baz qux fred
count 9.000000 9.000000 17.000000 12.000000 16.000000
mean 17.000000 9.666667 2.823529 3.166667 6.875000
std 31.630681 9.192388 1.878673 3.128559 2.777889
min 1.000000 2.000000 1.000000 1.000000 3.000000
25% 3.000000 5.000000 2.000000 1.000000 5.750000
50% 5.000000 5.000000 2.000000 2.000000 7.000000
75% 10.000000 8.000000 3.000000 3.250000 7.250000
max 100.000000 30.000000 8.000000 10.000000 15.000000
#
# 標準偏差の結果のみ取り出す
#
foo 31.630681
bar 9.192388
baz 1.878673
qux 3.128559
fred 2.777889
Name: std, dtype: float64
<class 'pandas.core.series.Series'>
{'_is_copy': None, '_mgr': SingleBlockManager
Items: Index(['foo', 'bar', 'baz', 'qux', 'fred'], dtype='object')
NumericBlock: 5 dtype: float64, '_item_cache': {}, '_attrs': {}, '_flags': <Flags(allows_duplicate_labels=True)>, '_name': 'std', '_index': Index(['foo', 'bar', 'baz', 'qux', 'fred'], dtype='object')}
float64
#
# SeriesをDataFrameに変換
#
std
foo 31.630681
bar 9.192388
baz 1.878673
qux 3.128559
fred 2.777889
#
# stdの列でソート
#
std
foo 31.630681
bar 9.192388
qux 3.128559
fred 2.777889
baz 1.878673
#
# ソートした結果より上位3位までを列挙する
#
std
foo 31.630681
bar 9.192388
qux 3.128559
# ==================================================
# 欠損値をゼロ埋めした場合の計算結果を確認する
# ==================================================
ymd foo bar baz qux fred
0 2021-05-10 3.0 0.0 2.0 1.0 6.0
1 2021-05-11 0.0 5.0 2.0 0.0 0.0
2 2021-05-12 0.0 0.0 2.0 0.0 4.0
3 2021-05-13 10.0 0.0 2.0 2.0 7.0
4 2021-05-14 0.0 5.0 2.0 0.0 0.0
5 2021-05-15 0.0 0.0 2.0 0.0 0.0
6 2021-05-16 0.0 0.0 2.0 3.0 7.0
7 2021-05-17 3.0 5.0 0.0 0.0 7.0
8 2021-05-18 0.0 0.0 2.0 0.0 7.0
9 2021-05-19 0.0 0.0 0.0 0.0 7.0
10 2021-05-20 0.0 5.0 0.0 10.0 7.0
11 2021-05-21 5.0 0.0 2.0 0.0 0.0
12 2021-05-22 0.0 0.0 2.0 0.0 15.0
13 2021-05-23 0.0 7.0 0.0 0.0 0.0
14 2021-05-24 100.0 0.0 3.0 9.0 0.0
15 2021-05-25 0.0 0.0 0.0 0.0 0.0
16 2021-05-26 0.0 20.0 4.0 0.0 3.0
17 2021-05-27 6.0 0.0 7.0 2.0 7.0
18 2021-05-28 0.0 0.0 0.0 1.0 8.0
19 2021-05-29 1.0 30.0 0.0 1.0 0.0
20 2021-05-30 0.0 0.0 8.0 1.0 8.0
21 2021-05-31 5.0 0.0 1.0 1.0 9.0
22 2021-06-01 0.0 2.0 2.0 3.0 3.0
23 2021-06-02 20.0 8.0 3.0 4.0 5.0
#
# 要約統計量を出力する
#
foo bar baz qux fred
count 24.000000 24.00000 24.000000 24.000000 24.000000
mean 6.375000 3.62500 2.000000 1.583333 4.583333
std 20.461656 7.22804 2.043016 2.701315 3.999094
min 0.000000 0.00000 0.000000 0.000000 0.000000
25% 0.000000 0.00000 0.000000 0.000000 0.000000
50% 0.000000 0.00000 2.000000 0.500000 5.500000
75% 3.500000 5.00000 2.000000 2.000000 7.000000
max 100.000000 30.00000 8.000000 10.000000 15.000000
#
# 標準偏差の結果のみ取り出す
#
foo 20.461656
bar 7.228040
baz 2.043016
qux 2.701315
fred 3.999094
Name: std, dtype: float64
<class 'pandas.core.series.Series'>
{'_is_copy': None, '_mgr': SingleBlockManager
Items: Index(['foo', 'bar', 'baz', 'qux', 'fred'], dtype='object')
NumericBlock: 5 dtype: float64, '_item_cache': {}, '_attrs': {}, '_flags': <Flags(allows_duplicate_labels=True)>, '_name': 'std', '_index': Index(['foo', 'bar', 'baz', 'qux', 'fred'], dtype='object')}
float64
#
# SeriesをDataFrameに変換
#
std
foo 20.461656
bar 7.228040
baz 2.043016
qux 2.701315
fred 3.999094
#
# stdの列でソート
#
std
foo 20.461656
bar 7.228040
fred 3.999094
qux 2.701315
baz 2.043016
#
# ソートした結果より上位3位までを列挙する
#
std
foo 20.461656
bar 7.228040
fred 3.999094
ykishi@dezembro SD %