import pandas as pd
# 他の使いそうなライブラリもインポート
import math
import numpy as np
import matplotlib.pyplot as plt
# jupyter notebook 用の inline 表示リクエスト
%matplotlib inline
pandas のデータ形式
pandas DataFrame という 2 次元配列のデータ形式を主として扱う.
csv を読み込んで DataFrame にしていい感じに計算させることが多い.
pandas.DataFrame
pandas.DataFrame(data=None, index=None, columns=None, dtype=None, copy=False)
自分で初期化する場合, 上のように作る.
data には numpy.ndarray, dict, DataFrame が入る.
# ndarray の場合
pd.DataFrame([[1, 2, 3], [4, 5, 6]])
0 | 1 | 2 | |
---|---|---|---|
0 | 1 | 2 | 3 |
1 | 4 | 5 | 6 |
# dict の場合
pd.DataFrame({'a': [1, 4], 'b': [2, 5], 'c': [3, 6]})
a | b | c | |
---|---|---|---|
0 | 1 | 2 | 3 |
1 | 4 | 5 | 6 |
# ndarray で dict と同値な df を出す
df1 = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'b', 'c'])
df2 = pd.DataFrame({'a': [1, 4], 'b': [2, 5], 'c': [3, 6]})
df1.equals(df2)
True
# データ型の確認
df1.dtypes
a int64
b int64
c int64
dtype: object
# numpy.ndarray に変換
df1.values
array([[1, 2, 3],
[4, 5, 6]])
# 要素数
df1.size
6
DataFrame へのアクセス
df1.a
0 1
1 4
Name: a, dtype: int64
df1['a']
0 1
1 4
Name: a, dtype: int64
df1.loc[0]
a 1
b 2
c 3
Name: 0, dtype: int64
df1.loc
<pandas.core.indexing._LocIndexer at 0x114ea6550>
csv を読み込んで DataFrame にする
pandas.read_csv(filepath_or_buffer, sep=', ', delimiter=None, header='infer', names=None, index_col=None, usecols=None, squeeze=False, prefix=None, mangle_dupe_cols=True, dtype=None, engine=None, converters=None, true_values=None, false_values=None, skipinitialspace=False, skiprows=None, nrows=None, na_values=None, keep_default_na=True, na_filter=True, verbose=False, skip_blank_lines=True, parse_dates=False, infer_datetime_format=False, keep_date_col=False, date_parser=None, dayfirst=False, iterator=False, chunksize=None, compression='infer', thousands=None, decimal=b'.', lineterminator=None, quotechar='"', quoting=0, escapechar=None, comment=None, encoding=None, dialect=None, tupleize_cols=None, error_bad_lines=True, warn_bad_lines=True, skipfooter=0, skip_footer=0, doublequote=True, delim_whitespace=False, as_recarray=None, compact_ints=None, use_unsigned=None, low_memory=True, buffer_lines=None, memory_map=False, float_precision=None)
sample_01_btc.csv
BTC-USD 取引の 1 分毎のデータ.
Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
1508454000,5714.5,5722,5707.08,5722,4.1163103,23517.533993,5713.2558721
1508454060,5709.99,5722,5709.56,5711.16,4.32023995,24680.498868,5712.7611322
1508454120,5711.16,5718.8,5710.01,5710.01,3.85405094,22020.718724,5713.6553374
1508454180,5711.36,5711.36,5706.66,5706.66,1.16405304,6646.2711683,5709.5947864
1508454240,5706.48,5710.87,5706.31,5706.31,1.38535201,7908.5236431,5708.674464
1508454300,5718.57,5722.81,5706,5706,0.81982085,4685.1229798,5714.8131568
1508454360,5721.32,5721.32,5721.32,5721.32,0.1621444,927.67999861,5721.32
1508454420,5707.22,5725.4,5707.22,5709.89,0.30563931,1748.2169415,5719.8694158
1508454480,5719.22,5719.22,5709.89,5709.89,1.98576163,11338.667829,5709.9843496
1508454540,5725.23,5725.23,5709.91,5709.91,0.01087959,62.218800956,5718.8553021
1508454600,5720.41,5720.42,5709.91,5720.41,1.38101969,7889.5914892,5712.8740063
1508454660,5709.89,5720.41,5709.89,5720.41,1.06840668,6111.4859753,5720.1869753
1508454720,5715.85,5723.19,5715.85,5723.19,0.30664325,1754.2302403,5720.7528303
1508454780,5724.43,5724.43,5724.42,5724.42,10.200957,58394.56428,5724.420197
1508454840,5728,5728,5720.7,5727.2,0.52788184,3020.9305932,5722.7401367
1508454900,5728,5729,5724.63,5724.63,1.02945547,5895.6735162,5726.9825534
1508454960,5724.63,5729,5724.63,5729,0.21323798,1220.7260847,5724.7122893
1508455020,5727.71,5728.99,5727.71,5728.99,0.20431343,1170.3316773,5728.1191811
1508455080,5728.99,5729,5728.99,5729,0.32211059,1845.3691499,5728.9924864
1508455140,5720.01,5720.01,5720.01,5720.01,0.045575,260.68945575,5720.01
1508455200,5720,5726.06,5720,5726.06,0.1121888,642.24708813,5724.6987946
1508455260,5720.01,5725.82,5720,5725.82,0.24468336,1400.8095215,5724.9889062
1508455320,5725.82,5725.82,5725.82,5725.82,0.010029,57.42424878,5725.82
1508455380,5724.24,5724.49,5721.38,5724.49,0.52595295,3010.6402539,5724.1626915
1508455440,5724.49,5724.49,5723.88,5723.88,0.23215461,1328.9239824,5724.3058081
1508455500,5721.39,5721.39,5721.38,5721.38,0.26093204,1492.8938093,5721.389406
1508455560,5724.44,5724.44,5720,5720,0.03822585,218.78021503,5723.3577548
1508455620,5719.99,5719.99,5706,5706,5.69254452,32498.767151,5709.0053555
1508455680,5719.95,5719.95,5719.95,5719.95,0.049629,283.87539855,5719.95
1508455740,5719.95,5719.95,5719.95,5719.95,0.049629,283.87539855,5719.95
1508455800,5706.01,5708.84,5706.01,5708.84,0.0482098,275.20505463,5708.4877895
1508455860,5706.01,5706.01,5706.01,5706.01,0.00790223,45.090203402,5706.01
1508455920,5708.85,5708.85,5706,5708.84,0.50472844,2880.0455522,5706.1289279
1508455980,5708.85,5708.85,5706,5708.84,0.50472844,2880.0455522,5706.1289279
1508456040,5706,5706,5704.01,5704.01,0.40618257,2317.5307731,5705.6381645
1508456100,5704.01,5710.27,5704.01,5710.27,0.78124874,4457.2656092,5705.3091813
1508456160,5710.27,5710.57,5710.27,5710.57,0.8436208,4817.5531388,5710.5670448
1508456220,5710.57,5710.57,5704.01,5707.21,0.37128213,2119.9167574,5709.7193376
1508456280,5704,5704.01,5704,5704,0.77077686,4396.5129217,5704.0022214
1508456340,5704.01,5704.01,5704,5704,0.0598,341.0994,5704.0033445
1508456400,5704.01,5704.01,5702.43,5702.43,44.40229928,253201.25248,5702.4356077
1508456460,5702.43,5702.43,5702.43,5702.43,0.01491091,85.028420511,5702.43
1508456520,5702.43,5704.01,5702.43,5704,0.80557148,4594.9691244,5703.9868447
1508456580,5702.44,5702.44,5702.44,5702.44,0.01970529,112.36823391,5702.44
1508456640,5704.01,5704.01,5704.01,5704.01,0.07418565,423.15568946,5704.01
1508456700,5704.01,5704.01,5702.43,5702.43,0.21684081,1236.8403005,5703.9092435
1508456760,5704.01,5704.01,5702.43,5702.43,1.80248464,10278.606035,5702.4652563
1508456820,5702.43,5704.01,5702.43,5702.43,1.0176104,5802.9480004,5702.524267
1508456880,5703,5703,5702.99,5702.99,0.06074779,346.44419697,5702.9926022
1508456940,5703,5703,5702.43,5702.43,0.2174653,1240.1733699,5702.8563632
1508457000,5702.43,5702.43,5702.43,5702.43,5.80725974,33115.492159,5702.43
1508457060,5703.92,5704.01,5702.43,5704,4.90530398,27979.233447,5703.8735134
1508457120,5703.9,5703.95,5703.9,5703.95,0.05126788,292.4291083,5703.9438397
1508457180,5703.9,5703.95,5703.9,5703.95,0.05126788,292.4291083,5703.9438397
1508457240,5702.43,5702.43,5698.5,5698.5,2.64309578,15069.597851,5701.4951802
1508457300,5697,5697,5696.16,5696.17,0.08723364,496.91077327,5696.3205166
1508457360,5690.88,5690.88,5690.88,5690.88,0.16894078,961.42170609,5690.88
1508457420,5698.13,5704.1,5695.63,5704.1,2.31166152,13174.852877,5699.3001628
1508457480,5695.62,5695.64,5694,5695.64,0.12964714,738.4096468,5695.5336369
1508457540,5700.39,5700.39,5698.69,5698.69,0.08542622,486.92979839,5700.0040314
1508457600,5700,5700,5698.68,5700,0.11684654,665.91386047,5699.0464627
import datetime
# timestamp を datetime に変換
timestamp_parser = lambda date: datetime.datetime.fromtimestamp(date)
df = pd.read_csv("./sample_01_btc.csv", index_col=0, parse_dates=0, date_parser=timestamp_parser)
df
Open | High | Low | Close | Volume_(BTC) | Volume_(Currency) | Weighted_Price | |
---|---|---|---|---|---|---|---|
Timestamp | |||||||
2017-10-20 08:00:00 | 5714.50 | 5722.00 | 5707.08 | 5722.00 | 4.116310 | 23517.533993 | 5713.255872 |
2017-10-20 08:01:00 | 5709.99 | 5722.00 | 5709.56 | 5711.16 | 4.320240 | 24680.498868 | 5712.761132 |
2017-10-20 08:02:00 | 5711.16 | 5718.80 | 5710.01 | 5710.01 | 3.854051 | 22020.718724 | 5713.655337 |
2017-10-20 08:03:00 | 5711.36 | 5711.36 | 5706.66 | 5706.66 | 1.164053 | 6646.271168 | 5709.594786 |
2017-10-20 08:04:00 | 5706.48 | 5710.87 | 5706.31 | 5706.31 | 1.385352 | 7908.523643 | 5708.674464 |
2017-10-20 08:05:00 | 5718.57 | 5722.81 | 5706.00 | 5706.00 | 0.819821 | 4685.122980 | 5714.813157 |
2017-10-20 08:06:00 | 5721.32 | 5721.32 | 5721.32 | 5721.32 | 0.162144 | 927.679999 | 5721.320000 |
2017-10-20 08:07:00 | 5707.22 | 5725.40 | 5707.22 | 5709.89 | 0.305639 | 1748.216941 | 5719.869416 |
2017-10-20 08:08:00 | 5719.22 | 5719.22 | 5709.89 | 5709.89 | 1.985762 | 11338.667829 | 5709.984350 |
2017-10-20 08:09:00 | 5725.23 | 5725.23 | 5709.91 | 5709.91 | 0.010880 | 62.218801 | 5718.855302 |
2017-10-20 08:10:00 | 5720.41 | 5720.42 | 5709.91 | 5720.41 | 1.381020 | 7889.591489 | 5712.874006 |
2017-10-20 08:11:00 | 5709.89 | 5720.41 | 5709.89 | 5720.41 | 1.068407 | 6111.485975 | 5720.186975 |
2017-10-20 08:12:00 | 5715.85 | 5723.19 | 5715.85 | 5723.19 | 0.306643 | 1754.230240 | 5720.752830 |
2017-10-20 08:13:00 | 5724.43 | 5724.43 | 5724.42 | 5724.42 | 10.200957 | 58394.564280 | 5724.420197 |
2017-10-20 08:14:00 | 5728.00 | 5728.00 | 5720.70 | 5727.20 | 0.527882 | 3020.930593 | 5722.740137 |
2017-10-20 08:15:00 | 5728.00 | 5729.00 | 5724.63 | 5724.63 | 1.029455 | 5895.673516 | 5726.982553 |
2017-10-20 08:16:00 | 5724.63 | 5729.00 | 5724.63 | 5729.00 | 0.213238 | 1220.726085 | 5724.712289 |
2017-10-20 08:17:00 | 5727.71 | 5728.99 | 5727.71 | 5728.99 | 0.204313 | 1170.331677 | 5728.119181 |
2017-10-20 08:18:00 | 5728.99 | 5729.00 | 5728.99 | 5729.00 | 0.322111 | 1845.369150 | 5728.992486 |
2017-10-20 08:19:00 | 5720.01 | 5720.01 | 5720.01 | 5720.01 | 0.045575 | 260.689456 | 5720.010000 |
2017-10-20 08:20:00 | 5720.00 | 5726.06 | 5720.00 | 5726.06 | 0.112189 | 642.247088 | 5724.698795 |
2017-10-20 08:21:00 | 5720.01 | 5725.82 | 5720.00 | 5725.82 | 0.244683 | 1400.809522 | 5724.988906 |
2017-10-20 08:22:00 | 5725.82 | 5725.82 | 5725.82 | 5725.82 | 0.010029 | 57.424249 | 5725.820000 |
2017-10-20 08:23:00 | 5724.24 | 5724.49 | 5721.38 | 5724.49 | 0.525953 | 3010.640254 | 5724.162691 |
2017-10-20 08:24:00 | 5724.49 | 5724.49 | 5723.88 | 5723.88 | 0.232155 | 1328.923982 | 5724.305808 |
2017-10-20 08:25:00 | 5721.39 | 5721.39 | 5721.38 | 5721.38 | 0.260932 | 1492.893809 | 5721.389406 |
2017-10-20 08:26:00 | 5724.44 | 5724.44 | 5720.00 | 5720.00 | 0.038226 | 218.780215 | 5723.357755 |
2017-10-20 08:27:00 | 5719.99 | 5719.99 | 5706.00 | 5706.00 | 5.692545 | 32498.767151 | 5709.005356 |
2017-10-20 08:28:00 | 5719.95 | 5719.95 | 5719.95 | 5719.95 | 0.049629 | 283.875399 | 5719.950000 |
2017-10-20 08:29:00 | 5719.95 | 5719.95 | 5719.95 | 5719.95 | 0.049629 | 283.875399 | 5719.950000 |
... | ... | ... | ... | ... | ... | ... | ... |
2017-10-20 08:31:00 | 5706.01 | 5706.01 | 5706.01 | 5706.01 | 0.007902 | 45.090203 | 5706.010000 |
2017-10-20 08:32:00 | 5708.85 | 5708.85 | 5706.00 | 5708.84 | 0.504728 | 2880.045552 | 5706.128928 |
2017-10-20 08:33:00 | 5708.85 | 5708.85 | 5706.00 | 5708.84 | 0.504728 | 2880.045552 | 5706.128928 |
2017-10-20 08:34:00 | 5706.00 | 5706.00 | 5704.01 | 5704.01 | 0.406183 | 2317.530773 | 5705.638165 |
2017-10-20 08:35:00 | 5704.01 | 5710.27 | 5704.01 | 5710.27 | 0.781249 | 4457.265609 | 5705.309181 |
2017-10-20 08:36:00 | 5710.27 | 5710.57 | 5710.27 | 5710.57 | 0.843621 | 4817.553139 | 5710.567045 |
2017-10-20 08:37:00 | 5710.57 | 5710.57 | 5704.01 | 5707.21 | 0.371282 | 2119.916757 | 5709.719338 |
2017-10-20 08:38:00 | 5704.00 | 5704.01 | 5704.00 | 5704.00 | 0.770777 | 4396.512922 | 5704.002221 |
2017-10-20 08:39:00 | 5704.01 | 5704.01 | 5704.00 | 5704.00 | 0.059800 | 341.099400 | 5704.003344 |
2017-10-20 08:40:00 | 5704.01 | 5704.01 | 5702.43 | 5702.43 | 44.402299 | 253201.252480 | 5702.435608 |
2017-10-20 08:41:00 | 5702.43 | 5702.43 | 5702.43 | 5702.43 | 0.014911 | 85.028421 | 5702.430000 |
2017-10-20 08:42:00 | 5702.43 | 5704.01 | 5702.43 | 5704.00 | 0.805571 | 4594.969124 | 5703.986845 |
2017-10-20 08:43:00 | 5702.44 | 5702.44 | 5702.44 | 5702.44 | 0.019705 | 112.368234 | 5702.440000 |
2017-10-20 08:44:00 | 5704.01 | 5704.01 | 5704.01 | 5704.01 | 0.074186 | 423.155689 | 5704.010000 |
2017-10-20 08:45:00 | 5704.01 | 5704.01 | 5702.43 | 5702.43 | 0.216841 | 1236.840301 | 5703.909244 |
2017-10-20 08:46:00 | 5704.01 | 5704.01 | 5702.43 | 5702.43 | 1.802485 | 10278.606035 | 5702.465256 |
2017-10-20 08:47:00 | 5702.43 | 5704.01 | 5702.43 | 5702.43 | 1.017610 | 5802.948000 | 5702.524267 |
2017-10-20 08:48:00 | 5703.00 | 5703.00 | 5702.99 | 5702.99 | 0.060748 | 346.444197 | 5702.992602 |
2017-10-20 08:49:00 | 5703.00 | 5703.00 | 5702.43 | 5702.43 | 0.217465 | 1240.173370 | 5702.856363 |
2017-10-20 08:50:00 | 5702.43 | 5702.43 | 5702.43 | 5702.43 | 5.807260 | 33115.492159 | 5702.430000 |
2017-10-20 08:51:00 | 5703.92 | 5704.01 | 5702.43 | 5704.00 | 4.905304 | 27979.233447 | 5703.873513 |
2017-10-20 08:52:00 | 5703.90 | 5703.95 | 5703.90 | 5703.95 | 0.051268 | 292.429108 | 5703.943840 |
2017-10-20 08:53:00 | 5703.90 | 5703.95 | 5703.90 | 5703.95 | 0.051268 | 292.429108 | 5703.943840 |
2017-10-20 08:54:00 | 5702.43 | 5702.43 | 5698.50 | 5698.50 | 2.643096 | 15069.597851 | 5701.495180 |
2017-10-20 08:55:00 | 5697.00 | 5697.00 | 5696.16 | 5696.17 | 0.087234 | 496.910773 | 5696.320517 |
2017-10-20 08:56:00 | 5690.88 | 5690.88 | 5690.88 | 5690.88 | 0.168941 | 961.421706 | 5690.880000 |
2017-10-20 08:57:00 | 5698.13 | 5704.10 | 5695.63 | 5704.10 | 2.311662 | 13174.852877 | 5699.300163 |
2017-10-20 08:58:00 | 5695.62 | 5695.64 | 5694.00 | 5695.64 | 0.129647 | 738.409647 | 5695.533637 |
2017-10-20 08:59:00 | 5700.39 | 5700.39 | 5698.69 | 5698.69 | 0.085426 | 486.929798 | 5700.004031 |
2017-10-20 09:00:00 | 5700.00 | 5700.00 | 5698.68 | 5700.00 | 0.116847 | 665.913860 | 5699.046463 |
61 rows × 7 columns
DataFrame で遊ぶ
DataFrame.describe
DataFrame.describe(percentiles=None, include=None, exclude=None)
要約統計量を出す
df.describe()
Open | High | Low | Close | Volume_(BTC) | Volume_(Currency) | Weighted_Price | |
---|---|---|---|---|---|---|---|
count | 61.000000 | 61.000000 | 61.000000 | 61.000000 | 61.000000 | 61.000000 | 61.000000 |
mean | 5711.347541 | 5713.140164 | 5709.262787 | 5711.027541 | 1.802100 | 10285.950059 | 5711.197041 |
std | 9.889195 | 10.505623 | 9.408502 | 10.106421 | 5.847390 | 33350.541325 | 9.798496 |
min | 5690.880000 | 5690.880000 | 5690.880000 | 5690.880000 | 0.007902 | 45.090203 | 5690.880000 |
25% | 5703.900000 | 5704.010000 | 5702.430000 | 5703.950000 | 0.085426 | 486.929798 | 5703.873513 |
50% | 5708.850000 | 5710.570000 | 5706.010000 | 5708.840000 | 0.306643 | 1754.230240 | 5709.005356 |
75% | 5720.010000 | 5722.810000 | 5719.950000 | 5720.410000 | 1.068407 | 6111.485975 | 5720.186975 |
max | 5728.990000 | 5729.000000 | 5728.990000 | 5729.000000 | 44.402299 | 253201.252480 | 5728.992486 |
DataFrame.hist
DataFrame.hist(data, column=None, by=None, grid=True, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, sharey=False, figsize=None, layout=None, bins=10, **kwds)
ヒストグラムを描く
df.hist()
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x114eb8d50>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1154aa890>,
<matplotlib.axes._subplots.AxesSubplot object at 0x115528050>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x1155a8210>,
<matplotlib.axes._subplots.AxesSubplot object at 0x11570b350>,
<matplotlib.axes._subplots.AxesSubplot object at 0x11578e390>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x115812210>,
<matplotlib.axes._subplots.AxesSubplot object at 0x11587f350>,
<matplotlib.axes._subplots.AxesSubplot object at 0x115a031d0>]], dtype=object)
matplotlib.finance.candlestick2_ohlc
matplotlib.finance.candlestick2_ohlc(ax, opens, highs, lows, closes, width=4, colorup='k', colordown='r', alpha=0.75)
ローソク足チャートを描画する
import matplotlib.finance as mpf
fig = plt.figure(figsize=(18, 9))
ax = plt.subplot(1, 1, 1)
ax.set_xlim([0, df.shape[0]])
import matplotlib.ticker as ticker
ax.xaxis.set_major_locator(ticker.MaxNLocator(6))
ax.xaxis.set_major_formatter(ticker.FuncFormatter(df.index))
def dateformatter(x, pos):
try:
return df.index[int(x)]
except IndexError:
return ''
ax.xaxis.set_major_formatter(ticker.FuncFormatter(dateformatter))
fig.autofmt_xdate()
mpf.candlestick2_ohlc(ax, df.Open, df.High, df.Low, df.Close, width=0.9, colorup='g', colordown='r')
plt.show()