LoginSignup
40
34

More than 1 year has passed since last update.

pandas 入門

Last updated at Posted at 2017-12-01
import pandas as pd
# 他の使いそうなライブラリもインポート
import math
import numpy as np
import matplotlib.pyplot as plt

# jupyter notebook 用の inline 表示リクエスト
%matplotlib inline

pandas のデータ形式

pandas DataFrame という 2 次元配列のデータ形式を主として扱う.

csv を読み込んで DataFrame にしていい感じに計算させることが多い.

pandas.DataFrame

pandas.DataFrame(data=None, index=None, columns=None, dtype=None, copy=False)

自分で初期化する場合, 上のように作る.

data には numpy.ndarray, dict, DataFrame が入る.

# ndarray の場合
pd.DataFrame([[1, 2, 3], [4, 5, 6]])
0 1 2
0 1 2 3
1 4 5 6
# dict の場合
pd.DataFrame({'a': [1, 4], 'b': [2, 5], 'c': [3, 6]})
a b c
0 1 2 3
1 4 5 6
# ndarray で dict と同値な df を出す
df1 = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'b', 'c'])
df2 = pd.DataFrame({'a': [1, 4], 'b': [2, 5], 'c': [3, 6]})
df1.equals(df2)
True
# データ型の確認
df1.dtypes
a    int64
b    int64
c    int64
dtype: object
# numpy.ndarray に変換
df1.values
array([[1, 2, 3],
       [4, 5, 6]])
# 要素数
df1.size
6

DataFrame へのアクセス

df1.a
0    1
1    4
Name: a, dtype: int64
df1['a']
0    1
1    4
Name: a, dtype: int64
df1.loc[0]
a    1
b    2
c    3
Name: 0, dtype: int64
df1.loc
<pandas.core.indexing._LocIndexer at 0x114ea6550>

csv を読み込んで DataFrame にする

pandas.read_csv(filepath_or_buffer, sep=', ', delimiter=None, header='infer', names=None, index_col=None, usecols=None, squeeze=False, prefix=None, mangle_dupe_cols=True, dtype=None, engine=None, converters=None, true_values=None, false_values=None, skipinitialspace=False, skiprows=None, nrows=None, na_values=None, keep_default_na=True, na_filter=True, verbose=False, skip_blank_lines=True, parse_dates=False, infer_datetime_format=False, keep_date_col=False, date_parser=None, dayfirst=False, iterator=False, chunksize=None, compression='infer', thousands=None, decimal=b'.', lineterminator=None, quotechar='"', quoting=0, escapechar=None, comment=None, encoding=None, dialect=None, tupleize_cols=None, error_bad_lines=True, warn_bad_lines=True, skipfooter=0, skip_footer=0, doublequote=True, delim_whitespace=False, as_recarray=None, compact_ints=None, use_unsigned=None, low_memory=True, buffer_lines=None, memory_map=False, float_precision=None)

sample_01_btc.csv

BTC-USD 取引の 1 分毎のデータ.

Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
1508454000,5714.5,5722,5707.08,5722,4.1163103,23517.533993,5713.2558721
1508454060,5709.99,5722,5709.56,5711.16,4.32023995,24680.498868,5712.7611322
1508454120,5711.16,5718.8,5710.01,5710.01,3.85405094,22020.718724,5713.6553374
1508454180,5711.36,5711.36,5706.66,5706.66,1.16405304,6646.2711683,5709.5947864
1508454240,5706.48,5710.87,5706.31,5706.31,1.38535201,7908.5236431,5708.674464
1508454300,5718.57,5722.81,5706,5706,0.81982085,4685.1229798,5714.8131568
1508454360,5721.32,5721.32,5721.32,5721.32,0.1621444,927.67999861,5721.32
1508454420,5707.22,5725.4,5707.22,5709.89,0.30563931,1748.2169415,5719.8694158
1508454480,5719.22,5719.22,5709.89,5709.89,1.98576163,11338.667829,5709.9843496
1508454540,5725.23,5725.23,5709.91,5709.91,0.01087959,62.218800956,5718.8553021
1508454600,5720.41,5720.42,5709.91,5720.41,1.38101969,7889.5914892,5712.8740063
1508454660,5709.89,5720.41,5709.89,5720.41,1.06840668,6111.4859753,5720.1869753
1508454720,5715.85,5723.19,5715.85,5723.19,0.30664325,1754.2302403,5720.7528303
1508454780,5724.43,5724.43,5724.42,5724.42,10.200957,58394.56428,5724.420197
1508454840,5728,5728,5720.7,5727.2,0.52788184,3020.9305932,5722.7401367
1508454900,5728,5729,5724.63,5724.63,1.02945547,5895.6735162,5726.9825534
1508454960,5724.63,5729,5724.63,5729,0.21323798,1220.7260847,5724.7122893
1508455020,5727.71,5728.99,5727.71,5728.99,0.20431343,1170.3316773,5728.1191811
1508455080,5728.99,5729,5728.99,5729,0.32211059,1845.3691499,5728.9924864
1508455140,5720.01,5720.01,5720.01,5720.01,0.045575,260.68945575,5720.01
1508455200,5720,5726.06,5720,5726.06,0.1121888,642.24708813,5724.6987946
1508455260,5720.01,5725.82,5720,5725.82,0.24468336,1400.8095215,5724.9889062
1508455320,5725.82,5725.82,5725.82,5725.82,0.010029,57.42424878,5725.82
1508455380,5724.24,5724.49,5721.38,5724.49,0.52595295,3010.6402539,5724.1626915
1508455440,5724.49,5724.49,5723.88,5723.88,0.23215461,1328.9239824,5724.3058081
1508455500,5721.39,5721.39,5721.38,5721.38,0.26093204,1492.8938093,5721.389406
1508455560,5724.44,5724.44,5720,5720,0.03822585,218.78021503,5723.3577548
1508455620,5719.99,5719.99,5706,5706,5.69254452,32498.767151,5709.0053555
1508455680,5719.95,5719.95,5719.95,5719.95,0.049629,283.87539855,5719.95
1508455740,5719.95,5719.95,5719.95,5719.95,0.049629,283.87539855,5719.95
1508455800,5706.01,5708.84,5706.01,5708.84,0.0482098,275.20505463,5708.4877895
1508455860,5706.01,5706.01,5706.01,5706.01,0.00790223,45.090203402,5706.01
1508455920,5708.85,5708.85,5706,5708.84,0.50472844,2880.0455522,5706.1289279
1508455980,5708.85,5708.85,5706,5708.84,0.50472844,2880.0455522,5706.1289279
1508456040,5706,5706,5704.01,5704.01,0.40618257,2317.5307731,5705.6381645
1508456100,5704.01,5710.27,5704.01,5710.27,0.78124874,4457.2656092,5705.3091813
1508456160,5710.27,5710.57,5710.27,5710.57,0.8436208,4817.5531388,5710.5670448
1508456220,5710.57,5710.57,5704.01,5707.21,0.37128213,2119.9167574,5709.7193376
1508456280,5704,5704.01,5704,5704,0.77077686,4396.5129217,5704.0022214
1508456340,5704.01,5704.01,5704,5704,0.0598,341.0994,5704.0033445
1508456400,5704.01,5704.01,5702.43,5702.43,44.40229928,253201.25248,5702.4356077
1508456460,5702.43,5702.43,5702.43,5702.43,0.01491091,85.028420511,5702.43
1508456520,5702.43,5704.01,5702.43,5704,0.80557148,4594.9691244,5703.9868447
1508456580,5702.44,5702.44,5702.44,5702.44,0.01970529,112.36823391,5702.44
1508456640,5704.01,5704.01,5704.01,5704.01,0.07418565,423.15568946,5704.01
1508456700,5704.01,5704.01,5702.43,5702.43,0.21684081,1236.8403005,5703.9092435
1508456760,5704.01,5704.01,5702.43,5702.43,1.80248464,10278.606035,5702.4652563
1508456820,5702.43,5704.01,5702.43,5702.43,1.0176104,5802.9480004,5702.524267
1508456880,5703,5703,5702.99,5702.99,0.06074779,346.44419697,5702.9926022
1508456940,5703,5703,5702.43,5702.43,0.2174653,1240.1733699,5702.8563632
1508457000,5702.43,5702.43,5702.43,5702.43,5.80725974,33115.492159,5702.43
1508457060,5703.92,5704.01,5702.43,5704,4.90530398,27979.233447,5703.8735134
1508457120,5703.9,5703.95,5703.9,5703.95,0.05126788,292.4291083,5703.9438397
1508457180,5703.9,5703.95,5703.9,5703.95,0.05126788,292.4291083,5703.9438397
1508457240,5702.43,5702.43,5698.5,5698.5,2.64309578,15069.597851,5701.4951802
1508457300,5697,5697,5696.16,5696.17,0.08723364,496.91077327,5696.3205166
1508457360,5690.88,5690.88,5690.88,5690.88,0.16894078,961.42170609,5690.88
1508457420,5698.13,5704.1,5695.63,5704.1,2.31166152,13174.852877,5699.3001628
1508457480,5695.62,5695.64,5694,5695.64,0.12964714,738.4096468,5695.5336369
1508457540,5700.39,5700.39,5698.69,5698.69,0.08542622,486.92979839,5700.0040314
1508457600,5700,5700,5698.68,5700,0.11684654,665.91386047,5699.0464627
import datetime
# timestamp を datetime に変換
timestamp_parser = lambda date: datetime.datetime.fromtimestamp(date)
df = pd.read_csv("./sample_01_btc.csv", index_col=0, parse_dates=0, date_parser=timestamp_parser)
df
Open High Low Close Volume_(BTC) Volume_(Currency) Weighted_Price
Timestamp
2017-10-20 08:00:00 5714.50 5722.00 5707.08 5722.00 4.116310 23517.533993 5713.255872
2017-10-20 08:01:00 5709.99 5722.00 5709.56 5711.16 4.320240 24680.498868 5712.761132
2017-10-20 08:02:00 5711.16 5718.80 5710.01 5710.01 3.854051 22020.718724 5713.655337
2017-10-20 08:03:00 5711.36 5711.36 5706.66 5706.66 1.164053 6646.271168 5709.594786
2017-10-20 08:04:00 5706.48 5710.87 5706.31 5706.31 1.385352 7908.523643 5708.674464
2017-10-20 08:05:00 5718.57 5722.81 5706.00 5706.00 0.819821 4685.122980 5714.813157
2017-10-20 08:06:00 5721.32 5721.32 5721.32 5721.32 0.162144 927.679999 5721.320000
2017-10-20 08:07:00 5707.22 5725.40 5707.22 5709.89 0.305639 1748.216941 5719.869416
2017-10-20 08:08:00 5719.22 5719.22 5709.89 5709.89 1.985762 11338.667829 5709.984350
2017-10-20 08:09:00 5725.23 5725.23 5709.91 5709.91 0.010880 62.218801 5718.855302
2017-10-20 08:10:00 5720.41 5720.42 5709.91 5720.41 1.381020 7889.591489 5712.874006
2017-10-20 08:11:00 5709.89 5720.41 5709.89 5720.41 1.068407 6111.485975 5720.186975
2017-10-20 08:12:00 5715.85 5723.19 5715.85 5723.19 0.306643 1754.230240 5720.752830
2017-10-20 08:13:00 5724.43 5724.43 5724.42 5724.42 10.200957 58394.564280 5724.420197
2017-10-20 08:14:00 5728.00 5728.00 5720.70 5727.20 0.527882 3020.930593 5722.740137
2017-10-20 08:15:00 5728.00 5729.00 5724.63 5724.63 1.029455 5895.673516 5726.982553
2017-10-20 08:16:00 5724.63 5729.00 5724.63 5729.00 0.213238 1220.726085 5724.712289
2017-10-20 08:17:00 5727.71 5728.99 5727.71 5728.99 0.204313 1170.331677 5728.119181
2017-10-20 08:18:00 5728.99 5729.00 5728.99 5729.00 0.322111 1845.369150 5728.992486
2017-10-20 08:19:00 5720.01 5720.01 5720.01 5720.01 0.045575 260.689456 5720.010000
2017-10-20 08:20:00 5720.00 5726.06 5720.00 5726.06 0.112189 642.247088 5724.698795
2017-10-20 08:21:00 5720.01 5725.82 5720.00 5725.82 0.244683 1400.809522 5724.988906
2017-10-20 08:22:00 5725.82 5725.82 5725.82 5725.82 0.010029 57.424249 5725.820000
2017-10-20 08:23:00 5724.24 5724.49 5721.38 5724.49 0.525953 3010.640254 5724.162691
2017-10-20 08:24:00 5724.49 5724.49 5723.88 5723.88 0.232155 1328.923982 5724.305808
2017-10-20 08:25:00 5721.39 5721.39 5721.38 5721.38 0.260932 1492.893809 5721.389406
2017-10-20 08:26:00 5724.44 5724.44 5720.00 5720.00 0.038226 218.780215 5723.357755
2017-10-20 08:27:00 5719.99 5719.99 5706.00 5706.00 5.692545 32498.767151 5709.005356
2017-10-20 08:28:00 5719.95 5719.95 5719.95 5719.95 0.049629 283.875399 5719.950000
2017-10-20 08:29:00 5719.95 5719.95 5719.95 5719.95 0.049629 283.875399 5719.950000
... ... ... ... ... ... ... ...
2017-10-20 08:31:00 5706.01 5706.01 5706.01 5706.01 0.007902 45.090203 5706.010000
2017-10-20 08:32:00 5708.85 5708.85 5706.00 5708.84 0.504728 2880.045552 5706.128928
2017-10-20 08:33:00 5708.85 5708.85 5706.00 5708.84 0.504728 2880.045552 5706.128928
2017-10-20 08:34:00 5706.00 5706.00 5704.01 5704.01 0.406183 2317.530773 5705.638165
2017-10-20 08:35:00 5704.01 5710.27 5704.01 5710.27 0.781249 4457.265609 5705.309181
2017-10-20 08:36:00 5710.27 5710.57 5710.27 5710.57 0.843621 4817.553139 5710.567045
2017-10-20 08:37:00 5710.57 5710.57 5704.01 5707.21 0.371282 2119.916757 5709.719338
2017-10-20 08:38:00 5704.00 5704.01 5704.00 5704.00 0.770777 4396.512922 5704.002221
2017-10-20 08:39:00 5704.01 5704.01 5704.00 5704.00 0.059800 341.099400 5704.003344
2017-10-20 08:40:00 5704.01 5704.01 5702.43 5702.43 44.402299 253201.252480 5702.435608
2017-10-20 08:41:00 5702.43 5702.43 5702.43 5702.43 0.014911 85.028421 5702.430000
2017-10-20 08:42:00 5702.43 5704.01 5702.43 5704.00 0.805571 4594.969124 5703.986845
2017-10-20 08:43:00 5702.44 5702.44 5702.44 5702.44 0.019705 112.368234 5702.440000
2017-10-20 08:44:00 5704.01 5704.01 5704.01 5704.01 0.074186 423.155689 5704.010000
2017-10-20 08:45:00 5704.01 5704.01 5702.43 5702.43 0.216841 1236.840301 5703.909244
2017-10-20 08:46:00 5704.01 5704.01 5702.43 5702.43 1.802485 10278.606035 5702.465256
2017-10-20 08:47:00 5702.43 5704.01 5702.43 5702.43 1.017610 5802.948000 5702.524267
2017-10-20 08:48:00 5703.00 5703.00 5702.99 5702.99 0.060748 346.444197 5702.992602
2017-10-20 08:49:00 5703.00 5703.00 5702.43 5702.43 0.217465 1240.173370 5702.856363
2017-10-20 08:50:00 5702.43 5702.43 5702.43 5702.43 5.807260 33115.492159 5702.430000
2017-10-20 08:51:00 5703.92 5704.01 5702.43 5704.00 4.905304 27979.233447 5703.873513
2017-10-20 08:52:00 5703.90 5703.95 5703.90 5703.95 0.051268 292.429108 5703.943840
2017-10-20 08:53:00 5703.90 5703.95 5703.90 5703.95 0.051268 292.429108 5703.943840
2017-10-20 08:54:00 5702.43 5702.43 5698.50 5698.50 2.643096 15069.597851 5701.495180
2017-10-20 08:55:00 5697.00 5697.00 5696.16 5696.17 0.087234 496.910773 5696.320517
2017-10-20 08:56:00 5690.88 5690.88 5690.88 5690.88 0.168941 961.421706 5690.880000
2017-10-20 08:57:00 5698.13 5704.10 5695.63 5704.10 2.311662 13174.852877 5699.300163
2017-10-20 08:58:00 5695.62 5695.64 5694.00 5695.64 0.129647 738.409647 5695.533637
2017-10-20 08:59:00 5700.39 5700.39 5698.69 5698.69 0.085426 486.929798 5700.004031
2017-10-20 09:00:00 5700.00 5700.00 5698.68 5700.00 0.116847 665.913860 5699.046463

61 rows × 7 columns

DataFrame で遊ぶ

DataFrame.describe

DataFrame.describe(percentiles=None, include=None, exclude=None)

要約統計量を出す

df.describe()
Open High Low Close Volume_(BTC) Volume_(Currency) Weighted_Price
count 61.000000 61.000000 61.000000 61.000000 61.000000 61.000000 61.000000
mean 5711.347541 5713.140164 5709.262787 5711.027541 1.802100 10285.950059 5711.197041
std 9.889195 10.505623 9.408502 10.106421 5.847390 33350.541325 9.798496
min 5690.880000 5690.880000 5690.880000 5690.880000 0.007902 45.090203 5690.880000
25% 5703.900000 5704.010000 5702.430000 5703.950000 0.085426 486.929798 5703.873513
50% 5708.850000 5710.570000 5706.010000 5708.840000 0.306643 1754.230240 5709.005356
75% 5720.010000 5722.810000 5719.950000 5720.410000 1.068407 6111.485975 5720.186975
max 5728.990000 5729.000000 5728.990000 5729.000000 44.402299 253201.252480 5728.992486

DataFrame.hist

DataFrame.hist(data, column=None, by=None, grid=True, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, sharey=False, figsize=None, layout=None, bins=10, **kwds)

ヒストグラムを描く

df.hist()
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x114eb8d50>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1154aa890>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x115528050>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x1155a8210>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x11570b350>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x11578e390>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x115812210>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x11587f350>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x115a031d0>]], dtype=object)

output_23_1.png

matplotlib.finance.candlestick2_ohlc

matplotlib.finance.candlestick2_ohlc(ax, opens, highs, lows, closes, width=4, colorup='k', colordown='r', alpha=0.75)

ローソク足チャートを描画する

import matplotlib.finance as mpf
fig = plt.figure(figsize=(18, 9))
ax = plt.subplot(1, 1, 1)
ax.set_xlim([0, df.shape[0]])
import matplotlib.ticker as ticker
ax.xaxis.set_major_locator(ticker.MaxNLocator(6))
ax.xaxis.set_major_formatter(ticker.FuncFormatter(df.index))

def dateformatter(x, pos):
    try:
        return df.index[int(x)]
    except IndexError:
        return ''

ax.xaxis.set_major_formatter(ticker.FuncFormatter(dateformatter))
fig.autofmt_xdate()
mpf.candlestick2_ohlc(ax, df.Open, df.High, df.Low, df.Close, width=0.9, colorup='g', colordown='r')
plt.show()

output_25_0.png

40
34
1

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
40
34