LoginSignup
1
1

More than 5 years have passed since last update.

df.hist() でカテゴリ変数をプロットする

Last updated at Posted at 2018-06-30

何がしたかったか

pd.DataFrameに含まれるカテゴリ変数の情報を、パッと可視化したかった。

確かに.describe()でだいたいわかる。でも目で見たかったんだ、、、

df.hist(というかplot関数は)カテゴリ変数を描画しない

df.hist??から抜粋

def hist_frame(data, column=None, by=None, ...):
    _converter._WARN = False
    if by is not None:
        axes = grouped_hist(data, column=column, by=by, ...)
        return axes

    if column is not None:
        if not isinstance(column, (list, np.ndarray, Index)):
            column = [column]
        data = data[column]
    data = data._get_numeric_data() # 数字だけ!!!
    naxes = len(data.columns)

    fig, axes = _subplots(naxes=naxes, ax=ax, squeeze=False,
                          sharex=sharex, sharey=sharey, figsize=figsize,
                          layout=layout)
    // Omitted
    return axes

data = data._get_numeric_data()!!!!!!

だがしかし

カラムを指定すると使えるっぽい。

Titanic.Embarked.hist??より抜粋
(Titanic.Embarkedがpdのカラムでカテゴリ変数になってる)

def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None,
                xrot=None, ylabelsize=None, yrot=None, figsize=None,
                bins=10, **kwds):
    import matplotlib.pyplot as plt

    if by is None:
        if kwds.get('layout', None) is not None:
            raise ValueError("The 'layout' keyword is not supported when "
                             "'by' is None")
        # hack until the plotting interface is a bit more unified
        fig = kwds.pop('figure', plt.gcf() if plt.get_fignums() else
                       plt.figure(figsize=figsize))
        if (figsize is not None and tuple(figsize) !=
                tuple(fig.get_size_inches())):
            fig.set_size_inches(*figsize, forward=True)
        if ax is None:
            ax = fig.gca()
        elif ax.get_figure() != fig:
            raise AssertionError('passed axis not bound to passed figure')
        values = self.dropna().values

        ax.hist(values, bins=bins, **kwds)
        ax.grid(grid)
        axes = np.array([ax])

        _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot,
                         ylabelsize=ylabelsize, yrot=yrot)

    else:
        if 'figure' in kwds:
            raise ValueError("Cannot pass 'figure' when using the "
                             "'by' argument, since a new 'Figure' instance "
                             "will be created")
        axes = grouped_hist(self, by=by, ax=ax, grid=grid, figsize=figsize,
                            bins=bins, xlabelsize=xlabelsize, xrot=xrot,
                            ylabelsize=ylabelsize, yrot=yrot, **kwds)

    if hasattr(axes, 'ndim'):
        if axes.ndim == 1 and len(axes) == 1:
            return axes[0]
    return axes

titanic.Embarked.hist()で、無事プロットできた。

image.png

以上。

1
1
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
1
1