この記事は、慶應理工アドベントカレンダー2021の7日目の記事です。
https://t.co/NOQgu1ypYl
今回はデータ分析入門ということで、Pythonでのデータ分析に欠かせないライブラリであるpandas、scikit-learn、matplotlibについて自分用の備忘録としてまとめてみたいと思います。
少し量が多いため、2つの記事に分けてまとめたいと思います。
今回の記事ではpandasについて紹介し、次回の記事ではscikit-learnとmatplotlibについてまとめます。
普段はよく使う属性やメソッドだけを学んで使用しておりましたが、私は体系的に学びたい人間なので、ひたすらpandasの属性とメソッドをまとめました。本記事はただのpandas公式リファレンスのまとめ記事になります。
#目次
1.pandas概要
2.Seriesの属性とメソッド一覧
3.DataFrameの属性とメソッド一覧
#pandas概要
pandasは、データ分析に強みを持つPythonのライブラリです。データの読み込みや、データの表示、データのグラフ化、データ分析、欠損値の操作、データの削除等の機能があります。pandasにはSeries、DataFrame、Panelというデータ構造があります。Seriesは1次元配列でindexラベルがあり、DataFrameは2次元配列でindexラベルとcolumnsラベルがあります。Panelは3次元配列ですが、pandas 0.25.0で削除されており、私も今のところ使っていないので本記事では説明は省略します。
#Seriesの属性とメソッド一覧
##コンストラクタ
Series([data, index, dtype, name, copy, ...])
##属性
Series.index
Series.array
Series.values
Series.dtype
Series.shape
Series.nbytes
Series.ndim
Series.size
Series.T
Series.memory_usage([index, deep])
Series.hasnans
Series.empty
Series.dtypes
Series.name
Series.flags
Series.set_flags(*[, copy, ...])
##型変換
Series.astype(dtype[, copy, errors])
Series.convert_dtypes([infer_objects, ...])
Series.infer_objects()
Series.copy([deep])
Series.bool()
Series.to_numpy([dtype, copy, na_value])
Series.to_period([freq, copy])
Series.to_timestamp([freq, how, copy])
Series.to_list()
Series.__array__([dtype])
##インデックス・繰り返し
Series.get(key[, default])
Series.at
Series.iat
Series.loc
Series.iloc
Series.__iter__()
Series.items()
Series.iteritems()
Series.keys()
Series.pop(item)
Series.item()
Series.xs(key[, axis, level, drop_level])
##バイナリ計算
Series.add(other[, level, fill_value, axis])
Series.sub(other[, level, fill_value, axis])
Series.mul(other[, level, fill_value, axis])
Series.div(other[, level, fill_value, axis])
Series.truediv(other[, level, fill_value, axis])
Series.floordiv(other[, level, fill_value, axis])
Series.mod(other[, level, fill_value, axis])
Series.pow(other[, level, fill_value, axis])
Series.radd(other[, level, fill_value, axis])
Series.rsub(other[, level, fill_value, axis])
Series.rmul(other[, level, fill_value, axis])
Series.rdiv(other[, level, fill_value, axis])
Series.rtruediv(other[, level, fill_value, axis])
Series.rfloordiv(other[, level, fill_value, ...])
Series.rmod(other[, level, fill_value, axis])
Series.rpow(other[, level, fill_value, axis])
Series.combine(other, func[, fill_value])
Series.combine_first(other)
Series.lt(other[, level, fill_value, axis])
Series.gt(other[, level, fill_value, axis])
Series.le(other[, level, fill_value, axis])
Series.ge(other[, level, fill_value, axis])
Series.ne(other[, level, fill_value, axis])
Series.eq(other[, level, fill_value, axis])
Series.product([axis, skipna, level, ...])
Series.dot(other)
##応用関数
Series.apply(func[, convert_dtype, args])
Series.agg([func, axis])
Series.aggregate([func, axis])
Series.transform(func[, axis])
Series.map(arg[, na_action])
Series.groupby([by, axis, level, as_index, ...])
Series.rolling(window[, min_periods, ...])
Series.expanding([min_periods, center, ...])
Series.ewm([com, span, halflife, alpha, ...])
Series.pipe(func, *args, **kwargs)
##計算
Series.abs()
Series.all([axis, bool_only, skipna, level])
Series.any([axis, bool_only, skipna, level])
Series.autocorr([lag])
Series.between(left, right[, inclusive])
Series.clip([lower, upper, axis, inplace])
Series.corr(other[, method, min_periods])
Series.count([level])
Series.cov(other[, min_periods, ddof])
Series.cummax([axis, skipna])
Series.cummin([axis, skipna])
Series.cumprod([axis, skipna])
Series.cumsum([axis, skipna])
Series.describe([percentiles, include, ...])
Series.diff([periods])
Series.factorize([sort, na_sentinel])
Series.kurt([axis, skipna, level, numeric_only])
Series.mad([axis, skipna, level])
Series.max([axis, skipna, level, numeric_only])
Series.mean([axis, skipna, level, numeric_only])
Series.median([axis, skipna, level, ...])
Series.min([axis, skipna, level, numeric_only])
Series.mode([dropna])
Series.nlargest([n, keep])
Series.nsmallest([n, keep])
Series.pct_change([periods, fill_method, ...])
Series.prod([axis, skipna, level, ...])
Series.quantile([q, interpolation])
Series.rank([axis, method, numeric_only, ...])
Series.sem([axis, skipna, level, ddof, ...])
Series.skew([axis, skipna, level, numeric_only])
Series.std([axis, skipna, level, ddof, ...])
Series.sum([axis, skipna, level, ...])
Series.var([axis, skipna, level, ddof, ...])
Series.kurtosis([axis, skipna, level, ...])
Series.unique()
Series.nunique([dropna])
Series.is_unique
Series.is_monotonic
Series.is_monotonic_increasing
Series.is_monotonic_decreasing
Series.value_counts([normalize, sort, ...])
##選択・削除
Series.align(other[, join, axis, level, ...])
Series.drop([labels, axis, index, columns, ...])
Series.droplevel(level[, axis])
Series.drop_duplicates([keep, inplace])
Series.duplicated([keep])
Series.equals(other)
Series.first(offset)
Series.head([n])
Series.idxmax([axis, skipna])
Series.idxmin([axis, skipna])
Series.isin(values)
Series.last(offset)
Series.reindex([index])
Series.reindex_like(other[, method, copy, ...])
Series.rename([index, axis, copy, inplace, ...])
Series.rename_axis([mapper, index, columns, ...])
Series.reset_index([level, drop, name, inplace])
Series.sample([n, frac, replace, weights, ...])
Series.set_axis(labels[, axis, inplace])
Series.take(indices[, axis, is_copy])
Series.tail([n])
Series.truncate([before, after, axis, copy])
Series.where(cond[, other, inplace, axis, ...])
Series.mask(cond[, other, inplace, axis, ...])
Series.add_prefix(prefix)
Series.add_suffix(suffix)
Series.filter([items, like, regex, axis])
##欠損データ処理
Series.backfill([axis, inplace, limit, downcast])
Series.bfill([axis, inplace, limit, downcast])
Series.dropna([axis, inplace, how])
Series.ffill([axis, inplace, limit, downcast])
Series.fillna([value, method, axis, ...])
Series.interpolate([method, axis, limit, ...])
Series.isna()
Series.isnull()
Series.notna()
Series.notnull()
Series.pad([axis, inplace, limit, downcast])
Series.replace([to_replace, value, inplace, ...])
##変形・ソート
Series.argsort([axis, kind, order])
Series.argmin([axis, skipna])
Series.argmax([axis, skipna])
Series.reorder_levels(order)
Series.sort_values([axis, ascending, ...])
Series.sort_index([axis, level, ascending, ...])
Series.swaplevel([i, j, copy])
Series.unstack([level, fill_value])
Series.explode([ignore_index])
Series.searchsorted(value[, side, sorter])
Series.ravel([order])
Series.repeat(repeats[, axis])
Series.squeeze([axis])
Series.view([dtype])
##組み合わせ・比較・結合
Series.append(to_append[, ignore_index, ...])
Series.compare(other[, align_axis, ...])
Series.update(other)
##時間関連
Series.asfreq(freq[, method, how, ...])
Series.asof(where[, subset])
Series.shift([periods, freq, axis, fill_value])
Series.first_valid_index()
Series.last_valid_index()
Series.resample(rule[, axis, closed, label, ...])
Series.tz_convert(tz[, axis, level, copy])
Series.tz_localize(tz[, axis, level, copy, ...])
Series.at_time(time[, asof, axis])
Series.between_time(start_time, end_time[, ...])
Series.tshift([periods, freq, axis])
Series.slice_shift([periods, axis])
##時間属性
Series.dt.date
Series.dt.time
Series.dt.timetz
Series.dt.year
Series.dt.month
Series.dt.day
Series.dt.hour
Series.dt.minute
Series.dt.second
Series.dt.microsecond
Series.dt.nanosecond
Series.dt.week
Series.dt.weekofyear
Series.dt.dayofweek
Series.dt.day_of_week
Series.dt.weekday
Series.dt.dayofyear
Series.dt.day_of_year
Series.dt.quarter
Series.dt.is_month_start
Series.dt.is_month_end
Series.dt.is_quarter_start
Series.dt.is_quarter_end
Series.dt.is_year_start
Series.dt.is_year_end
Series.dt.is_leap_year
Series.dt.daysinmonth
Series.dt.days_in_month
Series.dt.tz
Series.dt.freq
Series.dt.qyear
Series.dt.start_time
Series.dt.end_time
Series.dt.days
Series.dt.seconds
Series.dt.microseconds
Series.dt.nanoseconds
Series.dt.components
##時間メソッド
Series.dt.to_period(*args, **kwargs)
Series.dt.to_pydatetime()
Series.dt.tz_localize(*args, **kwargs)
Series.dt.tz_convert(*args, **kwargs)
Series.dt.normalize(*args, **kwargs)
Series.dt.strftime(*args, **kwargs)
Series.dt.round(*args, **kwargs)
Series.dt.floor(*args, **kwargs)
Series.dt.ceil(*args, **kwargs)
Series.dt.month_name(*args, **kwargs)
Series.dt.day_name(*args, **kwargs)
Series.dt.to_pytimedelta()
Series.dt.total_seconds(*args, **kwargs)
##文字列処理
Series.str.capitalize()
Series.str.casefold()
Series.str.cat([others, sep, na_rep, join])
Series.str.center(width[, fillchar])
Series.str.contains(pat[, case, flags, na, ...])
Series.str.count(pat[, flags])
Series.str.decode(encoding[, errors])
Series.str.encode(encoding[, errors])
Series.str.endswith(pat[, na])
Series.str.extract(pat[, flags, expand])
Series.str.extractall(pat[, flags])
Series.str.find(sub[, start, end])
Series.str.findall(pat[, flags])
Series.str.fullmatch(pat[, case, flags, na])
Series.str.get(i)
Series.str.index(sub[, start, end])
Series.str.join(sep)
Series.str.len()
Series.str.ljust(width[, fillchar])
Series.str.lower()
Series.str.lstrip([to_strip])
Series.str.match(pat[, case, flags, na])
Series.str.normalize(form)
Series.str.pad(width[, side, fillchar])
Series.str.partition([sep, expand])
Series.str.repeat(repeats)
Series.str.replace(pat, repl[, n, case, ...])
Series.str.rfind(sub[, start, end])
Series.str.rindex(sub[, start, end])
Series.str.rjust(width[, fillchar])
Series.str.rpartition([sep, expand])
Series.str.rstrip([to_strip])
Series.str.slice([start, stop, step])
Series.str.slice_replace([start, stop, repl])
Series.str.split([pat, n, expand])
Series.str.rsplit([pat, n, expand])
Series.str.startswith(pat[, na])
Series.str.strip([to_strip])
Series.str.swapcase()
Series.str.title()
Series.str.translate(table)
Series.str.upper()
Series.str.wrap(width, **kwargs)
Series.str.zfill(width)
Series.str.isalnum()
Series.str.isalpha()
Series.str.isdigit()
Series.str.isspace()
Series.str.islower()
Series.str.isupper()
Series.str.istitle()
Series.str.isnumeric()
Series.str.isdecimal()
Series.str.get_dummies([sep])
##カテゴリー
Series.cat.categories
Series.cat.ordered
Series.cat.codes
Series.cat.rename_categories(*args, **kwargs)
Series.cat.reorder_categories(*args, **kwargs)
Series.cat.add_categories(*args, **kwargs)
Series.cat.remove_categories(*args, **kwargs)
Series.cat.remove_unused_categories(*args, ...)
Series.cat.set_categories(*args, **kwargs)
Series.cat.as_ordered(*args, **kwargs)
Series.cat.as_unordered(*args, **kwargs)
##スパース
Series.sparse.npoints
Series.sparse.density
Series.sparse.fill_value
Series.sparse.sp_values
Series.sparse.from_coo(A[, dense_index])
Series.sparse.to_coo([row_levels, ...])
##メタデータ
Series.attrs
##描画
Series.plot([kind, ax, figsize, ....])
Series.plot.area([x, y])
Series.plot.bar([x, y])
Series.plot.barh([x, y])
Series.plot.box([by])
Series.plot.density([bw_method, ind])
Series.plot.hist([by, bins])
Series.plot.kde([bw_method, ind])
Series.plot.line([x, y])
Series.plot.pie(**kwargs)
Series.hist([by, ax, grid, xlabelsize, ...])
##構造変換
Series.to_pickle(path[, compression, ...])
Series.to_csv([path_or_buf, sep, na_rep, ...])
Series.to_dict([into])
Series.to_excel(excel_writer[, sheet_name, ...])
Series.to_frame([name])
Series.to_xarray()
Series.to_hdf(path_or_buf, key[, mode, ...])
Series.to_sql(name, con[, schema, ...])
Series.to_json([path_or_buf, orient, ...])
Series.to_string([buf, na_rep, ...])
Series.to_clipboard([excel, sep])
Series.to_latex([buf, columns, col_space, ...])
Series.to_markdown([buf, mode, index, ...])
#DataFrameの属性とメソッド一覧
##コンストラクタ
DataFrame([data, index, columns, dtype, copy])
##属性とデータ
DataFrame.index
DataFrame.columns
DataFrame.dtypes
DataFrame.info([verbose, buf, max_cols, ...])
DataFrame.select_dtypes([include, exclude])
DataFrame.values
DataFrame.axes
DataFrame.ndim
DataFrame.size
DataFrame.shape
DataFrame.memory_usage([index, deep])
DataFrame.empty
DataFrame.set_flags(*[, copy, ...])
##型変換
DataFrame.astype(dtype[, copy, errors])
DataFrame.convert_dtypes([infer_objects, ...])
DataFrame.infer_objects()
DataFrame.copy([deep])
DataFrame.bool()
##インデックス、イテレーション
DataFrame.head([n])
DataFrame.at
DataFrame.iat
DataFrame.loc
DataFrame.iloc
DataFrame.insert(loc, column, value[, ...])
DataFrame.__iter__()
DataFrame.items()
DataFrame.iteritems()
DataFrame.keys()
DataFrame.iterrows()
DataFrame.itertuples([index, name])
DataFrame.lookup(row_labels, col_labels)
DataFrame.pop(item)
DataFrame.tail([n])
DataFrame.xs(key[, axis, level, drop_level])
DataFrame.get(key[, default])
DataFrame.isin(values)
DataFrame.where(cond[, other, inplace, ...])
DataFrame.mask(cond[, other, inplace, axis, ...])
DataFrame.query(expr[, inplace])
##バイナリ計算
DataFrame.add(other[, axis, level, fill_value])
DataFrame.sub(other[, axis, level, fill_value])
DataFrame.mul(other[, axis, level, fill_value])
DataFrame.div(other[, axis, level, fill_value])
DataFrame.truediv(other[, axis, level, ...])
DataFrame.floordiv(other[, axis, level, ...])
DataFrame.mod(other[, axis, level, fill_value])
DataFrame.pow(other[, axis, level, fill_value])
DataFrame.dot(other)
DataFrame.radd(other[, axis, level, fill_value])
DataFrame.rsub(other[, axis, level, fill_value])
DataFrame.rmul(other[, axis, level, fill_value])
DataFrame.rdiv(other[, axis, level, fill_value])
DataFrame.rtruediv(other[, axis, level, ...])
DataFrame.rfloordiv(other[, axis, level, ...])
DataFrame.rmod(other[, axis, level, fill_value])
DataFrame.rpow(other[, axis, level, fill_value])
DataFrame.lt(other[, axis, level])
DataFrame.gt(other[, axis, level])
DataFrame.le(other[, axis, level])
DataFrame.ge(other[, axis, level])
DataFrame.ne(other[, axis, level])
DataFrame.eq(other[, axis, level])
DataFrame.combine(other, func[, fill_value, ...])
DataFrame.combine_first(other)
##応用関数
DataFrame.apply(func[, axis, raw, ...])
DataFrame.applymap(func[, na_action])
DataFrame.pipe(func, *args, **kwargs)
DataFrame.agg([func, axis])
DataFrame.aggregate([func, axis])
DataFrame.transform(func[, axis])
DataFrame.groupby([by, axis, level, ...])
DataFrame.rolling(window[, min_periods, ...])
DataFrame.expanding([min_periods, center, ...])
DataFrame.ewm([com, span, halflife, alpha, ...])
##計算
DataFrame.abs()
DataFrame.all([axis, bool_only, skipna, level])
DataFrame.any([axis, bool_only, skipna, level])
DataFrame.clip([lower, upper, axis, inplace])
DataFrame.corr([method, min_periods])
DataFrame.corrwith(other[, axis, drop, method])
DataFrame.count([axis, level, numeric_only])
DataFrame.cov([min_periods, ddof])
DataFrame.cummax([axis, skipna])
DataFrame.cummin([axis, skipna])
DataFrame.cumprod([axis, skipna])
DataFrame.cumsum([axis, skipna])
DataFrame.describe([percentiles, include, ...])
DataFrame.diff([periods, axis])
DataFrame.eval(expr[, inplace])
DataFrame.kurt([axis, skipna, level, ...])
DataFrame.kurtosis([axis, skipna, level, ...])
DataFrame.mad([axis, skipna, level])
DataFrame.max([axis, skipna, level, ...])
DataFrame.mean([axis, skipna, level, ...])
DataFrame.median([axis, skipna, level, ...])
DataFrame.min([axis, skipna, level, ...])
DataFrame.mode([axis, numeric_only, dropna])
DataFrame.pct_change([periods, fill_method, ...])
DataFrame.prod([axis, skipna, level, ...])
DataFrame.product([axis, skipna, level, ...])
DataFrame.quantile([q, axis, numeric_only, ...])
DataFrame.rank([axis, method, numeric_only, ...])
DataFrame.round([decimals])
DataFrame.sem([axis, skipna, level, ddof, ...])
DataFrame.skew([axis, skipna, level, ...])
DataFrame.sum([axis, skipna, level, ...])
DataFrame.std([axis, skipna, level, ddof, ...])
DataFrame.var([axis, skipna, level, ddof, ...])
DataFrame.nunique([axis, dropna])
DataFrame.value_counts([subset, normalize, ...])
##選択・削除
DataFrame.add_prefix(prefix)
DataFrame.add_suffix(suffix)
DataFrame.align(other[, join, axis, level, ...])
DataFrame.at_time(time[, asof, axis])
DataFrame.between_time(start_time, end_time)
DataFrame.drop([labels, axis, index, ...])
DataFrame.drop_duplicates([subset, keep, ...])
DataFrame.duplicated([subset, keep])
DataFrame.equals(other)
DataFrame.filter([items, like, regex, axis])
DataFrame.first(offset)
DataFrame.head([n])
DataFrame.idxmax([axis, skipna])
DataFrame.idxmin([axis, skipna])
DataFrame.last(offset)
DataFrame.reindex([labels, index, columns, ...])
DataFrame.reindex_like(other[, method, ...])
DataFrame.rename([mapper, index, columns, ...])
DataFrame.rename_axis([mapper, index, ...])
DataFrame.reset_index([level, drop, ...])
DataFrame.sample([n, frac, replace, ...])
DataFrame.set_axis(labels[, axis, inplace])
DataFrame.set_index(keys[, drop, append, ...])
DataFrame.tail([n])
DataFrame.take(indices[, axis, is_copy])
DataFrame.truncate([before, after, axis, copy])
##欠損データ処理
DataFrame.backfill([axis, inplace, limit, ...])
DataFrame.bfill([axis, inplace, limit, downcast])
DataFrame.dropna([axis, how, thresh, ...])
DataFrame.ffill([axis, inplace, limit, downcast])
DataFrame.fillna([value, method, axis, ...])
DataFrame.interpolate([method, axis, limit, ...])
DataFrame.isna()
DataFrame.isnull()
DataFrame.notna()
DataFrame.notnull()
DataFrame.pad([axis, inplace, limit, downcast])
DataFrame.replace([to_replace, value, ...])
##変形・ソート
DataFrame.droplevel(level[, axis])
DataFrame.pivot([index, columns, values])
DataFrame.pivot_table([values, index, ...])
DataFrame.reorder_levels(order[, axis])
DataFrame.sort_values(by[, axis, ascending, ...])
DataFrame.sort_index([axis, level, ...])
DataFrame.nlargest(n, columns[, keep])
DataFrame.nsmallest(n, columns[, keep])
DataFrame.swaplevel([i, j, axis])
DataFrame.stack([level, dropna])
DataFrame.unstack([level, fill_value])
DataFrame.swapaxes(axis1, axis2[, copy])
DataFrame.melt([id_vars, value_vars, ...])
DataFrame.explode(column[, ignore_index])
DataFrame.squeeze([axis])
DataFrame.to_xarray()
DataFrame.T
DataFrame.transpose(*args[, copy])
##組み合わせ・比較・結合
DataFrame.append(other[, ignore_index, ...])
DataFrame.assign(**kwargs)
DataFrame.compare(other[, align_axis, ...])
DataFrame.join(other[, on, how, lsuffix, ...])
DataFrame.merge(right[, how, on, left_on, ...])
DataFrame.update(other[, join, overwrite, ...])
##時間関連
DataFrame.asfreq(freq[, method, how, ...])
DataFrame.asof(where[, subset])
DataFrame.shift([periods, freq, axis, ...])
DataFrame.slice_shift([periods, axis])
DataFrame.tshift([periods, freq, axis])
DataFrame.first_valid_index()
DataFrame.last_valid_index()
DataFrame.resample(rule[, axis, closed, ...])
DataFrame.to_period([freq, axis, copy])
DataFrame.to_timestamp([freq, how, axis, copy])
DataFrame.tz_convert(tz[, axis, level, copy])
DataFrame.tz_localize(tz[, axis, level, ...])
##メタデータ
DataFrame.attrs
##描画
DataFrame.plot([x, y, kind, ax, ....])
DataFrame.plot.area([x, y])
DataFrame.plot.bar([x, y])
DataFrame.plot.barh([x, y])
DataFrame.plot.box([by])
DataFrame.plot.density([bw_method, ind])
DataFrame.plot.hexbin(x, y[, C, ...])
DataFrame.plot.hist([by, bins])
DataFrame.plot.kde([bw_method, ind])
DataFrame.plot.line([x, y])
DataFrame.plot.pie(**kwargs)
DataFrame.plot.scatter(x, y[, s, c])
DataFrame.boxplot([column, by, ax, ...])
DataFrame.hist([column, by, grid, ...])
##スパース
DataFrame.sparse.density
DataFrame.sparse.from_spmatrix(data[, ...])
DataFrame.sparse.to_coo()
DataFrame.sparse.to_dense()
##構造変換
DataFrame.from_dict(data[, orient, dtype, ...])
DataFrame.from_records(data[, index, ...])
DataFrame.to_parquet([path, engine, ...])
DataFrame.to_pickle(path[, compression, ...])
DataFrame.to_csv([path_or_buf, sep, na_rep, ...])
DataFrame.to_hdf(path_or_buf, key[, mode, ...])
DataFrame.to_sql(name, con[, schema, ...])
DataFrame.to_dict([orient, into])
DataFrame.to_excel(excel_writer[, ...])
DataFrame.to_json([path_or_buf, orient, ...])
DataFrame.to_html([buf, columns, col_space, ...])
DataFrame.to_feather(path, **kwargs)
DataFrame.to_latex([buf, columns, ...])
DataFrame.to_stata(path[, convert_dates, ...])
DataFrame.to_gbq(destination_table[, ...])
DataFrame.to_records([index, column_dtypes, ...])
DataFrame.to_string([buf, columns, ...])
DataFrame.to_clipboard([excel, sep])
DataFrame.to_markdown([buf, mode, index, ...])
DataFrame.style
##参考文献
pandas公式リファレンス
https://pandas.pydata.org/docs/reference/index.html