LoginSignup
1
0

More than 3 years have passed since last update.

Seabornの基本操作をJupyter Labで書いてみた

Posted at

第24回

Seaborn

import seaborn as sns
%matplotlib inline
import pandas as pd
df = pd.read_csv('train.csv')
df = df.dropna(subset=['Age'])
sns.distplot(df['Age'])
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcdd7e3d0>

png

sns.set()
sns.distplot(df['Age'],bins=50)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcdd05850>

png

sns.jointplot()で2変数の分布を見る

sns.jointplot(x='Age', y='Fare', data=df)
<seaborn.axisgrid.JointGrid at 0x7fcbcdbe31d0>

png

sns.jointplot(x='Age', y='Fare', data=df, kind='hex')
<seaborn.axisgrid.JointGrid at 0x7fcbcdb2b890>

png

(重要)sns.pairplot()で複数のカラムの分布を一発で表示

sns.pairplot(df[['Age', 'Fare', 'Pclass', 'Survived']], hue='Survived', kind='scatter', plot_kws={'alpha': 0.5})
/opt/anaconda3/lib/python3.7/site-packages/statsmodels/nonparametric/kde.py:487: RuntimeWarning: invalid value encountered in true_divide
  binned = fast_linbin(X, a, b, gridsize) / (delta * nobs)
/opt/anaconda3/lib/python3.7/site-packages/statsmodels/nonparametric/kdetools.py:34: RuntimeWarning: invalid value encountered in double_scalars
  FAC1 = 2*(np.pi*bw/RANGE)**2





<seaborn.axisgrid.PairGrid at 0x7fcbcda34990>

png

第25回

sns.barplot() 「棒グラフ」を作る

import numpy as np
import pandas as pd
import seaborn as sns
%matplotlib inline
df = pd.read_csv('train.csv')
df.head()
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
sns.barplot(x='Survived', y='Age', data=df) #平均値
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcdd7e390>

png

sns.barplot(x='Survived', y='Age', data=df, estimator=np.median) #中央値
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcce3a190>

png

sns.countplot() データ数を比較

sns.countplot(x='Sex', data=df, hue='Survived')
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbccfb0c10>

png

sns.boxplot() カテゴリ別に値を比較する

sns.boxplot(x='Pclass', y='Age', data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbccd82750>

png

sns.boxplot(x='Pclass', y='Age', data=df, hue='Survived')
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcc2a4ed0>

png

sns.violonplot() データの分析を可視化できる

sns.violinplot(x='Pclass', y='Age', data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcc1e3ad0>

png

sns.violinplot(x='Pclass', y='Age', data=df, hue=('Survived'))
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcc107210>

png

sns.swarmplot() 本当の分布を確認できる

sns.swarmplot(x='Pclass', y='Age', data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcc19a710>

png

sns.swarmplot(x='Pclass', y='Age', data=df, size=4, hue='Survived')
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc6e9fe50>

png

第26回

Heatmapを描写する

df.corr()で相関関係を作る

import pandas as pd
df = pd.read_csv('train.csv')
corr = df.corr()
corr
PassengerId Survived Pclass Age SibSp Parch Fare
PassengerId 1.000000 -0.005007 -0.035144 0.036847 -0.057527 -0.001652 0.012658
Survived -0.005007 1.000000 -0.338481 -0.077221 -0.035322 0.081629 0.257307
Pclass -0.035144 -0.338481 1.000000 -0.369226 0.083081 0.018443 -0.549500
Age 0.036847 -0.077221 -0.369226 1.000000 -0.308247 -0.189119 0.096067
SibSp -0.057527 -0.035322 0.083081 -0.308247 1.000000 0.414838 0.159651
Parch -0.001652 0.081629 0.018443 -0.189119 0.414838 1.000000 0.216225
Fare 0.012658 0.257307 -0.549500 0.096067 0.159651 0.216225 1.000000

sns.heatmap()でHeatmapをplot

sns.heatmap(corr)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc6df9850>

png

sns.heatmap(corr, cmap='coolwarm', annot=True)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcd0b3290>

png

データを俯瞰するときに役立つsns.heatmap()

flights = sns.load_dataset('flights')
print(len(flights))
flights.head()
144
year month passengers
0 1949 January 112
1 1949 February 118
2 1949 March 132
3 1949 April 129
4 1949 May 121
# pivot_tableを作成
flights_pivot = flights.pivot_table(index='month', columns='year', values='passengers') 
flights_pivot
year 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960
month
January 112 115 145 171 196 204 242 284 315 340 360 417
February 118 126 150 180 196 188 233 277 301 318 342 391
March 132 141 178 193 236 235 267 317 356 362 406 419
April 129 135 163 181 235 227 269 313 348 348 396 461
May 121 125 172 183 229 234 270 318 355 363 420 472
June 135 149 178 218 243 264 315 374 422 435 472 535
July 148 170 199 230 264 302 364 413 465 491 548 622
August 148 170 199 242 272 293 347 405 467 505 559 606
September 136 158 184 209 237 259 312 355 404 404 463 508
October 119 133 162 191 211 229 274 306 347 359 407 461
November 104 114 146 172 180 203 237 271 305 310 362 390
December 118 140 166 194 201 229 278 306 336 337 405 432
sns.heatmap(flights_pivot)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc5baabd0>

png

第27回

sns.set()で基本styleを変更

context引数で用途を指定

import pandas as pd
import seaborn as sns
%matplotlib inline

df = pd.read_csv('train.csv')

sns.set(context=('poster'))
df = df.dropna(subset=['Age'])
sns.distplot(df['Age'])
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc568f690>

png

style引数でグラフ全体のstyleを指定

sns.set_style(style='whitegrid') #背景色を変更
sns.distplot(df['Age'])
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc563d850>

png

palette引数で色を指定

sns.set(palette='bright')
sns.violinplot(x='Pclass', y='Age', data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc5472850>

png

sns.despine()で軸や枠を落とす

sns.set(palette='bright' ,style='ticks') ##style='ticks'でplotに軸
sns.violinplot(x='Pclass', y='Age', data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc53ed810>

png

sns.set(palette='bright' )
sns.violinplot(x='Pclass', y='Age', data=df)
sns.despine()

png

matplotlibと同様pltモジュールでいろいろなことができる

import matplotlib.pyplot as plt
plt.figure(figsize=(10, 5))
sns.distplot(df['Age'])
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc52d8750>

png

sns.distplot(df['Age'])
plt.savefig('seaborn_sample.png')

png


1
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
1
0