# Quick exploratory look at the Titanic passenger data: load the CSV into a
# DataFrame, then print its shape, dtypes, missing-value counts, first rows
# and summary statistics. The commented lines after each print() are the
# output recorded from a previous run.
#
# NOTE(review): `pd` is presumably pandas imported earlier in the file, and
# `Titanic` is used as a bare name, so it must already be bound (e.g. to the
# CSV path or an open file object). If a literal filename was intended it
# needs to be quoted -- confirm.
df = pd.read_csv(Titanic)

# 1. Check the number of rows and columns
print(df.shape)
# (891, 12)

# 2. Check the data types
print(df.dtypes)
# PassengerId      int64
# Survived         int64
# Pclass           int64
# Name            object
# Sex             object
# Age            float64
# SibSp            int64
# Parch            int64
# Ticket          object
# Fare           float64
# Cabin           object
# Embarked        object
# dtype: object

# 3. Check for missing values (per-column count of nulls)
print(df.isnull().sum())
# PassengerId      0
# Survived         0
# Pclass           0
# Name             0
# Sex              0
# Age            177
# SibSp            0
# Parch            0
# Ticket           0
# Fare             0
# Cabin          687
# Embarked         2
# dtype: int64

# 4. Check the first 5 rows
print(df.head())
#    PassengerId  Survived  Pclass  \
# 0            1         0       3
# 1            2         1       1
# 2            3         1       3
# 3            4         1       1
# 4            5         0       3
#                                                 Name     Sex   Age  SibSp  \
# 0                            Braund, Mr. Owen Harris    male  22.0      1
# 1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1
# 2                             Heikkinen, Miss. Laina  female  26.0      0
# 3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1
# 4                           Allen, Mr. William Henry    male  35.0      0
#    Parch            Ticket     Fare Cabin Embarked
# 0      0         A/5 21171   7.2500   NaN        S
# 1      0          PC 17599  71.2833   C85        C
# 2      0  STON/O2. 3101282   7.9250   NaN        S
# 3      0            113803  53.1000  C123        S
# 4      0            373450   8.0500   NaN        S

# 5. Check the summary statistics
print(df.describe())
#        PassengerId    Survived      Pclass         Age       SibSp  \
# count   891.000000  891.000000  891.000000  714.000000  891.000000
# mean    446.000000    0.383838    2.308642   29.699118    0.523008
# std     257.353842    0.486592    0.836071   14.526497    1.102743
# min       1.000000    0.000000    1.000000    0.420000    0.000000
# 25%     223.500000    0.000000    2.000000   20.125000    0.000000
# 50%     446.000000    0.000000    3.000000   28.000000    0.000000
# 75%     668.500000    1.000000    3.000000   38.000000    1.000000
# max     891.000000    1.000000    3.000000   80.000000    8.000000
#             Parch        Fare
# count  891.000000  891.000000
# mean     0.381594   32.204208
# std      0.806057   49.693429
# min      0.000000    0.000000
# 25%      0.000000    7.910400
# 50%      0.000000   14.454200
# 75%      0.000000   31.000000
# max      6.000000  512.329200