More than 5 years have passed since last update.

Pythonで機械学習 - Data Preparation

Posted at 2018-02-11


import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline


# Load the training and test tables; the relative paths are resolved against
# the current working directory.
train = pd.read_csv(filepath_or_buffer="train.csv")
test = pd.read_csv(filepath_or_buffer="test.csv")


PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
PassengerId Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 892 3 Kelly, Mr. James male 34.5 0 0 330911 7.8292 NaN Q
1 893 3 Wilkes, Mrs. James (Ellen Needs) female 47.0 1 0 363272 7.0000 NaN S
2 894 2 Myles, Mr. Thomas Francis male 62.0 0 0 240276 9.6875 NaN Q
3 895 3 Wirz, Mr. Albert male 27.0 0 0 315154 8.6625 NaN S
4 896 3 Hirvonen, Mrs. Alexander (Helga E Lindqvist) female 22.0 1 1 3101298 12.2875 NaN S
# Keep both frames in one list so each preprocessing step can be applied to
# train and test with a single loop. The list holds references, not copies,
# so columns added through it appear on `train` and `test` themselves.
combine = [train, test]


# Encode the "Sex" column as an integer code stored in a new "SexInt" column.
sex = {"male": 0, "female": 1}
for dataset in combine:
    # Series.map yields a real column on both Python 2 and Python 3. The
    # built-in map() is a lazy iterator on Python 3, so assigning it directly
    # does not store the per-row codes.
    # NOTE: a value that is not a key of `sex` becomes NaN here instead of
    # raising KeyError.
    dataset["SexInt"] = dataset["Sex"].map(sex)


# Fill missing Age and Fare values with the median of the same column in the
# same dataset (train and test each use their own median). Results go to new
# columns, so the raw "Age" and "Fare" columns are left as loaded.
for source, target in (("Age", "AgeFillNa"), ("Fare", "FareFillNa")):
    for dataset in combine:
        dataset[target] = dataset[source].fillna(dataset[source].median())


# Encode the "Embarked" column as an integer code stored in "EmbarkedInt".
embarked = {"S": 0, "C": 1, "Q": 2}
for dataset in combine:
    # Missing values are replaced with "S" before encoding.
    # Series.map is used instead of the built-in map(), which is a lazy
    # iterator on Python 3 and would not store the per-row codes.
    # NOTE: a value that is not a key of `embarked` becomes NaN here instead
    # of raising KeyError.
    dataset["EmbarkedInt"] = dataset["Embarked"].fillna("S").map(embarked)


# Derive family-size features from the sibling/spouse and parent/child counts.
for dataset in combine:
    # Number of relatives travelling with the passenger.
    dataset["NumFamily"] = dataset["SibSp"] + dataset["Parch"]
    # 1 when the passenger has no relatives aboard, otherwise 0. A vectorised
    # comparison replaces the built-in map(), which is a lazy iterator on
    # Python 3 and would not store the per-row flags.
    dataset["IsAlone"] = (dataset["NumFamily"] == 0).astype(int)


PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked SexInt AgeFillNa FareFillNa EmbarkedInt NumFamily IsAlone
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S 0 22.0 7.2500 0 1 0
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C 1 38.0 71.2833 1 1 0
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S 1 26.0 7.9250 0 0 1
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S 1 35.0 53.1000 0 1 0
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S 0 35.0 8.0500 0 0 1



