Reference
Titanic: Machine Learning from Disaster
【Kaggle初心者入門編】タイタニック号で生き残るのは誰?
File Upload 1
# Upload option 1: mount Google Drive into the Colab filesystem and list a
# Drive folder to confirm the mount worked.
# `os` is imported here because this cell runs before the later cell that
# imports it; without this the listdir call below raises NameError.
import os

from google.colab import drive

drive.mount('/content/gdrive')
print(os.listdir('/content/gdrive/My Drive/...'))
File Upload 2
from google.colab import files
import os

# Upload option 2: pick local files through Colab's upload helper.
# NOTE(review): presumably the uploaded files land in the current working
# directory -- confirm against the Colab documentation.
files.upload()

# Show where we are and what now lives under /content as a sanity check.
working_dir = os.getcwd()
print(working_dir)
content_entries = os.listdir('/content')
print(content_entries)
Sample Code
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
#matplotlib inline
# Read the training and test CSV files from the Colab /content directory.
train = pd.read_csv('/content/train.csv')
test = pd.read_csv('/content/test.csv')

# Print summary statistics (DataFrame.describe) for each dataset,
# separated by one blank line.
print('Train: ', train.describe(), sep='\n')
print()
print('Test: ', test.describe(), sep='\n')
def missing_table(df):
    """Summarize missing values per column of *df*.

    Args:
        df: The pandas DataFrame to inspect.

    Returns:
        A DataFrame indexed by the column names of *df* with two columns:
        '# of missing' (count of null entries) and '%' (that count as a
        percentage of the number of rows in *df*).
    """
    # Count nulls once and reuse the result for both output columns.
    null_counts = df.isnull().sum()
    percent = 100 * null_counts / len(df)
    # Concatenating two unnamed Series side by side yields columns 0 and 1,
    # which are then given readable labels.
    table = pd.concat([null_counts, percent], axis=1)
    return table.rename(columns={0: '# of missing', 1: '%'})
# Print the missing-value summary for each dataset under its label,
# with a single blank line between the two reports.
for position, (label, frame) in enumerate((('Train', train), ('Test', test))):
    if position:
        print()
    print(label)
    print(missing_table(frame))
# Fill missing values and encode the categorical columns as integers.
#
# Every assignment replaces a whole column (``df[col] = ...``) instead of
# using chained indexing (``df[col][mask] = value``). Chained indexing may
# write to a temporary copy: it triggers SettingWithCopyWarning and has no
# effect at all when pandas Copy-on-Write is enabled.
sex_codes = {'male': 0, 'female': 1}
embarked_codes = {'S': 0, 'C': 1, 'Q': 2}

# Training set: missing ages get the median age, missing ports get 'S'.
train['Age'] = train['Age'].fillna(train['Age'].median())
train['Embarked'] = train['Embarked'].fillna('S')
# NOTE: Series.map turns any label not present in the mapping into NaN.
train['Sex'] = train['Sex'].map(sex_codes)
train['Embarked'] = train['Embarked'].map(embarked_codes)

# Test set: same encoding; ages are filled with the test-set median.
test['Age'] = test['Age'].fillna(test['Age'].median())
test['Sex'] = test['Sex'].map(sex_codes)
test['Embarked'] = test['Embarked'].map(embarked_codes)
# Fill every missing fare with the median fare rather than overwriting the
# hard-coded row 152, so this still works if the rows are reordered or
# more fares are missing.
test['Fare'] = test['Fare'].fillna(test['Fare'].median())
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import accuracy_score
# Columns used as model inputs for both the training and the test set.
feature_columns = ['Pclass', 'Sex', 'Age', 'Fare']

target = train['Survived'].values
features_train = train[feature_columns].values

# Decision Tree
clf = DecisionTreeClassifier()
clf.fit(features_train, target)

# Accuracy is measured on the same rows the tree was fitted on, so this is
# the training accuracy, not a held-out estimate.
prediction_train = clf.predict(features_train)
score = accuracy_score(target, prediction_train)
print(score)

# Predict survival for the test passengers and show the result.
features_test = test[feature_columns].values
prediction_test = clf.predict(features_test)
print(prediction_test.shape)
print(prediction_test)
# Build the submission table: one 'Survived' prediction per test passenger,
# indexed by the integer passenger id.
PassengerId = np.array(test['PassengerId']).astype(int)
solution = pd.DataFrame(
    data=prediction_test,
    index=PassengerId,
    columns=['Survived'],
)

# Write the CSV with the index labelled 'PassengerId', then hand the file
# to the Colab download helper.
solution_path = '/content/solution.csv'
solution.to_csv(solution_path, index_label=['PassengerId'])
files.download(solution_path)