#チートシート(翻訳含む)
英語)https://github.com/pandas-dev/pandas/blob/master/doc/cheatsheet/Pandas_Cheat_Sheet.pdf
日本語)https://qiita.com/s_katagiri/items/4cd7dee37aae7a1e1fc0
上記で大体わかるが、
データ加工など痒い所に手が届かない点があるので、自分なりに書く
(かぶっているところもあるが気にしない)
#詳細(随時更新予定)
import numpy as np
import pandas as pd
# Small sample frame: three rows, five columns (numeric and text).
df = pd.DataFrame(
    data={
        "Age": [22, 33, 44],
        "Sex": ["man", "woman", "man"],
        "Embarked": ["S", "C", "Q"],
        "FamilyS": [1, 2, 4],
        "Name": ["TORO Mr. BU", "A Miss. gao", "ninnniku"],
    }
)
# Use CSV
# Write without the row index: by default to_csv() emits the index as an
# unnamed first column, which read_csv() would then load back as an extra
# "Unnamed: 0" data column.
df.to_csv("file_name", index=False)
df = pd.read_csv("file_name")
### treat data
# Check for nulls: per-column count of missing values and their percentage.
null_val = df.isnull().sum()
percent = 100 * null_val / len(df)  # reuse the counts instead of recomputing
kesson_table = pd.concat([null_val, percent], axis=1)
kesson_table.columns = ['欠損', '%']  # column labels: "missing" count, percent
# Display the missing-value summary that was just built.
print(kesson_table)
# Fill nulls: missing ages are replaced by the median of the Age column.
df["Age"] = df.Age.fillna(value=df.Age.median())
# data change
# 1. replace: swap each listed value for its counterpart, position by position.
df["Embarked"] = df["Embarked"].replace([0, 1, 2], [-1, -2, -3])
# 2. map: translate values through a dict. Values absent from the dict become
#    NaN, in which case the astype(int) that follows raises.
df['Embarked'] = df['Embarked'].map({'S': 0, 'C': 1, 'Q': 2, 'Unknown': 3}).astype(int)
# 3. apply: run a function on every element.
df["Embarked"] = df["Embarked"].apply(lambda x: 0 if x == "S" else x)
# Encode Sex as 1/0. Accept 'man' (the spelling used in the sample frame) as
# well as 'male'; testing for 'male' alone encoded every sample row as 0.
df['Sex'] = df['Sex'].apply(lambda x: 1 if x in ('man', 'male') else 0)
def family(x):
    """Bucket a numeric size into a category label.

    Args:
        x: Value to classify; must support comparison with integers.

    Returns:
        'Single' when x < 2, 'Couple' when x == 2, 'InterM' when
        2 < x <= 4, and 'Large' otherwise. NaN fails every comparison and
        therefore also yields 'Large'.
    """
    if x < 2:
        return 'Single'
    if x == 2:
        return 'Couple'
    if x <= 4:
        return 'InterM'
    return 'Large'
# Bucket the family-size column with family(); Embarked holds port codes,
# not sizes, so it is not the column to classify.
# NOTE(review): FamilyS is presumed to be the intended target - confirm.
df['FamilyS'] = df['FamilyS'].apply(family)
# extract: pull the title out of Name - a run of letters that follows a space
# and ends with a literal period ("Mr.", "Miss."). The dot is escaped in a raw
# string; unescaped it matches any character, so ordinary words would be
# captured as titles. Names without a match get NaN.
df['Salutation'] = df.Name.str.extract(r' ([A-Za-z]+)\.', expand=False)
# extract by condition: keep only the rows whose Age is positive.
df = df[df.Age > 0]
# Swap rows and columns.
df.T
# Distinct values found in a column.
df.Age.unique()
# Group by a column, aggregate each group, then order the result.
df.groupby("Age").sum().sort_values(by="Embarked")
# First row of every group.
df.groupby("Age").head(n=1)
# statistics
# Correlate the numeric columns only: on pandas 2.x a bare df.corr() no longer
# drops non-numeric columns silently and raises when text columns such as
# Name are present. Selecting by dtype first works on every pandas version.
df.select_dtypes(include="number").corr()  # correlation
df.describe()  # summary