tldr
KaggleのTelco Customer ChurnをPredicting Customer Churn - Data Every Day #040に沿ってやっていきます。
実行環境はGoogle Colaboratorです。
インポート
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn.preprocessing as sp
from sklearn.model_selection import train_test_split
import sklearn.linear_model as slm
import tensorflow as tf
データのダウンロード
Google Driveをマウントします。
# Mount Google Drive at /content/drive so files stored there are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
KaggleのAPIクライアントを初期化し、認証します。
認証情報はGoogle Drive内(/content/drive/My Drive/Colab Notebooks/Kaggle)にkaggle.jsonとして置いてあります。
import os
# Directory handed to the Kaggle client through KAGGLE_CONFIG_DIR; per the
# surrounding notes, kaggle.json is stored in this folder.
kaggle_path = "/content/drive/My Drive/Colab Notebooks/Kaggle"
os.environ['KAGGLE_CONFIG_DIR'] = kaggle_path
# The client is imported only after the environment variable is set; keep this order.
from kaggle.api.kaggle_api_extended import KaggleApi
api = KaggleApi()
api.authenticate()
Kaggle APIを使ってデータをダウンロードします。
# Kaggle dataset identifier passed to the API calls below.
dataset_id = 'blastchar/telco-customer-churn'
# List the dataset's files and take the name of the first one.
dataset = api.dataset_list_files(dataset_id)
file_name = dataset.files[0].name
# Local path where the file is expected: the client's default download dir + file name.
file_path = os.path.join(api.get_default_download_dir(), file_name)
file_path
'/content/WA_Fn-UseC_-Telco-Customer-Churn.csv'
# Download the single file found above.
# NOTE(review): force=True / quiet=False presumably mean "overwrite an existing
# copy" and "show progress" — confirm against the Kaggle API client docs.
api.dataset_download_file(dataset_id, file_name, force=True, quiet=False)
100%|██████████| 955k/955k [00:00<00:00, 95.4MB/s]
Downloading WA_Fn-UseC_-Telco-Customer-Churn.csv to /content
True
データの読み込み
Pandasを使ってダウンロードしてきたCSVファイルを読み込みます。
# Load the downloaded CSV into a DataFrame and display it.
data = pd.read_csv(file_path)
data
customerID | gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 7590-VHVEG | Female | 0 | Yes | No | 1 | No | No phone service | DSL | No | Yes | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No |
1 | 5575-GNVDE | Male | 0 | No | No | 34 | Yes | No | DSL | Yes | No | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.5 | No |
2 | 3668-QPYBK | Male | 0 | No | No | 2 | Yes | No | DSL | Yes | Yes | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes |
3 | 7795-CFOCW | Male | 0 | No | No | 45 | No | No phone service | DSL | Yes | No | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No |
4 | 9237-HQITU | Female | 0 | No | No | 2 | Yes | No | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
7038 | 6840-RESVB | Male | 0 | Yes | Yes | 24 | Yes | Yes | DSL | Yes | No | Yes | Yes | Yes | Yes | One year | Yes | Mailed check | 84.80 | 1990.5 | No |
7039 | 2234-XADUH | Female | 0 | Yes | Yes | 72 | Yes | Yes | Fiber optic | No | Yes | Yes | No | Yes | Yes | One year | Yes | Credit card (automatic) | 103.20 | 7362.9 | No |
7040 | 4801-JZAZL | Female | 0 | Yes | Yes | 11 | No | No phone service | DSL | Yes | No | No | No | No | No | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No |
7041 | 8361-LTMKD | Male | 1 | Yes | No | 4 | Yes | Yes | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Mailed check | 74.40 | 306.6 | Yes |
7042 | 3186-AJIEK | Male | 0 | No | No | 66 | Yes | No | Fiber optic | Yes | No | Yes | Yes | Yes | Yes | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.5 | No |
7043 rows × 21 columns
下準備
# Print each column's non-null count and dtype to spot columns that need cleaning.
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 21 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 customerID 7043 non-null object
1 gender 7043 non-null object
2 SeniorCitizen 7043 non-null int64
3 Partner 7043 non-null object
4 Dependents 7043 non-null object
5 tenure 7043 non-null int64
6 PhoneService 7043 non-null object
7 MultipleLines 7043 non-null object
8 InternetService 7043 non-null object
9 OnlineSecurity 7043 non-null object
10 OnlineBackup 7043 non-null object
11 DeviceProtection 7043 non-null object
12 TechSupport 7043 non-null object
13 StreamingTV 7043 non-null object
14 StreamingMovies 7043 non-null object
15 Contract 7043 non-null object
16 PaperlessBilling 7043 non-null object
17 PaymentMethod 7043 non-null object
18 MonthlyCharges 7043 non-null float64
19 TotalCharges 7043 non-null object
20 Churn 7043 non-null object
dtypes: float64(1), int64(2), object(18)
memory usage: 1.1+ MB
# customerID is removed from the feature table before encoding.
data = data.drop(columns=['customerID'])
エンコード
def get_uniques(df, columns):
    """Return the distinct values of each requested column.

    Args:
        df: DataFrame to inspect.
        columns: Iterable of column labels present in ``df``.

    Returns:
        A dict mapping each column label to a list of its unique values,
        in order of first appearance (the order ``Series.unique`` yields).
    """
    return {column: list(df[column].unique()) for column in columns}
def get_categorical_columns(df):
    """Return the labels of all object-dtype columns, in column order.

    Iterates ``df.dtypes`` directly rather than looking each label up again,
    so a frame with duplicate column labels does not trip on an ambiguous
    Series comparison.

    Args:
        df: DataFrame to inspect.

    Returns:
        A list of column labels whose dtype compares equal to ``'object'``.
    """
    return [name for name, dtype in df.dtypes.items() if dtype == 'object']
# Show the distinct values of every object-dtype column to decide how each should be encoded.
get_uniques(data, get_categorical_columns(data))
{'Churn': ['No', 'Yes'],
'Contract': ['Month-to-month', 'One year', 'Two year'],
'Dependents': ['No', 'Yes'],
'DeviceProtection': ['No', 'Yes', 'No internet service'],
'InternetService': ['DSL', 'Fiber optic', 'No'],
'MultipleLines': ['No phone service', 'No', 'Yes'],
'OnlineBackup': ['Yes', 'No', 'No internet service'],
'OnlineSecurity': ['No', 'Yes', 'No internet service'],
'PaperlessBilling': ['Yes', 'No'],
'Partner': ['Yes', 'No'],
'PaymentMethod': ['Electronic check',
'Mailed check',
'Bank transfer (automatic)',
'Credit card (automatic)'],
'PhoneService': ['No', 'Yes'],
'StreamingMovies': ['No', 'Yes', 'No internet service'],
'StreamingTV': ['No', 'Yes', 'No internet service'],
'TechSupport': ['No', 'Yes', 'No internet service'],
'TotalCharges': ['29.85',
'1889.5',
'108.15',
'1840.75',
'151.65',
'820.5',
'1949.4',
'301.9',
'3046.05',
'3487.95',
'587.45',
'326.8',
'5681.1',
'5036.3',
'2686.05',
'7895.15',
'1022.95',
'7382.25',
'528.35',
'1862.9',
'39.65',
'202.25',
'20.15',
'3505.1',
'2970.3',
'1530.6',
'4749.15',
'30.2',
'6369.45',
'1093.1',
'6766.95',
'181.65',
'1874.45',
'20.2',
'45.25',
'7251.7',
'316.9',
'3548.3',
'3549.25',
'1105.4',
'475.7',
'4872.35',
'418.25',
'4861.45',
'981.45',
'3906.7',
'97',
'144.15',
'4217.8',
'4254.1',
'3838.75',
'1426.4',
'1752.65',
'633.3',
'4456.35',
'1752.55',
'6311.2',
'7076.35',
'894.3',
'7853.7',
'4707.1',
'5450.7',
'2962',
'957.1',
'857.25',
'244.1',
'3650.35',
'2497.2',
'930.9',
'887.35',
'49.05',
'1090.65',
'7099',
'1424.6',
'177.4',
'6139.5',
'2688.85',
'482.25',
'2111.3',
'1216.6',
'79.35',
'565.35',
'496.9',
'4327.5',
'973.35',
'918.75',
'2215.45',
'1057',
'927.1',
'1009.25',
'2570.2',
'74.7',
'5714.25',
'7107',
'7459.05',
'927.35',
'4748.7',
'113.85',
'1107.2',
'2514.5',
'19.45',
'3605.6',
'3027.25',
'7611.85',
'100.2',
'7303.05',
'927.65',
'3921.3',
'1363.25',
'5238.9',
'3042.25',
'3954.1',
'2868.15',
'3423.5',
'248.4',
'1126.35',
'1064.65',
'835.15',
'2151.6',
'5515.45',
'112.75',
'229.55',
'350.35',
'62.9',
'3027.65',
'2135.5',
'1723.95',
'19.75',
'3985.35',
'1215.65',
'1502.65',
'3260.1',
'35.45',
'81.25',
'1188.2',
'1778.5',
'1277.75',
'1170.55',
'70.45',
'6425.65',
'563.65',
'5971.25',
'5289.05',
'1756.2',
'6416.7',
'61.35',
'45.65',
'1929.95',
'1071.4',
'564.35',
'655.5',
'7930.55',
'5215.25',
'113.5',
'1152.8',
'1821.95',
'419.9',
'1024',
'251.6',
'764.55',
'1592.35',
'135.2',
'3958.25',
'233.9',
'1363.45',
'273',
'6254.45',
'2651.2',
'321.4',
'3539.25',
'242.8',
'1181.75',
'5000.2',
'654.55',
'780.2',
'1145.7',
'503.6',
'1559.25',
'1252',
'29.95',
'45.3',
'662.65',
'2453.3',
'1111.65',
'24.8',
'1023.85',
'82.15',
'244.8',
'2379.1',
'3173.35',
'531',
'1375.4',
'8129.3',
'1192.7',
'1901.65',
'587.4',
'6519.75',
'8041.65',
'20.75',
'2681.15',
'1112.3',
'7405.5',
'1033.95',
'2958.95',
'2684.85',
'4179.2',
'79.9',
'1934.45',
'6654.1',
'84.5',
'25.25',
'1124.2',
'540.05',
'1975.85',
'3437.45',
'3139.8',
'3789.2',
'5324.5',
'624.6',
'268.35',
'1836.9',
'179.35',
'219.35',
'1288.75',
'2545.75',
'55.2',
'2723.15',
'4107.25',
'5760.65',
'4747.5',
'84.6',
'1566.9',
'702',
'114.1',
'299.05',
'1305.95',
'1120.3',
'284.35',
'6350.5',
'7878.3',
'3187.65',
'6126.15',
'731.3',
'273.4',
'2531.8',
'1074.3',
'48.55',
'4298.45',
'4619.55',
'147.15',
'40.2',
'2633.3',
'193.05',
'4103.9',
'7008.15',
'5791.1',
'80.55',
'1228.65',
'132.2',
'1364.3',
'4925.35',
'1520.1',
'5032.25',
'5526.75',
'1195.25',
'2007.25',
'1099.6',
'1732.95',
'1511.2',
'3450.15',
'2172.05',
'70.6',
'401.1',
'5624.85',
'1339.8',
'771.95',
'244.75',
'322.9',
'498.25',
'25.4',
'3687.75',
'1779.95',
'1783.6',
'927.15',
'70',
'606.55',
'435.4',
'1712.7',
'2021.2',
'1940.8',
'567.8',
'220.35',
'20.25',
'5436.45',
'3437.5',
'3015.75',
'73.6',
'1509.8',
'396.1',
'356.65',
'4109',
'3141.7',
'1229.1',
'2303.35',
'2054.4',
'3741.85',
'3682.45',
'19.25',
'1886.25',
'4895.1',
'341.6',
'415.55',
'5686.4',
'1355.1',
'3058.65',
'2231.05',
'3236.35',
'4350.1',
'4264',
'44.8',
'422.3',
'4176.7',
'5138.1',
'880.05',
'139.05',
'973.65',
'1470.05',
'739.35',
'161.95',
'4422.95',
'511.25',
'155.8',
'5293.95',
'4759.85',
'6148.45',
'3565.65',
'6603',
'1830.1',
'6223.8',
'4508.65',
'1328.15',
'865',
'72.1',
'168.2',
'1303.5',
'996.85',
'6430.9',
'2278.75',
'681.4',
'574.35',
'371.9',
'840.1',
'846',
'889',
'6823.4',
'805.1',
'4016.75',
'83.75',
'3959.15',
'2878.55',
'945.7',
'1373.05',
'492.55',
'1406',
'19.15',
'6962.85',
'8126.65',
'690.25',
'181.5',
'830.8',
'5608.4',
'3646.8',
'3662.25',
'25.35',
'2566.5',
'5308.7',
'1410.25',
'3339.05',
'50.65',
'4732.35',
'90.85',
'5067.45',
'214.75',
'4874.7',
'2348.45',
'4063',
'44',
'2595.25',
'2309.55',
'89.3',
'367.55',
'3944.5',
'5965.95',
'3694.7',
'2524.45',
'1803.7',
'415.1',
'624.15',
'237.7',
'7007.6',
'3848.8',
'419.4',
'1468.75',
'5812',
'2861.45',
'19.9',
'19.6',
'233.7',
'1066.15',
'2149.05',
'4473',
'3545.05',
'1195.75',
'6858.9',
'1024.7',
'1845.9',
'75.3',
'132.25',
'515.45',
'2830.45',
'1110.5',
'449.3',
'2838.55',
'5376.4',
'858.6',
'1395.05',
'3975.7',
'1182.55',
'4784.45',
'119.5',
'518.9',
'899.45',
'1183.8',
'720.05',
'8468.2',
'3161.2',
'55.05',
'1882.55',
'5070.4',
'6049.5',
'1166.7',
'2937.65',
'6396.45',
'69.55',
'1270.25',
'759.55',
'7611.55',
'1642.75',
'1545.4',
'3582.4',
'2227.1',
'1417.9',
'2494.65',
'2768.35',
'2369.3',
'38',
'75.1',
'100.9',
'1614.05',
'385.9',
'673.25',
'8404.9',
'2799.75',
'6538.45',
'6588.95',
'868.1',
'734.35',
'330.6',
'55',
'564.4',
'1315.35',
'1861.5',
'2747.2',
'554.05',
'453.4',
'994.8',
'225.75',
'2145',
'1671.6',
'8003.8',
'680.05',
'6130.85',
'1415',
'6201.95',
' ',
'74.35',
'6597.25',
'114.15',
'139.4',
'3902.6',
'20.4',
'903.6',
'1785.65',
'1397.65',
'131.05',
'1238.45',
'3899.05',
'5676',
'4543.15',
'4326.8',
'5502.55',
'1782.4',
'851.8',
'167.5',
'502.85',
'19.85',
'1818.3',
'6300.15',
'334.8',
'5916.95',
'2852.4',
'4131.95',
'1546.3',
'302.6',
'1929.35',
'265.45',
'6989.45',
'1442',
'4713.4',
'1758.6',
'3480',
'4738.3',
'8399.15',
'5430.35',
'686.95',
'5706.3',
'490.65',
'1360.25',
'174.45',
'7379.8',
'85',
'1021.75',
'5029.05',
'1955.4',
'6744.2',
'4946.7',
'8248.5',
'601.6',
'19.55',
'834.1',
'597',
'2647.2',
'3266',
'6744.25',
'5265.5',
'311.6',
'7966.9',
'8220.4',
'1153.25',
'514.75',
'2596.15',
'3808',
'2708.2',
'760.05',
'49.3',
'6033.3',
'89.05',
'516.15',
'5861.75',
'445.95',
'4973.4',
'1667.25',
'5357.75',
'3527.6',
'422.6',
'1103.25',
'2754',
'697.25',
'5614.45',
'3204.4',
'4747.65',
'3082.1',
'597.9',
'3365.4',
'38.8',
'233.55',
'346.2',
'19',
'61.7',
'85.7',
'3342.45',
'85.1',
'7422.1',
'6668.05',
'8071.05',
'1174.8',
'5435',
'2438.6',
'45.6',
'713.75',
'916.15',
'237.2',
'4614.55',
'1414.45',
'1170.5',
'47.7',
'4859.25',
'4903.2',
'3608',
'6094.25',
'3084.9',
'2356.75',
'8306.05',
'6786.4',
'248.95',
'663.05',
'1357.1',
'4860.35',
'3418.2',
'631.4',
'186.3',
'6976.75',
'4884.85',
'522.95',
'362.6',
'5755.8',
'3355.65',
'406.95',
'137.6',
'2395.7',
'1968.1',
'6819.45',
'7943.45',
'4547.25',
'4687.9',
'2473.95',
'6562.9',
'176.3',
'2236.2',
'6985.65',
'3109.9',
'4911.35',
'5794.65',
'855.3',
'1620.2',
'2499.3',
'89.55',
'4287.2',
'394.85',
'1899.65',
'45.7',
'3442.15',
'161.5',
'1732.6',
'222.3',
'74.6',
'655.3',
'475.25',
'164.3',
'865.1',
'6132.7',
'3597.5',
'35.9',
'697.65',
'96.05',
'428.7',
'20.05',
'4459.15',
'1167.6',
'238.1',
'145.15',
'1453.1',
'191.05',
'4039.3',
'1039.45',
'1336.1',
'75.05',
'493.4',
'2550.9',
'7246.15',
'1203.95',
'62.25',
'313.6',
'3775.85',
'80',
'4616.05',
'195.65',
'4188.4',
'71.1',
'49.9',
'1266.4',
'91.1',
'1623.4',
'4149.45',
'20.45',
'2344.5',
'1013.05',
'270.7',
'417',
'20.65',
'6316.2',
'168.15',
'4018.05',
'4811.6',
'4189.7',
'2848.45',
'2516.2',
'33.6',
'208.45',
'2015.35',
'3739.8',
'2964',
'2263.4',
'2211.8',
'1683.7',
'1519',
'1164.05',
'1710.9',
'4245.55',
'4145.9',
'2664.3',
'1277.5',
'5589.3',
'34.75',
'381.3',
'141.5',
'3105.55',
'204.55',
'605.9',
'356.1',
'2758.15',
'4805.65',
'3941.7',
'92.75',
'1901.05',
'5730.7',
'2423.4',
'1653.45',
'3327.05',
'7085.5',
'3344.1',
'6697.35',
'2879.9',
'137.1',
'1008.55',
'1551.6',
'202.15',
'7882.25',
'8196.4',
'202.9',
'855.1',
'5817',
'1652.95',
'5600.15',
'515.75',
'190.5',
'1842.8',
'1253.8',
'57.2',
'1269.55',
'6563.4',
'20.8',
'1907.85',
'208.85',
'4758.8',
'1292.6',
'363.15',
'1600.25',
'275.7',
'3089.1',
'1175.6',
'237.3',
'1444.65',
'454.15',
'3036.75',
'8065.65',
'92.5',
'184.65',
'6152.3',
'89.5',
'5154.5',
'220.45',
'1510.3',
'52.2',
'2588.95',
'4874.8',
'3983.6',
'2003.6',
'1832.4',
'4908.25',
'3590.2',
'5023',
'146.6',
'2339.3',
'298.7',
'143.65',
'2548.65',
'507.4',
'20',
'6125.4',
'5411.4',
'1058.25',
'903.8',
'3009.5',
'489.45',
'5468.45',
'1058.1',
'7616',
'4113.1',
'6017.65',
'7250.15',
'1108.2',
'938.65',
'94.15',
'2088.05',
'178.7',
'5656.75',
'2317.1',
'5986.45',
'6751.35',
'3566.6',
'4889.3',
'318.1',
'1563.95',
'1430.25',
'644.35',
'372.45',
'5453.4',
'1442.6',
'5610.7',
'963.95',
'5222.3',
'3340.55',
'292.8',
'5774.55',
'4487.3',
'44.4',
'2854.95',
'905.55',
'5509.3',
'7589.8',
'229.6',
'394.1',
'89.9',
'295.95',
'5459.2',
'444.75',
'6782.15',
'6510.45',
'8476.5',
'4461.85',
'62',
'352.65',
'1424.9',
'6413.65',
'6309.65',
'50.8',
'5898.6',
'4719.75',
'457.3',
'5822.3',
'1463.5',
'307.4',
'2104.55',
'319.15',
'2337.45',
'104.3',
'5084.65',
'121.25',
'7015.9',
'5598',
'1269.1',
'3027.4',
'4634.35',
'113.95',
'1582.75',
'3077',
'4039.5',
'1665.2',
'1043.3',
'504.2',
'497.55',
'7511.65',
'1782',
'609.65',
'2857.6',
'3247.55',
'6215.35',
'2823',
'5017.9',
'2619.25',
'24.6',
'4965.1',
'2679.7',
'8310.55',
'5682.25',
'1120.95',
'4914.9',
'27.55',
'923.5',
'1625.65',
'6068.65',
'5398.6',
'918.6',
'234.85',
'1231.85',
'170.9',
'7984.15',
'688.65',
'1288.3',
'7848.5',
'267',
'1798.9',
'73.55',
'1643.55',
'4807.45',
'2193',
'2239.4',
'1505.9',
'255.35',
'1189.4',
'4786.15',
'1820.9',
'2404.15',
'205.05',
'952.3',
'7039.45',
'2538.05',
'1212.85',
'2651.1',
'1304.8',
'360.1',
'435.45',
'308.05',
'434.5',
'7118.9',
'320.45',
'531.55',
'382.2',
'2001',
'4919.7',
'5645.8',
'215.8',
'77.6',
'2896.55',
'3395.8',
'759.35',
'85.15',
'535.55',
'1253.15',
'955.15',
'2162.6',
'44.7',
'1813.35',
'245.15',
'2028.8',
'2723.75',
'365.8',
'551.95',
'4054.2',
'832.35',
'112.3',
'60.65',
'5550.1',
'174.8',
'90.55',
'4733.1',
'4048.95',
'1359.7',
'4542.35',
'7532.15',
'69.9',
'888.65',
'6383.35',
'1916',
'413',
'808.95',
'1886.4',
'86.6',
'1679.25',
'2656.5',
'540.95',
'537.35',
'678.8',
'4513.65',
'1423.85',
'555.4',
'225.55',
'268.45',
'2917.5',
'2416.1',
'424.45',
'1492.1',
'132.4',
'660.9',
'1893.95',
'284.9',
'784.25',
'417.7',
'5016.25',
'1612.75',
'119.75',
'3204.65',
'485.2',
'160.85',
'4145.25',
'827.45',
'49.5',
'990.85',
'696.35',
...],
'gender': ['Female', 'Male']}
# TotalCharges is still text here, so this sorts lexicographically; the blank ' ' entry sorts first.
sorted(data['TotalCharges'].unique())
[' ',
'100.2',
'100.25',
'100.35',
'100.4',
'100.8',
'100.9',
'1001.2',
'1001.5',
'1003.05',
'1004.35',
'1004.5',
'1004.75',
'1005.7',
'1006.9',
'1007.8',
'1007.9',
'1008.55',
'1008.7',
'1009.25',
'101.1',
'101.45',
'101.65',
'101.9',
'1010',
'1011.05',
'1011.5',
'1011.8',
'1012.4',
'1013.05',
'1013.2',
'1013.35',
'1013.6',
'1014.25',
'1016.7',
'1017.35',
'102.45',
'102.5',
'102.75',
'1020.2',
'1020.75',
'1021.55',
'1021.75',
'1021.8',
'1022.5',
'1022.6',
'1022.95',
'1023.75',
'1023.85',
'1023.9',
'1023.95',
'1024',
'1024.65',
'1024.7',
'1025.05',
'1025.15',
'1025.95',
'1026.35',
'1027.25',
'1028.75',
'1028.9',
'1029.35',
'1029.75',
'1029.8',
'103.7',
'1031.1',
'1031.4',
'1031.7',
'1032',
'1032.05',
'1033',
'1033.9',
'1033.95',
'1035.5',
'1035.7',
'1036',
'1036.75',
'1037.75',
'1039.45',
'104.2',
'104.3',
'1041.8',
'1042.65',
'1043.3',
'1043.35',
'1043.4',
'1043.8',
'1045.25',
'1046.1',
'1046.2',
'1046.5',
'1047.7',
'1048.45',
'1048.85',
'1049.05',
'1049.6',
'105.5',
'105.6',
'1050.5',
'1051.05',
'1051.9',
'1052.35',
'1052.4',
'1054.6',
'1054.75',
'1054.8',
'1055.9',
'1056.95',
'1057',
'1057.55',
'1057.85',
'1058.1',
'1058.25',
'1058.6',
'1059.55',
'106.2',
'106.55',
'106.8',
'106.85',
'106.9',
'1060.2',
'1060.6',
'1061.6',
'1062.1',
'1064.65',
'1064.95',
'1066.15',
'1066.9',
'1067.05',
'1067.15',
'1067.65',
'1068.15',
'1068.85',
'107.05',
'107.1',
'107.25',
'107.6',
'1070.15',
'1070.25',
'1070.5',
'1070.7',
'1071.4',
'1071.6',
'1072',
'1072.6',
'1073.3',
'1074.3',
'1074.65',
'1077.05',
'1077.5',
'1077.95',
'1078.75',
'1078.9',
'1079.05',
'1079.45',
'1079.65',
'108.05',
'108.15',
'108.65',
'108.7',
'108.95',
'1080.55',
'1081.25',
'1081.45',
'1082.75',
'1082.8',
'1083',
'1083.7',
'1086.75',
'1087.25',
'1087.45',
'1087.7',
'1088.25',
'109.2',
'109.25',
'109.3',
'109.5',
'109.6',
'109.8',
'1090.1',
'1090.6',
'1090.65',
'1092.35',
'1093',
'1093.1',
'1093.2',
'1093.4',
'1094.35',
'1094.5',
'1095.3',
'1095.65',
'1096.25',
'1096.6',
'1096.65',
'1097.15',
'1098.85',
'1099.6',
'110.05',
'110.15',
'1101.85',
'1102.4',
'1103.25',
'1105.4',
'1107.2',
'1107.25',
'1108',
'1108.2',
'1108.6',
'1108.8',
'111.05',
'111.4',
'111.65',
'1110.05',
'1110.35',
'1110.5',
'1111.2',
'1111.65',
'1111.85',
'1112.3',
'1113.95',
'1114.55',
'1114.85',
'1115.15',
'1115.2',
'1115.6',
'1117.55',
'1118.8',
'1119.35',
'1119.9',
'112.3',
'112.75',
'1120.3',
'1120.95',
'1121.05',
'1122.4',
'1123.15',
'1123.65',
'1124.2',
'1125.2',
'1125.6',
'1126.35',
'1126.75',
'1127.2',
'1127.35',
'1128.1',
'1129.1',
'1129.35',
'1129.75',
'113.1',
'113.35',
'113.5',
'113.55',
'113.85',
'113.95',
'1130',
'1130.85',
'1131.2',
'1131.3',
'1131.5',
'1132.35',
'1132.75',
'1133.65',
'1133.7',
'1133.9',
'1134.25',
'1135.7',
'1137.05',
'1138.8',
'1139.2',
'114.1',
'114.15',
'114.7',
'1140.05',
'1143.8',
'1144.5',
'1144.6',
'1145.35',
'1145.7',
'1146.05',
'1146.65',
'1147',
'1147.45',
'1147.85',
'1148.1',
'1149.65',
'115.1',
'115.95',
'1151.05',
'1151.55',
'1152.7',
'1152.8',
'1153.25',
'1155.6',
'1156.1',
'1156.35',
'1156.55',
'1157.05',
'1158.85',
'116.6',
'116.65',
'116.85',
'116.95',
'1160.45',
'1160.75',
'1161.75',
'1162.85',
'1164.05',
'1164.3',
'1165.55',
'1165.6',
'1165.9',
'1166.7',
'1167.6',
'1167.8',
'1169.35',
'117.05',
'117.8',
'117.95',
'1170.5',
'1170.55',
'1171.3',
'1171.5',
'1172.95',
'1173.35',
'1173.55',
'1174.35',
'1174.8',
'1175.6',
'1175.85',
'1177.05',
'1177.95',
'1178.25',
'1178.4',
'1178.75',
'118.25',
'118.3',
'118.4',
'118.5',
'1180.95',
'1181.75',
'1182.55',
'1183.05',
'1183.2',
'1183.8',
'1184',
'1185',
'1185.95',
'1187.05',
'1188.2',
'1188.25',
'1189.4',
'1189.9',
'119.3',
'119.5',
'119.75',
'1190.5',
'1191.2',
'1191.4',
'1192.3',
'1192.7',
'1193.05',
'1193.55',
'1194.3',
'1195.25',
'1195.75',
'1195.95',
'1198.05',
'1198.8',
'1199.4',
'120.25',
'1200.15',
'1201.15',
'1201.65',
'1203.9',
'1203.95',
'1205.05',
'1205.5',
'1206.05',
'1207',
'1208.15',
'1208.35',
'1208.6',
'1209.25',
'121.25',
'1210.3',
'1210.4',
'1211.65',
'1212.1',
'1212.25',
'1212.85',
'1214.05',
'1215.1',
'1215.45',
'1215.6',
'1215.65',
'1215.8',
'1216.35',
'1216.6',
'1217.25',
'1218.25',
'1218.45',
'1218.55',
'1218.65',
'1219.85',
'122',
'122.7',
'122.9',
'1221.55',
'1221.65',
'1222.05',
'1222.25',
'1222.65',
'1222.8',
'1224.05',
'1225.65',
'1226.45',
'1228.65',
'1229.1',
'123.05',
'123.65',
'123.8',
'1230.25',
'1230.9',
'1231.85',
'1232.9',
'1233.15',
'1233.25',
'1233.4',
'1233.65',
'1234.6',
'1234.8',
'1235.55',
'1236.15',
'1237.3',
'1237.65',
'1237.85',
'1238.45',
'1238.65',
'124.4',
'124.45',
'1240.15',
'1240.25',
'1240.8',
'1242.2',
'1242.25',
'1242.45',
'1244.5',
'1244.8',
'1245.05',
'1245.6',
'1246.4',
'1247.75',
'1248.9',
'1249.25',
'125',
'125.5',
'125.95',
'1250.1',
'1250.85',
'1252',
'1252.85',
'1253.15',
'1253.5',
'1253.65',
'1253.8',
'1253.9',
'1254.7',
'1255.1',
'1258.15',
'1258.3',
'1258.35',
'1258.6',
'1259',
'1259.35',
'126.05',
'1260.7',
'1261',
'1261.35',
'1261.45',
'1261.7',
'1263.05',
'1263.85',
'1263.9',
'1264.2',
'1265.65',
'1266.1',
'1266.35',
'1266.4',
'1267',
'1267.05',
'1267.2',
'1267.95',
'1268.85',
'1269.1',
'1269.55',
'1269.6',
'127.1',
'1270.2',
'1270.25',
'1270.55',
'1271.8',
'1272.05',
'1273.3',
'1274.05',
'1274.45',
'1275.6',
'1275.65',
'1275.7',
'1275.85',
'1277.5',
'1277.75',
'1278.8',
'1279',
'128.6',
'1281',
'1281.25',
'1282.85',
'1284.2',
'1285.05',
'1285.8',
'1286',
'1286.05',
'1287.85',
'1288',
'1288.3',
'1288.75',
'129.15',
'129.2',
'129.55',
'129.6',
'1290',
'1291.3',
'1291.35',
'1292.2',
'1292.6',
'1292.65',
'1293.8',
'1294.6',
'1295.4',
'1296.15',
'1296.8',
'1297.35',
'1297.8',
'1298.7',
'1299.1',
'1299.8',
'130.1',
'130.15',
'130.25',
'130.5',
'130.55',
'130.75',
'1301',
'1301.1',
'1301.7',
'1301.9',
'1302.65',
'1303.25',
'1303.5',
'1304.8',
'1304.85',
'1305.95',
'1306.3',
'1307.8',
'1308.1',
'1308.4',
'1309',
'1309.15',
'131.05',
'131.65',
'1311.3',
'1311.6',
'1311.75',
'1312.15',
'1312.45',
'1313.25',
'1313.55',
'1315',
'1315.35',
'1316.9',
'1317.95',
'1319.85',
'1319.95',
'132.2',
'132.25',
'132.4',
'1321.3',
'1322.55',
'1322.85',
'1323.7',
'1325.85',
'1326.25',
'1326.35',
'1327.15',
'1327.4',
'1327.85',
'1328.15',
'1328.35',
'1329.15',
'1329.2',
'1331.05',
'1332.4',
'1334',
'1334.45',
'1334.5',
'1335.2',
'1336.1',
'1336.15',
'1336.35',
'1336.65',
'1336.8',
'1336.9',
'1337.45',
'1337.5',
'1338.15',
'1339.8',
'134.05',
'134.35',
'134.5',
'134.6',
'134.7',
'134.75',
'1340.1',
'1341.5',
'1342.15',
'1343.4',
'1344.5',
'1345.55',
'1345.65',
'1345.75',
'1345.85',
'1346.2',
'1346.3',
'1346.9',
'1347.15',
'1348.5',
'1348.9',
'1348.95',
'135',
'135.2',
'135.75',
'1350.15',
'1354.4',
'1355.1',
'1355.45',
'1356.3',
'1356.7',
'1357.1',
'1358.6',
'1358.85',
'1359',
'1359.45',
'1359.5',
'1359.7',
'136.05',
'136.75',
'1360.25',
'1362.85',
'1363.25',
'1363.45',
'1364.3',
'1364.75',
'1367.75',
'1369.8',
'137.1',
'137.25',
'137.6',
'137.85',
'137.95',
'1370.35',
'1372.45',
'1372.9',
'1373',
'1373.05',
'1374.2',
'1374.35',
'1374.9',
'1375.15',
'1375.4',
'1375.6',
'1376.5',
'1377.7',
'1378.25',
'1378.45',
'1378.75',
'1379.6',
'1379.8',
'138.85',
'1380.1',
'1380.4',
'1381.2',
'1381.8',
'1382.9',
'1383.6',
'1384.75',
'1385.85',
'1386.8',
'1386.9',
'1387',
'1387.35',
'1387.45',
'1388',
'1388.45',
'1388.75',
'1389.2',
'1389.35',
'1389.6',
'1389.85',
'139.05',
'139.25',
'139.35',
'139.4',
'139.75',
'1390.6',
'1390.85',
'1391.15',
'1391.65',
'1392.25',
'1393.6',
'1394.55',
'1395.05',
'1396',
'1396.25',
'1396.9',
'1397.3',
'1397.65',
'1398.25',
'1398.6',
'1399.35',
'140.1',
'140.4',
'140.7',
'140.95',
'1400.3',
'1400.55',
'1400.85',
'1401.15',
'1401.4',
'1402.25',
'1403.1',
'1404.65',
'1405.3',
'1406',
'1406.65',
'1406.9',
'141.1',
'141.45',
'141.5',
'141.6',
'141.65',
'141.7',
'1410.25',
'1411.2',
'1411.35',
'1411.65',
'1411.9',
'1412.4',
'1412.65',
'1413',
'1414.2',
'1414.45',
'1414.65',
'1414.8',
'1415',
'1415.55',
'1415.85',
'1416.2',
'1416.5',
'1416.75',
'1417.9',
'1419.4',
'142.35',
'1421.75',
'1421.9',
'1422.05',
'1422.1',
'1422.65',
'1423.05',
'1423.15',
'1423.35',
'1423.65',
'1423.85',
'1424.2',
'1424.4',
'1424.5',
'1424.6',
'1424.9',
'1424.95',
'1425.45',
'1426.4',
'1426.45',
'1426.75',
'1427.55',
'1429.65',
'143.35',
'143.65',
'143.9',
'1430.05',
'1430.25',
'1430.95',
'1431.65',
'1432.55',
'1433.8',
'1434.1',
'1434.6',
'1436.95',
'1438.05',
'1439.35',
'144',
'144.15',
'144.35',
'144.55',
'144.8',
'144.95',
'1440.75',
'1441.1',
'1441.65',
'1441.8',
'1441.95',
'1442',
'1442.2',
'1442.6',
'1442.65',
'1443.65',
'1444.05',
'1444.65',
'1445.2',
'1445.3',
'1445.95',
'1446.8',
'1447.9',
'1448.6',
'1448.8',
'145',
'145.15',
'145.4',
'1451.1',
'1451.6',
'1451.9',
'1453.1',
'1454.15',
'1454.25',
'1457.25',
'1458.1',
'1459.35',
'146.05',
'146.3',
'146.4',
'146.6',
'146.65',
'146.9',
'1460.65',
'1460.85',
'1461.15',
'1461.45',
'1462.05',
'1462.6',
'1463.45',
'1463.5',
'1463.7',
'1465.75',
'1466.1',
'1468.75',
'1468.9',
'147.15',
'147.5',
'147.55',
'147.75',
'147.8',
'1470.05',
'1470.95',
'1471.75',
'1474.35',
'1474.75',
'1474.9',
'1476.25',
'1477.65',
'1478.85',
'148.05',
'1482.3',
'1483.25',
'1489.3',
'149.05',
'149.55',
'1490.4',
'1490.95',
'1492.1',
'1493.1',
'1493.2',
'1493.55',
'1493.75',
'1494.5',
'1495.1',
'1496.45',
'1496.9',
'1497.05',
'1497.9',
'1498.2',
'1498.35',
'1498.55',
'1498.65',
'1498.85',
'150',
'150.35',
'150.6',
'150.75',
'150.85',
'1500.25',
'1500.5',
'1500.95',
'1501.75',
'1502.25',
'1502.65',
'1504.05',
'1505.05',
'1505.15',
'1505.35',
'1505.45',
'1505.85',
'1505.9',
'1506.4',
'1507',
'1509.8',
'1509.9',
'151.3',
'151.65',
'151.75',
'151.8',
'1510.3',
'1510.5',
'1511.2',
'1513.6',
'1514.85',
'1515.1',
'1516.6',
'1517.5',
'1519',
'152.3',
'152.45',
'152.6',
'152.7',
'152.95',
'1520.1',
'1520.9',
'1521.2',
'1522.65',
'1522.7',
'1523.4',
'1524.85',
'1525.35',
'1527.35',
'1527.5',
'1529.2',
'1529.45',
'1529.65',
'153.05',
'153.3',
'153.8',
'153.95',
'1530.6',
'1531.4',
'1532.45',
'1533.8',
'1534.05',
'1534.75',
'1536.75',
'1537.85',
'1537.9',
'1538.6',
'1539.45',
'1539.75',
'1539.8',
'154.3',
'154.55',
'154.65',
'154.8',
'154.85',
'1540.05',
'1540.2',
'1540.35',
'1544.05',
'1545.4',
'1546.3',
'1547.35',
'1548.65',
'1549.75',
'155.35',
'155.65',
'155.8',
'155.9',
'1551.6',
'1553.2',
'1553.9',
'1553.95',
'1554',
'1554.9',
'1555.65',
'1556.85',
'1558.65',
'1558.7',
...]
# TotalCharges was read as text because some rows hold a lone space instead of
# a number. Mark those as missing, convert the column to float, then impute
# the missing entries with the column mean.
# np.nan / builtin float are used because the np.NaN and np.float aliases were
# removed from recent NumPy releases.
data['TotalCharges'] = data['TotalCharges'].replace(' ', np.nan)
data['TotalCharges'] = data['TotalCharges'].astype(float)
data['TotalCharges'] = data['TotalCharges'].fillna(data['TotalCharges'].mean())
# Re-check the object-dtype columns; TotalCharges should no longer be listed now that it is numeric.
get_uniques(data, get_categorical_columns(data))
{'Churn': ['No', 'Yes'],
'Contract': ['Month-to-month', 'One year', 'Two year'],
'Dependents': ['No', 'Yes'],
'DeviceProtection': ['No', 'Yes', 'No internet service'],
'InternetService': ['DSL', 'Fiber optic', 'No'],
'MultipleLines': ['No phone service', 'No', 'Yes'],
'OnlineBackup': ['Yes', 'No', 'No internet service'],
'OnlineSecurity': ['No', 'Yes', 'No internet service'],
'PaperlessBilling': ['Yes', 'No'],
'Partner': ['Yes', 'No'],
'PaymentMethod': ['Electronic check',
'Mailed check',
'Bank transfer (automatic)',
'Credit card (automatic)'],
'PhoneService': ['No', 'Yes'],
'StreamingMovies': ['No', 'Yes', 'No internet service'],
'StreamingTV': ['No', 'Yes', 'No internet service'],
'TechSupport': ['No', 'Yes', 'No internet service'],
'gender': ['Female', 'Male']}
# Fold the "no service" categories into plain 'No' so these columns become two-valued.
data['MultipleLines'] = data['MultipleLines'].replace('No phone service', 'No')

internet_addon_columns = [
    'DeviceProtection',
    'OnlineBackup',
    'OnlineSecurity',
    'StreamingMovies',
    'StreamingTV',
    'TechSupport',
]
data[internet_addon_columns] = data[internet_addon_columns].replace('No internet service', 'No')

# Feature groups by encoding strategy.
binary_features = [
    'gender',
    'Partner',
    'Dependents',
    'PhoneService',
    'MultipleLines',
    'DeviceProtection',
    'OnlineBackup',
    'OnlineSecurity',
    'StreamingMovies',
    'StreamingTV',
    'TechSupport',
    'PaperlessBilling',
]
ordinal_features = ['InternetService', 'Contract']
nominal_features = ['PaymentMethod']
target_column = ['Churn']

# Category orderings used for ordinal encoding (list position becomes the encoded value).
internet_ordering = ['No', 'DSL', 'Fiber optic']
contract_ordering = ['Month-to-month', 'One year', 'Two year']
def binary_encode(df, column, positive_value):
    """Encode a two-valued column as 0/1 integers.

    Args:
        df: Source DataFrame; it is not modified.
        column: Label of the column to encode.
        positive_value: The value mapped to 1; every other value maps to 0.

    Returns:
        A copy of ``df`` with ``column`` replaced by int64 0/1 values.
    """
    df = df.copy()
    # Vectorised comparison instead of a per-row Python lambda.
    df[column] = df[column].eq(positive_value).astype('int64')
    return df
def ordinal_encode(df, column, ordering):
    """Replace each value in a column by its position in ``ordering``.

    Args:
        df: Source DataFrame; it is not modified.
        column: Label of the column to encode.
        ordering: Sequence of category values, lowest rank first.

    Returns:
        A copy of ``df`` with ``column`` replaced by integer ranks.

    Raises:
        ValueError: If the column contains a value that is not in ``ordering``.
    """
    df = df.copy()
    df[column] = df[column].apply(ordering.index)
    return df
def onehot_encode(df, column, prefix=None):
    """Replace a categorical column by one indicator column per category.

    Args:
        df: Source DataFrame; it is not modified.
        column: Label of the column to expand.
        prefix: Optional prefix for the new column names, forwarded to
            ``pd.get_dummies``. With the default ``None`` the new columns are
            named after the raw category values, as before; pass a prefix to
            avoid clashes with existing column names.

    Returns:
        A new DataFrame with ``column`` removed and the indicator columns
        appended after the remaining original columns.
    """
    dummies = pd.get_dummies(df[column], prefix=prefix)
    return pd.concat([df.drop(column, axis=1), dummies], axis=1)
# Columns whose positive class is the literal 'Yes'.
yes_features = [
    'Partner',
    'Dependents',
    'PhoneService',
    'MultipleLines',
    'DeviceProtection',
    'OnlineBackup',
    'OnlineSecurity',
    'StreamingMovies',
    'StreamingTV',
    'TechSupport',
    'PaperlessBilling',
]

# gender is the one binary column whose positive class is not 'Yes'.
data = binary_encode(data, 'gender', 'Male')
for column in yes_features:
    data = binary_encode(data, column, 'Yes')

# Ordinal columns use the explicit orderings defined earlier.
for column, ordering in (
    ('InternetService', internet_ordering),
    ('Contract', contract_ordering),
):
    data = ordinal_encode(data, column, ordering)

data = onehot_encode(data, 'PaymentMethod')
data = binary_encode(data, 'Churn', 'Yes')

# Confirm that every column is numeric after encoding.
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 23 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 gender 7043 non-null int64
1 SeniorCitizen 7043 non-null int64
2 Partner 7043 non-null int64
3 Dependents 7043 non-null int64
4 tenure 7043 non-null int64
5 PhoneService 7043 non-null int64
6 MultipleLines 7043 non-null int64
7 InternetService 7043 non-null int64
8 OnlineSecurity 7043 non-null int64
9 OnlineBackup 7043 non-null int64
10 DeviceProtection 7043 non-null int64
11 TechSupport 7043 non-null int64
12 StreamingTV 7043 non-null int64
13 StreamingMovies 7043 non-null int64
14 Contract 7043 non-null int64
15 PaperlessBilling 7043 non-null int64
16 MonthlyCharges 7043 non-null float64
17 TotalCharges 7043 non-null float64
18 Churn 7043 non-null int64
19 Bank transfer (automatic) 7043 non-null uint8
20 Credit card (automatic) 7043 non-null uint8
21 Electronic check 7043 non-null uint8
22 Mailed check 7043 non-null uint8
dtypes: float64(2), int64(17), uint8(4)
memory usage: 1.0 MB
分割とスケーリング
# Separate the target from the features.
y = data['Churn']
X = data.drop('Churn', axis=1)

# Split first, then fit the scaler on the training portion only, so that the
# test set's mean/variance do not leak into the features the model trains on.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7)

scaler = sp.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
トレーニング
# Check (rows, features); the feature count is the input width of the network below.
X.shape
(7043, 22)
# Small fully connected binary classifier: two hidden ReLU layers and a
# sigmoid output. The input width is taken from the training data instead of
# being hard-coded, so it stays correct if the feature set changes.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

model.summary()

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[tf.keras.metrics.AUC(name='auc')],
)

batch_size = 64
epochs = 100

history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=batch_size,
    epochs=epochs,
    # Keras expects callback *instances*; the original passed the class itself.
    callbacks=[tf.keras.callbacks.ReduceLROnPlateau()],
    verbose=0,
)
Model: "sequential_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_6 (Dense) (None, 64) 1472
_________________________________________________________________
dense_7 (Dense) (None, 64) 4160
_________________________________________________________________
dense_8 (Dense) (None, 1) 65
=================================================================
Total params: 5,697
Trainable params: 5,697
Non-trainable params: 0
_________________________________________________________________
結果
# Per-epoch loss values recorded by model.fit.
train_loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(1, epochs + 1)

# Plot training and validation loss on one figure.
plt.figure(figsize=(14, 10))
for series, label in (
    (train_loss, 'Training Loss'),
    (val_loss, 'Validation Loss'),
):
    plt.plot(epochs_range, series, label=label)

plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Zero-based index of the epoch with the lowest validation loss.
np.argmin(val_loss)
5
# Evaluate on the held-out test split; returns [loss, auc] as configured in model.compile.
model.evaluate(X_test, y_test)
67/67 [==============================] - 0s 1ms/step - loss: 0.4744 - auc: 0.8117
[0.4744356870651245, 0.8116970062255859]