tldr
KaggleのNYS Environmental Remediation SitesをClassifying Environmental Program Types - Data Every Day #041に沿ってやっていきます。
実行環境はGoogle Colaboratoryです。
インポート
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn.preprocessing as sp
from sklearn.model_selection import train_test_split
import sklearn.linear_model as slm
import tensorflow as tf
データのダウンロード
Google Driveをマウントします。
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
KaggleのAPIクライアントを初期化し、認証します。
認証情報はGoogle Drive内(/content/drive/My Drive/Colab Notebooks/Kaggle)にkaggle.jsonとして置いてあります。
import os
# Directory on the mounted Drive that holds kaggle.json (the API credentials).
kaggle_path = "/content/drive/My Drive/Colab Notebooks/Kaggle"
# Point the Kaggle client at that directory. This is set before the kaggle
# import below so the client can pick it up.
os.environ['KAGGLE_CONFIG_DIR'] = kaggle_path
from kaggle.api.kaggle_api_extended import KaggleApi
# Create the API client and authenticate it.
api = KaggleApi()
api.authenticate()
Kaggle APIを使ってデータをダウンロードします。
# Dataset slug in "<owner>/<dataset>" form.
dataset_id = 'new-york-state/nys-environmental-remediation-sites'
# List the dataset's files and take the name of the first one.
dataset = api.dataset_list_files(dataset_id)
file_name = dataset.files[0].name
# Path of that file inside the client's default download directory.
file_path = os.path.join(api.get_default_download_dir(), file_name)
file_path
'/content/environmental-remediation-sites.csv'
# Download the selected file (force=True; quiet=False prints the progress bar).
api.dataset_download_file(dataset_id, file_name, force=True, quiet=False)
100%|██████████| 4.82M/4.82M [00:00<00:00, 50.2MB/s]
Downloading environmental-remediation-sites.csv.zip to /content
True
データの読み込み
Pandasを使って、ダウンロードしてきたCSVファイルを読み込みます。
# The download log shows the file was saved as "<file_name>.zip", so read the
# zipped CSV directly from that path.
data = pd.read_csv(file_path + '.zip')
data
Program Number | Program Type | Program Facility Name | Site Class | Address1 | Address2 | Locality | County | ZIPCode | SWIS Code | DEC Region | Latitude | Longitude | Control Code | Control Type | OU | Project Name | Project Completion Date | Waste Name | Contaminants | Owner Name | Owner Address1 | Owner Address2 | Owner City | Owner State | Owner ZIP | Disposal Name | Disposal Address1 | Disposal Address2 | Disposal City | Disposal ZIP | Disposal State | Operator Name | Operator Address1 | Operator Address2 | Operator City | Operator State | Operator Zip | Georeference | NYS Municipal Boundaries 2 | New York Zip Codes 2 | Counties 2 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | C241159 | BCP | 11-28 31st Drive | C | 11-28 31st Drive | NaN | Queens | Queens | 11106 | 4101 | 2 | 40.768000 | -73.935028 | Environmental Easement | Environmental Easement | 01 | Remedial Design | 2016-09-12T00:00:00.000 | NaN | tetrachloroethene (PCE) | George Man | 1083 Maple Lane | NaN | New Hyde Park | NY | 11040 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | {'type': 'Point', 'coordinates': [-73.93502777... | 196 | 878.0 | 2137.0 |
1 | E633060 | ERP | 1333 East Dominick Street | A | 1313-1333 East Dominick Street | NaN | Rome | Oneida | 13440 | 3313 | 6 | 43.204978 | -75.434194 | Environmental Easement | Environmental Easement | 01 | NaN | 2018-03-01T00:00:00.000 | NaN | benzo(a)anthracene | JAMES F. BROWN | 198 N. WASHINGTON ST. | NaN | ROME | NY | 13440 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | {'type': 'Point', 'coordinates': [-75.43419382... | 989 | 1366.0 | 625.0 |
2 | C915320 | BCP | 166 Chandler Street | C | 166 Chandler Street | NaN | Buffalo | Erie | 14207 | 1502 | 9 | 42.943614 | -78.885092 | Environmental Easement | Environmental Easement | 01 | Remedial Investigation | 2018-12-12T00:00:00.000 | NaN | mercury | Rocco Termini | 391 Washington Street | NaN | Buffalo | NY | 14203 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | {'type': 'Point', 'coordinates': [-78.88509166... | 300 | 63.0 | 2041.0 |
3 | C828192 | BCP | 1777 East Henrietta Road | A | 1777 East Henrietta Road | NaN | Henrietta | Monroe | 14623 | 2832 | 8 | 43.084683 | -77.610928 | Environmental Easement | Environmental Easement | 01 | NaN | 2020-12-31T00:00:00.000 | NaN | benzo(a)pyrene | Kerry Ventures Tech Park, LLC | 195 Middle road, suite 400 | NaN | henrietta | NY | 14467 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | {'type': 'Point', 'coordinates': [-77.61092777... | 546 | 1682.0 | 2093.0 |
4 | V00669 | VCP | 192 Ralph Avenue | C | 192 Ralph Ave | NaN | Brooklyn | Kings | 11233 | 2401 | 2 | 40.683078 | -73.922924 | Deed Restriction | Deed Restriction | 01A | Remedial Action | 2008-06-11T00:00:00.000 | NaN | tetrachloroethene (PCE) | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | {'type': 'Point', 'coordinates': [-73.92292392... | 894 | 816.0 | 2090.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
82605 | 704005 | HW | Tri-City Barrel Company | 04 | Old Route 7 | NaN | Fenton (Port Crane) | Broome | 13833 | 432 | 7 | 42.158504 | -75.811645 | Other Controls | Other Controls | 01 | Remedial Design | 2003-01-16T00:00:00.000 | DICHLOROETHYLENE, XYLENE, STYRENE, | styrene | Gary Warner | RD 1 / PO Box 88 | NaN | Port Crane | NY | 13833 | TRI-CITY BARREL COMPANY, INC. | NaN | NaN | NaN | NaN | ZZ | WARNER GARY | R.D. 1, PO BOX 88 | NaN | PORT CRANE | NY | NaN | {'type': 'Point', 'coordinates': [-75.811645, ... | 945 | 453.0 | 2033.0 |
82606 | C224043 | BCP | U. S. Dredging Shipyard Site | C | One Beard Street | NaN | Brooklyn | Kings | 11231 | 2401 | 2 | 40.671538 | -74.011473 | Environmental Easement | Environmental Easement | 01 | Remedial Investigation | 2006-03-31T00:00:00.000 | NaN | methylene chloride | cLAUDE bOISVERT | 420 Alan Wood Road | NaN | Conshohocken | PA | 19428 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | {'type': 'Point', 'coordinates': [-74.011473, ... | 894 | 814.0 | 2090.0 |
82607 | 130043H | HW | Utility Manufacturing/Wonder King | 04 | 700-712 Main Street | NaN | Westbury | Nassau | 11590 | 3022 | 1 | 40.756999 | -73.556776 | Decision Document | Decision Document | 01 | Remedial Investigation | 2003-03-19T00:00:00.000 | TETRACHLOROETHYLENE | tetrachloroethene (PCE) | Audie Kranz and Wilbur Kranz | 700-712 Main Street | NaN | Westbury | NY | 11590 | NEST EQUITIES, INC. | NaN | NaN | NaN | NaN | ZZ | Audie Kranz and Wilbur Kranz | 700-712 Main Street | NaN | Westbury | NY | 11590 | {'type': 'Point', 'coordinates': [-73.55677553... | 60 | 997.0 | 2094.0 |
82608 | 704005 | HW | Tri-City Barrel Company | 04 | Old Route 7 | NaN | Fenton (Port Crane) | Broome | 13833 | 432 | 7 | 42.158504 | -75.811645 | Deed Restriction | Deed Restriction | 01 | Remedial Investigation | 2000-03-31T00:00:00.000 | DICHLOROETHYLENE, XYLENE, STYRENE, | styrene | Tri-Cities Barrel Co Inc | 319 NYS Rte 7 PO Box 88 | NaN | Port Crane | NY | 13833 | TRI-CITY BARREL COMPANY, INC. | NaN | NaN | NaN | NaN | ZZ | WARNER GARY | R.D. 1, PO BOX 88 | NaN | PORT CRANE | NY | NaN | {'type': 'Point', 'coordinates': [-75.811645, ... | 945 | 453.0 | 2033.0 |
82609 | 413010 | HW | West Well and Parking Area (Amphenol) | 02 | 40-60 Delaware Street | NaN | Sidney | Delaware | 13838 | 1350 | 4 | 42.307482 | -75.399926 | Deed Restriction | Deed Restriction | 01 | Remedial Design | 1987-11-01T00:00:00.000 | NaN | trans-1,2-dichloroethene | joseph m bianchi | 40-60 Delaware Avenue | NaN | Sidney | NY | 13838 | bendix corporation | NaN | NaN | NaN | NaN | NY | joseph m bianchi | 40-60 Delaware Avenue | NaN | Sidney | NY | 13838 | {'type': 'Point', 'coordinates': [-75.39992579... | 558 | 456.0 | 2039.0 |
82610 rows × 42 columns
下準備
# Summarize each column's dtype and non-null count to spot sparse columns.
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 82610 entries, 0 to 82609
Data columns (total 42 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Program Number 82610 non-null object
1 Program Type 82610 non-null object
2 Program Facility Name 82610 non-null object
3 Site Class 82610 non-null object
4 Address1 82610 non-null object
5 Address2 1190 non-null object
6 Locality 82448 non-null object
7 County 82610 non-null object
8 ZIPCode 82610 non-null object
9 SWIS Code 82610 non-null int64
10 DEC Region 82610 non-null int64
11 Latitude 82610 non-null float64
12 Longitude 82610 non-null float64
13 Control Code 82610 non-null object
14 Control Type 82610 non-null object
15 OU 82610 non-null object
16 Project Name 68153 non-null object
17 Project Completion Date 82549 non-null object
18 Waste Name 36047 non-null object
19 Contaminants 53667 non-null object
20 Owner Name 79344 non-null object
21 Owner Address1 78689 non-null object
22 Owner Address2 11719 non-null object
23 Owner City 78843 non-null object
24 Owner State 79344 non-null object
25 Owner ZIP 78547 non-null object
26 Disposal Name 31861 non-null object
27 Disposal Address1 8037 non-null object
28 Disposal Address2 1354 non-null object
29 Disposal City 6534 non-null object
30 Disposal ZIP 5516 non-null object
31 Disposal State 31927 non-null object
32 Operator Name 55801 non-null object
33 Operator Address1 53337 non-null object
34 Operator Address2 2915 non-null object
35 Operator City 53489 non-null object
36 Operator State 55859 non-null object
37 Operator Zip 51438 non-null object
38 Georeference 82610 non-null object
39 NYS Municipal Boundaries 2 82610 non-null int64
40 New York Zip Codes 2 82056 non-null float64
41 Counties 2 82517 non-null float64
dtypes: float64(4), int64(3), object(35)
memory usage: 26.5+ MB
# Select the columns in which more than 25% of the rows are missing.
# The result is a DataFrame holding just those columns.
null_columns = data.loc[:, data.isna().sum() > 0.25 * data.shape[0]]
null_columns
Address2 | Waste Name | Contaminants | Owner Address2 | Disposal Name | Disposal Address1 | Disposal Address2 | Disposal City | Disposal ZIP | Disposal State | Operator Name | Operator Address1 | Operator Address2 | Operator City | Operator State | Operator Zip | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | NaN | NaN | tetrachloroethene (PCE) | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | NaN | NaN | benzo(a)anthracene | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | NaN | NaN | mercury | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | NaN | NaN | benzo(a)pyrene | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
4 | NaN | NaN | tetrachloroethene (PCE) | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
82605 | NaN | DICHLOROETHYLENE, XYLENE, STYRENE, | styrene | NaN | TRI-CITY BARREL COMPANY, INC. | NaN | NaN | NaN | NaN | ZZ | WARNER GARY | R.D. 1, PO BOX 88 | NaN | PORT CRANE | NY | NaN |
82606 | NaN | NaN | methylene chloride | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
82607 | NaN | TETRACHLOROETHYLENE | tetrachloroethene (PCE) | NaN | NEST EQUITIES, INC. | NaN | NaN | NaN | NaN | ZZ | Audie Kranz and Wilbur Kranz | 700-712 Main Street | NaN | Westbury | NY | 11590 |
82608 | NaN | DICHLOROETHYLENE, XYLENE, STYRENE, | styrene | NaN | TRI-CITY BARREL COMPANY, INC. | NaN | NaN | NaN | NaN | ZZ | WARNER GARY | R.D. 1, PO BOX 88 | NaN | PORT CRANE | NY | NaN |
82609 | NaN | NaN | trans-1,2-dichloroethene | NaN | bendix corporation | NaN | NaN | NaN | NaN | NY | joseph m bianchi | 40-60 Delaware Avenue | NaN | Sidney | NY | 13838 |
82610 rows × 16 columns
# Drop the sparse columns selected into `null_columns`. `null_columns` is a
# DataFrame, so pass its column labels explicitly instead of relying on
# DataFrame iteration to yield them.
data = data.drop(columns=null_columns.columns)

# Remaining columns that are dropped as unneeded.
unneeded_columns = [
    'Program Number',
    'Project Name',
    'Program Facility Name',
    'Address1',
    'Locality',
    'ZIPCode',
    'SWIS Code',
    'Owner Name',
    'Owner Address1',
    'Owner City',
    'Owner State',
    'Owner ZIP',
    'Georeference',
]
data = data.drop(columns=unneeded_columns)
data
Program Type | Site Class | County | DEC Region | Latitude | Longitude | Control Code | Control Type | OU | Project Completion Date | NYS Municipal Boundaries 2 | New York Zip Codes 2 | Counties 2 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | BCP | C | Queens | 2 | 40.768000 | -73.935028 | Environmental Easement | Environmental Easement | 01 | 2016-09-12T00:00:00.000 | 196 | 878.0 | 2137.0 |
1 | ERP | A | Oneida | 6 | 43.204978 | -75.434194 | Environmental Easement | Environmental Easement | 01 | 2018-03-01T00:00:00.000 | 989 | 1366.0 | 625.0 |
2 | BCP | C | Erie | 9 | 42.943614 | -78.885092 | Environmental Easement | Environmental Easement | 01 | 2018-12-12T00:00:00.000 | 300 | 63.0 | 2041.0 |
3 | BCP | A | Monroe | 8 | 43.084683 | -77.610928 | Environmental Easement | Environmental Easement | 01 | 2020-12-31T00:00:00.000 | 546 | 1682.0 | 2093.0 |
4 | VCP | C | Kings | 2 | 40.683078 | -73.922924 | Deed Restriction | Deed Restriction | 01A | 2008-06-11T00:00:00.000 | 894 | 816.0 | 2090.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
82605 | HW | 04 | Broome | 7 | 42.158504 | -75.811645 | Other Controls | Other Controls | 01 | 2003-01-16T00:00:00.000 | 945 | 453.0 | 2033.0 |
82606 | BCP | C | Kings | 2 | 40.671538 | -74.011473 | Environmental Easement | Environmental Easement | 01 | 2006-03-31T00:00:00.000 | 894 | 814.0 | 2090.0 |
82607 | HW | 04 | Nassau | 1 | 40.756999 | -73.556776 | Decision Document | Decision Document | 01 | 2003-03-19T00:00:00.000 | 60 | 997.0 | 2094.0 |
82608 | HW | 04 | Broome | 7 | 42.158504 | -75.811645 | Deed Restriction | Deed Restriction | 01 | 2000-03-31T00:00:00.000 | 945 | 453.0 | 2033.0 |
82609 | HW | 02 | Delaware | 4 | 42.307482 | -75.399926 | Deed Restriction | Deed Restriction | 01 | 1987-11-01T00:00:00.000 | 558 | 456.0 | 2039.0 |
82610 rows × 13 columns
def get_uniques(df, columns):
    """Return the unique values of each requested column.

    Args:
        df: DataFrame to inspect.
        columns: Iterable of column labels present in ``df``.

    Returns:
        Dict mapping each column label to a list of that column's unique
        values, in order of first appearance (missing values included).
    """
    return {column: list(df[column].unique()) for column in columns}
def get_categorical_columns(df):
    """Return the labels of the object-dtype columns of ``df``.

    Args:
        df: DataFrame to inspect.

    Returns:
        List of column labels whose dtype is ``object``, in column order.
    """
    # Walk (label, dtype) pairs rather than indexing df.dtypes by label:
    # indexing returns a Series when a label is duplicated, and the
    # comparison result can then not be used as a condition.
    return [column for column, dtype in df.dtypes.items() if dtype == 'object']
# Inspect the distinct values of every object-dtype column that remains.
get_uniques(data, get_categorical_columns(data))
{'Control Code': ['Environmental Easement',
'Deed Restriction',
'Environmental Notice',
'Other Controls',
'Decision Document'],
'Control Type': ['Environmental Easement',
'Deed Restriction',
'Environmental Notice',
'Other Controls',
'Decision Document'],
'County': ['Queens',
'Oneida',
'Erie',
'Monroe',
'Kings',
'Bronx',
'Cattaraugus',
'Schenectady',
'Broome',
'Franklin',
'Onondaga',
'Delaware',
'Richmond',
'Nassau',
'Dutchess',
'Westchester',
'Chenango',
'Niagara',
'New York',
'Ulster',
'Orleans',
'Chemung',
'Lewis',
'Ontario',
'Montgomery',
'Herkimer',
'Wayne',
'Saratoga',
'Rockland',
'Jefferson',
'Suffolk',
'Washington',
'Orange',
'Rensselaer',
'Oswego',
'Fulton',
'Albany',
'Clinton',
'St Lawrence',
'Seneca',
'Cayuga',
'Chautauqua',
'Tompkins',
'Genesee',
'Tioga',
'Putnam',
'Madison',
'Greene',
'Yates',
'Sullivan',
'Allegany',
'Warren',
'Schoharie',
'Livingston',
'Steuben',
'Cortland',
'Wyoming',
'Columbia',
'Schuyler',
'Otsego',
'Essex'],
'OU': ['01',
'01A',
'02',
'03',
'04B',
'00',
'05',
'09',
'12',
'21',
'11',
'16',
'17',
'04',
'06',
'01B',
'02A',
'01C',
'01E',
'05A',
'03A',
'06A',
'04A',
'03B',
'02B',
'04C',
'07',
'04D',
'03C',
'01H',
'01J',
'01F',
'01D',
'01K',
'08',
'01a',
'02C',
'18',
'20',
'14',
'25',
'15',
'13',
'19',
'42'],
'Program Type': ['BCP', 'ERP', 'VCP', 'HW', 'RCRA'],
'Project Completion Date': ['2016-09-12T00:00:00.000',
'2018-03-01T00:00:00.000',
'2018-12-12T00:00:00.000',
'2020-12-31T00:00:00.000',
'2008-06-11T00:00:00.000',
'2011-08-16T00:00:00.000',
'2016-11-30T00:00:00.000',
'2014-03-19T00:00:00.000',
'2012-04-10T00:00:00.000',
'2021-07-31T00:00:00.000',
'2010-03-17T00:00:00.000',
'2000-03-27T00:00:00.000',
'1991-08-01T00:00:00.000',
nan,
'1999-03-01T00:00:00.000',
'2000-12-22T00:00:00.000',
'2007-07-30T00:00:00.000',
'2001-05-04T00:00:00.000',
'2014-10-30T00:00:00.000',
'2012-11-16T00:00:00.000',
'2008-01-10T00:00:00.000',
'1995-09-01T00:00:00.000',
'2014-05-14T00:00:00.000',
'2020-05-29T00:00:00.000',
'1990-10-01T00:00:00.000',
'2005-10-27T00:00:00.000',
'2022-06-30T00:00:00.000',
'2010-09-30T00:00:00.000',
'2011-03-15T00:00:00.000',
'2003-09-25T00:00:00.000',
'2003-09-30T00:00:00.000',
'2016-12-20T00:00:00.000',
'1993-03-01T00:00:00.000',
'2003-09-04T00:00:00.000',
'2019-12-31T00:00:00.000',
'2016-12-21T00:00:00.000',
'2002-03-26T00:00:00.000',
'2021-08-31T00:00:00.000',
'2015-03-20T00:00:00.000',
'2018-02-21T00:00:00.000',
'2002-03-19T00:00:00.000',
'2014-10-15T00:00:00.000',
'2001-03-26T00:00:00.000',
'2012-07-03T00:00:00.000',
'2011-03-29T00:00:00.000',
'1994-09-01T00:00:00.000',
'2006-12-26T00:00:00.000',
'1987-01-01T00:00:00.000',
'2021-08-30T00:00:00.000',
'2021-02-05T00:00:00.000',
'2014-03-31T00:00:00.000',
'2018-02-12T00:00:00.000',
'2020-10-31T00:00:00.000',
'2011-05-13T00:00:00.000',
'2019-05-08T00:00:00.000',
'2018-07-12T00:00:00.000',
'2021-10-31T00:00:00.000',
'1989-10-01T00:00:00.000',
'2021-06-30T00:00:00.000',
'2001-09-14T00:00:00.000',
'2021-04-30T00:00:00.000',
'2007-07-09T00:00:00.000',
'2009-12-04T00:00:00.000',
'1991-12-01T00:00:00.000',
'2017-07-12T00:00:00.000',
'1995-02-01T00:00:00.000',
'1998-06-17T00:00:00.000',
'1991-11-01T00:00:00.000',
'2007-03-30T00:00:00.000',
'2009-03-25T00:00:00.000',
'2010-03-30T00:00:00.000',
'2014-09-02T00:00:00.000',
'2002-11-12T00:00:00.000',
'2021-12-31T00:00:00.000',
'1999-01-01T00:00:00.000',
'2018-07-23T00:00:00.000',
'2001-04-01T00:00:00.000',
'1992-09-01T00:00:00.000',
'1988-01-01T00:00:00.000',
'2021-03-01T00:00:00.000',
'2014-04-11T00:00:00.000',
'2007-04-30T00:00:00.000',
'2002-08-19T00:00:00.000',
'2021-05-31T00:00:00.000',
'2011-03-31T00:00:00.000',
'2021-11-30T00:00:00.000',
'2016-03-25T00:00:00.000',
'2011-10-12T00:00:00.000',
'1995-07-01T00:00:00.000',
'2004-05-18T00:00:00.000',
'2004-03-30T00:00:00.000',
'2003-11-14T00:00:00.000',
'1998-03-01T00:00:00.000',
'1995-06-01T00:00:00.000',
'2014-12-01T00:00:00.000',
'2008-09-02T00:00:00.000',
'2005-11-25T00:00:00.000',
'2002-12-31T00:00:00.000',
'2008-12-29T00:00:00.000',
'2021-10-29T00:00:00.000',
'2008-03-31T00:00:00.000',
'2008-04-29T00:00:00.000',
'2006-10-17T00:00:00.000',
'2007-01-31T00:00:00.000',
'2021-01-29T00:00:00.000',
'2016-10-28T00:00:00.000',
'2001-10-20T00:00:00.000',
'2008-03-28T00:00:00.000',
'2012-02-07T00:00:00.000',
'2018-03-31T00:00:00.000',
'2021-05-30T00:00:00.000',
'2010-09-17T00:00:00.000',
'2007-12-11T00:00:00.000',
'1997-07-01T00:00:00.000',
'2003-01-21T00:00:00.000',
'2013-04-30T00:00:00.000',
'2009-03-31T00:00:00.000',
'2014-12-22T00:00:00.000',
'2016-08-16T00:00:00.000',
'2003-11-20T00:00:00.000',
'2011-06-15T00:00:00.000',
'1991-09-01T00:00:00.000',
'2007-05-03T00:00:00.000',
'2000-05-30T00:00:00.000',
'2016-03-31T00:00:00.000',
'1998-06-01T00:00:00.000',
'2015-11-20T00:00:00.000',
'2019-09-27T00:00:00.000',
'2008-03-21T00:00:00.000',
'2015-09-30T00:00:00.000',
'1996-08-01T00:00:00.000',
'1994-03-01T00:00:00.000',
'1987-02-01T00:00:00.000',
'2020-09-25T00:00:00.000',
'2020-08-04T00:00:00.000',
'2011-12-30T00:00:00.000',
'2003-12-16T00:00:00.000',
'2000-06-01T00:00:00.000',
'2014-07-28T00:00:00.000',
'2017-06-30T00:00:00.000',
'2009-08-25T00:00:00.000',
'2012-11-05T00:00:00.000',
'2020-05-21T00:00:00.000',
'1998-10-01T00:00:00.000',
'2006-03-28T00:00:00.000',
'2015-11-05T00:00:00.000',
'2018-12-18T00:00:00.000',
'1998-12-01T00:00:00.000',
'2016-02-22T00:00:00.000',
'2015-04-24T00:00:00.000',
'2019-12-23T00:00:00.000',
'2017-11-03T00:00:00.000',
'2016-12-16T00:00:00.000',
'2010-07-07T00:00:00.000',
'2018-09-14T00:00:00.000',
'2013-04-25T00:00:00.000',
'2005-03-24T00:00:00.000',
'2008-02-28T00:00:00.000',
'2015-12-29T00:00:00.000',
'2017-08-23T00:00:00.000',
'2015-10-01T00:00:00.000',
'2019-09-11T00:00:00.000',
'2013-01-30T00:00:00.000',
'1998-01-01T00:00:00.000',
'2015-01-22T00:00:00.000',
'2015-11-30T00:00:00.000',
'1993-08-01T00:00:00.000',
'2010-09-01T00:00:00.000',
'2014-12-19T00:00:00.000',
'2015-02-27T00:00:00.000',
'2019-03-04T00:00:00.000',
'2015-03-30T00:00:00.000',
'2000-03-31T00:00:00.000',
'2014-05-15T00:00:00.000',
'2013-01-08T00:00:00.000',
'2005-03-31T00:00:00.000',
'2017-03-25T00:00:00.000',
'2021-09-01T00:00:00.000',
'1997-08-01T00:00:00.000',
'2017-09-25T00:00:00.000',
'1997-03-01T00:00:00.000',
'1996-02-01T00:00:00.000',
'2007-12-24T00:00:00.000',
'2016-04-20T00:00:00.000',
'2003-12-30T00:00:00.000',
'2014-04-02T00:00:00.000',
'2013-02-04T00:00:00.000',
'2015-06-05T00:00:00.000',
'2010-01-29T00:00:00.000',
'2015-11-06T00:00:00.000',
'2015-05-14T00:00:00.000',
'2017-12-28T00:00:00.000',
'2014-10-28T00:00:00.000',
'2014-03-26T00:00:00.000',
'2009-03-30T00:00:00.000',
'2009-11-13T00:00:00.000',
'2011-03-24T00:00:00.000',
'2018-05-18T00:00:00.000',
'2021-03-31T00:00:00.000',
'1999-08-01T00:00:00.000',
'1995-03-01T00:00:00.000',
'2013-08-07T00:00:00.000',
'2017-11-17T00:00:00.000',
'1992-03-01T00:00:00.000',
'2010-03-31T00:00:00.000',
'2019-12-27T00:00:00.000',
'2010-10-18T00:00:00.000',
'2005-11-12T00:00:00.000',
'1994-07-01T00:00:00.000',
'2019-05-07T00:00:00.000',
'2016-08-26T00:00:00.000',
'1993-12-01T00:00:00.000',
'1998-02-01T00:00:00.000',
'2003-03-31T00:00:00.000',
'2019-12-19T00:00:00.000',
'2014-11-25T00:00:00.000',
'2019-02-11T00:00:00.000',
'2025-12-31T00:00:00.000',
'2018-10-15T00:00:00.000',
'2018-11-30T00:00:00.000',
'2012-01-18T00:00:00.000',
'2006-03-31T00:00:00.000',
'2014-06-30T00:00:00.000',
'2003-11-25T00:00:00.000',
'2017-04-17T00:00:00.000',
'2006-03-22T00:00:00.000',
'2003-01-20T00:00:00.000',
'2005-07-26T00:00:00.000',
'2014-10-21T00:00:00.000',
'2013-12-09T00:00:00.000',
'2016-07-13T00:00:00.000',
'2015-11-16T00:00:00.000',
'1999-04-01T00:00:00.000',
'2013-09-30T00:00:00.000',
'2015-06-10T00:00:00.000',
'2020-05-26T00:00:00.000',
'2012-12-24T00:00:00.000',
'2019-09-30T00:00:00.000',
'2008-01-14T00:00:00.000',
'1998-08-01T00:00:00.000',
'2014-06-12T00:00:00.000',
'2004-03-31T00:00:00.000',
'2000-11-27T00:00:00.000',
'2014-12-12T00:00:00.000',
'2010-10-19T00:00:00.000',
'2017-12-29T00:00:00.000',
'2021-09-15T00:00:00.000',
'2015-11-10T00:00:00.000',
'2011-11-25T00:00:00.000',
'1997-12-01T00:00:00.000',
'1991-03-01T00:00:00.000',
'2015-04-15T00:00:00.000',
'1984-12-01T00:00:00.000',
'2015-12-14T00:00:00.000',
'2008-12-01T00:00:00.000',
'2019-12-18T00:00:00.000',
'2010-09-27T00:00:00.000',
'2001-05-31T00:00:00.000',
'1996-03-01T00:00:00.000',
'2013-09-27T00:00:00.000',
'2012-08-30T00:00:00.000',
'2013-04-05T00:00:00.000',
'2014-04-03T00:00:00.000',
'2015-09-28T00:00:00.000',
'2015-07-13T00:00:00.000',
'2019-01-07T00:00:00.000',
'2006-12-29T00:00:00.000',
'2019-01-14T00:00:00.000',
'2019-12-13T00:00:00.000',
'2016-12-09T00:00:00.000',
'1993-03-24T00:00:00.000',
'2009-11-16T00:00:00.000',
'2013-05-22T00:00:00.000',
'2014-07-10T00:00:00.000',
'1987-05-01T00:00:00.000',
'2015-05-28T00:00:00.000',
'2006-04-27T00:00:00.000',
'2014-12-23T00:00:00.000',
'2011-09-14T00:00:00.000',
'2015-10-19T00:00:00.000',
'2012-10-26T00:00:00.000',
'2003-12-04T00:00:00.000',
'2013-03-29T00:00:00.000',
'2005-01-12T00:00:00.000',
'2023-09-30T00:00:00.000',
'2005-08-30T00:00:00.000',
'2006-10-12T00:00:00.000',
'1994-12-01T00:00:00.000',
'2008-04-01T00:00:00.000',
'2009-01-26T00:00:00.000',
'2012-12-31T00:00:00.000',
'2017-08-09T00:00:00.000',
'2006-05-25T00:00:00.000',
'2015-09-01T00:00:00.000',
'2004-03-16T00:00:00.000',
'2002-03-28T00:00:00.000',
'2019-01-02T00:00:00.000',
'2013-08-23T00:00:00.000',
'2005-09-20T00:00:00.000',
'2015-03-12T00:00:00.000',
'2016-03-30T00:00:00.000',
'2012-03-15T00:00:00.000',
'2018-10-16T00:00:00.000',
'2019-01-11T00:00:00.000',
'2009-05-29T00:00:00.000',
'2019-08-19T00:00:00.000',
'2019-10-11T00:00:00.000',
'2014-08-01T00:00:00.000',
'2003-08-11T00:00:00.000',
'2015-12-08T00:00:00.000',
'2006-12-28T00:00:00.000',
'2019-12-16T00:00:00.000',
'2018-03-28T00:00:00.000',
'2009-04-06T00:00:00.000',
'2018-08-09T00:00:00.000',
'2011-03-28T00:00:00.000',
'2003-01-06T00:00:00.000',
'2014-03-28T00:00:00.000',
'2013-12-30T00:00:00.000',
'2015-07-06T00:00:00.000',
'2014-03-04T00:00:00.000',
'2000-03-30T00:00:00.000',
'2001-09-13T00:00:00.000',
'2015-11-12T00:00:00.000',
'2020-05-19T00:00:00.000',
'2018-12-20T00:00:00.000',
'2021-01-31T00:00:00.000',
'2017-03-30T00:00:00.000',
'2014-10-09T00:00:00.000',
'1996-11-08T00:00:00.000',
'2011-08-24T00:00:00.000',
'2018-12-03T00:00:00.000',
'1996-09-01T00:00:00.000',
'2017-02-15T00:00:00.000',
'2011-09-20T00:00:00.000',
'2019-05-20T00:00:00.000',
'2018-11-02T00:00:00.000',
'2008-12-30T00:00:00.000',
'1995-01-01T00:00:00.000',
'2011-07-05T00:00:00.000',
'2018-05-29T00:00:00.000',
'2019-12-24T00:00:00.000',
'2020-11-30T00:00:00.000',
'2013-12-31T00:00:00.000',
'2004-03-24T00:00:00.000',
'1996-10-01T00:00:00.000',
'2016-11-21T00:00:00.000',
'1988-05-01T00:00:00.000',
'2015-12-22T00:00:00.000',
'2013-11-25T00:00:00.000',
'2015-05-15T00:00:00.000',
'1991-07-01T00:00:00.000',
'2010-12-17T00:00:00.000',
'2011-10-13T00:00:00.000',
'2014-07-21T00:00:00.000',
'2015-10-07T00:00:00.000',
'2018-06-29T00:00:00.000',
'2009-07-31T00:00:00.000',
'2012-02-28T00:00:00.000',
'2017-08-24T00:00:00.000',
'1999-08-30T00:00:00.000',
'2016-12-28T00:00:00.000',
'1999-10-01T00:00:00.000',
'2015-01-06T00:00:00.000',
'2015-06-01T00:00:00.000',
'2017-10-31T00:00:00.000',
'2015-02-03T00:00:00.000',
'2009-02-17T00:00:00.000',
'2017-10-17T00:00:00.000',
'1992-04-01T00:00:00.000',
'1994-10-01T00:00:00.000',
'2019-04-26T00:00:00.000',
'2015-01-20T00:00:00.000',
'2008-08-12T00:00:00.000',
'2019-08-28T00:00:00.000',
'2010-03-29T00:00:00.000',
'2074-04-02T00:00:00.000',
'2019-10-03T00:00:00.000',
'2003-01-03T00:00:00.000',
'2020-05-20T00:00:00.000',
'2020-12-30T00:00:00.000',
'2014-09-29T00:00:00.000',
'2000-05-15T00:00:00.000',
'1989-09-01T00:00:00.000',
'2013-12-19T00:00:00.000',
'2010-06-08T00:00:00.000',
'2003-08-04T00:00:00.000',
'1982-01-01T00:00:00.000',
'2001-08-30T00:00:00.000',
'2018-06-11T00:00:00.000',
'2019-08-16T00:00:00.000',
'2015-12-17T00:00:00.000',
'2016-01-19T00:00:00.000',
'2015-04-28T00:00:00.000',
'2008-11-20T00:00:00.000',
'1994-11-01T00:00:00.000',
'2015-12-21T00:00:00.000',
'2005-05-10T00:00:00.000',
'1992-10-01T00:00:00.000',
'2012-12-21T00:00:00.000',
'2011-09-28T00:00:00.000',
'2005-03-18T00:00:00.000',
'2015-03-16T00:00:00.000',
'2018-03-19T00:00:00.000',
'2016-12-05T00:00:00.000',
'1999-09-01T00:00:00.000',
'2012-12-07T00:00:00.000',
'1996-07-01T00:00:00.000',
'2017-12-27T00:00:00.000',
'2012-05-03T00:00:00.000',
'2000-08-30T00:00:00.000',
'2024-12-02T00:00:00.000',
'2020-09-30T00:00:00.000',
'2019-03-21T00:00:00.000',
'2009-11-30T00:00:00.000',
'1996-01-01T00:00:00.000',
'2004-08-27T00:00:00.000',
'2006-09-19T00:00:00.000',
'2003-10-20T00:00:00.000',
'2012-06-19T00:00:00.000',
'2010-06-02T00:00:00.000',
'1993-02-01T00:00:00.000',
'1999-07-01T00:00:00.000',
'2007-12-12T00:00:00.000',
'2007-03-27T00:00:00.000',
'2009-01-07T00:00:00.000',
'2014-09-19T00:00:00.000',
'1996-12-01T00:00:00.000',
'2007-02-13T00:00:00.000',
'2003-06-23T00:00:00.000',
'2010-12-28T00:00:00.000',
'2016-11-04T00:00:00.000',
'2001-06-26T00:00:00.000',
'2018-12-28T00:00:00.000',
'2018-10-03T00:00:00.000',
'2014-04-23T00:00:00.000',
'2006-02-15T00:00:00.000',
'1990-09-01T00:00:00.000',
'1995-12-01T00:00:00.000',
'2012-12-19T00:00:00.000',
'2018-03-21T00:00:00.000',
'2011-10-18T00:00:00.000',
'2020-06-15T00:00:00.000',
'2015-12-11T00:00:00.000',
'2001-06-11T00:00:00.000',
'1990-03-01T00:00:00.000',
'2019-03-22T00:00:00.000',
'2012-07-27T00:00:00.000',
'1989-03-01T00:00:00.000',
'2013-09-04T00:00:00.000',
'2015-06-02T00:00:00.000',
'2001-12-30T00:00:00.000',
'2016-07-06T00:00:00.000',
'2021-07-30T00:00:00.000',
'2013-03-19T00:00:00.000',
'2014-12-09T00:00:00.000',
'2020-06-10T00:00:00.000',
'2016-03-10T00:00:00.000',
'2002-03-27T00:00:00.000',
'2011-02-15T00:00:00.000',
'2005-06-30T00:00:00.000',
'2015-12-01T00:00:00.000',
'2018-12-26T00:00:00.000',
'2007-12-17T00:00:00.000',
'2013-11-15T00:00:00.000',
'2011-06-24T00:00:00.000',
'1991-10-01T00:00:00.000',
'2018-02-15T00:00:00.000',
'2018-03-30T00:00:00.000',
'2015-05-29T00:00:00.000',
'1995-10-01T00:00:00.000',
'2016-01-26T00:00:00.000',
'2021-05-07T00:00:00.000',
'1995-11-01T00:00:00.000',
'2003-08-25T00:00:00.000',
'2013-05-15T00:00:00.000',
'1991-01-01T00:00:00.000',
'2018-06-25T00:00:00.000',
'2009-03-27T00:00:00.000',
'2014-12-18T00:00:00.000',
'2010-07-22T00:00:00.000',
'2014-05-08T00:00:00.000',
'2008-03-10T00:00:00.000',
'2012-12-04T00:00:00.000',
'2006-05-29T00:00:00.000',
'2003-09-15T00:00:00.000',
'2018-03-08T00:00:00.000',
'2012-04-06T00:00:00.000',
'2017-09-06T00:00:00.000',
'2017-11-14T00:00:00.000',
'2008-04-10T00:00:00.000',
'2014-06-25T00:00:00.000',
'2011-01-21T00:00:00.000',
'2015-12-10T00:00:00.000',
'2015-02-18T00:00:00.000',
'1993-06-01T00:00:00.000',
'2006-03-23T00:00:00.000',
'2018-06-28T00:00:00.000',
'2016-05-19T00:00:00.000',
'2018-06-01T00:00:00.000',
'2018-05-17T00:00:00.000',
'2002-11-29T00:00:00.000',
'2007-09-17T00:00:00.000',
'2014-11-19T00:00:00.000',
'2014-08-14T00:00:00.000',
'2017-04-30T00:00:00.000',
'2002-03-29T00:00:00.000',
'2012-03-31T00:00:00.000',
'1986-01-01T00:00:00.000',
'2019-01-03T00:00:00.000',
'2020-10-23T00:00:00.000',
'1992-01-01T00:00:00.000',
'2008-09-09T00:00:00.000',
'2013-10-28T00:00:00.000',
'2019-03-27T00:00:00.000',
'2000-06-08T00:00:00.000',
'2008-12-16T00:00:00.000',
'2013-10-11T00:00:00.000',
'2016-11-18T00:00:00.000',
'2006-12-14T00:00:00.000',
'2006-10-06T00:00:00.000',
'2014-12-17T00:00:00.000',
'2015-03-31T00:00:00.000',
'2010-10-01T00:00:00.000',
'1995-08-01T00:00:00.000',
'2012-12-13T00:00:00.000',
'2006-12-22T00:00:00.000',
'2015-03-19T00:00:00.000',
'2015-08-15T00:00:00.000',
'2017-08-18T00:00:00.000',
'2009-06-22T00:00:00.000',
'2009-08-06T00:00:00.000',
'2004-04-27T00:00:00.000',
'2008-08-14T00:00:00.000',
'2019-11-15T00:00:00.000',
'2007-03-22T00:00:00.000',
'1999-06-01T00:00:00.000',
'2001-11-20T00:00:00.000',
'2004-11-19T00:00:00.000',
'2013-05-16T00:00:00.000',
'2014-09-26T00:00:00.000',
'2016-11-09T00:00:00.000',
'1987-09-01T00:00:00.000',
'2014-12-30T00:00:00.000',
'2012-09-06T00:00:00.000',
'2000-02-29T00:00:00.000',
'2001-03-30T00:00:00.000',
'2009-01-30T00:00:00.000',
'2007-03-29T00:00:00.000',
'2019-12-20T00:00:00.000',
'2019-03-25T00:00:00.000',
'2017-09-21T00:00:00.000',
'2013-05-14T00:00:00.000',
'2018-08-03T00:00:00.000',
'2019-11-08T00:00:00.000',
'2022-03-31T00:00:00.000',
'2008-03-27T00:00:00.000',
'2012-03-27T00:00:00.000',
'2016-02-18T00:00:00.000',
'2024-09-01T00:00:00.000',
'2004-03-29T00:00:00.000',
'2000-03-20T00:00:00.000',
'2020-04-28T00:00:00.000',
'2015-12-09T00:00:00.000',
'2003-03-29T00:00:00.000',
'2019-12-11T00:00:00.000',
'2010-01-04T00:00:00.000',
'2016-12-22T00:00:00.000',
'2018-03-29T00:00:00.000',
'2012-03-19T00:00:00.000',
'2015-06-11T00:00:00.000',
'2000-10-04T00:00:00.000',
'2023-03-15T00:00:00.000',
'2011-12-22T00:00:00.000',
'2010-11-29T00:00:00.000',
'2000-06-09T00:00:00.000',
'2015-12-15T00:00:00.000',
'2017-02-21T00:00:00.000',
'2014-03-24T00:00:00.000',
'2015-03-06T00:00:00.000',
'2015-04-16T00:00:00.000',
'2013-09-25T00:00:00.000',
'2008-05-16T00:00:00.000',
'2020-08-13T00:00:00.000',
'1997-11-01T00:00:00.000',
'2018-04-16T00:00:00.000',
'2015-12-23T00:00:00.000',
'2016-12-27T00:00:00.000',
'2017-05-30T00:00:00.000',
'2018-04-02T00:00:00.000',
'2010-03-18T00:00:00.000',
'2012-11-28T00:00:00.000',
'2002-12-19T00:00:00.000',
'1988-03-01T00:00:00.000',
'2012-01-20T00:00:00.000',
'2002-11-18T00:00:00.000',
'2015-03-26T00:00:00.000',
'2012-03-14T00:00:00.000',
'2006-10-19T00:00:00.000',
'2007-07-31T00:00:00.000',
'2009-12-01T00:00:00.000',
'2009-07-17T00:00:00.000',
'2008-05-28T00:00:00.000',
'2007-12-14T00:00:00.000',
'2015-06-04T00:00:00.000',
'2010-01-08T00:00:00.000',
'2017-08-04T00:00:00.000',
'2007-06-27T00:00:00.000',
'2019-04-19T00:00:00.000',
'2020-10-15T00:00:00.000',
'2019-10-29T00:00:00.000',
'2000-07-10T00:00:00.000',
'2001-06-27T00:00:00.000',
'1988-08-01T00:00:00.000',
'2018-11-16T00:00:00.000',
'1992-07-01T00:00:00.000',
'2004-09-27T00:00:00.000',
'2002-01-31T00:00:00.000',
'1997-09-01T00:00:00.000',
'2022-12-01T00:00:00.000',
'2013-02-05T00:00:00.000',
'2015-05-31T00:00:00.000',
'2004-03-01T00:00:00.000',
'1993-04-01T00:00:00.000',
'2011-09-30T00:00:00.000',
'2018-01-24T00:00:00.000',
'2018-08-10T00:00:00.000',
'2011-09-09T00:00:00.000',
'2015-05-27T00:00:00.000',
'2013-02-25T00:00:00.000',
'2000-11-29T00:00:00.000',
'2011-02-14T00:00:00.000',
'1994-05-01T00:00:00.000',
'2012-03-30T00:00:00.000',
'2004-04-14T00:00:00.000',
'2022-09-16T00:00:00.000',
'2016-12-13T00:00:00.000',
'2006-09-05T00:00:00.000',
'1988-07-01T00:00:00.000',
'2018-11-15T00:00:00.000',
'2010-12-22T00:00:00.000',
'2018-09-25T00:00:00.000',
'2004-04-04T00:00:00.000',
'2014-09-18T00:00:00.000',
'1998-05-01T00:00:00.000',
'2013-10-18T00:00:00.000',
'2000-02-25T00:00:00.000',
'2017-03-01T00:00:00.000',
'2016-01-07T00:00:00.000',
'2015-09-15T00:00:00.000',
'2003-09-12T00:00:00.000',
'2006-07-27T00:00:00.000',
'2019-07-26T00:00:00.000',
'2015-08-03T00:00:00.000',
'2000-11-09T00:00:00.000',
'2011-03-30T00:00:00.000',
'2002-12-30T00:00:00.000',
'1981-01-01T00:00:00.000',
'2004-12-31T00:00:00.000',
'2008-02-13T00:00:00.000',
'2007-05-01T00:00:00.000',
'2017-11-16T00:00:00.000',
'2016-05-24T00:00:00.000',
'2000-10-02T00:00:00.000',
'2014-07-24T00:00:00.000',
'2003-04-09T00:00:00.000',
'2009-06-02T00:00:00.000',
'2009-10-27T00:00:00.000',
'2014-06-03T00:00:00.000',
'2019-06-12T00:00:00.000',
'2007-04-27T00:00:00.000',
'2004-06-28T00:00:00.000',
'2002-02-05T00:00:00.000',
'2008-03-05T00:00:00.000',
'1997-10-01T00:00:00.000',
'2004-12-20T00:00:00.000',
'2019-09-19T00:00:00.000',
'2008-12-11T00:00:00.000',
'2002-03-25T00:00:00.000',
'2004-07-30T00:00:00.000',
'2008-12-31T00:00:00.000',
'2008-09-25T00:00:00.000',
'2011-07-06T00:00:00.000',
'1999-03-15T00:00:00.000',
'2010-12-13T00:00:00.000',
'2010-07-14T00:00:00.000',
'2021-09-02T00:00:00.000',
'2011-11-18T00:00:00.000',
'2020-07-27T00:00:00.000',
'2002-05-22T00:00:00.000',
'2009-01-16T00:00:00.000',
'2018-12-14T00:00:00.000',
'2003-07-03T00:00:00.000',
'2010-05-24T00:00:00.000',
'1999-07-21T00:00:00.000',
'2017-05-25T00:00:00.000',
'1998-06-30T00:00:00.000',
'2004-06-01T00:00:00.000',
'2005-12-22T00:00:00.000',
'2002-09-24T00:00:00.000',
'2003-06-30T00:00:00.000',
'2012-06-14T00:00:00.000',
'2007-08-27T00:00:00.000',
'2000-10-06T00:00:00.000',
'2020-08-06T00:00:00.000',
'1992-11-01T00:00:00.000',
'2006-01-17T00:00:00.000',
'2011-06-23T00:00:00.000',
'2016-03-23T00:00:00.000',
'2008-05-29T00:00:00.000',
'1993-01-01T00:00:00.000',
'2008-07-17T00:00:00.000',
'2001-10-22T00:00:00.000',
'2014-05-20T00:00:00.000',
'2001-08-15T00:00:00.000',
'2010-05-03T00:00:00.000',
'2014-03-06T00:00:00.000',
'2012-04-04T00:00:00.000',
'2013-03-12T00:00:00.000',
'2006-04-19T00:00:00.000',
'2016-09-23T00:00:00.000',
'2000-12-20T00:00:00.000',
'2012-01-19T00:00:00.000',
'1992-12-01T00:00:00.000',
'2017-03-06T00:00:00.000',
'2013-03-22T00:00:00.000',
'1988-12-31T00:00:00.000',
'2014-01-27T00:00:00.000',
'2007-11-27T00:00:00.000',
'2004-01-30T00:00:00.000',
'2012-11-21T00:00:00.000',
'2010-03-08T00:00:00.000',
'1986-09-01T00:00:00.000',
'2003-12-23T00:00:00.000',
'2016-10-31T00:00:00.000',
'2012-07-13T00:00:00.000',
'2012-03-21T00:00:00.000',
'2000-02-02T00:00:00.000',
'2011-11-22T00:00:00.000',
'2018-07-18T00:00:00.000',
'1998-05-20T00:00:00.000',
'1998-04-01T00:00:00.000',
'2006-10-25T00:00:00.000',
'2012-03-02T00:00:00.000',
'2008-01-23T00:00:00.000',
'2002-03-04T00:00:00.000',
'2011-06-27T00:00:00.000',
'2007-03-28T00:00:00.000',
'2011-09-27T00:00:00.000',
'1999-11-01T00:00:00.000',
'2000-02-23T00:00:00.000',
'2024-04-30T00:00:00.000',
'1991-05-01T00:00:00.000',
'2005-10-25T00:00:00.000',
'2013-10-01T00:00:00.000',
'1999-08-24T00:00:00.000',
'2003-08-05T00:00:00.000',
'2015-03-25T00:00:00.000',
'2015-07-08T00:00:00.000',
'2006-08-08T00:00:00.000',
'1995-04-01T00:00:00.000',
'2012-02-02T00:00:00.000',
'2015-01-16T00:00:00.000',
'2017-03-31T00:00:00.000',
'2000-10-30T00:00:00.000',
'2002-11-27T00:00:00.000',
'1996-11-01T00:00:00.000',
'2011-12-19T00:00:00.000',
'2010-07-12T00:00:00.000',
'2010-12-06T00:00:00.000',
'2001-06-15T00:00:00.000',
'2006-04-14T00:00:00.000',
'2000-09-28T00:00:00.000',
'2013-11-13T00:00:00.000',
'2013-08-14T00:00:00.000',
'2018-03-13T00:00:00.000',
'2017-07-07T00:00:00.000',
'2007-08-13T00:00:00.000',
'2012-12-11T00:00:00.000',
'2000-02-01T00:00:00.000',
'2011-08-15T00:00:00.000',
'2012-07-10T00:00:00.000',
'2008-01-28T00:00:00.000',
'2009-02-26T00:00:00.000',
'2012-06-06T00:00:00.000',
'2007-10-19T00:00:00.000',
'2007-12-27T00:00:00.000',
'2011-06-17T00:00:00.000',
'2012-03-05T00:00:00.000',
'2013-02-22T00:00:00.000',
'2006-10-26T00:00:00.000',
'2007-12-28T00:00:00.000',
'2006-09-11T00:00:00.000',
'2018-04-19T00:00:00.000',
'2000-09-15T00:00:00.000',
'2006-06-30T00:00:00.000',
'1998-12-14T00:00:00.000',
'2021-03-19T00:00:00.000',
'2014-12-31T00:00:00.000',
'2016-10-03T00:00:00.000',
'2011-02-25T00:00:00.000',
'2015-09-14T00:00:00.000',
'2019-03-07T00:00:00.000',
'2007-02-01T00:00:00.000',
'2010-03-11T00:00:00.000',
'2018-06-18T00:00:00.000',
'2018-07-06T00:00:00.000',
'2017-11-29T00:00:00.000',
'2018-08-27T00:00:00.000',
'2020-07-10T00:00:00.000',
'2006-08-30T00:00:00.000',
'2014-12-08T00:00:00.000',
'2013-06-26T00:00:00.000',
'2013-10-16T00:00:00.000',
'2021-02-28T00:00:00.000',
'1982-02-01T00:00:00.000',
'2014-08-15T00:00:00.000',
'2008-07-31T00:00:00.000',
'2011-07-07T00:00:00.000',
'2008-09-18T00:00:00.000',
'1991-06-01T00:00:00.000',
'2000-12-30T00:00:00.000',
'2004-07-13T00:00:00.000',
'2018-12-21T00:00:00.000',
'1992-02-01T00:00:00.000',
'2005-12-02T00:00:00.000',
'2019-12-30T00:00:00.000',
'2000-03-23T00:00:00.000',
'2010-10-05T00:00:00.000',
'2017-08-02T00:00:00.000',
'2013-03-05T00:00:00.000',
'2015-08-11T00:00:00.000',
'2015-09-03T00:00:00.000',
'2012-03-29T00:00:00.000',
'1986-06-01T00:00:00.000',
'2015-10-30T00:00:00.000',
'2000-10-27T00:00:00.000',
'1988-12-01T00:00:00.000',
'2014-08-21T00:00:00.000',
'1984-11-01T00:00:00.000',
'2003-03-14T00:00:00.000',
'2016-06-30T00:00:00.000',
'2015-09-17T00:00:00.000',
'2010-06-11T00:00:00.000',
'1996-05-01T00:00:00.000',
'2011-04-04T00:00:00.000',
'2000-09-22T00:00:00.000',
'2012-10-15T00:00:00.000',
'2012-09-04T00:00:00.000',
'2015-04-30T00:00:00.000',
'2019-12-06T00:00:00.000',
'2006-08-23T00:00:00.000',
'2019-12-12T00:00:00.000',
'2019-09-12T00:00:00.000',
'2016-09-15T00:00:00.000',
'2019-11-29T00:00:00.000',
'2009-05-22T00:00:00.000',
'2018-10-17T00:00:00.000',
'2014-12-05T00:00:00.000',
'2001-06-30T00:00:00.000',
'1997-05-01T00:00:00.000',
'2006-03-03T00:00:00.000',
'2002-06-24T00:00:00.000',
'2014-01-22T00:00:00.000',
'2014-04-17T00:00:00.000',
'2011-09-02T00:00:00.000',
'2003-02-19T00:00:00.000',
'2006-08-31T00:00:00.000',
'1985-12-01T00:00:00.000',
'2005-09-30T00:00:00.000',
'2007-07-05T00:00:00.000',
'1994-01-01T00:00:00.000',
'2017-05-17T00:00:00.000',
'2017-01-19T00:00:00.000',
'2018-04-20T00:00:00.000',
'2017-08-21T00:00:00.000',
'2010-03-24T00:00:00.000',
'2003-12-02T00:00:00.000',
'2000-06-30T00:00:00.000',
'2013-10-07T00:00:00.000',
'2002-05-16T00:00:00.000',
'2008-09-12T00:00:00.000',
'2006-07-25T00:00:00.000',
'2015-04-13T00:00:00.000',
'2007-12-19T00:00:00.000',
'2013-09-23T00:00:00.000',
'1986-12-01T00:00:00.000',
'2018-08-31T00:00:00.000',
'2009-10-29T00:00:00.000',
'2007-12-21T00:00:00.000',
'1993-05-01T00:00:00.000',
'2017-12-05T00:00:00.000',
'1983-03-01T00:00:00.000',
'2019-10-22T00:00:00.000',
'2018-02-09T00:00:00.000',
'2020-07-01T00:00:00.000',
'1990-11-01T00:00:00.000',
'2002-05-01T00:00:00.000',
'2017-05-09T00:00:00.000',
'2015-09-16T00:00:00.000',
'2007-02-15T00:00:00.000',
'2007-02-07T00:00:00.000',
'2015-06-16T00:00:00.000',
'2017-10-12T00:00:00.000',
'2008-06-30T00:00:00.000',
'1987-10-01T00:00:00.000',
'2014-03-27T00:00:00.000',
'2008-09-30T00:00:00.000',
'2013-01-31T00:00:00.000',
'2019-07-22T00:00:00.000',
'2000-09-13T00:00:00.000',
'2014-09-16T00:00:00.000',
'1989-07-01T00:00:00.000',
'2015-11-23T00:00:00.000',
'2004-08-06T00:00:00.000',
'2004-03-23T00:00:00.000',
'2001-03-23T00:00:00.000',
'2018-07-13T00:00:00.000',
'2001-12-18T00:00:00.000',
'1998-09-01T00:00:00.000',
'2022-07-31T00:00:00.000',
'2004-09-30T00:00:00.000',
'2017-03-09T00:00:00.000',
'2019-04-02T00:00:00.000',
'1991-02-01T00:00:00.000',
'2013-09-13T00:00:00.000',
'2011-06-03T00:00:00.000',
'2005-11-03T00:00:00.000',
'2015-11-24T00:00:00.000',
'2011-10-04T00:00:00.000',
'2017-10-19T00:00:00.000',
'2020-12-23T00:00:00.000',
'2014-03-20T00:00:00.000',
'2009-10-22T00:00:00.000',
'1998-01-21T00:00:00.000',
'2014-11-18T00:00:00.000',
'2006-01-09T00:00:00.000',
'2011-03-04T00:00:00.000',
'2006-10-04T00:00:00.000',
'2014-09-04T00:00:00.000',
'2015-01-07T00:00:00.000',
'2016-07-21T00:00:00.000',
'2009-11-05T00:00:00.000',
'2012-12-12T00:00:00.000',
'2013-05-02T00:00:00.000',
'2007-02-12T00:00:00.000',
'2007-03-21T00:00:00.000',
'2012-09-24T00:00:00.000',
'2012-12-18T00:00:00.000',
'2015-08-27T00:00:00.000',
'2008-02-22T00:00:00.000',
'2012-02-17T00:00:00.000',
'2009-07-07T00:00:00.000',
'1989-01-01T00:00:00.000',
'2016-06-24T00:00:00.000',
'2000-01-12T00:00:00.000',
'1986-12-31T00:00:00.000',
'1987-12-01T00:00:00.000',
'2009-09-25T00:00:00.000',
'2016-12-29T00:00:00.000',
'2018-02-20T00:00:00.000',
'1999-02-25T00:00:00.000',
'2005-07-28T00:00:00.000',
'2002-09-30T00:00:00.000',
'2018-02-23T00:00:00.000',
'2006-06-28T00:00:00.000',
'2010-05-25T00:00:00.000',
'2016-10-24T00:00:00.000',
'2009-11-10T00:00:00.000',
'2015-06-30T00:00:00.000',
'2012-06-08T00:00:00.000',
'2013-01-04T00:00:00.000',
'2003-04-15T00:00:00.000',
'2014-06-20T00:00:00.000',
'2018-11-21T00:00:00.000',
'2010-10-15T00:00:00.000',
'2014-04-18T00:00:00.000',
'2018-07-02T00:00:00.000',
'1999-03-30T00:00:00.000',
'2012-10-10T00:00:00.000',
'2024-09-15T00:00:00.000',
'2006-12-07T00:00:00.000',
'2017-10-06T00:00:00.000',
'1990-01-01T00:00:00.000',
'1998-07-01T00:00:00.000',
'1996-06-01T00:00:00.000',
'2008-02-27T00:00:00.000',
'2007-10-03T00:00:00.000',
'2019-09-06T00:00:00.000',
'2017-09-29T00:00:00.000',
'2012-09-28T00:00:00.000',
'2015-07-31T00:00:00.000',
'2021-01-01T00:00:00.000',
'2010-02-24T00:00:00.000',
'2015-10-31T00:00:00.000',
'2010-08-30T00:00:00.000',
'2006-11-28T00:00:00.000',
'2001-09-17T00:00:00.000',
'2014-04-08T00:00:00.000',
'2025-03-31T00:00:00.000',
...],
'Site Class': ['C', 'A', '04', '02', 'N', 'P', '03']}
# Derive numeric Year / Month features from the ISO-8601 completion timestamp
# ('YYYY-MM-DDThh:mm:ss.sss'). Missing dates stay NaN here and are imputed below.
# The builtin float is used instead of the np.float alias, which was deprecated
# in NumPy 1.20 and removed in NumPy 1.24.
completion_date = data['Project Completion Date']
data['Year'] = completion_date.str[0:4].astype(float)
data['Month'] = completion_date.str[5:7].astype(float)
data = data.drop('Project Completion Date', axis=1)

# Fill the remaining gaps in these numeric columns with each column's mean.
for column in ['New York Zip Codes 2', 'Year', 'Month', 'Counties 2']:
    data[column] = data[column].fillna(data[column].mean())
# Count the missing values left in each column after imputation.
data.isna().sum()
Program Type 0
Site Class 0
County 0
DEC Region 0
Latitude 0
Longitude 0
Control Code 0
Control Type 0
OU 0
NYS Municipal Boundaries 2 0
New York Zip Codes 2 0
Counties 2 0
Year 0
Month 0
dtype: int64
エンコード
def onehot_encode(df, column, prefix=None):
    """Return a copy of ``df`` with ``column`` replaced by one-hot columns.

    Args:
        df: Input DataFrame; it is not modified.
        column: Name of the categorical column to expand.
        prefix: Optional string prepended (as ``"<prefix>_"``) to every
            generated column name. The default ``None`` keeps the bare
            category values as column names, as before. Pass e.g.
            ``prefix=column`` when several encoded columns share category
            values (such as '02'/'03'/'04'); otherwise the result ends up
            with duplicate column names.

    Returns:
        A new DataFrame with ``column`` removed and the indicator columns
        appended on the right.
    """
    dummies = pd.get_dummies(df[column], prefix=prefix)
    # Drop the source column before concatenating so that an indicator column
    # that happens to share its name is not dropped along with it.
    return pd.concat([df.drop(column, axis=1), dummies], axis=1)
# Show the column names that get_categorical_columns() returns for the current frame.
get_categorical_columns(data)
['Program Type', 'Site Class', 'County', 'Control Code', 'Control Type', 'OU']
# One-hot encode every categorical feature; the target column is excluded
# here and label-encoded separately below.
nominal_features = get_categorical_columns(data)
nominal_features.remove('Program Type')
for feature in nominal_features:
    data = onehot_encode(data, feature)

# Replace the target's class names with integer ids and keep an
# id -> class-name lookup for reading the results later.
label_encoder = sp.LabelEncoder()
data['Program Type'] = label_encoder.fit_transform(data['Program Type'])
target_mapppings = dict(enumerate(label_encoder.classes_))
target_mapppings
{0: 'BCP', 1: 'ERP', 2: 'HW', 3: 'RCRA', 4: 'VCP'}
データの分割とスケーリング
# Display the feature matrix X.
X
DEC Region | Latitude | Longitude | NYS Municipal Boundaries 2 | New York Zip Codes 2 | Counties 2 | Year | Month | 02 | 03 | 04 | A | C | N | P | Albany | Allegany | Bronx | Broome | Cattaraugus | Cayuga | Chautauqua | Chemung | Chenango | Clinton | Columbia | Cortland | Delaware | Dutchess | Erie | Essex | Franklin | Fulton | Genesee | Greene | Herkimer | Jefferson | Kings | Lewis | Livingston | ... | 01D | 01E | 01F | 01H | 01J | 01K | 01a | 02 | 02A | 02B | 02C | 03 | 03A | 03B | 03C | 04 | 04A | 04B | 04C | 04D | 05 | 05A | 06 | 06A | 07 | 08 | 09 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 25 | 42 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -0.748714 | -0.963589 | 0.637439 | -0.689064 | 0.093366 | 0.314497 | 1.002552 | 0.535128 | -0.625374 | -0.020588 | -0.492479 | -0.233580 | 1.819035 | -0.094942 | -0.547296 | -0.097821 | -0.067979 | -0.156868 | -0.139954 | -0.081267 | -0.049016 | -0.07772 | -0.079897 | -0.042793 | -0.047504 | -0.075805 | -0.072589 | -0.062163 | -0.166678 | -0.251353 | -0.017047 | -0.015167 | -0.060473 | -0.025337 | -0.038141 | -0.082093 | -0.061967 | -0.175413 | -0.013918 | -0.059045 | ... | -0.015167 | -0.012546 | -0.006959 | -0.009841 | -0.009841 | -0.006959 | -0.00492 | -0.186021 | -0.024609 | -0.012053 | -0.00492 | -0.138008 | -0.020291 | -0.008523 | -0.00492 | -0.087171 | -0.006026 | -0.00492 | -0.00492 | -0.00492 | -0.055972 | -0.006026 | -0.072673 | -0.006026 | -0.047504 | -0.058318 | -0.065041 | -0.093767 | -0.038141 | -0.04673 | -0.058318 | -0.044052 | -0.064286 | -0.144079 | -0.04673 | -0.02696 | -0.044052 | -0.115094 | -0.031134 | -0.015561 |
1 | 0.533197 | 1.277122 | -0.140194 | 1.777746 | 1.188601 | -4.472172 | 1.219252 | -1.042891 | -0.625374 | -0.020588 | -0.492479 | 4.281179 | -0.549742 | -0.094942 | -0.547296 | -0.097821 | -0.067979 | -0.156868 | -0.139954 | -0.081267 | -0.049016 | -0.07772 | -0.079897 | -0.042793 | -0.047504 | -0.075805 | -0.072589 | -0.062163 | -0.166678 | -0.251353 | -0.017047 | -0.015167 | -0.060473 | -0.025337 | -0.038141 | -0.082093 | -0.061967 | -0.175413 | -0.013918 | -0.059045 | ... | -0.015167 | -0.012546 | -0.006959 | -0.009841 | -0.009841 | -0.006959 | -0.00492 | -0.186021 | -0.024609 | -0.012053 | -0.00492 | -0.138008 | -0.020291 | -0.008523 | -0.00492 | -0.087171 | -0.006026 | -0.00492 | -0.00492 | -0.00492 | -0.055972 | -0.006026 | -0.072673 | -0.006026 | -0.047504 | -0.058318 | -0.065041 | -0.093767 | -0.038141 | -0.04673 | -0.058318 | -0.044052 | -0.064286 | -0.144079 | -0.04673 | -0.02696 | -0.044052 | -0.115094 | -0.031134 | -0.015561 |
2 | 1.494630 | 1.036807 | -1.930211 | -0.365548 | -1.735766 | 0.010582 | 1.219252 | 1.324138 | -0.625374 | -0.020588 | -0.492479 | -0.233580 | 1.819035 | -0.094942 | -0.547296 | -0.097821 | -0.067979 | -0.156868 | -0.139954 | -0.081267 | -0.049016 | -0.07772 | -0.079897 | -0.042793 | -0.047504 | -0.075805 | -0.072589 | -0.062163 | -0.166678 | 3.978476 | -0.017047 | -0.015167 | -0.060473 | -0.025337 | -0.038141 | -0.082093 | -0.061967 | -0.175413 | -0.013918 | -0.059045 | ... | -0.015167 | -0.012546 | -0.006959 | -0.009841 | -0.009841 | -0.006959 | -0.00492 | -0.186021 | -0.024609 | -0.012053 | -0.00492 | -0.138008 | -0.020291 | -0.008523 | -0.00492 | -0.087171 | -0.006026 | -0.00492 | -0.00492 | -0.00492 | -0.055972 | -0.006026 | -0.072673 | -0.006026 | -0.047504 | -0.058318 | -0.065041 | -0.093767 | -0.038141 | -0.04673 | -0.058318 | -0.044052 | -0.064286 | -0.144079 | -0.04673 | -0.02696 | -0.044052 | -0.115094 | -0.031134 | -0.015561 |
3 | 1.174152 | 1.166515 | -1.269289 | 0.399692 | 1.897811 | 0.175202 | 1.435952 | 1.324138 | -0.625374 | -0.020588 | -0.492479 | 4.281179 | -0.549742 | -0.094942 | -0.547296 | -0.097821 | -0.067979 | -0.156868 | -0.139954 | -0.081267 | -0.049016 | -0.07772 | -0.079897 | -0.042793 | -0.047504 | -0.075805 | -0.072589 | -0.062163 | -0.166678 | -0.251353 | -0.017047 | -0.015167 | -0.060473 | -0.025337 | -0.038141 | -0.082093 | -0.061967 | -0.175413 | -0.013918 | -0.059045 | ... | -0.015167 | -0.012546 | -0.006959 | -0.009841 | -0.009841 | -0.006959 | -0.00492 | -0.186021 | -0.024609 | -0.012053 | -0.00492 | -0.138008 | -0.020291 | -0.008523 | -0.00492 | -0.087171 | -0.006026 | -0.00492 | -0.00492 | -0.00492 | -0.055972 | -0.006026 | -0.072673 | -0.006026 | -0.047504 | -0.058318 | -0.065041 | -0.093767 | -0.038141 | -0.04673 | -0.058318 | -0.044052 | -0.064286 | -0.144079 | -0.04673 | -0.02696 | -0.044052 | -0.115094 | -0.031134 | -0.015561 |
4 | -0.748714 | -1.041672 | 0.643717 | 1.482227 | -0.045783 | 0.165705 | 0.135751 | -0.253882 | -0.625374 | -0.020588 | -0.492479 | -0.233580 | 1.819035 | -0.094942 | -0.547296 | -0.097821 | -0.067979 | -0.156868 | -0.139954 | -0.081267 | -0.049016 | -0.07772 | -0.079897 | -0.042793 | -0.047504 | -0.075805 | -0.072589 | -0.062163 | -0.166678 | -0.251353 | -0.017047 | -0.015167 | -0.060473 | -0.025337 | -0.038141 | -0.082093 | -0.061967 | 5.700842 | -0.013918 | -0.059045 | ... | -0.015167 | -0.012546 | -0.006959 | -0.009841 | -0.009841 | -0.006959 | -0.00492 | -0.186021 | -0.024609 | -0.012053 | -0.00492 | -0.138008 | -0.020291 | -0.008523 | -0.00492 | -0.087171 | -0.006026 | -0.00492 | -0.00492 | -0.00492 | -0.055972 | -0.006026 | -0.072673 | -0.006026 | -0.047504 | -0.058318 | -0.065041 | -0.093767 | -0.038141 | -0.04673 | -0.058318 | -0.044052 | -0.064286 | -0.144079 | -0.04673 | -0.02696 | -0.044052 | -0.115094 | -0.031134 | -0.015561 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
82605 | 0.853675 | 0.314928 | -0.335982 | 1.640874 | -0.860476 | -0.014745 | -0.406000 | -1.568898 | -0.625374 | -0.020588 | 2.030542 | -0.233580 | -0.549742 | -0.094942 | -0.547296 | -0.097821 | -0.067979 | -0.156868 | 7.145222 | -0.081267 | -0.049016 | -0.07772 | -0.079897 | -0.042793 | -0.047504 | -0.075805 | -0.072589 | -0.062163 | -0.166678 | -0.251353 | -0.017047 | -0.015167 | -0.060473 | -0.025337 | -0.038141 | -0.082093 | -0.061967 | -0.175413 | -0.013918 | -0.059045 | ... | -0.015167 | -0.012546 | -0.006959 | -0.009841 | -0.009841 | -0.006959 | -0.00492 | -0.186021 | -0.024609 | -0.012053 | -0.00492 | -0.138008 | -0.020291 | -0.008523 | -0.00492 | -0.087171 | -0.006026 | -0.00492 | -0.00492 | -0.00492 | -0.055972 | -0.006026 | -0.072673 | -0.006026 | -0.047504 | -0.058318 | -0.065041 | -0.093767 | -0.038141 | -0.04673 | -0.058318 | -0.044052 | -0.064286 | -0.144079 | -0.04673 | -0.02696 | -0.044052 | -0.115094 | -0.031134 | -0.015561 |
82606 | -0.748714 | -1.052282 | 0.597786 | 1.482227 | -0.050271 | 0.165705 | -0.080949 | -1.042891 | -0.625374 | -0.020588 | -0.492479 | -0.233580 | 1.819035 | -0.094942 | -0.547296 | -0.097821 | -0.067979 | -0.156868 | -0.139954 | -0.081267 | -0.049016 | -0.07772 | -0.079897 | -0.042793 | -0.047504 | -0.075805 | -0.072589 | -0.062163 | -0.166678 | -0.251353 | -0.017047 | -0.015167 | -0.060473 | -0.025337 | -0.038141 | -0.082093 | -0.061967 | 5.700842 | -0.013918 | -0.059045 | ... | -0.015167 | -0.012546 | -0.006959 | -0.009841 | -0.009841 | -0.006959 | -0.00492 | -0.186021 | -0.024609 | -0.012053 | -0.00492 | -0.138008 | -0.020291 | -0.008523 | -0.00492 | -0.087171 | -0.006026 | -0.00492 | -0.00492 | -0.00492 | -0.055972 | -0.006026 | -0.072673 | -0.006026 | -0.047504 | -0.058318 | -0.065041 | -0.093767 | -0.038141 | -0.04673 | -0.058318 | -0.044052 | -0.064286 | -0.144079 | -0.04673 | -0.02696 | -0.044052 | -0.115094 | -0.031134 | -0.015561 |
82607 | -1.069191 | -0.973704 | 0.833642 | -1.112123 | 0.360442 | 0.178368 | -0.406000 | -1.042891 | -0.625374 | -0.020588 | 2.030542 | -0.233580 | -0.549742 | -0.094942 | -0.547296 | -0.097821 | -0.067979 | -0.156868 | -0.139954 | -0.081267 | -0.049016 | -0.07772 | -0.079897 | -0.042793 | -0.047504 | -0.075805 | -0.072589 | -0.062163 | -0.166678 | -0.251353 | -0.017047 | -0.015167 | -0.060473 | -0.025337 | -0.038141 | -0.082093 | -0.061967 | -0.175413 | -0.013918 | -0.059045 | ... | -0.015167 | -0.012546 | -0.006959 | -0.009841 | -0.009841 | -0.006959 | -0.00492 | -0.186021 | -0.024609 | -0.012053 | -0.00492 | -0.138008 | -0.020291 | -0.008523 | -0.00492 | -0.087171 | -0.006026 | -0.00492 | -0.00492 | -0.00492 | -0.055972 | -0.006026 | -0.072673 | -0.006026 | -0.047504 | -0.058318 | -0.065041 | -0.093767 | -0.038141 | -0.04673 | -0.058318 | -0.044052 | -0.064286 | -0.144079 | -0.04673 | -0.02696 | -0.044052 | -0.115094 | -0.031134 | -0.015561 |
82608 | 0.853675 | 0.314928 | -0.335982 | 1.640874 | -0.860476 | -0.014745 | -0.731050 | -1.042891 | -0.625374 | -0.020588 | 2.030542 | -0.233580 | -0.549742 | -0.094942 | -0.547296 | -0.097821 | -0.067979 | -0.156868 | 7.145222 | -0.081267 | -0.049016 | -0.07772 | -0.079897 | -0.042793 | -0.047504 | -0.075805 | -0.072589 | -0.062163 | -0.166678 | -0.251353 | -0.017047 | -0.015167 | -0.060473 | -0.025337 | -0.038141 | -0.082093 | -0.061967 | -0.175413 | -0.013918 | -0.059045 | ... | -0.015167 | -0.012546 | -0.006959 | -0.009841 | -0.009841 | -0.006959 | -0.00492 | -0.186021 | -0.024609 | -0.012053 | -0.00492 | -0.138008 | -0.020291 | -0.008523 | -0.00492 | -0.087171 | -0.006026 | -0.00492 | -0.00492 | -0.00492 | -0.055972 | -0.006026 | -0.072673 | -0.006026 | -0.047504 | -0.058318 | -0.065041 | -0.093767 | -0.038141 | -0.04673 | -0.058318 | -0.044052 | -0.064286 | -0.144079 | -0.04673 | -0.02696 | -0.044052 | -0.115094 | -0.031134 | -0.015561 |
82609 | -0.107758 | 0.451908 | -0.122419 | 0.437021 | -0.853743 | 0.004250 | -2.139601 | 1.061135 | 1.599044 | -0.020588 | -0.492479 | -0.233580 | -0.549742 | -0.094942 | -0.547296 | -0.097821 | -0.067979 | -0.156868 | -0.139954 | -0.081267 | -0.049016 | -0.07772 | -0.079897 | -0.042793 | -0.047504 | -0.075805 | -0.072589 | 16.086637 | -0.166678 | -0.251353 | -0.017047 | -0.015167 | -0.060473 | -0.025337 | -0.038141 | -0.082093 | -0.061967 | -0.175413 | -0.013918 | -0.059045 | ... | -0.015167 | -0.012546 | -0.006959 | -0.009841 | -0.009841 | -0.006959 | -0.00492 | -0.186021 | -0.024609 | -0.012053 | -0.00492 | -0.138008 | -0.020291 | -0.008523 | -0.00492 | -0.087171 | -0.006026 | -0.00492 | -0.00492 | -0.00492 | -0.055972 | -0.006026 | -0.072673 | -0.006026 | -0.047504 | -0.058318 | -0.065041 | -0.093767 | -0.038141 | -0.04673 | -0.058318 | -0.044052 | -0.064286 | -0.144079 | -0.04673 | -0.02696 | -0.044052 | -0.115094 | -0.031134 | -0.015561 |
82610 rows × 131 columns
# Separate the label-encoded target from the features.
y = data['Program Type']
X = data.drop(['Program Type'], axis=1)

# Standardize every feature column, keeping the DataFrame index and column labels.
# NOTE(review): the scaler is fit on all rows before the split, so test-set
# statistics influence the scaling; fit it on X_train only if a strict
# hold-out estimate is required.
scaler = sp.StandardScaler()
X = pd.DataFrame(scaler.fit_transform(X), index=X.index, columns=X.columns)

# 70/30 split. A fixed seed makes the reported metrics reproducible, and
# stratifying on y keeps the class proportions the same in both parts, which
# matters because the rarest class has only a few dozen rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.7, stratify=y, random_state=42,
)
トレーニング
# Number of rows and feature columns; the column count is the model's input width.
X.shape
(82610, 131)
# Number of rows per encoded target class.
y.value_counts()
2 61673
0 14403
1 3455
4 3022
3 57
Name: Program Type, dtype: int64
# Fully-connected classifier: two 64-unit ReLU hidden layers followed by a
# 5-unit softmax output (one unit per encoded Program Type class).
# The input width is read from the training data instead of hard-coding 131,
# so the model keeps working if the number of one-hot columns changes.
num_features = X_train.shape[1]
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(num_features,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(5, activation='softmax'),
])
model.summary()

# The targets are integer class ids, hence the sparse cross-entropy loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

batch_size = 64
epochs = 100

# 20% of the training data is held back for validation; the callback
# lowers the learning rate when the monitored metric stops improving.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[tf.keras.callbacks.ReduceLROnPlateau()],
    verbose=0,
)
Model: "sequential_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_6 (Dense) (None, 64) 8448
_________________________________________________________________
dense_7 (Dense) (None, 64) 4160
_________________________________________________________________
dense_8 (Dense) (None, 5) 325
=================================================================
Total params: 12,933
Trainable params: 12,933
Non-trainable params: 0
_________________________________________________________________
# Plot the per-epoch training and validation loss recorded by model.fit.
epochs_range = range(1, epochs + 1)
train_loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure(figsize=(14, 10))
for losses, legend_label in (
    (train_loss, 'Training Loss'),
    (val_loss, 'Validation Loss'),
):
    plt.plot(epochs_range, losses, label=legend_label)
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Loss and accuracy on the held-out test split.
model.evaluate(X_test, y_test)
775/775 [==============================] - 1s 884us/step - loss: 0.0262 - accuracy: 0.9958
[0.026238199323415756, 0.9958440661430359]
# Accuracy per class: evaluate the model on the test rows of each label separately.
for label in range(5):
    label_mask = y_test == label
    _, label_accuracy = model.evaluate(
        X_test[label_mask], y_test[label_mask], verbose=0
    )
    print(f'Class {label} Accuracy: {label_accuracy}')
Class 0 Accuracy: 0.9960345029830933
Class 1 Accuracy: 0.9609895348548889
Class 2 Accuracy: 0.9981631636619568
Class 3 Accuracy: 0.9230769276618958
Class 4 Accuracy: 0.9891657829284668
すべてのタイプで良い精度が出ていることを確認できました。