I Tried Simplifying a DNN for Statistical Data

Posted at 2024-12-30

Note:
This code has not been checked against the latest Python version, so it may not run if you copy it as-is.

Last time I learned how to write classes, so this time I'm going to try simplifying deep learning with Keras.

from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import r2_score
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# A Keras DNN classifier that can be used like scikit-learn's MLPClassifier
class DNNClassifier():
    def __init__(self, hidden_layer_sizes=(100,)):
        self.hidden_layer_sizes = hidden_layer_sizes
        self.x_train = None
        self.y_train = None
        self.model = None
        self.pred = None
        self.history = None
    
    def fit(self, x_train, y_train):
        if isinstance(x_train, pd.DataFrame):
            x_train = x_train.values
        if isinstance(y_train, pd.Series):
            y_train = y_train.values
        self.x_train = x_train
        self.y_train = y_train
        self.model = Sequential()
        for i in range(len(self.hidden_layer_sizes)):
            if i == 0:
                self.model.add(Dense(self.hidden_layer_sizes[i], input_dim=x_train.shape[1], activation="relu"))
            else:
                self.model.add(Dense(self.hidden_layer_sizes[i], activation="relu"))
        # output layer: one unit per class (assumes integer labels 0..n_classes-1)
        self.model.add(Dense(len(list(set(y_train))), activation="softmax"))
        self.model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
        x_train, x_val, y_train, y_val = train_test_split(self.x_train, self.y_train, test_size=0.3)
        self.history = self.model.fit(x_train, y_train, epochs=200, batch_size=32, validation_data=(x_val, y_val))
    
    def predict(self, x_test):
        if isinstance(x_test, pd.DataFrame):
            x_test = x_test.values
        self.pred = self.model.predict(x_test)
        y_pred = np.argmax(self.pred, axis=1)
        return y_pred

    def predict_proba_(self, x_test):
        if isinstance(x_test, pd.DataFrame):
            x_test = x_test.values
        return self.model.predict(x_test)
    
    def epoch(self, val="loss"):
        plt.plot(self.history.history[val])
        plt.plot(self.history.history["val_"+val])
        plt.title("model "+val)
        plt.ylabel(val)
        plt.xlabel("Epoch")
        plt.legend(["Train", "Validation"])
        plt.show()
        
    def score(self, x_val, y_val):
        if isinstance(x_val, pd.DataFrame):
            x_val = x_val.values
        if isinstance(y_val, pd.Series):
            y_val = y_val.values
        y_pred = self.predict(x_val)
        return accuracy_score(y_val, y_pred)
        
# A Keras DNN regressor with the same scikit-learn-style interface
class DNNRegressor():
    def __init__(self, hidden_layer_sizes=(100,)):
        self.hidden_layer_sizes = hidden_layer_sizes
        self.x_train = None
        self.y_train = None
        self.model = None
        self.pred = None
        self.history = None
    
    def fit(self, x_train, y_train):
        if isinstance(x_train, pd.DataFrame):
            x_train = x_train.values
        if isinstance(y_train, pd.Series):
            y_train = y_train.values
        self.x_train = x_train
        self.y_train = y_train
        self.model = Sequential()
        for i in range(len(self.hidden_layer_sizes)):
            if i == 0:
                self.model.add(Dense(self.hidden_layer_sizes[i], input_dim=x_train.shape[1], activation="relu"))
            else:
                self.model.add(Dense(self.hidden_layer_sizes[i], activation="relu"))
        # single linear output unit for regression
        self.model.add(Dense(1, activation="linear"))
        self.model.compile(optimizer="adam", loss="mean_squared_error")
        x_train, x_val, y_train, y_val = train_test_split(self.x_train, self.y_train, test_size=0.3)
        self.history = self.model.fit(x_train, y_train, epochs=200, batch_size=32, validation_data=(x_val, y_val))
    
    def predict(self, x_test):
        if isinstance(x_test, pd.DataFrame):
            x_test = x_test.values
        self.pred = self.model.predict(x_test)
        return self.pred

    def epoch(self, val="loss"):
        plt.plot(self.history.history[val])
        plt.plot(self.history.history["val_"+val])
        plt.title("model "+val)
        plt.ylabel(val)
        plt.xlabel("Epoch")
        plt.legend(["Train", "Validation"])
        plt.show()

    def score(self, x_val, y_val):
        if isinstance(x_val, pd.DataFrame):
            x_val = x_val.values
        if isinstance(y_val, pd.Series):
            y_val = y_val.values
        y_pred = self.predict(x_val)
        return r2_score(y_val, y_pred)

I modeled this on scikit-learn's MLPClassifier.
The idea was to make it usable with the same feel.
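For comparison, this is roughly what the equivalent MLPClassifier call looks like. This is only a minimal sketch, not code from the original article; it assumes the same x_train, x_test, y_train, y_test variables prepared in the "Code" section below.

# Comparison sketch only: scikit-learn's MLPClassifier used the same way as DNNClassifier.
# Assumes x_train, x_test, y_train, y_test as prepared in the "Code" section below.
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(hidden_layer_sizes=(500, 1000, 750, 500), max_iter=200)
mlp.fit(x_train, y_train)
print(mlp.score(x_test, y_test))  # accuracy, same as DNNClassifier.score()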

Code

import pandas as pd
from sklearn.preprocessing import MinMaxScaler

df = pd.read_csv("/content/drive/MyDrive/breast_cancer.csv")

y = df["y"]
x = df.drop("y", axis=1)

ss = MinMaxScaler()
ss.fit(x)
x = ss.transform(x)  # scale the features to the [0, 1] range
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

model = DNNClassifier(hidden_layer_sizes=(500, 1000, 750, 500))

model.fit(x_train, y_train)
Epoch 1/200
9/9 ━━━━━━━━━━━━━━━━━━━━ 2s 82ms/step - accuracy: 0.4614 - loss: 78.8762 - val_accuracy: 0.3750 - val_loss: 20.9503
Epoch 2/200
9/9 ━━━━━━━━━━━━━━━━━━━━ 1s 54ms/step - accuracy: 0.5082 - loss: 11.7719 - val_accuracy: 0.6500 - val_loss: 5.8090
Epoch 3/200
9/9 ━━━━━━━━━━━━━━━━━━━━ 1s 55ms/step - accuracy: 0.6063 - loss: 5.4411 - val_accuracy: 0.6833 - val_loss: 4.6855
Epoch 4/200
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 48ms/step - accuracy: 0.7120 - loss: 1.8871 - val_accuracy: 0.9000 - val_loss: 0.9980
Epoch 5/200
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 49ms/step - accuracy: 0.8138 - loss: 0.7664 - val_accuracy: 0.7750 - val_loss: 0.8150
・
・
・
Epoch 195/200
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 36ms/step - accuracy: 0.9144 - loss: 0.2160 - val_accuracy: 0.9000 - val_loss: 0.2829
Epoch 196/200
9/9 ━━━━━━━━━━━━━━━━━━━━ 1s 32ms/step - accuracy: 0.9355 - loss: 0.1584 - val_accuracy: 0.9417 - val_loss: 0.1755
Epoch 197/200
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 33ms/step - accuracy: 0.9346 - loss: 0.1567 - val_accuracy: 0.9417 - val_loss: 0.1860
Epoch 198/200
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 34ms/step - accuracy: 0.9436 - loss: 0.1256 - val_accuracy: 0.9250 - val_loss: 0.2962
Epoch 199/200
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 31ms/step - accuracy: 0.9502 - loss: 0.1649 - val_accuracy: 0.9333 - val_loss: 0.1858
Epoch 200/200
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 30ms/step - accuracy: 0.9608 - loss: 0.1310 - val_accuracy: 0.9167 - val_loss: 0.1904

Training took quite a long time.
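One way to shorten training (not something the original class does) would be to let Keras stop early once the validation loss stops improving. The sketch below shows how the final model.fit() call inside DNNClassifier.fit() could be replaced with one that uses an EarlyStopping callback; the variable names are the ones already defined inside fit().

# Sketch only: early stopping inside DNNClassifier.fit(), replacing the final model.fit() call.
# EarlyStopping halts training once val_loss has not improved for `patience` epochs.
from keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)
self.history = self.model.fit(x_train, y_train, epochs=200, batch_size=32,
                              validation_data=(x_val, y_val), callbacks=[early_stop])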

y_pred = model.predict(x_test)

from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))
              precision    recall  f1-score   support

         0.0       0.94      0.93      0.93        67
         1.0       0.95      0.96      0.96       104

    accuracy                           0.95       171
   macro avg       0.95      0.94      0.94       171
weighted avg       0.95      0.95      0.95       171

The accuracy is a bit underwhelming...

model.epoch(val="accuracy")

(figure: training vs. validation accuracy per epoch)

model.epoch(val="loss")

(figure: training vs. validation loss per epoch)

Compared with MLPClassifier, being able to see these plots may be the only advantage.
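For reference, MLPClassifier does record its training loss in the loss_curve_ attribute, but it does not keep a validation curve like the plots above. A minimal sketch, assuming the mlp model from the comparison sketch earlier:

# Sketch only: plotting MLPClassifier's training loss (there is no built-in validation curve).
plt.plot(mlp.loss_curve_)
plt.title("MLPClassifier training loss")
plt.xlabel("Epoch")
plt.ylabel("loss")
plt.show()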

Summary

Scikit-Learn is amazing.
