import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import datetime, os, random
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import Sequence
%load_ext tensorboard
from google.colab import auth
from oauth2client.client import GoogleCredentials
import gspread
# --- Load the dataset (データセット読み込み) ---
# Authenticate the Colab user and open the source Google Spreadsheet.
# NOTE(review): the `auth` and `gspread` imports duplicate the ones at the
# top of the file; harmless, but one set could be dropped.
from google.colab import auth
auth.authenticate_user()
import gspread
from google.auth import default
# Application-default credentials for the just-authenticated user.
creds, _ = default()
gc = gspread.authorize(creds)
# Spreadsheet title: "instruction codes 20 to 1" (命令コード20から1).
filename = "命令コード20から1"
ss = gc.open(filename)
# Fix the NumPy RNG so the train/test sampling below is reproducible.
np.random.seed(0)
# tf.keras.utils.set_random_seed(0)  # enable to also seed TF for reproducible training
#                                    # (the original comment used the wrong path
#                                    # `tf.keras.set_random_seed`)

# Read the "norm" worksheet; gspread returns every cell as a string.
worksheet = ss.worksheet("norm")
dataset = pd.DataFrame(worksheet.get_all_values())

# Promote row 0 to the column header, drop it from the data, and discard the
# non-numeric "name" column. reset_index leaves an extra "index" column that
# is removed later, after the split.
dataset.columns = list(dataset.loc[0, :])
dataset.drop(0, inplace=True)
dataset.reset_index(inplace=True)
dataset.drop('name', axis=1, inplace=True)

# Coerce the 20 feature columns (l_0 .. l_19) and the "cycle" target to float
# in one vectorized pass (replaces 21 copy-pasted astype lines).
numeric_cols = [f"l_{i}" for i in range(20)] + ["cycle"]
dataset[numeric_cols] = dataset[numeric_cols].astype(float)

# Notebook-style peek at a few rows.
dataset.sample(5)
# Split 80 % of the rows into the training set (seeded for reproducibility)
# and keep the remaining rows as the held-out test set.
# NOTE(review): in the original this split + pairplot appeared twice,
# verbatim; the second copy only re-ran the identical split and re-drew the
# same plot, so it has been removed.
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(train_dataset.index)
# Alternative fixed-size split kept for reference:
# train_dataset = dataset.head(173)
# test_dataset = dataset.tail(17)

# Quick sanity plot of the l_0 feature distribution in the training set.
sns.pairplot(train_dataset[["l_0"]], diag_kind="kde")
# Separate the regression target ("cycle") from the features and drop the
# leftover "index" column that reset_index() introduced.
train_labels = pd.DataFrame({'cycle': train_dataset.pop('cycle')})
train_dataset.drop(columns='index', inplace=True)

test_labels = pd.DataFrame({'cycle': test_dataset.pop('cycle')})
test_dataset.drop(columns='index', inplace=True)

# Notebook-style peek at a few label rows.
test_labels.sample(4)
# --- Normalize the training data (学習用データを正規化) ---
# Per-column summary statistics (mean/std) of the training features, kept for
# the currently disabled standardization step in norm() below.
train_stats = train_dataset.describe()
train_stats = train_stats.transpose()
train_stats

def norm(x):
    # Normalization is intentionally disabled — presumably the "norm"
    # worksheet already holds pre-normalized values (TODO confirm), so this
    # is an identity pass-through. NOTE(review): train_stats above is unused
    # while this stays disabled.
    return x
    # return (x - train_stats['mean']) / train_stats['std']

normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)
# Notebook-style peek at a few rows.
normed_train_data.sample(5)
# --- Model definition (モデル) ---
def build_model(n_features=None):
    """Build and compile the cycle-count regression MLP.

    Args:
        n_features: Number of input features. Defaults to the width of the
            module-level ``train_dataset``, so existing zero-arg callers are
            unchanged.

    Returns:
        A compiled ``keras.Model`` mapping the feature vector to a single
        "cycle" output (MSE loss, MAE metric).
    """
    if n_features is None:
        n_features = len(train_dataset.keys())
    lambd = 0.1  # L1 strength for the (disabled) regularized variant below
    # BUG FIX: Input(shape=...) expects a shape *tuple*; the original passed
    # a bare int (`shape=len(train_dataset.keys())`).
    inputs = layers.Input(shape=(n_features,))
    # x = layers.Dense(8, activation='relu', kernel_regularizer=keras.regularizers.L1(float(lambd)))(inputs)
    # 11 hidden layers of 20 ReLU units: 2 explicit + 9 from the loop
    # (the original `range(1, 10)` is exactly 9 iterations).
    x = layers.Dense(20, activation='relu')(inputs)
    x = layers.Dense(20, activation='relu')(x)
    for _ in range(9):
        x = layers.Dense(20, activation='relu')(x)
        # x = layers.Dropout(0.2)(x)
    a_load = layers.Dense(1, name='cycle')(x)
    model = keras.Model(inputs=inputs, outputs=a_load, name='LLVM')
    # NOTE(review): with a single output, loss_weights=[0.8] only scales the
    # loss (effectively the learning rate) — confirm it is intentional.
    model.compile(
        optimizer=tf.keras.optimizers.RMSprop(0.001),
        loss_weights=[0.8],
        loss='mse',
        metrics=['mae'])
    return model

model = build_model()
model.summary()
# --- Verify model input/output wiring (入出力の正しさ検証) ---
# Smoke test: run a forward pass on the first 100 normalized training rows to
# confirm the model accepts the input shape and yields one value per row.
example_batch = normed_train_data[:100]
example_result = model.predict(example_batch)
# In a notebook cell this displays the raw prediction array.
example_result
# --- Training (訓練) ---
# Show progress by printing one dot per completed epoch.
class PrintDot(keras.callbacks.Callback):
    """Keras callback printing one dot per epoch as a progress indicator.

    A newline is emitted every 100 epochs so the dots wrap into rows of 100.
    """

    def on_epoch_end(self, epoch, logs=None):
        # `logs=None` default added so the method is also callable directly;
        # Keras always passes a logs dict, which is ignored here.
        if epoch % 100 == 0:
            print('')
        # flush so dots appear immediately instead of sitting in the buffer
        # (with end='' nothing forces a flush otherwise).
        print('.', end='', flush=True)
# TensorBoard logging: one timestamped run directory per execution.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = os.path.join("logs", run_stamp)
tb_callback = tf.keras.callbacks.TensorBoard(log_dir, histogram_freq=1)

EPOCHS = 300

# Train silently (verbose=0); progress comes from PrintDot and TensorBoard.
# 10 % of the training rows are held out for validation.
model.fit(
    normed_train_data,
    train_labels.cycle,
    epochs=EPOCHS,
    validation_split=0.1,
    verbose=0,
    callbacks=[tb_callback, PrintDot()])
%tensorboard --logdir logs
# --- Predict on the test data (テストデータで予測) ---
# Predict on the held-out test set and tabulate the per-row error.
test_predictions = model.predict(normed_test_data)
result = normed_test_data.copy()
# Use the labels split off for the test set rather than reaching back into
# the full `dataset` (same values via index alignment, clearer provenance).
result['cycle'] = test_labels['cycle']
result['predict_cycle'] = test_predictions
result['diff_cycle'] = result['predict_cycle'] - result['cycle']
# Signed percentage error, rounded to the nearest whole percent
# (reuses diff_cycle instead of recomputing the subtraction).
result['error_cycle'] = round(result['diff_cycle'] / result['cycle'] * 100)
# Display the error table ordered from most under- to most over-predicted.
result.reindex(columns=['cycle', 'predict_cycle', 'diff_cycle', 'error_cycle']).sort_values('error_cycle')
# Load the evaluation worksheet ("eval") the same way as the training data.
worksheet = ss.worksheet("eval")
evaldata = pd.DataFrame(worksheet.get_all_values())

# Promote row 0 to the header, drop it, and remove the "name" column.
evaldata.columns = list(evaldata.loc[0, :])
evaldata.drop(0, inplace=True)
evaldata.reset_index(inplace=True)
evaldata.drop('name', axis=1, inplace=True)

# Coerce the 20 feature columns to float in one vectorized pass (replaces 20
# copy-pasted astype lines; cells arrive from gspread as text). The "cycle"
# column is deliberately left uncast here — as in the original, it is cast
# when the result table is built below.
feature_cols = [f"l_{i}" for i in range(20)]
evaldata[feature_cols] = evaldata[feature_cols].astype(float)
# Split the target off the evaluation features and drop the reset_index()
# artifact column, mirroring the train/test preparation above.
eval_labels = pd.DataFrame({'cycle': evaldata.pop('cycle')})
evaldata.drop(columns='index', inplace=True)
# norm() is currently an identity pass-through.
eval_dataset = norm(evaldata)
# Notebook-style display of the prepared evaluation features.
eval_dataset
# Predict on the evaluation set and tabulate the per-row error.
test_predictions = model.predict(eval_dataset)
result = eval_dataset.copy()
# The eval "cycle" column was never cast during loading, so cast it here.
result['cycle'] = eval_labels['cycle'].astype(float)
result['predict_cycle'] = test_predictions
# (Removed a stale bare `result['cycle'].dtype` probe whose comment claimed
# cycle was still object — it is float after the cast above.)
result['diff_cycle'] = result['predict_cycle'] - result['cycle']
# Signed percentage error, rounded to the nearest whole percent
# (reuses diff_cycle instead of recomputing the subtraction).
result['error_cycle'] = round(result['diff_cycle'] / result['cycle'] * 100)
# Display the error table ordered from most under- to most over-predicted.
result.reindex(columns=['cycle', 'predict_cycle', 'diff_cycle', 'error_cycle']).sort_values('error_cycle')
# Persist the evaluation results to CSV; the filename encodes the layer
# count (層 = "layers"). The loop currently runs a single iteration (0),
# presumably a placeholder for sweeping layer counts — TODO confirm.
for layer_count in range(1):
    filename = f"{layer_count}層.csv"
    result.to_csv(filename, index=False)