メモ

メモ

Last updated at 2024-08-13 / Posted at 2024-07-23

time.py

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler

# Load the CSV with the date column parsed and used as the index, rename the
# index-value column to 'DJIA', then sort by the index and drop rows that
# contain missing values.
data = (
    pd.read_csv('path_to_your_data.csv', parse_dates=['日期'], index_col='日期')
    .rename(columns={'DJIA（道琼斯指数值）': 'DJIA'})
    .sort_index()
    .dropna()
)

# Positional split in index order: the first 70% of rows for training, the
# next 15% for validation, and whatever remains for testing.
n = len(data)
train_size = int(n * 0.7)
val_size = int(n * 0.15)
val_end = train_size + val_size

train_data = data.iloc[:train_size]
val_data = data.iloc[train_size:val_end]
test_data = data.iloc[val_end:]

# Model definition and grid-search configuration
def create_holt_winters_model(params, endog=None):
    """Build an unfitted Holt-Winters model for one parameter combination.

    Args:
        params: Mapping with the keys 'trend', 'seasonal' and
            'seasonal_periods'; each value is forwarded to
            ``ExponentialSmoothing`` under the same keyword.
        endog: Optional series to model. When omitted, the 'DJIA' column of
            the module-level ``train_data`` is used.

    Returns:
        The ``ExponentialSmoothing`` instance. It is not fitted; the caller
        is expected to call ``.fit()`` on it.
    """
    if endog is None:
        # Select the target column explicitly so the model always receives a
        # one-dimensional series, even if the CSV carries extra columns.
        endog = train_data['DJIA']
    return ExponentialSmoothing(
        endog,
        trend=params['trend'],
        seasonal=params['seasonal'],
        seasonal_periods=params['seasonal_periods'],
    )

# Candidate values for each model option explored by the grid search.
param_grid = dict(
    trend=['add', 'mul', None],
    seasonal=['add', 'mul', None],
    seasonal_periods=[12],  # adjust this to match the actual data
)

def fit_model(params):
    """Fit one Holt-Winters candidate and score it on the validation window.

    Args:
        params: Mapping with the keys 'trend', 'seasonal' and
            'seasonal_periods', passed to ``create_holt_winters_model``.

    Returns:
        The mean absolute error between the validation 'DJIA' values and the
        forecast, or ``float('inf')`` when the forecast contains non-finite
        values so that the candidate can never be selected as the best one.
    """
    model_fit = create_holt_winters_model(params).fit()

    # The validation rows directly follow the training rows, so forecasting
    # len(val_data) steps covers exactly the validation window. Forecasting by
    # step count (instead of predict() with date labels) also works when the
    # date index has no regular frequency, e.g. trading days with holidays.
    predictions = np.asarray(model_fit.forecast(len(val_data)))

    if not np.all(np.isfinite(predictions)):
        return float('inf')

    # Compare by position: the forecast index need not match the dates.
    return mean_absolute_error(val_data['DJIA'].to_numpy(), predictions)

# Grid search: score every combination and keep the lowest validation MAE
best_mae = float('inf')
best_params = None

# Enumerate the combinations in the same nesting order as the grid keys:
# trend (outermost), seasonal, seasonal_periods (innermost).
candidates = [
    {'trend': trend, 'seasonal': seasonal, 'seasonal_periods': seasonal_periods}
    for trend in param_grid['trend']
    for seasonal in param_grid['seasonal']
    for seasonal_periods in param_grid['seasonal_periods']
]

for params in candidates:
    mae = fit_model(params)
    # Only a strictly smaller error replaces the current best, so the first
    # candidate wins ties.
    if not mae < best_mae:
        continue
    best_mae, best_params = mae, params

# Fit the model again with the best parameters found by the grid search
best_model = create_holt_winters_model(best_params).fit()

# Forecast for the test period.
# The fitted model ends at the last training row and the test rows start
# len(val_data) rows later, so forecast across both windows and keep only the
# tail. Forecasting by step count (instead of predict() with date labels) also
# works when the date index has no regular frequency, e.g. trading days.
horizon = len(val_data) + len(test_data)
forecast_values = np.asarray(best_model.forecast(horizon))[len(val_data):]
# Re-label with the test dates so the series aligns with test_data below.
test_predictions = pd.Series(forecast_values, index=test_data.index)

# Approximate 95% band from the spread of the in-sample residuals. The spread
# of the forecast values themselves says nothing about forecast error.
interval_half_width = 1.96 * np.std(best_model.resid)

# Show the forecast against the full history
plt.figure(figsize=(14, 7))
plt.plot(data.index, data['DJIA'], label='Actual Data', color='blue')
plt.plot(test_data.index, test_predictions, label='Predictions', color='red')
plt.fill_between(test_data.index,
                 test_predictions - interval_half_width,
                 test_predictions + interval_half_width,
                 color='red', alpha=0.2, label='95% Prediction Interval')
plt.xlabel('Date')
plt.ylabel('DJIA Value')
plt.title('Holt-Winters Forecast with Anomaly Detection')
plt.legend()
plt.show()

# Model details and evaluation result
print(f'Best Parameters: {best_params}')
print(f'Validation MAE: {best_mae}')

# Anomaly detection: flag test points whose residual (actual - forecast)
# exceeds 1.96 standard deviations of the test residuals in absolute value.
residuals = test_data['DJIA'] - test_predictions
threshold = 1.96 * np.std(residuals)
anomalies = residuals[residuals.abs() > threshold]

# Visualise the anomalies
plt.figure(figsize=(14, 7))
plt.plot(data.index, data['DJIA'], label='Actual Data', color='blue')
plt.plot(test_data.index, test_predictions, label='Predictions', color='red')
# Plot the anomalies at their observed values.
plt.scatter(anomalies.index, test_data.loc[anomalies.index, 'DJIA'], color='orange', label='Detected Anomalies')
plt.fill_between(test_data.index,
                 test_predictions - threshold,
                 test_predictions + threshold,
                 color='red', alpha=0.2, label='Anomaly Threshold')
plt.xlabel('Date')
plt.ylabel('DJIA Value')
plt.title('Anomalies Detected with Holt-Winters Model')
plt.legend()
plt.show()

IsolationForest.py

import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.metrics import make_scorer
import numpy as np

# Load the data
data = pd.read_csv('data.csv')

# Convert the date column to datetimes and sort the rows by it
date_col = '日期'
data[date_col] = pd.to_datetime(data[date_col])
data = data.sort_values(date_col)

# Feature matrix: the DJIA value itself is the only feature
X = data.loc[:, ['DJIA']].values

# Split without shuffling so the row order is preserved:
# 60% training, then the remaining 40% halved into validation and test.
X_train, X_temp = train_test_split(X, shuffle=False, test_size=0.4)
X_valid, X_test = train_test_split(X_temp, shuffle=False, test_size=0.5)

# Search space: candidate values for each IsolationForest keyword argument
param_space = dict(
    n_estimators=[50, 100, 150, 200],
    max_samples=[0.6, 0.8, 1.0],
    contamination=[0.01, 0.05, 0.1, 0.2],
    max_features=[0.5, 0.75, 1.0],
    bootstrap=[True, False],
)

# Validation score function: mean anomaly score
def mean_anomaly_score(X_train, X_valid, params):
    """Return the mean decision-function value on the validation data.

    Args:
        X_train: Samples used to fit the IsolationForest.
        X_valid: Samples the fitted model is scored on.
        params: Keyword arguments passed to the ``IsolationForest``
            constructor.

    Returns:
        The mean of ``decision_function(X_valid)`` for the fitted model.

    NOTE(review): scikit-learn appears to shift decision_function by a
    contamination-dependent offset, so scores obtained with different
    ``contamination`` values may not be directly comparable; confirm this
    is the intended selection criterion.
    """
    forest = IsolationForest(**params).fit(X_train)
    scores = forest.decision_function(X_valid)
    return scores.mean()

# Build the grid of all parameter combinations
grid = ParameterGrid(param_space)

# Score each combination and keep the one with the highest validation score
best_score = -np.inf
best_params = None

for params in grid:
    score = mean_anomaly_score(X_train, X_valid, params)
    # Only a strictly larger score replaces the current best.
    if not score > best_score:
        continue
    best_score, best_params = score, params

# Fit a fresh model on the training data with the best parameters
best_model = IsolationForest(**best_params)
best_model.fit(X_train)

# Predict on the test data
y_pred = best_model.predict(X_test)

# Visualise the result.
# predict() labels anomalies as -1 and normal points as 1, so keep only the
# test rows labelled -1. The test rows are those after the training and
# validation rows.
test_start = len(X_train) + len(X_valid)
is_anomaly = y_pred == -1
anomalies = data.iloc[test_start:][is_anomaly]

import matplotlib.pyplot as plt

plt.figure(figsize=(14, 7))
plt.plot(data['日期'], data['DJIA'], label='DJIA', color='blue')
plt.scatter(anomalies['日期'], anomalies['DJIA'], color='red', label='Anomalies')
plt.title('DJIA 異常検知')
plt.xlabel('日期')
plt.ylabel('DJIA 値')
plt.legend()
plt.show()

# Reading the plot:
# the blue line shows the movement of the DJIA, and the red points mark the
# detected anomalies.




import shap
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Load the data
data = pd.read_csv('data.csv')

# Convert the date column to datetimes and sort the rows by it
data['日期'] = pd.to_datetime(data['日期'])
data = data.sort_values(by='日期')

# Feature matrix: the DJIA value itself is the only feature.
# The same name list is reused below so both SHAP plots are labelled alike.
feature_names = ['DJIA']
X = data[feature_names].values

# Split without shuffling: 60% training, then the remaining 40% halved into
# validation and test.
X_train, X_temp = train_test_split(X, test_size=0.4, shuffle=False)
X_valid, X_test = train_test_split(X_temp, test_size=0.5, shuffle=False)

# Create an Isolation Forest with default settings and fit it
model = IsolationForest()
model.fit(X_train)

# Compute SHAP values for the test data, passing the training data to the
# explainer
explainer = shap.Explainer(model, X_train)
shap_values = explainer(X_test)

# Plot the SHAP values
shap.summary_plot(shap_values, X_test, feature_names=feature_names)

# Show feature importance as a bar chart. feature_names is passed here as well
# so this plot is labelled 'DJIA' like the one above rather than with a
# generic placeholder.
shap.summary_plot(shap_values, X_test, feature_names=feature_names, plot_type="bar")

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do by signing up