0
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?

メモ

Last updated at Posted at 2024-07-23
time.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler

# Load the CSV with the '日期' column parsed as dates and used as the index,
# shorten the value column's name to 'DJIA', order the rows chronologically
# and drop every row that contains a missing value.
data = (
    pd.read_csv('path_to_your_data.csv', parse_dates=['日期'], index_col='日期')
    .rename(columns={'DJIA(道琼斯指数值)': 'DJIA'})
    .sort_index()
    .dropna()
)

# Chronological split without shuffling: the first 70% of the rows are used
# for training, the next 15% for validation and whatever remains for testing.
n = len(data)
train_size = int(n * 0.7)
val_size = int(n * 0.15)

train_data, val_data, test_data = (
    data.iloc[:train_size],
    data.iloc[train_size:train_size + val_size],
    data.iloc[train_size + val_size:],
)

# Model factory used by the grid search below.
def create_holt_winters_model(params):
    """Build an unfitted Holt-Winters model on the training DJIA series.

    Args:
        params: Mapping with the keys 'trend', 'seasonal' and
            'seasonal_periods'. Each value is forwarded to the
            ``ExponentialSmoothing`` argument of the same name.

    Returns:
        The ``ExponentialSmoothing`` instance. It is not fitted; the caller
        is expected to call ``.fit()`` on it.
    """
    # Hand over the 'DJIA' column explicitly instead of the whole frame: the
    # model is univariate, and the rest of this script (plots, residuals)
    # also works on the 'DJIA' column, so any extra column in the CSV must
    # not leak into the model input.
    return ExponentialSmoothing(
        train_data['DJIA'],
        trend=params['trend'],
        seasonal=params['seasonal'],
        seasonal_periods=params['seasonal_periods'],
    )

# Candidate settings for the manual grid search: the search loop below tries
# every combination of trend type, seasonal type and seasonal period.
param_grid = {
    "trend": ["add", "mul", None],
    "seasonal": ["add", "mul", None],
    # Set the period(s) to match the seasonality of the actual data.
    "seasonal_periods": [12],
}

def fit_model(params):
    """Fit one Holt-Winters candidate and score it on the validation split.

    Args:
        params: Mapping understood by ``create_holt_winters_model``
            (keys 'trend', 'seasonal', 'seasonal_periods').

    Returns:
        The mean absolute error between the validation 'DJIA' values and the
        model's forecast for the same number of steps.
    """
    model_fit = create_holt_winters_model(params).fit()

    # val_data is the slice that immediately follows train_data, so the
    # validation period is exactly the next len(val_data) steps. Forecasting
    # by step count rather than by date labels keeps this working when the
    # date index has no regular frequency (e.g. trading days only, or rows
    # removed by dropna), where out-of-sample dates cannot be located.
    predictions = model_fit.forecast(len(val_data))

    # Compare plain arrays position by position; the forecast's own index is
    # irrelevant for the error metric.
    return mean_absolute_error(val_data['DJIA'].to_numpy(), np.asarray(predictions))

# Grid search: score every combination from param_grid with fit_model and
# keep the one with the lowest validation MAE. Only a strictly smaller MAE
# replaces the current best, so ties keep the earliest combination.
best_mae, best_params = float('inf'), None

for params in (
    {'trend': trend, 'seasonal': seasonal, 'seasonal_periods': seasonal_periods}
    for trend in param_grid['trend']
    for seasonal in param_grid['seasonal']
    for seasonal_periods in param_grid['seasonal_periods']
):
    mae = fit_model(params)
    if mae < best_mae:
        best_mae, best_params = mae, params

# Refit the winning configuration (the factory trains on train_data).
best_model = create_holt_winters_model(best_params).fit()

# Forecast the test period.
# test_data starts len(val_data) steps after the end of the training sample,
# so forecast across validation + test by step count and keep the test tail.
# Counting steps instead of passing date labels also works when the date
# index has no regular frequency, and re-labelling the values with
# test_data.index keeps later date-aligned arithmetic such as
# `test_data['DJIA'] - test_predictions` well defined.
horizon = len(val_data) + len(test_data)
forecast_values = np.asarray(best_model.forecast(horizon)).ravel()
test_predictions = pd.Series(forecast_values[len(val_data):], index=test_data.index)

# Half-width of the shaded band: 1.96 standard deviations of the model's
# in-sample residuals. The spread of the forecast values themselves says
# nothing about forecast error, so it must not be used here. This is a
# constant-width approximation of a 95% interval; it does not widen with the
# forecast horizon.
interval_half_width = 1.96 * np.nanstd(np.asarray(best_model.resid))

# Plot the full actual series, the test forecast and its approximate band.
plt.figure(figsize=(14, 7))
plt.plot(data.index, data['DJIA'], label='Actual Data', color='blue')
plt.plot(test_data.index, test_predictions, label='Predictions', color='red')
plt.fill_between(test_data.index,
                 test_predictions - interval_half_width,
                 test_predictions + interval_half_width,
                 color='red', alpha=0.2, label='95% Prediction Interval')
plt.xlabel('Date')
plt.ylabel('DJIA Value')
plt.title('Holt-Winters Forecast with Anomaly Detection')
plt.legend()
plt.show()

# Report the selected configuration and its validation score.
print(f'Best Parameters: {best_params}')
print(f'Validation MAE: {best_mae}')

# Anomaly detection on the test period: the residual is actual minus
# predicted, and a point is flagged when its absolute residual exceeds 1.96
# standard deviations of the test residuals.
residuals = test_data['DJIA'].sub(test_predictions)
threshold = np.std(residuals) * 1.96
anomalies = residuals.loc[residuals.abs() > threshold]

# Adding the prediction back to a flagged residual recovers the actual value,
# which is where the anomaly marker is drawn.
anomaly_values = anomalies + test_predictions.loc[anomalies.index]

# Plot the actual series, the test forecast, the +/- threshold band around the
# forecast and the flagged points.
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(data.index, data['DJIA'], label='Actual Data', color='blue')
ax.plot(test_data.index, test_predictions, label='Predictions', color='red')
ax.scatter(anomalies.index, anomaly_values, color='orange', label='Detected Anomalies')
ax.fill_between(
    test_data.index,
    test_predictions - threshold,
    test_predictions + threshold,
    color='red',
    alpha=0.2,
    label='Anomaly Threshold',
)
ax.set_xlabel('Date')
ax.set_ylabel('DJIA Value')
ax.set_title('Anomalies Detected with Holt-Winters Model')
ax.legend()
plt.show()



IsolationForest.py
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.metrics import make_scorer
import numpy as np

# Read the data set.
# NOTE(review): time.py renames 'DJIA(道琼斯指数值)' to 'DJIA' after loading;
# this script assumes data.csv already has a 'DJIA' column - confirm.
data = pd.read_csv('data.csv')

# Convert the '日期' column to datetimes and order the rows by date.
data['日期'] = pd.to_datetime(data['日期'])
data = data.sort_values('日期')

# Feature matrix: the DJIA value itself, kept two-dimensional (one column).
X = data.loc[:, ['DJIA']].to_numpy()

# Ordered (unshuffled) split into training / validation / test parts:
# 60% training, then the remaining 40% is halved into 20% validation and
# 20% test.
X_train, X_temp = train_test_split(X, shuffle=False, test_size=0.4)
X_valid, X_test = train_test_split(X_temp, shuffle=False, test_size=0.5)

# Hyper-parameter search space for the Isolation Forest
# (4 * 3 * 4 * 3 * 2 = 288 combinations in total).
# NOTE(review): the feature matrix built above has a single column ('DJIA'),
# so the fractional max_features values presumably all resolve to that one
# column - confirm whether this axis of the grid is needed.
param_space = {
    "n_estimators": [50, 100, 150, 200],
    "max_samples": [0.6, 0.8, 1.0],
    "contamination": [0.01, 0.05, 0.1, 0.2],
    "max_features": [0.5, 0.75, 1.0],
    "bootstrap": [True, False],
}

# Hold-out scoring function: mean decision-function value on the validation
# set (a single train/validation split, not a k-fold cross-validation).
def mean_anomaly_score(X_train, X_valid, params, random_state=42):
    """Fit an Isolation Forest and return its mean validation score.

    Args:
        X_train: Samples the forest is fitted on.
        X_valid: Samples the fitted forest is scored on.
        params: Keyword arguments for ``IsolationForest``. A 'random_state'
            entry in ``params`` takes precedence over ``random_state``.
        random_state: Seed used when ``params`` does not provide one. A fixed
            seed makes every candidate of the grid search use the same random
            draws, so score differences come from the parameters rather than
            from run-to-run randomness, and repeated runs give the same
            result.

    Returns:
        The mean of ``decision_function(X_valid)``; per the scikit-learn
        documentation, lower values mean more abnormal samples.
    """
    # Merge instead of passing random_state as a separate keyword so that a
    # 'random_state' key inside params cannot cause a duplicate-argument error.
    model = IsolationForest(**{'random_state': random_state, **params})
    model.fit(X_train)
    return model.decision_function(X_valid).mean()

# Expand the search space into the list of concrete parameter combinations.
grid = ParameterGrid(param_space)

# Score each combination and remember the one with the highest mean
# validation score; only a strictly higher score replaces the current best.
best_score, best_params = -np.inf, None

for params in grid:
    score = mean_anomaly_score(X_train, X_valid, params)
    if not score > best_score:
        continue
    best_score, best_params = score, params

# Retrain on the training set with the best parameter combination.
best_model = IsolationForest(**best_params)
best_model.fit(X_train)

# Label every test sample: predict() returns -1 for anomalies and 1 for
# normal points.
y_pred = best_model.predict(X_test)

# The test rows are the ones after the training and validation rows; keep
# only those labelled as anomalies.
is_anomaly = y_pred == -1
anomalies = data.iloc[len(X_train) + len(X_valid):][is_anomaly]

import matplotlib.pyplot as plt

# Draw the whole DJIA series and mark the detected anomalies on top of it.
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(data['日期'], data['DJIA'], label='DJIA', color='blue')
ax.scatter(anomalies['日期'], anomalies['DJIA'], color='red', label='Anomalies')
ax.set_title('DJIA 異常検知')
ax.set_xlabel('日期')
ax.set_ylabel('DJIA 値')
ax.legend()
plt.show()

# Reading the plot:
# the blue line shows the movement of the Dow Jones index and the red points
# mark the detected anomalies.




import shap
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Read the data set.
data = pd.read_csv('data.csv')

# Convert the '日期' column to datetimes and order the rows by date.
data['日期'] = pd.to_datetime(data['日期'])
data = data.sort_values('日期')

# Feature matrix: the DJIA value itself, kept two-dimensional (one column).
X = data.loc[:, ['DJIA']].to_numpy()

# Ordered (unshuffled) split: the first 60% of the rows for training, then the
# remaining 40% halved into validation and test parts.
X_train, X_temp = train_test_split(X, shuffle=False, test_size=0.4)
X_valid, X_test = train_test_split(X_temp, shuffle=False, test_size=0.5)

# Create an Isolation Forest with default settings and fit it on the
# training part.
model = IsolationForest()
model.fit(X_train)

# Compute SHAP values for the test samples, using the training data as the
# background passed to the explainer.
explainer = shap.Explainer(model, X_train)
shap_values = explainer(X_test)

# X is a bare NumPy array, so the column name has to be supplied explicitly.
# The same list is passed to both plots so that they are labelled
# consistently (the bar plot previously received no feature names).
feature_names = ['DJIA']

# Summary plot of the SHAP values.
shap.summary_plot(shap_values, X_test, feature_names=feature_names)

# Bar-type summary plot of feature importance.
shap.summary_plot(shap_values, X_test, feature_names=feature_names, plot_type="bar")

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do by signing up
0
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?