時系列データを分析する際に、部分時系列を切り出して特徴量を抽出する場合があります。下記のコードでは、そのような部分時系列から次のような特徴量を抽出することができます。
import matplotlib.pyplot as plt
import numpy as np
import scipy
import scipy.stats
from scipy import fftpack, signal
def pentropy(y, fs):
    """Return the spectral entropy of a signal.

    The power spectral density of ``y`` is estimated with
    ``scipy.signal.periodogram`` and the Shannon entropy of that spectrum
    is computed with ``scipy.stats.entropy``.

    Parameters
    ----------
    y : np.ndarray
        Signal samples.
    fs : float
        Sampling frequency handed to the periodogram.

    Returns
    -------
    float
        Entropy of the power spectrum.
    """
    # Only the power values are needed; the frequency axis is discarded.
    _, power = signal.periodogram(y, fs)
    # Rescale the spectrum by its mean power before taking the entropy.
    scaled_power = power / np.mean(power)
    return scipy.stats.entropy(scaled_power)
def _reduce_or_nan(reducer, values):
    """Return ``reducer(values)``, or NaN when ``values`` is empty."""
    return reducer(values) if values.size else np.nan


def generate_features(y, x, fs=1, order=5):
    """Extract a fixed-length feature vector from one (sub-)series.

    Parameters
    ----------
    y : array_like
        Signal values (1-D).
    x : array_like
        Sample positions (e.g. timestamps), same length as ``y``.
    fs : float, optional
        Sampling frequency used for the spectral entropy.
    order : int, optional
        Number of neighbouring samples on each side that a point is
        compared with when detecting local maxima / minima.

    Returns
    -------
    np.ndarray
        27 values, in this order:

        0. L1 norm of ``y``
        1. L2 norm of ``y``
        2. mean
        3. standard deviation
        4. skewness
        5. kurtosis
        6. sum of absolute deviations from the mean
        7. value range (max - min)
        8. asymmetry, ``(max - mean) + (min - mean)``
        9. highest local maximum
        10. lowest local minimum
        11. difference between 9 and 10
        12. mean position (``x``) of all local extrema
        13. number of local extrema
        14. largest spacing between consecutive extrema
        15. smallest spacing between consecutive extrema
        16. difference between 14 and 15
        17. mean spacing
        18. median spacing
        19. spectral entropy
        20-26. percentiles 0, 1, 25, 50, 75, 99, 100 of ``y`` minus the mean

        Peak-related entries are NaN when too few extrema are found to
        compute them (no maximum, no minimum, or fewer than two extrema
        for the spacing statistics).
    """
    # Accept lists as well as arrays; the fancy indexing below needs ndarrays.
    y = np.asarray(y)
    x = np.asarray(x)

    # Norms and moments.
    l1_norm = np.linalg.norm(y, ord=1)
    l2_norm = np.linalg.norm(y, ord=2)
    mean = np.mean(y)
    std = np.std(y)
    skew = scipy.stats.skew(y)
    kurtosis = scipy.stats.kurtosis(y)
    # NOTE(review): this is the *sum* of absolute deviations (an L1 norm),
    # not divided by the sample count. Kept as-is so existing feature
    # values do not change scale.
    abs_dev_sum = np.linalg.norm(y - mean, ord=1)

    # Percentiles, raw and relative to the mean.
    percentiles = np.percentile(y, [0, 1, 25, 50, 75, 99, 100])
    relative_percentile = percentiles - mean
    max_range = percentiles[-1] - percentiles[0]
    asymmetry = relative_percentile[-1] + relative_percentile[0]

    # Indices of local maxima / minima (argrel* return a 1-tuple for 1-D).
    max_idx = signal.argrelmax(y, order=order)[0]
    min_idx = signal.argrelmin(y, order=order)[0]

    # Peak heights; NaN instead of a ValueError when no extremum exists.
    max_height = _reduce_or_nan(np.max, y[max_idx])
    min_height = _reduce_or_nan(np.min, y[min_idx])
    peak_height_diff = max_height - min_height

    # Put maxima and minima in sample order before differencing; without
    # the sort, the jump from the last maximum back to the first minimum
    # shows up as a spurious (negative) spacing.
    extrema_idx = np.sort(np.concatenate((max_idx, min_idx)))
    peak_pos = x[extrema_idx]
    peak_pos_mean = _reduce_or_nan(np.mean, peak_pos)
    num_peak = peak_pos.size

    # Spacing statistics between consecutive extrema.
    spacing = np.diff(peak_pos)
    width_max = _reduce_or_nan(np.max, spacing)
    width_min = _reduce_or_nan(np.min, spacing)
    width_diff = width_max - width_min
    width_mean = _reduce_or_nan(np.mean, spacing)
    width_median = _reduce_or_nan(np.median, spacing)

    # Spectral entropy.
    se = pentropy(y, fs)

    # 20 scalar features followed by the 7 relative percentiles (27 total).
    scalars = np.asarray([
        l1_norm, l2_norm, mean, std, skew, kurtosis, abs_dev_sum, max_range,
        asymmetry, max_height, min_height, peak_height_diff,
        peak_pos_mean, num_peak, width_max, width_min, width_diff,
        width_mean, width_median, se,
    ])
    return np.concatenate([scalars, relative_percentile])
# Demo input: 100 evenly spaced sample positions on [0, 10].
x = np.linspace(start=0, stop=10, num=100)
# Clean sine wave and a noisy copy (Gaussian noise scaled by 0.5, unseeded).
yorg = np.sin(x)
y = yorg + 0.5 * np.random.randn(100)
# Feature vector of the noisy series.
features = generate_features(y=y, x=x, order=5)