More than 5 years have passed since last update.

時系列データから取り出せる特徴量

Last updated at 2020-01-23 / Posted at 2020-01-23

時系列データを分析する際に、部分時系列を切り出し、そこから特徴量を抽出することがあります。以下のコードでは、ノルム、基本統計量、パーセンタイル、ピークの高さや間隔、スペクトルエントロピーといった特徴量を抽出できます。

from scipy import fftpack, signal
import scipy
import numpy as np
import matplotlib.pyplot as plt


def pentropy(y, fs):
    """Return the spectral entropy of ``y`` sampled at frequency ``fs``.

    The periodogram of ``y`` is divided by its own mean and the result is
    passed to ``scipy.stats.entropy``, which rescales its input to sum to 1
    and evaluates the Shannon entropy with the natural logarithm.
    """
    # Only the power values matter here; the frequency axis is discarded.
    _, power = signal.periodogram(y, fs)
    scaled_power = power / power.mean()
    return scipy.stats.entropy(scaled_power)


def generate_features(y, x, fs=1, order=5):
    """
    Build a feature vector describing one (sub-)series.

    Parameters
    ----------
    y : np.ndarray
        1-D signal values.

    x : np.ndarray
        Sample positions of ``y`` (e.g. timestamps), same length as ``y``.

    fs : int
        Sampling frequency, forwarded to ``pentropy``.

    order : int
        Number of neighbouring samples on each side that a point is
        compared against when detecting local maxima / minima.

    Returns
    ----------
    np.ndarray
        27 values, in this order:

        ``L1`` norm, ``L2`` norm, mean, standard deviation, skewness,
        kurtosis, sum of absolute deviations from the mean, value range
        (max - min), asymmetry ((max - mean) + (min - mean)), highest local
        maximum, lowest local minimum, their difference, mean position of
        all local extrema, number of local extrema, largest / smallest
        interval between consecutive extrema, the difference of those two,
        mean and median interval, spectral entropy, followed by the
        0/1/25/50/75/99/100th percentiles expressed relative to the mean.

        Entries that need extrema which were not found (no local maximum,
        no local minimum, or fewer than two extrema for the intervals) are
        ``nan`` instead of raising.
    """
    # Accept any array-like; the fancy indexing below needs real ndarrays.
    y = np.asarray(y)
    x = np.asarray(x)

    # norm
    l1_norm = np.linalg.norm(y, ord=1)
    l2_norm = np.linalg.norm(y, ord=2)

    # mean
    mean = np.mean(y)

    # standard deviation
    std = np.std(y)

    # skewness
    skew = scipy.stats.skew(y)

    # kurtosis
    kurtosis = scipy.stats.kurtosis(y)

    # Sum (not mean) of absolute deviations from the mean; the historical
    # name "mad" is kept so the feature keeps its scale.
    mad = np.linalg.norm(y - mean, ord=1)

    # percentiles, absolute and relative to the mean
    percentiles = np.percentile(y, [0, 1, 25, 50, 75, 99, 100])
    relative_percentile = percentiles - mean

    # value range (max - min)
    max_range = percentiles[-1] - percentiles[0]

    # asymmetry: distance of the max above the mean plus the (negative)
    # distance of the min below it
    asymmetry = relative_percentile[-1] + relative_percentile[0]

    # indices of local extrema (argrel* return one index array per axis)
    max_idx = signal.argrelmax(y, order=order)[0]
    min_idx = signal.argrelmin(y, order=order)[0]

    # highest maximum / lowest minimum; nan when none was detected
    max_height = np.max(y[max_idx]) if max_idx.size else np.nan
    min_height = np.min(y[min_idx]) if min_idx.size else np.nan

    # peak height difference
    peak_width = max_height - min_height

    # Positions of all extrema in sample order. Ordering by index matters:
    # concatenating maxima and minima without sorting would put a bogus
    # jump (last maximum -> first minimum) into the interval statistics.
    extrema_idx = np.sort(np.concatenate([max_idx, min_idx]))
    x_p = x[extrema_idx]

    # mean position of the extrema
    mean_width_all = np.mean(x_p) if x_p.size else np.nan

    # number of extrema
    num_peak = x_p.size

    # statistics of the intervals between consecutive extrema
    intervals = np.diff(x_p)
    if intervals.size:
        width_max = np.max(intervals)
        width_min = np.min(intervals)
        width_diff = width_max - width_min
        width_mean = np.mean(intervals)
        width_median = np.median(intervals)
    else:
        # Fewer than two extrema: no interval exists.
        width_max = width_min = width_diff = np.nan
        width_mean = width_median = np.nan

    # spectral entropy
    se = pentropy(y, fs)

    # merge to features (20 scalar features + 7 relative percentiles)
    features = np.concatenate([np.asarray(
        [l1_norm, l2_norm, mean, std, skew, kurtosis, mad, max_range,
         asymmetry, max_height, min_height, peak_width,
         mean_width_all, num_peak, width_max, width_min, width_diff,
         width_mean, width_median, se]
    ), relative_percentile])

    return features


# Demo: extract features from a sine wave with Gaussian noise added.
x = np.linspace(0, 10, num=100)
yorg = np.sin(x)
y = yorg + 0.5 * np.random.randn(x.size)

features = generate_features(y, x, order=5)

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up