LoginSignup
4
7

More than 3 years have passed since last update.

時系列データから取り出せる特徴量

Last updated at Posted at 2020-01-23

時系列データを分析する際に、部分時系列を切り出して、そこから特徴量を抽出する場合があります。以下のコードでは、下記のような特徴量を抽出することが出来ます。

from scipy import fftpack, signal
import scipy
import numpy as np
import matplotlib.pyplot as plt


def pentropy(y, fs):
    """Return the spectral entropy of the signal ``y``.

    The periodogram of ``y`` (sampling frequency ``fs``) is divided by its
    mean and passed to ``scipy.stats.entropy``, which treats the values as
    weights of a distribution over frequency bins.

    Parameters
    ----------
    y : np.ndarray
        Signal samples.

    fs : int
        Sampling frequency handed to ``signal.periodogram``.

    Returns
    ----------
    float
        Entropy of the scaled power spectrum.
    """
    _, power = signal.periodogram(y, fs)
    scaled_power = power / np.mean(power)
    return scipy.stats.entropy(scaled_power)


def generate_features(y, x, fs=1, order=5):
    """
    Extract a fixed-length feature vector from a 1-D (sub-)time series.

    Parameters
    ----------
    y : np.ndarray
        Signal values (1-D).

    x : np.ndarray
        Sample positions such as timestamps, same length as ``y``.

    fs : int
        Sampling frequency, forwarded to ``pentropy``.

    order : int
        Number of neighbouring samples on each side that
        ``signal.argrelmax`` / ``signal.argrelmin`` compare against when
        detecting local extrema.

    Returns
    ----------
    np.ndarray
        Feature vector with 27 entries, in this order:
        L1 norm, L2 norm, mean, std, skewness, kurtosis,
        sum of absolute deviations from the mean, value range, asymmetry,
        highest local maximum, lowest local minimum, their difference,
        mean x-position of all extrema, number of extrema,
        max / min / (max - min) / mean / median spacing between
        consecutive extrema, spectral entropy, followed by the
        0, 1, 25, 50, 75, 99 and 100th percentiles relative to the mean.
        Features that cannot be computed because too few extrema were
        detected are returned as NaN.
    """
    # Accept array-likes: the fancy indexing below needs real ndarrays.
    y = np.asarray(y)
    x = np.asarray(x)

    # norms
    L1 = np.linalg.norm(y, ord=1)
    L2 = np.linalg.norm(y, ord=2)

    # first four moments
    mean = np.mean(y)
    std = np.std(y)
    skew = scipy.stats.skew(y)
    kurtosis = scipy.stats.kurtosis(y)

    # NOTE: this is the *sum* of absolute deviations from the mean
    # (len(y) times the mean absolute deviation); the scale is kept
    # unchanged for compatibility with existing feature vectors.
    mad = np.linalg.norm(y - mean, ord=1)

    # percentiles, and the same percentiles relative to the mean
    percentiles = np.percentile(y, [0, 1, 25, 50, 75, 99, 100])
    relative_percentile = percentiles - mean

    # value range (max - min)
    max_range = percentiles[-1] - percentiles[0]

    # asymmetry: (max - mean) + (min - mean)
    asymmetry = relative_percentile[-1] + relative_percentile[0]

    # indices of local maxima / minima
    maxid = signal.argrelmax(y, order=order)
    minid = signal.argrelmin(y, order=order)

    # Heights of the extrema. A window without any local maximum or
    # minimum yields NaN instead of raising on an empty reduction.
    peak_highs = y[maxid]
    peak_lows = y[minid]
    max_height = np.max(peak_highs) if peak_highs.size else np.nan
    min_height = np.min(peak_lows) if peak_lows.size else np.nan

    # difference between the highest maximum and the lowest minimum
    peak_width = max_height - min_height

    # Positions of all extrema in the order they occur in the series.
    # The indices must be sorted before differencing; otherwise the diff
    # contains a spurious jump from the last maximum to the first minimum.
    extrema_idx = np.sort(np.concatenate([maxid[0], minid[0]]))
    x_p = x[extrema_idx]

    # number of extrema and their mean position
    num_peak = len(x_p)
    mean_width_all = np.mean(x_p) if num_peak else np.nan

    # spacing between consecutive extrema
    diff1 = np.diff(x_p)
    if diff1.size:
        width_max = np.max(diff1)
        width_min = np.min(diff1)
        width_diff = width_max - width_min
        width_mean = np.mean(diff1)
        width_median = np.median(diff1)
    else:
        # fewer than two extrema: no spacing can be measured
        width_max = width_min = width_diff = np.nan
        width_mean = width_median = np.nan

    # spectral entropy
    se = pentropy(y, fs)

    # merge into one vector: 20 scalar features + 7 relative percentiles
    features = np.concatenate([np.asarray(
        [L1, L2, mean, std, skew, kurtosis, mad, max_range,
         asymmetry, max_height, min_height, peak_width,
         mean_width_all, num_peak, width_max, width_min, width_diff,
         width_mean, width_median, se]
    ), relative_percentile])

    return features


# Demo input: a sine wave sampled at 100 points on [0, 10] with Gaussian noise
x = np.linspace(0, 10, num=100)
yorg = np.sin(x)
y = yorg + 0.5 * np.random.randn(x.size)

# Extract the feature vector from the noisy signal
features = generate_features(y, x, order=5)
4
7
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
4
7