Python
統計
主成分分析
多変量解析

Python - 数値データファイルからデータを読み込み分散共分散行列、固有値、固有ベクトルを求める

More than 1 year has passed since last update.

以下のようなタブ区切りの数値データファイル(1行が1変数のデータ系列)からデータを読み込み、2変数の分散共分散行列、固有値、固有ベクトルを求めるプログラム。


MultipleRegressionAnalysis_Data2.txt

23.3    19.8    14.7    19.7    16.9    14.4

5.24 -5.23 -7.95 11.70 -2.44 -2.14


MultipleRegressionAnalysis_3.py

import numpy as np

import math

def load_data(filename):
    """Read a tab-separated numeric text file into a list of float rows.

    Every non-blank line of the file becomes one inner list: the line is
    stripped, split on tab characters and each field is converted with
    ``float``.  Lines that contain only whitespace (for example an extra
    empty line at the end of the file) are skipped, because ``float('')``
    would otherwise raise ``ValueError``.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list containing one list of floats per non-blank line, in file
        order.  An empty file yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field of a non-blank line is not a valid float.
    """
    with open(filename) as lines:
        # Strip first so that blank lines can be filtered out before parsing.
        stripped_lines = (line.strip() for line in lines)
        return [[float(field) for field in stripped.split('\t')]
                for stripped in stripped_lines if stripped]

def calc_sum_of_square(data_list1, data_list2):
    """Return the sum of products of deviations of two data series.

    Computes ``S_xy = sum((x - mean_x) * (y - mean_y))``.  Passing the same
    series for both arguments gives the sum of squared deviations ``S_xx``.
    The value is NOT divided by the degrees of freedom; that is left to the
    caller, so that the division happens exactly once.

    Args:
        data_list1: First sequence of numbers.
        data_list2: Second sequence of numbers, same length as the first.

    Returns:
        The sum of products of deviations as a float.

    Raises:
        ValueError: If the series are empty or differ in length.
    """
    count = len(data_list1)
    if count != len(data_list2):
        # zip() would silently truncate and give a meaningless result.
        raise ValueError("data series must have the same length")
    if count == 0:
        raise ValueError("data series must not be empty")
    mean1 = sum(data_list1) / count
    mean2 = sum(data_list2) / count
    # Mean-centred form: avoids the cancellation of n*sum(xy) - sum(x)*sum(y).
    return sum((value1 - mean1) * (value2 - mean2)
               for value1, value2 in zip(data_list1, data_list2))


def calc_variance_covariance_matrix(data_list):
    """Return the unbiased sample variance-covariance matrix of the series.

    Each element of ``data_list`` is treated as one variable's series of
    observations.  Entry ``[i][j]`` of the result is the sum of products of
    deviations of series ``i`` and ``j`` divided by ``n - 1``.

    Args:
        data_list: List of equally long numeric sequences.

    Returns:
        A square list-of-lists matrix with ``len(data_list)`` rows.

    Raises:
        ValueError: If a series has fewer than two observations or the
            series differ in length.
    """
    for series in data_list:
        if len(series) < 2:
            raise ValueError(
                "each data series needs at least two observations")
    # calc_sum_of_square returns the raw sum of products, so dividing by
    # n - 1 exactly once here gives the unbiased estimate.
    return [[calc_sum_of_square(data_list1, data_list2) / (len(data_list1) - 1)
             for data_list1 in data_list]
            for data_list2 in data_list]

def calc_engenvalue_2d(mat):
    """Return the two eigenvalues of a 2x2 matrix, larger one first.

    Uses the closed-form solution of the characteristic equation.  Only
    ``mat[0][1]`` is read for the off-diagonal term (it is squared), so the
    formula is the one for a symmetric matrix.

    Args:
        mat: 2x2 matrix as nested sequences.

    Returns:
        ``[lambda1, lambda2]`` with ``lambda1 >= lambda2``.
    """
    top_left = mat[0][0]
    bottom_right = mat[1][1]
    off_diagonal = mat[0][1]

    trace = top_left + bottom_right
    # Square root of the discriminant of the characteristic polynomial.
    root = math.sqrt((top_left - bottom_right)**2 + 4*(off_diagonal**2))
    return [(trace + root) / 2, (trace - root) / 2]

def calc_engenvector_2d(mat, engenvalue):
    """Return unit eigenvectors of a symmetric 2x2 matrix.

    For each eigenvalue ``lam`` the vector ``(x, y)`` solves
    ``(mat[0][0] - lam) * x + mat[0][1] * y = 0`` and is scaled to unit
    length with a non-negative first component.

    A zero off-diagonal element (a diagonal matrix) is handled separately:
    the general formula would divide by zero, and the eigenvectors are simply
    the coordinate axes.

    Args:
        mat: Symmetric 2x2 matrix as nested sequences.
        engenvalue: Sequence ``[lambda1, lambda2]`` of the eigenvalues of
            ``mat``, e.g. as returned by ``calc_engenvalue_2d``.

    Returns:
        ``[[a11, a12], [a21, a22]]`` where row ``i`` is the eigenvector that
        belongs to ``engenvalue[i]``.
    """
    top_left = mat[0][0]
    off_diagonal = mat[0][1]

    if off_diagonal == 0:
        # Diagonal matrix: the axes are the eigenvectors.  Decide which axis
        # belongs to the first eigenvalue by checking which diagonal entry it
        # is closer to; ties (equal diagonal entries) keep the natural order.
        x_axis, y_axis = [1.0, 0.0], [0.0, 1.0]
        first_is_x = (math.fabs(engenvalue[0] - top_left)
                      <= math.fabs(engenvalue[0] - mat[1][1]))
        return [x_axis, y_axis] if first_is_x else [y_axis, x_axis]

    vectors = []
    for lam in (engenvalue[0], engenvalue[1]):
        # The norm is strictly positive here because off_diagonal != 0.
        first = (math.fabs(off_diagonal)
                 / math.sqrt((lam - top_left)**2 + off_diagonal**2))
        second = (lam - top_left) * first / off_diagonal
        vectors.append([first, second])
    return vectors

if __name__ == "__main__":
    # Load the data series and build the matrix the later steps work on.
    series = load_data('MultipleRegressionAnalysis_Data2.txt')
    cov_matrix = calc_variance_covariance_matrix(series)
    print("- 分散共分散行列 -")
    # numpy is used only to pretty-print the nested list as a matrix.
    print(np.array(cov_matrix))

    eigenvalues = calc_engenvalue_2d(cov_matrix)
    print("- 固有値 -")
    print(eigenvalues)

    eigenvectors = calc_engenvector_2d(cov_matrix, eigenvalues)
    print("- 固有ベクトル -")
    # One line per eigenvector, written as a linear combination of x1 and x2.
    # A "+" is inserted only for a positive x2 coefficient; negative values
    # already carry their own sign.
    for index, (coef_x1, coef_x2) in enumerate(eigenvectors, start=1):
        sign = "+" if coef_x2 > 0 else ""
        print("z{} = {} x1 {}{} x2".format(index, coef_x1, sign, coef_x2))



結果

> python MultipleRegressionAnalysis_3.py

- 分散共分散行列 -
[[ 2.36693333 2.99921333]
[ 2.99921333 10.61296533]]
- 固有値 -
[11.58843357427205, 1.3914650923945988]
- 固有ベクトル -
z1 = 0.30929366776230544 x1 +0.9509665751655736 x2
z2 = 0.9509665751655736 x1 -0.3092936677623057 x2