#例2
data=data.frame(prefecture=c("Hokkaido","Aomori","Iwate","Miyagi","Akita","Yamagata","Fukushima","Ibaragi","Totigi","Gumma","Saitama","Tiba","Tokyo","Kanagawa","Niigata","Toyama","Ishikawa","Fukui","Yamanashi","Nagano","Gifu","Shizuoka","Aichi","Mie","Shiga","Kyoto","Oosaka","Hyogo","Nara","Wakayama","Tottori","Shimane","Okayama","Hiroshima","Yamaguchi","Tokushima","Kagawa","Ehime","Kouchi","Hukuoka","Saga","Nagasaki","Kumamoto","Ooita","Miyazaki","Kagoshima"),x1=c(6364,7135,7266,7186,6274,6497,6740,6687,6917,6725,7928,7403,13500,8537,7315,6730,8875,7589,7249,6319,7673,7270,8149,6385,7049,10861,11548,9986,6728,7596,7324,6166,5889,8170,5147,6997,6460,6657,7979,7301,6564,8866,6928,6983,6016,7688),x2=c(90547,12634,20101,15486,12471,12446,27026,19486,19919,16611,42768,37630,147481,55648,27569,11709,13660,11161,10092,22957,20135,36377,48255,19562,9889,16925,146758,75350,16405,13007,8005,10334,22146,23029,15488,9905,7407,14795,12387,31172,8151,11117,14634,11504,11415,12546),x3=c(19.7,20,16.3,20.6,18.1,21.2,14.2,9.6,28.8,16.3,15.5,28.4,49.1,31.8,14.6,35.3,30.8,32.9,24.2,11.4,18.3,19.6,21.4,14.6,21.4,23.6,40.9,24.8,10.7,16.8,26.6,10.2,11,16.3,23.3,10.8,21.9,16.2,11.8,17.7,24.7,17.5,18.5,20.9,25.2,32.5),x4=c(640.9,128.2,113.8,171.7,97.4,134.4,166.5,225.1,187.5,212,337.3,319.4,1584.3,559.2,234.3,121.4,120,93.5,93.2,238.1,255.1,419.1,825.1,160.5,91.8,233.6,807.5,404.6,85.8,111,44.1,52.9,156.6,256.1,126.8,71.4,81.5,108.1,72.7,367,70.6,89.6,136.6,97.5,96.3,119.4),y=c(44523,10395,8669,12808,7640,7094,17556,20320,18715,16755,34601,27811,88406,48030,18213,8839,12086,8871,8961,15280,17837,37421,50998,14809,12610,35478,75497,55464,7916,14259,6244,5400,19015,36771,13827,9372,11088,9960,8516,51175,10384,10652,15987,10216,8460,12932))
Data=data %>% select(x1,x2,x3,x4,y)
library(dplyr)
Expect_and_sd_data=data.frame(value=c("mean","sd"),x1=c(mean(Data$x1),sd(Data$x1)),x2=c(mean(Data$x2),sd(Data$x2)),x3=c(mean(Data$x3),sd(Data$x3)),x4=c(mean(Data$x4),sd(Data$x4)),y=c(mean(Data$y),sd(Data$y)))
#偏差積和
A=array(0,dim=c(ncol(Data),ncol(Data)))
for(i in 1:ncol(A)){
for(j in 1:nrow(A)){
A[i,j]=sum((Data[,i]-mean(Data[,i]))*(Data[,j]-mean(Data[,j])))
}
}
inv_A=solve(A[1:(ncol(A)-1),1:(nrow(A)-1)])
#相関行列
correlation=array(0,dim=c(ncol(Data),ncol(Data)))
for(i in 1:ncol(correlation)){
for(j in 1:nrow(correlation)){
correlation[i,j]=(sum((Data[,i]-mean(Data[,i]))*(Data[,j]-mean(Data[,j]))))/((sum((Data[,i]-mean(Data[,i]))^2)*sum((Data[,j]-mean(Data[,j]))^2))^(1/2))
}
}
#偏差積和(y)
A_y=array(0,dim=c(1,ncol(Data)-1))
for(j in 1:ncol(A_y)){
A_y[1,j]=sum((Data[,j]-mean(Data[,j]))*(Data[,ncol(Data)]-mean(Data[,ncol(Data)])))
}
#偏回帰係数
B=array(0,dim=c(1,(ncol(Data)-1)))
for(j in 1:length(B)){
B[1,j]=sum(inv_A[j,]*A_y)
}
B0=mean(Data[,ncol(Data)])-sum(B*apply(Data[,1:(ncol(Data)-1)],2,mean))
B=cbind(B0,B)
#t値
ayy=A[ncol(A),nrow(A)]
RV=sum(A[1:(nrow(A)-1),ncol(A)]*B[1,2:length(B)])
EV=ayy-RV
V_e=EV/(nrow(Data)-(ncol(Data)-1)-1)
V_R=RV/(ncol(Data)-1)
t=array(0,dim=c(1,(ncol(Data)-1)))
for(j in 1:(length(t))){
t[1,j]=abs(B[1,1+j])/((inv_A[j,j]*V_e)^(1/2))
}
#t0
element=array(0,dim=c((ncol(Data)-1),(ncol(Data)-1)))
for(j in 1:(ncol(Data)-1)){
for(i in 1:(ncol(Data)-1)){
element=mean(Data[,i])*mean(Data[,j])*inv_A[i,j]
}
}
t0=abs(B0)/(((1/nrow(Data)+sum(element))*V_e)^(1/2))
t=cbind(t0,t)
#偏回帰係数の検定
Hypothesis_B_data=data.frame(num=0:(length(B)-1),B_value=0,percent5=0,percent1=0)
Hypothesis_B_data$B_value=t(B)
for(j in 1:nrow(Hypothesis_B_data)){
alpha=0.95
if(qt(alpha,(nrow(Data)-(ncol(Data)-1)-1))<=t[1,j]){
Hypothesis_B_data$percent5[j]=1
}
alpha=0.99
if(qt(alpha,(nrow(Data)-(ncol(Data)-1)-1))<=t[1,j]){
Hypothesis_B_data$percent1[j]=1
}
}
#1:有意,0:有意でない