
#Clustering methods (from サービスサイエンスの事訳(ことわけ)―データサイエンスと数理科学の融合に向けて by Prof. Hideaki Takagi)

library(dplyr)

matrix=matrix(c(46.165,43.965,45.332,48,45.182,46.698,45.066,43.466,45.799,48.166,45.666,44.432,45.832,44.833,44.199,48.333,44.024,45.199,45.332,43.266,44.766,46.533,45.758,45.233,45.632,41.199,45.099,47.683,47.124,43.766,44.932,40.865,45.124,47.633,44.965,46.466,44.599,44.233,45.166,48.515,43.765,43.532,44.333,42.932,43.165,45.765,45.366,44.198),ncol=8,nrow=6)

matrix=t(matrix)

colnames(matrix)=c("床運動","あん馬","吊り輪","とび馬","平行棒","鉄棒")

rownames(matrix)=c("USA","Russian","England","german","japan","china","Ukraine","France")

#Cosine similarity

cosine_similarity=array(0,dim=c(nrow(matrix),nrow(matrix)))

for(j in 1:nrow(cosine_similarity)){
for(i in 1:ncol(cosine_similarity)){  

cosine_similarity[i,j]= sum(matrix[i,]*matrix[j,])/sqrt(sum(matrix[i,]^2)*sum(matrix[j,]^2))

}  
}
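#Vectorized cross-check (my own sketch, not from the book): the same cosine
#similarity matrix in a single matrix product.

norms=sqrt(rowSums(matrix^2))

all.equal(as.vector(matrix%*%t(matrix)/(norms%o%norms)),as.vector(cosine_similarity))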

#Correlation coefficient (between countries)

cor=cor(t(matrix))

#Deviation pattern similarity

deviation_pattern=array(0,dim=c(nrow(matrix),nrow(matrix)))

average=apply(matrix,2,mean)


for(i in 1:nrow(deviation_pattern)){
for(j in 1:ncol(deviation_pattern)){  

deviation_pattern[i,j]=sum((matrix[i,]-average)*(matrix[j,]-average))/sqrt(sum((matrix[i,]-average)^2)*sum((matrix[j,]-average)^2))

}
}
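#Vectorized cross-check (my own sketch): deviation-pattern similarity is the
#cosine similarity of the rows after removing the column (event) means.

M_c=sweep(matrix,2,average)

norms_c=sqrt(rowSums(M_c^2))

all.equal(as.vector(M_c%*%t(M_c)/(norms_c%o%norms_c)),as.vector(deviation_pattern))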

#Dissimilarity transforms of deviation_pattern

#d:exp(-s)

d1_deviation_pattern=exp(-deviation_pattern)

#d:1-s

d2_deviation_pattern=1-deviation_pattern


#d:1/(1+s)

d3_deviation_pattern=1/(1+deviation_pattern)

#d:cor,cosine_similarity

d_cor2=((1-cor)/2)

d_cosine_similarity2=((1-cosine_similarity)/2)

#p.18

X_hat=apply(matrix,1,function(x) x-mean(x)) #columns are the mean-centered country profiles

for(j in 1:ncol(X_hat)){

X_hat[,j]=X_hat[,j]/sqrt(sum(X_hat[,j]^2)) #scale each column to unit norm

}

#Property of X_hat (2.5): each column's sum of squares equals 1

apply(X_hat^2,2,sum)

#d_hat2 is the squared Euclidean distance

d_hat2=array(0,dim=c(ncol(X_hat),ncol(X_hat)))

for(i in 1:ncol(d_hat2)){
for(j in 1:nrow(d_hat2)){  

d_hat2[i,j]=sum((X_hat[,i]-X_hat[,j])^2)

}
}

S=t(X_hat)%*%X_hat #similarity matrix between countries

diff_S=(1-S);

diag(diff_S)=floor(abs(diag(diff_S)))

#Check of (2.4): d_hat2 = 2*(1-S)

floor(d_hat2*100)/100==floor(2*diff_S*100)/100

#p.21 properties of distances

city_block=array(0,dim=c(nrow(matrix),nrow(matrix)))

for(i in 1:nrow(matrix)){
for(j in 1:nrow(matrix)){  

city_block[i,j]=sum(abs(matrix[i,]-matrix[j,]) )

}  
}

Euclid=array(0,dim=c(nrow(matrix),nrow(matrix)))

for(j in 1:nrow(matrix)){
for(i in 1:nrow(matrix)){  

Euclid[i,j]=sqrt(sum((matrix[i,]-matrix[j,])^2))    

}
}

Minkowski=array(0,dim=c(nrow(matrix),nrow(matrix)))

r=2

for(j in 1:nrow(matrix)){
for(i in 1:nrow(matrix)){  

Minkowski[i,j]=(sum(abs(matrix[i,]-matrix[j,])^r))^(1/r)  

}
}

chebyshev=array(0,dim=c(nrow(matrix),nrow(matrix)))

for(j in 1:nrow(matrix)){
for(i in 1:nrow(matrix)){

chebyshev[i,j]=max(abs(matrix[i,]-matrix[j,]))  

}
}

standard_euclid=array(0,dim=c(nrow(matrix),nrow(matrix)))

for(j in 1:nrow(matrix)){
for(i in 1:nrow(matrix)){

standard_euclid[i,j]=sqrt(sum((matrix[i,]-matrix[j,])^2/apply(matrix,2,var))) #divide each squared difference by the variable's variance

}
}  

Mahalanobis=array(0,dim=c(nrow(matrix),nrow(matrix)))

for(j in 1:nrow(matrix)){
for(i in 1:nrow(matrix)){

Mahalanobis[i,j]=(matrix[i,]-matrix[j,])%*%solve(cov(matrix))%*%(matrix[i,]-matrix[j,])

}
}
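#Cross-checks (my own sketch, not from the book): base R's dist() reproduces
#the city block, Euclidean and Chebyshev matrices, and mahalanobis() gives one
#column of the squared Mahalanobis matrix at a time.

all.equal(as.matrix(dist(matrix,method="manhattan")),city_block,check.attributes=FALSE)

all.equal(as.matrix(dist(matrix,method="euclidean")),Euclid,check.attributes=FALSE)

all.equal(as.matrix(dist(matrix,method="maximum")),chebyshev,check.attributes=FALSE)

all.equal(as.vector(mahalanobis(matrix,matrix[1,],cov(matrix))),Mahalanobis[,1])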

#Hotelling T2 theory

#multivariate normal distribution

library(mvtnorm)

N=100000;p=ncol(matrix)

samples=array(0,dim=c(N,ncol(matrix)))

x0=rmvnorm(1,apply(matrix,2,mean),cov(matrix))

for(j in 1:nrow(samples)){

samples[j,]=rmvnorm(1,apply(matrix,2,mean),cov(matrix))

}
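#Note (my own sketch): the loop above can be replaced by a single call, since
#rmvnorm draws all N rows at once:
#samples=rmvnorm(N,apply(matrix,2,mean),cov(matrix))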

mu=apply(samples,2,mean);sigma=cov(samples)

T2=function(x){

  return(((N-p)/((N+1)*p))*sum((x-mu)%*%solve(sigma)*(x-mu)))

}

data=data.frame(num=1:N,T2=0)

for(j in 1:nrow(data)){

data$T2[j]=T2(samples[j,])  

}

data$T2=round(data$T2*100)/100

num=sort(unique(data$T2))

result=data.frame(num=num,prob=0)

for(j in 1:nrow(result)){

result$prob[j]=length(data$T2[data$T2<=num[j]])/N    

}


result=result %>% dplyr::mutate(f_dis=0)

for(j in 1:nrow(result)){

result$f_dis[j]=pf(result$num[j],p,N-p)  

}

SN=10*log10((x0-mu)^2/diag(sigma))


#Hotelling T2 and f_distribution plots

plot(result$num,result$prob,col=2,type="h",xlim=c(min(result$num),max(result$num)),ylim=c(min(result$prob),max(result$prob)),xlab="T2",ylab="prob")

par(new=T)

plot(result$num,result$f_dis,col=3,type="p",xlim=c(min(result$num),max(result$num)),ylim=c(min(result$prob),max(result$prob)),xlab="T2",ylab="prob")
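#Check (my own sketch): the scaled T2 statistic is F(p, N-p) distributed under
#multivariate normality, so the empirical and theoretical CDFs should be close.

max(abs(result$prob-result$f_dis))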


#Approximation formulas

N=100000;p=6

#2^(-p/2)

prod(c(((N-p)/2):((N-1)/2))/rep((N-p),p/2))

#exp(-0.1*p/2)

(1+(p*0.1)/(N-p))^(-N/2)

gamma_f=function(x){

  z<-(p/(2*gamma(p/2)))*exp(-p*x/2)*(p*x/2)^((p/2)-1)

  return(z)

}

gamma_f_data=data.frame(x=seq(0,5,0.01),values=0,dis=0)

for(j in 1:nrow(gamma_f_data)){

gamma_f_data$values[j]=gamma_f(gamma_f_data$x[j])  

gamma_f_data$dis[j]=integrate(gamma_f,0,gamma_f_data$x[j])$value

}

plot(gamma_f_data$x,gamma_f_data$values,type="h",col=2)

plot(gamma_f_data$x,gamma_f_data$dis,type="h",col=2)
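#Cross-check (my own sketch): gamma_f is the Gamma(shape=p/2, rate=p/2) density
#(a chi-square with p df divided by p), so its integral should match pgamma up
#to integration error.

all.equal(gamma_f_data$dis,pgamma(gamma_f_data$x,shape=p/2,rate=p/2),tolerance=1e-6)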

#Asymptotic approximation

plot(result$num,result$prob,col=2,type="h",xlim=c(min(result$num),max(result$num,gamma_f_data$x)),ylim=c(min(result$prob,gamma_f_data$dis),max(result$prob,gamma_f_data$dis)),xlab="T2",ylab="prob")

par(new=T)

plot(gamma_f_data$x,gamma_f_data$dis,col=3,type="p",xlim=c(min(result$num),max(result$num,gamma_f_data$x)),ylim=c(min(result$prob,gamma_f_data$dis),max(result$prob,gamma_f_data$dis)),xlab="T2",ylab="prob")


#2.3.4 Numerical example

matrix2=matrix[rownames(matrix)!="japan",]

average2=apply(matrix2,2,mean)

sigma2=cov(matrix2)

solve_sigma2=solve(sigma2)

#10*log10((matrix2[1,1]-average2[1])^2/diag(sigma2)[1])


#Shrinking distances when points are treated as one cluster (パターン認識と学習機械 p.42)

#sum(W)=1

N=100

char=10

X=array(0,dim=c(N,char))

d_data_real=data.frame(x=0,y=0,d=0)

d_real_mat=array(0,dim=c(char,char))

for(j in 1:char){

X[,j]=rnorm(N,mean=sample(1:3,1),sd=sample(1:3,1))

}

for(j in 1:char){
for(i in 1:char){ 

if(j>i){  

d_real_mat[i,j]=sqrt(sum((X[i,]-X[j,])^2))  

d_data_real=rbind(d_data_real,data.frame(x=i,y=j,d=sqrt(sum((X[i,]-X[j,])^2)))) 

}
}
}

d_data_real=tail(d_data_real,nrow(d_data_real)-1)

library(rgl)

plot3d(d_data_real$x,d_data_real$y,d_data_real$d,xlab="x",ylab="y",zlab="d",col=3,type="s")


W=(1/apply(X,2,var))/sum(1/apply(X,2,var))

beta=sum(W)/sum(1/apply(X,2,var))

D_bar=(2*N/(N-1))*sum(W*apply(X,2,var))

W_mat=diag(W)

d_mat=array(0,dim=c(char,char))

d_data=data.frame(x=0,y=0,d=0)

for(i in 1:char){
for(j in 1:char){ 

if(i<j){  

d_mat[i,j]=sqrt(sum(((X[i,]-X[j,])%*%W_mat)^2))

d_data=rbind(d_data,data.frame(x=i,y=j,d=d_mat[i,j]))

}

}
}

d_data=tail(d_data,nrow(d_data)-1)

library(rgl)

plot3d(d_data$x,d_data$y,d_data$d,col=2,type="s",xlab="x",ylab="y",zlab="d")


#Shrinking distances when points are treated as one cluster (パターン認識と学習機械 p.42)

#prod(W)=1

W=((1/apply(X,2,var))*prod(apply(X,2,var))^(1/char))

lambda=(prod(apply(X,2,var))^(1/char))^2

D_bar=(2*N/(N-1))*sum(W*apply(X,2,var))

W_mat=diag(W)

d_data=data.frame(x=0,y=0,d=0)

for(i in 1:char){
for(j in 1:char){ 

if(i<j){  

d_mat[i,j]=sqrt(sum(((X[i,]-X[j,])%*%W_mat)^2))

d_data=rbind(d_data,data.frame(x=i,y=j,d=d_mat[i,j]))

}

}
}

d_data=tail(d_data,nrow(d_data)-1)

library(rgl)

plot3d(d_data$x,d_data$y,d_data$d,col=2,type="s",xlab="x",ylab="y",zlab="d")



#Between-class separation

N=100

char=10

A=array(0,dim=c(N,char))

for(j in 1:char){

A[,j]=rnorm(N,mean=sample(1:3,1),sd=sample(1:3,1))  

}

B=array(0,dim=c(N,char))

for(j in 1:char){

B[,j]=rnorm(N,mean=sample(5:7,1),sd=sample(1:3,1))  

}

x=array(0,dim=c(char,char))

for(j in 1:ncol(x)){
for(i in 1:nrow(x)){  

  value=0

  for(k in 1:nrow(A)){

    value=value+sum((A[k,i]-B[,i])*(A[k,j]-B[,j]))

  }

 x[i,j]=value/(char*char) 

}
}

y=array(0,dim=c(char,char))

#All patterns between A and B: z

z=array(0,dim=c(2*char,char))

for(i in 1:ncol(z)){
for(j in 1:nrow(z)){  

#if(i!=j){  

z[j,i]=floor(abs(rnorm(1,mean=i,sd=1)+rnorm(1,mean=j,sd=1) ))

#} 
}  
}

for(j in 1:ncol(y)){
for(i in 1:nrow(y)){  

  value=0

  for(k in 1:nrow(z)){

    value=value+sum((z[k,i]-z[,i])*(z[k,j]-z[,j]))

  }

 y[i,j]=value/(char*(char-1)/2) 

}
}

eigen_values=eigen(x%*%solve(y))$values

W=eigen_vectors=eigen(x%*%solve(y))$vectors

K=0

for(j in 1:ncol(eigen_vectors)){

K=K+eigen_vectors[,j]%*%y%*%t(t(eigen_vectors[,j]))

}

d_max=max(eigen_values)*K


#Potential method (p.61)

N=100

char=10

A=array(0,dim=c(N,char))

for(j in 1:char){

A[,j]=rnorm(N,mean=sample(1:3,1),sd=sample(1:3,1))  

}

A=apply(A,1,mean)

B=array(0,dim=c(N,char))

for(j in 1:char){

B[,j]=rnorm(N,mean=sample(5:7,1),sd=sample(1:3,1))  

}

B=apply(B,1,mean)

num_A=sort(unique(round(abs(A))))

num_B=sort(unique(round(abs(B))))

potential_A=function(s,alpha){

z<-(1/length(num_A))*sum(1/(1+alpha*(s-num_A)^2))  

return(z)

}

potential_B=function(s,alpha){

z<-(1/length(num_B))*sum(1/(1+alpha*(s-num_B)^2))  

return(z)

}

X=data.frame(values=rnorm(10,mean=2,sd=5),A=0,B=0)

alpha=0.1

for(j in 1:nrow(X)){

X$A[j]=potential_A(X$values[j],alpha)

X$B[j]=potential_B(X$values[j],alpha)

}
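#Usage sketch (my own addition, not from the book): assign each point to the
#class with the larger potential value.

X$class=ifelse(X$A>X$B,"A","B")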



#Multidimensional scaling

library(dplyr)

matrix=matrix(c(46.165,43.965,45.332,48,45.182,46.698,45.066,43.466,45.799,48.166,45.666,44.432,45.832,44.833,44.199,48.333,44.024,45.199,45.332,43.266,44.766,46.533,45.758,45.233,45.632,41.199,45.099,47.683,47.124,43.766,44.932,40.865,45.124,47.633,44.965,46.466,44.599,44.233,45.166,48.515,43.765,43.532,44.333,42.932,43.165,45.765,45.366,44.198),ncol=8,nrow=6)

matrix=t(matrix)

colnames(matrix)=c("床運動","あん馬","吊り輪","とび馬","平行棒","鉄棒")

rownames(matrix)=c("USA","Russian","England","german","japan","china","Ukraine","France")

X=as.matrix(matrix)

X=t(t(X)-apply(matrix,2,mean))

p=ncol(X)

d_mat=array(0,dim=c(nrow(X),nrow(X)))

for(i in 1:ncol(d_mat)){
for(j in 1:nrow(d_mat)){


d_mat[j,i]=sum((X[i,]-X[j,])^2)  


}
}

B=(X)%*%t(X)


eigen_values=eigen(B)$values

eigen_values2=eigen_values[eigen_values>10^(-10)]

eigen_vector=eigen(B)$vectors

lambda=diag(eigen_values2)

lambda2=diag(sqrt(eigen_values2))

eigen_vector2=eigen_vector[,1:length(eigen_values2)]

X_hat=eigen_vector2%*%lambda2

A=solve(t(X_hat)%*%X_hat)%*%t(X_hat)%*%X

B=(X_hat)%*%t(X_hat)

X_hat%*%A
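#Check (my own sketch): the columns of X lie in the span of X_hat, so the
#regression above reconstructs X exactly; this should be numerically zero.

max(abs(X_hat%*%A-X))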


#Factor analysis

library(dplyr)

matrix=matrix(c(46.165,43.965,45.332,48,45.182,46.698,45.066,43.466,45.799,48.166,45.666,44.432,45.832,44.833,44.199,48.333,44.024,45.199,45.332,43.266,44.766,46.533,45.758,45.233,45.632,41.199,45.099,47.683,47.124,43.766,44.932,40.865,45.124,47.633,44.965,46.466,44.599,44.233,45.166,48.515,43.765,43.532,44.333,42.932,43.165,45.765,45.366,44.198),ncol=8,nrow=6)

matrix=t(matrix)

colnames(matrix)=c("床運動","あん馬","吊り輪","とび馬","平行棒","鉄棒")

rownames(matrix)=c("USA","Russian","England","german","japan","china","Ukraine","France")

R=cor(matrix)

lambda=eigen(R)$values

Q=eigen(R)$vectors

A=Q%*%diag(lambda)^(1/2)

square_sum=apply(A^2,2,sum)

#Maximization of factor loadings V

apply(A^2,1,sum)

#mat equals diag(lambda)

mat=t(Q)%*%R%*%Q

#square_sum equals lambda (the eigenvalues)
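#Check (my own sketch): with all factors retained, the loadings reproduce the
#correlation matrix, A %*% t(A) = R; this should be numerically zero.

max(abs(A%*%t(A)-R))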


#Indscal

data=data.frame(companys=c("simano","hirosedennki","oriental land","fuji jyuukougyou","fanacks","obick","enplus","ryouhinnkeikaku"),income=c(949,819,942,948,920,929,976,850),growth=c(726,811,758,835,671,706,690,831),stability=c(953,952,870,785,974,914,878,847))

D1=array(0,dim=c(nrow(data),nrow(data)))

D2=array(0,dim=c(nrow(data),nrow(data)))

D3=array(0,dim=c(nrow(data),nrow(data)))

for(i in 1:nrow(data)){
for(j in 1:nrow(data)){  
if(i!=j){
  D1[i,j]=abs(data$income[i]-data$income[j])  
  D2[i,j]=abs(data$growth[i]-data$growth[j])
  D3[i,j]=abs(data$stability[i]-data$stability[j])
}  
}    
}

J=array(-1/nrow(data),dim=c(nrow(data),nrow(data)))

diag(J)=diag(J)+1

B1=(-1/2)*J%*%D1%*%J;B1=B1/sd(B1)

B2=(-1/2)*J%*%D2%*%J;B2=B2/sd(B2)

B3=(-1/2)*J%*%D3%*%J;B3=B3/sd(B3)

B_bar=(1/3)*(B1+B2+B3)

lambda=eigen(B_bar)$values[1:2]

V=eigen(B_bar)$vectors[,1:2]

X=V%*%diag(lambda^(1/2))

for(j in 1:ncol(X)){

X[,j]=X[,j]/apply(X,2,sd)[j]

}

X_l=X;X_r=X

F=array(0,dim=c(nrow(data)^2,ncol(X)))

for(k in 1:ncol(X)){
for(i in 1:nrow(data)){  
for(j in 1:nrow(data)){  

l=nrow(data)*(i-1)+j 

F[l,k]=X_l[i,k]*X_r[j,k]   

}
}
}

B=array(0,dim=c(3,nrow(data)^2))


for(i in 1:nrow(data)){
for(j in 1:nrow(data)){  

l=nrow(data)*(i-1)+j 

B[1,l]=B1[i,j]
B[2,l]=B2[i,j] 
B[3,l]=B3[i,j]  

}
}

W=B%*%F%*%solve(t(F)%*%F) 

B1_star=array(0,dim=c(nrow(data),nrow(data)))

B2_star=array(0,dim=c(nrow(data),nrow(data)))

B3_star=array(0,dim=c(nrow(data),nrow(data)))

for(i in 1:nrow(data)){
for(j in 1:nrow(data)){  
B1_star[i,j]=sum(W[1,]*X_l[i,]*X_r[j,] ) 
B2_star[i,j]=sum(W[2,]*X_l[i,]*X_r[j,] ) 
B3_star[i,j]=sum(W[3,]*X_l[i,]*X_r[j,] ) 
}
}

eta2=(sum((B1-B1_star)^2)+sum((B2-B2_star)^2)+sum((B3-B3_star)^2))/(sum(B1^2)+sum(B2^2)+sum(B3^2))

eta=sqrt(eta2)

B_star=array(0,dim=c(nrow(data),3*nrow(data)))

for(i in 1:nrow(data)){
for(j in 1:nrow(data)){
for(r in 1:3){

u=nrow(data)*(r-1)+j  

if(r==1){

 B_star[i,u]=B1[i,j] 

}

if(r==2){

 B_star[i,u]=B2[i,j]   

}  

if(r==3){

  B_star[i,u]=B3[i,j]  

}    

} 
}
}  

B_star_star=array(0,dim=c(nrow(data),3*nrow(data)))

for(i in 1:nrow(data)){
for(j in 1:nrow(data)){
for(r in 1:3){

v=nrow(data)*(r-1)+i  

if(r==1){

 B_star_star[j,v]=B1[i,j] 

}

if(r==2){

 B_star_star[j,v]=B2[i,j]   

}  

if(r==3){

  B_star_star[j,v]=B3[i,j]  

}    

} 
}
} 


#indscal(iteration)

times=50

for(s in 1:times){

G=array(0,dim=c(3*nrow(data),ncol(X)))

for(j in 1:nrow(data)){
for(k in 1:ncol(X)){ 
for(r in 1:3){  

u=nrow(data)*(r-1)+j   

G[u,k]=W[r,k]*X_r[j,k]

}
}
}  

H=array(0,dim=c(3*nrow(data),ncol(X)))

for(i in 1:nrow(data)){
for(k in 1:ncol(X)){ 
for(r in 1:3){  

v=nrow(data)*(r-1)+i   

H[v,k]=W[r,k]*X_l[i,k]

}
}
} 



X_l=B_star%*%G%*%solve(t(G)%*%G)

X_r=B_star_star%*%H%*%solve(t(H)%*%H)

F=array(0,dim=c(nrow(data)^2,ncol(X)))

for(k in 1:ncol(X)){
for(i in 1:nrow(data)){  
for(j in 1:nrow(data)){  

l=nrow(data)*(i-1)+j 

F[l,k]=X_l[i,k]*X_r[j,k]   

}
}
}

W=B%*%F%*%solve(t(F)%*%F) 

B1_star=array(0,dim=c(nrow(data),nrow(data)))

B2_star=array(0,dim=c(nrow(data),nrow(data)))

B3_star=array(0,dim=c(nrow(data),nrow(data)))

for(i in 1:nrow(data)){
for(j in 1:nrow(data)){  
B1_star[i,j]=sum(W[1,]*X_l[i,]*X_r[j,] ) 
B2_star[i,j]=sum(W[2,]*X_l[i,]*X_r[j,] ) 
B3_star[i,j]=sum(W[3,]*X_l[i,]*X_r[j,] ) 
}
}

eta2=(sum((B1-B1_star)^2)+sum((B2-B2_star)^2)+sum((B3-B3_star)^2))/(sum(B1^2)+sum(B2^2)+sum(B3^2))

eta=sqrt(eta2)

print(paste0("iteration ",s,": eta = ",eta))

print("X_l:")

print(X_l)

print("X_r:")

print(X_r)

}

#Results and plots

X=X_l

plot(X[,1],X[,2],type="p",col=2,xlab="x1",ylab="x2",main="ABCDEFGH")

plot(W[,1],W[,2],type="p",col=3,xlab="w1",ylab="w2",main="stability:growth:profitability")

Y1=X;Y2=X;Y3=X

for(j in 1:ncol(Y1)){

Y1[,j]=Y1[,j]*sqrt(W[1,])[j]  

Y2[,j]=Y2[,j]*sqrt(W[2,])[j]

Y3[,j]=Y3[,j]*sqrt(W[3,])[j]

}

plot(Y1[,1],Y1[,2],type="p",col=2,xlab="y1",ylab="y2",main="profitability perspective")

plot(Y2[,1],Y2[,2],type="p",col=2,xlab="y1",ylab="y2",main="growth perspective")

plot(Y3[,1],Y3[,2],type="p",col=2,xlab="y1",ylab="y2",main="stability perspective")

#Number of cluster partitions

#Stirling number of the second kind: N(n,k)

N=function(n,k){

value=0

for(m in 1:k){

value=value+(1/gamma(k+1))*((-1)^(k-m))*(gamma(k+1)/(gamma(m+1)*gamma(k-m+1)))*m^n  

}

return(value)

}

B=function(n){

value=0

for(j in 1:n){

value=value+N(n,j)  

}

return(value)

}
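#Sanity checks (my own sketch): N(4,2)=7 ways to split 4 objects into 2
#non-empty clusters, and B(4)=15 partitions in total (the Bell number).

N(4,2)

B(4)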

#Between-cluster and within-cluster variance

matrix=matrix(c(46.165,43.965,45.332,48,45.182,46.698,45.066,43.466,45.799,48.166,45.666,44.432,45.832,44.833,44.199,48.333,44.024,45.199,45.332,43.266,44.766,46.533,45.758,45.233,45.632,41.199,45.099,47.683,47.124,43.766,44.932,40.865,45.124,47.633,44.965,46.466,44.599,44.233,45.166,48.515,43.765,43.532,44.333,42.932,43.165,45.765,45.366,44.198),ncol=8,nrow=6)

matrix=t(matrix)

colnames(matrix)=c("床運動","あん馬","吊り輪","とび馬","平行棒","鉄棒")

rownames(matrix)=c("USA","Russian","England","german","japan","china","Ukraine","France")

#Centroids

C1=apply(matrix[rownames(matrix) %in% c("USA","japan", "china"),],2,mean)

C2=apply(matrix[rownames(matrix) %in% c("Russian","England","Ukraine"),],2,mean)

C3=apply(matrix[rownames(matrix) %in% c("France","german"),],2,mean)


#USA,JAPAN,CHINA

W13=sum((t(t(matrix[rownames(matrix) %in% c("USA","japan", "china"),])-C1))^2)/3

#Russian,England,Ukraine

W23=sum((t(t(matrix[rownames(matrix) %in% c("Russian","England","Ukraine"),])-C2))^2)/3

#France,german

W33=sum((t(t(matrix[rownames(matrix) %in% c("France","german"),])-C3))^2)/2

#Within-cluster variance

W=(1/(3+3+2))*(3*W13+3*W23+2*W33)

x_bar=apply(matrix,2,mean)

#Between-cluster variance

B=(sum((apply(matrix[rownames(matrix) %in% c("USA","japan", "china"),],2,mean)-x_bar)^2)*3+sum((apply(matrix[rownames(matrix) %in% c("Russian","England","Ukraine"),],2,mean)-x_bar)^2)*3+sum((apply(matrix[rownames(matrix) %in% c("France","german"),],2,mean)-x_bar)^2)*2)/8

pseudo_F=(B/(3-1))/(W/(8-3)) #between-cluster variance over (k-1) against within-cluster variance over (n-k), with k=3 clusters and n=8 countries

X_hat=apply(matrix,1,function(x) x-mean(x)) #columns are the mean-centered country profiles

for(j in 1:ncol(X_hat)){

X_hat[,j]=X_hat[,j]/sqrt(sum(X_hat[,j]^2)) #scale each column to unit norm

}

#Property of X_hat (2.5): each column's sum of squares equals 1

apply(X_hat^2,2,sum)

#d_hat2 is the squared Euclidean distance

d_hat2=array(0,dim=c(ncol(X_hat),ncol(X_hat)))

for(i in 1:ncol(d_hat2)){
for(j in 1:nrow(d_hat2)){  

d_hat2[i,j]=sum((X_hat[,i]-X_hat[,j])^2)

}
}


#Torgerson method

library(dplyr)

mat=matrix(0,nrow=10,ncol=10)

mat[1,]=c(0,8.89,10.16,15.84,16.84,33.01,32.69,43.47,37.78,39.31)
mat[2,]=c(0,0,9.64,15.37,20.45,30.75,34.09,41.61,39.26,41.47)
mat[3,]=c(0,0,0,6.12,11.2,22.98,24.56,33.61,29.75,31.86)
mat[4,]=c(0,0,0,0,8.48,17.20,18.75,27.64,23.91,26.23)
mat[5,]=c(0,0,0,0,0,20.77,16.25,29.52,21.17,22.49)
mat[6,]=c(0,0,0,0,0,0,13.01,10.86,15.61,19.06)
mat[7,]=c(0,0,0,0,0,0,0,15.99,5.19,7.69)
mat[8,]=c(rep(0,8),14.84,17.68)
mat[9,]=c(rep(0,9),3.45)

mat=mat+t(mat)

d=mat

ave_ncol_d=apply(d^2,1,mean) #row means of the squared distances

ave_nrow_d=apply(d^2,2,mean) #column means of the squared distances

B=array(0,dim=c(nrow(mat),ncol(mat)))

for(i in 1:nrow(B)){
for(j in 1:ncol(B)){  

B[i,j]=(-1/2)*((d[i,j]^2)-ave_ncol_d[i]-ave_nrow_d[j]+mean(d^2))  

}
}

eigen_values_B=eigen(B)$values

eigen_vectors_B=eigen(B)$vectors

eigen_values_B=eigen_values_B[1:2]

eigen_vectors_B=eigen_vectors_B[,1:2]

X=eigen_vectors_B%*%diag(sqrt(eigen_values_B),length(eigen_values_B))

plot(X[,1],X[,2],col=2,type="p",xlab="principal component 1",ylab="principal component 2", main="torgerson method")
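#Cross-check (my own sketch): base R's cmdscale() implements the same classical
#(Torgerson) scaling, so the configuration should agree up to axis signs.

X_cmd=cmdscale(mat,k=2)

max(abs(abs(X_cmd)-abs(X))) #~0 up to sign flips of the axes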



#Torgerson method (iris data)

library(dplyr)

require(proxy)

X=cbind(iris$Sepal.Length,iris$Sepal.Width,iris$Petal.Length,iris$Petal.Width)

mat=dist(X,"Euclidean")

d=as.matrix(mat)

ave_ncol_d=apply(d^2,1,mean) #row means of the squared distances

ave_nrow_d=apply(d^2,2,mean) #column means of the squared distances

B=array(0,dim=c(nrow(d),ncol(d)))

for(i in 1:nrow(B)){
for(j in 1:ncol(B)){  

B[i,j]=(-1/2)*((d[i,j]^2)-ave_ncol_d[i]-ave_nrow_d[j]+mean(d^2))  

}
}

eigen_values_B=eigen(B)$values

eigen_vectors_B=eigen(B)$vectors

eigen_values_B=eigen_values_B[1:2]

eigen_vectors_B=eigen_vectors_B[,1:2]

X=eigen_vectors_B%*%diag(sqrt(eigen_values_B),length(eigen_values_B))

plot(X[,1],X[,2],col=2,type="p",xlab="principal component 1",ylab="principal component 2", main="torgerson method")

X=data.frame(X,class=iris$Species)
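#Usage sketch (my own addition): color the 2-D configuration by species to see
#how well the classes separate.

plot(X$X1,X$X2,col=as.integer(X$class),xlab="principal component 1",ylab="principal component 2",main="torgerson method (iris)")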

