#クラスタリング手法(サービスサイエンスの事訳(ことわけ)―データサイエンスと数理科学の融合に向けて 高木英明先生)
library(dplyr)
library(mvtnorm)
library(rgl)
library(proxy)
# Men's gymnastics team scores: 8 countries x 6 apparatus events.
# The raw vector fills a 6x8 matrix column-wise, then is transposed so that
# rows = countries and columns = events.
matrix=matrix(c(46.165,43.965,45.332,48,45.182,46.698,45.066,43.466,45.799,48.166,45.666,44.432,45.832,44.833,44.199,48.333,44.024,45.199,45.332,43.266,44.766,46.533,45.758,45.233,45.632,41.199,45.099,47.683,47.124,43.766,44.932,40.865,45.124,47.633,44.965,46.466,44.599,44.233,45.166,48.515,43.765,43.532,44.333,42.932,43.165,45.765,45.366,44.198),ncol=8,nrow=6)
matrix=t(matrix)
# Event names: floor, pommel horse, rings, vault, parallel bars, horizontal bar.
# NOTE(review): the name `matrix` shadows base::matrix as a value; function
# calls matrix(...) still resolve to the base function.
colnames(matrix)=c("床運動","あん馬","吊り輪","とび馬","平行棒","鉄棒")
rownames(matrix)=c("USA","Russian","England","german","japan","china","Ukraine","France")
# Cosine similarity between every pair of country score vectors:
# s(a, b) = <a, b> / (||a|| * ||b||).
cosine_similarity <- array(0, dim = c(nrow(matrix), nrow(matrix)))
for (a in seq_len(nrow(cosine_similarity))) {
  for (b in seq_len(ncol(cosine_similarity))) {
    dot_ab <- sum(matrix[a, ] * matrix[b, ])
    norm_prod2 <- sum(matrix[a, ]^2) * sum(matrix[b, ]^2)
    cosine_similarity[a, b] <- dot_ab / sqrt(norm_prod2)
  }
}
# Pearson correlation between countries: correlating the columns of the
# transposed matrix correlates the country profiles.
# NOTE(review): this shadows stats::cor with a matrix; later calls like
# cor(matrix) still resolve to the function because R skips non-function
# objects during call lookup.
cor=cor(t(matrix))
# Deviation-pattern similarity: a correlation-like measure where both rows
# are centered by the column (event) means instead of their own row means.
deviation_pattern <- array(0, dim = c(nrow(matrix), nrow(matrix)))
average <- apply(matrix, 2, mean)
t_matrix <- t(matrix)
for (a in seq_len(nrow(deviation_pattern))) {
  for (b in seq_len(ncol(deviation_pattern))) {
    dev_a <- matrix[a, ] - average
    dev_b <- matrix[b, ] - average
    deviation_pattern[a, b] <- sum(dev_a * dev_b) / sqrt(sum(dev_a^2) * sum(dev_b^2))
  }
}
# Dissimilarity transforms of the deviation-pattern similarity s:
# d = exp(-s)
d1_deviation_pattern=exp(-deviation_pattern)
# d = 1 - s
d2_deviation_pattern=1-deviation_pattern
# d = 1 / (1 + s)
d3_deviation_pattern=1/(1+deviation_pattern)
# Distance forms of correlation and cosine similarity: d = (1 - s) / 2
d_cor2=((1-cor)/2)
d_cosine_similarity2=((1-cosine_similarity)/2)
# p.18: X_hat — center each country profile by its own (row) mean, then scale
# each column so its sum of squares is 1.  Columns of X_hat are countries.
X_hat=t(matrix)-apply(matrix,1,mean)
for(j in 1:ncol(X_hat)){
# The divisor is recomputed from the original `matrix`, so in-place column
# updates cannot contaminate later iterations.
X_hat[,j]=X_hat[,j]/sqrt(apply((t(matrix)-apply(matrix,1,mean))^2,2,sum))[j]
}
# Property (2.5): every column of X_hat has unit squared norm.
apply(X_hat^2,2,sum)
# d_hat2 is the squared Euclidean distance between normalized profiles.
d_hat2=array(0,dim=c(ncol(X_hat),ncol(X_hat)))
for(i in 1:ncol(d_hat2)){
for(j in 1:nrow(d_hat2)){
d_hat2[i,j]=sum((X_hat[,i]-X_hat[,j])^2)
}
}
# S = t(X_hat) %*% X_hat: Gram matrix (pairwise similarities) of the unit
# columns; diag(S) = 1 up to rounding.
t_X_hat=t(X_hat)
S=t_X_hat%*%t(t_X_hat)
diff_S=(1-S);
# Zero out tiny numerical residue on the diagonal before the comparison.
diag(diff_S)=floor(abs(diag(diff_S)))
# Check identity (2.4): d^2 = 2 * (1 - s), compared at 2 decimal places.
floor(d_hat2*100)/100==floor(2*diff_S*100)/100
# p.21: Manhattan (city-block) distance between every pair of countries.
city_block <- array(0, dim = c(nrow(matrix), nrow(matrix)))
for (a in seq_len(nrow(matrix))) {
  for (b in seq_len(nrow(matrix))) {
    city_block[a, b] <- sum(abs(matrix[a, ] - matrix[b, ]))
  }
}
# Euclidean distance between every pair of countries.
Euclid <- array(0, dim = c(nrow(matrix), nrow(matrix)))
for (a in seq_len(nrow(matrix))) {
  for (b in seq_len(nrow(matrix))) {
    Euclid[a, b] <- sqrt(sum((matrix[a, ] - matrix[b, ])^2))
  }
}
# Minkowski distance of order r; r = 2 reproduces the Euclidean case above.
Minkowski <- array(0, dim = c(nrow(matrix), nrow(matrix)))
r=2
for (a in seq_len(nrow(matrix))) {
  for (b in seq_len(nrow(matrix))) {
    gap <- abs(matrix[a, ] - matrix[b, ])
    Minkowski[a, b] <- (sum(gap^r))^(1 / r)
  }
}
# Chebyshev (L-infinity) distance: the largest per-event score gap.
chebyshev <- array(0, dim = c(nrow(matrix), nrow(matrix)))
for (a in seq_len(nrow(matrix))) {
  for (b in seq_len(nrow(matrix))) {
    chebyshev[b, a] <- max(abs(matrix[b, ] - matrix[a, ]))
  }
}
# Standardized Euclidean distance: each squared coordinate difference is
# weighted by the inverse VARIANCE of that column.
# FIX: the original divided the squared differences by the standard
# deviation (apply(matrix, 2, sd)); the standardized Euclidean distance
# divides by the variance sd^2.  The column variances are also hoisted out
# of the double loop (loop invariant).
standard_euclid=array(0,dim=c(nrow(matrix),nrow(matrix)))
col_var=apply(matrix,2,var)
for(j in 1:nrow(matrix)){
for(i in 1:nrow(matrix)){
standard_euclid[i,j]=sqrt(sum((matrix[i,]-matrix[j,])^2/col_var))
}
}
# Mahalanobis squared distance between countries, using the covariance of
# the score matrix.
# Improvement: solve(cov(matrix)) is loop-invariant, so the covariance is
# inverted once instead of on every one of the nrow^2 iterations.
Maharanobis=array(0,dim=c(nrow(matrix),nrow(matrix)))
inv_cov=solve(cov(matrix))
for(j in 1:nrow(matrix)){
for(i in 1:nrow(matrix)){
dvec=matrix[i,]-matrix[j,]
# (x_i - x_j)' Sigma^{-1} (x_i - x_j); the 1x1 matrix result is stored
# as a scalar element.
Maharanobis[i,j]=dvec%*%inv_cov%*%dvec
}
}
#Hotelling T2 theory
# Monte Carlo check that the scaled Hotelling T2 statistic follows an
# F(p, N-p) distribution, using draws from a multivariate normal fitted to
# the score matrix.
library(mvtnorm)
N=100000;p=ncol(matrix)
samples=array(0,dim=c(N,ncol(matrix)))
# One extra draw kept aside for the signal-to-noise computation below.
x0=rmvnorm(1,apply(matrix,2,mean),cov(matrix))
for(j in 1:nrow(samples)){
samples[j,]=rmvnorm(1,apply(matrix,2,mean),cov(matrix))
}
# Sample mean vector and covariance of the simulated population.
mu=apply(samples,2,mean);sigma=cov(samples)
T2=function(x){
# (x-mu)%*%solve(sigma) gives a 1xp row vector; the elementwise `*` with
# (x-mu) followed by sum() therefore equals the quadratic form
# (x-mu)' sigma^{-1} (x-mu), scaled into the F statistic.
# NOTE(review): solve(sigma) is recomputed on every call; hoisting it
# would speed up the N evaluations below.
return(((N-p)/((N+1)*p))*sum((x-mu)%*%solve(sigma)*(x-mu)))
}
data=data.frame(num=1:N,T2=0)
for(j in 1:nrow(data)){
data$T2[j]=T2(samples[j,])
}
# Round to 2 decimals so the empirical CDF has a manageable support.
data$T2=round(data$T2*100)/100
num=sort(unique(data$T2))
result=data.frame(num=num,prob=0)
# Empirical CDF of the simulated T2 values.
for(j in 1:nrow(result)){
result$prob[j]=length(data$T2[data$T2<=num[j]])/N
}
result=result %>% dplyr::mutate(f_dis=0)
# Theoretical F(p, N-p) CDF at the same points.
for(j in 1:nrow(result)){
result$f_dis[j]=pf(result$num[j],p,N-p)
}
# Per-coordinate signal-to-noise ratio (dB) of the held-out draw x0.
SN=10*log10((x0-mu)^2/diag(sigma))
#Hotelling T2 and f_distribution plots
# Overlay: red = empirical CDF, green = theoretical F CDF.
plot(result$num,result$prob,col=2,type="h",xlim=c(min(result$num),max(result$num)),ylim=c(min(result$prob),max(result$prob)),xlab="T2",ylab="prob")
par(new=T)
plot(result$num,result$f_dis,col=3,type="p",xlim=c(min(result$num),max(result$num)),ylim=c(min(result$prob),max(result$prob)),xlab="T2",ylab="prob")
# Approximation formulas
N=100000;p=6
#2^(-p/2)
(c(((N-p)/2):((N-1)/2))/rep((N-p),p/2))
#exp(-0.1*p/2)
(1+(p*0.1)/(N-p))^(-N/2)
# Gamma-type limiting density (large-N approximation of the F(p, N-p)
# density), evaluated pointwise; reads the global p.
gamma_f=function(x){
z<-(p/(2*gamma(p/2)))*exp(-p*x/2)*(p*x/2)^((p/2)-1)
return(z)
}
gamma_f_data=data.frame(x=seq(0,5,0.01),values=0,dis=0)
for(j in 1:nrow(gamma_f_data)){
gamma_f_data$values[j]=gamma_f(gamma_f_data$x[j])
# Numerical CDF by integrating the density from 0 to x.
gamma_f_data$dis[j]=integrate(gamma_f,0,gamma_f_data$x[j])$value
}
plot(gamma_f_data$x,gamma_f_data$values,type="h",col=2)
plot(gamma_f_data$x,gamma_f_data$dis,type="h",col=2)
# Asymptotic approximation: overlay the empirical T2 CDF (red) against the
# gamma-approximation CDF (green).
plot(result$num,result$prob,col=2,type="h",xlim=c(min(result$num),max(result$num,gamma_f_data$x)),ylim=c(min(result$prob,gamma_f_data$dis),max(result$prob,gamma_f_data$dis)),xlab="T2",ylab="prob")
par(new=T)
plot(gamma_f_data$x,gamma_f_data$dis,col=3,type="p",xlim=c(min(result$num),max(result$num,gamma_f_data$x)),ylim=c(min(result$prob,gamma_f_data$dis),max(result$prob,gamma_f_data$dis)),xlab="T2",ylab="prob")
#2.3.4 Numerical example
# Refit mean and covariance with japan excluded from the score matrix.
matrix2=matrix[rownames(matrix)!="japan",]
average2=apply(matrix2,2,mean)
sigma2=cov(matrix2)
solve_sigma2=solve(sigma2)
# Example SN-ratio computation kept for reference (commented out):
#10*log10((matrix2[1,1]-average2[1])^2/diag(sigma2)[1])
# Distance shrinkage when points are treated as one cluster
# (Pattern Recognition and Learning Machines, p.42), weights normalized so
# that sum(W) = 1.
#sum(W)=1
N=100
char=10
X=array(0,dim=c(N,char))
d_data_real=data.frame(x=0,y=0,d=0)
d_real_mat=array(0,dim=c(char,char))
# Each column gets a random mean in 1..3 and sd in 1..3.
for(j in 1:char){
X[,j]=rnorm(N,mean=sample(1:3,1),sd=sample(1:3,1))
}
# Unweighted pairwise Euclidean distances for the first `char` rows
# (upper triangle only).
for(j in 1:char){
for(i in 1:char){
if(j>i){
d_real_mat[i,j]=sqrt(sum((X[i,]-X[j,])^2))
d_data_real=rbind(d_data_real,data.frame(x=i,y=j,d=sqrt(sum((X[i,]-X[j,])^2))))
}
}
}
# Drop the all-zero seed row used to initialize the data frame.
d_data_real=tail(d_data_real,nrow(d_data_real)-1)
# NOTE(review): plot3d comes from rgl, which this file only loads at a
# later line — load rgl before this point.
plot3d(d_data_real$x,d_data_real$y,d_data_real$d,xlab="x",ylab="y",zlab="d",col=3,type="s")
# Inverse-variance weights normalized to sum to 1.
W=(1/apply(X,2,var))/sum(1/apply(X,2,var))
beta=sum(W)/sum(1/apply(X,2,var))
# Average squared weighted distance.
D_bar=(2*N/(N-1))*sum(W*apply(X,2,var))
W_mat=diag(W)
d_mat=array(0,dim=c(char,char))
d_data=data.frame(x=0,y=0,d=0)
# Weighted pairwise distances (upper triangle).
for(i in 1:char){
for(j in 1:char){
if(i<j){
d_mat[i,j]=sqrt(sum(((X[i,]-X[j,])%*%W_mat)^2))
d_data=rbind(d_data,data.frame(x=i,y=j,d=d_mat[i,j]))
}
}
}
d_data=tail(d_data,nrow(d_data)-1)
library(rgl)
plot3d(d_data$x,d_data$y,d_data$d,col=2,type="s",xlab="x",ylab="y",zlab="d")
# Distance shrinkage when points are treated as one cluster
# (Pattern Recognition and Learning Machines, p.42), weights normalized so
# that prod(W) = 1.
#prod(W)=1
# Inverse-variance weights scaled by the geometric mean of the variances.
W=((1/apply(X,2,var))*prod(apply(X,2,var))^(1/char))
lambda=(prod(apply(X,2,var))^(1/char))^2
D_bar=(2*N/(N-1))*sum(W*apply(X,2,var))
W_mat=diag(W)
d_data=data.frame(x=0,y=0,d=0)
# Weighted pairwise distances over the first `char` rows (upper triangle).
for(i in 1:char){
for(j in 1:char){
if(i<j){
d_mat[i,j]=sqrt(sum(((X[i,]-X[j,])%*%W_mat)^2))
d_data=rbind(d_data,data.frame(x=i,y=j,d=d_mat[i,j]))
}
}
}
# Drop the all-zero seed row used to initialize the data frame.
d_data=tail(d_data,nrow(d_data)-1)
library(rgl)
plot3d(d_data$x,d_data$y,d_data$d,col=2,type="s",xlab="x",ylab="y",zlab="d")
# Between-class separation
N=100
char=10
# Class A: columns with random means in 1..3.
A=array(0,dim=c(N,char))
for(j in 1:char){
A[,j]=rnorm(N,mean=sample(1:3,1),sd=sample(1:3,1))
}
# Class B: columns with random means in 5..7 (shifted away from A).
B=array(0,dim=c(N,char))
for(j in 1:char){
B[,j]=rnorm(N,mean=sample(5:7,1),sd=sample(1:3,1))
}
# x: between-class scatter-like matrix accumulated over all A-vs-B pairs.
# NOTE(review): the divisor char*char does not match the N*N terms summed
# here — verify against the textbook normalization.
x=array(0,dim=c(char,char))
for(j in 1:ncol(x)){
for(i in 1:nrow(x)){
value=0
for(k in 1:nrow(A)){
value=value+sum((A[k,i]-B[,i])*(A[k,j]-B[,j]))
}
x[i,j]=value/(char*char)
}
}
y=array(0,dim=c(char,char))
# z: synthetic pattern counts across the combined A,B pool.
z=array(0,dim=c(2*char,char))
for(i in 1:ncol(z)){
for(j in 1:nrow(z)){
#if(i!=j){
z[j,i]=floor(abs(rnorm(1,mean=i,sd=1)+rnorm(1,mean=j,sd=1) ))
#}
}
}
# y: within-pool scatter-like matrix over all pairs in z.
for(j in 1:ncol(y)){
for(i in 1:nrow(y)){
value=0
for(k in 1:nrow(z)){
value=value+sum((z[k,i]-z[,i])*(z[k,j]-z[,j]))
}
y[i,j]=value/(char*(char-1)/2)
}
}
# Generalized eigenproblem x v = lambda y v solved via x %*% solve(y).
# NOTE(review): x %*% solve(y) is not symmetric, so eigenvalues/vectors may
# be complex — confirm this is acceptable downstream.
eigen_values=eigen(x%*%solve(y))$values
W=eigen_vectors=eigen(x%*%solve(y))$vectors
K=0
# Sum of quadratic forms v_j' y v_j over all eigenvectors.
for(j in 1:ncol(eigen_vectors)){
K=K+eigen_vectors[,j]%*%y%*%t(t(eigen_vectors[,j]))
}
d_max=max(eigen_values)*K
# Potential-function method, p.61
N=100
char=10
# Class A samples, then collapsed to one representative value per row.
A=array(0,dim=c(N,char))
for(j in 1:char){
A[,j]=rnorm(N,mean=sample(1:3,1),sd=sample(1:3,1))
}
A=apply(A,1,mean)
# Class B samples, likewise collapsed to row means.
B=array(0,dim=c(N,char))
for(j in 1:char){
B[,j]=rnorm(N,mean=sample(5:7,1),sd=sample(1:3,1))
}
B=apply(B,1,mean)
# Distinct rounded magnitudes act as the potential centers for each class.
num_A=sort(unique(round(abs(A))))
num_B=sort(unique(round(abs(B))))
# Potential of point s w.r.t. class A: the mean of Cauchy-type kernels
# 1 / (1 + alpha * (s - c)^2) over the class-A centers num_A (a global
# defined earlier in this script).
potential_A=function(s,alpha){
  kernels <- 1/(1+alpha*(s-num_A)^2)
  sum(kernels)*(1/length(num_A))
}
# Potential of point s w.r.t. class B: the mean of Cauchy-type kernels
# 1 / (1 + alpha * (s - c)^2) over the class-B centers num_B (a global
# defined earlier in this script).
potential_B=function(s,alpha){
  kernels <- 1/(1+alpha*(s-num_B)^2)
  sum(kernels)*(1/length(num_B))
}
# Evaluate both class potentials on 10 random test points.
X=data.frame(values=rnorm(10,mean=2,sd=5),A=0,B=0)
# Kernel width parameter alpha.
# NOTE(review): this shadows base::c as a value; calls c(...) still resolve
# to the base function.
c=0.1
for(j in 1:nrow(X)){
X$A[j]=potential_A(X$values[j],c)
X$B[j]=potential_B(X$values[j],c)
}
# Multidimensional scaling (classical MDS)
library(dplyr)
# Reload the gymnastics score matrix (rows = countries, columns = events).
matrix=matrix(c(46.165,43.965,45.332,48,45.182,46.698,45.066,43.466,45.799,48.166,45.666,44.432,45.832,44.833,44.199,48.333,44.024,45.199,45.332,43.266,44.766,46.533,45.758,45.233,45.632,41.199,45.099,47.683,47.124,43.766,44.932,40.865,45.124,47.633,44.965,46.466,44.599,44.233,45.166,48.515,43.765,43.532,44.333,42.932,43.165,45.765,45.366,44.198),ncol=8,nrow=6)
matrix=t(matrix)
colnames(matrix)=c("床運動","あん馬","吊り輪","とび馬","平行棒","鉄棒")
rownames(matrix)=c("USA","Russian","England","german","japan","china","Ukraine","France")
# Column-center the data.
X=as.matrix(matrix)
X=t(t(X)-apply(matrix,2,mean))
p=ncol(X)
# Squared Euclidean distances between countries.
d_mat=array(0,dim=c(nrow(X),nrow(X)))
for(i in 1:ncol(d_mat)){
for(j in 1:nrow(d_mat)){
d_mat[j,i]=sum((X[i,]-X[j,])^2)
}
}
# Inner-product matrix of the centered data.
B=(X)%*%t(X)
# Centering matrix J = I - (1/p) 11'; J2 = J %*% J (computed but not used
# further below).
J=array(-1/p,dim=c(p,p))
diag(J)=1+diag(J)
J2=J%*%J
# Spectral decomposition of B; keep only numerically positive eigenvalues.
eigen_values=eigen(B)$values
eigen_values2=eigen_values[eigen_values>10^(-10)]
eigen_vector=eigen(B)$vectors
lambda=diag(eigen_values2)
lambda2=diag(sqrt(eigen_values2))
eigen_vector2=eigen_vector[,1:length(eigen_values2)]
# MDS configuration: X_hat = V * Lambda^{1/2}.
X_hat=eigen_vector2%*%lambda2
# Least-squares map from the configuration back to the original data;
# X_hat %*% A reconstructs (approximately) the centered X.
A=solve(t(X_hat)%*%X_hat)%*%t(X_hat)%*%X
B=(X_hat)%*%t(X_hat)
X_hat%*%A
# Factor analysis
library(dplyr)
# Reload the gymnastics score matrix (rows = countries, columns = events).
matrix=matrix(c(46.165,43.965,45.332,48,45.182,46.698,45.066,43.466,45.799,48.166,45.666,44.432,45.832,44.833,44.199,48.333,44.024,45.199,45.332,43.266,44.766,46.533,45.758,45.233,45.632,41.199,45.099,47.683,47.124,43.766,44.932,40.865,45.124,47.633,44.965,46.466,44.599,44.233,45.166,48.515,43.765,43.532,44.333,42.932,43.165,45.765,45.366,44.198),ncol=8,nrow=6)
matrix=t(matrix)
colnames(matrix)=c("床運動","あん馬","吊り輪","とび馬","平行棒","鉄棒")
rownames(matrix)=c("USA","Russian","England","german","japan","china","Ukraine","France")
# Correlation matrix of the events and its spectral decomposition.
R=cor(matrix)
lambda=eigen(R)$values
Q=eigen(R)$vectors
# Factor loadings A = Q %*% sqrt(Lambda).  Note operator precedence: ^ binds
# tighter than %*%, so diag(lambda)^(1/2) is the elementwise square root of
# the diagonal matrix, i.e. diag(sqrt(lambda)).
A=Q%*%diag(lambda)^(1/2)
# Column sums of squared loadings equal the eigenvalues.
square_sum=apply(A^2,2,sum)
# Row sums of squared loadings (communalities; maximized factor loadings V).
apply(A^2,1,sum)
# Q' R Q diagonalizes R: mat should equal diag(lambda).
mat=t(Q)%*%R%*%Q
#square_sum=lambda
#Indscal
# Company ratings on three criteria (income, growth, stability).
data=data.frame(companys=c("simano","hirosedennki","oriental land","fuji jyuukougyou","fanacks","obick","enplus","ryouhinnkeikaku"),income=c(949,819,942,948,920,929,976,850),growth=c(726,811,758,835,671,706,690,831),stability=c(953,952,870,785,974,914,878,847))
# One absolute-difference dissimilarity matrix per criterion.
D1=array(0,dim=c(nrow(data),nrow(data)))
D2=array(0,dim=c(nrow(data),nrow(data)))
D3=array(0,dim=c(nrow(data),nrow(data)))
for(i in 1:nrow(data)){
for(j in 1:nrow(data)){
if(i!=j){
D1[i,j]=abs(data$income[i]-data$income[j])
D2[i,j]=abs(data$growth[i]-data$growth[j])
D3[i,j]=abs(data$stability[i]-data$stability[j])
}
}
}
# Centering matrix J = I - (1/n) 11'.
J=array(-1/nrow(data),dim=c(nrow(data),nrow(data)))
diag(J)=diag(J)+1
# Double-centered, scale-normalized scalar-product matrices per criterion.
# NOTE(review): classical scaling double-centers SQUARED dissimilarities;
# here the raw D matrices are centered — confirm against the INDSCAL text.
B1=(-1/2)*J%*%D1%*%J;B1=B1/sd(B1)
B2=(-1/2)*J%*%D2%*%J;B2=B2/sd(B2)
B3=(-1/2)*J%*%D3%*%J;B3=B3/sd(B3)
# Average scalar-product matrix and its rank-2 configuration.
B_bar=(1/3)*(B1+B2+B3)
lambda=eigen(B_bar)$values[1:2]
V=eigen(B_bar)$vectors[,1:2]
X=V%*%diag(lambda^(1/2))
# Standardize each configuration column; the divisor apply(X,2,sd)[j] is
# taken from column j before it is overwritten, so the loop is safe.
for(j in 1:ncol(X)){
X[,j]=X[,j]/apply(X,2,sd)[j]
}
# Left and right configurations start from the common configuration X.
X_l=X;X_r=X
# F: (n^2 x dims) design matrix with rows indexed by the flattened pair
# (i, j) -> l = n*(i-1)+j, entries X_l[i,k]*X_r[j,k].
# NOTE(review): the name F shadows the F/FALSE shorthand from here on.
F=array(0,dim=c(nrow(data)^2,ncol(X)))
for(k in 1:ncol(X)){
for(i in 1:nrow(data)){
for(j in 1:nrow(data)){
l=nrow(data)*(i-1)+j
F[l,k]=X_l[i,k]*X_r[j,k]
}
}
}
# B: the three scalar-product matrices flattened into rows with the same
# pair indexing as F.
B=array(0,dim=c(3,nrow(data)^2))
for(i in 1:nrow(data)){
for(j in 1:nrow(data)){
l=nrow(data)*(i-1)+j
B[1,l]=B1[i,j]
B[2,l]=B2[i,j]
B[3,l]=B3[i,j]
}
}
# Least-squares estimate of the criterion weights: W = B F (F'F)^{-1}.
W=B%*%F%*%solve(t(F)%*%F)
# Model reconstruction B_r*[i,j] = sum_k W[r,k] X_l[i,k] X_r[j,k].
B1_star=array(0,dim=c(nrow(data),nrow(data)))
B2_star=array(0,dim=c(nrow(data),nrow(data)))
B3_star=array(0,dim=c(nrow(data),nrow(data)))
for(i in 1:nrow(data)){
for(j in 1:nrow(data)){
B1_star[i,j]=sum(W[1,]*X_l[i,]*X_r[j,] )
B2_star[i,j]=sum(W[2,]*X_l[i,]*X_r[j,] )
B3_star[i,j]=sum(W[3,]*X_l[i,]*X_r[j,] )
}
}
# Relative residual (stress-like fit measure) of the reconstruction.
eta2=(sum((B1-B1_star)^2)+sum((B2-B2_star)^2)+sum((B3-B3_star)^2))/(sum(B1^2)+sum(B2^2)+sum(B3^2))
eta=sqrt(eta2)
# B_star: the three B_r matrices laid side by side as an (n x 3n) block row,
# column index u = n*(r-1)+j.  Used to update X_l in the iteration below.
B_star=array(0,dim=c(nrow(data),3*nrow(data)))
for(i in 1:nrow(data)){
for(j in 1:nrow(data)){
for(r in 1:3){
u=nrow(data)*(r-1)+j
if(r==1){
B_star[i,u]=B1[i,j]
}
if(r==2){
B_star[i,u]=B2[i,j]
}
if(r==3){
B_star[i,u]=B3[i,j]
}
}
}
}
# B_star_star: the transposed layout, row j and column v = n*(r-1)+i.
# Used to update X_r in the iteration below.
B_star_star=array(0,dim=c(nrow(data),3*nrow(data)))
for(i in 1:nrow(data)){
for(j in 1:nrow(data)){
for(r in 1:3){
v=nrow(data)*(r-1)+i
if(r==1){
B_star_star[j,v]=B1[i,j]
}
if(r==2){
B_star_star[j,v]=B2[i,j]
}
if(r==3){
B_star_star[j,v]=B3[i,j]
}
}
}
}
#indscal(iteration)
# Alternating least squares: each pass updates X_l, then X_r, then the
# weights W, and reports the fit measure eta.
times=50
for(s in 1:times){
# G stacks W[r,k]*X_r[j,k] with row index u = n*(r-1)+j, so that
# X_l = B_star G (G'G)^{-1} is the least-squares update of X_l.
G=array(0,dim=c(3*nrow(data),ncol(X)))
for(j in 1:nrow(data)){
for(k in 1:ncol(X)){
for(r in 1:3){
u=nrow(data)*(r-1)+j
G[u,k]=W[r,k]*X_r[j,k]
}
}
}
# H is the analogous stack built from X_l for the X_r update.
H=array(0,dim=c(3*nrow(data),ncol(X)))
for(i in 1:nrow(data)){
for(k in 1:ncol(X)){
for(r in 1:3){
v=nrow(data)*(r-1)+i
H[v,k]=W[r,k]*X_l[i,k]
}
}
}
X_l=B_star%*%G%*%solve(t(G)%*%G)
X_r=B_star_star%*%H%*%solve(t(H)%*%H)
# Rebuild the pairwise design matrix F from the updated configurations.
F=array(0,dim=c(nrow(data)^2,ncol(X)))
for(k in 1:ncol(X)){
for(i in 1:nrow(data)){
for(j in 1:nrow(data)){
l=nrow(data)*(i-1)+j
F[l,k]=X_l[i,k]*X_r[j,k]
}
}
}
# Weight update: W = B F (F'F)^{-1}.
W=B%*%F%*%solve(t(F)%*%F)
# Reconstruct the three scalar-product matrices under the current model.
B1_star=array(0,dim=c(nrow(data),nrow(data)))
B2_star=array(0,dim=c(nrow(data),nrow(data)))
B3_star=array(0,dim=c(nrow(data),nrow(data)))
for(i in 1:nrow(data)){
for(j in 1:nrow(data)){
B1_star[i,j]=sum(W[1,]*X_l[i,]*X_r[j,] )
B2_star[i,j]=sum(W[2,]*X_l[i,]*X_r[j,] )
B3_star[i,j]=sum(W[3,]*X_l[i,]*X_r[j,] )
}
}
# Relative residual; printed each iteration to watch convergence.
eta2=(sum((B1-B1_star)^2)+sum((B2-B2_star)^2)+sum((B3-B3_star)^2))/(sum(B1^2)+sum(B2^2)+sum(B3^2))
eta=sqrt(eta2)
print(paste0(s,"回目の値etaは",eta))
print("X_l:")
print(X_l)
print("X_r:")
print(X_r)
}
#result;plot
# Common object configuration (companies) and criterion weights.
X=X_l
plot(X[,1],X[,2],type="p",col=2,xlab="x1",ylab="x2",main="ABCDFFGH")
plot(W[,1],W[,2],type="p",col=3,xlab="w1",ylab="w2",main="安定性:成長性:収益性")
# Criterion-specific views: scale each configuration axis by the square
# root of that criterion's weight.
Y1=X;Y2=X;Y3=X
for(j in 1:ncol(Y1)){
Y1[,j]=Y1[,j]*sqrt(W[1,])[j]
Y2[,j]=Y2[,j]*sqrt(W[2,])[j]
Y3[,j]=Y3[,j]*sqrt(W[3,])[j]
}
plot(Y1[,1],Y1[,2],type="p",col=2,xlab="y1",ylab="y2",main="収益性の観点")
plot(Y2[,1],Y2[,2],type="p",col=2,xlab="y1",ylab="y2",main="成長性の観点")
plot(Y3[,1],Y3[,2],type="p",col=2,xlab="y1",ylab="y2",main="安定性の観点")
# Number of cluster partitions
# Stirling number of the second kind N(n, k): the number of ways to
# partition n labeled items into exactly k non-empty clusters, computed by
# the inclusion-exclusion sum; gamma(m+1) plays the role of m!.
N=function(n,k){
  term=function(m)(1/gamma(k+1))*((-1)^(k-m))*(gamma(k+1)/(gamma(m+1)*gamma(k-m+1)))*m^n
  sum(vapply(seq_len(k),term,numeric(1)))
}
# Bell number B(n): total number of partitions of n labeled items, i.e. the
# sum of Stirling numbers N(n, k) over all cluster counts k = 1..n.
B=function(n){
  sum(vapply(seq_len(n),function(k)N(n,k),numeric(1)))
}
# Between-cluster and within-cluster variance
# Reload the gymnastics score matrix (rows = countries, columns = events).
matrix=matrix(c(46.165,43.965,45.332,48,45.182,46.698,45.066,43.466,45.799,48.166,45.666,44.432,45.832,44.833,44.199,48.333,44.024,45.199,45.332,43.266,44.766,46.533,45.758,45.233,45.632,41.199,45.099,47.683,47.124,43.766,44.932,40.865,45.124,47.633,44.965,46.466,44.599,44.233,45.166,48.515,43.765,43.532,44.333,42.932,43.165,45.765,45.366,44.198),ncol=8,nrow=6)
matrix=t(matrix)
colnames(matrix)=c("床運動","あん馬","吊り輪","とび馬","平行棒","鉄棒")
rownames(matrix)=c("USA","Russian","England","german","japan","china","Ukraine","France")
# Centroids of the three hand-picked clusters.
C1=apply(matrix[rownames(matrix) %in% c("USA","japan", "china"),],2,mean)
C2=apply(matrix[rownames(matrix) %in% c("Russian","England","Ukraine"),],2,mean)
C3=apply(matrix[rownames(matrix) %in% c("France","german"),],2,mean)
# Within-cluster mean squared deviation for cluster {USA, japan, china}.
W13=sum((t(t(matrix[rownames(matrix) %in% c("USA","japan", "china"),])-C1))^2)/3
# ... for cluster {Russian, England, Ukraine}.
W23=sum((t(t(matrix[rownames(matrix) %in% c("Russian","England","Ukraine"),])-C2))^2)/3
# ... for cluster {France, german}.
W33=sum((t(t(matrix[rownames(matrix) %in% c("France","german"),])-C3))^2)/2
# Pooled within-cluster variance (size-weighted average).
W=(1/(3+3+2))*(3*W13+3*W23+2*W33)
x_bar=apply(matrix,2,mean)
# Between-cluster variance: size-weighted squared centroid deviations.
B=(sum((apply(matrix[rownames(matrix) %in% c("USA","japan", "china"),],2,mean)-x_bar)^2)*3+sum((apply(matrix[rownames(matrix) %in% c("Russian","England","Ukraine"),],2,mean)-x_bar)^2)*3+sum((apply(matrix[rownames(matrix) %in% c("France","german"),],2,mean)-x_bar)^2)*2)/8
# Pseudo-F statistic for 8 objects in 3 clusters.
# NOTE(review): the usual pseudo-F is (B/(k-1))/(W/(n-k)); the degrees of
# freedom here look swapped — verify against the textbook.
pseudo_F=(B/(8-3))/(W/(3-1))
# X_hat normalization repeated from p.18: row-center, then unit-norm columns.
X_hat=t(matrix)-apply(matrix,1,mean)
for(j in 1:ncol(X_hat)){
X_hat[,j]=X_hat[,j]/sqrt(apply((t(matrix)-apply(matrix,1,mean))^2,2,sum))[j]
}
# Property (2.5): every column of X_hat has unit squared norm.
apply(X_hat^2,2,sum)
# d_hat2 is the squared Euclidean distance between normalized profiles.
d_hat2=array(0,dim=c(ncol(X_hat),ncol(X_hat)))
for(i in 1:ncol(d_hat2)){
for(j in 1:nrow(d_hat2)){
d_hat2[i,j]=sum((X_hat[,i]-X_hat[,j])^2)
}
}
#torgerson method
# Classical (Torgerson) scaling of a hand-entered 10x10 distance matrix.
library(dplyr)
# Upper triangle of the distance matrix, entered row by row.
mat=matrix(0,nrow=10,ncol=10)
mat[1,]=c(0,8.89,10.16,15.84,16.84,33.01,32.69,43.47,37.78,39.31)
mat[2,]=c(0,0,9.64,15.37,20.45,30.75,34.09,41.61,39.26,41.47)
mat[3,]=c(0,0,0,6.12,11.2,22.98,24.56,33.61,29.75,31.86)
mat[4,]=c(0,0,0,0,8.48,17.20,18.75,27.64,23.91,26.23)
mat[5,]=c(0,0,0,0,0,20.77,16.25,29.52,21.17,22.49)
mat[6,]=c(0,0,0,0,0,0,13.01,10.86,15.61,19.06)
mat[7,]=c(0,0,0,0,0,0,0,15.99,5.19,7.69)
mat[8,]=c(rep(0,8),14.84,17.68)
mat[9,]=c(rep(0,9),3.45)
# Symmetrize (the diagonal is zero, so no double counting).
mat=mat+t(mat)
d=mat
# Row and column means of the squared distances.
# NOTE(review): the names are swapped relative to the apply() margins
# (1 = rows, 2 = columns); the matrix is symmetric, so the values coincide.
ave_ncol_d=apply(d^2,1,mean)
ave_nrow_d=apply(d^2,2,mean)
# Double centering: b_ij = -(d_ij^2 - rowmean_i - colmean_j + grandmean)/2.
B=array(0,dim=c(nrow(mat),ncol(mat)))
for(i in 1:nrow(B)){
for(j in 1:ncol(B)){
B[i,j]=(-1/2)*((d[i,j]^2)-ave_ncol_d[i]-ave_nrow_d[j]+mean(d^2))
}
}
# Rank-2 configuration from the top two eigenpairs: X = V * sqrt(Lambda).
eigen_values_B=eigen(B)$values
eigen_vectors_B=eigen(B)$vectors
eigen_values_B=eigen_values_B[1:2]
eigen_vectors_B=eigen_vectors_B[,1:2]
X=eigen_vectors_B%*%diag(sqrt(eigen_values_B),length(eigen_values_B))
plot(X[,1],X[,2],col=2,type="p",xlab="principal component 1",ylab="principal component 2", main="torgerson method")
#torgerson method
# Classical (Torgerson) scaling of pairwise Euclidean distances between the
# 150 iris observations.
library(dplyr)
# proxy supplies the case-insensitive method lookup used by dist(X,"Euclidean").
require(proxy)
X=cbind(iris$Sepal.Length,iris$Sepal.Width,iris$Petal.Length,iris$Petal.Width)
mat=dist(X,"Euclidean")
d=as.matrix(mat)
# Row and column means of the squared distances (equal here by symmetry).
ave_ncol_d=apply(d^2,1,mean)
ave_nrow_d=apply(d^2,2,mean)
# FIX: `mat` is a "dist" object without a dim attribute, so nrow(mat) and
# ncol(mat) are NULL and array(0, dim = c(NULL, NULL)) fails; size B from
# the square matrix `d` instead.
B=array(0,dim=c(nrow(d),ncol(d)))
for(i in 1:nrow(B)){
for(j in 1:ncol(B)){
# Double centering: b_ij = -(d_ij^2 - rowmean_i - colmean_j + grandmean)/2.
B[i,j]=(-1/2)*((d[i,j]^2)-ave_ncol_d[i]-ave_nrow_d[j]+mean(d^2))
}
}
# Rank-2 configuration from the top two eigenpairs: X = V * sqrt(Lambda).
eigen_values_B=eigen(B)$values
eigen_vectors_B=eigen(B)$vectors
eigen_values_B=eigen_values_B[1:2]
eigen_vectors_B=eigen_vectors_B[,1:2]
X=eigen_vectors_B%*%diag(sqrt(eigen_values_B),length(eigen_values_B))
plot(X[,1],X[,2],col=2,type="p",xlab="principal component 1",ylab="principal component 2", main="torgerson method")
# Attach species labels for inspection of the 2-D embedding.
X=data.frame(X,class=iris$Species)