R言語で主成分分析を行うスクリプト
07_PCA.R
#-------Making data------------------------------
data_iris<-iris[,1:4]
#-------Get and save PCA result-----------------
#PCAを行う
#center = TRUE 平均を0に正規化する
#scale = TRUE 分散は1に正規化する
pca_result<-prcomp(data_iris, center = TRUE, scale = TRUE)
#主成分得点
pca_score <- pca_result$x
write.csv(pca_score, "07_output_pca_score.csv", quote=FALSE, row.names=FALSE)
#固有ベクトル
eigenvector <- as.data.frame(pca_result$rotation)
rownames(eigenvector)<-colnames(data_iris)
write.csv(eigenvector, "07_output_eigenvector.csv", quote=FALSE)
#標準偏差、寄与率(分散比)、累積寄与率(分散比の累積値)
pca_summary<-NULL
standard_deviation<-pca_result$sdev
pca_summary<-cbind(pca_summary,standard_deviation)
proportion_of_Variance<-standard_deviation^2/sum(standard_deviation^2)
pca_summary<-cbind(pca_summary,proportion_of_Variance)
cumulative_Proportion <-cumsum(proportion_of_Variance)
pca_summary<-as.data.frame(cbind(pca_summary,cumulative_Proportion))
rownames(pca_summary)<-colnames(pca_score )
write.csv(pca_summary, "07_output_summary.csv", quote=FALSE)
上記の主成分分析の結果をプロットするプログラムも載せておきます。
07_PCA.R
#--------------------------------------Plot----------------------------------------
ScatterplotMatrix <- function(DataMatrix,color) {
panel.cor<-function(x,y,digits=2,prefix="",cex.cor,...){
usr<-par("usr")
on.exit(par(usr))
par(usr=c(0,1,0,1))
r<-cor(x, y)
txt<-format(c(r,0.123456789),digits=2)[1]
if(missing(cex.cor))
cex.cor<-0.8/strwidth(txt)
text(0.5,0.5,txt,cex=1.5)
}
#Set histogram panel
panel.hist.density<-function(x,...) {
usr <- par("usr")
on.exit(par(usr))
par(usr = c(usr[1:2], 0, 1.5) )
h <- hist(x, plot = FALSE)
breaks <- h$breaks
nB <- length(breaks)
y <- h$counts
y <- y/max(y)
#draw density
tryd <- try( d <- density(x,na.rm=TRUE,bw="nrd",adjust=1.2),silent=TRUE)
if(class(tryd) != "try-error") {
d$y <- d$y/max(d$y)
lines(d,col="blue",lwd=1.5)
}
}
#Set lower panel
panel.lower <- function(x, y, ...){
points(x, y, pch = 21, col = color)
}
pairs(DataMatrix,upper.panel=panel.cor,diag.panel=panel.hist.density,lower.panel=panel.lower)
return
}
png("07_output1.png") #Open device
ScatterplotMatrix(pca_score,iris$Species)
dev.off() #Close device
png("07_output2.png") #Open device
plot(pca_summary$cumulative_Proportion)
dev.off() #Close device