packageを使用しないでリッジ回帰をプログラムしました。
あるきめられたサンプルデータに対して、リッジ回帰で当て込みました。
一度、よければ使ってみてください。気づいたことがあれば教えてください。
#リッジ回帰
library(dplyr)
data=data.frame(num=1:20,y=c(167,167.5,168.4,172,155.3,151.4,163,174,168,160.4,164.7,171,162.6,164.8,163.3,167.6,169.2,168,167.4,172),x1=c(84,87,86,85,82,87,92,94,88,84.9,78,90,88,87,82,84,86,83,85.2,82),x2=c(61,55.5,57,57,50,50,66.5,65,60.5,49.5,49.5,61,59.5,58.4,53.5,54,60,58.8,54,56))
Z=matrix(c(data$x1-mean(data$x1),data$x2-mean(data$x2)),ncol=ncol(data)-2,nrow=nrow(data))
X=as.matrix(data[,colnames(data) %in% c("x1","x2")])
Y=data$y;N=length(Y)
GCV=1;
fun=function(x){
if(GCV==1){
H=(Z)%*%solve(t(Z)%*%Z+diag(x,ncol(Z)))%*%t(Z)
unit=matrix(0,ncol=ncol(Z),nrow=ncol(Z))
diag(unit)<-1
Beta=(solve(t(Z)%*%Z+x*unit)%*%t(Z))%*%data$y
return(mean((Y-(mean(Y)+Z%*%Beta))^2/(rep(1,ncol(H))-diag(H))^2))
}else{
H=(Z)%*%solve(t(Z)%*%Z+diag(x,ncol(Z)))%*%t(Z)
unit=matrix(0,ncol=ncol(Z),nrow=ncol(Z))
diag(unit)<-1
Beta=(solve(t(Z)%*%Z+x*unit)%*%t(Z))%*%data$y
return(mean((Y-(mean(Y)+Z%*%Beta))^2))
}
}
GCV_data=data.frame(gamma=seq(0,100,1),GCV=0)
for(j in 1:nrow(GCV_data)){
GCV_data$GCV[j]=fun(GCV_data$gamma[j])
}
plot(GCV_data$gamma,GCV_data$GCV,type="p",col=2)
gamma=min(GCV_data$gamma[GCV_data$GCV==min(GCV_data$GCV)])
unit=matrix(0,ncol=ncol(Z),nrow=ncol(Z))
diag(unit)<-1
Beta=(solve(t(Z)%*%Z+gamma*unit)%*%t(Z))%*%data$y
result=data.frame(num=1:nrow(data),y=data$y,predict=mean(data$y)+Z%*%Beta)
plot(result$num,result$y,xlim=c(1,nrow(data)),ylim=c(min(result$y,result$predict),max(result$y,result$predict)),xlab="sample number",ylab="values",type="o",col=2,main=paste0("cor:",cor(result$y,result$predict)))
par(new=T)
plot(result$num,result$predict,xlim=c(1,nrow(data)),ylim=c(min(result$y,result$predict),max(result$y,result$predict)),xlab="sample number",ylab="values",type="o",col=3,main=paste0("cor:",cor(result$y,result$predict)))
ridge_sigma=(gamma*sum(Beta^2)+sum((Y-Z%*%Beta)^2))/N
#異常度
a_f=function(y,x){
return((y-mean(Y)-sum(Beta*x-Beta*apply(X,2,mean)))^2/ridge_sigma)
}
#example ridge reg
library(dplyr)
data=data.frame(USArrests)
colnames(data)=c("y","x1","x2","x3")
#data=data.frame(num=1:n,y=sample(c(167,167.5,168.4,172,155.3,151.4,163,174,168,160.4,164.7,171,162.6,164.8,163.3,167.6,169.2,168,167.4,172),n,replace=T),x1=sample(c(84,87,86,85,82,87,92,94,88,84.9,78,90,88,87,82,84,86,83,85.2,82),n,replace=T),x2=sample(c(61,55.5,57,57,50,50,66.5,65,60.5,49.5,49.5,61,59.5,58.4,53.5,54,60,58.8,54,56),n,replace=T))
n=floor(nrow(data)*0.7)
X=as.matrix(data[,colnames(data) %in% c("x1","x2")])
Z_test=X[(n+1):nrow(data),];Z=X[1:n,]
Z=t(t(Z)-c(apply(Z,2,mean)))
Z_test=t(t(Z_test)-c(apply(Z_test,2,mean)))
Y_test=data$y[(n+1):nrow(data)];Y=data$y[1:n];N=length(Y)
GCV=1;
fun=function(x){
if(GCV==1){
H=(Z)%*%solve(t(Z)%*%Z+diag(x,ncol(Z)))%*%t(Z)
unit=matrix(0,ncol=ncol(Z),nrow=ncol(Z))
diag(unit)<-1
Beta=(solve(t(Z)%*%Z+x*unit)%*%t(Z))%*%Y
return(mean((Y-(mean(Y)+Z%*%Beta))^2/(rep(1,ncol(H))-diag(H))^2))
}else{
H=(Z)%*%solve(t(Z)%*%Z+diag(x,ncol(Z)))%*%t(Z)
unit=matrix(0,ncol=ncol(Z),nrow=ncol(Z))
diag(unit)<-1
Beta=(solve(t(Z)%*%Z+x*unit)%*%t(Z))%*%Y
return(mean((Y-(mean(Y)+Z%*%Beta))^2))
}
}
GCV_data=data.frame(gamma=seq(0,100000,1),GCV=0)
for(j in 1:nrow(GCV_data)){
GCV_data$GCV[j]=fun(GCV_data$gamma[j])
}
plot(GCV_data$gamma,GCV_data$GCV,type="p",col=2)
gamma=min(GCV_data$gamma[GCV_data$GCV==min(GCV_data$GCV)])
unit=matrix(0,ncol=ncol(Z),nrow=ncol(Z))
diag(unit)<-1
Beta=(solve(t(Z)%*%Z+gamma*unit)%*%t(Z))%*%Y
result=data.frame(num=1:(nrow(data)-n),y=Y_test,predict=mean(Y)+Z_test%*%Beta)
plot(result$num,result$y,xlim=c(1,nrow(result)),ylim=c(min(result$y,result$predict),max(result$y,result$predict)),xlab="sample number",ylab="values",type="o",col=2,main=paste0("cor:",cor(result$y,result$predict)))
par(new=T)
plot(result$num,result$predict,xlim=c(1,nrow(result)),ylim=c(min(result$y,result$predict),max(result$y,result$predict)),xlab="sample number",ylab="values",type="o",col=3,main=paste0("cor:",cor(result$y,result$predict)))
#ridge_sigma=(gamma*sum(Beta^2)+sum((Y-Z%*%Beta)^2))/N