書いたものをいろいろ載せてみました。
重回帰:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import pyplot
from mpl_toolkits.mplot3d.axes3d import Axes3D
from scipy import genfromtxt
# --- Multiple linear regression ------------------------------------------
# Fit y = b0 + b1*x1 + b2*x2 on a small hand-entered sample by solving the
# normal equations built from corrected sums of squares / cross products.
df_sample = pd.DataFrame([
    [167,167.5,168.4,172,155.3,151.4,163,174,168,160.4,164.7,171,162.6,164.8,163.3,167.6,169.2,168,167.4,172],
    [84,87,86,85,82,87,92,94,88,84.9,78,90,88,87,82,84,86,83,85.2,82],
    [61,55.5,57,57,50,50,66.5,65,60.5,49.5,49.5,61,59.5,58.4,53.5,54,60,58.8,54,56],
]).T
df_sample.columns = ["y", "x1", "x2"]
df_sample.index = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]

# Reference table: per-column mean and (population) standard deviation.
Expect_and_sd_data = pd.DataFrame([
    [np.mean(df_sample["y"]), np.std(df_sample["y"])],
    [np.mean(df_sample["x1"]), np.std(df_sample["x1"])],
    [np.mean(df_sample["x2"]), np.std(df_sample["x2"])],
]).T
Expect_and_sd_data.columns = ["y", "x1", "x2"]
Expect_and_sd_data.index = ["mean", "std"]

# Corrected sum-of-products matrix, rows/cols ordered (y, x1, x2):
# cor[i][j] = sum_k (v_i - mean_i)(v_j - mean_j).
data = np.array([df_sample["y"], df_sample["x1"], df_sample["x2"]])
cor = np.zeros([3, 3])
for j in range(0, 3, 1):
    for i in range(0, 3, 1):
        cor[i][j] = sum((data[i] - np.mean(data[i])) * (data[j] - np.mean(data[j])))
print(cor)

# Reorder so y comes LAST: new rows/cols are (x1, x2, y).
COR = cor
cor = np.zeros([3, 3])
cor[0:2, 0:2] = COR[1:3, 1:3]   # S_x1x1, S_x1x2 / S_x2x1, S_x2x2
cor[2][2] = COR[0][0]           # S_yy
for j in range(0, 2, 1):
    cor[j][2] = COR[0][j + 1]   # S_y,xj in the last column
    # BUG FIX: the original wrote cor[j+1][2] here, which left row 2
    # (the y-row) all zeros and made b1 = b2 = 0.  The symmetric entry
    # belongs at cor[2][j].
    cor[2][j] = COR[j + 1][0]
A = cor

# Solve the 2x2 normal equations S_xx @ [b1, b2] = S_xy.
inv_A = np.linalg.inv(cor[0:2, 0:2])
b1 = sum(A[2, 0:2] * inv_A[0])
b2 = sum(A[2, 0:2] * inv_A[1])
# Intercept from the means: b0 = mean(y) - b1*mean(x1) - b2*mean(x2).
b0 = np.mean(df_sample["y"]) - b1 * np.mean(df_sample["x1"]) - b2 * np.mean(df_sample["x2"])
print("linear_equation is", b1, "*x1+", b2, "*x2+", b0)

# Variance decomposition: total S_yy (ayy) = residual (EV) + explained (RV).
ayy = A[2][2]
EV = sum((df_sample["y"] - (b0 + b1 * df_sample["x1"] + b2 * df_sample["x2"])) ** 2)
# The original line here was truncated ("RV=sum"); completed as the
# explained sum of squares, giving R^2 = RV / S_yy.
RV = ayy - EV
print("R^2 =", RV / ayy)
多項式回帰:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
# --- Polynomial regression -----------------------------------------------
# Fit y on powers of x1 up to degree DIM via the normal equations, then
# report the mean squared error and the correlation coefficient R.
df_sample = pd.DataFrame([
    [167,167.5,168.4,172,155.3,151.4,163,174,168,160.4,164.7,171,162.6,164.8,163.3,167.6,169.2,168,167.4,172],
    [61,55.5,57,57,50,50,66.5,65,60.5,49.5,49.5,61,59.5,58.4,53.5,54,60,58.8,54,56],
]).T
df_sample.columns = ["y", "x1"]
df_sample.index = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]
print(df_sample)

# Build the (dim+1) x n design matrix whose rows are [1, x, x^2, ...].
value = np.array([df_sample["x1"]])
ans = np.ones(len(df_sample.index))
vec = ans
DIM = 2
# Guard: the fitted degree cannot exceed the number of observations.
dim = min(DIM, len(df_sample.index))
for j in range(0, dim):
    ans = ans * value            # next power of x1
    vec = np.vstack((vec, ans))
X = np.matrix(vec)
Y = np.transpose(np.array([df_sample["y"]]))

# Normal-equation solution a = (X X^T)^{-1} X Y, constant term first.
a = ((np.linalg.inv(X.dot(np.transpose(X)))).dot(X)).dot(Y)
print(a)
print(X)

# Fitted values and mean squared error.
y = np.transpose(X).dot(a)
error = sum(np.power(abs(y - Y), 2)) / len(y)
print("平均二乗誤差は", error, "で次元は", DIM, "です。")

# Coefficient of determination; BUG FIX: take abs() under the square root
# so a negative R2 gives a signed real R instead of NaN.
R2 = 1 - (sum(np.power(y - Y, 2)) / sum(np.power(Y - np.mean(Y), 2)))
R = (np.sign(R2)) * (np.power(abs(R2), 1 / 2))
print("coefficent of determination R is", R)
リッジ回帰:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import pyplot
# --- Ridge regression -----------------------------------------------------
# Centre the predictors, shrink the slope coefficients with an L2 penalty
# (gamma), then plot predicted vs. observed values per observation.
df_sample = pd.DataFrame([
    [167,167.5,168.4,172,155.3,151.4,163,174,168,160.4,164.7,171,162.6,164.8,163.3,167.6,169.2,168,167.4,172],
    [84,87,86,85,82,87,92,94,88,84.9,78,90,88,87,82,84,86,83,85.2,82],
    [61,55.5,57,57,50,50,66.5,65,60.5,49.5,49.5,61,59.5,58.4,53.5,54,60,58.8,54,56],
]).T
df_sample.columns = ["y", "x1", "x2"]
df_sample.index = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]

# Summary table of per-column mean and (population) standard deviation.
Expect_and_sd_data = pd.DataFrame(
    [[np.mean(df_sample[col]), np.std(df_sample[col])] for col in ["y", "x1", "x2"]]
).T
Expect_and_sd_data.columns = ["y", "x1", "x2"]
Expect_and_sd_data.index = ["mean", "std"]

# Centred predictor matrix (2 x n); the response stays un-centred because
# its mean serves as the intercept below.
Z = np.array([
    df_sample["x1"] - np.mean(df_sample["x1"]),
    df_sample["x2"] - np.mean(df_sample["x2"]),
])
Y = np.array([df_sample["y"]])

gamma = 5                # ridge penalty strength
unit = np.identity(2)
# Closed-form ridge estimate: Beta = (Z Z^T + gamma*I)^{-1} Z Y^T.
Beta = np.linalg.inv(Z.dot(Z.T) + gamma * unit).dot(Z).dot(Y.T)

fmt = """
係数は{0},{1}で
方程式としては、Y={2}*z1+{3}*z2+{4}
となります。
"""
desc = fmt.format(Beta[0], Beta[1], Beta[0], Beta[1], np.mean(df_sample["y"]))
print(desc)

# Predictions: intercept (mean of y) plus the centred linear term.
y = np.mean(df_sample["y"]) + (Z.T).dot(Beta)
print(y)

# Blue = model predictions, red = raw data, indexed 1..n on the x-axis.
n_obs = len(df_sample["y"])
x1 = range(1, n_obs + 1)
y1 = y
x2 = range(1, n_obs + 1)
y2 = Y
pyplot.scatter(x1, y1, c="b", label="predict values")
pyplot.scatter(x2, y2, c="r", label="data values")
pyplot.legend()
pyplot.title("Ridge regression")
pyplot.show()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import pyplot
from mpl_toolkits.mplot3d.axes3d import Axes3D
from scipy import genfromtxt
from statistics import mean, median,variance,stdev
# --- ADALINE-style iterative fit -----------------------------------------
# Fits y on standardized (x1, x2) with an exponential output unit, trained
# by repeated per-weight gradient steps.
#
# NOTE(review): the loop bodies below have lost their indentation (this file
# looks like a flattened paste) — as written, the lines following each `for`
# header must be re-indented before this section can run.
# NOTE(review): textbook ADALINE uses an identity (linear) activation; the
# exp(X·w) output here makes this closer to a log-linear model — confirm
# that is intentional.
df_sample=\
pd.DataFrame([
[167,167.5,168.4,172,155.3,151.4,163,174,168,160.4,164.7,171,162.6,164.8,163.3,167.6,169.2,168,167.4,172],
[84,87,86,85,82,87,92,94,88,84.9,78,90,88,87,82,84,86,83,85.2,82],
[61,55.5,57,57,50,50,66.5,65,60.5,49.5,49.5,61,59.5,58.4,53.5,54,60,58.8,54,56]
]).T
df_sample.columns=["y","x1","x2"]
df_sample.index=[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]
#print(df_sample)
# Learning rate and number of full passes; eta must stay small because the
# exponential output overflows for large weighted sums.
eta=0.0001;ite=100000
X=np.matrix(df_sample.loc[:, ['x1','x2']])
Y=np.array(df_sample["y"])
# Standardize predictors (sample std via statistics.stdev), then prepend a
# bias column of ones, so X is n x 3 with columns (1, z1, z2).
X=(X-np.array([np.mean(df_sample["x1"]),np.mean(df_sample["x2"])]))/np.array([stdev(df_sample["x1"]),stdev(df_sample["x2"])])
X=np.c_[np.ones(X.shape[0]),X]
w=np.ones(X.shape[1])
# Per-weight update loop: pthi = exp(X·w) is the model output and
# dw accumulates the (scaled) residual correlation with column j.
# NOTE(review): X[:,j] is an np.matrix column (n x 1) while (Y-pthi) is 1-D;
# the product broadcasts to a 2-D matrix rather than an element-wise vector —
# verify this computes the intended gradient component.
for i in range(0,ite):
for j in range(0,X.shape[1]):
pthi=np.exp(np.array(X.dot(w)))
dw=eta*sum((Y-pthi)*X[:,j])
w[j]=w[j]+dw
# Final model output after training.
pthi=np.exp(np.array(X.dot(w)))
print(w)
y=pthi
# Blue = predictions, red = raw data, indexed 1..n on the x-axis.
x1=range(1,len(df_sample["y"])+1)
y1=y
x2=range(1,len(df_sample["y"])+1)
y2=Y
print("cor:",np.corrcoef(y1,y2))
pyplot.scatter(x1,y1,c="b",label="predict values")
pyplot.scatter(x2,y2,c="r",label="data values")
pyplot.legend()
pyplot.title("ADALINE")
pyplot.show()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import pyplot
from mpl_toolkits.mplot3d.axes3d import Axes3D
from scipy import genfromtxt
from statistics import mean, median,variance,stdev
from copy import copy
# --- Logistic regression with L2 penalty ---------------------------------
# Fits a sigmoid model to rescaled data, minimising a penalized
# cross-entropy cost by gradient descent with numerically-differentiated
# (forward-difference) gradients.
#
# NOTE(review): the training-loop bodies below have lost their indentation
# (flattened paste) — lines after each `for`/`def` header must be
# re-indented before this section can run.
df_sample=\
pd.DataFrame([
[167,167.5,168.4,172,155.3,151.4,163,174,168,160.4,164.7,171,162.6,164.8,163.3,167.6,169.2,168,167.4,172],
[84,87,86,85,82,87,92,94,88,84.9,78,90,88,87,82,84,86,83,85.2,82],
[61,55.5,57,57,50,50,66.5,65,60.5,49.5,49.5,61,59.5,58.4,53.5,54,60,58.8,54,56]
]).T
df_sample.columns=["y","x1","x2"]
df_sample.index=[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]
#print(df_sample)
# Step size, iteration count, and L2 penalty weight.
eta=0.00001;ite=100000;lam=1
# Ad-hoc rescaling: /10 keeps the linear term small, /1000 squeezes y into
# (0, 1) so the cross-entropy terms log(z) and log(1-z) are defined.
X=np.matrix(df_sample.loc[:, ['x1','x2']])/10
Y=np.array(df_sample["y"])/1000
#X=(X-np.array([np.mean(df_sample["x1"]),np.mean(df_sample["x2"])]))/np.array([stdev(df_sample["x1"]),stdev(df_sample["x2"])])
# Prepend a bias column; w has one weight per column of X.
X=np.c_[np.ones(X.shape[0]),X]
w=np.ones(X.shape[1])
# Penalized cross-entropy cost at weight vector s:
#   J = -sum[ Y*log(z) + (1-Y)*log(1-z) ] + lam*||s||^2, with z = sigmoid(X·s).
# NOTE(review): the penalty includes the bias weight s[0], which is usually
# excluded from regularization — confirm intent.
def J_cost(s,lam):
z=np.array([1/(1+np.exp(-X.dot(s)))])
J=np.sum(-Y*np.array([np.log(z)])-(np.array([np.ones(len(Y))])-Y)*np.log(np.array([np.ones(len(Y))])-z))+lam*np.sum(s**2)
return J
# Sigmoid model output for weight vector s.
def logit(s):
return np.array([1/(1+np.exp(-X.dot(s)))])
#print(J_cost(w,0.1))
#print(logit(w))
# Gradient descent where each partial derivative is approximated by a
# forward difference with step 0.01 (biased estimate; central differences
# would be more accurate).  Each sweep updates one weight at a time.
for i in range(0,ite):
for j in range(0,len(w)):
w_sub=copy(w);
w_sub[j]=w_sub[j]+0.01;
dw=np.sum(J_cost(w_sub,lam)-J_cost(w,lam))
w[j]=w[j]-eta*dw/0.01
print(J_cost(w,lam))
# Blue = sigmoid predictions, red = rescaled data, indexed 1..n.
x1=range(1,len(df_sample["y"])+1)
y1=logit(w)
x2=range(1,len(df_sample["y"])+1)
y2=Y
#print("cor:",np.corrcoef(y1,y2))
pyplot.scatter(x1,y1,c="b",label="predict values")
pyplot.scatter(x2,y2,c="r",label="data values")
pyplot.legend()
pyplot.title("Penalized logistic reg")
pyplot.show()