LoginSignup
0
0

More than 5 years have passed since last update.

前回の記事(ロジスティック回帰分析)の補足コード

Posted at

1.要約

 前回の記事で,収束判定を"尤度の値が変わらなくなるまで"と"係数の値が変わらなくなるまで"の2パターン紹介しました.今回の記事は,後者の収束基準に基づくプログラムを載せています.詳しい数理は前回の記事をご覧ください.

2.サンプルコード

def calc_p(beta,MatX):
    a,b = MatX.shape
    pxb = np.zeros((a,1))
    for i in range(a):
        pxb[i,0] = np.exp(beta.T.dot(MatX[i,:]))/(1+np.exp(beta.T.dot(MatX[i,:])))
        #pxb[i,0] = np.exp(beta.T.dot(MatX[i,:].reshape(b,1)))/(1+np.exp(beta.T.dot(MatX[i,:].reshape(b,1))))
    return pxb

def losgistic_estimation2(y,data,tol=10**(-4),nstart=30,maxiter=50):
    X = data.astype("float64");n,p = X.shape
    X1 = np.hstack([np.ones(n).reshape(n,1),X]).reshape(n,p+1)
    y = y.astype("float64").reshape(n,1)
    beta_hat = np.zeros(((p+1),1))
    delta_hat = np.Inf
    for _ in range(nstart):
        print("---epoc:%d ---" % (_+1))
        #initial beta
        beta = np.random.randn(p+1).reshape((p+1),1)
        delta = np.Inf
        while True:
            print("  delta: %f" % delta)
            #Newton-Raphson step
            prob = calc_p(beta,X1)
            #print(prob) #for check
            W = np.zeros((n,n))
            for i in range(n):
                W[i,i] = prob[i,0]*(1-prob[i,0])
            #print(W) # for check
            pd2 = -X1.T.dot(W).dot(X1)
            pd1 = X1.T.dot(y-prob)
            new_beta = beta - np.linalg.inv(pd2).dot(pd1)
            print("new_beta: ",new_beta)
            delta = sum((beta - new_beta)**2)
            if delta >= 0 and delta <= tol:
                print("parameters are converged")
                break
            elif np.isnan(delta)==True:
                break
            else:
                beta = new_beta
                continue
        if np.isnan(delta)==True:
            print("ERROR:delta is nan \nStop and go next epoc")
            continue
        if delta < delta_hat:
            beta_hat = new_beta
    return beta_hat

# データ行列の生成
N = 1000;p = 2
np.random.seed(10)
X = np.random.randn(N*p).reshape(N,p)
X1 = np.hstack([np.ones(N).reshape(N,1),X])
#正解ラベルの生成
y = np.zeros((N,1))
np.random.seed(100)
beta = np.random.randn(p+1).reshape(p+1,1)
prob = np.exp(X1.dot(beta))/(1+np.exp(X1.dot(beta)))
prob
for i in range(N):
    np.random.seed(i+8)
    if np.random.rand(1) < prob[i]:
        y[i] = 1
    else:
        y[i] = 0
y

losgistic_estimation2(y,X)
beta

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0