kisara11235
@kisara11235

Are you sure you want to delete the question?

Leaving a resolved question undeleted may help others!

broyden法(BFGS法)について

解決したいこと

下記サイトを参考にし、最小化問題としてbroyden法を実装しているのですが、うまく計算ができていません。

発生している問題・エラー

望み道理の計算結果となりません。
以下コードでは実験として二次関数や適当な多項式のような答えがわかっている計算を行っていますが、てんで異なる解を出力します。

DIM 1
4780668144446.717773 (quadratic)

DIM 2
408387270367.103210
104611701873.233688 (fn)

該当するソースコード

#include <stdio.h>
#include <math.h>

#define EPOCH 20
#define EPS 1e-8
#define EPS_h 1e-6

#define DIM 1

void copy_double(int length, double hairetu_before[length], double hairetu_after[length]){
    for(int i = 0; i < length; i++){
        hairetu_after[i] = hairetu_before[i];
    }
}

void HV(int matrix_size, double hamiltonian[matrix_size][matrix_size], double vector[matrix_size], double out_put_vector[matrix_size]){
    for(int i = 0; i < matrix_size; i++){
        for(int j = 0; j < matrix_size; j++){
            out_put_vector[i] += hamiltonian[i][j] * vector[j];
        }
    }
}
double dot_product(int length, double vector_1[length], double vector_2[length]){ //内積
    double dot = 0.0;
    for(int i = 0; i < length; i++){
        dot += vector_1[i] * vector_2[i];
    }
    return dot;
}
double norm(int length, double vector[length]){ //ノルム
    double norm = 0.0; norm = dot_product(length, vector, vector); norm = sqrt(norm);
    return norm;
}

void VH(int length, double matrix[length][length], double vector[length], double out_put_vector[length]){
    for(int i = 0; i < length; i++){
        for(int j = 0; j < length; j++){
            out_put_vector[i] += vector[j] * matrix[j][i];
        }
    }
}
void VV_H(int length, double vector_tate[length], double vector_yoko[length], double out_put_matrix[length][length]){
    for(int i = 0; i < length; i++){
        for(int j = 0; j < length; j++){
            out_put_matrix[i][j] += vector_tate[i] * vector_yoko[j];
        }
    }
}
void HH_double(int matrix_size, double matrix_A[matrix_size][matrix_size], double matrix_B[matrix_size][matrix_size], double out_put_matrix[matrix_size][matrix_size]){    
    for(int i = 0; i < matrix_size; i++){
        for(int j = 0; j < matrix_size; j++){
            for(int k = 0; k < matrix_size; k++){
                out_put_matrix[i][j] += matrix_A[i][k] * matrix_B[k][j];
            }
        }
    }
}

void Initial_matrix_double(int matrix_size, double matrix[matrix_size][matrix_size]){
    for(int i = 0; i < matrix_size; i++){ matrix[i][i] = 1.0; } 
}

double fn(double theta[2]){
    return theta[0] * theta[0] / 20.0 + theta[1] * theta[1];
}

double quadratic(double theta[1]){
    return (theta[0] + 1.0) * (theta[0] + 1.0) + 5.0;

}

double df_dtheta_i(double theta[DIM], int i_h, double (*func)(double [])){
    double theta_ghost[DIM]; for(int j = 0; j < DIM; j++){ theta_ghost[j] = theta[j];} theta_ghost[i_h] += EPS_h;
    return func(theta_ghost);
}

#define step 0.01
void broyden(double x[DIM], double (*func)(double [])){
    double theta_0[DIM] = {0}, theta_1[DIM] = {0}, s[DIM], H[DIM][DIM] = {0}; Initial_matrix_double(DIM, H);
    copy_double(DIM, x, theta_0); int t_epoch = 0;
    while(t_epoch < EPOCH){
        double g_t[DIM] = {0}, f_0 = func(theta_0);
        for(int i_h = 0; i_h < DIM; i_h++){
            double d_f_0 = df_dtheta_i(theta_0, i_h, func);
            g_t[i_h] = (d_f_0 - f_0) / EPS_h;
        }
        if(norm(DIM, g_t) < EPS){ 
            copy_double(DIM, theta_0, x);
            return;
        }
        double d[DIM] = {0}; HV(DIM, H, g_t, d); 
        double s[DIM] = {0}, y[DIM] = {0};
        for(int i = 0; i < DIM; i++){ 
            theta_1[i] = theta_0[i] - step * d[i];
            s[i] = - step * d[i];
        }
        double f_1 = func(theta_1);
        for(int i_h = 0; i_h < DIM; i_h++){
            double d_f_1 = df_dtheta_i(theta_1, i_h, func);
            y[i_h] = (d_f_1 - f_1) / EPS_h - g_t[i_h];
        }
        double sy = dot_product(DIM, s, y);
        double Y_S[DIM][DIM] = {0};  VV_H(DIM, y, s, Y_S);
        for(int i = 0; i < DIM; i++){
            for(int j = 0; j < DIM; j++){
                Y_S[i][j] *= - 1.0 / sy;
            }
            Y_S[i][i] = 1.0 - Y_S[i][i];
        }
        double H_right[DIM][DIM] = {0}; HH_double(DIM, H, Y_S, H_right);
        double Y_S_T[DIM][DIM] = {0};
        for(int i = 0; i < DIM; i++){
            for(int j = 0; j < DIM; j++){
                Y_S_T[i][j] = Y_S[j][i];
            }
        }
        HH_double(DIM, Y_S_T, H_right, H);
        double S_S[DIM][DIM] = {0}; VV_H(DIM, s, s, S_S);
        for(int i = 0; i < DIM; i++){
            for(int j = 0; j < DIM; j++){
                H[i][j] += S_S[i][j] / sy;
            }
        }
        copy_double(DIM, theta_1, theta_0); t_epoch++;
    }
}


int main(){
    double x[DIM];
    x[0] = .50;
    // x[1] = .50;
    broyden(x, quadratic);
    // broyden(x, fn);
    for(int i = 0; i < DIM; i++){
        printf("%lf\n", x[i]);
    }

    return 0;
}



自分で試したこと

・適当な行列やベクトルでサブルーチン(VH, VV_H, HH_double)のチェックを行った。←問題なし
・EPOCHを変えて数値を確認した←結局(norm==0)で止まるので意味はなかった。

アルゴリズムへの理解が足りないことで生じる原因なのかもしれない。(特にyを求める式やHの更新式)

0

No Answers yet.

Your answer might help someone💌