LoginSignup
2
3

More than 3 years have passed since last update.

Rのpower.prop.test関数をpythonで実装する

Last updated at Posted at 2020-07-05
  • 適合度テストを設計するのに、サンプルサイズや検証力を求めたい
  • Rのpower.prop.testという関数を使えば求められる
  • 同じような関数をpythonでも使いたい…

ということで、練習としてRのpower.prop.test関数のソースコードを元にpythonで実装してみました。
引数の意味などはRの公式ドキュメントを参照してください。
コードは記事の最後に載せてます。

使用例

サンプルサイズを求める

  • グループ1の比率は1%、グループ2の比率は3%
  • 有意水準は0.05
  • 検出力は0.8
  • 片側検定

上記の条件を満たすためのサンプルの大きさnを求めます。

# python
> params_dict = power_prop_test(p1=0.01, p2=0.03, power=0.8, sig_level=0.05, alternative="one_sided")

    Sample size: n = 604.845426012357
    Probability: p1 = 0.01, p2 = 0.03
    sig_level = 0.05,
    power = 0.8,
    alternative = one_sided

    NOTE: n is number in "each" group

> print(params_dict)

{'n': 604.845426012357, 'p1': 0.01, 'p2': 0.03, 'sig_level': 0.05, 'power': 0.8, 'alternative': 'one_sided'}

Rで同じ値を渡した結果とほぼ一致します。

# R
> power.prop.test(p1=0.01, p2=0.03, power=0.8, sig.level=0.05, alternative="one.sided")

     Two-sample comparison of proportions power calculation 

              n = 604.8434
             p1 = 0.01
             p2 = 0.03
      sig.level = 0.05
          power = 0.8
    alternative = one.sided

NOTE: n is number in *each* group

検出力を求める

次に、サンプルサイズn=1000のときの検出力(power)について同様に求めてみます。

#python
> power_prop_test(n=1000, p1=0.01, p2=0.03, sig_level=0.05, alternative="one_sided")

    Sample size: n = 1000
    Probability: p1 = 0.01, p2 = 0.03
    sig_level = 0.05,
    power = 0.9398478094069961,
    alternative = one_sided

    NOTE: n is number in "each" group
#R
> power.prop.test(n=1000, p1=0.01, p2=0.03, sig.level=0.05, alternative="one.sided")

     Two-sample comparison of proportions power calculation 

              n = 1000
             p1 = 0.01
             p2 = 0.03
      sig.level = 0.05
          power = 0.9398478
    alternative = one.sided

NOTE: n is number in *each* group

こちらもほとんど同じ値をとります。

補足

  • python上でRを動かすPypeRなどで、直接Rの関数を呼び出したほうが簡単かも
  • 初期値によっては収束せず、エラーを吐くかもしれません

読んでいただきありがとうございました。

使用したpower_prop_test関数

# 求めたいパラメーターをNoneにする
# alternativeは"one_sided" "two_sided"のどちらかを選択
# 求めた各パラメータを辞書型で返します
def power_prop_test(n=None, p1=None, p2=None, sig_level=0.05, power=None,
                    alternative="one_sided", strict=False,
                    tol=2.220446049250313e-16**0.25):

    from math import sqrt
    from scipy.stats import norm
    from scipy.optimize import brentq

    missing_params = [arg for arg in [n, p1, p2, sig_level, power] if not arg]
    num_none = len(missing_params)

    if num_none != 1:
        raise ValueError("exactly one of 'n', 'p1', 'p2', 'power' and 'sig.level'must be None")

    if sig_level is not None:
        if sig_level < 0 or sig_level > 1:
            raise ValueError("\"sig_level\" must be between 0 and 1")

    if power is not None:
        if power < 0 or power > 1:
            raise ValueError("\"power\" must be between 0 and 1")

    if alternative not in ["two_sided", "one_sided"]:
        raise ValueError("alternative must be \"two_sided\" or \"one_sided\"")
    t_side_dict = {"two_sided":2, "one_sided":1}
    t_side = t_side_dict[alternative]

    # compute power 
    def p_body(p1=p1, p2=p2, sig_level=sig_level, n=n, t_side=t_side, strict=strict):
        if strict and t_side==2:
            qu = norm.ppf(1-sig_level/t_side)
            d = abs(p1-p2)
            q1 = 1-p1
            q2 = 1-p2
            pbar = (p1 + p2) / 2
            qbar = 1 - pbar
            v1 = p1 * q1
            v2 = p2 * q2
            vbar = pbar * qbar
            power_value = (norm.cdf((sqrt(n) * d - qu * sqrt(2 * vbar))/sqrt(v1 + v2))
                    + norm.cdf(-(sqrt(n) * d + qu * sqrt(2 * vbar))/sqrt(v1 + v2)))
            return power_value

        else:
            power_value = norm.cdf((sqrt(n) * abs(p1 - p2) - (-norm.ppf(sig_level / t_side)
                        * sqrt((p1 + p2) * (1 - (p1 + p2)/2)))) / sqrt(p1 * 
                        (1 - p1) + p2 * (1 - p2)))
            return power_value

    if power is None:
        power = p_body()

    # solve the equation for the None value argument 
    elif n is None:
        def n_solver(x, power=power):
            return p_body(n=x) - power
        n = brentq(f=n_solver, a=1, b=1e+07, rtol=tol, maxiter=1000000)

    elif p1 is None:
        def p1_solver(x, power=power):
            return p_body(p1=x) - power
        try:
            p1 = brentq(f=p1_solver, a=0, b=p2, rtol=tol, maxiter=1000000)
        except:
            ValueError("No p1 in [0, p2] can be found to achive the desired power")

    elif p2 is None:
        def p2_solver(x, power=power):
            return p_body(p2=x) - power
        try:
            p2 = brentq(f=p2_solver, a=p1, b=1, rtol=tol, maxiter=1000000)
        except:
            VealueError("No p2 in [p1, 1] can be found to achive the desired power")

    elif sig_level is None:
        def sig_level_solver(x, power=power):
            return p_body(sig_level=x) - power
        try:
            sig_level = brentq(f=sig_level_solver, a=1e-10, b=1-1e-10, rtol=tol, maxiter=1000000)
        except:
            ValueError("No significance level [0,1] can be found to achieve the desired power")

    print("""
    Sample size: n = {0}
    Probability: p1 = {1}, p2 = {2}
    sig_level = {3},
    power = {4},
    alternative = {5}

    NOTE: n is number in "each" group
    """.format(n, p1, p2, sig_level, power, alternative))
    #各パラメーターは引数名をキーにもつ辞書で返される
    params_dict = {"n":n, "p1":p1, "p2":p2,
                   "sig_level":sig_level, "power":power, "alternative":alternative}

    return params_dict
2
3
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
2
3