Erlang/OTP の gen.erl を読む

Erlang

Posted at 2014-05-18

去年 Erlang/OTP トレーニングに参加したのですが、講師の Loïcさんが
「Erlangを使って夜安心して眠れる仕組みを知りたければ、OTP の gen.erl を読め」と言ってたような気がしたので読んでみました。

gen.erl 概要

gen.erl は gen_server.erl や gen_fsm.erl などの汎用モジュールから呼ばれる、さらに汎用的な動作を担当している模様。
具体的には

汎用モジュールの起動
汎用モジュールに登録された関数の呼び出し

の２つです。

gen.erl

とりあえずコードを読んでコメントを付けてみた。
(誤訳の可能性があるのでコメントの原文は残しています)

gen.erl


-module(gen).
-compile({inline,[get_node/1]}).

%%%-----------------------------------------------------------------
%%% This module implements the really generic stuff of the generic
%%% standard behaviours (e.g. gen_server, gen_fsm).
%%%
%%% The standard behaviour should export init_it/6.
%%%-----------------------------------------------------------------

%%%-----------------------------------------------------------------
%%% このモジュールは generic ビヘイビアの本当に汎用的な部分を実装している
%%% 基本的なビヘイビアは init_it/6 を export しなければならない。
%%%-----------------------------------------------------------------

-export([start/5, start/6, debug_options/1,
    call/3, call/4, reply/2]).

-export([init_it/6, init_it/7]).

-export([format_status_header/2]).

-define(default_timeout, 5000).

%%-----------------------------------------------------------------

-type linkage()    :: 'link' | 'nolink'.
-type emgr_name()  :: {'local', atom()} | {'global', term()} | {via, atom(), term()}.

-type start_ret()  :: {'ok', pid()} | 'ignore' | {'error', term()}.

-type debug_flag() :: 'trace' | 'log' | 'statistics' | 'debug'
| {'logfile', string()}.
-type option()     :: {'timeout', timeout()}
| {'debug', [debug_flag()]}
| {'spawn_opt', [proc_lib:spawn_option()]}.
-type options()    :: [option()].

%%-----------------------------------------------------------------
%% Starts a generic process.
%% start(GenMod, LinkP, Mod, Args, Options)
%% start(GenMod, LinkP, Name, Mod, Args, Options)
%%    GenMod = atom(), callback module implementing the 'real' fsm
%%    LinkP = link | nolink
%%    Name = {local, atom()} | {global, term()} | {via, atom(), term()}
%%    Args = term(), init arguments (to Mod:init/1)
%%    Options = [{timeout, Timeout} | {debug, [Flag]} | {spawn_opt, OptionList}]
%%      Flag = trace | log | {logfile, File} | statistics | debug
%%          (debug == log && statistics)
%% Returns: {ok, Pid} | ignore |{error, Reason} |
%%          {error, {already_started, Pid}} |
%%    The 'already_started' is returned only if Name is given
%%-----------------------------------------------------------------

%% 汎用サーバープロセスを開始する

-spec start(module(), linkage(), emgr_name(), module(), term(), options()) ->
    start_ret().

%% Starts a generic process that is to be registered under Name.
%%   GenMod  - generic behaviour module (e.g. gen_server) whose init_it/6 is run
%%   LinkP   - link | nolink; whether the new process is linked to the caller
%%   Name    - {local, atom()} | {global, term()} | {via, atom(), term()}
%%   Mod     - callback module implementing the behaviour
%%   Args    - init arguments (to Mod:init/1)
%%   Options - [{timeout, Timeout} | {debug, [Flag]} | {spawn_opt, OptionList}]
%% Returns {error, {already_started, Pid}} without spawning anything when
%% where/1 already resolves Name; otherwise returns the result of do_spawn/6.
start(GenMod, LinkP, Name, Mod, Args, Options) ->
    Existing = where(Name),
    if
        Existing =:= undefined ->
            do_spawn(GenMod, LinkP, Name, Mod, Args, Options);
        true ->
            %% A process is already registered under this name.
            {error, {already_started, Existing}}
    end.

-spec start(module(), linkage(), module(), term(), options()) -> start_ret().

%% Starts a generic process without a registered name.
%% Same arguments as start/6 minus Name; delegates directly to do_spawn/5.
start(GenMod, LinkP, Mod, Args, Options) ->
    do_spawn(GenMod, LinkP, Mod, Args, Options).



%%-----------------------------------------------------------------
%% Spawn the process (and link) maybe at another node.
%% If spawn without link, set parent to ourselves 'self'!!!
%%-----------------------------------------------------------------

%%-----------------------------------------------------------------
%% プロセスを spawn (および link) する. 別のノード上に spawn されることもある.
%% link せずに spawn する場合は, Parent を (Pid ではなく) アトム 'self' にする!!!
%%-----------------------------------------------------------------



%% Spawns an unnamed generic process whose first call is ?MODULE:init_it/6.
%% proc_lib:start_link/5 and proc_lib:start/5 start the process synchronously:
%% the caller waits until the new process calls proc_lib:init_ack, and the
%% acknowledged value is returned. The wait is bounded by the {timeout, T}
%% option (infinity when absent); {spawn_opt, Opts} ([] when absent) is
%% passed through as the spawn options.
do_spawn(GenMod, link, Mod, Args, Options) ->
    %% init_it arguments: GenMod, Starter, Parent, Mod, Args, Options
    InitArgs = [GenMod, self(), self(), Mod, Args, Options],
    proc_lib:start_link(?MODULE, init_it, InitArgs,
                        timeout(Options), spawn_opts(Options));
%% Any linkage other than 'link' (the linkage() type only offers 'nolink')
%% spawns without a link. Parent is then the atom 'self' instead of a pid,
%% which is what the "set parent to ourselves 'self'" header comment means.
do_spawn(GenMod, _, Mod, Args, Options) ->
    %% init_it arguments: GenMod, Starter, Parent, Mod, Args, Options
    InitArgs = [GenMod, self(), self, Mod, Args, Options],
    proc_lib:start(?MODULE, init_it, InitArgs,
                   timeout(Options), spawn_opts(Options)).

%% Spawns a generic process that will register itself as Name; the new
%% process starts in ?MODULE:init_it/7. This clause links it to the caller.
do_spawn(GenMod, link, Name, Mod, Args, Options) ->
    %% init_it arguments: GenMod, Starter, Parent, Name, Mod, Args, Options
    InitArgs = [GenMod, self(), self(), Name, Mod, Args, Options],
    proc_lib:start_link(?MODULE, init_it, InitArgs,
                        timeout(Options), spawn_opts(Options));
%% Named process without a link: Parent is the atom 'self' instead of a pid.
do_spawn(GenMod, _, Name, Mod, Args, Options) ->
    %% init_it arguments: GenMod, Starter, Parent, Name, Mod, Args, Options
    InitArgs = [GenMod, self(), self, Name, Mod, Args, Options],
    proc_lib:start(?MODULE, init_it, InitArgs,
                   timeout(Options), spawn_opts(Options)).

%%-----------------------------------------------------------------
%% Initiate the new process.
%% Register the name using the Rfunc function
%% Calls the Mod:init/Args function.
%% Finally an acknowledge is sent to Parent and the main
%% loop is entered.
%%-----------------------------------------------------------------

%% do_spawn の proc_lib:start_link, start/5 で使われる

%%-----------------------------------------------------------------
%% 新しいプロセスを初期化する.
%% Rfunc関数を使って名前を登録する.
%% Mod:init/Args を呼び出す.
%% 最後に Parent へ確認応答 (acknowledge) を送信し,
%% メインループに入る.
%%-----------------------------------------------------------------

%% Entry point of a newly spawned process that has no registered name.
%% Nothing is registered; the process's own pid (self()) is passed to
%% init_it2/7 in the Name position.
%% When spawned through do_spawn/5, Starter is the pid of the spawning
%% process and Parent is that same pid (link) or the atom 'self' (nolink).
init_it(GenMod, Starter, Parent, Mod, Args, Options) ->
    init_it2(GenMod, Starter, Parent, self(), Mod, Args, Options).

%% Entry point of a newly spawned process that must register itself as Name.
%% If the name is already taken, the Starter is told so through
%% proc_lib:init_ack/2 with {error, {already_started, Pid}} and the generic
%% module is never entered; otherwise initialisation continues in init_it2/7.
init_it(GenMod, Starter, Parent, Name, Mod, Args, Options) ->
    case name_register(Name) of
        {false, Holder} ->
            proc_lib:init_ack(Starter, {error, {already_started, Holder}});
        true ->
            init_it2(GenMod, Starter, Parent, Name, Mod, Args, Options)
    end.

%% Hands control to the generic behaviour module by calling GenMod:init_it/6.
%% GenMod must therefore export init_it/6 (see the module header comment).
init_it2(GenMod, Starter, Parent, Name, Mod, Args, Options) ->
    GenMod:init_it(Starter, Parent, Name, Mod, Args, Options).




%%-----------------------------------------------------------------
%% Makes a synchronous call to a generic process.
%% Request is sent to the Pid, and the response must be
%% {Tag, _, Reply}.
%%-----------------------------------------------------------------

%%-----------------------------------------------------------------
%% 汎用プロセスに対して同期呼び出しを行う.
%% Request が Pid へ送られ,
%% {Tag, _, Reply} というレスポンスが返されなければならない.
%%-----------------------------------------------------------------

%% call関数は, ローカル, グローバルな領域で正しく関数の Pid までたどりつけるようにする部分
%% call -> do_call という流れ. 実際に関数を呼び出して監視するのは do_call

%%% New call function which uses the new monitor BIF
%%% call(ServerId, Label, Request)

%%% 新しいビルトイン関数のモニターを使用した関数呼び出し.
%%% 引数は call(サーバーID, ラベル, リクエスト)

%% Synchronous call using the default timeout (?default_timeout, i.e. 5000).
%% Process = pid() | atom() | {global, Name} | {via, Module, Name}
%%         | {Name, Node}
call(Process, Label, Request) ->
    call(Process, Label, Request, ?default_timeout).

%% call(Process, Label, Request, Timeout)
%% Resolves Process to something do_call/4 can address and delegates to it;
%% the actual request/monitor/reply exchange happens in do_call/4.
%% Every clause requires Timeout to be 'infinity' or a non-negative integer;
%% anything else ends in a function_clause error.

%% Local or remote by pid
call(Pid, Label, Request, Timeout)
  when is_pid(Pid) andalso
       (Timeout =:= infinity orelse
        (is_integer(Timeout) andalso Timeout >= 0)) ->
    do_call(Pid, Label, Request, Timeout);
%% Local by name: exits with noproc when nothing is registered under Name.
call(Name, Label, Request, Timeout)
  when is_atom(Name) andalso
       (Timeout =:= infinity orelse
        (is_integer(Timeout) andalso Timeout >= 0)) ->
    case whereis(Name) of
        undefined ->
            exit(noproc);
        Pid when is_pid(Pid) ->
            do_call(Pid, Label, Request, Timeout)
    end;
%% Global by name: {global, Name}
call({global, _} = Process, Label, Request, Timeout)
  when Timeout =:= infinity orelse
       (is_integer(Timeout) andalso Timeout >= 0) ->
    call_registered(Process, Label, Request, Timeout);
%% Registered through a user supplied registry: {via, Module, Name}
call({via, _, _} = Process, Label, Request, Timeout)
  when Timeout =:= infinity orelse
       (is_integer(Timeout) andalso Timeout >= 0) ->
    call_registered(Process, Label, Request, Timeout);
%% Local by name in disguise, e.g. {Name, node()}: the node part names this
%% very node, so the call is retried as a plain local name.
call({Name, Node}, Label, Request, Timeout)
  when Node =:= node() andalso
       (Timeout =:= infinity orelse
        (is_integer(Timeout) andalso Timeout >= 0)) ->
    call(Name, Label, Request, Timeout);
%% Remote by name
call({_Name, Node} = Process, Label, Request, Timeout)
  when is_atom(Node) andalso
       (Timeout =:= infinity orelse
        (is_integer(Timeout) andalso Timeout >= 0)) ->
    case node() of
        nonode@nohost ->
            %% This node is not distributed, so no remote node is reachable.
            exit({nodedown, Node});
        _ ->
            do_call(Process, Label, Request, Timeout)
    end.

%% Looks up a {global, Name} or {via, Module, Name} process through where/1
%% and calls it. Exits with noproc when the name is not registered.
call_registered(Process, Label, Request, Timeout) ->
    case where(Process) of
        undefined ->
            exit(noproc);
        Pid when is_pid(Pid) ->
            PidNode = node(Pid),
            try
                do_call(Pid, Label, Request, Timeout)
            catch
                exit:{nodedown, PidNode} ->
                    %% A nodedown not yet detected by global,
                    %% pretend that it was: report the name as unregistered.
                    exit(noproc)
            end
    end.


%% Sends {Label, {self(), Ref}, Request} to Process and waits for {Ref, Reply}.
%% Every call/4 clause, whether the target was local, global, via or remote,
%% ends up here.
%% Returns {ok, Reply}. Exits with {nodedown, Node}, with the target's exit
%% reason, or with 'timeout' when no reply arrives within Timeout.
do_call(Process, Label, Request, Timeout) ->
    %% The target is watched with a monitor rather than a link.
    %% NOTE(review): presumably so that the caller is only notified of, and
    %% not taken down by, the target's death -- confirm.
    try erlang:monitor(process, Process) of
        Mref ->
            %% If the monitor/2 call failed to set up a connection to a
            %% remote node, we don't want the '!' operator to attempt
            %% to set up the connection again. (If the monitor/2 call
            %% failed due to an expired timeout, '!' too would probably
            %% have to wait for the timeout to expire.) Therefore,
            %% use erlang:send/3 with the 'noconnect' option so that it
            %% will fail immediately if there is no connection to the
            %% remote node.

            %% erlang:send(Dest, Msg, Options) -> ok | nosuspend | noconnect
            %% With [noconnect], 'noconnect' is returned instead of sending
            %% when the destination node would first have to be
            %% auto-connected. ('nosuspend' belongs to the nosuspend option,
            %% which is not used here.) The result is deliberately ignored:
            %% a failed send shows up as a 'DOWN' message or as a timeout in
            %% the receive below.
            catch erlang:send(Process, {Label, {self(), Mref}, Request},
                [noconnect]),
            receive
                %% Success: the reply is tagged with our monitor reference.
                {Mref, Reply} ->
                    erlang:demonitor(Mref, [flush]),
                    {ok, Reply};
                %% No connection to the node the target lives on.
                {'DOWN', Mref, _, _, noconnection} ->
                    Node = get_node(Process),
                    exit({nodedown, Node});
                %% The target died (or never existed); exit with its reason.
                {'DOWN', Mref, _, _, Reason} ->
                    exit(Reason)
            %% No reply within Timeout.
            after Timeout ->
                erlang:demonitor(Mref, [flush]),
                exit(timeout)
            end
    %% erlang:monitor/2 itself raised an error.
    catch
        error:_ ->
            %% Node (C/Java?) is not supporting the monitor.
            %% The other possible case -- this node  not distributed
            %% -- should have been handled earlier.
            %% Do the best possible with monitor_node/2.
            %% This code may hang indefinitely if the Process
            %% does not exist. It is only used for featureweak remote nodes.
            Node = get_node(Process),
            %% monitor_node(Node, Flag) -> true.
            monitor_node(Node, true),
            receive
                {nodedown, Node} ->
                    monitor_node(Node, false),
                    exit({nodedown, Node})
            after 0 ->
                Tag = make_ref(),  % unique reference used to match the reply
                Process ! {Label, {self(), Tag}, Request},
                wait_resp(Node, Tag, Timeout)  % the wait that may hang (see above)
            end
    end.

%% Returns the node that Process lives on.
get_node(Process) ->
    %% We trust the arguments to be correct, i.e
    %% Process is either a local or remote pid,
    %% or a {Name, Node} tuple (of atoms) and in this
    %% case this node (node()) _is_ distributed and Node =/= node().
    %% Nothing is validated beyond the two shapes below; any other term
    %% fails with a case_clause error.
    case Process of
        Pid when is_pid(Pid) ->
            node(Pid);
        {_RegName, NodeName} when is_atom(NodeName) ->
            NodeName
    end.

%% Waits for the reply to a request sent on the monitor_node/2 fallback path.
%% Tag is the reference (from make_ref/0) that the reply must carry.
%% Returns {ok, Reply}; exits with {nodedown, Node} if the node goes down
%% first, or with 'timeout' when nothing arrives within Timeout.
%% Node monitoring is switched off again in every case.
wait_resp(Node, Tag, Timeout) ->
    Outcome =
        receive
            {Tag, Reply} ->
                {ok, Reply};
            {nodedown, Node} ->
                {exit, {nodedown, Node}}
        after Timeout ->
            {exit, timeout}
        end,
    monitor_node(Node, false),
    case Outcome of
        {ok, _} ->
            Outcome;
        {exit, Why} ->
            exit(Why)
    end.

%%
%% Send a reply to the client.
%% {To, Tag} is the client reference that arrived with the request; the
%% client receives {Tag, Reply}. Sending is best effort: a failing send is
%% ignored, and {Tag, Reply} is the return value either way.
%%
reply({To, Tag}, Reply) ->
    Msg = {Tag, Reply},
    try
        erlang:send(To, Msg)
    catch
        _:_ -> Msg
    end.

%%%-----------------------------------------------------------------
%%%  Misc. functions.
%%%-----------------------------------------------------------------

%% Resolves a registered name to whatever the matching registry returns.
%%   {local, Name}       - erlang:whereis/1 on this node
%%   {global, Name}      - global:whereis_name/1
%%   {via, Module, Name} - Module:whereis_name/1, so Module has to export it
where({local, Name}) ->
    whereis(Name);
where({global, Name}) ->
    global:whereis_name(Name);
where({via, Module, Name}) ->
    Module:whereis_name(Name).

%% Tries to register the calling process under the given name.
%% Returns true on success. On failure returns {false, Pid}, where Pid is
%% whatever where/1 currently resolves the name to.
name_register({local, Name} = Key) ->
    %% register/2 raises an error instead of returning a failure value.
    try register(Name, self()) of
        true -> true
    catch
        error:_ ->
            {false, where(Key)}
    end;
name_register({global, Name} = Key) ->
    case global:register_name(Name, self()) of
        no -> {false, where(Key)};
        yes -> true
    end;
name_register({via, Module, Name} = Key) ->
    case Module:register_name(Name, self()) of
        no ->
            {false, where(Key)};
        yes ->
            true
    end.

%% Returns the value of the {timeout, Time} option, or infinity when the
%% option list does not contain one.
timeout(Options) ->
    case opt(timeout, Options) of
        {ok, Limit} -> Limit;
        _NotGiven -> infinity
    end.

%% Returns the value of the {spawn_opt, Opts} option, or [] when the option
%% list does not contain one.
spawn_opts(Options) ->
    case opt(spawn_opt, Options) of
        {ok, SpawnOpts} -> SpawnOpts;
        _NotGiven -> []
    end.

%% Looks up the first {Key, Value} pair in an option list.
%% Returns {ok, Value}, or false when no such pair exists. Only 2-tuples
%% are considered; every other element is skipped.
opt(_Key, []) ->
    false;
opt(Key, [{Key, Found} | _Rest]) ->
    {ok, Found};
opt(Key, [_Other | Rest]) ->
    opt(Key, Rest).

%% Returns sys:debug_options/1 applied to the value of the {debug, Flags}
%% option, or [] when the option list does not contain one.
debug_options(Opts) ->
    DebugOpt = opt(debug, Opts),
    case DebugOpt of
        {ok, Flags} ->
            sys:debug_options(Flags);
        _ ->
            []
    end.

%% Builds the header for a status report.
%% For a registered name (atom) or a pid the result is the string
%% TagLine ++ " " ++ printable name/pid; for any other name the result is
%% the tuple {TagLine, Name}.
format_status_header(TagLine, RegName) when is_atom(RegName) ->
    lists:concat([TagLine, " ", RegName]);
format_status_header(TagLine, Pid) when is_pid(Pid) ->
    PidText = pid_to_list(Pid),
    lists:concat([TagLine, " ", PidText]);
format_status_header(TagLine, Name) ->
    {TagLine, Name}.

start関数を読んだときのメモ

proc_libとは？

do_spawn で呼ばれているプロセスを生成する関数。
ドキュメントに書かれていた内容は大体以下のとおり。

これは、OTPデザイン原則に従った同期/非同期のプロセス起動を担当する。
proc_lib は stdlib の一部で、特にOTPの標準ビヘイビアが新しいプロセスを起動するときに使われる。
その他に、proc_lib は 特別なプロセス、OTP原則に従ったユーザー定義プロセスの起動にも使える。

proc_lib でプロセスを起動すると、spawn時にいくつかの有益な情報が保存される。

プロセス名かプロセス識別子
親プロセスの情報
親プロセスのさらに祖先の情報

通常のErlangでは終了理由が normal のときだけ「正常終了」として扱われるが, proc_lib で起動したプロセスは shutdown や {shutdown, Term} で終了した場合も正常終了として扱われる.
shutdown は application(スーパーバイザ監視ツリー) の終了時などに起こる.

proc_lib で起動したプロセスが normal , shutdown , {shutdown, Term} 以外の理由で終了したときは クラッシュレポート が出力される.
クラッシュレポートはSASL設定がされたアプリじゃないと見れない。
レポートには、起動時に保存された情報と、終了理由、クラッシュの原因となったプロセスに関する情報が含まれる。

動作イメージ

start関数は引数が多く、起動の様子がいまいちわかりづらかったので図にしてみた。

call関数を読んだときのメモ

monitorとは？

ドキュメントに書かれていた内容は大体以下のとおり。

monitor(Type, Item) -> MonitorRef な関数。
Type はモニタリングしたいものをアトムで指定するが, 現状 process のみしか対応してない。将来増えるかも。
Item には pid()、{登録名, Node} (指定したノード上の登録名)、登録名 (ローカルノード上のプロセス) の3種類を指定できる。

レジスタ名でモニタリングするとき、名前をもつプロセスは monitor/2 が呼び出された時点での名前を持つプロセスがずっとモニタリングされるので注意。
レジスタ名が途中で変わっても、自動で切り替えたりしてくれない。

Item が死ぬと, モニタリングしているプロセスに 'DOWN' メッセージが届く。
Item が存在しない、またはノードとのコネクションが切れたときも同じく 'DOWN' メッセージが届く。
届くメッセージの形式は {'DOWN', MonitorRef, Type, Object, Info}
MonitorRef と Type は monitor/2 呼び出し時のものと同じものが返る。
Object はモニタリングされているオブジェクトへの参照。 Pid か {RegName, Node} のタプル。
Info にはプロセスの終了理由、noproc(存在しないプロセス)、noconnection(ノードへのコネクションがない) のいずれかが返る。

モニタリングは 'DOWN' が返った時か demonitor/1 を呼び出したときに終わる。
もし古いノード (リモートプロセスのモニタリングや名前付きプロセスのモニタリングが実装されていないもの) をモニタリングしようとしたときは badarg と共に失敗する。

同じ対象への複数回の monitor/2 の呼び出しはエラーにはならない。このときは複数の完全に独立したモニターが返る。

動作イメージ

難しかったので同様に図にしてみた。

感想

proc_lib や monitor などの理解が曖昧なままなので、ちゃんと理解できたか怪しい。
gen.erl は gen_server などの汎用プロセスのさらに汎用的な部分を担当していたので「夜安心して眠れる理由」を把握できるまでに至らなかった。
次は時間のあるときに gen_server.erl を読んでみたい。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up