集団通信

集団通信とは、多対多となる通信、つまり複数のプロセスを相手にデータをやり取りする場合を指します。

流れとしては、すべてのプロセスにデータを送る、あるいはすべてのプロセスからデータを受け取るというような処理になります。MPIではすべてのプロセスがその処理を行うので、多数対多数になります。

MPI_bcast

bcastはbroadcastの略です。自身の変数を他のすべてのプロセスに送ります。

あるプロセスでだけ設定ファイルを読み取り、他のプロセスへ配信するというような場合に使います。

以下にテストサブルーチンを示します。これでは変数xx_bcastをプロセス0がブロードキャストします。

subroutine smp_bcast()
    double precision :: xx_bcast = 0d0

    integer :: data_num = 1     ! 送信するデータの個数
    integer :: origin_rank = 0  ! 送信元プロセスのランク

    !ランク0のxx_bcastだけ1d0にする
    if(me == 0) then
       xx_bcast=1d0
    end if

    write(LOGUNIT,*) "smp_bcast"
    write(LOGUNIT,*) "original xx_bcast:",xx_bcast

    call mpi_bcast(xx_bcast, data_num, mpi_double_precision, origin_rank, &
         mpi_comm_world, ierr)

    write(LOGUNIT,*) "new xx_bcast:", xx_bcast

  end subroutine smp_bcast

これを実行すると、プロセス0以外は次のようになります。

 smp_bcast
 original xx_bcast:   0.0000000000000000
 new xx_bcast:   1.0000000000000000

プロセス0ではoriginal xx_bcastも1.0になります。こうしてすべてのプロセスでxx_bcastがoriginalのxx_bcastという同じ値に同期されます。

MPI_gather

gatherは「集める」と言う意味です。他のプロセスの配列を受け取り、配列に並べます。

以下のテストコードでは、各プロセスのランクを2倍した値をプロセス0の配列xx_gatherに集約します。

subroutine smp_gather()
    integer, allocatable :: xx_gather(:)

    integer :: data_num = 1
    integer :: root_rank = 0

    allocate(xx_gather(nnn))

    xx_gather=0

    write(LOGUNIT,*) "smp_gather"

    call mpi_gather(2*me, data_num, mpi_integer, &
         xx_gather(1), data_num, mpi_integer, &
         root_rank, &
         mpi_comm_world, ierr)

    write(LOGUNIT,*) xx_gather

  end subroutine smp_gather

これを実行すると、プロセス0では次のような結果になります。

smp_gather
           0           2           4           6           8          10          12          14

MPI_allgather

gather + bcastのようなものです。別個にやるよりこっちの方が速いらしいです。

MPI_reduce

他のプロセスの変数を合計したり積を取ったり、処理したものを
受け取ります。

以下のコードでは、各プロセスのランクの総和をプロセス0のsum_rankに代入します。

  subroutine smp_reduce()
    integer :: sum_rank 
    integer :: root_rank = 0

    sum_rank = 0

    write(LOGUNIT,*) "smp_reduce"

    call mpi_reduce(me, sum_rank, 1, mpi_integer, mpi_sum,&
         root_rank, mpi_comm_world, ierr)

    write(LOGUNIT,*) sum_rank

  end subroutine smp_reduce

MPI_allreduce

MPI_reduce + MPI_bcastのようなもの。こちらも速いらしいです。

サンプルコード

上記のテストを実行して各プロセスがsmp0301_xxxx.logに出力するコードが下記になります。

smp0301.f90

program main
  implicit none
  include "mpif.h"

  integer :: LOGUNIT = 125

  integer :: ierr
  integer :: me
  integer :: nnn

  character(len=4) :: me_char
  character(len=40) :: log_name

  ! 初期化
  call mpi_init(ierr)
  call mpi_comm_rank(mpi_comm_world,me,ierr)
  call mpi_comm_size(mpi_comm_world,nnn,ierr)

  ! 各プロセスがそれぞれのログファイルを開く
  write(me_char,'(i4.4)') me
  log_name = "smp0301_" // me_char // ".log"
  open(LOGUNIT, file=log_name,status="replace")

  write(LOGUNIT,*) "Process: ", me , "/", nnn

  ! テストを実行
  call smp_bcast()
  call smp_gather()
  call smp_reduce()

  ! 終了処理
  close(LOGUNIT)
  call mpi_finalize(ierr)


contains
  !----------------------------------------------------------------------
  subroutine smp_bcast()
    double precision :: xx_bcast = 0d0

    integer :: data_num = 1     ! 送信するデータの個数
    integer :: origin_rank = 0  ! 送信元プロセスのランク

    !ランク0のxx_bcastだけ1d0にする
    if(me == 0) then
       xx_bcast=1d0
    end if

    write(LOGUNIT,*) "smp_bcast"
    write(LOGUNIT,*) "original xx_bcast:",xx_bcast

    call mpi_bcast(xx_bcast, data_num, mpi_double_precision, origin_rank, &
         mpi_comm_world, ierr)

    write(LOGUNIT,*) "new xx_bcast:", xx_bcast

  end subroutine smp_bcast
  !----------------------------------------------------------------------
  subroutine smp_gather()
    integer, allocatable :: xx_gather(:)

    integer :: data_num = 1
    integer :: root_rank = 0

    allocate(xx_gather(nnn))

    xx_gather=0

    write(LOGUNIT,*) "smp_gather"

    call mpi_gather(2*me, data_num, mpi_integer, &
         xx_gather(1), data_num, mpi_integer, &
         root_rank, &
         mpi_comm_world, ierr)

    write(LOGUNIT,*) xx_gather

  end subroutine smp_gather
  !----------------------------------------------------------------------
  subroutine smp_reduce()
    integer :: sum_rank 
    integer :: root_rank = 0

    sum_rank = 0

    write(LOGUNIT,*) "smp_reduce"

    call mpi_reduce(me, sum_rank, 1, mpi_integer, mpi_sum,&
         root_rank, mpi_comm_world, ierr)

    write(LOGUNIT,*) sum_rank

  end subroutine smp_reduce

end program main

fortran+MPIコーディング入門(2) 集団通信