Qiita Teams that are logged in
You are not logged in to any team

Log in to Qiita Team
Community
OrganizationAdvent CalendarQiitadon (β)
Service
Qiita JobsQiita ZineQiita Blog
Help us understand the problem. What is going on with this article?

CoArray Fortran で Mandelbrot

More than 5 years have passed since last update.

CoArray Fortran (CAF)

CoArray Fortran (CAF) とは、Partitioned Global Address Space (PGAS) に基づく分散並列処理機能を含んだ Fortran のことで、かつては F-- と呼ばれていたものです。のちに CoArray Fortran と名前を変え、議論の末 ISO 規格として Fortran2008 の機能として正式に採用されました。

思想としては、既存のFortran に最小限の拡張を施して並列プログラミングを可能にするというもので、データパラレル処理機能のみを持っています。このため色々機能不足で、次の Fortran 規格での機能の拡張が論じられていますDraft TS 18508 Additional Parallel Features in Fortran

CAF は主に Cray が手がけていたもので、10 年以上やっていますが、他の PGAS 言語同様イマイチ盛り上がりに欠けます。最近では Intel Fortran や GNU Fortran などでも CAF が利用できるようになってきていますが、実行性能が出るのは Cray だけということで、今後普及するか規格から取り外されることになるか将来はまだ不明です。

実行結果

コンソール出力

CAF は、いわゆる Single Program Multiple Data (SPMD) 型の言語に属するので、並列動作するそれぞれの image と呼ばれる CPU が同じ動作をします。いま8個の image で実行したため、基本的に8個の重複する出力がなされています。ただ BMP データの書き出しだけは、第1 image でのみ行っています。

image=   8: time=       0.0000 cpu=       0.0000: start Mandel
image=   7: time=       0.0000 cpu=       0.0000: start Mandel
image=   4: time=       0.0000 cpu=       0.0000: start Mandel
image=   3: time=       0.0000 cpu=       0.0000: start Mandel
image=   2: time=       0.0000 cpu=       0.0000: start Mandel
image=   6: time=       0.0000 cpu=       0.0000: start Mandel
image=   5: time=       0.0000 cpu=       0.0000: start Mandel
image=   1: time=       0.0000 cpu=       0.0000: start Mandel
image=   8: time=       2.0300 cpu=       1.9531: end   Mandel
image=   7: time=       2.0450 cpu=       1.9688: end   Mandel
image=   2: time=       2.0210 cpu=       1.9688: end   Mandel
image=   1: time=       2.0440 cpu=       1.9688: end   Mandel
image=   3: time=       2.1300 cpu=       2.0625: end   Mandel
image=   4: time=       2.2500 cpu=       2.1719: end   Mandel
image=   6: time=       2.2430 cpu=       2.0625: end   Mandel
image=   5: time=       2.3270 cpu=       2.2344: end   Mandel
image=   6: time=       0.0890 cpu=       0.0000: normal end
image=   1: time=       0.2560 cpu=       0.0000: start bmp
image=   2: time=       0.3320 cpu=       0.0000: normal end
image=   8: time=       0.3540 cpu=       0.0000: normal end
image=   7: time=       0.3370 cpu=       0.0000: normal end
image=   3: time=       0.2480 cpu=       0.0000: normal end
image=   5: time=       0.0000 cpu=       0.0000: normal end
image=   4: time=       0.1290 cpu=       0.0000: normal end
image=   1: time=       0.0400 cpu=       0.0469: end   bmp
image=   1: time=       0.0000 cpu=       0.0000: normal end
続行するには何かキーを押してください . . .

MandelCAF.png

プログラム

ここでは、計算領域をy軸方向に分割して、横長の短冊状の領域で並列計算します。
I/O が絡む BMP ファイル生成と出力は第1 image のみで行います。この時、各 image 毎に計算した結果を第1 image に集約する必要があるので、その手前で sync all を用いて同期を行います。

    module m_bmp
      implicit none
      type :: t_bmp_file_header
        sequence  
        integer(2) :: bfType = transfer('BM', 0_2, 1) ! BitMap
        integer(4) :: bfSize          ! file size in bytes
        integer(2) :: bfReserved1 = 0 ! always 0
        integer(2) :: bfReserved2 = 0 ! always 0
        integer(4) :: bfOffBits
      end type t_bmp_file_header
      ! 
      type :: t_bmp_info_header
        sequence
        integer(4) :: biSize     = Z'28' ! size of bmp_info_header ; 40bytes 
        integer(4) :: biWidth
        integer(4) :: biHeight
        integer(2) :: biPlanes   = 1 ! always 1
        integer(2) :: biBitCount
        integer(4) :: biCompression = 0 !0:nocompression,1:8bitRLE,2:4bitRLE,3:bitfield
        integer(4) :: biSizeImage
        integer(4) :: biXPelsPerMeter = 3780 ! 96dpi
        integer(4) :: biYPelsPerMeter = 3780 ! 96dpi 
        integer(4) :: biClrUsed      = 0
        integer(4) :: biClrImportant = 0 
      end type t_bmp_info_header  
      !
      type :: t_rgb
        sequence
        character :: b, g, r  ! order is b g r 
      end type t_rgb 
      !
      type :: t_bmp
        type(t_rgb), allocatable :: rgb(:, :)
      contains 
        procedure :: wr => wr_bmp
      end type
    contains   
      subroutine wr_bmp(bmp, fn)
        class(t_bmp), intent(in) :: bmp 
        character(len = *), intent(in) :: fn
        type(t_bmp_file_header) :: bmp_file_header
        type(t_bmp_info_header) :: bmp_info_header
        associate(nx => size(bmp%rgb, 1), ny => size(bmp%rgb, 2))
          bmp_file_header%bfSize      = 14 + 40 + 0 + nx * ny * 3
          bmp_file_header%bfOffBits   = 14 + 40
          bmp_info_header%biWidth     = nx       ! nx shouold be a multiple of 4
          bmp_info_header%biHeight    = ny
          bmp_info_header%biBitCount  = 24 
          bmp_info_header%biSizeImage = nx * ny * 3
        end associate
        open(9, file = fn//'.bmp', form = 'binary', status = 'unknown')
        write(9) bmp_file_header
        write(9) bmp_info_header
        write(9) bmp%rgb
        close(9)
        return
      end subroutine wr_bmp
 ! convert to t_RGB    
      pure elemental type(t_rgb) function to_rgb(ir, ig, ib)
        integer, intent(in) :: ir, ig, ib
        to_rgb = t_rgb(achar(ib), achar(ig), achar(ir))
      end function to_rgb  
    end module m_bmp

    program Mandel
      use m_bmp
      implicit none
      integer, parameter :: nx = 1920, ny = 1920, maxiter = 255
      real   , parameter :: x0 = -2.0, x1 = 1.0, y0 = -1.5, y1 = 1.5
      complex, allocatable :: c(:, :)[:], z(:, :)[:]
      integer :: i, j, k, ni, my
      integer, allocatable :: niter(:, :)[:]
      ni = num_images()
      k  = this_image()
      if (mod(ny, ni) /= 0) stop 'ny must be a multiple of num_image()'
      my = ny / ni
      allocate(c(nx, my)[*], z(nx, my)[*])
!
! make 2D-mesh :  c(:, :)
!      
      block
        real  :: x(nx), y(ny)
        x = [( (x1 - x0) / (nx - 1) * (i - 1) + x0, i = 1, nx )]
        y = [( (y1 - y0) / (ny - 1) * (i - 1) + y0, i = 1, ny )]
        forall (i = 1:nx, j = 1:my) c(i, j)[k] = cmplx(x(i), y((k - 1) * my + j)) 
      end block
 !   
 ! main iteration : niter(:, :)        
 !     
      call stamp('start Mandel')
      allocate(niter(nx, my)[*])
      do concurrent (i = 1:nx, j = 1:my)
        niter(i, j)[k] = imandel(c(i, j)[k])  ! elemental function/forall  not working
      end do  
      call stamp('end   Mandel')
 !
 ! make bmp file in image 1 : Mandel.bmp   
 !
      sync all
      if (k == 1) then
        call stamp('start bmp')
        block          
          type (t_bmp) :: bmp
          integer :: m1, m2, kk, niter2(nx, ny)
          do kk = 1, ni ! num_images()
            m1 = (kk - 1) * my + 1
            m2 = m1 + my - 1
            niter2(:, m1:m2) = 256 - niter(:, :)[kk]  
          end do

          allocate(bmp%rgb(nx, ny))
          bmp%rgb = to_rgb(niter2, niter2, niter2)
          call bmp%wr('Mandel')
        end block  
        call stamp('end   bmp')
      end if
      call stamp('normal end')
      sync all
      stop
    contains

      pure elemental integer function imandel(c)
        complex, intent(in) :: c
        complex :: z
        z = c
        do imandel = 1, maxiter
          if (abs(z) > 2.0) exit
          z = z * z + c
        end do    
      end function imandel

      subroutine stamp(text)
        character(*), intent(in) :: text
        real :: t0 = 0.0, t1      ! t0 save
        integer :: c0 = 0, c1, cr ! c0 save
        logical :: first = .true.
        if (first) then 
          first = .false.
          call cpu_time(t1)
          call system_clock(c1, cr)
          t0 = t1
          c0 = c1
        end if  
        call cpu_time(t1)
        call system_clock(c1, cr)
        print '(a, i3, a, f12.4, a, f12.4, 2a)', 'image= ', this_image(), ': time= ', real(c1 - c0) / cr,' cpu= ', t1 - t0, ': ', text
        t0 = t1
        c0 = c1
      end subroutine stamp
    end program Mandel
cure_honey
Fortran でおk。 https://twitter.com/Fortran2008
http://oomorigohan.tumblr.com/
Why not register and get more from Qiita?
  1. We will deliver articles that match you
    By following users and tags, you can catch up information on technical fields that you are interested in as a whole
  2. you can read useful information later efficiently
    By "stocking" the articles you like, you can search right away