#CoArray Fortran (CAF)
以前書いたプログラムを改良したので載せておきます。
CoArray Fortran で Mandelbrot
今回は余計な配列をとらずに、ファイル I/O のところで、横長の短冊状に並列で計算した CoArray を直接書き出しています。class 変数は CoArray に出来ないようなのですが、派生型の成分として取ることで OO (Object Oriented) 風に書き出しています。
(H29.6.6 追記)
CAF では同時に同じファイルにアクセスできると知ったので、I/O を書き直してみたのですが、さすがにシーケンシャルファイルの書き込みはダメなようでしたw APPEND で毎回 open することにしました。(読み取りや DIRECT access は可能?) これによりシリアルなコードからの改変がより少なくなりました。
###コンソール出力
image= 3: time= 0.0000 cpu= 0.0000: start Mandel
image= 5: time= 0.0000 cpu= 0.0000: start Mandel
image= 4: time= 0.0000 cpu= 0.0000: start Mandel
image= 2: time= 0.0000 cpu= 0.0000: start Mandel
image= 1: time= 0.0000 cpu= 0.0000: start Mandel
image= 5: time= 0.0320 cpu= 0.0312: end Mandel
image= 1: time= 0.1310 cpu= 0.0312: end Mandel
image= 5: time= 0.1550 cpu= 0.0781: start bmp
image= 1: time= 0.1080 cpu= 0.0781: start bmp
image= 4: time= 0.3040 cpu= 0.2969: end Mandel
image= 2: time= 0.3160 cpu= 0.3125: end Mandel
image= 4: time= 0.0560 cpu= 0.0625: start bmp
image= 2: time= 0.0590 cpu= 0.0625: start bmp
image= 1: time= 0.1370 cpu= 0.0000: end bmp
image= 1: time= 0.0000 cpu= 0.0000: normal end
image= 3: time= 0.7160 cpu= 0.7188: end Mandel
image= 3: time= 0.0570 cpu= 0.0469: start bmp
image= 2: time= 0.3990 cpu= 0.0000: end bmp
image= 2: time= 0.0000 cpu= 0.0000: normal end
image= 3: time= 0.0060 cpu= 0.0156: end bmp
image= 3: time= 0.0000 cpu= 0.0000: normal end
image= 4: time= 0.4260 cpu= 0.0000: end bmp
image= 4: time= 0.0000 cpu= 0.0000: normal end
image= 5: time= 0.5990 cpu= 0.0000: end bmp
image= 5: time= 0.0000 cpu= 0.0000: normal end
続行するには何かキーを押してください . . .
###BMP 出力
(BMP upload不可の為、手動でPNGに変換後の図)
##ソースコード
coarray 変数の allocate/deallocate は、暗黙の内に sync all となるので、無駄なバリアを作らないよう置く位置に注意が必要です。
module m_bmp
implicit none
type :: t_bmp_file_header
sequence
integer(2) :: bfType = transfer('BM', 0_2, 1) ! BitMap
integer(4) :: bfSize ! file size in bytes
integer(2) :: bfReserved1 = 0 ! always 0
integer(2) :: bfReserved2 = 0 ! always 0
integer(4) :: bfOffBits
end type t_bmp_file_header
!
type :: t_bmp_info_header
sequence
integer(4) :: biSize = Z'28' ! size of bmp_info_header ; 40bytes
integer(4) :: biWidth
integer(4) :: biHeight
integer(2) :: biPlanes = 1 ! always 1
integer(2) :: biBitCount
integer(4) :: biCompression = 0 !0:nocompression,1:8bitRLE,2:4bitRLE,3:bitfield
integer(4) :: biSizeImage
integer(4) :: biXPelsPerMeter = 3780 ! 96dpi
integer(4) :: biYPelsPerMeter = 3780 ! 96dpi
integer(4) :: biClrUsed = 0
integer(4) :: biClrImportant = 0
end type t_bmp_info_header
!
type :: t_rgb
sequence
character :: b, g, r ! order is b g r
end type t_rgb
!
type :: t_bmp
type(t_rgb), allocatable :: rgb(:, :)[:]
contains
procedure :: wr => wr_bmp_caf
end type
contains
subroutine wr_bmp_caf(bmp, fn)
class(t_bmp), intent(in) :: bmp
character(len = *), intent(in) :: fn
type(t_bmp_file_header) :: bmp_file_header
type(t_bmp_info_header) :: bmp_info_header
integer :: me, ni, ir, ic
ni = num_images()
me = this_image()
associate(nx => size(bmp%rgb, 1), ny => size(bmp%rgb, 2) * ni)
bmp_file_header%bfSize = 14 + 40 + 0 + nx * ny * 3
bmp_file_header%bfOffBits = 14 + 40
bmp_info_header%biWidth = nx ! nx shouold be a multiple of 4
bmp_info_header%biHeight = ny
bmp_info_header%biBitCount = 24
bmp_info_header%biSizeImage = nx * ny * 3
end associate
if (me == 1) then
open(9, file = fn//'.bmp', access = 'stream', status = 'replace')
write(9) bmp_file_header
write(9) bmp_info_header
endfile(9)
close(9)
end if
if (me > 1) sync images(me - 1) ! order 1, 2, 3...., n
open(9, file = fn//'.bmp', access = 'stream', status = 'old', position = 'append')
write(9) bmp%rgb
close(9)
if (me < ni) sync images(me + 1)
end subroutine wr_bmp_caf
! convert to t_RGB
pure elemental type(t_rgb) function to_rgb(ir, ig, ib)
integer, intent(in) :: ir, ig, ib
to_rgb = t_rgb(achar(ib), achar(ig), achar(ir))
end function to_rgb
end module m_bmp
program Mandel
use m_bmp
implicit none
integer, parameter :: nx = 1920, ny = 1920, maxiter = 255
real , parameter :: x0 = -2.0, x1 = 1.0, y0 = -1.5, y1 = 1.5
complex, allocatable :: c(:, :)[:], z(:, :)[:]
integer :: i, j, me, ni, my
integer, allocatable :: niter(:, :)[:]
type (t_bmp) :: bmp
ni = num_images()
me = this_image()
if (mod(ny, ni) /= 0) stop 'ny must be a multiple of num_image()'
my = ny / ni
allocate(c(nx, my)[*], z(nx, my)[*], bmp%rgb(nx, my)[*])
!
! make 2D-mesh : c(:, :)
!
block
real :: x(nx), y(ny)
x = [( (x1 - x0) / (nx - 1) * (i - 1) + x0, i = 1, nx )]
y = [( (y1 - y0) / (ny - 1) * (i - 1) + y0, i = 1, ny )]
forall (i = 1:nx, j = 1:my) c(i, j) = cmplx(x(i), y((me - 1) * my + j))
end block
!
! main iteration : niter(:, :)
!
call stamp('start Mandel')
allocate(niter(nx, my)[*]) ! implicitly sync all
niter = imandel(c)
call stamp('end Mandel')
!
! make bmp file: Mandel.bmp
!
bmp%rgb = to_rgb(256 - niter, 256 - niter, 256 - niter)
call stamp('start bmp')
call bmp%wr('Mandel')
call stamp('end bmp')
call stamp('normal end')
deallocate(bmp%rgb) ! implicitly sync all
stop
contains
pure elemental integer function imandel(c)
complex, intent(in) :: c
complex :: z
z = c
do imandel = 1, maxiter
if (abs(z) > 2.0) exit
z = z * z + c
end do
end function imandel
subroutine stamp(text)
character(*), intent(in) :: text
real :: t0 = 0.0, t1 ! t0 save
integer :: c0 = 0, c1, cr ! c0 save
logical :: first = .true.
if (first) then
first = .false.
call cpu_time(t1)
call system_clock(c1, cr)
t0 = t1
c0 = c1
end if
call cpu_time(t1)
call system_clock(c1, cr)
print '(a, i3, a, f12.4, a, f12.4, 2a)', 'image= ', this_image(), ': time= ', real(c1 - c0) / cr,' cpu= ', t1 - t0, ': ', text
t0 = t1
c0 = c1
end subroutine stamp
end program Mandel
##補足
sync images によって image 1,2..., n の順に実行されるようにしています。普通の do loop のように使えます。
###ソースコード
program order
implicit none
integer :: me, ni
me = this_image()
ni = num_images()
if (me > 1 ) sync images(me - 1)
print *, me ! 1, 2, ...., n
if (me < ni) sync images(me + 1)
end program order
###実行結果
1
2
3
4
5
続行するには何かキーを押してください . . .
##(参考)ソースコード 旧
module m_bmp
implicit none
type :: t_bmp_file_header
sequence
integer(2) :: bfType = transfer('BM', 0_2, 1) ! BitMap
integer(4) :: bfSize ! file size in bytes
integer(2) :: bfReserved1 = 0 ! always 0
integer(2) :: bfReserved2 = 0 ! always 0
integer(4) :: bfOffBits
end type t_bmp_file_header
!
type :: t_bmp_info_header
sequence
integer(4) :: biSize = Z'28' ! size of bmp_info_header ; 40bytes
integer(4) :: biWidth
integer(4) :: biHeight
integer(2) :: biPlanes = 1 ! always 1
integer(2) :: biBitCount
integer(4) :: biCompression = 0 !0:nocompression,1:8bitRLE,2:4bitRLE,3:bitfield
integer(4) :: biSizeImage
integer(4) :: biXPelsPerMeter = 3780 ! 96dpi
integer(4) :: biYPelsPerMeter = 3780 ! 96dpi
integer(4) :: biClrUsed = 0
integer(4) :: biClrImportant = 0
end type t_bmp_info_header
!
type :: t_rgb
sequence
character :: b, g, r ! order is b g r
end type t_rgb
!
type :: t_bmp
type(t_rgb), allocatable :: rgb(:, :)[:]
contains
procedure :: wr => wr_bmp_caf
end type
contains
subroutine wr_bmp_caf(bmp, fn)
class(t_bmp), intent(in) :: bmp
character(len = *), intent(in) :: fn
type(t_bmp_file_header) :: bmp_file_header
type(t_bmp_info_header) :: bmp_info_header
integer :: i, ir, ic
associate(nx => size(bmp%rgb, 1), ny => size(bmp%rgb, 2) * num_images())
bmp_file_header%bfSize = 14 + 40 + 0 + nx * ny * 3
bmp_file_header%bfOffBits = 14 + 40
bmp_info_header%biWidth = nx ! nx shouold be a multiple of 4
bmp_info_header%biHeight = ny
bmp_info_header%biBitCount = 24
bmp_info_header%biSizeImage = nx * ny * 3
end associate
open(9, file = fn//'.bmp', form = 'binary', status = 'unknown')
write(9) bmp_file_header
write(9) bmp_info_header
do i = 1, num_images()
write(9) bmp%rgb[i]
end do
close(9)
end subroutine wr_bmp_caf
! convert to t_RGB
pure elemental type(t_rgb) function to_rgb(ir, ig, ib)
integer, intent(in) :: ir, ig, ib
to_rgb = t_rgb(achar(ib), achar(ig), achar(ir))
end function to_rgb
end module m_bmp
program Mandel
use m_bmp
implicit none
integer, parameter :: nx = 1920, ny = 1920, maxiter = 255
real , parameter :: x0 = -2.0, x1 = 1.0, y0 = -1.5, y1 = 1.5
complex, allocatable :: c(:, :)[:], z(:, :)[:]
integer :: i, j, k, ni, my
integer, allocatable :: niter(:, :)[:]
type (t_bmp) :: bmp
ni = num_images()
k = this_image()
if (mod(ny, ni) /= 0) stop 'ny must be a multiple of num_image()'
my = ny / ni
allocate(c(nx, my)[*], z(nx, my)[*])
!
! make 2D-mesh : c(:, :)
!
block
real :: x(nx), y(ny)
x = [( (x1 - x0) / (nx - 1) * (i - 1) + x0, i = 1, nx )]
y = [( (y1 - y0) / (ny - 1) * (i - 1) + y0, i = 1, ny )]
forall (i = 1:nx, j = 1:my) c(i, j)[k] = cmplx(x(i), y((k - 1) * my + j))
end block
!
! main iteration : niter(:, :)
!
call stamp('start Mandel')
allocate(niter(nx, my)[*]) ! implicitly sync all
niter = imandel(c)
call stamp('end Mandel')
!
! make bmp file in image 1 : Mandel.bmp
!
allocate(bmp%rgb(nx, my)[*])
bmp%rgb = to_rgb(256 - niter, 256 - niter, 256 - niter)
sync all
if (k == 1) then
call stamp('start bmp')
call bmp%wr('mandel')
call stamp('end bmp')
end if
deallocate(bmp%rgb) ! implicitly sync all
call stamp('normal end')
stop
contains
pure elemental integer function imandel(c)
complex, intent(in) :: c
complex :: z
z = c
do imandel = 1, maxiter
if (abs(z) > 2.0) exit
z = z * z + c
end do
end function imandel
subroutine stamp(text)
character(*), intent(in) :: text
real :: t0 = 0.0, t1 ! t0 save
integer :: c0 = 0, c1, cr ! c0 save
logical :: first = .true.
if (first) then
first = .false.
call cpu_time(t1)
call system_clock(c1, cr)
t0 = t1
c0 = c1
end if
call cpu_time(t1)
call system_clock(c1, cr)
print '(a, i3, a, f12.4, a, f12.4, 2a)', 'image= ', this_image(), ': time= ', real(c1 - c0) / cr,' cpu= ', t1 - t0, ': ', text
t0 = t1
c0 = c1
end subroutine stamp
end program Mandel