I experienced a problem when usign -O3 optimization on code with nested loops acting on allocatable arrays. The following shows a minimal example of my code:
--->main.f90
program main use norms implicit none integer :: Nx,Ny,Nz complex, ALLOCATABLE, DIMENSION(:,:,:) :: arr real, ALLOCATABLE, DIMENSION(:) :: normParts1 integer :: i,j Nx=4 Ny=4 Nz=4 ALLOCATE(arr(Nx,Ny,Nz), normParts1(Nz)) arr(1,1,1)=(1.D0,0.D0) arr(1,2,1)=(1.D0,0.D0) arr(4,4,1)=(1.D0,0.D0) write(*,*) norm(Nx,Ny,Nz,arr) CALL normParts(Nx,Ny,Nz,arr,NormParts1) write(*,*) NormParts1 end program main
--->norms.f90
module norms implicit none contains function norm(Nx,Ny,Nz, arr) implicit none INTEGER, INTENT(IN) :: Nx,Ny,Nz COMPLEX, INTENT(IN) :: arr(Nx,Ny,Nz) REAL :: norm INTEGER :: i,j, k norm=0.D0 do i=1,Nx do j=1,Ny do k=1, Nz norm=norm+abs(arr(i,j,k))**2 end do end do end do end function norm !---------------------------------------------------------- subroutine normParts(Nx, Ny, Nz, arr, normParts1) IMPLICIT none REAL :: normParts1(Nz) INTEGER, INTENT(IN) :: Nx, Ny, Nz COMPLEX, INTENT(IN) :: arr(Nx,Ny,Nz) INTEGER:: i,j,k do k=1,Nz normParts1(k)=0.0d0 end do !do k=1,Nz do i=1,Nx do j=1,Ny do k=1,Nz normParts1(k)=normParts1(k)+abs(arr(i,j,k))**2 end do end do end do end subroutine normParts !----------------------------------------------------------- end module norms
when compiled with composer_xe_2013_sp1.0.080 and -03, normParts gives an incorrect result: norm=3, normParts=(2,0,0,0).
When compiled with 02, or when interchanging the loop order, such that k is the outermost loop (which would indeed be more efficient) everything works fine and norm=3, normParts=(3,0,0,0).
Is there anything dangerous in my code that may lead to behavior? Can this behavior be expected or is there something weird about this?