Hi, I was trying to vectorize a loop that contains many IF blocks, using the following directive:
m=0.0 ! form banded matrix of Puasson equastion pok=3. call annotate_site_begin( "pressure" ) call annotate_iteration_task( "pressure-task" ) do 99 k=2,kbm1 do 99 i=2,imm1 !DIR$ SIMD LASTPRIVATE(bb2) REDUCTION(+:m, gc2, gc1, gen) do 99 j=2,jmm1 if (k+1<=kb.and.i+1<=im)then aa1(i+1,j,k+1)=.25e0*aaf(i+1,j,k+1) 1 *.5*(dy(i,j)+dy(i+1,j))/ddx(i,j)/art(i,j) 1 +.25e0*aaf(i,j,k+1)*dq(i+1,j)/dq(i,j) 1 *dy(i,j)/ddx(i,j)/art(i,j) endif if (k+1<=kb.and.i-1>=1)then aa2(i-1,j,k+1)=-.25e0*aaf(i-1,j,k+1) 1 *.5*(dy(i,j)+dy(i-1,j))/ddx(i-1,j)/art(i,j) 1 -.25e0*aaf(i,j,k+1)*dq(i-1,j)/dq(i,j) 1 *dy(i,j)/ddx(i,j)/art(i,j) end if if (k-1>=1.and.i+1<=im)then aa3(i+1,j,k-1)=-.25e0*aaf(i+1,j,k-1) 1 *.5*(dy(i,j)+dy(i+1,j))/ddx(i,j)/art(i,j) 1 -.25e0*aaf(i,j,k-1)*dq(i+1,j)/dq(i,j) 1 *dy(i,j)/ddx(i,j)/art(i,j) end if if (k-1>=1.and.i-1>=1)then aa4(i-1,j,k-1)=.25e0*aaf(i-1,j,k-1) 1 *.5*(dy(i,j)+dy(i-1,j))/ddx(i-1,j)/art(i,j) 1 +.25e0*aaf(i,j,k-1)*dq(i-1,j)/dq(i,j) 1 *dy(i,j)/ddx(i,j)/art(i,j) end if if (k+1<=kb.and.j+1<=jm)then bb1(i,j+1,k+1)=.25e0*bbf(i,j+1,k+1) 1 *.5*(dx(i,j)+dx(i,j+1))/ddy(i,j)/art(i,j) 1 +.25e0*bbf(i,j,k+1)*dq(i,j+1)/dq(i,j) 1 *dx(i,j)/ddy(i,j)/art(i,j) end if if (k+1<=kb.and.j-1>=1)then bb2(i,j-1,k+1)=-.25e0*bbf(i,j-1,k+1) 1 *.5*(dx(i,j)+dx(i,j-1))/ddy(i,j-1)/art(i,j) 1 -.25e0*bbf(i,j,k+1)*dq(i,j-1)/dq(i,j) 1 *dx(i,j)/ddy(i,j)/art(i,j) end if if (k-1>=1.and.j+1<=jm)then bb3(i,j+1,k-1)=-.25e0*bbf(i,j+1,k-1) 1 *.5*(dx(i,j)+dx(i,j+1))/ddy(i,j)/art(i,j) 1 -.25e0*bbf(i,j,k-1)*dq(i,j+1)/dq(i,j) 1 *dx(i,j)/ddy(i,j)/art(i,j) end if if (k-1>=1.and.j-1>=1) then bb4(i,j-1,k-1)=.25e0*bbf(i,j-1,k-1) 1 *.5*(dx(i,j)+dx(i,j-1))/ddx(i,j-1)/art(i,j) 1 +.25e0*bbf(i,j,k-1)*dq(i,j-1)/dq(i,j) 1 *dx(i,j)/ddy(i,j)/art(i,j) end if if (i+1<=im) then ga1(i+1,j,k)=dz(k)*dq(i+1,j) 1 *.5*(dy(i,j)+dy(i+1,j))/ddx(i,j)/art(i,j) end if if (i-1>=1) then ga2(i-1,j,k)=dz(k)*dq(i-1,j) 1 *.5*(dy(i,j)+dy(i-1,j))/ddx(i-1,j)/art(i,j) end if if (j+1<=jm) then gb1(i,j+1,k)=dz(k)*dq(i,j+1) 1 
*.5*(dx(i,j)+dx(i,j+1))/ddy(i,j)/art(i,j) end if if (j-1>=1) then gb2(i,j-1,k)=dz(k)*dq(i,j-1) 1 *.5*(dx(i,j)+dx(i,j-1))/ddy(i,j-1)/art(i,j) end if if (k+1<=kb) then gc1(i,j,k+1)=1.e0/(dzz(k)*dq(i,j))* 1 (art(i,j)+.5*(aaf(i,j,k+1)+aaf(i,j,k))*aaf(i,j,k+1)*dy(i,j)/ 1 dx(i,j)+.5*(bbf(i,j,k+1)+bbf(i,j,k))*bbf(i,j,k+1)*dx(i,j)/ 1 dy(i,j))/art(i,j) end if if (k-1>=1) then gc2(i,j,k-1)=1.e0/(dzz(k-1)*dq(i,j))* 1 (art(i,j)+.5*(aaf(i,j,k-1)+aaf(i,j,k))*aaf(i,j,k-1)*dy(i,j)/ 1 dx(i,j)+.5*(bbf(i,j,k-1)+bbf(i,j,k))*bbf(i,j,k-1)*dx(i,j)/ 1 dy(i,j))/art(i,j) end if ! if(iint==5)stop if (i-1>=1.and.j-1>=1.and.k-1>=1.) then gen(i,j,k)=(-dq(i,j)*dz(k)*(.5*(dy(i,j)+dy(i+1,j))/ddx(i,j)+ 1 .5*(dy(i,j)+dy(i-1,j))/ddx(i-1,j)+ 1 .5*(dx(i,j)+dx(i,j+1))/ddy(i,j)+ 1 .5*(dx(i,j)+dx(i,j-1))/ddy(i,j-1)))/art(i,j)- 2 (1.e0/dzz(k-1)+1.e0/dzz(k))/dq(i,j)*(art(i,j) 1 +.5*(aaf(i,j,k+1)+aaf(i,j,k))*aaf(i,j,k)*dy(i,j)/dx(i,j)+ 1 .5*(bbf(i,j,k+1)+bbf(i,j,k))*bbf(i,j,k)*dx(i,j)/dy(i,j) 1 )/art(i,j) else gen(i,j,k)=(-dq(i,j)*dz(k)*(dy(i,j)/ddx(i,j)+ 1 dy(i,j)/ddx(i-1,j)+ 1 dx(i,j)/ddy(i,j)+ 1 dx(i,j)/ddy(i,j-1)))/art(i,j)- 2 (2.e0/dzz(k))/dq(i,j)* 1 (art(i,j)+.5*(aaf(i,j,k+1)+aaf(i,j,k))*aaf(i,j,k)*dy(i,j)/dx(i,j)+ 1 .5*(bbf(i,j,k+1)+bbf(i,j,k))*bbf(i,j,k)*dx(i,j)/dy(i,j) 1 )/art(i,j) end if if (((k-1)*(k-kb)*(i-1)*(i-im)*(j-1)*(j-jm)).ne.0) then m=m+1 if (k==kbm1) then ga1(i+1,j,k)=ga1(i+1,j,k)+aa1(i+1,j,k+1) ga2(i-1,j,k)=ga2(i-1,j,k)+aa2(i-1,j,k+1) gb1(i,j+1,k)=gb1(i,j+1,k)+bb1(i,j+1,k+1) gb2(i,j-1,k)=gb2(i,j-1,k)+bb2(i,j-1,k+1) gen(i,j,k)=gen(i,j,k)+gc1(i,j,k+1) endif if (k==2) then !¸òþñþôýð ÿþòõ¨¿ýþ¸ª¹ aa2(i+1,j,k-1)=0. bb2(i,j+1,k-1)=0. bb4(i,j-1,k-1)=0. gc2(i,j,k-1)=0. 
endif if (i==2) then gc2(i,j,k-1)=gc2(i,j,k-1)+aa4(i-1,j,k-1) gc1(i,j,k+1)=gc1(i,j,k+1)+aa2(i-1,j,k+1) gen(i,j,k)=gen(i,j,k)+ga2(i-1,j,k) endif if (i==imm1) then gc2(i,j,k-1)=gc2(i,j,k-1)+aa3(i+1,j,k-1) gc1(i,j,k+1)=gc1(i,j,k+1)+aa1(i+1,j,k+1) gen(i,j,k)=gen(i,j,k)+ga1(i+1,j,k) endif if (j==2) then gc2(i,j,k-1)=gc2(i,j,k-1)+bb4(i,j-1,k-1) gc1(i,j,k+1)=gc1(i,j,k+1)+bb2(i,j-1,k+1) gen(i,j,k)=gen(i,j,k)+gb2(i,j-1,k) endif if (j==jmm1) then gc2(i,j,k-1)=gc2(i,j,k-1)+bb3(i,j+1,k-1) gc1(i,j,k+1)=gc1(i,j,k+1)+bb1(i,j+1,k+1) gen(i,j,k)=gen(i,j,k)+gb1(i,j+1,k) endif endif if (maa1+m<=lm) then if (k+1>kbm1.or.i+1>imm1) then apr(m)=0.0 else apr(m)=aa1(i+1,j,k+1) end if ja(m)=ind(m+maa1) ia(m)=ind(m) end if lapr=ma1 if (mbb1+m<=lm) then if (k+1>kbm1.or.j+1>jmm1) then apr(lapr+m)=0.0 else apr(lapr+m)=bb1(i,j+1,k+1) end if ja(m+lapr)=ind(m+mbb1) ia(m+lapr)=ind(m) end if lapr=ma1+mb1 if (mgc+m<=lm) then if (k+1>kbm1) then apr(lapr+m)=0.0 else apr(lapr+m)=gc1(i,j,k+1) end if ja(m+lapr)=ind(m+mgc) ia(m+lapr)=ind(m) end if lapr=ma1+mb1+mc if (mbb2+m<=lm) then if (k+1>kbm1.or.j-1<2) then apr(lapr+m)=0.0 else apr(lapr+m)=bb2(i,j-1,k+1) end if ja(m+lapr)=ind(m+mbb2) ia(m+lapr)=ind(m) end if lapr=ma1+mb1+mc+mb2 if (maa2+m<=lm) then if (k+1>kbm1.or.i-1<2) then apr(lapr+m)=0.0 else apr(lapr+m)=aa2(i-1,j,k+1) endif ja(m+lapr)=ind(m+maa2) ia(m+lapr)=ind(m) end if lapr=ma1+mb1+mc+mb2+ma2 if (mga+m<=lm) then if (i+1>imm1) then apr(lapr+m)=0.0 else apr(lapr+m)=ga1(i+1,j,k) end if ja(m+lapr)=ind(m+mga) ia(m+lapr)=ind(m) end if lapr=ma1+mb1+mc+mb2+ma2+ma if (1+m<=lm) then if (j+1>jmm1) then apr(lapr+m)=0.0 else apr(lapr+m)=gb1(i,j+1,k) end if ja(m+lapr)=ind(m+mgb) ia(m+lapr)=ind(m) end if lapr=ma1+mb1+mc+mb2+ma2+ma+mb apr(lapr+m)=gen(i,j,k) ja(m+lapr)=ind(m) ia(m+lapr)=ind(m) lapr=ma1+mb1+mc+mb2+ma2+ma+mb+lm if (m-mgb>=1) then if (j-1<2) then apr(m-mgb+lapr)=0.0 else apr(m-mgb+lapr)=gb2(i,j-1,k) end if ja(m-mgb+lapr)=ind(m-mgb) ia(m-mgb+lapr)=ind(m) end if lapr=ma1+mb1+mc+mb2+ma2+ma+2*mb+lm if 
(m-mga>=1) then if (i-1<2) then apr(m-mga+lapr)=0.0 else apr(m-mga+lapr)=ga2(i-1,j,k) end if ia(m-mga+lapr)=ind(m) ja(m-mga+lapr)=ind(-mga+m) end if lapr=ma1+mb1+mc+mb2+ma2+2*ma+2*mb+lm if (m-maa2>=1) then if (k-1<2.or.i+1>imm1) then apr(m-maa2+lapr)=0.0 else apr(m-maa2+lapr)=aa3(i+1,j,k-1) endif ia(m-maa2+lapr)=ind(m) ja(m-maa2+lapr)=ind(-maa2+m) end if lapr=ma1+mb1+mc+mb2+2*ma2+2*ma+2*mb+lm if (m-mbb2>=1) then if (j+1>jmm1.or.k-1<2) then apr(m-mbb2+lapr)=0.0 else apr(m-mbb2+lapr)=bb3(i,j+1,k-1) endif ia(m-mbb2+lapr)=ind(m) ja(m-mbb2+lapr)=ind(-mbb2+m) end if lapr=ma1+mb1+mc+2*mb2+2*ma2+2*ma+2*mb+lm if(m-mgc>=1)then if (k-1>kbm1) then apr(m-mgc+lapr)=0.0 else apr(m-mgc+lapr)=gc2(i,j,k-1) end if ja(m-mgc+lapr)=ind(m-mgc) ia(m-mgc+lapr)=ind(m) end if lapr=ma1+mb1+2*mc+2*mb2+2*ma2+2*ma+2*mb+lm if (m-mbb1>=1) then if (j-1<2.or.k-1<2)then apr(m-mbb1+lapr)=0.0 else apr(m-mbb1+lapr)=bb4(i,j-1,k-1) endif ia(m-mbb1+lapr)=ind(m) ja(m-mbb1+lapr)=ind(m-mbb1) end if lapr=ma1+2*mb1+2*mc+2*mb2+2*ma2+2*ma+2*mb+lm if (m-maa1>=1) then if (i-1<2.or.k-1<2)then apr(m-maa1+lapr)=0.0 else apr(m-maa1+lapr)=aa4(i-1,j,k-1) endif ia(m-maa1+lapr)=ind(m) ja(m-maa1+lapr)=ind(m-maa1) end if 99 continue call annotate_site_end
However, when I run it, I get this SIGSEGV:
forrtl: severe (174): SIGSEGV, segmentation fault occurred
Image PC Routine Line Source
nohydropom_intel 000000000044B7F3 Unknown Unknown Unknown
libpthread-2.23.s 00007F1CF2551390 Unknown Unknown Unknown
nohydropom_intel 000000000042BEBD pressure1_ 92 pressure1.for
nohydropom_intel 000000000040C755 MAIN__.R 614 Main.for
nohydropom_intel 0000000000403D32 Unknown Unknown Unknown
libc-2.23.so 00007F1CF1F92830 __libc_start_main Unknown Unknown
nohydropom_intel 0000000000403C29 Unknown Unknown Unknown
I debugged my code by setting a breakpoint at line 614 of Main.for. I found that the SIGSEGV appears to involve one of the arguments, which is an array.
Breakpoint 1, main () at ../Main.for:614
614 call pressure1(dti,q)
(gdb) info address dti
No symbol "dti" in current context.
(gdb) info address q
Symbol "q" is static storage at address 0x1088280.
(gdb) print dti
No symbol "dti" in current context.
(gdb) print q
$1 = (( ( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...) ...) ...)
(gdb) info line 614
Line 614 of "../Main.for" starts at address 0x40c743 <main+35251> and ends at 0x40c755 <main+35269>.
(gdb) disas 0x40c743, 0x40c755
Dump of assembler code from 0x40c743 to 0x40c755:
=> 0x000000000040c743 <main+35251>: mov $0x6458900,%edi
0x000000000040c748 <main+35256>: mov $0x1088280,%esi
0x000000000040c74d <main+35261>: vzeroupper
0x000000000040c750 <main+35264>: callq 0x42bb30 <pressure1>
End of assembler dump.
(gdb) continue
Continuing.
Program received signal SIGSEGV, Segmentation fault.
0x000000000042bebd in pressure1 (dt2=1.9762625833649862e-323, q1=<error reading variable: Cannot access memory at address 0x2>)
at ../pressure1.for:92
92 allocate(apr(n_apr))
(gdb) info stack
#0 0x000000000042bebd in pressure1 (dt2=1.9762625833649862e-323, q1=<error reading variable: Cannot access memory at address 0x2>)
at ../pressure1.for:92
#1 0x000000000040c755 in main () at ../Main.for:614
#2 0x0000000000403d32 in main ()
(gdb) info frame
Stack level 0, frame at 0x7fffffffbf80:
rip = 0x42bebd in pressure1 (../pressure1.for:92); saved rip = 0x40c755
called by frame at 0x7fffffffc400
source language fortran.
Arglist at 0x7fffffffbf70, args: dt2=1.9762625833649862e-323, q1=<error reading variable: Cannot access memory at address 0x2>
Locals at 0x7fffffffbf70, Previous frame's sp is 0x7fffffffbf80
Saved registers:
rbx at 0x7fffffffbf38, rbp at 0x7fffffffbf70, r12 at 0x7fffffffbf58, r13 at 0x7fffffffbf50, r14 at 0x7fffffffbf48,
r15 at 0x7fffffffbf40, rip at 0x7fffffffbf78
(gdb) info address q1
Symbol "q1" is a complex DWARF expression:
0: DW_OP_breg4 0 [$rsi]
.
(gdb) whatis q1
type = REAL(8) (400,6,80)
(gdb) up
#1 0x000000000040c755 in main () at ../Main.for:614
614 call pressure1(dti,q)
(gdb) whatis q
type = REAL(8) (400,6,80)
When I delete the !DIR$ SIMD directive and recompile, my code runs. This is the content of my makefile:
# Build description for the nohydropom_intel executable using the Intel
# Fortran compiler. Sources are the ../*.for files named after each object.

EXE= nohydropom_intel
FC= ifort

# Compiler flags (appended to any FFLAGS already supplied by the caller).
# The same flags are used for both compiling and linking.
FFLAGS+= -O2 -m64 -mavx -mtune=core-avx-i -axAVX -real-size 64 -fp-model precise -fp-model source \
-fast-transcendentals -fimf-use-svml=true -fma -g -ipo -qopt-report=5 \
-traceback
#FFLAGS+= -O2 -xHost -real-size 64 -parallel -ipo -fstack-protector-all
#LDFLAGS = -lslatec -llapack
#LIBDIR = -L/usr/local/lib -L/usr/lib/lapack

# Link against the advisor library installed under /opt/intel/advisor.
LDFLAGS = -ladvisor
LIBDIR = -L/opt/intel/advisor/lib64
INCDIR = -I/opt/intel/advisor/include/intel64

OBJS = \
Advsm.o Subr.o Bcond1.o Vertstruct.o S_t_subr.o \
Coef.o Main.o ztosig.o pprint.o pressure1.o Wveloc.o \
Depth.o seamount.o Liadv.o Slap.o

# 'clean' does not produce a file of that name, so mark it phony to make sure
# it always runs even if a file called 'clean' exists.
.PHONY: clean

# Link step. NOTE: every recipe line below must begin with a TAB character;
# without it make stops with "missing separator".
${EXE}: ${OBJS}
	$(FC) $(FFLAGS) -o $(EXE) $^ $(INCDIR) $(LIBDIR) $(LDFLAGS)

# Static pattern rule: each object is compiled from the same-named .for
# source located in the parent directory.
${OBJS}: %.o: ../%.for
	${FC} ${FFLAGS} -c -o $@ $< $(INCDIR)

# Remove the object files and the executable.
clean:
	rm -f *.o $(EXE)
I have many questions about this issue. The variables in the !DIR$ SIMD directive are not related to q1. I checked the declarations of q in Main.for and q1 in pressure1.for, and they are the same:
Main.for => DIMENSION q(im,jm,kb)
pressure1.for => dimension q1(im,jm,kb)
im, jm, kb are defined as PARAMETER (IM=400,JM=6,KB=80,ks=80) in a file called comblk98.h.
So I don't think this is an issue with my code. Somehow, the compiler seems to be mishandling the options in combination with the !DIR$ SIMD directive. Does anyone know whether !DIR$ SIMD affects the way an array is passed to a subroutine? If not, I will open a ticket with Intel.
By the way, this is my environment:
epardo@epardohome:~/nohydro/intel$ ifort -v
ifort version 19.0.3.199
epardo@epardohome:~/nohydro/intel$ uname -a
Linux epardohome 4.4.0-128-generic #154-Ubuntu SMP Fri May 25 14:15:18 UTC 2018 x86_64 x86_64 x86_64 GNU/Linux
epardo@epardohome:~/nohydro/intel$ cat /etc/os-release
NAME="Ubuntu"
VERSION="16.04.6 LTS (Xenial Xerus)"
ID=ubuntu
ID_LIKE=debian
PRETTY_NAME="Ubuntu 16.04.6 LTS"
VERSION_ID="16.04"
HOME_URL="http://www.ubuntu.com/"
SUPPORT_URL="http://help.ubuntu.com/"
BUG_REPORT_URL="http://bugs.launchpad.net/ubuntu/"
VERSION_CODENAME=xenial
UBUNTU_CODENAME=xenial