Hi all,
I'm hoping someone on the forum has some insight to lend on my compilation error. I've been trying for almost two months to get a stable build that scales well when jobs are run on multiple nodes. It's been a pretty demoralizing task. I thought I was close on Friday when some attempts led to decent performance when run on three nodes. I tried some additional options and, unfortunately, this morning neither the new builds nor the ones that were successful are working. Rather, the output file at run time reads " nwchem.F: ma_init failed (ga_uses_ma=F) 911" despite a "successful" compilation. I will list my current compilation and job submission scripts below, as well as my test input file. Note that some lines are commented out; these are options I have tried toggling on and off. Note that this forum seems to interpret the pound/hashtag symbol as the start of a list.
--compilation--
- !/bin/bash -l
# Build script for the NWChem 6.8.1 release tree, exactly as posted.
# Lines starting with "- " were "#" lines in the real script (the forum
# renders "#" as a list marker): the shebang plus options the author has
# toggled off.

# Start from an empty module environment, then load the site defaults and
# the intel/2019 module.
module purge
module load hpc_defaults/0.1
module load intel/2019

# Source tree location, build target and the set of NWChem modules to build.
export NWCHEM_TOP=/home/epj/secondcomp/nwchem-6.8.1-release
export NWCHEM_TARGET=LINUX64
export NWCHEM_LONG_PATHS=y
export NWCHEM_MODULES="all python"
export USE_NOFSCHECK=y
export LARGE_FILES=y

# Interconnect-related settings (MPI fabric, provider, ARMCI network).
export I_MPI_OFI_LIBRARY_INTERNAL=1
export FI_PROVIDER=verbs
export I_MPI_FABRICS=shm:ofi
export ARMCI_NETWORK=OPENIB
# Disabled: explicit InfiniBand include/library locations.
- export IB_HOME=/usr
- export IB_INCLUDE=/usr/include/infiniband
- export IB_LIB=/usr/lib64
- export IB_LIB_NAME="-libumad -libverbs -lpthread"

# Python 2.7 settings for the "python" entry in NWCHEM_MODULES.
export USE_PYTHONCONFIG=y
export PYTHONHOME=/usr
export PYTHONVERSION=2.7
export PYTHONLIBTYPE=so
export USE_PYTHON64=y

export TCGRSH=/usr/bin/ssh
export USE_F90_ALLOCATABLE=y

# MPI switches and the explicit MPI link line.
# NOTE(review): USE_MPIF4=y is set while LIBMPI also lists -lmpi_ilp64;
# these look like 4-byte vs 8-byte integer MPI choices and may conflict.
# Confirm against the NWChem and Intel MPI documentation.
export USE_MPI=y
export USE_MPIF=y
export USE_MPIF4=y
export LIBMPI="-lmpifort -lmpi -lmpi_ilp64 -ldl -lrt -lpthread"
- export USE_OPENMP=y

# BLAS/LAPACK: size 8 is declared and the libraries come from the compiler's
# -mkl flag. The disabled lines are the size-4 / explicit MKL link-line
# alternatives the author tried.
# NOTE(review): plain -mkl may select MKL's LP64 (4-byte integer) interface,
# which would not match BLAS_SIZE=8/LAPACK_SIZE=8. Verify with the Intel
# compiler documentation.
export HAS_BLAS=y
export BLAS_SIZE=8
- export BLAS_SIZE=4
export BLASOPT="-mkl -lpthread -lm"
- export BLASOPT="-L$MKLROOT/lib/intel64 -lmkl_rt -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lmkl_def -lmkl_blacs_intelmpi_lp64 -lmkl_lapack95_lp64 -liomp5 -lpthread -ldl -lm"
export LAPACK_SIZE=8
- export LAPACK_SIZE=4
# All three LAPACK variables reuse the BLAS link line.
export LAPACK_LIB="$BLASOPT"
export LAPACK_LIBS="$BLASOPT"
export LAPACKOPT="$BLASOPT"

# ScaLAPACK: same pattern as BLAS (size 8 declared, linked through -mkl).
# NOTE(review): the active SCALAPACK value names no ScaLAPACK/BLACS library,
# unlike the disabled line. Confirm that -mkl alone provides them.
export USE_SCALAPACK=y
export SCALAPACK_SIZE=8
- export SCALAPACK_SIZE=4
export SCALAPACK="-mkl -lpthread -lm"
- export SCALAPACK="-L$MKLROOT/lib/intel64 -lmkl_rt -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lmkl_def -lmkl_blacs_intelmpi_lp64 -lmkl_lapack95_lp64 -liomp5 -lmkl_scalapack_lp64 -lpthread -ldl -lm"
export SCALAPACK_LIB="$SCALAPACK"
export SCALAPACK_LIBS="$SCALAPACK"

# NOTE(review): USE_64TO32=y and the "make 64_to_32" step below are combined
# with the *_SIZE=8 settings above. Presumably these are alternative choices
# (32-bit vs 64-bit integer libraries); confirm against the NWChem build docs.
export USE_64TO32=y

# Intel C and Fortran compilers.
export CC=icc
export FC=ifort

# Clean, configure, run the 64_to_32 step, then build. Each logged step
# sends stdout and stderr to its own file (">&").
cd $NWCHEM_TOP/src
make realclean
make nwchem_config >& make_config1zg.log
make 64_to_32 >& make_64_1zg.log
make >& make1zg.log
--job submission--
- !/bin/bash -l
# Slurm batch script, exactly as posted. Lines starting with "- " were "#"
# lines in the real script (shebang, SBATCH directives, disabled exports).

# Resource request: job name "water", 3 nodes x 16 tasks per node = 48 tasks,
# time limit 1:00:00, "[mlx]" node constraint, mail on END/FAIL.
# Scheduler stdout goes to myjob.out and stderr to myjob.err.
- SBATCH -e myjob.err
- SBATCH -J water
- SBATCH --mail-type=END,FAIL
- SBATCH --nodes=3
- SBATCH --ntasks=48
- SBATCH --ntasks-per-node=16
- SBATCH --constraint="[mlx]"
- SBATCH -o myjob.out
- SBATCH -t 1:00:00

# Same module set as the compilation script.
module purge
module load hpc_defaults/0.1
module load intel/2019

# Runtime interconnect settings. The two I_MPI_* exports that are active in
# the compilation script are disabled here.
- export I_MPI_OFI_LIBRARY_INTERNAL=1
export FI_PROVIDER=verbs
- export I_MPI_FABRICS=shm:ofi
export ARMCI_NETWORK=OPENIB
export ARMCI_DEFAULT_SHMMAX=32768

# Basis-set and pseudopotential libraries, read from the source tree.
export NWCHEM_BASIS_LIBRARY="/home/epj/secondcomp/nwchem-6.8.1-release/src/basis/libraries/"
export NWCHEM_NWPW_LIBRARY="/home/epj/secondcomp/nwchem-6.8.1-release/src/nwpw/libraryps/"
export ARMCI_OPENIB_DEVICE=mlx4_0

# One thread per rank for OpenMP and MKL.
export OMP_NUM_THREADS=1
export MKL_NUM_THREADS=1
export NWC_RANKS_PER_DEVICE=0
export OFFLOAD_INIT=on_offload

# MPIRETURN starts as a 999 sentinel and is then overwritten with mpirun's
# exit status, which becomes this script's exit code. The rank count comes
# from SLURM_NPROCS; program output is written to run.out.
export MPIRETURN=999
mpirun -np $SLURM_NPROCS /home/epj/secondcomp/nwchem-6.8.1-release/bin/LINUX64/nwchem ./input.nw > run.out
export MPIRETURN=$?
- End of the job script
exit $MPIRETURN
--input file--
echo
# NWChem test input, exactly as posted: DFT geometry optimization followed
# by a second task with tighter convergence.
start freq-trial
scratch_dir /scratch/
# NOTE(review): if this memory request applies per process, 16 tasks per node
# (from the job script) would ask for about 64000 mb per node. Check this
# against the node's physical memory, given the reported "ma_init failed".
memory 4000 mb noverify
# Three-atom O/H/H geometry in angstrom; autoz, centering and automatic
# symmetry detection are all switched off.
geometry units angstrom noautoz nocenter noautosym
O 0.00000000 0.00000000 0.00000000
H 0.00000000 1.93042809 -1.10715266
H 0.00000000 -1.93042809 -1.10715266
end
# 6-311g** library basis for both elements, spherical functions, printed.
BASIS "ao basis" spherical PRINT
H library 6-311g**
O library 6-311g**
END
# DFT settings for the optimization: b3lyp functional, odft, multiplicity 1,
# direct, up to 1000 iterations, energy convergence 1e-6.
dft
xc b3lyp
odft
mult 1
mulliken
direct
maxiter 1000
convergence energy 1e-6
end
task dft optimize
# Tighten the energy convergence to 1e-8 for the following task.
dft
convergence energy 1e-8
end
freq
animate
end
task dft vscf
|