NOTE: Some of the info provided here may be outdated.
This example describes the step-by-step procedure that was followed to install the W-SLDA Toolkit on the Eagle (PCSS) system.
- Step 1: Cloning the toolkit
- Step 2: Compiling supporting libraries and tools
- Step 3: Compiling ELPA
- Step 4: Setting templates for st codes
- Step 5: Setting templates for td codes
Step 1: Cloning the toolkit
git clone https://gitlab.fizyka.pw.edu.pl/wtools/wslda.git
Step 2: Compiling supporting libraries and tools
# On Eagle system compilation can be executed only via an interactive session
srun --pty -n 1 /bin/bash
# load compilers
module load openmpi/1.10.2-1_gcc482
# go to W-SLDA folder
gabrielw@e1135:~/grant_518/project_data/wslda$ ./install-libs.sh
Installing libs...
# Making lib/wderiv..
# ...
make: Leaving directory `/mnt/storage_2/project_data/grant_518/cold-atoms/tools'
# ==========================================================================
# =========================== SET ENVIRONMENT ==============================
# ==========================================================================
# Add to your .bashrc
export WSLDA=/mnt/storage_2/project_data/grant_518/wslda
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/mnt/storage_2/project_data/grant_518/wslda/lib/wdata:/mnt/storage_2/project_data/grant_518/wslda/lib/wderiv:/mnt/storage_2/project_data/grant_518/wslda/lib/winterp
export PATH=$PATH:/mnt/storage_2/project_data/grant_518/wslda/lib/wdata/bin:/mnt/storage_2/project_data/grant_518/wslda/tools/bin
Step 3: Compiling ELPA
See here for instructions.
Step 4: Setting templates for st codes
The same operations were applied to the st-project-template and st-testcase-uniform folders. The following files were modified to the form presented below.
env.sh
# source env.sh
module load impi/2020.4.912
module load mkl/2020.0.4
module load cudatoolkit/8.0.61-21551265
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/mnt/storage_2/project_data/grant_518/share/elpa/lib/
module list
cat $WSLDA/VERSION.h
Makefile
# LOAD MODULES BEFORE COMPILATION
#
# module load impi/2020.4.912
# module load mkl/2020.0.4
# module load cudatoolkit/8.0.61-21551265
# COMPILER
CXX=mpiicc
PYTHON=python
# DIRECTORY SETTINGS (must end with /)
WSLDADIR=/mnt/storage_2/project_data/grant_518/wslda/
OBJDIR=./obj/
# folder where executable binary will be placed (will be created automatically)
BINDIR=./
# COMPILER FLAGS
CFLAGS= -O3 -I/mnt/storage_2/project_data/grant_518/share/elpa/include/elpa-2021.05.002/
LIBS=-lfftw3 -L$(MKLROOT)/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lmkl_blacs_intelmpi_lp64 -liomp5 -lpthread -lstdc++ -ldl -L/mnt/storage_2/project_data/grant_518/share/elpa/lib/ -lelpa
# ----- DO NOT MODIFY -----
include $(WSLDADIR)hpc-engine/mk.st
job.sh
#!/bin/bash -l
#SBATCH -J test # name that will be displayed in queue system
#SBATCH --output="test.out" # stdout will be saved here
#SBATCH -N 2 # here set number of nodes
#SBATCH -n 56 # must be: for altair n=N*32; for standard n=N*28
#SBATCH --mem-per-cpu=2GB
#SBATCH --time=00:10:00 # here set walltime hh:mm:ss
#SBATCH --mail-type=BEGIN,END,FAIL # notifications for job started, done & fail
#SBATCH --mail-user=your@email # send-to address
#SBATCH -p standard # altair or standard
## ------ QUEUE SYSTEM ------
## For submission use:
## sbatch job.sh
## For checking status use:
## squeue
## To kill job use:
## scancel pid
##
## ------ COMPUTATION -------
## For computation it is recommended to use location:
## cd ~/grant_518/scratch/
cd $SLURM_SUBMIT_DIR
source ./env.sh
## NOTE: YOU MUST ASSURE: -np=N*32 for altair
## -np=N*28 for standard
mpirun -np 56 ./st-wslda-2d input.txt
predefines.h
The job.sh script is configured to send the code to standard CPU nodes. Thus, one needs to set the correct drivers for ELPA.
#define DIAGONALIZATION_ROUTINE ELPA
// ...
/**
* uncomment it if you want to activate GPUs for diagonalizations
* */
// #define ELPA_USE_GPU
/**
* Select ELPA kernels,
* for more info see documentation of ELPA lib
* */
#define ELPA_USE_SOLVER ELPA_SOLVER_2STAGE
#define ELPA_USE_COMPLEX_KERNEL ELPA_2STAGE_COMPLEX_GENERIC
#define ELPA_USE_REAL_KERNEL ELPA_2STAGE_REAL_GENERIC
You can always check which ELPA kernels are available on your system by executing the tool elpa2_print_kernels.
Testing st codes
For testing, it is recommended to use st-testcase-uniform.
# Use scratch area for tests
gabrielw@eagle:~$ cd grant_518/scratch/
gabrielw@eagle:~/grant_518/scratch$ cp -r $WSLDA/st-testcase-uniform .
gabrielw@eagle:~/grant_518/scratch$ cd st-testcase-uniform/
gabrielw@eagle:~/grant_518/scratch/st-testcase-uniform$ ls
env.sh input.txt job.sh logger.h Makefile predefines.h problem-definition.h README.txt
# Compile codes
gabrielw@eagle:~/grant_518/scratch/st-testcase-uniform$ srun --pty -n 1 /bin/bash
gabrielw@e1729:~/grant_518/scratch/st-testcase-uniform$ source env.sh
gmp/5.1.3 load complete.
mpfr/3.1.2 load complete.
libmpc/1.0.1 load complete.
gcc/6.2.0 load complete.
icc/19.1.3.304 load complete.
impi/2020.4.912 load complete.
mkl/2020.0.4 load complete.
cudatoolkit/8.0.61-21551265 load complete.
Currently Loaded Modulefiles:
1) gmp/5.1.3(default) 3) libmpc/1.0.1(default) 5) icc/19.1.3.304(default) 7) mkl/2020.0.4(default)
2) mpfr/3.1.2(default) 4) gcc/6.2.0 6) impi/2020.4.912(default) 8) cudatoolkit/8.0.61-21551265
#define CODE "W-SLDA-TOOLKIT"
#define VERSION "2021.09.05"
gabrielw@e1729:~/grant_518/scratch/st-testcase-uniform$ make
mkdir -p ./obj/
mkdir -p ./
mpiicc -O3 -I/mnt/storage_2/project_data/grant_518/share/elpa/include/elpa-2021.05.002/ -I/mnt/storage_2/project_data/grant_518/wslda/hpc-engine/ -I/mnt/storage_2/project_data/grant_518/wslda/lib/wdata/c/ -I/mnt/storage_2/project_data/grant_518/wslda/lib/wderiv/c/ -I/mnt/storage_2/project_data/grant_518/wslda/lib/winterp/c/ -I./ -DWSLDA -DCODEDIM=1 -c -o obj/s1dpca.o /mnt/storage_2/project_data/grant_518/wslda/hpc-engine/s2dpca.c
# ...
gabrielw@e1729:~/grant_518/scratch/st-testcase-uniform$ exit
# submit the job and wait till it executes
gabrielw@eagle:~/grant_518/scratch/st-testcase-uniform$ sbatch job.sh
Submitted batch job 19722600
# check if files were produced...
gabrielw@eagle:~/grant_518/scratch/st-testcase-uniform$ ls
env.sh logger.h predefines.h st-wslda-1d test test_checkpoint.dat test_input.txt test_nu.wdat test_problem-definition.h test_tau_a.wdat test_V_ext_a.wdat
input.txt Makefile problem-definition.h st-wslda-2d test_A_a.wdat test_delta_ext.wdat test_j_a.wdat test.out test_rho_a.wdat test_V_a.wdat test.wlog
job.sh obj README.txt st-wslda-3d test_alpha_a.wdat test_delta.wdat test_logger.h test_predefines.h test.stdout test_velocity_ext_a.wdat test.wtxt
# ... and if the code converged
gabrielw@eagle:~/grant_518/scratch/st-testcase-uniform$ tail test.stdout
E_potext: NEW= 0 OLD= 0 DIFF= 0 CONVSTATUS= PASS
E_pairext: NEW= 0 OLD= 0 DIFF= 0 CONVSTATUS= PASS
E_velext: NEW= 0 OLD= 0 DIFF= 0 CONVSTATUS= PASS
------------------------------------------------------------------------------------------
E_tot: NEW= 0.41534176 OLD= 0.41534176 DIFF= -6.0015734e-09 CONVSTATUS= PASS
# MINIMIZATION FUNCTION: -3.20717243
# FUNCTION CHANGED BY: -7.57995854
# WRITING CHECKPOINT FILE `test_checkpoint.dat`
# TIMING rt_tot= 0.51: rt_zheev= 0.03[ 5.56%] rt_dens= 0.00[ 0.72%] rt_pot= 0.00[ 0.14%] rt_me= 0.00[ 0.06%] rt_redistrib= 0.00[ 0.31%] rt_other= 0.48[93.21%]
# ALGORITHM CONVERGED!
Step 5: Setting templates for td codes
The same operations were applied to the td-project-template and td-testcase-uniform folders. The following files were modified to the form presented below.
env.sh
# source env.sh
module load openmpi/3.1.4_gcc620
module unload cudatoolkit
module load cuda/11.2.1_460.32.03
module list
cat $WSLDA/VERSION.h
Makefile
# LOAD MODULES BEFORE COMPILATION
#
# module load openmpi/3.1.4_gcc620
# module unload cudatoolkit
# module load cuda/11.2.1_460.32.03
# COMPILERS
CXX=mpicc
NVCC=nvcc
PYTHON=python
# DIRECTORY SETTINGS (must end with /)
# W-SLDA installation folder (for read only)
WSLDADIR=/mnt/storage_2/project_data/grant_518/wslda/
# folder where temporary files will be stored (will be created automatically)
OBJDIR=./obj/
# folder where executable binary will be placed (will be created automatically)
BINDIR=./
# COMPILER FLAGS & LIBRARIES
# C compiler flags
CFLAGS= -std=gnu99 -O3
# CUDA compiler flags
NVCCFLAGS = -arch sm_60 -O3
# prescription how to link with: FFTW, CUDA
LIBS=-lcudart -lcufft -lfftw3 -lm
# ----- DO NOT MODIFY -----
include $(WSLDADIR)hpc-engine/mk.td
job.sh
#!/bin/bash -l
#SBATCH -J test # name that will be displayed in queue system
#SBATCH --output="test.out" # stdout will be saved here
#SBATCH -N 1 # here set number of nodes
#SBATCH -n 32 # must be N*32
#SBATCH --mem-per-cpu=2GB
#SBATCH --gpus-per-node=8 # use 8 if you want to use Altair nodes
#SBATCH --time=24:00:00 # here set walltime hh:mm:ss
#SBATCH --mail-type=BEGIN,END,FAIL # notifications for job started, done & fail
#SBATCH --mail-user=your@email # send-to address
#SBATCH -p tesla
## ------ QUEUE SYSTEM ------
## For submission use:
## sbatch job.sh
## For checking status use:
## squeue
## To kill job use:
## scancel pid
##
## ------ COMPUTATION -------
## For computation it is recommended to use location:
## cd ~/grant_518/scratch/
cd $SLURM_SUBMIT_DIR
source ./env.sh
## NOTE: YOU MUST ASSURE: -np=N*gpus-per-node
## -npernode=gpus-per-node
mpirun -np 8 -npernode 8 ./td-wslda-2d input.txt
input.txt
The job.sh script is configured to send the code to standard GPU nodes, with gpus-per-node=8. Thus, one needs to set the correct value of gpuspernode in the template of the input file.
gpuspernode 8 # must be equal gpus-per-node provided in the job script, 8 for Altair nodes
Testing td codes
For testing, it is recommended to use td-testcase-uniform.
# Use scratch area for tests
gabrielw@eagle:~$ cd grant_518/scratch/
gabrielw@eagle:~/grant_518/scratch$ cp -r $WSLDA/td-testcase-uniform .
gabrielw@eagle:~/grant_518/scratch$ cd td-testcase-uniform/
gabrielw@e1296:/mnt/storage_2/scratch/grant_518/td-testcase-uniform$ ls
env.sh input.txt job.sh logger.h Makefile predefines.h problem-definition.h README.txt
# Compile codes
gabrielw@eagle:~/grant_518/scratch/td-testcase-uniform$ srun --pty -n 1 /bin/bash
gabrielw@e1296:/mnt/storage_2/scratch/grant_518/td-testcase-uniform$ source env.sh
openmpi/3.1.4_gcc620 load complete.
cuda/11.2.1_460.32.03 load complete.
Currently Loaded Modulefiles:
1) gmp/5.1.3(default) 2) mpfr/3.1.2(default) 3) libmpc/1.0.1(default) 4) gcc/6.2.0 5) openmpi/3.1.4_gcc620 6) cuda/11.2.1_460.32.03
#define CODE "W-SLDA-TOOLKIT"
#define VERSION "2021.09.05"
gabrielw@e1296:~/grant_518/scratch/td-testcase-uniform$ make
mkdir -p ./obj/
mkdir -p ./
mpicc -std=gnu99 -O3 -I/mnt/storage_2/project_data/grant_518/wslda/hpc-engine/ -I/mnt/storage_2/project_data/grant_518/wslda/lib/wdata/c/ -I/mnt/storage_2/project_data/grant_518/wslda/lib/wderiv/c/ -I/mnt/storage_2/project_data/grant_518/wslda/lib/winterp/c/ -I./ -DTDWSLDA -DCODEDIM=1 -c -o obj/ccpca.o /mnt/storage_2/project_data/grant_518/wslda/hpc-engine/ccpca.c
# ...
gabrielw@e1296:~/grant_518/scratch/td-testcase-uniform$ exit
# submit the job and wait till it executes
gabrielw@eagle:~/grant_518/scratch/td-testcase-uniform$ sbatch job.sh
Submitted batch job 19731598
# check if files were produced...
gabrielw@eagle:~/grant_518/scratch/td-testcase-uniform$ ls
env.sh logger.h predefines.h td-wslda-1d test test_input.txt test_logger.h test_problem-definition.h test.stdout
input.txt Makefile problem-definition.h td-wslda-2d test_check.stamp test_j_a.wdat test.out test_rho_a.wdat test.wlog
job.sh obj README.txt td-wslda-3d test_delta.wdat test_j_b.wdat test_predefines.h test_rho_b.wdat test.wtxt
# ... and if the code was producing results - all numbers should be the same if everything went ok.
gabrielw@eagle:~/grant_518/scratch/td-testcase-uniform$ tail test.stdout
0.3500 17.00000061 17.99999904 34.99999965 0.43917132 1.91518974 -0.52650127 -0.94951715 0.00000000 0.00000000 0.00000000 0.00000000 -0.00000000 -0.00000000 0.25
0.7000 17.00000061 17.99999904 34.99999965 0.43917132 1.91518974 -0.52650127 -0.94951715 0.00000000 0.00000000 0.00000000 0.00000000 -0.00000000 0.00000000 0.21
1.0500 17.00000061 17.99999904 34.99999965 0.43917132 1.91518974 -0.52650127 -0.94951715 0.00000000 0.00000000 0.00000000 0.00000000 -0.00000000 -0.00000000 0.21
1.4000 17.00000061 17.99999904 34.99999965 0.43917132 1.91518974 -0.52650127 -0.94951716 0.00000000 0.00000000 0.00000000 0.00000000 -0.00000000 -0.00000000 0.21
1.7500 17.00000061 17.99999904 34.99999965 0.43917132 1.91518974 -0.52650127 -0.94951716 0.00000000 0.00000000 0.00000000 0.00000000 -0.00000000 -0.00000000 0.21
2.1000 17.00000061 17.99999904 34.99999965 0.43917132 1.91518974 -0.52650127 -0.94951716 0.00000000 0.00000000 0.00000000 0.00000000 -0.00000000 0.00000000 0.21
2.4500 17.00000061 17.99999904 34.99999965 0.43917132 1.91518974 -0.52650127 -0.94951716 0.00000000 0.00000000 0.00000000 0.00000000 -0.00000000 0.00000000 0.21
2.8000 17.00000061 17.99999904 34.99999965 0.43917132 1.91518974 -0.52650127 -0.94951716 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.21
3.1500 17.00000061 17.99999904 34.99999965 0.43917132 1.91518974 -0.52650127 -0.94951716 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.21
3.5000 17.00000061 17.99999904 34.99999965 0.43917132 1.91518974 -0.52650127 -0.94951716 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.21