Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

Table of Contents

...


Modules required

  1. lang/Perl/5.28.1-GCCcore-6.3.0
  2. data/netCDF-WRF/C-4.6.2_CXX-4.3.0_F-4.4.2_p-1.9.0-intel-2018.5.274
  3. toolchain/intel/2018.5.274
  4. devel/CMake/3.12.1-intel-2018.5.274
  5. data/XML-LibXML/2.0206-GCCcore-6.3.0

...

Info

As Mana has two different InfiniBand networks (QDR and HDR), two different machine entries are created. This also means that the subsequent env_mach_specific and mkbatch files come in duplicate: the two copies of each file differ only in the file-name suffix, and the only content change is the selection of the network to use for MPI.

config_machines.xml


Code Block
languagexml
themeMidnight
titleconfig_machines.xml
collapsetrue
<machine MACH="uhhpcmana_qdr">
    <!-- Free-text description of the machine; can be anything -->
    <DESC>User Defined Machine</DESC>
    <!-- Operating system: LINUX, Darwin, CNL, AIX, BGL or BGP -->
    <OS>LINUX</OS>
    <!-- Compiler family: intel, ibm, pgi, pathscale, gnu, cray or lahey -->
    <COMPILERS>intel</COMPILERS>
    <!-- MPI library; stock choices are openmpi, mpich, ibm, mpi-serial -->
    <MPILIBS>impi</MPILIBS>

    <!-- Complete path to the 'scratch' directory -->
    <CESMSCRATCHROOT>~/lus_scratch/cesm/case</CESMSCRATCHROOT>
    <!-- Complete path to the run directory -->
    <RUNDIR>$CASEROOT/run</RUNDIR>
    <!-- Complete path to the build directory -->
    <EXEROOT>$CASEROOT/bld</EXEROOT>
    <!-- Complete path to the inputdata directory -->
    <DIN_LOC_ROOT>~/cesm/input</DIN_LOC_ROOT>
    <!-- Path to the optional forcing data for CLM (for CRUNCEP forcing) -->
    <DIN_LOC_ROOT_CLMFORC>USERDEFINED_optional_build</DIN_LOC_ROOT_CLMFORC>

    <!-- Logical switch for short term archiving -->
    <DOUT_S>TRUE</DOUT_S>
    <!-- Complete path to a short term archiving directory -->
    <DOUT_S_ROOT>$CASEROOT/output</DOUT_S_ROOT>
    <!-- Complete path to a long term archiving directory -->
    <DOUT_L_MSROOT>USERDEFINED_optional_run</DOUT_L_MSROOT>

    <!-- Where the cesm testing scripts write and read baseline results -->
    <CCSM_BASELINE>USERDEFINED_optional_run</CCSM_BASELINE>
    <!-- Path to the cprnc tool used to compare netcdf history files in testing -->
    <CCSM_CPRNC>USERDEFINED_optional_test</CCSM_CPRNC>

    <!-- Batch system commands and build/run sizing for this entry -->
    <BATCHQUERY>squeue -a</BATCHQUERY>
    <BATCHSUBMIT>sbatch</BATCHSUBMIT>
    <SUPPORTED_BY>uh</SUPPORTED_BY>
    <GMAKE_J>8</GMAKE_J>
    <MAX_TASKS_PER_NODE>19</MAX_TASKS_PER_NODE>
</machine>


<machine MACH="uhhpcmana_hdr">
    <!-- Free-text description of the machine; can be anything -->
    <DESC>User Defined Machine</DESC>
    <!-- Operating system: LINUX, Darwin, CNL, AIX, BGL or BGP -->
    <OS>LINUX</OS>
    <!-- Compiler family: intel, ibm, pgi, pathscale, gnu, cray or lahey -->
    <COMPILERS>intel</COMPILERS>
    <!-- MPI library; stock choices are openmpi, mpich, ibm, mpi-serial -->
    <MPILIBS>impi</MPILIBS>

    <!-- Complete path to the 'scratch' directory -->
    <CESMSCRATCHROOT>~/lus_scratch/cesm/case</CESMSCRATCHROOT>
    <!-- Complete path to the run directory -->
    <RUNDIR>$CASEROOT/run</RUNDIR>
    <!-- Complete path to the build directory -->
    <EXEROOT>$CASEROOT/bld</EXEROOT>
    <!-- Complete path to the inputdata directory -->
    <DIN_LOC_ROOT>~/cesm/input</DIN_LOC_ROOT>
    <!-- Path to the optional forcing data for CLM (for CRUNCEP forcing) -->
    <DIN_LOC_ROOT_CLMFORC>USERDEFINED_optional_build</DIN_LOC_ROOT_CLMFORC>

    <!-- Logical switch for short term archiving -->
    <DOUT_S>TRUE</DOUT_S>
    <!-- Complete path to a short term archiving directory -->
    <DOUT_S_ROOT>$CASEROOT/output</DOUT_S_ROOT>
    <!-- Complete path to a long term archiving directory -->
    <DOUT_L_MSROOT>USERDEFINED_optional_run</DOUT_L_MSROOT>

    <!-- Where the cesm testing scripts write and read baseline results -->
    <CCSM_BASELINE>USERDEFINED_optional_run</CCSM_BASELINE>
    <!-- Path to the cprnc tool used to compare netcdf history files in testing -->
    <CCSM_CPRNC>USERDEFINED_optional_test</CCSM_CPRNC>

    <!-- Batch system commands and build/run sizing for this entry -->
    <BATCHQUERY>squeue -a</BATCHQUERY>
    <BATCHSUBMIT>sbatch</BATCHSUBMIT>
    <SUPPORTED_BY>uh</SUPPORTED_BY>
    <GMAKE_J>8</GMAKE_J>
    <MAX_TASKS_PER_NODE>39</MAX_TASKS_PER_NODE>
</machine>

...

Code Block
languagebash
themeMidnight
titleenv_mach_specific.uhhpcmana_qdr
collapsetrue
#! /bin/csh -f

# -------------------------------------------------------------------------
# UHHPC_QDR build specific settings
#
# Resets the module environment, loads the build tool modules, and then
# prepends the netCDF installation under ${HOME}/netcdf4 to the library,
# include and executable search paths.
# -------------------------------------------------------------------------


source /etc/profile.d/lmod.csh
module purge

module load devel/CMake/3.12.1-intel-2018.5.274
module load lang/Perl/5.28.1-GCCcore-6.3.0
module load data/netCDF-Fortran/4.4.5-intel-2018.5.274
module load data/netCDF/4.6.2-intel-2018.5.274
module load toolchain/intel/2018.5.274

# Root of the netCDF installation that is put first on every search path
setenv NETCDF ${HOME}/netcdf4

# csh aborts with "Undefined variable" when an unset variable is expanded,
# so each search path is only appended to if it already exists.  When it
# does not exist, no trailing ':' is written (an empty path element would
# mean "current directory").
if ($?LD_LIBRARY_PATH) then
    setenv LD_LIBRARY_PATH ${NETCDF}/lib/:${LD_LIBRARY_PATH}
else
    setenv LD_LIBRARY_PATH ${NETCDF}/lib/
endif

if ($?LIBRARY_PATH) then
    setenv LIBRARY_PATH ${NETCDF}/lib/:${LIBRARY_PATH}
else
    setenv LIBRARY_PATH ${NETCDF}/lib/
endif

setenv PATH ${NETCDF}/bin/:${PATH}

if ($?CPATH) then
    setenv CPATH ${NETCDF}/include/:${CPATH}
else
    setenv CPATH ${NETCDF}/include/
endif

# -------------------------------------------------------------------------
# Build and runtime environment variables - edit before the initial build
# -------------------------------------------------------------------------
limit stacksize unlimited
limit datasize unlimited

...

Code Block
languagebash
themeMidnight
titleenv_mach_specific.uhhpcmana_hdr
collapsetrue
#! /bin/csh -f

# -------------------------------------------------------------------------
# UHHPC_HDR build specific settings
#
# Resets the module environment, loads the build tool modules, and then
# prepends the netCDF installation under ${HOME}/netcdf4 to the library,
# include and executable search paths.
# -------------------------------------------------------------------------


source /etc/profile.d/lmod.csh
module purge

module load devel/CMake/3.12.1-intel-2018.5.274
module load lang/Perl/5.28.1-GCCcore-6.3.0
module load data/netCDF-Fortran/4.4.5-intel-2018.5.274
module load data/netCDF/4.6.2-intel-2018.5.274
module load toolchain/intel/2018.5.274

# Root of the netCDF installation that is put first on every search path
setenv NETCDF ${HOME}/netcdf4

# csh aborts with "Undefined variable" when an unset variable is expanded,
# so each search path is only appended to if it already exists.  When it
# does not exist, no trailing ':' is written (an empty path element would
# mean "current directory").
if ($?LD_LIBRARY_PATH) then
    setenv LD_LIBRARY_PATH ${NETCDF}/lib/:${LD_LIBRARY_PATH}
else
    setenv LD_LIBRARY_PATH ${NETCDF}/lib/
endif

if ($?LIBRARY_PATH) then
    setenv LIBRARY_PATH ${NETCDF}/lib/:${LIBRARY_PATH}
else
    setenv LIBRARY_PATH ${NETCDF}/lib/
endif

setenv PATH ${NETCDF}/bin/:${PATH}

if ($?CPATH) then
    setenv CPATH ${NETCDF}/include/:${CPATH}
else
    setenv CPATH ${NETCDF}/include/
endif

# -------------------------------------------------------------------------
# Build and runtime environment variables - edit before the initial build
# -------------------------------------------------------------------------
limit stacksize unlimited
limit datasize unlimited

...

Code Block
themeMidnight
titlemkbatch.uhhpcmana_qdr
collapsetrue
#! /bin/csh -f
# -------------------------------------------------------------------------
# mkbatch for the QDR InfiniBand machine entry.
#
# Dispatches on $PHASE:
#   set_batch - (re)creates the job script ($CASE.run, or $CASE.test when
#               TESTMODE is set) with the Slurm header and MPI settings
#   set_exe   - appends the model launch commands to $CASE.run
#   set_larch - placeholder, does nothing
# Any other value prints an error message and exits with status 1.
# -------------------------------------------------------------------------

source /etc/profile.d/lmod.csh
#################################################################################
if ($PHASE == set_batch) then
#################################################################################

source ./Tools/ccsm_getenv || exit -1

# Perl is loaded only to run taskmaker.pl; the modules are purged right after.
module load lang/Perl/5.28.1-GCCcore-6.3.0
set ntasks = `${CASEROOT}/Tools/taskmaker.pl -sumonly`
set maxthrds = `${CASEROOT}/Tools/taskmaker.pl -maxthrds`
module purge

# Round the task count up to a whole number of nodes.
@ nodes = $ntasks / ${MAX_TASKS_PER_NODE}
if ( $ntasks % ${MAX_TASKS_PER_NODE} > 0) then
    @ nodes = $nodes + 1
    @ ntasks = $nodes * ${MAX_TASKS_PER_NODE}
endif

# NOTE(review): taskpernode and qname are computed but not referenced
# anywhere else in this script; kept as-is.
@ taskpernode = ${MAX_TASKS_PER_NODE} / ${maxthrds}
set qname = batch
set tlimit = "3-00:00:00"

if ($?TESTMODE) then
    set file = $CASEROOT/${CASE}.test
else
    set file = $CASEROOT/${CASE}.run
endif

# The heredoc is expanded now: unescaped $variables are filled in while the
# job script is written; backslash-escaped ones are left for run time.
cat >! $file << EOF1
#!/bin/csh
#SBATCH --job-name=${CASE}
#SBATCH --constraint="ib_qdr"
#SBATCH --distribution="*:*:*"
#SBATCH --partition=exclusive
#SBATCH --time=$tlimit
#SBATCH --ntasks=$ntasks
#SBATCH --cpus-per-task=$maxthrds
#SBATCH --output=${CASE}.%A.out


# Configure the Intel MPI parameters
setenv I_MPI_FABRICS "shm:ofi"
setenv I_MPI_PMI_LIBRARY "/lib64/libpmi.so"
# ### FOR QDR NETWORK #####
setenv FI_PROVIDER "psm"
setenv FI_PSM_TAGGED_RMA 0
setenv FI_PSM_AM_MSG 1
setenv FI_PSM_UUID \`uuidgen\`
# # ###### ######## ###### ##
source /etc/profile.d/lmod.csh
module purge

EOF1

#################################################################################
else if ($PHASE == set_exe) then
#################################################################################
# Perl is loaded only to run taskmaker.pl; the modules are purged right after.
module load lang/Perl/5.28.1-GCCcore-6.3.0
set maxthrds = `${CASEROOT}/Tools/taskmaker.pl -maxthrds`
set maxtasks = `${CASEROOT}/Tools/taskmaker.pl -sumtasks`
module purge


# Appended (>>) to the job script; \$RUNDIR, \$EXEROOT and \$LID are
# resolved when the job runs, ${maxthrds}/${maxtasks} are resolved here.
cat >> ${CASEROOT}/${CASE}.run << EOF1
# -------------------------------------------------------------------------
# Run the model
# -------------------------------------------------------------------------

sleep 25
cd \$RUNDIR
echo "\`date\` -- CSM EXECUTION BEGINS HERE"
setenv OMP_NUM_THREADS ${maxthrds}
module load data/netCDF-Fortran/4.4.5-intel-2018.5.274
module load data/netCDF/4.6.2-intel-2018.5.274
module load toolchain/intel/2018.5.274
srun --ntasks=${maxtasks} --cpu_bind=verbose,sockets --kill-on-bad-exit \$EXEROOT/cesm.exe >&! cesm.log.\$LID
wait
echo "\`date\` -- CSM EXECUTION HAS FINISHED"

EOF1


#################################################################################
else if ($PHASE == set_larch) then
#################################################################################

#This is a place holder for a long-term archiving script

#################################################################################
else
#################################################################################

echo " PHASE setting of $PHASE is not an accepted value"
echo " accepted values are set_batch, set_exe and set_larch"
exit 1

#################################################################################
endif
#################################################################################

...

Code Block
themeMidnight
titlemkbatch.uhhpcmana_hdr
collapsetrue
#! /bin/csh -f
# -------------------------------------------------------------------------
# mkbatch for the HDR InfiniBand machine entry.
#
# Dispatches on $PHASE:
#   set_batch - (re)creates the job script ($CASE.run, or $CASE.test when
#               TESTMODE is set) with the Slurm header and MPI settings
#   set_exe   - appends the model launch commands to $CASE.run
#   set_larch - placeholder, does nothing
# Any other value prints an error message and exits with status 1.
# -------------------------------------------------------------------------

source /etc/profile.d/lmod.csh
#################################################################################
if ($PHASE == set_batch) then
#################################################################################

source ./Tools/ccsm_getenv || exit -1

# Perl is loaded only to run taskmaker.pl; the modules are purged right after.
module load lang/Perl/5.28.1-GCCcore-6.3.0
set ntasks = `${CASEROOT}/Tools/taskmaker.pl -sumonly`
set maxthrds = `${CASEROOT}/Tools/taskmaker.pl -maxthrds`
# maxtasks must be set in this phase as well: the heredoc below expands
# ${maxtasks} for FI_UNIVERSE_SIZE, and csh aborts on an undefined variable.
# Same taskmaker.pl call as in the set_exe phase.
set maxtasks = `${CASEROOT}/Tools/taskmaker.pl -sumtasks`
module purge

# Round the task count up to a whole number of nodes.
@ nodes = $ntasks / ${MAX_TASKS_PER_NODE}
if ( $ntasks % ${MAX_TASKS_PER_NODE} > 0) then
    @ nodes = $nodes + 1
    @ ntasks = $nodes * ${MAX_TASKS_PER_NODE}
endif

# NOTE(review): taskpernode and qname are computed but not referenced
# anywhere else in this script; kept as-is.
@ taskpernode = ${MAX_TASKS_PER_NODE} / ${maxthrds}
set qname = batch
set tlimit = "3-00:00:00"

if ($?TESTMODE) then
    set file = $CASEROOT/${CASE}.test
else
    set file = $CASEROOT/${CASE}.run
endif

# The heredoc is expanded now: unescaped $variables are filled in while the
# job script is written; backslash-escaped ones are left for run time.
# NOTE(review): FI_VERBS_IFACE is written as "i" below; the value may have
# been truncated (e.g. from "ib") - confirm against the cluster setup.
cat >! $file << EOF1
#!/bin/csh
#SBATCH --job-name=${CASE}
#SBATCH --constraint="ib_hdr"
#SBATCH --distribution="*:*:*"
#SBATCH --partition=exclusive
#SBATCH --time=$tlimit
#SBATCH --ntasks=$ntasks
#SBATCH --cpus-per-task=$maxthrds
#SBATCH --output=${CASE}.%A.out


###### ######## ###### ##
# Libfabric method
###### ######## ###### ##
# Configure the Intel MPI parameters
setenv I_MPI_FABRICS "shm:ofi"
setenv I_MPI_PMI_LIBRARY "/lib64/libpmi.so"
setenv I_MPI_HYDRA_TOPOLIB "ipl" # May be required if newer libfabric and intel MPI is used
### FOR HDR NETWORK #####
# https://ofiwg.github.io/libfabric/master/man/
# https://ofiwg.github.io/libfabric/v1.9.1/man/
setenv FI_PROVIDER "shm,verbs;ofi_rxm"
setenv FI_MR_CACHE_MONITOR "disabled" # currently a bug exists that a segfault could happen
setenv FI_VERBS_MR_CACHE_ENABLE "0" # currently a bug exists that a segfault could happen
setenv FI_VERBS_INLINE_SIZE "256"
setenv FI_UNIVERSE_SIZE "${maxtasks}" # should equal at least the max number of tasks one task will communicate with
setenv FI_VERBS_IFACE "i"
###### ######## ###### ##

###### ######## ###### ##
# DAPL method (deprecated but not gone in Intel 2018)
###### ######## ###### ##
# Configure the Intel MPI parameters

#setenv I_MPI_FABRICS "shm:dapl"
#setenv I_MPI_PMI_LIBRARY "/lib64/libpmi.so"

###### ######## ###### ##
source /etc/profile.d/lmod.csh
module purge

EOF1

#################################################################################
else if ($PHASE == set_exe) then
#################################################################################
# Perl is loaded only to run taskmaker.pl; the modules are purged right after.
module load lang/Perl/5.28.1-GCCcore-6.3.0
set maxthrds = `${CASEROOT}/Tools/taskmaker.pl -maxthrds`
set maxtasks = `${CASEROOT}/Tools/taskmaker.pl -sumtasks`
module purge


# Appended (>>) to the job script; \$RUNDIR, \$EXEROOT and \$LID are
# resolved when the job runs, ${maxthrds}/${maxtasks} are resolved here.
cat >> ${CASEROOT}/${CASE}.run << EOF1
# -------------------------------------------------------------------------
# Run the model
# -------------------------------------------------------------------------

sleep 25
cd \$RUNDIR
setenv OMP_NUM_THREADS ${maxthrds}
module load data/netCDF-Fortran/4.4.5-intel-2018.5.274
module load data/netCDF/4.6.2-intel-2018.5.274
module load toolchain/intel/2018.5.274
echo "\`date\` -- CSM EXECUTION BEGINS HERE"
srun --ntasks=${maxtasks} --cpu_bind=verbose,sockets --kill-on-bad-exit \$EXEROOT/cesm.exe >&! cesm.log.\$LID
wait
echo "\`date\` -- CSM EXECUTION HAS FINISHED"

EOF1


#################################################################################
else if ($PHASE == set_larch) then
#################################################################################

#This is a place holder for a long-term archiving script

#################################################################################
else
#################################################################################

echo " PHASE setting of $PHASE is not an accepted value"
echo " accepted values are set_batch, set_exe and set_larch"
exit 1

#################################################################################
endif
#################################################################################

...

Once the above files have been created and added to the "/scripts/ccsm_utils/Machines/" directory of the CESM source tree, we should be able to build and then run our model.

...

Code Block
languagebash
themeMidnight
titlesubmission.slurm
collapsetrue
#!/bin/bash
#SBATCH --job-name=submit
#SBATCH --partition=sandbox
#SBATCH --time=00-04:00:00 ## time format is DD-HH:MM:SS
#SBATCH --cpus-per-task=8
#SBATCH --mem=64G
#SBATCH --output=submit.%A.out

# Creates a CESM case, sets its PE layout and run options, builds it and
# submits it.  Every step aborts the job on failure so that a broken case
# is never built or submitted.

MODELDIR=${HOME}/cesm1_2_2
CASEDIR=${HOME}/lus_scratch/cesm_output
EXPNAME=ood.I5
CASEROOT=${CASEDIR}/${EXPNAME}

# Machine entry from config_machines.xml: uhhpcmana_qdr for the QDR network,
# uhhpcmana_hdr for the HDR network.
MACH=uhhpcmana_qdr

# MPI task count applied to every component below.
NTASKS=24

# Print a message on stderr and stop the job with a non-zero status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

module purge
module load lang/Perl/5.28.1-GCCcore-6.3.0

#================================================================;
#===================== 1. CREATE CASE ===========================;
#================================================================;
cd "${MODELDIR}/scripts" || die "cannot cd to ${MODELDIR}/scripts"
./create_newcase -case "${CASEROOT}" -compset FC5 -res f19_f19 -mach "${MACH}" \
  || die "create_newcase failed"

#=================== PE LAYOUT CONTROL ====================#
cd "${CASEROOT}" || die "cannot cd to ${CASEROOT}"

for comp in ATM OCN LND ICE CPL GLC ROF WAV; do
  ./xmlchange -file env_mach_pes.xml -id "NTASKS_${comp}" -val "${NTASKS}" \
    || die "xmlchange NTASKS_${comp} failed"
done

./cesm_setup || die "cesm_setup failed"

#================================================================;
#========================== 2. BUILD ============================;
#================================================================;
cd "${CASEROOT}" || die "cannot cd to ${CASEROOT}"

#==================== RUN START CONTROL ==================#
./xmlchange -file env_run.xml -id RUN_TYPE -val "startup" || die "xmlchange RUN_TYPE failed"

#==================== RUN TIME CONTROL =====================#
./xmlchange -file env_run.xml -id STOP_OPTION -val "nyears" || die "xmlchange STOP_OPTION failed"
./xmlchange -file env_run.xml -id STOP_N -val "50" || die "xmlchange STOP_N failed"

#===================== RESTART CONTROL ==============#
./xmlchange -file env_run.xml -id REST_OPTION -val "nyears" || die "xmlchange REST_OPTION failed"
./xmlchange -file env_run.xml -id REST_N -val "1" || die "xmlchange REST_N failed"

#================= OUTPUT CONTROL ================#
./xmlchange -file env_run.xml -id DOUT_S -val "TRUE" || die "xmlchange DOUT_S failed"
# Single quotes on purpose: the literal text $CASEROOT is stored in the XML
# file instead of being expanded by this shell.
./xmlchange -file env_run.xml -id DOUT_S_ROOT -val '$CASEROOT/output' || die "xmlchange DOUT_S_ROOT failed"


./xmlchange -file env_build.xml -id DEBUG -val "TRUE" || die "xmlchange DEBUG failed"

#========================= BUILD ===========================#
"./${EXPNAME}.build" || die "${EXPNAME}.build failed"

#================================================================;
#========================== 3. RUN ============================;
#================================================================;
"./${EXPNAME}.submit" || die "${EXPNAME}.submit failed"

...