Slurm batch script for Science of Light

Sample batch script for embarrassingly parallel CPU jobs

#! /bin/bash -l
#
# This file is a sample batch script for "embarrassingly parallel" CPU applications via Slurm's job array mechanism.
# For more information on job arrays, see: https://slurm.schedmd.com/job_array.html
#
# Standard output and error (%A = master job ID, %a = array task ID):
#SBATCH -o ./tjob_%A_%a_out.txt
#SBATCH -e ./tjob_%A_%a_err.txt
#
# Initial working directory:
#SBATCH -D ./
#
# Job Name:
#SBATCH -J test_slurm
#
# Queue (Partition):
#SBATCH --partition=<highfreq/highmem>
#
# Process management (number of parallel executions is specified using the --array option):
#     * possible formats: `--array=0-9`, `--array=1,3,5,7`, `--array=1-7:2`
#     * reduce maximum number of simultaneously running tasks using a "%" separator (e.g. `--array=0-9%4`)
#     * to start only one instance, use --array=0 or (better) leave the --array option away completely
#SBATCH --array=0-9
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=1
#
# Explicitly specify memory requirement (default is maximum available on node):
#SBATCH --mem=1024MB
#
# Wall clock limit:
#SBATCH --time=24:00:00
#
# Configure notification via mail:
#SBATCH --mail-type=none
#SBATCH --mail-user=<name>@mpl.mpg.de


# Run the program, passing the task index and the total task count so the
# program can pick its own slice of the workload. The expansions are quoted
# so the command line stays well-formed even if a variable is unset (SC2086).
srun <myprog> "$SLURM_ARRAY_TASK_ID" "$SLURM_ARRAY_TASK_COUNT"

Sample batch script for multithreaded CPU jobs without hyperthreading

#! /bin/bash -l
#
# This file is a sample batch script for multi-threaded CPU applications (e.g. with pthread, OpenMP, ...).
#
# Standard output and error (%j = job ID):
#SBATCH -o ./tjob_%j_out.txt
#SBATCH -e ./tjob_%j_err.txt
#
# Initial working directory:
#SBATCH -D ./
#
# Job Name:
#SBATCH -J test_slurm
#
# Queue (Partition):
#SBATCH --partition=<highfreq/highmem>
#
# Process management:
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=16     # specify number of CPU cores (maximum: 16 on highfreq, 32 on highmem)
#
# Explicitly specify memory (default is maximum available on node)
#SBATCH --mem=64GB
#
# Wall clock limit:
#SBATCH --time=24:00:00
#
# Configure notification via mail:
#SBATCH --mail-type=none
#SBATCH --mail-user=<name>@mpl.mpg.de

# Load necessary modules here
# module load ...

# Set number of CPUs per task for OpenMP programs. The variable is quoted
# and given an empty default so the test is well-formed even when
# SLURM_CPUS_PER_TASK is unset (the original unquoted `[ ! -z $VAR ]`
# only worked by accident of test's argument counting).
if [ -n "${SLURM_CPUS_PER_TASK:-}" ] ; then
    export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
else
    export OMP_NUM_THREADS=1
fi

# Disable hyperthreading. Disabled by default. Set it after modules load.
# export SLURM_HINT=nomultithread

# Run the program
srun <myprog>

Sample batch script for multithreaded CPU jobs in hyperthreading mode

#! /bin/bash -l
#
# This file is a sample batch script for multi-threaded CPU applications (e.g. with pthread, OpenMP, ...).
#
# Standard output and error (%j = job ID):
#SBATCH -o ./tjob_%j_out.txt
#SBATCH -e ./tjob_%j_err.txt
#
# Initial working directory:
#SBATCH -D ./
#
# Job Name:
#SBATCH -J test_slurm
#
# Queue (Partition):
#SBATCH --partition=<highfreq/highmem>
#
# Process management:
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=32     # specify number of CPU cores (maximum: 32 on highfreq, 64 on highmem)
#
# Explicitly specify memory (default is maximum on node):
#SBATCH --mem=64GB
#
# Wall clock limit:
#SBATCH --time=24:00:00
#
# Configure notification via mail:
#SBATCH --mail-type=none
#SBATCH --mail-user=<name>@mpl.mpg.de

# Load necessary modules here
# module load ...

# Set number of CPUs per task for OpenMP programs. The variable is quoted
# and given an empty default so the test is well-formed even when
# SLURM_CPUS_PER_TASK is unset (the original unquoted `[ ! -z $VAR ]`
# only worked by accident of test's argument counting).
if [ -n "${SLURM_CPUS_PER_TASK:-}" ] ; then
    export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
else
    export OMP_NUM_THREADS=1
fi

# Enable hyperthreading. Set it after modules load.
export SLURM_HINT=multithread

# Run the program
srun <myprog>

Sample batch script for embarrassingly parallel GPU jobs

#! /bin/bash -l
#
# This file is a sample batch script for "embarrassingly parallel" GPU applications via Slurm's job array mechanism.
# For more information on job arrays, see: https://slurm.schedmd.com/job_array.html
#
# Standard output and error (%A = master job ID, %a = array task ID):
#SBATCH -o ./tjob_%A_%a_out.txt
#SBATCH -e ./tjob_%A_%a_err.txt
#
# Initial working directory:
#SBATCH -D ./
#
# Job Name:
#SBATCH -J test_slurm
#
# Queue (Partition):
#SBATCH --partition=dgx
#
# Process management (number of parallel executions is specified using the --array option):
#     * possible formats: `--array=0-9`, `--array=1,3,5,7`, `--array=1-7:2`
#     * reduce maximum number of simultaneously running tasks using a "%" separator (e.g. `--array=0-9%4`)
#     * to start only one instance, use --array=0 or (better) leave the --array option away completely
#SBATCH --array=0-3
#SBATCH --gres=gpu:1           # specify number of GPUs
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=4      # specify number of CPU cores (as a rule of thumb, 4 per GPU)
#
# Memory requirement (default is 64GB):
#SBATCH --mem=32GB
#
# Wall clock limit:
#SBATCH --time=24:00:00
#
# Configure notification via mail:
#SBATCH --mail-type=none
#SBATCH --mail-user=<name>@mpl.mpg.de

# Choose container image
SINGULARITY_IMAGE_FILE="/ptmp/simg/nvidia-theano:18.03-python3-v1.simg"

if [ ! -f /singularity ]; then
    # This branch is executed when in Slurm; essentially, it calls Singularity.
    # Better do not edit this branch unless you know exactly what you are doing.

    # Re-launch a private copy of this script inside the container. All
    # expansions are quoted (SC2086); the trap command is single-quoted so
    # the variable is expanded when the trap fires, not when it is installed.
    COPY_OF_THIS_SCRIPT=$(mktemp --suffix=.sh)
    trap 'rm -f -- "${COPY_OF_THIS_SCRIPT}"' EXIT
    chmod 700 "${COPY_OF_THIS_SCRIPT}"
    cp -- "$0" "${COPY_OF_THIS_SCRIPT}"
    srun singularity run --nv "${SINGULARITY_IMAGE_FILE}" "${COPY_OF_THIS_SCRIPT}"
else
    # This branch is executed when in Singularity.
    # From here, you can start your computation (feel free to modify this branch).

    <myprog> "$SLURM_ARRAY_TASK_ID" "$SLURM_ARRAY_TASK_COUNT"
fi

Sample batch script for parallel COMSOL jobs

#!/bin/bash -l
#
# This file is a sample batch script for a parallel multi-threaded (MPI/OpenMP) COMSOL run.
#
# Standard output and error:
#SBATCH -o ./comsol_%j_out.txt
#SBATCH -e ./comsol_%j_err.txt
#
# Initial working directory:
#SBATCH -D ./
#
# Job Name:
#SBATCH --job-name="COMSOL"
#
# Queue (Partition):
#SBATCH --partition=highmem  # specify partition <highfreq/highmem>
#
# Process management:
##SBATCH --array=0-9
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#
# Always set number of cores to be used by each ntask (ntasks-per-node * cpus-per-task <= maximum on node)
#SBATCH --cpus-per-task=4     # specify number of CPU cores (maximum: 16 on highfreq, 32 on highmem)
#
# Explicitly specify memory (default is maximum available on node)
#SBATCH --mem=128GB
#
# Wall clock limit:
#SBATCH --time=04:00:00
#
# Configure notification via mail:
#SBATCH --mail-type=none
#SBATCH --mail-user=<name>@mpl.mpg.de

# Load necessary modules here
module load comsol

# Choose a suitable model; input is read from ./input, output and logs go
# into per-job directories keyed by SLURM_JOB_ID so that concurrent jobs
# do not overwrite each other's results.
MODELTOCOMPUTE="comsol_smalltest.mph"
INPUTFILE="input/$MODELTOCOMPUTE"
DIR_OUTPUT="output/$SLURM_JOB_ID"
DIR_LOGS="logs/$SLURM_JOB_ID"
mkdir -p "$DIR_OUTPUT"
mkdir -p "$DIR_LOGS"
OUTPUTFILE="$DIR_OUTPUT/$MODELTOCOMPUTE"
BATCHLOG="$DIR_LOGS/${MODELTOCOMPUTE}.log"

# Run COMSOL in batch mode: the thread count (-np) comes from the Slurm
# allocation, and MPI process placement is deduced via '-mpibootstrap slurm'.
comsol batch -np "$SLURM_CPUS_PER_TASK" -mpibootstrap slurm -mpifabrics shm:tcp \
-inputfile "${INPUTFILE}" -outputfile "${OUTPUTFILE}" \
-batchlog "${BATCHLOG}" -alivetime 15 -prefermph -recover

Sample batch script for COMSOL with Matlab jobs

#!/bin/bash -l
#
# This file is a sample batch script to run Comsol models from Matlab via LiveLink.
#
# Standard output and error:
#SBATCH -o ./comsol_%j_out.txt
#SBATCH -e ./comsol_%j_err.txt
#
# Initial working directory:
#SBATCH -D ./
#
# Job Name:
#SBATCH --job-name="COMSOL"
#
# Queue (Partition):
#SBATCH --partition=highmem  # specify partition <highfreq/highmem>
#
# Process management:
##SBATCH --array=0-9
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#
# Explicitly specify memory (default is maximum available on node)
#SBATCH --mem=128GB
#
# Always set number of cores to be used by each ntask (ntasks-per-node * cpus-per-task <= maximum on node)
#SBATCH --cpus-per-task=10     # specify number of CPU cores (maximum: 16 on highfreq, 32 on highmem)
#
# Wall clock limit:
#SBATCH --time=04:00:00
#
# Configure notification via mail:
#SBATCH --mail-type=none
#SBATCH --mail-user=<name>@mpl.mpg.de
#
# Use resources on node exclusively
##SBATCH --exclusive

# Load necessary modules here
module load comsol matlab

# Name of the Matlab script to run
MSCRIPT="comsol_livelink_matlab_script.m"

# Pick a free TCP port for Matlab<->Comsol communication (bind to port 0 and
# let the kernel choose). Needed if several Comsol servers run on the same
# node; not necessary if the node is used exclusively. Uses python3 because
# the bare 'python' command is absent on many modern systems.
PORT=$(python3 -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1])')

# Start the Comsol server in the background
comsol mphserver -port "$PORT" &

# Give the server time to start before Matlab tries to connect
sleep 10s

# Run the Matlab script (it connects to the server via mphstart, then exits)
matlab -nosplash -nodisplay -r " addpath $COMSOL_HOME/mli; mphstart($PORT); run $MSCRIPT; exit "