Merge branch 'pmix' into 'master'

OpenMPI+Intel

See merge request Pmodules/buildblocks!404
This commit is contained in:
2023-06-28 14:06:32 +00:00
15 changed files with 225 additions and 24 deletions

14
Batchsystem/slurm/build Executable file
View File

@@ -0,0 +1,14 @@
#!/usr/bin/env modbuild
# Build recipe for the Slurm module; the shebang hands this file to `modbuild`.
# NOTE(review): ${P} and ${V} are presumably the package name and version
# supplied by modbuild — confirm against the modbuild documentation.
# Location of the source tarball.
pbuild::set_download_url "https://download.schedmd.com/${P}/${P}-${V}.tar.bz2"
# Group this module is registered in.
pbuild::add_to_group 'Batchsystem'
# Documentation files to be installed along with the module.
pbuild::install_docfiles 'AUTHORS' 'INSTALL' 'NEWS' 'README.rst' 'RELEASE_NOTES'
# Hand the install prefixes of the dependency modules to Slurm's configure
# script.
#
# Reads: PMIX_PREFIX, CUDA_PREFIX (passed as the NVML location),
#        HWLOC_PREFIX, UCX_PREFIX.
#
# A --with-<feature>=<prefix> option is only added when the matching
# *_PREFIX variable is non-empty, so configure never receives an empty path
# such as "--with-hwloc=". This mirrors the guarded style of the OpenMPI
# recipe, which only adds an option when the dependency is actually loaded.
#
# NOTE: --with-netloc=${HWLOC_PREFIX} is not passed; it was already
# commented out in the original recipe.
pbuild::pre_configure() {
    local spec feature prefix_var
    for spec in 'pmix:PMIX_PREFIX' 'nvml:CUDA_PREFIX' 'hwloc:HWLOC_PREFIX' 'ucx:UCX_PREFIX'; do
        feature="${spec%%:*}"       # text before the colon: configure feature name
        prefix_var="${spec##*:}"    # text after the colon: variable holding the prefix
        if [[ -n "${!prefix_var}" ]]; then
            pbuild::add_configure_args "--with-${feature}=${!prefix_var}"
        fi
    done
}

View File

@@ -0,0 +1 @@
slurm/22.05.9 unstable b:pmix/4.2.4 b:cuda/12.1.1 b:ucx/1.14.1_slurm

View File

@@ -0,0 +1,28 @@
#%Module1.0
# Modulefile for the Slurm workload manager.
# The commands below declare descriptive metadata only: one-line summary,
# project URL, license pointer, maintainer and the help text.
module-whatis "Slurm Workload Manager"
module-url "https://slurm.schedmd.com/"
module-license "See https://github.com/SchedMD/slurm/blob/master/LICENSE.OpenSSL"
module-maintainer "Marc Caubet <marc.caubet@psi.ch>"
module-help "
Slurm is an open source, fault-tolerant, and highly scalable cluster
management and job scheduling system for large and small Linux clusters.
Slurm requires no kernel modifications for its operation and is relatively
self-contained. As a cluster workload manager, Slurm has three key functions:
* First, it allocates exclusive and/or non-exclusive access to resources
(compute nodes) to users for some duration of time so they can perform
work.
* Second, it provides a framework for starting, executing, and monitoring
work (normally a parallel job) on the set of allocated nodes.
* Finally, it arbitrates contention for resources by managing a queue of
pending work.
Optional plugins can be used for accounting, advanced reservation, gang
scheduling (time sharing for parallel jobs), backfill scheduling, topology
optimized resource selection, resource limits by user or bank account, and
sophisticated multifactor job prioritization algorithms.
"
# NOTE(review): presumably makes the Batchsystem module group available once
# this module is loaded; confirm against the Pmodules documentation.
module-addgroup Batchsystem

View File

@@ -13,23 +13,55 @@ pbuild::install_docfiles 'AUTHORS' 'LICENSE' 'NEWS' 'README'
pbuild::pre_configure() {
if [[ -n "${CUDA_VERSION}" ]]; then
pbuild::add_configure_args "--with-cuda=${CUDA_HOME}"
fi
pbuild::add_configure_args "--prefix=${PREFIX}"
pbuild::add_configure_args "--enable-mpi-cxx"
pbuild::add_configure_args "--enable-mpi-cxx-seek"
pbuild::add_configure_args "--enable-orterun-prefix-by-default"
pbuild::add_configure_args "--enable-shared"
pbuild::add_configure_args "--enable-static"
pbuild::add_configure_args "--with-hwloc=internal"
pbuild::add_configure_args "--with-slurm=yes"
if [[ -n "${CUDA_VERSION}" ]]; then
pbuild::add_configure_args "--with-cuda=${CUDA_HOME}"
fi
if [[ -n "${HWLOC_VERSION}" ]]; then
unset HWLOC_VERSION
pbuild::add_configure_args "--with-hwloc=${HWLOC_PREFIX}"
else
pbuild::add_configure_args "--with-hwloc=internal"
fi
if [[ -n "${LIBEVENT_VERSION}" ]]; then
pbuild::add_configure_args "--with-libevent=${LIBEVENT_PREFIX}"
fi
if [[ -n "${PMIX_VERSION}" ]]; then
unset PMIX_VERSION
pbuild::add_configure_args "--with-pmix=${PMIX_PREFIX}"
fi
if [[ -n "${LIBFABRIC_VERSION}" ]]; then
pbuild::add_configure_args "--with-ofi=${LIBFABRIC_PREFIX}"
fi
if [[ -n "${UCX_VERSION}" ]]; then
pbuild::add_configure_args "--with-ucx=${UCX_PREFIX}"
fi
if [[ -n "${INTEL_VERSION}" ]]; then
pbuild::add_configure_args "CC=icc"
pbuild::add_configure_args "CXX=icpc"
pbuild::add_configure_args "FC=ifort"
pbuild::add_configure_args "F90=ifort"
pbuild::add_configure_args "F77=ifort"
pbuild::add_configure_args "LDFLAGS=-Wc,-static-intel,-O0"
fi
if pbuild::use_flag slurm || pbuild::use_flag dgx || pbuild::use_flag merlin6; then
pbuild::add_configure_args "--with-gpfs=/usr/lpp/mmfs"
pbuild::add_configure_args "--with-pmi"
pbuild::add_configure_args "--with-pmi-libdir=/usr/lib64/"
# pbuild::add_configure_args "--with-pmi-libdir=/usr/lib64/"
if pbuild::use_flag "libpmix"; then
pbuild::add_configure_args "--enable-install-libpmix"
@@ -50,18 +82,29 @@ pbuild::pre_configure() {
}
pbuild::post_install() {
mkdir -p "${PREFIX}/lib/fallback"
local -r binary=$(ls "${PREFIX}"/lib/libmpi.so.*.*.*)
pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libuc[mpst].so'
pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libuct_ib.so.0'
pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libnuma.so'
pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libibverbs.so'
pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/librdmacm.so'
pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libpmi.so'
pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libpmi2.so'
pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libpmi2.so'
if [[ -n "${CUDA_VERSION}" ]]; then
echo "opal_warn_on_missing_libcuda = 0" >> ${PREFIX}/etc/openmpi-mca-params.conf
if ! pbuild::use_flag slurm && ! pbuild::use_flag dgx && ! pbuild::use_flag merlin6; then
mkdir -p "${PREFIX}/lib/fallback"
local -r binary=$(ls "${PREFIX}"/lib/libmpi.so.*.*.*)
pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libuc[mpst].so'
pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libuct_ib.so.0'
pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libnuma.so'
pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libibverbs.so'
pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/librdmacm.so'
pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libpmi.so'
pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libpmi2.so'
pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libpmi2.so'
fi
if [[ -n "${CUDA_VERSION}" ]]; then
echo "opal_warn_on_missing_libcuda = 0" >> ${PREFIX}/etc/openmpi-mca-params.conf
fi
# Remove /usr/lib64 from the RPATH of the installed binaries.
#
# Candidate files: every regular file below ${PREFIX} that is not named
# *.a or *.mod and that `grep -IL .` reports, i.e. files grep does not
# treat as text with content. Files without an RPATH entry in their
# objdump output are left alone because old and new value are both empty.
#
# Compared to a plain sed on ":/usr/lib64:", the entry is also removed when
# it is the first, last or only component, or when it is repeated
# back-to-back. All paths are quoted and the file list is read line by line,
# so paths containing blanks are handled.
local elf_file old_rpath new_rpath
local sys_libdir=':/usr/lib64:'
while IFS= read -r elf_file; do
    # objdump prints the entry as "RPATH <value>"; match the tag exactly.
    old_rpath=$(objdump -a -x "${elf_file}" | awk '$1 == "RPATH" {print $2}')
    # Pad with colons so every component is delimited on both sides.
    new_rpath=":${old_rpath}:"
    # Repeat: one substitution pass cannot remove two adjacent occurrences
    # because they share a colon.
    while [[ "${new_rpath}" == *"${sys_libdir}"* ]]; do
        new_rpath="${new_rpath//"${sys_libdir}"/:}"
    done
    # Strip the padding colons again.
    new_rpath="${new_rpath#:}"
    new_rpath="${new_rpath%:}"
    if [[ "${old_rpath}" != "${new_rpath}" ]]; then
        patchelf --force-rpath --set-rpath "${new_rpath}" "${elf_file}"
    fi
done < <(find "${PREFIX}" -type f \( ! -name "*.a" -and ! -name "*.mod" \) -exec grep -IL . "{}" \;)
}

View File

@@ -41,6 +41,7 @@ openmpi/4.1.3_slurm stable gcc/{9.3.0,10.3.0,11.2.0} cuda/11.5.1 b:ucx/1
openmpi/4.1.4_slurm stable gcc/10.4.0 cuda/11.5.1 b:ucx/1.12.1_slurm
openmpi/4.1.5_slurm unstable gcc/10.4.0 cuda/12.1.1 b:ucx/1.14.1_slurm
openmpi/4.1.5_slurm unstable intelcc/22.2 b:cuda/12.1.1 b:pmix/4.2.4 b:ucx/1.14.1_slurm b:libfabric/1.18.0 b:hwloc/2.9.1 b:patchelf/0.14.5
openmpi/4.0.5-1_dgx deprecated gcc/{8.4.0,9.3.0,10.2.0} cuda/11.2.2 b:ucx/1.10.0-1_dgx
openmpi/4.1.0-1_dgx deprecated gcc/10.2.0 cuda/11.2.2 b:ucx/1.10.0-1_dgx

13
Libraries/hwloc/build Executable file
View File

@@ -0,0 +1,13 @@
#!/usr/bin/env modbuild
# Build recipe for the hwloc module; the shebang hands this file to `modbuild`.
# NOTE(review): ${P}, ${V}, ${V_MAJOR} and ${V_MINOR} are presumably the
# package name and the full/major/minor version supplied by modbuild — confirm.
# Location of the source tarball; the URL contains a v<major>.<minor> directory.
pbuild::set_download_url "https://download.open-mpi.org/release/${P}/v${V_MAJOR}.${V_MINOR}/${P}-${V}.tar.gz"
# Group this module is registered in.
pbuild::add_to_group 'Libraries'
# Documentation files to be installed along with the module.
pbuild::install_docfiles 'README' 'AUTHORS' 'NEWS' 'COPYING' 'VERSION'
# Register the configure options used for hwloc: netloc and plugin support
# are both switched on. The options are added one at a time, in this order.
pbuild::pre_configure() {
    local flag
    for flag in "--enable-netloc" "--enable-plugins"; do
        pbuild::add_configure_args "${flag}"
    done
}

View File

@@ -0,0 +1 @@
hwloc/2.7.1 unstable

View File

@@ -0,0 +1,2 @@
hwloc/2.7.1 unstable b:cuda/11.5.1
hwloc/2.9.1 unstable b:cuda/12.1.1

View File

@@ -0,0 +1 @@
hwloc/2.7.1 unstable b:cuda/11.5.1

View File

@@ -0,0 +1,25 @@
#%Module1.0
# Modulefile for hwloc.
# The commands below declare descriptive metadata only: one-line summary,
# project URL, license pointer, maintainer and the help text.
# NOTE(review): PREFIX (used in the license string) is not set in this file;
# presumably the module framework defines it before evaluation — confirm.
module-whatis "The Portable Hardware Locality (hwloc) software package"
module-url "https://www.open-mpi.org/projects/hwloc/"
module-license "Open source, see $PREFIX/share/doc/hwloc/COPYING"
module-maintainer "Marc Caubet Serrabou <marc.caubet@psi.ch>"
module-help "
The Portable Hardware Locality (hwloc) software package provides a
portable abstraction (across OS, versions, architectures, ...) of the
hierarchical topology of modern architectures, including NUMA memory
nodes, sockets, shared caches, cores and simultaneous multithreading.
It also gathers various system attributes such as cache and memory
information as well as the locality of I/O devices such as network
interfaces, InfiniBand HCAs or GPUs.
Hwloc primarily aims at helping applications with gathering information
about increasingly complex parallel computing platforms so as to exploit
them accordingly and efficiently.
Hwloc may also help many applications just by providing a portable CPU
and memory binding API and a reliable way to find out how many cores
and/or hardware threads are available.
"

7
Libraries/libfabric/build Executable file
View File

@@ -0,0 +1,7 @@
#!/usr/bin/env modbuild
# Build recipe for the libfabric module; the shebang hands this file to
# `modbuild`. No configure options are customised in this recipe.
# NOTE(review): $P and $V are presumably the package name and version
# supplied by modbuild — confirm.
# Location of the release tarball on GitHub.
pbuild::set_download_url "https://github.com/ofiwg/libfabric/releases/download/v$V/$P-$V.tar.bz2"
# Group this module is registered in.
pbuild::add_to_group 'Libraries'
# Documentation files to be installed along with the module.
pbuild::install_docfiles 'COPYING' 'AUTHORS' 'README' 'NEWS.md'

View File

@@ -0,0 +1 @@
libfabric/1.18.0 unstable b:gcc/10.4.0

View File

@@ -0,0 +1,53 @@
#%Module1.0
# Modulefile for libfabric (Open Fabrics Interfaces).
# The commands directly below declare descriptive metadata: one-line summary,
# project URL, license pointer and maintainer.
module-whatis "Open Fabrics Interfaces (OFI)"
module-url "https://ofiwg.github.io/libfabric/"
module-license "Open source, see https://github.com/ofiwg/libfabric/blob/main/COPYING"
module-maintainer "Marc Caubet Serrabou <marc.caubet@psi.ch>"
module-help "
The Open Fabrics Interfaces (OFI) is a framework focused on exporting
fabric communication services to applications.
Libfabric, also known as Open Fabrics Interfaces (OFI), defines a
communication API for high-performance parallel and distributed
applications. It is a low-level communication library that abstracts
diverse networking technologies. Libfabric is developed by the OFI
Working Group (OFIWG, pronounced “o-fee-wig”), a subgroup of the
OpenFabrics Alliance - OFA.
Participation in the OFIWG is open to anyone, and not restricted to
members of OFA.
The goal of libfabric is to define interfaces that enable a tight
semantic map between applications and underlying fabric services.
Specifically, libfabric software interfaces have been co-designed with
fabric hardware providers and application developers, with a focus on
the needs of HPC users. Libfabric supports multiple communication
semantics, is fabric and hardware implementation agnostic, and leverages
and expands the existing RDMA open source community.
Libfabric is designed to minimize the impedance mismatch between
applications, including middleware such as MPI, SHMEM, data storage, and
PGAS, and fabric communication hardware. Its interfaces target
high-bandwidth, low-latency NICs, with a goal to scale to tens of
thousands of nodes.
Libfabric targets support for the Linux, Free BSD, Windows, and OS X.
A reasonable effort is made to support all major, modern Linux
distributions; however, validation is limited to the most recent 2-3
releases of Red Hat Enterprise Linux (RHEL) and SUSE Linux Enterprise
Server (SLES). Support for a particular operating system version or
distribution is vendor specific. The exceptions are the tcp and udp
based socket providers are available on all platforms.
"
# Point LIBFABRIC_LIBRARY_DIR at $PREFIX/lib64 and drop the $PREFIX/lib
# entries from the library search paths.
# NOTE(review): the lib-based settings undone here are presumably created
# automatically by the module framework — confirm.
# The value given to unsetenv uses $PREFIX instead of a hard-coded,
# version-specific path so the file stays valid for other libfabric versions.
unsetenv LIBFABRIC_LIBRARY_DIR $PREFIX/lib
remove-path LIBRARY_PATH $PREFIX/lib
remove-path LD_LIBRARY_PATH $PREFIX/lib
setenv LIBFABRIC_LIBRARY_DIR $PREFIX/lib64
# The lib64 directory is currently not added to the search paths; the
# alternatives below were left disabled.
# prepend-path LIBRARY_PATH $PREFIX/lib64
# prepend-path LD_LIBRARY_PATH $PREFIX/lib64
# prepend-path LIBFABRIC_LIBRARY_DIR $PREFIX/lib64

View File

@@ -7,7 +7,7 @@ pbuild::add_to_group 'Libraries'
(( version = (${V_MAJOR} * 100 + ${V_MINOR}) * 100 + V_PATCHLVL ))
if (( V_MAJOR > 1 )); then
pbuild::install_docfiles AUTHORS README LICENSE NEWS VERSION
pbuild::install_docfiles AUTHORS README.md LICENSE NEWS VERSION
else
pbuild::install_docfiles README LICENSE NEWS VERSION
fi
@@ -23,7 +23,16 @@ pbuild::pre_configure() {
pbuild::add_configure_args "--with-libevent=${LIBEVENT_PREFIX}"
fi
pbuild::add_configure_args "--enable-pmi-backward-compatibility"
if [[ -n "${INTEL_VERSION}" ]]; then
pbuild::add_configure_args "CC=icc"
pbuild::add_configure_args "CXX=icpc"
pbuild::add_configure_args "FC=ifort"
pbuild::add_configure_args "F77=ifort"
pbuild::add_configure_args "F90=ifort"
# pbuild::add_configure_args "LDFLAGS=-Wc,-static-intel"
fi
# pbuild::add_configure_args "--enable-pmi-backward-compatibility"
pbuild::add_configure_args "--enable-shared"
pbuild::add_configure_args "--enable-static"
}

View File

@@ -1,4 +1,6 @@
pmix/1.2.5 unstable b:libevent/2.1.12
pmix/2.2.5 unstable b:libevent/2.1.12
pmix/3.2.3 unstable b:libevent/2.1.12
pmix/4.1.2 unstable hwloc/2.7.1 libevent/2.1.12
pmix/1.2.5 unstable libevent/2.1.12
pmix/2.2.5 unstable libevent/2.1.12
pmix/3.2.3 unstable libevent/2.1.12
pmix/4.1.2 unstable libevent/2.1.12 b:hwloc/2.7.1
pmix/4.2.3 unstable libevent/2.1.12 b:hwloc/2.9.1
pmix/4.2.4 unstable b:gcc/10.4.0 libevent/2.1.12 b:hwloc/2.9.1