Intelcc+OpenMPI update
This commit is contained in:
14
Batchsystem/slurm/build
Executable file
14
Batchsystem/slurm/build
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/usr/bin/env modbuild
|
||||
|
||||
# Source tarball URL for this package. ${P} and ${V} are interpolated into the
# path; presumably they are the package name and version supplied by modbuild
# (TODO confirm against the modbuild documentation).
pbuild::set_download_url "https://download.schedmd.com/${P}/${P}-${V}.tar.bz2"
|
||||
|
||||
# Assign the package to the 'Batchsystem' group.
pbuild::add_to_group 'Batchsystem'
|
||||
# Files handed to pbuild::install_docfiles; presumably these are top-level
# files of the Slurm source tree that get installed as documentation.
pbuild::install_docfiles 'AUTHORS' 'INSTALL' 'NEWS' 'README.rst' 'RELEASE_NOTES'
|
||||
|
||||
pbuild::pre_configure() {
	# Point configure at the PMIx, CUDA (for NVML), hwloc and UCX installations.
	# Each option is registered with its own pbuild::add_configure_args call,
	# in this order.
	local configure_opt
	for configure_opt in \
		"--with-pmix=${PMIX_PREFIX}" \
		"--with-nvml=${CUDA_PREFIX}" \
		"--with-hwloc=${HWLOC_PREFIX}" \
		"--with-ucx=${UCX_PREFIX}"
	do
		pbuild::add_configure_args "${configure_opt}"
	done

	# Disabled: netloc support taken from the hwloc installation.
	# pbuild::add_configure_args "--with-netloc=${HWLOC_PREFIX}"
}
|
||||
1
Batchsystem/slurm/files/variants.merlin6
Normal file
1
Batchsystem/slurm/files/variants.merlin6
Normal file
@@ -0,0 +1 @@
|
||||
slurm/22.05.9 unstable b:pmix/4.2.4 b:cuda/12.1.1 b:ucx/1.14.1_slurm
|
||||
28
Batchsystem/slurm/modulefile
Normal file
28
Batchsystem/slurm/modulefile
Normal file
@@ -0,0 +1,28 @@
|
||||
#%Module1.0
|
||||
|
||||
module-whatis "Slurm Workload Manager"
|
||||
module-url "https://slurm.schedmd.com/"
|
||||
module-license "See https://github.com/SchedMD/slurm/blob/master/LICENSE.OpenSSL"
|
||||
module-maintainer "Marc Caubet <marc.caubet@psi.ch>"
|
||||
module-help "
|
||||
Slurm is an open source, fault-tolerant, and highly scalable cluster
|
||||
management and job scheduling system for large and small Linux clusters.
|
||||
|
||||
Slurm requires no kernel modifications for its operation and is relatively
|
||||
self-contained. As a cluster workload manager, Slurm has three key functions:
|
||||
|
||||
* First, it allocates exclusive and/or non-exclusive access to resources
|
||||
(compute nodes) to users for some duration of time so they can perform
|
||||
work.
|
||||
* Second, it provides a framework for starting, executing, and monitoring
|
||||
work (normally a parallel job) on the set of allocated nodes.
|
||||
* Finally, it arbitrates contention for resources by managing a queue of
|
||||
pending work.
|
||||
|
||||
Optional plugins can be used for accounting, advanced reservation, gang
|
||||
scheduling (time sharing for parallel jobs), backfill scheduling, topology
|
||||
optimized resource selection, resource limits by user or bank account, and
|
||||
sophisticated multifactor job prioritization algorithms.
|
||||
"
|
||||
|
||||
module-addgroup Batchsystem
|
||||
@@ -13,28 +13,43 @@ pbuild::install_docfiles 'AUTHORS' 'LICENSE' 'NEWS' 'README'
|
||||
|
||||
|
||||
pbuild::pre_configure() {
|
||||
if [[ -n "${CUDA_VERSION}" ]]; then
|
||||
pbuild::add_configure_args "--with-cuda=${CUDA_HOME}"
|
||||
fi
|
||||
pbuild::add_configure_args "--prefix=${PREFIX}"
|
||||
pbuild::add_configure_args "--enable-mpi-cxx"
|
||||
pbuild::add_configure_args "--enable-mpi-cxx-seek"
|
||||
pbuild::add_configure_args "--enable-orterun-prefix-by-default"
|
||||
pbuild::add_configure_args "--enable-shared"
|
||||
pbuild::add_configure_args "--enable-static"
|
||||
pbuild::add_configure_args "--with-hwloc=internal"
|
||||
pbuild::add_configure_args "--with-slurm=yes"
|
||||
|
||||
if [[ -n "${CUDA_VERSION}" ]]; then
|
||||
pbuild::add_configure_args "--with-cuda=${CUDA_HOME}"
|
||||
fi
|
||||
|
||||
if [[ -n "${HWLOC_VERSION}" ]]; then
|
||||
unset HWLOC_VERSION
|
||||
pbuild::add_configure_args "--with-hwloc=${HWLOC_PREFIX}"
|
||||
else
|
||||
pbuild::add_configure_args "--with-hwloc=internal"
|
||||
fi
|
||||
|
||||
if [[ -n "${LIBEVENT_VERSION}" ]]; then
|
||||
pbuild::add_configure_args "--with-libevent=${LIBEVENT_PREFIX}"
|
||||
fi
|
||||
|
||||
if [[ -n "${PMIX_VERSION}" ]]; then
|
||||
unset PMIX_VERSION
|
||||
pbuild::add_configure_args "--with-pmix=${PMIX_PREFIX}"
|
||||
fi
|
||||
|
||||
if [[ -n "${LIBFABRIC_VERSION}" ]]; then
|
||||
pbuild::add_configure_args "--with-ofi=${LIBFABRIC_PREFIX}"
|
||||
# pbuild::add_configure_args "--with-ofi-libdir=${LIBFABRIC_LIBRARY_DIR}"
|
||||
fi
|
||||
|
||||
if [[ -n "${UCX_VERSION}" ]]; then
|
||||
pbuild::add_configure_args "--with-ucx=${UCX_PREFIX}"
|
||||
fi
|
||||
|
||||
if [[ -n "${INTEL_VERSION}" ]]; then
|
||||
pbuild::add_configure_args "CC=icc"
|
||||
pbuild::add_configure_args "CXX=icpc"
|
||||
@@ -46,16 +61,10 @@ pbuild::pre_configure() {
|
||||
pbuild::add_configure_args "--enable-debug"
|
||||
fi
|
||||
|
||||
if [[ -n "${LIBEVENT_VERSION}" ]]; then
|
||||
pbuild::add_configure_args "--with-libevent=${LIBEVENT_PREFIX}"
|
||||
fi
|
||||
|
||||
if [[ -n "${UCX_VERSION}" ]]; then
|
||||
pbuild::add_configure_args "--with-ucx=${UCX_PREFIX}"
|
||||
fi
|
||||
if pbuild::use_flag slurm || pbuild::use_flag dgx || pbuild::use_flag merlin6; then
|
||||
pbuild::add_configure_args "--with-pmi"
|
||||
pbuild::add_configure_args "--with-pmi-libdir=/usr/lib64/"
|
||||
pbuild::add_configure_args "--with-gpfs=/usr/lpp/mmfs"
|
||||
# pbuild::add_configure_args "--with-pmi-libdir=/usr/lib64/"
|
||||
|
||||
if pbuild::use_flag "libpmix"; then
|
||||
pbuild::add_configure_args "--enable-install-libpmix"
|
||||
@@ -76,18 +85,29 @@ pbuild::pre_configure() {
|
||||
}
|
||||
|
||||
pbuild::post_install() {
|
||||
mkdir -p "${PREFIX}/lib/fallback"
|
||||
local -r binary=$(ls "${PREFIX}"/lib/libmpi.so.*.*.*)
|
||||
pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libuc[mpst].so'
|
||||
pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libuct_ib.so.0'
|
||||
pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libnuma.so'
|
||||
pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libibverbs.so'
|
||||
pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/librdmacm.so'
|
||||
pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libpmi.so'
|
||||
pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libpmi2.so'
|
||||
pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libpmi2.so'
|
||||
|
||||
if [[ -n "${CUDA_VERSION}" ]]; then
|
||||
echo "opal_warn_on_missing_libcuda = 0" >> ${PREFIX}/etc/openmpi-mca-params.conf
|
||||
if ! pbuild::use_flag slurm && ! pbuild::use_flag dgx && ! pbuild::use_flag merlin6; then
	# Only for builds without the slurm, dgx or merlin6 use flag: populate
	# ${PREFIX}/lib/fallback via pbuild::install_shared_libs, passing the
	# installed libmpi shared object and one library-name pattern per call.
	mkdir -p "${PREFIX}/lib/fallback"
	local -r binary=$(ls "${PREFIX}"/lib/libmpi.so.*.*.*)

	# Each pattern is listed exactly once. The previous version passed
	# '/libpmi2.so' twice in a row.
	# NOTE(review): if the second occurrence was meant to be a different
	# library, add that pattern here.
	local fallback_pattern
	for fallback_pattern in \
		'/libuc[mpst].so' \
		'/libuct_ib.so.0' \
		'/libnuma.so' \
		'/libibverbs.so' \
		'/librdmacm.so' \
		'/libpmi.so' \
		'/libpmi2.so'
	do
		pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" "${fallback_pattern}"
	done
fi
|
||||
|
||||
if [[ -n "${CUDA_VERSION}" ]]; then
	# For CUDA builds, append a setting to the installed MCA parameter file
	# that turns off opal_warn_on_missing_libcuda.
	# The redirect target is quoted so that a PREFIX containing whitespace
	# does not make bash fail with "ambiguous redirect".
	echo "opal_warn_on_missing_libcuda = 0" >> "${PREFIX}/etc/openmpi-mca-params.conf"
fi
|
||||
|
||||
# Remove the /usr/lib64 entry from the RPATH of installed binaries.
#
# Candidates are all regular files below ${PREFIX}, except *.a and *.mod, for
# which `grep -IL .` reports no text match (files grep treats as binary, and
# empty files). The list is collected completely before any file is patched
# and is then read line by line, so paths containing whitespace or glob
# characters are neither split nor expanded.
local binary_files
binary_files=$(find "${PREFIX}" -type f \( ! -name "*.a" -and ! -name "*.mod" \) -exec grep -IL . "{}" \;) \
	|| echo "warning: find reported errors while scanning ${PREFIX}" >&2

while IFS= read -r FILE
do
	# An empty candidate list still yields one empty line from the here-string.
	[[ -n "${FILE}" ]] || continue

	# Second field of the objdump line mentioning RPATH; empty when the
	# file has no such line.
	OLD_RPATH=$(objdump -a -x "${FILE}" | grep RPATH | awk '{print $2}')
	# NOTE(review): the expression only removes /usr/lib64 when it sits
	# between two other entries; a leading or trailing /usr/lib64 is kept.
	NEW_RPATH=$(echo "${OLD_RPATH}" | sed 's/:\/usr\/lib64:/:/g')
	if [[ "${OLD_RPATH}" != "${NEW_RPATH}" ]]; then
		patchelf --force-rpath --set-rpath "${NEW_RPATH}" "${FILE}"
	fi
done <<< "${binary_files}"
|
||||
}
|
||||
|
||||
@@ -41,7 +41,7 @@ openmpi/4.1.3_slurm stable gcc/{9.3.0,10.3.0,11.2.0} cuda/11.5.1 b:ucx/1
|
||||
openmpi/4.1.4_slurm stable gcc/10.4.0 cuda/11.5.1 b:ucx/1.12.1_slurm
|
||||
|
||||
openmpi/4.1.5_slurm unstable gcc/10.4.0 cuda/12.1.1 b:ucx/1.14.1_slurm
|
||||
openmpi/4.1.5_slurm unstable intelcc/22.2 cuda/12.1.1 b:pmix/4.2.4 b:ucx/1.14.1_slurm b:hwloc/2.9.1
|
||||
openmpi/4.1.5_slurm unstable intelcc/22.2 b:cuda/12.1.1 b:pmix/4.2.4 b:ucx/1.14.1_slurm b:libfabric/1.18.0 b:hwloc/2.9.1 b:patchelf/0.14.5
|
||||
|
||||
openmpi/4.0.5-1_dgx deprecated gcc/{8.4.0,9.3.0,10.2.0} cuda/11.2.2 b:ucx/1.10.0-1_dgx
|
||||
openmpi/4.1.0-1_dgx deprecated gcc/10.2.0 cuda/11.2.2 b:ucx/1.10.0-1_dgx
|
||||
|
||||
7
Libraries/libfabric/build
Executable file
7
Libraries/libfabric/build
Executable file
@@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env modbuild
|
||||
|
||||
# Release tarball URL on GitHub. $P and $V are interpolated into the path;
# presumably they are the package name and version supplied by modbuild
# (TODO confirm against the modbuild documentation).
pbuild::set_download_url "https://github.com/ofiwg/libfabric/releases/download/v$V/$P-$V.tar.bz2"
|
||||
# Assign the package to the 'Libraries' group.
pbuild::add_to_group 'Libraries'
|
||||
|
||||
# Files handed to pbuild::install_docfiles; presumably these are top-level
# files of the libfabric source tree that get installed as documentation.
pbuild::install_docfiles 'COPYING' 'AUTHORS' 'README' 'NEWS.md'
|
||||
|
||||
1
Libraries/libfabric/files/variants
Normal file
1
Libraries/libfabric/files/variants
Normal file
@@ -0,0 +1 @@
|
||||
libfabric/1.18.0 unstable b:gcc/10.4.0
|
||||
53
Libraries/libfabric/modulefile
Normal file
53
Libraries/libfabric/modulefile
Normal file
@@ -0,0 +1,53 @@
|
||||
#%Module1.0
|
||||
|
||||
module-whatis "Open Fabrics Interfaces (OFI)"
|
||||
module-url "https://ofiwg.github.io/libfabric/"
|
||||
module-license "Open source, see https://github.com/ofiwg/libfabric/blob/main/COPYING"
|
||||
module-maintainer "Marc Caubet Serrabou <marc.caubet@psi.ch>"
|
||||
|
||||
module-help "
|
||||
The Open Fabrics Interfaces (OFI) is a framework focused on exporting
|
||||
fabric communication services to applications.
|
||||
|
||||
Libfabric, also known as Open Fabrics Interfaces (OFI), defines a
|
||||
communication API for high-performance parallel and distributed
|
||||
applications. It is a low-level communication library that abstracts
|
||||
diverse networking technologies. Libfabric is developed by the OFI
|
||||
Working Group (OFIWG, pronounced “o-fee-wig”), a subgroup of the
|
||||
OpenFabrics Alliance - OFA.
|
||||
|
||||
Participation in the OFIWG is open to anyone, and not restricted to
|
||||
members of OFA.
|
||||
|
||||
The goal of libfabric is to define interfaces that enable a tight
|
||||
semantic map between applications and underlying fabric services.
|
||||
Specifically, libfabric software interfaces have been co-designed with
|
||||
fabric hardware providers and application developers, with a focus on
|
||||
the needs of HPC users. Libfabric supports multiple communication
|
||||
semantics, is fabric and hardware implementation agnostic, and leverages
|
||||
and expands the existing RDMA open source community.
|
||||
|
||||
Libfabric is designed to minimize the impedance mismatch between
|
||||
applications, including middleware such as MPI, SHMEM, data storage, and
|
||||
PGAS, and fabric communication hardware. Its interfaces target
|
||||
high-bandwidth, low-latency NICs, with a goal to scale to tens of
|
||||
thousands of nodes.
|
||||
|
||||
Libfabric targets support for the Linux, Free BSD, Windows, and OS X.
|
||||
A reasonable effort is made to support all major, modern Linux
|
||||
distributions; however, validation is limited to the most recent 2-3
|
||||
releases of Red Hat Enterprise Linux (RHEL) and SUSE Linux Enterprise
|
||||
Server (SLES). Support for a particular operating system version or
|
||||
distribution is vendor specific. The exceptions are the tcp and udp
|
||||
based socket providers are available on all platforms.
|
||||
"
|
||||
|
||||
# Replace the $PREFIX/lib settings with $PREFIX/lib64 for this module.
# NOTE(review): presumably the module framework adds the $PREFIX/lib entries
# automatically while libfabric installs its libraries under lib64 - confirm.
#
# The value given to unsetenv is derived from $PREFIX, like every other path
# here, instead of a hard-coded 1.18.0 installation directory, so the
# modulefile stays correct for other versions and install roots.
unsetenv LIBFABRIC_LIBRARY_DIR $PREFIX/lib

remove-path LIBRARY_PATH $PREFIX/lib
remove-path LD_LIBRARY_PATH $PREFIX/lib
setenv LIBFABRIC_LIBRARY_DIR $PREFIX/lib64
|
||||
|
||||
# prepend-path LIBRARY_PATH $PREFIX/lib64
|
||||
# prepend-path LD_LIBRARY_PATH $PREFIX/lib64
|
||||
# prepend-path LIBFABRIC_LIBRARY_DIR $PREFIX/lib64
|
||||
@@ -3,4 +3,4 @@ pmix/2.2.5 unstable libevent/2.1.12
|
||||
pmix/3.2.3 unstable libevent/2.1.12
|
||||
pmix/4.1.2 unstable libevent/2.1.12 b:hwloc/2.7.1
|
||||
pmix/4.2.3 unstable libevent/2.1.12 b:hwloc/2.9.1
|
||||
pmix/4.2.4 unstable libevent/2.1.12 b:hwloc/2.9.1
|
||||
pmix/4.2.4 unstable b:gcc/10.4.0 libevent/2.1.12 b:hwloc/2.9.1
|
||||
|
||||
Reference in New Issue
Block a user