diff --git a/Batchsystem/slurm/build b/Batchsystem/slurm/build
new file mode 100755
index 0000000..3800ffa
--- /dev/null
+++ b/Batchsystem/slurm/build
@@ -0,0 +1,25 @@
+#!/usr/bin/env modbuild
+
+pbuild::set_download_url "https://download.schedmd.com/${P}/${P}-${V}.tar.bz2"
+
+pbuild::add_to_group 'Batchsystem'
+pbuild::install_docfiles 'AUTHORS' 'INSTALL' 'NEWS' 'README.rst' 'RELEASE_NOTES'
+
+pbuild::pre_configure() {
+	# Pass --with-<pkg>=<prefix> only when that prefix is set, so configure
+	# never receives an empty path (variants.merlin6 lists no hwloc module).
+	# Guard style follows pre_configure in Compiler/openmpi/build.
+	if [[ -n "${PMIX_PREFIX}" ]]; then
+		pbuild::add_configure_args "--with-pmix=${PMIX_PREFIX}"
+	fi
+	if [[ -n "${CUDA_PREFIX}" ]]; then
+		pbuild::add_configure_args "--with-nvml=${CUDA_PREFIX}"
+	fi
+	if [[ -n "${HWLOC_PREFIX}" ]]; then
+		pbuild::add_configure_args "--with-hwloc=${HWLOC_PREFIX}"
+	fi
+	if [[ -n "${UCX_PREFIX}" ]]; then
+		pbuild::add_configure_args "--with-ucx=${UCX_PREFIX}"
+	fi
+	# pbuild::add_configure_args "--with-netloc=${HWLOC_PREFIX}"
+}
diff --git a/Batchsystem/slurm/files/variants.merlin6 b/Batchsystem/slurm/files/variants.merlin6
new file mode 100644
index 0000000..d30cdae
--- /dev/null
+++ b/Batchsystem/slurm/files/variants.merlin6
@@ -0,0 +1 @@
+slurm/22.05.9 unstable b:pmix/4.2.4 b:cuda/12.1.1 b:ucx/1.14.1_slurm
diff --git a/Batchsystem/slurm/modulefile b/Batchsystem/slurm/modulefile
new file mode 100644
index 0000000..cdd0490
--- /dev/null
+++ b/Batchsystem/slurm/modulefile
@@ -0,0 +1,28 @@
+#%Module1.0
+
+module-whatis "Slurm Workload Manager"
+module-url "https://slurm.schedmd.com/"
+module-license "See https://github.com/SchedMD/slurm/blob/master/LICENSE.OpenSSL"
+module-maintainer "Marc Caubet "
+module-help "
+Slurm is an open source, fault-tolerant, and highly scalable cluster
+management and job scheduling system for large and small Linux clusters.
+
+Slurm requires no kernel modifications for its operation and is relatively
+self-contained. As a cluster workload manager, Slurm has three key functions:
+
+  * First, it allocates exclusive and/or non-exclusive access to resources
+    (compute nodes) to users for some duration of time so they can perform
+    work.
+  * Second, it provides a framework for starting, executing, and monitoring
+    work (normally a parallel job) on the set of allocated nodes.
+  * Finally, it arbitrates contention for resources by managing a queue of
+    pending work.
+
+Optional plugins can be used for accounting, advanced reservation, gang
+scheduling (time sharing for parallel jobs), backfill scheduling, topology
+optimized resource selection, resource limits by user or bank account, and
+sophisticated multifactor job prioritization algorithms.
+"
+
+module-addgroup Batchsystem
diff --git a/Compiler/openmpi/build b/Compiler/openmpi/build
index 40931ab..ab75d48 100755
--- a/Compiler/openmpi/build
+++ b/Compiler/openmpi/build
@@ -13,23 +13,55 @@
 pbuild::install_docfiles 'AUTHORS' 'LICENSE' 'NEWS' 'README'
 
 pbuild::pre_configure() {
-	if [[ -n "${CUDA_VERSION}" ]]; then
-		pbuild::add_configure_args "--with-cuda=${CUDA_HOME}"
-	fi
 	pbuild::add_configure_args "--prefix=${PREFIX}"
 	pbuild::add_configure_args "--enable-mpi-cxx"
 	pbuild::add_configure_args "--enable-mpi-cxx-seek"
 	pbuild::add_configure_args "--enable-orterun-prefix-by-default"
 	pbuild::add_configure_args "--enable-shared"
 	pbuild::add_configure_args "--enable-static"
-	pbuild::add_configure_args "--with-hwloc=internal"
 	pbuild::add_configure_args "--with-slurm=yes"
+
+	if [[ -n "${CUDA_VERSION}" ]]; then
+		pbuild::add_configure_args "--with-cuda=${CUDA_HOME}"
+	fi
+
+	if [[ -n "${HWLOC_VERSION}" ]]; then
+		unset HWLOC_VERSION
+		pbuild::add_configure_args "--with-hwloc=${HWLOC_PREFIX}"
+	else
+		pbuild::add_configure_args "--with-hwloc=internal"
+	fi
+
+	if [[ -n "${LIBEVENT_VERSION}" ]]; then
+		pbuild::add_configure_args "--with-libevent=${LIBEVENT_PREFIX}"
+	fi
+
+	if [[ -n "${PMIX_VERSION}" ]]; then
+		unset PMIX_VERSION
+		pbuild::add_configure_args "--with-pmix=${PMIX_PREFIX}"
+	fi
+
+	if [[ -n "${LIBFABRIC_VERSION}" ]]; then
+		pbuild::add_configure_args "--with-ofi=${LIBFABRIC_PREFIX}"
+	fi
+
 	if [[ -n "${UCX_VERSION}" ]]; then
 		pbuild::add_configure_args "--with-ucx=${UCX_PREFIX}"
 	fi
+
+	if [[ -n "${INTEL_VERSION}" ]]; then
+		pbuild::add_configure_args "CC=icc"
+		pbuild::add_configure_args "CXX=icpc"
+		pbuild::add_configure_args "FC=ifort"
+		pbuild::add_configure_args "F90=ifort"
+		pbuild::add_configure_args "F77=ifort"
+		pbuild::add_configure_args "LDFLAGS=-Wc,-static-intel,-O0"
+	fi
+
 	if pbuild::use_flag slurm || pbuild::use_flag dgx || pbuild::use_flag merlin6; then
+		pbuild::add_configure_args "--with-gpfs=/usr/lpp/mmfs"
 		pbuild::add_configure_args "--with-pmi"
-		pbuild::add_configure_args "--with-pmi-libdir=/usr/lib64/"
+		# pbuild::add_configure_args "--with-pmi-libdir=/usr/lib64/"
 
 		if pbuild::use_flag "libpmix"; then
 			pbuild::add_configure_args "--enable-install-libpmix"
@@ -50,18 +82,48 @@ pbuild::pre_configure() {
 }
 
 pbuild::post_install() {
-	mkdir -p "${PREFIX}/lib/fallback"
-	local -r binary=$(ls "${PREFIX}"/lib/libmpi.so.*.*.*)
-	pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libuc[mpst].so'
-	pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libuct_ib.so.0'
-	pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libnuma.so'
-	pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libibverbs.so'
-	pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/librdmacm.so'
-	pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libpmi.so'
-	pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libpmi2.so'
-	pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libpmi2.so'
-
-	if [[ -n "${CUDA_VERSION}" ]]; then
-		echo "opal_warn_on_missing_libcuda = 0" >> ${PREFIX}/etc/openmpi-mca-params.conf
+	# Fallback library copies are installed only when none of the slurm, dgx
+	# or merlin6 use flags is set ('/libpmi2.so' is now requested once).
+	if ! pbuild::use_flag slurm && ! pbuild::use_flag dgx && ! pbuild::use_flag merlin6; then
+		mkdir -p "${PREFIX}/lib/fallback"
+		local -r binary=$(ls "${PREFIX}"/lib/libmpi.so.*.*.*)
+		pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libuc[mpst].so'
+		pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libuct_ib.so.0'
+		pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libnuma.so'
+		pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libibverbs.so'
+		pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/librdmacm.so'
+		pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libpmi.so'
+		pbuild::install_shared_libs "${binary}" "${PREFIX}/lib/fallback" '/libpmi2.so'
 	fi
+
+	if [[ -n "${CUDA_VERSION}" ]]; then
+		echo "opal_warn_on_missing_libcuda = 0" >> "${PREFIX}/etc/openmpi-mca-params.conf"
+	fi
+
+	# Remove /usr/lib64 from the RPATH of every installed binary file.
+	# NUL-delimited find output keeps paths containing whitespace intact.
+	local file old_rpath new_rpath
+	while IFS= read -r -d '' file; do
+		# grep -I treats binary files as non-matching; a match means text.
+		if grep -qI . "${file}"; then
+			continue
+		fi
+		old_rpath=$(objdump -a -x "${file}" 2>/dev/null | awk '/RPATH/ {print $2}')
+		if [[ -z "${old_rpath}" ]]; then
+			continue
+		fi
+		# Pad with ':' so the entry also matches when first, last or alone;
+		# repeat because adjacent entries share a separator.
+		new_rpath=":${old_rpath}:"
+		while [[ "${new_rpath}" == *':/usr/lib64:'* ]]; do
+			new_rpath="${new_rpath//:\/usr\/lib64:/:}"
+		done
+		new_rpath="${new_rpath#:}"
+		new_rpath="${new_rpath%:}"
+		if [[ -z "${new_rpath}" ]]; then
+			patchelf --remove-rpath "${file}"
+		elif [[ "${old_rpath}" != "${new_rpath}" ]]; then
+			patchelf --force-rpath --set-rpath "${new_rpath}" "${file}"
+		fi
+	done < <(find "${PREFIX}" -type f ! -name '*.a' ! -name '*.mod' -print0)
 }
diff --git a/Compiler/openmpi/files/variants.merlin6 b/Compiler/openmpi/files/variants.merlin6
index 7e9bcb4..cb2cd16 100644
--- a/Compiler/openmpi/files/variants.merlin6
+++ b/Compiler/openmpi/files/variants.merlin6
@@ -41,6 +41,7 @@ openmpi/4.1.3_slurm stable gcc/{9.3.0,10.3.0,11.2.0} cuda/11.5.1 b:ucx/1
 
 openmpi/4.1.4_slurm stable gcc/10.4.0 cuda/11.5.1 b:ucx/1.12.1_slurm
 openmpi/4.1.5_slurm unstable gcc/10.4.0 cuda/12.1.1 b:ucx/1.14.1_slurm
+openmpi/4.1.5_slurm unstable intelcc/22.2 b:cuda/12.1.1 b:pmix/4.2.4 b:ucx/1.14.1_slurm b:libfabric/1.18.0 b:hwloc/2.9.1 b:patchelf/0.14.5
 
 openmpi/4.0.5-1_dgx deprecated gcc/{8.4.0,9.3.0,10.2.0} cuda/11.2.2 b:ucx/1.10.0-1_dgx
 openmpi/4.1.0-1_dgx deprecated gcc/10.2.0 cuda/11.2.2 b:ucx/1.10.0-1_dgx
diff --git a/Libraries/hwloc/build b/Libraries/hwloc/build
new file mode 100755
index 0000000..c65a014
--- /dev/null
+++ b/Libraries/hwloc/build
@@ -0,0 +1,13 @@
+#!/usr/bin/env modbuild
+
+pbuild::set_download_url "https://download.open-mpi.org/release/${P}/v${V_MAJOR}.${V_MINOR}/${P}-${V}.tar.gz"
+pbuild::add_to_group 'Libraries'
+
+pbuild::install_docfiles 'README' 'AUTHORS' 'NEWS' 'COPYING' 'VERSION'
+
+pbuild::pre_configure() {
+	pbuild::add_configure_args "--enable-netloc"
+	pbuild::add_configure_args "--enable-plugins"
+}
+
+
diff --git a/Libraries/hwloc/files/variants b/Libraries/hwloc/files/variants
new file mode 100644
index 0000000..4a6b27f
--- /dev/null
+++ b/Libraries/hwloc/files/variants
@@ -0,0 +1 @@
+hwloc/2.7.1 unstable
diff --git a/Libraries/hwloc/files/variants.merlin6 b/Libraries/hwloc/files/variants.merlin6
new file mode 100644
index 0000000..e754fa1
--- /dev/null
+++ b/Libraries/hwloc/files/variants.merlin6
@@ -0,0 +1,2 @@
+hwloc/2.7.1 unstable b:cuda/11.5.1
+hwloc/2.9.1 unstable b:cuda/12.1.1
diff --git a/Libraries/hwloc/files/variants.overlay_merlin b/Libraries/hwloc/files/variants.overlay_merlin
new file mode 100644
index 0000000..de1dd40
--- /dev/null
+++ b/Libraries/hwloc/files/variants.overlay_merlin
@@ -0,0 +1 @@
+hwloc/2.7.1 unstable b:cuda/11.5.1
diff --git a/Libraries/hwloc/modulefile b/Libraries/hwloc/modulefile
new file mode 100644
index 0000000..4536734
--- /dev/null
+++ b/Libraries/hwloc/modulefile
@@ -0,0 +1,25 @@
+#%Module1.0
+
+module-whatis "The Portable Hardware Locality (hwloc) software package"
+module-url "https://www.open-mpi.org/projects/hwloc/"
+module-license "Open source, see $PREFIX/share/doc/hwloc/COPYING"
+module-maintainer "Marc Caubet Serrabou "
+
+module-help "
+The Portable Hardware Locality (hwloc) software package provides a
+portable abstraction (across OS, versions, architectures, ...) of the
+hierarchical topology of modern architectures, including NUMA memory
+nodes, sockets, shared caches, cores and simultaneous multithreading.
+
+It also gathers various system attributes such as cache and memory
+information as well as the locality of I/O devices such as network
+interfaces, InfiniBand HCAs or GPUs.
+
+Hwloc primarily aims at helping applications with gathering information
+about increasingly complex parallel computing platforms so as to exploit
+them accordingly and efficiently.
+
+Hwloc may also help many applications just by providing a portable CPU
+and memory binding API and a reliable way to find out how many cores
+and/or hardware threads are available.
+"
diff --git a/Libraries/libfabric/build b/Libraries/libfabric/build
new file mode 100755
index 0000000..0580efc
--- /dev/null
+++ b/Libraries/libfabric/build
@@ -0,0 +1,7 @@
+#!/usr/bin/env modbuild
+
+pbuild::set_download_url "https://github.com/ofiwg/libfabric/releases/download/v$V/$P-$V.tar.bz2"
+pbuild::add_to_group 'Libraries'
+
+pbuild::install_docfiles 'COPYING' 'AUTHORS' 'README' 'NEWS.md'
+
diff --git a/Libraries/libfabric/files/variants b/Libraries/libfabric/files/variants
new file mode 100644
index 0000000..ab94958
--- /dev/null
+++ b/Libraries/libfabric/files/variants
@@ -0,0 +1 @@
+libfabric/1.18.0 unstable b:gcc/10.4.0
diff --git a/Libraries/libfabric/modulefile b/Libraries/libfabric/modulefile
new file mode 100644
index 0000000..d7dc74f
--- /dev/null
+++ b/Libraries/libfabric/modulefile
@@ -0,0 +1,55 @@
+#%Module1.0
+
+module-whatis "Open Fabrics Interfaces (OFI)"
+module-url "https://ofiwg.github.io/libfabric/"
+module-license "Open source, see https://github.com/ofiwg/libfabric/blob/main/COPYING"
+module-maintainer "Marc Caubet Serrabou "
+
+module-help "
+The Open Fabrics Interfaces (OFI) is a framework focused on exporting
+fabric communication services to applications.
+
+Libfabric, also known as Open Fabrics Interfaces (OFI), defines a
+communication API for high-performance parallel and distributed
+applications. It is a low-level communication library that abstracts
+diverse networking technologies. Libfabric is developed by the OFI
+Working Group (OFIWG, pronounced “o-fee-wig”), a subgroup of the
+OpenFabrics Alliance - OFA.
+
+Participation in the OFIWG is open to anyone, and not restricted to
+members of OFA.
+
+The goal of libfabric is to define interfaces that enable a tight
+semantic map between applications and underlying fabric services.
+Specifically, libfabric software interfaces have been co-designed with
+fabric hardware providers and application developers, with a focus on
+the needs of HPC users. Libfabric supports multiple communication
+semantics, is fabric and hardware implementation agnostic, and leverages
+and expands the existing RDMA open source community.
+
+Libfabric is designed to minimize the impedance mismatch between
+applications, including middleware such as MPI, SHMEM, data storage, and
+PGAS, and fabric communication hardware. Its interfaces target
+high-bandwidth, low-latency NICs, with a goal to scale to tens of
+thousands of nodes.
+
+Libfabric targets support for the Linux, Free BSD, Windows, and OS X.
+A reasonable effort is made to support all major, modern Linux
+distributions; however, validation is limited to the most recent 2-3
+releases of Red Hat Enterprise Linux (RHEL) and SUSE Linux Enterprise
+Server (SLES). Support for a particular operating system version or
+distribution is vendor specific. The exceptions are the tcp and udp
+based socket providers are available on all platforms.
+"
+
+# Replace the lib-based settings with lib64.  The value given to unsetenv is
+# derived from $PREFIX instead of a path hard-coded for release 1.18.0.
+unsetenv LIBFABRIC_LIBRARY_DIR $PREFIX/lib
+
+remove-path LIBRARY_PATH $PREFIX/lib
+remove-path LD_LIBRARY_PATH $PREFIX/lib
+setenv LIBFABRIC_LIBRARY_DIR $PREFIX/lib64
+
+# prepend-path LIBRARY_PATH $PREFIX/lib64
+# prepend-path LD_LIBRARY_PATH $PREFIX/lib64
+# prepend-path LIBFABRIC_LIBRARY_DIR $PREFIX/lib64
diff --git a/Libraries/pmix/build b/Libraries/pmix/build
index c0dcf01..a058988 100755
--- a/Libraries/pmix/build
+++ b/Libraries/pmix/build
@@ -7,7 +7,7 @@ pbuild::add_to_group 'Libraries'
 
 (( version = (${V_MAJOR} * 100 + ${V_MINOR}) * 100 + V_PATCHLVL ))
 if (( V_MAJOR > 1 )); then
-	pbuild::install_docfiles AUTHORS README LICENSE NEWS VERSION
+	pbuild::install_docfiles AUTHORS README.md LICENSE NEWS VERSION
 else
 	pbuild::install_docfiles README LICENSE NEWS VERSION
 fi
@@ -23,7 +23,16 @@ pbuild::pre_configure() {
 		pbuild::add_configure_args "--with-libevent=${LIBEVENT_PREFIX}"
 	fi
 
-	pbuild::add_configure_args "--enable-pmi-backward-compatibility"
+	if [[ -n "${INTEL_VERSION}" ]]; then
+		pbuild::add_configure_args "CC=icc"
+		pbuild::add_configure_args "CXX=icpc"
+		pbuild::add_configure_args "FC=ifort"
+		pbuild::add_configure_args "F77=ifort"
+		pbuild::add_configure_args "F90=ifort"
+		# pbuild::add_configure_args "LDFLAGS=-Wc,-static-intel"
+	fi
+
+	# pbuild::add_configure_args "--enable-pmi-backward-compatibility"
 	pbuild::add_configure_args "--enable-shared"
 	pbuild::add_configure_args "--enable-static"
 }
diff --git a/Libraries/pmix/files/variants.merlin6 b/Libraries/pmix/files/variants.merlin6
index 3f3509d..beb6bdd 100644
--- a/Libraries/pmix/files/variants.merlin6
+++ b/Libraries/pmix/files/variants.merlin6
@@ -1,4 +1,6 @@
-pmix/1.2.5 unstable b:libevent/2.1.12
-pmix/2.2.5 unstable b:libevent/2.1.12
-pmix/3.2.3 unstable b:libevent/2.1.12
-pmix/4.1.2 unstable hwloc/2.7.1 libevent/2.1.12
+pmix/1.2.5 unstable libevent/2.1.12
+pmix/2.2.5 unstable libevent/2.1.12
+pmix/3.2.3 unstable libevent/2.1.12
+pmix/4.1.2 unstable libevent/2.1.12 b:hwloc/2.7.1
+pmix/4.2.3 unstable libevent/2.1.12 b:hwloc/2.9.1
+pmix/4.2.4 unstable b:gcc/10.4.0 libevent/2.1.12 b:hwloc/2.9.1