Files
Jungfraujoch/common/NUMAHWPolicy.cpp
2025-10-20 20:43:44 +02:00

126 lines
4.6 KiB
C++

// SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
#include "NUMAHWPolicy.h"
#include "../common/CUDAWrapper.h"
#include "JFJochException.h"
#ifdef JFJOCH_USE_NUMA
#include <numa.h>
#endif
/// Builds the table of {CPU node, memory node, GPU} bindings for a named policy.
///
/// @param policy Policy identifier. An empty string or "none" yields an empty
///               table (and the stored name becomes "none"); the other accepted
///               names are "n2g2", "n2g4", "n2g2_hbm", "n2g4_hbm", "n8g4",
///               "n8g4_hbm", "g2" and "g4".
/// @throws JFJochException (InputParameterInvalid) for any other policy name.
NUMAHWPolicy::NUMAHWPolicy(const std::string &policy) : name(policy) {
    // Appends a single entry to the binding table; entries are later handed
    // out in insertion order, so the order of the calls below matters.
    const auto add = [this](int32_t cpu, int32_t mem, int32_t gpu_id) {
        bindings.emplace_back(NUMABinding{.cpu_node = cpu, .mem_node = mem, .gpu = gpu_id});
    };

    if (policy.empty() || (policy == "none")) {
        // Normalise the empty string to the canonical "none" name.
        name = "none";
        return;
    }

    if (policy == "n2g2") {
        add(0, 0, 0);
        add(1, 1, 1);
    } else if (policy == "n2g4") {
        // Alternate between the two nodes so consecutive threads spread out.
        add(0, 0, 0);
        add(1, 1, 2);
        add(0, 0, 1);
        add(1, 1, 3);
    } else if (policy == "n2g2_hbm") {
        // Memory is placed on nodes 2/3 rather than on the CPU nodes
        // (presumably the HBM-backed nodes - confirm against target hardware).
        add(0, 2, 0);
        add(1, 3, 1);
    } else if (policy == "n2g4_hbm") {
        add(0, 2, 0);
        add(1, 3, 2);
        add(0, 2, 1);
        add(1, 3, 3);
    } else if (policy == "n8g4") {
        // Eight nodes, two consecutive nodes share one GPU.
        for (int32_t node = 0; node < 8; node++)
            add(node, node, node / 2);
    } else if (policy == "n8g4_hbm") {
        // As "n8g4", but memory goes to node + 8.
        for (int32_t node = 0; node < 8; node++)
            add(node, node + 8, node / 2);
    } else if (policy == "g2") {
        // GPU-only policies: -1 leaves CPU and memory placement untouched.
        for (int32_t device = 0; device < 2; device++)
            add(-1, -1, device);
    } else if (policy == "g4") {
        for (int32_t device = 0; device < 4; device++)
            add(-1, -1, device);
    } else {
        throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Unknown NUMA policy");
    }
}
/// Returns the binding assigned to a given thread index.
///
/// Threads are mapped onto the binding table round-robin (index modulo table
/// size). When the table is empty, a binding with all fields set to -1 is
/// returned.
NUMABinding NUMAHWPolicy::GetBinding(uint32_t thread) const {
    if (!bindings.empty()) {
        const auto slot = thread % bindings.size();
        return bindings.at(slot);
    }
    return NUMABinding{.cpu_node = -1, .mem_node = -1, .gpu = -1};
}
void NUMAHWPolicy::Bind(uint32_t thread) const {
Bind(GetBinding(thread));
}
/// Applies a binding: CPU node first, then memory node, then GPU.
///
/// Exceptions thrown by any of the three steps propagate to the caller.
void NUMAHWPolicy::Bind(const NUMABinding &binding) {
    // 1. CPU placement
    RunOnNode(binding.cpu_node);

    // 2. Memory placement
    MemOnNode(binding.mem_node);

    // 3. GPU selection
    SelectGPU(binding.gpu);
}
/// Restricts execution to the given NUMA node via numa_run_on_node().
///
/// Does nothing when built without JFJOCH_USE_NUMA, when libnuma reports that
/// NUMA is unavailable, or when cpu_node is negative.
///
/// @throws JFJochException (InputParameterInvalid) if cpu_node is not below
///         the number of configured nodes.
void NUMAHWPolicy::RunOnNode(int32_t cpu_node) {
#ifdef JFJOCH_USE_NUMA
    if (numa_available() == -1)
        return;

    const auto node_count = numa_num_configured_nodes();

    // Negative value means "no CPU placement requested".
    if (cpu_node < 0)
        return;

    if (cpu_node >= node_count)
        throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "CPU NUMA node out of bounds");

    numa_run_on_node(cpu_node);
#endif
}
/// Restricts memory allocation to the given NUMA node via numa_set_membind().
///
/// Does nothing when built without JFJOCH_USE_NUMA, when libnuma reports that
/// NUMA is unavailable, or when mem_node is negative.
///
/// @throws JFJochException (InputParameterInvalid) if mem_node is not below
///         the number of configured nodes.
void NUMAHWPolicy::MemOnNode(int32_t mem_node) {
#ifdef JFJOCH_USE_NUMA
    if (numa_available() == -1)
        return;

    const auto node_count = numa_num_configured_nodes();

    // Negative value means "no memory placement requested".
    if (mem_node < 0)
        return;

    if (mem_node >= node_count)
        throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Memory NUMA node out of bounds");

    // Build a node mask containing only the requested node, apply it and
    // release the mask again.
    struct bitmask *node_mask = numa_allocate_nodemask();
    numa_bitmask_setbit(node_mask, mem_node);
    numa_set_membind(node_mask);
    numa_bitmask_free(node_mask);
#endif
}
/// Makes the given GPU the active device through set_gpu().
///
/// A negative index means "no GPU requested" and is ignored; this matches the
/// -1 sentinel produced by GetBinding() and the `>= 0` checks used for CPU and
/// memory nodes. The request is also ignored when get_gpu_count() reports no
/// devices, so policies remain usable on machines without a GPU.
///
/// @param gpu Zero-based GPU index, or a negative value for "none".
/// @throws JFJochException (InputParameterInvalid) if gpu is not below the
///         number of devices reported by get_gpu_count().
void NUMAHWPolicy::SelectGPU(int32_t gpu) {
    // Negative index: nothing to select.
    if (gpu < 0)
        return;

    const auto gpu_count = get_gpu_count();

    // No devices present: silently skip GPU selection.
    if (gpu_count <= 0)
        return;

    if (gpu >= gpu_count)
        throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "GPU device out of bounds");

    // GPU 0 is a valid device and must be selected explicitly: the previous
    // `gpu > 0` test skipped it, leaving whichever device was active before.
    set_gpu(gpu);
}
void NUMAHWPolicy::SelectGPUAndItsNUMA(int32_t gpu) {
int numa = get_gpu_numa_node(gpu);
if (numa >= 0) {
RunOnNode(numa);
MemOnNode(numa);
}
set_gpu(gpu);
}
const std::string &NUMAHWPolicy::GetName() const {
return name;
}