// SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
// SPDX-License-Identifier: GPL-3.0-only
|
|
|
|
#include "NUMAHWPolicy.h"
|
|
|
|
#include "../common/CUDAWrapper.h"
|
|
#include "JFJochException.h"
|
|
|
|
#ifdef JFJOCH_USE_NUMA
|
|
#include <numa.h>
|
|
#endif
|
|
|
|
/**
 * Build the table of CPU-node / memory-node / GPU bindings for a named policy.
 *
 * Recognised policy names and the bindings they produce (in this order):
 *  - "" or "none": no bindings; the stored name becomes "none"
 *  - "n2g2":       (cpu 0, mem 0, gpu 0), (cpu 1, mem 1, gpu 1)
 *  - "n2g4":       (0, 0, 0), (1, 1, 2), (0, 0, 1), (1, 1, 3)
 *  - "n2g2_hbm":   as "n2g2" but memory nodes 2 and 3
 *  - "n2g4_hbm":   as "n2g4" but memory nodes 2 and 3
 *  - "n8g4":       for i in 0..7: (cpu i, mem i, gpu i / 2)
 *  - "n8g4_hbm":   for i in 0..7: (cpu i, mem i + 8, gpu i / 2)
 *  - "g2":         GPUs 0..1, no CPU/memory node (-1)
 *  - "g4":         GPUs 0..3, no CPU/memory node (-1)
 *
 * NOTE(review): the "_hbm" variants presumably target separate high-bandwidth
 * memory NUMA nodes - confirm against the target hardware.
 *
 * @param policy name of the policy
 * @throws JFJochException (InputParameterInvalid) if the name is not recognised
 */
NUMAHWPolicy::NUMAHWPolicy(const std::string &policy) : name(policy) {
    // Small helper so each policy reads as a compact table of (cpu, mem, gpu) triples.
    const auto add = [this](int32_t cpu, int32_t mem, int32_t device) {
        bindings.emplace_back(NUMABinding{.cpu_node = cpu, .mem_node = mem, .gpu = device});
    };

    if (policy.empty() || policy == "none") {
        // An empty string is normalised so GetName() always reports a policy.
        name = "none";
    } else if (policy == "n2g2") {
        add(0, 0, 0);
        add(1, 1, 1);
    } else if (policy == "n2g4") {
        // Consecutive threads alternate between the two CPU nodes.
        add(0, 0, 0);
        add(1, 1, 2);
        add(0, 0, 1);
        add(1, 1, 3);
    } else if (policy == "n2g2_hbm") {
        add(0, 2, 0);
        add(1, 3, 1);
    } else if (policy == "n2g4_hbm") {
        add(0, 2, 0);
        add(1, 3, 2);
        add(0, 2, 1);
        add(1, 3, 3);
    } else if (policy == "n8g4") {
        for (int32_t node = 0; node < 8; ++node)
            add(node, node, node / 2);
    } else if (policy == "n8g4_hbm") {
        for (int32_t node = 0; node < 8; ++node)
            add(node, node + 8, node / 2);
    } else if (policy == "g2") {
        for (int32_t device = 0; device < 2; ++device)
            add(-1, -1, device);
    } else if (policy == "g4") {
        for (int32_t device = 0; device < 4; ++device)
            add(-1, -1, device);
    } else {
        throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Unknown NUMA policy");
    }
}
|
|
|
|
/**
 * Look up the binding assigned to a given thread index.
 *
 * Bindings are handed out round-robin: the thread index is taken modulo the
 * number of configured bindings.
 *
 * @param thread zero-based thread index
 * @return the selected binding, or {-1, -1, -1} when the policy has no bindings
 */
NUMABinding NUMAHWPolicy::GetBinding(uint32_t thread) const {
    if (!bindings.empty()) {
        const auto slot = thread % bindings.size();
        return bindings.at(slot);
    }

    // No bindings configured: every field is -1.
    return NUMABinding{.cpu_node = -1, .mem_node = -1, .gpu = -1};
}
|
|
|
|
void NUMAHWPolicy::Bind(uint32_t thread) const {
|
|
Bind(GetBinding(thread));
|
|
}
|
|
|
|
/**
 * Apply an explicit binding: CPU node first, then memory node, then GPU.
 *
 * Each step is delegated to RunOnNode(), MemOnNode() and SelectGPU()
 * respectively; an exception thrown by any of them propagates to the caller.
 *
 * @param binding CPU node, memory node and GPU to apply
 */
void NUMAHWPolicy::Bind(const NUMABinding &binding) {
    NUMAHWPolicy::RunOnNode(binding.cpu_node);
    NUMAHWPolicy::MemOnNode(binding.mem_node);
    NUMAHWPolicy::SelectGPU(binding.gpu);
}
|
|
|
|
/**
 * Restrict execution to the CPUs of one NUMA node (libnuma numa_run_on_node).
 *
 * Does nothing when the build lacks JFJOCH_USE_NUMA, when libnuma reports
 * NUMA as unavailable, or when cpu_node is negative.
 *
 * @param cpu_node NUMA node index; negative means "do not bind"
 * @throws JFJochException (InputParameterInvalid) if cpu_node is not below the
 *         number of configured nodes
 */
void NUMAHWPolicy::RunOnNode(int32_t cpu_node) {
#ifdef JFJOCH_USE_NUMA
    // numa_available() returns -1 when the NUMA API cannot be used.
    if (numa_available() == -1)
        return;

    const auto node_count = numa_num_configured_nodes();

    if (cpu_node < 0)
        return;

    if (!(cpu_node < node_count))
        throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "CPU NUMA node out of bounds");

    numa_run_on_node(cpu_node);
#endif
}
|
|
|
|
/**
 * Restrict memory allocation to one NUMA node (libnuma numa_set_membind).
 *
 * Does nothing when the build lacks JFJOCH_USE_NUMA, when libnuma reports
 * NUMA as unavailable, or when mem_node is negative.
 *
 * @param mem_node NUMA node index; negative means "do not bind"
 * @throws JFJochException (InputParameterInvalid) if mem_node is not below the
 *         number of configured nodes
 */
void NUMAHWPolicy::MemOnNode(int32_t mem_node) {
#ifdef JFJOCH_USE_NUMA
    // numa_available() returns -1 when the NUMA API cannot be used.
    if (numa_available() == -1)
        return;

    const auto node_count = numa_num_configured_nodes();

    if (mem_node < 0)
        return;

    if (!(mem_node < node_count))
        throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Memory NUMA node out of bounds");

    // Build a node mask with only the requested node set, apply it, then release it.
    struct bitmask *node_mask = numa_allocate_nodemask();
    numa_bitmask_setbit(node_mask, mem_node);
    numa_set_membind(node_mask);
    numa_bitmask_free(node_mask);
#endif
}
|
|
|
|
void NUMAHWPolicy::SelectGPU(int32_t gpu) {
|
|
auto gpu_count = get_gpu_count();
|
|
|
|
if ((gpu_count > 0) && (gpu > 0)) {
|
|
if (gpu < gpu_count)
|
|
set_gpu(gpu);
|
|
else
|
|
throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "GPU device out of bounds");
|
|
}
|
|
}
|
|
|
|
void NUMAHWPolicy::SelectGPUAndItsNUMA(int32_t gpu) {
|
|
int numa = get_gpu_numa_node(gpu);
|
|
if (numa >= 0) {
|
|
RunOnNode(numa);
|
|
MemOnNode(numa);
|
|
}
|
|
set_gpu(gpu);
|
|
}
|
|
|
|
|
|
const std::string &NUMAHWPolicy::GetName() const {
|
|
return name;
|
|
}
|
|
|
|
|