diff --git a/common/CUDAWrapper.cpp b/common/CUDAWrapper.cpp
index 48420880..e2cf8733 100644
--- a/common/CUDAWrapper.cpp
+++ b/common/CUDAWrapper.cpp
@@ -11,4 +11,9 @@ int32_t get_gpu_count() {
 
 void set_gpu(int32_t dev_id) {}
 
+// Stub counterpart of the CUDA implementation: the NUMA node is always reported as unknown.
+int get_gpu_numa_node(int dev_id) {
+    return -1;
+}
+
 #endif
diff --git a/common/CUDAWrapper.cu b/common/CUDAWrapper.cu
index cea3683e..54e06740 100644
--- a/common/CUDAWrapper.cu
+++ b/common/CUDAWrapper.cu
@@ -1,6 +1,11 @@
 // SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute
 // SPDX-License-Identifier: GPL-3.0-only
 
+#include <cctype>
+#include <cstdio>
+#include <fstream>
+#include <string>
+
 #include "CUDAWrapper.h"
 #include "JFJochException.h"
 
@@ -34,3 +39,61 @@ void set_gpu(int32_t dev_id) {
         cuda_err(cudaSetDevice(dev_id));
     }
 }
+
+// Return the PCI address of a CUDA device as "domain:bus:device.function"
+// (e.g. "0000:65:00.0"), lower-cased so it can be used as a sysfs directory name.
+// Throws JFJochException (GPUCUDAError) if the address cannot be determined.
+static std::string get_cuda_device_pci_bus_id(int dev_id) {
+    // cudaDeviceGetPCIBusId gives the full address including domain and function.
+    char buf[64] = {0};
+    if (cudaDeviceGetPCIBusId(buf, static_cast<int>(sizeof(buf)), dev_id) != cudaSuccess) {
+        // Fallback: synthesize the address from the device properties.
+        // The function number is not reported there; it is typically ".0".
+        cudaDeviceProp prop;
+        const cudaError_t st = cudaGetDeviceProperties(&prop, dev_id);
+        if (st != cudaSuccess) {
+            throw JFJochException(JFJochExceptionCategory::GPUCUDAError, cudaGetErrorString(st));
+        }
+        std::snprintf(buf, sizeof(buf), "%04x:%02x:%02x.%u",
+                      static_cast<unsigned>(prop.pciDomainID), static_cast<unsigned>(prop.pciBusID),
+                      static_cast<unsigned>(prop.pciDeviceID), 0u);
+    }
+
+    // CUDA may report hex digits in upper case, while sysfs entries are lower case.
+    std::string pci_bus_id(buf);
+    for (char &c : pci_bus_id) {
+        c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
+    }
+    return pci_bus_id;
+}
+
+// Resolve the NUMA node of a CUDA device from its PCI address using Linux sysfs.
+// Returns:
+//   >=0  NUMA node index
+//   -1   if the NUMA node is not available/unknown (also when no GPU is present)
+// Throws JFJochException (InputParameterInvalid) if dev_id is out of range.
+int get_gpu_numa_node(int dev_id) {
+    const auto dev_count = get_gpu_count();
+    if (dev_count <= 0) return -1;
+    if (dev_id < 0 || dev_id >= dev_count) {
+        throw JFJochException(JFJochExceptionCategory::InputParameterInvalid, "Invalid CUDA device ID");
+    }
+
+    // No cudaSetDevice needed here; the device is queried by id.
+    // sysfs path of the PCI device, e.g. /sys/bus/pci/devices/0000:65:00.0/numa_node
+    const std::string sysfs_path = "/sys/bus/pci/devices/" + get_cuda_device_pci_bus_id(dev_id) + "/numa_node";
+
+    std::ifstream f(sysfs_path);
+    if (!f.is_open()) {
+        return -1;
+    }
+
+    // Test the extraction itself: good() would also be false when the number is
+    // terminated by end-of-file instead of a newline, discarding a valid value.
+    int numa = -1;
+    if (!(f >> numa)) {
+        return -1;
+    }
+    // Any negative value (sysfs uses -1 for "unknown") is reported as -1.
+    return numa >= 0 ? numa : -1;
+}
diff --git a/common/CUDAWrapper.h b/common/CUDAWrapper.h
index 65909811..e3325f97 100644
--- a/common/CUDAWrapper.h
+++ b/common/CUDAWrapper.h
@@ -8,5 +8,7 @@
 
 int32_t get_gpu_count();
 void set_gpu(int32_t dev_id);
+// NUMA node of the given CUDA device, or -1 if it is unknown.
+int get_gpu_numa_node(int dev_id);
 
 #endif //JUNGFRAUJOCH_CUDAWRAPPER_H
diff --git a/common/NUMAHWPolicy.cpp b/common/NUMAHWPolicy.cpp
index d3b1ce86..ffe4b8f0 100644
--- a/common/NUMAHWPolicy.cpp
+++ b/common/NUMAHWPolicy.cpp
@@ -108,6 +108,17 @@ void NUMAHWPolicy::SelectGPU(int32_t gpu) {
     }
 }
 
+// Apply RunOnNode/MemOnNode for the NUMA node the GPU is attached to (when that
+// node is known), then select the GPU. With an unknown node only the GPU is selected.
+void NUMAHWPolicy::SelectGPUAndItsNUMA(int32_t gpu) {
+    const int numa = get_gpu_numa_node(gpu);
+    if (numa >= 0) {
+        RunOnNode(numa);
+        MemOnNode(numa);
+    }
+    set_gpu(gpu);
+}
+
 const std::string &NUMAHWPolicy::GetName() const {
     return name;
 }
diff --git a/common/NUMAHWPolicy.h b/common/NUMAHWPolicy.h
index 4c99f3a8..4c8e503a 100644
--- a/common/NUMAHWPolicy.h
+++ b/common/NUMAHWPolicy.h
@@ -30,6 +30,7 @@ public:
     static void RunOnNode(int32_t cpu_node);
     static void MemOnNode(int32_t mem_node);
     static void SelectGPU(int32_t gpu);
+    static void SelectGPUAndItsNUMA(int32_t gpu);
 };
 
 #endif //JUNGFRAUJOCH_NUMAHWPOLICY_H
diff --git a/image_analysis/indexing/IndexerThreadPool.cpp b/image_analysis/indexing/IndexerThreadPool.cpp
index fe5fe5b7..dacf1bc2 100644
--- a/image_analysis/indexing/IndexerThreadPool.cpp
+++ b/image_analysis/indexing/IndexerThreadPool.cpp
@@ -67,9 +67,19 @@ std::future > IndexerThreadPool::Run(const Diffrac
     return result;
 }
 
-void IndexerThreadPool::Worker(size_t threadIndex, const NUMAHWPolicy &numa_policy, const IndexingSettings &settings) {
+void IndexerThreadPool::Worker(int32_t threadIndex, const NUMAHWPolicy &numa_policy, const IndexingSettings &settings) {
     try {
+#ifdef JFJOCH_USE_CUDA
+        // With GPUs present, workers are spread round-robin over the devices and follow
+        // the NUMA node of their GPU; numa_policy is only used when no GPU is found.
+        const auto gpu_count = get_gpu_count();
+        if (gpu_count > 0)
+            NUMAHWPolicy::SelectGPUAndItsNUMA(threadIndex % gpu_count);
+        else
+            numa_policy.Bind(threadIndex);
+#else
         numa_policy.Bind(threadIndex);
+#endif
     } catch (...) {
         // NUMA policy errors are not critical and should be ignored for the time being.
     }
diff --git a/image_analysis/indexing/IndexerThreadPool.h b/image_analysis/indexing/IndexerThreadPool.h
index b857b68f..5eae63ef 100644
--- a/image_analysis/indexing/IndexerThreadPool.h
+++ b/image_analysis/indexing/IndexerThreadPool.h
@@ -39,7 +39,7 @@ class IndexerThreadPool {
     std::latch workers_ready;
     bool stop;
 
-    void Worker(size_t threadIndex, const NUMAHWPolicy &numa_policy, const IndexingSettings& settings);
+    void Worker(int32_t threadIndex, const NUMAHWPolicy &numa_policy, const IndexingSettings& settings);
 public:
     IndexerThreadPool(const IndexingSettings& settings, const NUMAHWPolicy &numa_policy = NUMAHWPolicy());
     ~IndexerThreadPool();