From 40644d0c9607507cef94232ee34237529b03f1bd Mon Sep 17 00:00:00 2001 From: Achim Gsell Date: Sat, 12 Oct 2024 19:34:22 +0200 Subject: [PATCH] example from Eurohack24 added --- examples/eurohack24/read_setnparticles_ats.cu | 87 +++++++++++++++++ .../eurohack24/read_setnparticles_ats.sbatch | 14 +++ .../eurohack24/read_setnparticles_managed.cu | 96 +++++++++++++++++++ examples/eurohack24/setnparticule.sbatch | 15 +++ .../eurohack24/write_setnparticles.sbatch | 15 +++ 5 files changed, 227 insertions(+) create mode 100644 examples/eurohack24/read_setnparticles_ats.cu create mode 100644 examples/eurohack24/read_setnparticles_ats.sbatch create mode 100644 examples/eurohack24/read_setnparticles_managed.cu create mode 100644 examples/eurohack24/setnparticule.sbatch create mode 100644 examples/eurohack24/write_setnparticles.sbatch diff --git a/examples/eurohack24/read_setnparticles_ats.cu b/examples/eurohack24/read_setnparticles_ats.cu new file mode 100644 index 0000000..bf87c0c --- /dev/null +++ b/examples/eurohack24/read_setnparticles_ats.cu @@ -0,0 +1,87 @@ +/* + Copyright (c) 2006-2015, The Regents of the University of California, + through Lawrence Berkeley National Laboratory (subject to receipt of any + required approvals from the U.S. Dept. of Energy) and the Paul Scherrer + Institut (Switzerland). All rights reserved. + + License: see file COPYING in top level of source distribution. +*/ + +#include "H5hut.h" +#include "examples.h" + +#include + +#include "cuda.h" +// name of input file +const char* fname = "example_setnparticles.h5"; + +// H5hut verbosity level +const h5_int64_t h5_verbosity = H5_VERBOSE_DEFAULT; + +// #define USE_CUDA_KERNEL +#ifdef USE_CUDA_KERNEL +__global__ +#endif +void kernel(h5_int32_t *data, h5_size_t n) +{ + for (h5_size_t i=0; i>>(data, num_particles); +#else + kernel(data, num_particles); +#endif + int ec=cudaDeviceSynchronize(); + printf("%d\n", ec); + for (int i = 0; i < num_particles; i++) { + printf ("[proc %d]: local index = %d, value = %d\n", + comm_rank, i, data[i]); + } + + // cleanup + free (data); + MPI_Finalize (); + return 0; +} diff --git a/examples/eurohack24/read_setnparticles_ats.sbatch b/examples/eurohack24/read_setnparticles_ats.sbatch new file mode 100644 index 0000000..3c012f8 --- /dev/null +++ b/examples/eurohack24/read_setnparticles_ats.sbatch @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --uenv=eurohack/24.9:rc1 +#SBATCH --view=modules +#SBATCH --ntasks-per-node=1 +#SBATCH --nodes=1 +#SBATCH --output=out-%j.out +#SBATCH -C gpu +#SBATCH --partition=debug +#SBATCH --time=00:05:00 +# +export NSYS_NVTX_PROFILER_REGISTER_ONLY=0 +export CUDA_LAUNCH_BLOCKING=1 +EXE="${HOME}/src/H5hut/src/examples/H5Part/read_setnparticles_ats" +nsys profile -t cuda,nvtx,mpi -o report.%p $EXE diff --git a/examples/eurohack24/read_setnparticles_managed.cu b/examples/eurohack24/read_setnparticles_managed.cu new file mode 100644 index 0000000..cf89afc --- /dev/null +++ b/examples/eurohack24/read_setnparticles_managed.cu @@ -0,0 +1,96 @@ +/* + Copyright (c) 2006-2015, The Regents of the University of California, + through Lawrence Berkeley National Laboratory (subject to receipt of any + required approvals from the U.S. Dept. of Energy) and the Paul Scherrer + Institut (Switzerland). All rights reserved. + + License: see file COPYING in top level of source distribution. +*/ + +#include "H5hut.h" +#include "examples.h" + +#include + +#include "cuda.h" +// name of input file +const char* fname = "example_setnparticles.h5"; + +// H5hut verbosity level +const h5_int64_t h5_verbosity = H5_VERBOSE_DEFAULT; + +#define USE_CUDA_KERNEL 1 +#ifdef USE_CUDA_KERNEL +__global__ +#endif +void kernel(h5_int32_t *data, h5_size_t n) +{ + for (h5_size_t i=0; i>>(data, num_particles); +#else + kernel(data, num_particles); +#endif + int ec=cudaDeviceSynchronize(); + printf("%d\n", ec); + for (int i = 0; i < num_particles; i++) { + printf ("[proc %d]: local index = %d, value = %d\n", + comm_rank, i, data[i]); + } + + // cleanup +#ifdef USE_CUDA_KERNEL + cudaFree(data); +#else + free (data); +#endif + MPI_Finalize (); + return 0; +} diff --git a/examples/eurohack24/setnparticule.sbatch b/examples/eurohack24/setnparticule.sbatch new file mode 100644 index 0000000..e157c72 --- /dev/null +++ b/examples/eurohack24/setnparticule.sbatch @@ -0,0 +1,15 @@ +#!/bin/bash +#SBATCH --uenv=eurohack/24.9:rc1 +#SBATCH --view=modules +#SBATCH --ntasks-per-node=1 +#SBATCH --nodes=1 +#SBATCH --output=out-%j.out +#SBATCH -C gpu +#SBATCH --time=00:05:00 +#SBATCH --reservation=eurohack24 +# +export CUDA_LAUNCH_BLOCKING=1 +# + +#nsys profile -t cuda,mpi -o report.%p read_setnparticles_managed +ncu --kernel-name kernel --launch-skip 0 --launch-count 1 -o report.%p "read_setnparticles_managed" diff --git a/examples/eurohack24/write_setnparticles.sbatch b/examples/eurohack24/write_setnparticles.sbatch new file mode 100644 index 0000000..5f4d5f9 --- /dev/null +++ b/examples/eurohack24/write_setnparticles.sbatch @@ -0,0 +1,15 @@ +#!/bin/bash +#SBATCH --uenv=eurohack/24.9:rc1 +#SBATCH --view=modules +#SBATCH --ntasks-per-node=16 +#SBATCH --nodes=4 +#SBATCH --output=out-%j.out +#SBATCH -C gpu +#SBATCH --partition=debug +#SBATCH --time=00:05:00 +# +export NSYS_NVTX_PROFILER_REGISTER_ONLY=0 +export CUDA_LAUNCH_BLOCKING=1 +EXE="${HOME}/src/H5hut/src/examples/H5Part/write_setnparticles" +srun -n 64 "$EXE" +#nsys profile -t cuda,nvtx,mpi -o report.%p $EXE