example from Eurohack24 added

H5hut/src

Fork 0

Browse Source

This commit is contained in:

gsell

2024-10-12 19:34:22 +02:00

parent c41bd6ec54

commit 40644d0c96

5 changed files with 227 additions and 0 deletions

examples/eurohack24/read_setnparticles_ats.cu

+87

View File

@@ -0,0 +1,87 @@
 /*
   Copyright (c) 2006-2015, The Regents of the University of California,
   through Lawrence Berkeley National Laboratory (subject to receipt of any
   required approvals from the U.S. Dept. of Energy) and the Paul Scherrer
   Institut (Switzerland).  All rights reserved.
   License: see file COPYING in top level of source distribution.
 */
 #include "H5hut.h"
 #include "examples.h"
 #include <stdlib.h>
 #include "cuda.h"
 // name of input file
 const char* fname = "example_setnparticles.h5";
 // H5hut verbosity level
 const h5_int64_t h5_verbosity = H5_VERBOSE_DEFAULT;
 // #define USE_CUDA_KERNEL
 /*
   Add 2 to each of the first n elements of data, in place.

   Built as a CUDA kernel when USE_CUDA_KERNEL is defined and as an ordinary
   host function otherwise.

   As a kernel, the elements are divided among all launched threads: each
   thread starts at its global 1-D index and advances by the total number
   of launched threads. Every element is therefore updated exactly once for
   any 1-D launch configuration, including the <<<1, 1>>> launch, where a
   single thread walks the whole array.

   data  array of at least n values; as a kernel it must be addressable
         from the device
   n     number of elements to update
 */
 #ifdef USE_CUDA_KERNEL
 __global__
 #endif
 void kernel(h5_int32_t *data, h5_size_t n)
 {
 #ifdef USE_CUDA_KERNEL
 	// widen before multiplying so the index math is not done in 32 bits
 	const h5_size_t first = (h5_size_t)blockIdx.x * blockDim.x + threadIdx.x;
 	const h5_size_t step = (h5_size_t)gridDim.x * blockDim.x;
 #else
 	// host build: one caller processes every element
 	const h5_size_t first = 0;
 	const h5_size_t step = 1;
 #endif
 	for (h5_size_t i = first; i < n; i += step) {
 		data[i] += 2;
 	}
 }
 /*
   Example driver: every MPI rank reads its share of the "data" dataset from
   step 0 of the input file, adds 2 to each value through kernel() and
   prints the result.

   The buffer is allocated with plain calloc(). When USE_CUDA_KERNEL is
   defined that host pointer is handed directly to a device kernel.
   NOTE(review): this presumably relies on the GPU being able to access
   pageable host memory (ATS/HMM) -- confirm on the target system.

   Returns EXIT_SUCCESS, or EXIT_FAILURE if the device path reported a CUDA
   error. Aborts the MPI job if the buffer cannot be allocated.
 */
 int
 main (
         int argc, char* argv[]
         ){
         int status = EXIT_SUCCESS;

         // initialize MPI & H5hut
         MPI_Init (&argc, &argv);
         MPI_Comm comm = MPI_COMM_WORLD;
         int comm_size = 1;
         MPI_Comm_size (comm, &comm_size);
         int comm_rank = 0;
         MPI_Comm_rank (comm, &comm_rank);
         H5AbortOnError ();
         H5SetVerbosityLevel (h5_verbosity);

         // open file and go to first step
         h5_file_t file = H5OpenFile (fname, H5_O_RDONLY, H5_PROP_DEFAULT);
         H5SetStep (file, 0);

         // compute number of particles this process has to read;
         // the last rank additionally takes the remainder of the division
         h5_ssize_t num_particles_total = H5PartGetNumParticles (file);
         h5_ssize_t num_particles = num_particles_total / comm_size;
         if (comm_rank+1 == comm_size)
                 num_particles += num_particles_total % comm_size;
         printf ("[proc %d]: particles in view: %lld\n",
                 comm_rank, (long long)num_particles);
         printf ("[proc %d]: total number of particles: %lld\n",
                 comm_rank, (long long)num_particles_total);

         // set number of particles
         H5PartSetNumParticles (file, num_particles);

         // read data; calloc(0, ...) may legitimately return NULL, so only a
         // NULL result for a non-empty request is treated as a failure
         h5_int32_t* data = (h5_int32_t*)calloc (num_particles, sizeof (*data));
         if (data == NULL && num_particles > 0) {
                 fprintf (stderr, "[proc %d]: cannot allocate buffer for %lld particles\n",
                          comm_rank, (long long)num_particles);
                 MPI_Abort (comm, EXIT_FAILURE);
         }
         H5PartReadDataInt32 (file, "data", data);
         H5CloseFile (file);

         // modify the data on the device or on the host
 #ifdef USE_CUDA_KERNEL
         kernel<<<1, 1>>>(data, (h5_size_t)num_particles);
         // a failed launch is only visible through cudaGetLastError()
         cudaError_t launch_ec = cudaGetLastError ();
 #else
         kernel (data, (h5_size_t)num_particles);
         cudaError_t launch_ec = cudaSuccess;
 #endif
         cudaError_t sync_ec = cudaDeviceSynchronize ();
         cudaError_t ec = (launch_ec != cudaSuccess) ? launch_ec : sync_ec;
         printf ("%d\n", (int)ec);
         if (ec != cudaSuccess) {
                 fprintf (stderr, "[proc %d]: CUDA error: %s\n",
                          comm_rank, cudaGetErrorString (ec));
 #ifdef USE_CUDA_KERNEL
                 // only the device path depends on CUDA for its result
                 status = EXIT_FAILURE;
 #endif
         }

         // print data; the index has the same width as the particle count
         for (h5_ssize_t i = 0; i < num_particles; i++) {
                 printf ("[proc %d]: local index = %lld, value = %d\n",
                         comm_rank, (long long)i, (int)data[i]);
         }

         // cleanup
         free (data);
         MPI_Finalize ();
         return status;
 }

examples/eurohack24/read_setnparticles_ats.sbatch

+14

View File

@@ -0,0 +1,14 @@
 #!/bin/bash
 #SBATCH --uenv=eurohack/24.9:rc1
 #SBATCH --view=modules
 #SBATCH --ntasks-per-node=1
 #SBATCH --nodes=1
 #SBATCH --output=out-%j.out
 #SBATCH -C gpu
 #SBATCH --partition=debug
 #SBATCH --time=00:05:00
 #
 # Profile the read_setnparticles_ats example with Nsight Systems on one
 # node with a single task. The report is written to report.<pid>.
 #
 # NOTE(review): the executable is started directly, not through srun as in
 # write_setnparticles.sbatch -- confirm this is intended for the MPI
 # library in use.
 export NSYS_NVTX_PROFILER_REGISTER_ONLY=0
 # make kernel launches synchronous
 export CUDA_LAUNCH_BLOCKING=1
 EXE="${HOME}/src/H5hut/src/examples/H5Part/read_setnparticles_ats"
 # quote the path so a home directory containing whitespace cannot split it
 nsys profile -t cuda,nvtx,mpi -o report.%p "$EXE"

examples/eurohack24/read_setnparticles_managed.cu

+96

View File

@@ -0,0 +1,96 @@
 /*
   Copyright (c) 2006-2015, The Regents of the University of California,
   through Lawrence Berkeley National Laboratory (subject to receipt of any
   required approvals from the U.S. Dept. of Energy) and the Paul Scherrer
   Institut (Switzerland).  All rights reserved.
   License: see file COPYING in top level of source distribution.
 */
 #include "H5hut.h"
 #include "examples.h"
 #include <stdlib.h>
 #include "cuda.h"
 // name of input file
 const char* fname = "example_setnparticles.h5";
 // H5hut verbosity level
 const h5_int64_t h5_verbosity = H5_VERBOSE_DEFAULT;
 #define USE_CUDA_KERNEL 1
 /*
   Add 2 to each of the first n elements of data, in place.

   Built as a CUDA kernel when USE_CUDA_KERNEL is defined and as an ordinary
   host function otherwise.

   As a kernel, the elements are divided among all launched threads: each
   thread starts at its global 1-D index and advances by the total number
   of launched threads. Every element is therefore updated exactly once for
   any 1-D launch configuration, including the <<<1, 1>>> launch, where a
   single thread walks the whole array.

   data  array of at least n values; as a kernel it must be addressable
         from the device
   n     number of elements to update
 */
 #ifdef USE_CUDA_KERNEL
 __global__
 #endif
 void kernel(h5_int32_t *data, h5_size_t n)
 {
 #ifdef USE_CUDA_KERNEL
 	// widen before multiplying so the index math is not done in 32 bits
 	const h5_size_t first = (h5_size_t)blockIdx.x * blockDim.x + threadIdx.x;
 	const h5_size_t step = (h5_size_t)gridDim.x * blockDim.x;
 #else
 	// host build: one caller processes every element
 	const h5_size_t first = 0;
 	const h5_size_t step = 1;
 #endif
 	for (h5_size_t i = first; i < n; i += step) {
 		data[i] += 2;
 	}
 }
 /*
   Example driver: every MPI rank reads its share of the "data" dataset from
   step 0 of the input file, adds 2 to each value through kernel() and
   prints the result.

   When USE_CUDA_KERNEL is defined the buffer is allocated with
   cudaMallocManaged() and updated by a device kernel; otherwise it is
   allocated with calloc() and updated on the host.

   Returns EXIT_SUCCESS, or EXIT_FAILURE if the device path reported a CUDA
   error. Aborts the MPI job if the buffer cannot be allocated.
 */
 int
 main (
         int argc, char* argv[]
         ){
         int status = EXIT_SUCCESS;

         // initialize MPI & H5hut
         MPI_Init (&argc, &argv);
         MPI_Comm comm = MPI_COMM_WORLD;
         int comm_size = 1;
         MPI_Comm_size (comm, &comm_size);
         int comm_rank = 0;
         MPI_Comm_rank (comm, &comm_rank);
         H5AbortOnError ();
         H5SetVerbosityLevel (h5_verbosity);

         // open file and go to first step
         h5_file_t file = H5OpenFile (fname, H5_O_RDONLY, H5_PROP_DEFAULT);
         H5SetStep (file, 0);

         // compute number of particles this process has to read;
         // the last rank additionally takes the remainder of the division
         h5_ssize_t num_particles_total = H5PartGetNumParticles (file);
         h5_ssize_t num_particles = num_particles_total / comm_size;
         if (comm_rank+1 == comm_size)
                 num_particles += num_particles_total % comm_size;
         printf ("[proc %d]: particles in view: %lld\n",
                 comm_rank, (long long)num_particles);
         printf ("[proc %d]: total number of particles: %lld\n",
                 comm_rank, (long long)num_particles_total);

         // set number of particles
         H5PartSetNumParticles (file, num_particles);

         // allocate the buffer and read the data
         h5_int32_t *data = NULL;
 #ifdef USE_CUDA_KERNEL
         // cudaMallocManaged() rejects a size of 0, so a rank without
         // particles still requests room for one element
         size_t num_bytes =
                 (num_particles > 0 ? (size_t)num_particles : 1) * sizeof (*data);
         cudaError_t alloc_ec = cudaMallocManaged ((void **)&data, num_bytes);
         if (alloc_ec != cudaSuccess) {
                 fprintf (stderr, "[proc %d]: cudaMallocManaged failed: %s\n",
                          comm_rank, cudaGetErrorString (alloc_ec));
                 MPI_Abort (comm, EXIT_FAILURE);
         }
 #else
         // calloc(0, ...) may legitimately return NULL, so only a NULL
         // result for a non-empty request is treated as a failure
         data = (h5_int32_t*)calloc (num_particles, sizeof (*data));
         if (data == NULL && num_particles > 0) {
                 fprintf (stderr, "[proc %d]: cannot allocate buffer for %lld particles\n",
                          comm_rank, (long long)num_particles);
                 MPI_Abort (comm, EXIT_FAILURE);
         }
 #endif
         H5PartReadDataInt32 (file, "data", data);
         H5CloseFile (file);

         // modify the data on the device or on the host
 #ifdef USE_CUDA_KERNEL
         kernel<<<1, 1>>>(data, (h5_size_t)num_particles);
         // a failed launch is only visible through cudaGetLastError()
         cudaError_t launch_ec = cudaGetLastError ();
 #else
         kernel (data, (h5_size_t)num_particles);
         cudaError_t launch_ec = cudaSuccess;
 #endif
         cudaError_t sync_ec = cudaDeviceSynchronize ();
         cudaError_t ec = (launch_ec != cudaSuccess) ? launch_ec : sync_ec;
         printf ("%d\n", (int)ec);
         if (ec != cudaSuccess) {
                 fprintf (stderr, "[proc %d]: CUDA error: %s\n",
                          comm_rank, cudaGetErrorString (ec));
 #ifdef USE_CUDA_KERNEL
                 // only the device path depends on CUDA for its result
                 status = EXIT_FAILURE;
 #endif
         }

         // print data; the index has the same width as the particle count
         for (h5_ssize_t i = 0; i < num_particles; i++) {
                 printf ("[proc %d]: local index = %lld, value = %d\n",
                         comm_rank, (long long)i, (int)data[i]);
         }

         // cleanup
 #ifdef USE_CUDA_KERNEL
         cudaError_t free_ec = cudaFree (data);
         if (free_ec != cudaSuccess) {
                 fprintf (stderr, "[proc %d]: cudaFree failed: %s\n",
                          comm_rank, cudaGetErrorString (free_ec));
                 status = EXIT_FAILURE;
         }
 #else
         free (data);
 #endif
         MPI_Finalize ();
         return status;
 }

examples/eurohack24/setnparticule.sbatch

+15

View File

@@ -0,0 +1,15 @@
 #!/bin/bash
 #SBATCH --uenv=eurohack/24.9:rc1
 #SBATCH --view=modules
 #SBATCH --nodes=1
 #SBATCH --ntasks-per-node=1
 #SBATCH --constraint=gpu
 #SBATCH --reservation=eurohack24
 #SBATCH --time=00:05:00
 #SBATCH --output=out-%j.out
 #
 # Profile the first launch of the kernel named "kernel" in
 # read_setnparticles_managed with Nsight Compute; the report is written to
 # report.<pid>.
 #
 # make kernel launches synchronous
 export CUDA_LAUNCH_BLOCKING=1
 #
 # Nsight Systems alternative, kept for reference:
 #nsys profile -t cuda,mpi -o report.%p read_setnparticles_managed
 EXE="read_setnparticles_managed"
 ncu --kernel-name kernel --launch-skip 0 --launch-count 1 -o report.%p "${EXE}"

examples/eurohack24/write_setnparticles.sbatch

+15

View File

@@ -0,0 +1,15 @@
 #!/bin/bash
 #SBATCH --uenv=eurohack/24.9:rc1
 #SBATCH --view=modules
 #SBATCH --nodes=4
 #SBATCH --ntasks-per-node=16
 #SBATCH --constraint=gpu
 #SBATCH --partition=debug
 #SBATCH --time=00:05:00
 #SBATCH --output=out-%j.out
 #
 # Run the write_setnparticles example with 64 MPI tasks
 # (4 nodes x 16 tasks per node).
 #
 export NSYS_NVTX_PROFILER_REGISTER_ONLY=0
 # make kernel launches synchronous
 export CUDA_LAUNCH_BLOCKING=1
 EXE="${HOME}/src/H5hut/src/examples/H5Part/write_setnparticles"
 srun --ntasks=64 "${EXE}"
 # Nsight Systems alternative, kept for reference:
 #nsys profile -t cuda,nvtx,mpi -o report.%p $EXE