Merge branch 'int8' into 'master'

Add support for hdf5 int8 types (int8, uint8). See merge request H5hut/src!7
Add support for hdf5 int8 types (int8, uint8)
2024-10-31 13:29:33 +01:00 · 2024-10-30 16:46:59 +01:00 · 2024-10-12 19:34:22 +02:00
8 changed files with 250 additions and 0 deletions
@@ -0,0 +1,87 @@
+/*
+  Copyright (c) 2006-2015, The Regents of the University of California,
+  through Lawrence Berkeley National Laboratory (subject to receipt of any
+  required approvals from the U.S. Dept. of Energy) and the Paul Scherrer
+  Institut (Switzerland).  All rights reserved.
+
+  License: see file COPYING in top level of source distribution.
+*/
+
+#include "H5hut.h"
+#include "examples.h"
+
+#include <stdlib.h>
+
+#include "cuda.h"
+// name of input file
+const char* fname = "example_setnparticles.h5";
+
+// H5hut verbosity level
+const h5_int64_t h5_verbosity = H5_VERBOSE_DEFAULT;
+
+// #define USE_CUDA_KERNEL
+/*
+  Add 2 to each of the first n elements of data, in place.
+
+  With USE_CUDA_KERNEL defined this is compiled as a CUDA kernel that uses a
+  grid-stride loop over a 1-D launch, so every element is updated exactly
+  once for any grid/block size (including <<<1, 1>>>). Without the macro it
+  is an ordinary host function that walks the array serially.
+
+  data  array of at least n values; must be addressable by whoever runs
+        the loop (device for the kernel build, host otherwise)
+  n     number of elements to update
+*/
+#ifdef USE_CUDA_KERNEL
+__global__
+#endif
+void kernel(h5_int32_t *data, h5_size_t n)
+{
+#ifdef USE_CUDA_KERNEL
+	// widen before multiplying so the flat index cannot overflow 32 bits
+	const h5_size_t first = (h5_size_t)blockIdx.x * blockDim.x + threadIdx.x;
+	const h5_size_t stride = (h5_size_t)gridDim.x * blockDim.x;
+#else
+	const h5_size_t first = 0;
+	const h5_size_t stride = 1;
+#endif
+	for (h5_size_t i = first; i < n; i += stride) {
+		data[i] += 2;
+	}
+}
+
+
+/*
+  Example driver: every MPI rank reads its share of the "data" dataset from
+  step 0 of the input file, adds 2 to each value through kernel(), and prints
+  the result.
+
+  The particles are divided evenly across the ranks; the last rank also takes
+  the remainder. The buffer comes from calloc() even when kernel() is built
+  as a CUDA kernel, so that configuration hands a plain host pointer to the
+  device.
+  NOTE(review): presumably this relies on the GPU being able to access
+  pageable host memory -- confirm for the target system.
+
+  Returns 0; aborts the MPI job if the read buffer cannot be allocated.
+*/
+int
+main (
+        int argc, char* argv[]
+        ){
+
+        // initialize MPI & H5hut
+        MPI_Init (&argc, &argv);
+        MPI_Comm comm = MPI_COMM_WORLD;
+        int comm_size = 1;
+        MPI_Comm_size (comm, &comm_size);
+        int comm_rank = 0;
+        MPI_Comm_rank (comm, &comm_rank);
+        H5AbortOnError ();
+        H5SetVerbosityLevel (h5_verbosity);
+
+        // open file and go to first step
+        h5_file_t file = H5OpenFile (fname, H5_O_RDONLY, H5_PROP_DEFAULT);
+        H5SetStep (file, 0);
+
+        // compute number of particles this process has to read
+        h5_ssize_t num_particles_total = H5PartGetNumParticles (file);
+        h5_ssize_t num_particles = num_particles_total / comm_size;
+        if (comm_rank+1 == comm_size)
+                num_particles += num_particles_total % comm_size;
+
+        // both counts are signed, so cast to the type %lld expects
+        printf ("[proc %d]: particles in view: %lld\n", comm_rank, (long long)num_particles);
+        printf ("[proc %d]: total number of particles: %lld\n",
+                comm_rank, (long long)num_particles_total);
+
+        // set number of particles
+        H5PartSetNumParticles (file, num_particles);
+
+        // allocate the read buffer; calloc() may return NULL for a zero-sized
+        // request, so NULL is only an error when memory was actually needed
+        h5_int32_t* data = (h5_int32_t*)calloc (num_particles, sizeof (*data));
+        if (data == NULL && num_particles > 0) {
+                fprintf (stderr, "[proc %d]: cannot allocate buffer for %lld particles\n",
+                         comm_rank, (long long)num_particles);
+                MPI_Abort (comm, 1);
+        }
+
+        // read data
+        H5PartReadDataInt32 (file, "data", data);
+        H5CloseFile (file);
+
+#ifdef USE_CUDA_KERNEL
+        kernel<<<1, 1>>>(data, num_particles);
+        // a rejected launch is not reported by the launch statement itself;
+        // peek (rather than get) so the status printed below is unaffected
+        cudaError_t launch_ec = cudaPeekAtLastError ();
+        if (launch_ec != cudaSuccess) {
+                fprintf (stderr, "[proc %d]: kernel launch failed: %s\n",
+                         comm_rank, cudaGetErrorString (launch_ec));
+        }
+#else
+        kernel(data, num_particles);
+#endif
+        // wait for the device and print the raw status code
+        cudaError_t ec = cudaDeviceSynchronize ();
+        printf("%d\n", (int)ec);
+        if (ec != cudaSuccess) {
+                fprintf (stderr, "[proc %d]: cudaDeviceSynchronize failed: %s\n",
+                         comm_rank, cudaGetErrorString (ec));
+        }
+
+        // print data; the index has the same width as the particle count so
+        // it cannot wrap for large views
+        for (h5_ssize_t i = 0; i < num_particles; i++) {
+                printf ("[proc %d]: local index = %lld, value = %d\n",
+                        comm_rank, (long long)i, data[i]);
+        }
+
+        // cleanup
+        free (data);
+        MPI_Finalize ();
+        return 0;
+}
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH --uenv=eurohack/24.9:rc1
+#SBATCH --view=modules
+#SBATCH --ntasks-per-node=1
+#SBATCH --nodes=1
+#SBATCH --output=out-%j.out
+#SBATCH -C gpu
+#SBATCH --partition=debug
+#SBATCH --time=00:05:00
+# Profiling environment: CUDA_LAUNCH_BLOCKING=1 makes kernel launches synchronous on the host.
+export NSYS_NVTX_PROFILER_REGISTER_ONLY=0
+export CUDA_LAUNCH_BLOCKING=1
+EXE="${HOME}/src/H5hut/src/examples/H5Part/read_setnparticles_ats"
+# Run the example under nsys; the path is quoted so it is never word-split.
+nsys profile -t cuda,nvtx,mpi -o report.%p "$EXE"
@@ -0,0 +1,96 @@
+/*
+  Copyright (c) 2006-2015, The Regents of the University of California,
+  through Lawrence Berkeley National Laboratory (subject to receipt of any
+  required approvals from the U.S. Dept. of Energy) and the Paul Scherrer
+  Institut (Switzerland).  All rights reserved.
+
+  License: see file COPYING in top level of source distribution.
+*/
+
+#include "H5hut.h"
+#include "examples.h"
+
+#include <stdlib.h>
+
+#include "cuda.h"
+// name of input file
+const char* fname = "example_setnparticles.h5";
+
+// H5hut verbosity level
+const h5_int64_t h5_verbosity = H5_VERBOSE_DEFAULT;
+
+#define USE_CUDA_KERNEL 1
+/*
+  Add 2 to each of the first n elements of data, in place.
+
+  With USE_CUDA_KERNEL defined this is compiled as a CUDA kernel that uses a
+  grid-stride loop over a 1-D launch, so every element is updated exactly
+  once for any grid/block size (including <<<1, 1>>>). Without the macro it
+  is an ordinary host function that walks the array serially.
+
+  data  array of at least n values; must be addressable by whoever runs
+        the loop (device for the kernel build, host otherwise)
+  n     number of elements to update
+*/
+#ifdef USE_CUDA_KERNEL
+__global__
+#endif
+void kernel(h5_int32_t *data, h5_size_t n)
+{
+#ifdef USE_CUDA_KERNEL
+	// widen before multiplying so the flat index cannot overflow 32 bits
+	const h5_size_t first = (h5_size_t)blockIdx.x * blockDim.x + threadIdx.x;
+	const h5_size_t stride = (h5_size_t)gridDim.x * blockDim.x;
+#else
+	const h5_size_t first = 0;
+	const h5_size_t stride = 1;
+#endif
+	for (h5_size_t i = first; i < n; i += stride) {
+		data[i] += 2;
+	}
+}
+
+
+/*
+  Example driver: every MPI rank reads its share of the "data" dataset from
+  step 0 of the input file, adds 2 to each value through kernel(), and prints
+  the result.
+
+  The particles are divided evenly across the ranks; the last rank also takes
+  the remainder. With USE_CUDA_KERNEL defined the buffer is allocated with
+  cudaMallocManaged() so the same pointer is used by the H5hut read on the
+  host and by the kernel; otherwise it comes from calloc().
+
+  Returns 0; aborts the MPI job if the read buffer cannot be allocated.
+*/
+int
+main (
+        int argc, char* argv[]
+        ){
+
+        // initialize MPI & H5hut
+        MPI_Init (&argc, &argv);
+        MPI_Comm comm = MPI_COMM_WORLD;
+        int comm_size = 1;
+        MPI_Comm_size (comm, &comm_size);
+        int comm_rank = 0;
+        MPI_Comm_rank (comm, &comm_rank);
+        H5AbortOnError ();
+        H5SetVerbosityLevel (h5_verbosity);
+
+        // open file and go to first step
+        h5_file_t file = H5OpenFile (fname, H5_O_RDONLY, H5_PROP_DEFAULT);
+        H5SetStep (file, 0);
+
+        // compute number of particles this process has to read
+        h5_ssize_t num_particles_total = H5PartGetNumParticles (file);
+        h5_ssize_t num_particles = num_particles_total / comm_size;
+        if (comm_rank+1 == comm_size)
+                num_particles += num_particles_total % comm_size;
+
+        // both counts are signed, so cast to the type %lld expects
+        printf ("[proc %d]: particles in view: %lld\n", comm_rank, (long long)num_particles);
+        printf ("[proc %d]: total number of particles: %lld\n",
+                comm_rank, (long long)num_particles_total);
+
+        // set number of particles
+        H5PartSetNumParticles (file, num_particles);
+
+        // allocate the read buffer
+        h5_int32_t *data = NULL;
+#ifdef USE_CUDA_KERNEL
+        // cudaMallocManaged() rejects a size of 0, so a rank without
+        // particles still requests room for one element
+        size_t num_alloc = num_particles > 0 ? (size_t)num_particles : 1;
+        cudaError_t alloc_ec = cudaMallocManaged ((void **)&data, num_alloc * sizeof (*data));
+        if (alloc_ec != cudaSuccess) {
+                fprintf (stderr, "[proc %d]: cudaMallocManaged failed: %s\n",
+                         comm_rank, cudaGetErrorString (alloc_ec));
+                MPI_Abort (comm, 1);
+        }
+#else
+        // calloc() may return NULL for a zero-sized request, so NULL is only
+        // an error when memory was actually needed
+        data = (h5_int32_t*)calloc (num_particles, sizeof (*data));
+        if (data == NULL && num_particles > 0) {
+                fprintf (stderr, "[proc %d]: cannot allocate buffer for %lld particles\n",
+                         comm_rank, (long long)num_particles);
+                MPI_Abort (comm, 1);
+        }
+#endif
+
+        // read data
+        H5PartReadDataInt32 (file, "data", data);
+        H5CloseFile (file);
+
+#ifdef USE_CUDA_KERNEL
+        kernel<<<1, 1>>>(data, num_particles);
+        // a rejected launch is not reported by the launch statement itself;
+        // peek (rather than get) so the status printed below is unaffected
+        cudaError_t launch_ec = cudaPeekAtLastError ();
+        if (launch_ec != cudaSuccess) {
+                fprintf (stderr, "[proc %d]: kernel launch failed: %s\n",
+                         comm_rank, cudaGetErrorString (launch_ec));
+        }
+#else
+        kernel(data, num_particles);
+#endif
+        // wait for the device before the host reads the buffer, and print
+        // the raw status code
+        cudaError_t ec = cudaDeviceSynchronize ();
+        printf("%d\n", (int)ec);
+        if (ec != cudaSuccess) {
+                fprintf (stderr, "[proc %d]: cudaDeviceSynchronize failed: %s\n",
+                         comm_rank, cudaGetErrorString (ec));
+        }
+
+        // print data; the index has the same width as the particle count so
+        // it cannot wrap for large views
+        for (h5_ssize_t i = 0; i < num_particles; i++) {
+                printf ("[proc %d]: local index = %lld, value = %d\n",
+                        comm_rank, (long long)i, data[i]);
+        }
+
+        // cleanup
+#ifdef USE_CUDA_KERNEL
+        cudaFree(data);
+#else
+        free (data);
+#endif
+        MPI_Finalize ();
+        return 0;
+}
@@ -0,0 +1,15 @@
+#!/bin/bash
+#SBATCH --uenv=eurohack/24.9:rc1
+#SBATCH --view=modules
+#SBATCH --ntasks-per-node=1
+#SBATCH --nodes=1
+#SBATCH --output=out-%j.out
+#SBATCH -C gpu
+#SBATCH --time=00:05:00
+#SBATCH --reservation=eurohack24
+# CUDA_LAUNCH_BLOCKING=1 makes kernel launches synchronous on the host.
+export CUDA_LAUNCH_BLOCKING=1
+# Profile the first launch of the kernel named "kernel" with ncu; the nsys line is kept as an alternative.
+
+#nsys profile -t cuda,mpi -o report.%p read_setnparticles_managed
+ncu --kernel-name kernel --launch-skip 0 --launch-count 1 -o report.%p "read_setnparticles_managed"
@@ -0,0 +1,15 @@
+#!/bin/bash
+#SBATCH --uenv=eurohack/24.9:rc1
+#SBATCH --view=modules
+#SBATCH --ntasks-per-node=16
+#SBATCH --nodes=4
+#SBATCH --output=out-%j.out
+#SBATCH -C gpu
+#SBATCH --partition=debug
+#SBATCH --time=00:05:00
+#
+export NSYS_NVTX_PROFILER_REGISTER_ONLY=0
+export CUDA_LAUNCH_BLOCKING=1
+EXE="${HOME}/src/H5hut/src/examples/H5Part/write_setnparticles"
+srun -n 64  "$EXE"
+#nsys profile -t cuda,nvtx,mpi -o report.%p $EXE
@@ -135,6 +135,12 @@ h5priv_map_enum_to_normalized_type (
 	case H5_STRING_T:
 		ret_value = H5_STRING;
 		break;
+	case H5_INT8_T: 
+		ret_value = H5_INT8;
+		break;
+	case H5_UINT8_T:
+		ret_value = H5_UINT8;
+		break;
 	case H5_INT16_T:
 		ret_value = H5_INT16;
 		break;
@@ -203,6 +209,12 @@ h5priv_normalize_type (
 			} else {
 				ret_value = H5_UINT16;
 			}
+		} else if (tsize==1) {
+			if (tsign == H5T_SGN_2) {
+				ret_value = H5_INT8;
+			} else {
+				ret_value = H5_UINT8;
+			}
 		}
 		break;
 	case H5T_FLOAT:
@@ -263,6 +275,13 @@ h5priv_map_hdf5_type_to_enum (
 			} else {
 				ret_value = H5_UINT16_T;
 			}
+		} else if (tsize==1) {
+			if (tsign == H5T_SGN_2) {
+				ret_value = H5_INT8_T;
+			}
+			else {
+				ret_value = H5_UINT8_T;
+			}
 		} else {
 			ret_value = H5_STRING_T;
 		}
@@ -13,6 +13,8 @@
 #include <hdf5.h>
 #include "h5core/h5_types.h"

+#define H5_INT8			H5T_NATIVE_INT8
+#define H5_UINT8		H5T_NATIVE_UINT8
 #define H5_INT16                H5T_NATIVE_INT16
 #define H5_UINT16		H5T_NATIVE_UINT16
 #define H5_INT32                H5T_NATIVE_INT32
@@ -30,6 +30,8 @@ typedef int MPI_Datatype;

 typedef enum  {
 	H5_STRING_T,
+	H5_INT8_T,
+	H5_UINT8_T,
 	H5_INT16_T,
 	H5_UINT16_T,
 	H5_INT32_T,
Author	SHA1	Message	Date
adelmann	3a9e6d8335	Merge branch 'int8' into 'master' Add support for hdf5 int8 types (int8, uint8) See merge request H5hut/src!7	2024-10-31 13:29:33 +01:00
John Biddiscombe	9d4c884434	Add support for hdf5 int8 types (int8, uint8)	2024-10-30 16:46:59 +01:00
gsell	40644d0c96	example from Eurohack24 added	2024-10-12 19:34:22 +02:00