From 40644d0c9607507cef94232ee34237529b03f1bd Mon Sep 17 00:00:00 2001
From: Achim Gsell <achim.gsell@psi.ch>
Date: Sat, 12 Oct 2024 19:34:22 +0200
Subject: [PATCH] example from Eurohack24 added

---
 examples/eurohack24/read_setnparticles_ats.cu | 87 +++++++++++++++++
 .../eurohack24/read_setnparticles_ats.sbatch  | 14 +++
 .../eurohack24/read_setnparticles_managed.cu  | 96 +++++++++++++++++++
 examples/eurohack24/setnparticule.sbatch      | 15 +++
 .../eurohack24/write_setnparticles.sbatch     | 15 +++
 5 files changed, 227 insertions(+)
 create mode 100644 examples/eurohack24/read_setnparticles_ats.cu
 create mode 100644 examples/eurohack24/read_setnparticles_ats.sbatch
 create mode 100644 examples/eurohack24/read_setnparticles_managed.cu
 create mode 100644 examples/eurohack24/setnparticule.sbatch
 create mode 100644 examples/eurohack24/write_setnparticles.sbatch

diff --git a/examples/eurohack24/read_setnparticles_ats.cu b/examples/eurohack24/read_setnparticles_ats.cu
new file mode 100644
index 00000000..bf87c0c6
--- /dev/null
+++ b/examples/eurohack24/read_setnparticles_ats.cu
@@ -0,0 +1,87 @@
+/*
+  Copyright (c) 2006-2015, The Regents of the University of California,
+  through Lawrence Berkeley National Laboratory (subject to receipt of any
+  required approvals from the U.S. Dept. of Energy) and the Paul Scherrer
+  Institut (Switzerland).  All rights reserved.
+
+  License: see file COPYING in top level of source distribution.
+*/
+
+#include "H5hut.h"
+#include "examples.h"
+
+#include <stdlib.h>
+
+#include "cuda.h"
+// name of the input file; main() opens it read-only with H5OpenFile()
+const char* fname = "example_setnparticles.h5";
+
+// H5hut verbosity level; main() hands it to H5SetVerbosityLevel()
+const h5_int64_t h5_verbosity = H5_VERBOSE_DEFAULT;
+
+// #define USE_CUDA_KERNEL   // enable to build kernel() as a __global__ function
+#ifdef USE_CUDA_KERNEL
+__global__
+#endif
+void kernel(h5_int32_t *data, h5_size_t n)
+{
+        // add 2 to each of the n values; whoever runs this walks the whole array
+        for (h5_int32_t *cur = data, *stop = data + n; cur != stop; ++cur)
+                *cur += 2;
+}
+
+
+// Per-rank driver: read this rank's part of dataset "data" at step 0 of fname,
+// run kernel() on the calloc()'ed buffer and print every value. Returns 0.
+int
+main (int argc, char* argv[])
+{
+        // initialize MPI & H5hut
+        MPI_Init (&argc, &argv);
+        MPI_Comm comm = MPI_COMM_WORLD;
+        int comm_size = 1;
+        MPI_Comm_size (comm, &comm_size);
+        int comm_rank = 0;
+        MPI_Comm_rank (comm, &comm_rank);
+        H5AbortOnError ();
+        H5SetVerbosityLevel (h5_verbosity);
+
+        // open file and go to first step
+        h5_file_t file = H5OpenFile (fname, H5_O_RDONLY, H5_PROP_DEFAULT);
+        H5SetStep (file, 0);
+
+        // split the particles evenly; the last rank also takes the remainder
+        h5_ssize_t num_particles_total = H5PartGetNumParticles (file);
+        h5_ssize_t num_particles = num_particles_total / comm_size;
+        if (comm_rank+1 == comm_size)
+                num_particles += num_particles_total % comm_size;
+
+        printf ("[proc %d]: particles in view: %lld\n", comm_rank, (long long)num_particles);
+        printf ("[proc %d]: total number of particles: %lld\n",
+                comm_rank, (long long)num_particles_total);   // signed, as %lld expects
+
+        // set the number of particles this rank reads, then allocate and read them
+        H5PartSetNumParticles (file, num_particles);
+        h5_int32_t* data = (h5_int32_t*)calloc (num_particles, sizeof (*data));
+        if (data == NULL && num_particles > 0)
+                MPI_Abort (comm, 1);    // no buffer: do not let the read write through NULL
+        H5PartReadDataInt32 (file, "data", data);
+        H5CloseFile (file);
+
+#ifdef USE_CUDA_KERNEL
+        kernel<<<1, 1>>>(data, num_particles);
+#else
+        kernel(data, num_particles);
+#endif
+        int ec = cudaGetLastError();    // a failed launch is reported here
+        if (ec == cudaSuccess) ec = cudaDeviceSynchronize();
+        printf("%d\n", ec);
+        for (h5_ssize_t i = 0; i < num_particles; i++)   // same type as the count
+                printf ("[proc %d]: local index = %lld, value = %d\n",
+                        comm_rank, (long long)i, data[i]);
+
+        // cleanup
+        free (data);
+        MPI_Finalize ();
+        return 0;
+}
diff --git a/examples/eurohack24/read_setnparticles_ats.sbatch b/examples/eurohack24/read_setnparticles_ats.sbatch
new file mode 100644
index 00000000..3c012f84
--- /dev/null
+++ b/examples/eurohack24/read_setnparticles_ats.sbatch
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH --uenv=eurohack/24.9:rc1
+#SBATCH --view=modules
+#SBATCH --ntasks-per-node=1
+#SBATCH --nodes=1
+#SBATCH --output=out-%j.out
+#SBATCH -C gpu
+#SBATCH --partition=debug
+#SBATCH --time=00:05:00
+#
+export NSYS_NVTX_PROFILER_REGISTER_ONLY=0
+export CUDA_LAUNCH_BLOCKING=1
+EXE="${HOME}/src/H5hut/src/examples/H5Part/read_setnparticles_ats"
+nsys profile -t cuda,nvtx,mpi -o report.%p $EXE
diff --git a/examples/eurohack24/read_setnparticles_managed.cu b/examples/eurohack24/read_setnparticles_managed.cu
new file mode 100644
index 00000000..cf89afc6
--- /dev/null
+++ b/examples/eurohack24/read_setnparticles_managed.cu
@@ -0,0 +1,96 @@
+/*
+  Copyright (c) 2006-2015, The Regents of the University of California,
+  through Lawrence Berkeley National Laboratory (subject to receipt of any
+  required approvals from the U.S. Dept. of Energy) and the Paul Scherrer
+  Institut (Switzerland).  All rights reserved.
+
+  License: see file COPYING in top level of source distribution.
+*/
+
+#include "H5hut.h"
+#include "examples.h"
+
+#include <stdlib.h>
+
+#include "cuda.h"
+// name of the input file; main() opens it read-only with H5OpenFile()
+const char* fname = "example_setnparticles.h5";
+
+// H5hut verbosity level; main() hands it to H5SetVerbosityLevel()
+const h5_int64_t h5_verbosity = H5_VERBOSE_DEFAULT;
+
+#define USE_CUDA_KERNEL 1    // comment out to build kernel() as a plain host function
+#ifdef USE_CUDA_KERNEL
+__global__
+#endif
+void kernel(h5_int32_t *data, h5_size_t n)
+{
+        // add 2 to each of the n values; whoever runs this walks the whole array
+        for (h5_int32_t *cur = data, *stop = data + n; cur != stop; ++cur)
+                *cur += 2;
+}
+
+
+// Per-rank driver: read this rank's part of "data", run kernel() on it, print it.
+int
+main (int argc, char* argv[])
+{
+        // initialize MPI & H5hut
+        MPI_Init (&argc, &argv);
+        MPI_Comm comm = MPI_COMM_WORLD;
+        int comm_size = 1;
+        MPI_Comm_size (comm, &comm_size);
+        int comm_rank = 0;
+        MPI_Comm_rank (comm, &comm_rank);
+        H5AbortOnError ();
+        H5SetVerbosityLevel (h5_verbosity);
+
+        // open file and go to first step
+        h5_file_t file = H5OpenFile (fname, H5_O_RDONLY, H5_PROP_DEFAULT);
+        H5SetStep (file, 0);
+
+        // split the particles evenly; the last rank also takes the remainder
+        h5_ssize_t num_particles_total = H5PartGetNumParticles (file);
+        h5_ssize_t num_particles = num_particles_total / comm_size;
+        if (comm_rank+1 == comm_size)
+                num_particles += num_particles_total % comm_size;
+
+        printf ("[proc %d]: particles in view: %lld\n", comm_rank, (long long)num_particles);
+        printf ("[proc %d]: total number of particles: %lld\n",
+                comm_rank, (long long)num_particles_total);   // signed, as %lld expects
+
+        // set the number of particles this rank reads, then allocate and read them
+        H5PartSetNumParticles (file, num_particles);
+        h5_int32_t *data = NULL;        // stays NULL when nothing is allocated
+#ifdef USE_CUDA_KERNEL
+        if (num_particles > 0 &&        // CUDA rejects zero-byte managed requests
+            cudaMallocManaged((void **)&data, num_particles * sizeof(*data)) != cudaSuccess)
+                MPI_Abort (comm, 1);    // never continue with an unusable pointer
+#else
+        data = (h5_int32_t*)calloc (num_particles, sizeof (*data));
+        if (data == NULL && num_particles > 0) MPI_Abort (comm, 1);
+#endif
+        H5PartReadDataInt32 (file, "data", data);
+        H5CloseFile (file);
+
+#ifdef USE_CUDA_KERNEL
+        kernel<<<1, 1>>>(data, num_particles);
+#else
+        kernel(data, num_particles);
+#endif
+        int ec = cudaGetLastError();    // a failed launch is reported here
+        if (ec == cudaSuccess) ec = cudaDeviceSynchronize();
+        printf("%d\n", ec);
+        for (h5_ssize_t i = 0; i < num_particles; i++)   // same type as the count
+                printf ("[proc %d]: local index = %lld, value = %d\n",
+                        comm_rank, (long long)i, data[i]);
+
+        // cleanup
+#ifdef USE_CUDA_KERNEL
+        cudaFree(data);
+#else
+        free (data);
+#endif
+        MPI_Finalize ();
+        return 0;
+}
diff --git a/examples/eurohack24/setnparticule.sbatch b/examples/eurohack24/setnparticule.sbatch
new file mode 100644
index 00000000..e157c72c
--- /dev/null
+++ b/examples/eurohack24/setnparticule.sbatch
@@ -0,0 +1,15 @@
+#!/bin/bash
+#SBATCH --uenv=eurohack/24.9:rc1
+#SBATCH --view=modules
+#SBATCH --ntasks-per-node=1
+#SBATCH --nodes=1
+#SBATCH --output=out-%j.out
+#SBATCH -C gpu
+#SBATCH --time=00:05:00
+#SBATCH --reservation=eurohack24
+#
+export CUDA_LAUNCH_BLOCKING=1
+#
+
+#nsys profile -t cuda,mpi -o report.%p read_setnparticles_managed
+ncu --kernel-name kernel --launch-skip 0 --launch-count 1 -o report.%p "read_setnparticles_managed"
diff --git a/examples/eurohack24/write_setnparticles.sbatch b/examples/eurohack24/write_setnparticles.sbatch
new file mode 100644
index 00000000..5f4d5f9c
--- /dev/null
+++ b/examples/eurohack24/write_setnparticles.sbatch
@@ -0,0 +1,15 @@
+#!/bin/bash
+#SBATCH --uenv=eurohack/24.9:rc1
+#SBATCH --view=modules
+#SBATCH --ntasks-per-node=16
+#SBATCH --nodes=4
+#SBATCH --output=out-%j.out
+#SBATCH -C gpu
+#SBATCH --partition=debug
+#SBATCH --time=00:05:00
+# Launch the writer example on all 4 nodes x 16 tasks per node = 64 tasks.
+export NSYS_NVTX_PROFILER_REGISTER_ONLY=0
+export CUDA_LAUNCH_BLOCKING=1
+EXE="${HOME}/src/H5hut/src/examples/H5Part/write_setnparticles"
+srun -n 64  "$EXE"
+#nsys profile -t cuda,nvtx,mpi -o report.%p $EXE
-- 
GitLab