Commit 40644d0c authored by gsell
example from Eurohack24 added

parent c41bd6ec
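The commit adds two variants of the H5Part read_setnparticles example from Eurohack24, together with three SLURM batch scripts. The first variant runs the update loop on the host by default; the second enables the CUDA kernel and allocates its buffer with cudaMallocManaged. The scripts profile the two read variants with nsys and ncu, and run the matching write example under srun.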
/*
Copyright (c) 2006-2015, The Regents of the University of California,
through Lawrence Berkeley National Laboratory (subject to receipt of any
required approvals from the U.S. Dept. of Energy) and the Paul Scherrer
Institut (Switzerland). All rights reserved.
License: see file COPYING in top level of source distribution.
*/
#include "H5hut.h"
#include "examples.h"
#include <stdlib.h>
#include "cuda.h"
// name of input file
const char* fname = "example_setnparticles.h5";
// H5hut verbosity level
const h5_int64_t h5_verbosity = H5_VERBOSE_DEFAULT;
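// Build-time toggle between a plain host loop and a CUDA __global__ kernel;
// it is left disabled in this variant, so kernel() below runs on the CPU.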
// #define USE_CUDA_KERNEL
#ifdef USE_CUDA_KERNEL
__global__
#endif
void kernel(h5_int32_t *data, h5_size_t n)
{
        for (h5_size_t i = 0; i < n; i++) {
                data[i] += 2;
        }
}
int
main (
        int argc, char* argv[]
        ) {
        // initialize MPI & H5hut
        MPI_Init (&argc, &argv);
        MPI_Comm comm = MPI_COMM_WORLD;
        int comm_size = 1;
        MPI_Comm_size (comm, &comm_size);
        int comm_rank = 0;
        MPI_Comm_rank (comm, &comm_rank);
        H5AbortOnError ();
        H5SetVerbosityLevel (h5_verbosity);
        // open file and go to first step
        h5_file_t file = H5OpenFile (fname, H5_O_RDONLY, H5_PROP_DEFAULT);
        H5SetStep (file, 0);

        // compute number of particles this process has to read
        h5_ssize_t num_particles_total = H5PartGetNumParticles (file);
        h5_ssize_t num_particles = num_particles_total / comm_size;
        if (comm_rank+1 == comm_size)
                num_particles += num_particles_total % comm_size;
printf ("[proc %d]: particles in view: %lld\n", comm_rank, (long long)num_particles);
printf ("[proc %d]: total number of particles: %lld\n",
comm_rank, (long long unsigned)num_particles_total);
        // set number of particles
        H5PartSetNumParticles (file, num_particles);

        // read the data, then close the file
        h5_int32_t* data = (h5_int32_t*)calloc (num_particles, sizeof (*data));
        H5PartReadDataInt32 (file, "data", data);
        H5CloseFile (file);
#ifdef USE_CUDA_KERNEL
        kernel<<<1, 1>>>(data, num_particles);
#else
        kernel (data, num_particles);
#endif
        cudaError_t ec = cudaDeviceSynchronize ();
        printf ("[proc %d]: cudaDeviceSynchronize: %s\n",
                comm_rank, cudaGetErrorString (ec));
        for (int i = 0; i < num_particles; i++) {
                printf ("[proc %d]: local index = %d, value = %d\n",
                        comm_rank, i, data[i]);
        }
        // cleanup
        free (data);
        MPI_Finalize ();
        return 0;
}
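Because USE_CUDA_KERNEL is commented out above, the update loop runs on the host against the buffer from calloc. Launching the CUDA kernel on that same host buffer only works where the GPU can dereference ordinary host allocations, for example via Address Translation Services on Grace Hopper, which the _ats suffix of the executable in the batch script below suggests. On hardware without that capability the conventional pattern is explicit device allocation plus copies; a minimal, hypothetical sketch, not part of this commit:

        // Hypothetical explicit-copy variant of the kernel launch above.
        h5_int32_t* d_data = NULL;
        cudaMalloc ((void**)&d_data, num_particles * sizeof (*d_data));
        cudaMemcpy (d_data, data, num_particles * sizeof (*d_data),
                    cudaMemcpyHostToDevice);
        kernel<<<1, 1>>>(d_data, num_particles);
        cudaMemcpy (data, d_data, num_particles * sizeof (*d_data),
                    cudaMemcpyDeviceToHost);
        cudaFree (d_data);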
#!/bin/bash
#SBATCH --uenv=eurohack/24.9:rc1
#SBATCH --view=modules
#SBATCH --ntasks-per-node=1
#SBATCH --nodes=1
#SBATCH --output=out-%j.out
#SBATCH -C gpu
#SBATCH --partition=debug
#SBATCH --time=00:05:00
#
export NSYS_NVTX_PROFILER_REGISTER_ONLY=0
export CUDA_LAUNCH_BLOCKING=1
EXE="${HOME}/src/H5hut/src/examples/H5Part/read_setnparticles_ats"
nsys profile -t cuda,nvtx,mpi -o report.%p $EXE
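The resulting profile can be summarized on the command line without opening the GUI; assuming a report file named like report.12345.nsys-rep:

nsys stats report.12345.nsys-rep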
/*
Copyright (c) 2006-2015, The Regents of the University of California,
through Lawrence Berkeley National Laboratory (subject to receipt of any
required approvals from the U.S. Dept. of Energy) and the Paul Scherrer
Institut (Switzerland). All rights reserved.
License: see file COPYING in top level of source distribution.
*/
#include "H5hut.h"
#include "examples.h"
#include <stdlib.h>
#include "cuda.h"
// name of input file
const char* fname = "example_setnparticles.h5";
// H5hut verbosity level
const h5_int64_t h5_verbosity = H5_VERBOSE_DEFAULT;
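// In this variant the CUDA kernel is enabled; the data buffer is therefore
// allocated with cudaMallocManaged so host and device share it.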
#define USE_CUDA_KERNEL 1
#ifdef USE_CUDA_KERNEL
__global__
#endif
void kernel(h5_int32_t *data, h5_size_t n)
{
        for (h5_size_t i = 0; i < n; i++) {
                data[i] += 2;
        }
}
int
main (
        int argc, char* argv[]
        ) {
        // initialize MPI & H5hut
        MPI_Init (&argc, &argv);
        MPI_Comm comm = MPI_COMM_WORLD;
        int comm_size = 1;
        MPI_Comm_size (comm, &comm_size);
        int comm_rank = 0;
        MPI_Comm_rank (comm, &comm_rank);
        H5AbortOnError ();
        H5SetVerbosityLevel (h5_verbosity);
        // open file and go to first step
        h5_file_t file = H5OpenFile (fname, H5_O_RDONLY, H5_PROP_DEFAULT);
        H5SetStep (file, 0);

        // compute number of particles this process has to read
        h5_ssize_t num_particles_total = H5PartGetNumParticles (file);
        h5_ssize_t num_particles = num_particles_total / comm_size;
        if (comm_rank+1 == comm_size)
                num_particles += num_particles_total % comm_size;
printf ("[proc %d]: particles in view: %lld\n", comm_rank, (long long)num_particles);
printf ("[proc %d]: total number of particles: %lld\n",
comm_rank, (long long unsigned)num_particles_total);
        // set number of particles
        H5PartSetNumParticles (file, num_particles);

        // read the data, then close the file; use managed memory when the
        // CUDA kernel is enabled so the device can access the buffer
        h5_int32_t *data;
#ifdef USE_CUDA_KERNEL
        cudaMallocManaged ((void **)&data, num_particles * sizeof (*data));
#else
        data = (h5_int32_t*)calloc (num_particles, sizeof (*data));
#endif
        H5PartReadDataInt32 (file, "data", data);
        H5CloseFile (file);
#ifdef USE_CUDA_KERNEL
        kernel<<<1, 1>>>(data, num_particles);
#else
        kernel (data, num_particles);
#endif
        cudaError_t ec = cudaDeviceSynchronize ();
        printf ("[proc %d]: cudaDeviceSynchronize: %s\n",
                comm_rank, cudaGetErrorString (ec));
        for (int i = 0; i < num_particles; i++) {
                printf ("[proc %d]: local index = %d, value = %d\n",
                        comm_rank, i, data[i]);
        }
        // cleanup
#ifdef USE_CUDA_KERNEL
        cudaFree (data);
#else
        free (data);
#endif
        MPI_Finalize ();
        return 0;
}
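Neither variant checks the return values of the allocation or the kernel launch; only the final cudaDeviceSynchronize result is printed. A common pattern is a small checking macro; a hedged sketch, hypothetical and not part of this commit:

#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>

// Hypothetical helper: abort with a readable message if a CUDA call fails.
#define CUDA_CHECK(call) \
        do { \
                cudaError_t err_ = (call); \
                if (err_ != cudaSuccess) { \
                        fprintf (stderr, "%s:%d: %s\n", __FILE__, __LINE__, \
                                 cudaGetErrorString (err_)); \
                        exit (EXIT_FAILURE); \
                } \
        } while (0)

// usage:
//   CUDA_CHECK (cudaMallocManaged ((void **)&data, n * sizeof (*data)));
//   kernel<<<1, 1>>>(data, n);
//   CUDA_CHECK (cudaGetLastError ());   // catches launch errors
//   CUDA_CHECK (cudaDeviceSynchronize ());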
#!/bin/bash
#SBATCH --uenv=eurohack/24.9:rc1
#SBATCH --view=modules
#SBATCH --ntasks-per-node=1
#SBATCH --nodes=1
#SBATCH --output=out-%j.out
#SBATCH -C gpu
#SBATCH --time=00:05:00
#SBATCH --reservation=eurohack24
#
export CUDA_LAUNCH_BLOCKING=1
#
#nsys profile -t cuda,mpi -o report.%p read_setnparticles_managed
ncu --kernel-name kernel --launch-skip 0 --launch-count 1 -o report.%p "read_setnparticles_managed"
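The collected kernel profile can be inspected afterwards in the Nsight Compute GUI (ncu-ui) or printed on the command line; assuming a report named like report.12345.ncu-rep:

ncu --import report.12345.ncu-rep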
#!/bin/bash
#SBATCH --uenv=eurohack/24.9:rc1
#SBATCH --view=modules
#SBATCH --ntasks-per-node=16
#SBATCH --nodes=4
#SBATCH --output=out-%j.out
#SBATCH -C gpu
#SBATCH --partition=debug
#SBATCH --time=00:05:00
#
export NSYS_NVTX_PROFILER_REGISTER_ONLY=0
export CUDA_LAUNCH_BLOCKING=1
EXE="${HOME}/src/H5hut/src/examples/H5Part/write_setnparticles"
srun -n 64 "$EXE"
#nsys profile -t cuda,nvtx,mpi -o report.%p $EXE
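Note the rank count: with --nodes=4 and --ntasks-per-node=16 the allocation provides exactly 4 x 16 = 64 tasks, matching the srun -n 64 launch of the write example.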