From 40644d0c9607507cef94232ee34237529b03f1bd Mon Sep 17 00:00:00 2001
From: Achim Gsell <achim.gsell@psi.ch>
Date: Sat, 12 Oct 2024 19:34:22 +0200
Subject: [PATCH] example from Eurohack24 added

---
 examples/eurohack24/read_setnparticles_ats.cu | 87 +++++++++++++++++
 .../eurohack24/read_setnparticles_ats.sbatch  | 14 +++
 .../eurohack24/read_setnparticles_managed.cu  | 96 +++++++++++++++++++
 examples/eurohack24/setnparticule.sbatch      | 15 +++
 .../eurohack24/write_setnparticles.sbatch     | 15 +++
 5 files changed, 227 insertions(+)
 create mode 100644 examples/eurohack24/read_setnparticles_ats.cu
 create mode 100644 examples/eurohack24/read_setnparticles_ats.sbatch
 create mode 100644 examples/eurohack24/read_setnparticles_managed.cu
 create mode 100644 examples/eurohack24/setnparticule.sbatch
 create mode 100644 examples/eurohack24/write_setnparticles.sbatch

diff --git a/examples/eurohack24/read_setnparticles_ats.cu b/examples/eurohack24/read_setnparticles_ats.cu
new file mode 100644
index 00000000..bf87c0c6
--- /dev/null
+++ b/examples/eurohack24/read_setnparticles_ats.cu
@@ -0,0 +1,87 @@
+/*
+  Copyright (c) 2006-2015, The Regents of the University of California,
+  through Lawrence Berkeley National Laboratory (subject to receipt of any
+  required approvals from the U.S. Dept. of Energy) and the Paul Scherrer
+  Institut (Switzerland). All rights reserved.
+
+  License: see file COPYING in top level of source distribution.
+*/
+
+#include "H5hut.h"
+#include "examples.h"
+#include <stdlib.h>
+#include "cuda.h"
+#include <cuda_runtime.h>
+// name of input file
+const char* fname = "example_setnparticles.h5";
+
+// H5hut verbosity level
+const h5_int64_t h5_verbosity = H5_VERBOSE_DEFAULT;
+
+// Adds 2 to each of the n values in data. If USE_CUDA_KERNEL is defined this is a
+// GPU kernel fed with calloc() memory: the GPU must be able to access host pages.
+// #define USE_CUDA_KERNEL
+#ifdef USE_CUDA_KERNEL
+__global__
+#endif
+void kernel(h5_int32_t *data, h5_size_t n)
+{
+        for (h5_size_t i=0; i<n; i++) {
+                data[i] += 2;
+        }
+}
+
+// Reads this rank's share of dataset "data", runs kernel() on it, prints it.
+int
+main (int argc, char* argv[])
+{
+        // initialize MPI & H5hut
+        MPI_Init (&argc, &argv);
+        MPI_Comm comm = MPI_COMM_WORLD;
+        int comm_size = 1;
+        MPI_Comm_size (comm, &comm_size);
+        int comm_rank = 0;
+        MPI_Comm_rank (comm, &comm_rank);
+        H5AbortOnError ();
+        H5SetVerbosityLevel (h5_verbosity);
+
+        // open file and go to first step
+        h5_file_t file = H5OpenFile (fname, H5_O_RDONLY, H5_PROP_DEFAULT);
+        H5SetStep (file, 0);
+
+        // even share per rank; the last rank also takes the remainder
+        h5_ssize_t num_particles_total = H5PartGetNumParticles (file);
+        h5_ssize_t num_particles = num_particles_total / comm_size;
+        if (comm_rank+1 == comm_size)
+                num_particles += num_particles_total % comm_size;
+
+        printf ("[proc %d]: particles in view: %lld\n", comm_rank, (long long)num_particles);
+        printf ("[proc %d]: total number of particles: %lld\n", comm_rank, (long long)num_particles_total);
+
+        // set number of particles
+        H5PartSetNumParticles (file, num_particles);
+        // read data; calloc() may return NULL for a zero-sized request
+        h5_int32_t* data = (h5_int32_t*)calloc (num_particles, sizeof (*data));
+        if (data == NULL && num_particles > 0)
+                MPI_Abort (comm, EXIT_FAILURE);        // out of memory
+        H5PartReadDataInt32 (file, "data", data);
+        H5CloseFile (file);
+
+        cudaError_t ec = cudaSuccess;
+#ifdef USE_CUDA_KERNEL
+        kernel<<<1, 1>>>(data, num_particles);
+        ec = cudaGetLastError ();               // a failed launch is only reported here
+        if (ec == cudaSuccess)
+                ec = cudaDeviceSynchronize ();  // wait for the kernel, check its run
+#else
+        kernel(data, num_particles);
+#endif
+        printf("%d\n", (int)ec);
+        for (h5_ssize_t i = 0; i < num_particles; i++)
+                printf ("[proc %d]: local index = %lld, value = %d\n", comm_rank, (long long)i, data[i]);
+
+        // cleanup
+        free (data);
+        MPI_Finalize ();
+        return (ec == cudaSuccess) ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git 
a/examples/eurohack24/read_setnparticles_ats.sbatch b/examples/eurohack24/read_setnparticles_ats.sbatch
new file mode 100644
index 00000000..3c012f84
--- /dev/null
+++ b/examples/eurohack24/read_setnparticles_ats.sbatch
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH --uenv=eurohack/24.9:rc1
+#SBATCH --view=modules
+#SBATCH --ntasks-per-node=1
+#SBATCH --nodes=1
+#SBATCH --output=out-%j.out
+#SBATCH -C gpu
+#SBATCH --partition=debug
+#SBATCH --time=00:05:00
+# Profile one task of read_setnparticles_ats on one node with nsys.
+export NSYS_NVTX_PROFILER_REGISTER_ONLY=0
+export CUDA_LAUNCH_BLOCKING=1
+EXE="${HOME}/src/H5hut/src/examples/H5Part/read_setnparticles_ats"
+nsys profile -t cuda,nvtx,mpi -o report.%p "$EXE"
diff --git a/examples/eurohack24/read_setnparticles_managed.cu b/examples/eurohack24/read_setnparticles_managed.cu
new file mode 100644
index 00000000..cf89afc6
--- /dev/null
+++ b/examples/eurohack24/read_setnparticles_managed.cu
@@ -0,0 +1,96 @@
+/*
+  Copyright (c) 2006-2015, The Regents of the University of California,
+  through Lawrence Berkeley National Laboratory (subject to receipt of any
+  required approvals from the U.S. Dept. of Energy) and the Paul Scherrer
+  Institut (Switzerland). All rights reserved.
+
+  License: see file COPYING in top level of source distribution.
+*/
+
+#include "H5hut.h"
+#include "examples.h"
+#include <stdlib.h>
+#include "cuda.h"
+#include <cuda_runtime.h>
+// name of input file
+const char* fname = "example_setnparticles.h5";
+
+// H5hut verbosity level
+const h5_int64_t h5_verbosity = H5_VERBOSE_DEFAULT;
+
+// Adds 2 to each of the n values in data. With USE_CUDA_KERNEL defined it is
+// built as a GPU kernel and main() hands it a cudaMallocManaged() buffer.
+#define USE_CUDA_KERNEL 1
+#ifdef USE_CUDA_KERNEL
+__global__
+#endif
+void kernel(h5_int32_t *data, h5_size_t n)
+{
+        for (h5_size_t i=0; i<n; i++) {
+                data[i] += 2;
+        }
+}
+
+// Reads this rank's share of dataset "data", runs kernel() on it, prints it.
+int
+main (int argc, char* argv[])
+{
+        // initialize MPI & H5hut
+        MPI_Init (&argc, &argv);
+        MPI_Comm comm = MPI_COMM_WORLD;
+        int comm_size = 1;
+        MPI_Comm_size (comm, &comm_size);
+        int comm_rank = 0;
+        MPI_Comm_rank (comm, &comm_rank);
+        H5AbortOnError ();
+        H5SetVerbosityLevel (h5_verbosity);
+
+        // open file and go to first step
+        h5_file_t file = H5OpenFile (fname, H5_O_RDONLY, H5_PROP_DEFAULT);
+        H5SetStep (file, 0);
+
+        // even share per rank; the last rank also takes the remainder
+        h5_ssize_t num_particles_total = H5PartGetNumParticles (file);
+        h5_ssize_t num_particles = num_particles_total / comm_size;
+        if (comm_rank+1 == comm_size)
+                num_particles += num_particles_total % comm_size;
+
+        printf ("[proc %d]: particles in view: %lld\n", comm_rank, (long long)num_particles);
+        printf ("[proc %d]: total number of particles: %lld\n", comm_rank, (long long)num_particles_total);
+        // set number of particles
+        H5PartSetNumParticles (file, num_particles);
+        // allocate the read buffer; stop here if that fails
+        h5_int32_t *data = NULL;
+        cudaError_t ec = cudaSuccess;
+#ifdef USE_CUDA_KERNEL
+        if (num_particles > 0)          // cudaMallocManaged() rejects a size of 0
+                ec = cudaMallocManaged ((void **)&data, num_particles * sizeof (*data));
+#else
+        data = (h5_int32_t*)calloc (num_particles, sizeof (*data));
+#endif
+        if (ec != cudaSuccess || (data == NULL && num_particles > 0))
+                MPI_Abort (comm, EXIT_FAILURE);
+        H5PartReadDataInt32 (file, "data", data);
+        H5CloseFile (file);
+
+#ifdef USE_CUDA_KERNEL
+        kernel<<<1, 1>>>(data, num_particles);
+        ec = cudaGetLastError ();               // a failed launch is only reported here
+        if (ec == cudaSuccess)
+                ec = cudaDeviceSynchronize ();  // wait for the kernel, check its run
+#else
+        kernel(data, num_particles);
+#endif
+        printf("%d\n", (int)ec);
+        for (h5_ssize_t i = 0; i < num_particles; i++)
+                printf ("[proc %d]: local index = %lld, value = %d\n", comm_rank, (long long)i, data[i]);
+
+        // cleanup
+#ifdef USE_CUDA_KERNEL
+        cudaFree(data);
+#else
+        free (data);
+#endif
+        MPI_Finalize ();
+        return (ec == cudaSuccess) ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/examples/eurohack24/setnparticule.sbatch b/examples/eurohack24/setnparticule.sbatch
new file mode 100644
index 00000000..e157c72c
--- /dev/null
+++ b/examples/eurohack24/setnparticule.sbatch
@@ -0,0 +1,15 @@
+#!/bin/bash
+#SBATCH --uenv=eurohack/24.9:rc1
+#SBATCH --view=modules
+#SBATCH --ntasks-per-node=1
+#SBATCH --nodes=1
+#SBATCH --output=out-%j.out
+#SBATCH -C gpu
+#SBATCH --time=00:05:00
+#SBATCH --reservation=eurohack24
+# make kernel launches synchronous
+export CUDA_LAUNCH_BLOCKING=1
+# profile only the first launch of "kernel" with ncu; the nsys line is kept for reference
+
+#nsys profile -t cuda,mpi -o report.%p read_setnparticles_managed
+ncu --kernel-name kernel --launch-skip 0 --launch-count 1 -o report.%p "read_setnparticles_managed"
diff --git a/examples/eurohack24/write_setnparticles.sbatch b/examples/eurohack24/write_setnparticles.sbatch
new file mode 100644
index 00000000..5f4d5f9c
--- /dev/null
+++ b/examples/eurohack24/write_setnparticles.sbatch
@@ -0,0 +1,15 @@
+#!/bin/bash
+#SBATCH --uenv=eurohack/24.9:rc1
+#SBATCH --view=modules
+#SBATCH --ntasks-per-node=16
+#SBATCH --nodes=4
+#SBATCH --output=out-%j.out
+#SBATCH -C gpu
+#SBATCH --partition=debug
+#SBATCH --time=00:05:00
+# 4 nodes x 16 tasks per node = the 64 ranks started by srun below
+export NSYS_NVTX_PROFILER_REGISTER_ONLY=0
+export CUDA_LAUNCH_BLOCKING=1
+EXE="${HOME}/src/H5hut/src/examples/H5Part/write_setnparticles"
+srun -n 64 "$EXE"
+#nsys profile -t cuda,nvtx,mpi -o report.%p $EXE
-- 
GitLab