
Resolve "mixed-precision"

Merged montan_v requested to merge 146-mixed-precision into master
127 files changed: +6713 -5502
// Bump on Tail Instability/Two-stream Instability Test
// Usage:
// srun ./BumponTailInstability <nx> <ny> <nz> <Np> <Nt> <stype> <lbthres> <ovfactor> --info 10
// srun ./BumponTailInstability
// <nx> [<ny>...] <Np> <Nt> <stype>
// <lbthres> --overallocate <ovfactor> --info 10
// nx = No. cell-centered points in the x-direction
// ny = No. cell-centered points in the y-direction
// nz = No. cell-centered points in the z-direction
// ny... = No. cell-centered points in the y-, z-, ...-direction
// Np = Total no. of macro-particles in the simulation
// Nt = Number of time steps
// stype = Field solver type e.g., FFT
// stype = Field solver type (FFT and CG supported)
// lbthres = Load balancing threshold i.e., lbthres*100 is the maximum load imbalance
// percentage which can be tolerated and beyond which
// particle load balancing occurs. A value of 0.01 is good for many typical
@@ -14,7 +15,7 @@
// ovfactor = Over-allocation factor for the buffers used in the communication. Typical
// values are 1.0, 2.0. Value 1.0 means no over-allocation.
// Example:
// srun ./BumponTailInstability 128 128 128 10000 10 FFT 0.01 2.0 --info 10
// srun ./BumponTailInstability 128 128 128 10000 10 FFT 0.01 --overallocate 2.0 --info 10
// Change the TestName to TwoStreamInstability or BumponTailInstability
// in order to simulate the Two stream instability or bump on tail instability
// cases
@@ -47,6 +48,10 @@
#include "ChargedParticles.hpp"
constexpr unsigned Dim = 3;
constexpr bool EnablePhaseDump = false;
template <typename T>
struct Newton1D {
double tol = 1e-12;
@@ -125,9 +130,11 @@ struct generate_random {
value_type muZ = (value_type)(((!isBeam) * muBulk) + (isBeam * muBeam));
for (unsigned d = 0; d < Dim - 1; ++d) {
x(i)[d] = rand_gen.drand(minU[d], maxU[d]);
v(i)[d] = rand_gen.normal(0.0, sigma);
if constexpr (Dim > 1) {
for (unsigned d = 0; d < Dim - 1; ++d) {
x(i)[d] = rand_gen.drand(minU[d], maxU[d]);
v(i)[d] = rand_gen.normal(0.0, sigma);
}
}
v(i)[Dim - 1] = rand_gen.normal(muZ, sigma);
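The hunk above makes the sampler dimension-independent: the transverse loop is wrapped in `if constexpr (Dim > 1)` so a 1D instantiation never compiles a loop over a zero-length transverse section. A minimal standalone sketch of the idiom (not part of the diff; the array type and function name are illustrative):

#include <array>

template <unsigned Dim>
void fill_transverse(std::array<double, Dim>& x, double value) {
    // Discarded at compile time when Dim == 1, so the branch never has to be
    // instantiated for a zero-length transverse section.
    if constexpr (Dim > 1) {
        for (unsigned d = 0; d < Dim - 1; ++d) {
            x[d] = value;
        }
    }
    x[Dim - 1] = 2.0 * value;  // the longitudinal (last) component is always set
}

int main() {
    std::array<double, 3> x3{};
    std::array<double, 1> x1{};
    fill_transverse<3>(x3, 1.0);  // x3 = {1, 1, 2}
    fill_transverse<1>(x1, 1.0);  // x1 = {2}
    return 0;
}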
@@ -148,7 +155,7 @@ double CDF(const double& x, const double& delta, const double& k, const unsigned
}
KOKKOS_FUNCTION
double PDF(const Vector_t& xvec, const double& delta, const Vector_t& kw) {
double PDF(const Vector_t<Dim>& xvec, const double& delta, const Vector_t<Dim>& kw) {
double pdf = 1.0 * 1.0 * (1.0 + delta * std::cos(kw[Dim - 1] * xvec[Dim - 1]));
return pdf;
}
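For reference, the density sampled here is f(z) = 1 + delta*cos(k_z * z), uniform in the transverse directions, and the per-axis CDF used for particle placement is its antiderivative. A standalone sketch of that relationship (the exact form of the truncated CDF signature above is an assumption; shown only for the perturbed axis):

#include <cmath>

// f(z) = 1 + delta * cos(k * z); transverse directions are uniform.
double pdf1d(double z, double delta, double k) {
    return 1.0 + delta * std::cos(k * z);
}

// Unnormalized per-axis CDF: the integral of pdf1d from 0 to x, used to map
// each rank's local region onto its share of the particles.
double cdf1d(double x, double delta, double k) {
    return x + (delta / k) * std::sin(k * x);
}

int main() {
    const double delta = 0.01, k = 0.5;
    return (pdf1d(0.0, delta, k) > 0 && cdf1d(1.0, delta, k) > 0) ? 0 : 1;
}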
@@ -156,18 +163,87 @@ double PDF(const Vector_t& xvec, const double& delta, const Vector_t& kw) {
// const char* TestName = "BumponTailInstability";
const char* TestName = "TwoStreamInstability";
template <typename Bunch>
struct PhaseDump {
void initialize(size_t nr, double domain) {
ippl::Index I(nr);
ippl::NDIndex<2> owned(I, I);
layout = FieldLayout_t<2>(owned, serial);
ippl::Vector<double, 2> hx = {domain / nr, 16. / nr};
ippl::Vector<double, 2> origin{0, -8};
mesh = Mesh_t<2>(owned, hx, origin);
phaseSpace.initialize(mesh, layout);
if (Ippl::Comm->rank() == 0) {
phaseSpaceBuf.initialize(mesh, layout);
}
std::cout << Ippl::Comm->rank() << ": " << phaseSpace.getOwned() << std::endl;
}
void dump(int it, std::shared_ptr<Bunch> P, bool allDims = false) {
const auto pcount = P->getLocalNum();
phase.realloc(pcount);
auto& Ri = P->R;
auto& Pi = P->P;
for (unsigned d = allDims ? 0 : Dim - 1; d < Dim; d++) {
Kokkos::parallel_for(
"Copy phase space", pcount, KOKKOS_CLASS_LAMBDA(const size_t i) {
phase(i) = {Ri(i)[d], Pi(i)[d]};
});
phaseSpace = 0;
Kokkos::fence();
scatter(P->q, phaseSpace, phase);
auto& view = phaseSpace.getView();
MPI_Reduce(view.data(), phaseSpaceBuf.getView().data(), view.size(), MPI_DOUBLE,
MPI_SUM, 0, Ippl::getComm());
if (Ippl::Comm->rank() == 0) {
std::stringstream fname;
fname << "PhaseSpace_t=" << it << "_d=" << d << ".csv";
Inform out("Phase Dump", fname.str().c_str(), Inform::OVERWRITE, 0);
phaseSpaceBuf.write(out);
auto max = phaseSpaceBuf.max();
auto min = phaseSpaceBuf.min();
if (max > maxValue) {
maxValue = max;
}
if (min < minValue) {
minValue = min;
}
}
Ippl::Comm->barrier();
}
MPI_Bcast(&maxValue, 1, MPI_DOUBLE, 0, Ippl::getComm());
MPI_Bcast(&minValue, 1, MPI_DOUBLE, 0, Ippl::getComm());
}
double maxRecorded() const { return maxValue; }
double minRecorded() const { return minValue; }
private:
ippl::e_dim_tag serial[2] = {ippl::SERIAL, ippl::SERIAL};
FieldLayout_t<2> layout;
Mesh_t<2> mesh;
Field_t<2> phaseSpace, phaseSpaceBuf;
ippl::ParticleAttrib<Vector<double, 2>> phase;
double maxValue = 0, minValue = 0;
};
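PhaseDump scatters each particle's charge onto a serial 2D (position, velocity) field and then sums the per-rank copies onto rank 0 with MPI_Reduce before writing the CSV. A minimal standalone sketch of that reduce-to-root pattern (plain MPI and std::vector stand in for the IPPL field views; not part of the diff):

#include <mpi.h>
#include <cstdio>
#include <vector>

int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    const int nCells = 64 * 64;              // e.g. a 64x64 (x, v) histogram
    std::vector<double> local(nCells, 1.0);  // this rank's scattered charge
    std::vector<double> global(nCells);      // only significant on rank 0

    // Sum the per-rank histograms into rank 0's buffer, as dump() does before
    // writing PhaseSpace_t=<it>_d=<d>.csv on rank 0.
    MPI_Reduce(local.data(), global.data(), nCells, MPI_DOUBLE, MPI_SUM, 0,
               MPI_COMM_WORLD);

    if (rank == 0) {
        std::printf("cell 0 after reduce: %f (equals the number of ranks)\n", global[0]);
    }
    MPI_Finalize();
    return 0;
}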
int main(int argc, char* argv[]) {
Ippl ippl(argc, argv);
Inform msg(TestName);
Inform msg2all(TestName, INFORM_ALL_NODES);
Ippl::Comm->setDefaultOverallocation(std::atof(argv[8]));
int arg = 1;
auto start = std::chrono::high_resolution_clock::now();
ippl::Vector<int, Dim> nr = {
std::atoi(argv[1]),
std::atoi(argv[2]),
std::atoi(argv[3]),
auto start = std::chrono::high_resolution_clock::now();
ippl::Vector<int, Dim> nr;
for (unsigned d = 0; d < Dim; d++) {
nr[d] = std::atoi(argv[arg++]);
};
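Because the grid sizes now occupy the first Dim positional arguments, the hard-coded argv indices are replaced by a running index, so the remaining arguments (<Np>, <Nt>, <stype>, <lbthres>) shift with Dim. A standalone sketch of the same parsing pattern (error handling and the later arguments are abbreviated):

#include <array>
#include <cstdlib>

constexpr unsigned Dim = 3;

int main(int argc, char* argv[]) {
    if (argc < static_cast<int>(Dim) + 3) {
        return 1;  // expects <nx> [<ny> <nz> ...] <Np> <Nt> at minimum
    }
    int arg = 1;
    std::array<int, Dim> nr;
    for (unsigned d = 0; d < Dim; ++d) {
        nr[d] = std::atoi(argv[arg++]);  // <nx> [<ny> <nz> ...]
    }
    const long long totalP = std::atoll(argv[arg++]);  // <Np>
    const unsigned int nt  = std::atoi(argv[arg++]);   // <Nt>
    return (nr[0] > 0 && totalP > 0 && nt > 0) ? 0 : 1;
}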
static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("total");
@@ -182,14 +258,14 @@ int main(int argc, char* argv[]) {
IpplTimings::startTimer(mainTimer);
const size_type totalP = std::atoll(argv[4]);
const unsigned int nt = std::atoi(argv[5]);
const size_type totalP = std::atoll(argv[arg++]);
const unsigned int nt = std::atoi(argv[arg++]);
msg << TestName << endl << "nt " << nt << " Np= " << totalP << " grid = " << nr << endl;
using bunch_type = ChargedParticles<PLayout_t>;
using bunch_type = ChargedParticles<PLayout_t<Dim>, Dim, double>;
std::unique_ptr<bunch_type> P;
std::shared_ptr<bunch_type> P;
ippl::NDIndex<Dim> domain;
for (unsigned i = 0; i < Dim; i++) {
@@ -201,20 +277,20 @@ int main(int argc, char* argv[]) {
decomp[d] = ippl::PARALLEL;
}
Vector_t kw;
Vector_t<Dim> kw;
double sigma, muBulk, muBeam, epsilon, delta;
if (std::strcmp(TestName, "TwoStreamInstability") == 0) {
// Parameters for two stream instability as in
// https://www.frontiersin.org/articles/10.3389/fphy.2018.00105/full
kw = {0.5, 0.5, 0.5};
kw = 0.5;
sigma = 0.1;
epsilon = 0.5;
muBulk = -pi / 2.0;
muBeam = pi / 2.0;
delta = 0.01;
} else if (std::strcmp(TestName, "BumponTailInstability") == 0) {
kw = {0.21, 0.21, 0.21};
kw = 0.21;
sigma = 1.0 / std::sqrt(2.0);
epsilon = 0.1;
muBulk = 0.0;
@@ -222,7 +298,7 @@ int main(int argc, char* argv[]) {
delta = 0.01;
} else {
// Default value is two stream instability
kw = {0.5, 0.5, 0.5};
kw = 0.5;
sigma = 0.1;
epsilon = 0.5;
muBulk = -pi / 2.0;
@@ -230,36 +306,35 @@ int main(int argc, char* argv[]) {
delta = 0.01;
}
Vector_t rmin(0.0);
Vector_t rmax = 2 * pi / kw;
double dx = rmax[0] / nr[0];
double dy = rmax[1] / nr[1];
double dz = rmax[2] / nr[2];
Vector_t<Dim> rmin(0.0);
Vector_t<Dim> rmax = 2 * pi / kw;
Vector_t hr = {dx, dy, dz};
Vector_t origin = {rmin[0], rmin[1], rmin[2]};
const double dt = 0.5 * dx; // 0.05
Vector_t<Dim> hr;
for (unsigned d = 0; d < Dim; d++) {
hr[d] = rmax[d] / nr[d];
}
Vector_t<Dim> origin = rmin;
const double dt = std::min(.05, 0.5 * *std::min_element(hr.begin(), hr.end()));
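The mesh spacing and time step are now computed per axis: dt is capped at 0.05 and at half the smallest cell width (a simple CFL-style bound) instead of the previous hard-coded 0.5 * dx. A standalone sketch with illustrative numbers:

#include <algorithm>
#include <array>
#include <cmath>

int main() {
    constexpr unsigned Dim = 3;
    const double pi = std::acos(-1.0);
    const std::array<double, Dim> kw{0.5, 0.5, 0.5};
    const std::array<int, Dim> nr{128, 128, 128};
    std::array<double, Dim> rmax, hr;
    for (unsigned d = 0; d < Dim; ++d) {
        rmax[d] = 2.0 * pi / kw[d];  // periodic box length per axis
        hr[d]   = rmax[d] / nr[d];   // cell width per axis
    }
    const double dt =
        std::min(0.05, 0.5 * *std::min_element(hr.begin(), hr.end()));
    return dt > 0 ? 0 : 1;
}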
const bool isAllPeriodic = true;
Mesh_t mesh(domain, hr, origin);
FieldLayout_t FL(domain, decomp, isAllPeriodic);
PLayout_t PL(FL, mesh);
Mesh_t<Dim> mesh(domain, hr, origin);
FieldLayout_t<Dim> FL(domain, decomp, isAllPeriodic);
PLayout_t<Dim> PL(FL, mesh);
// Q = -\int\int f dx dv
double Q = -rmax[0] * rmax[1] * rmax[2];
P = std::make_unique<bunch_type>(PL, hr, rmin, rmax, decomp, Q);
double Q = std::reduce(rmax.begin(), rmax.end(), -1., std::multiplies<double>());
std::string solver = argv[arg++];
P = std::make_shared<bunch_type>(PL, hr, rmin, rmax, decomp, Q, solver);
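Similarly, the total charge Q computed above, the negated product of rmax[d] over all axes (i.e. the negative box volume), is written as a fold so it holds for any Dim; a minimal standalone equivalent:

#include <array>
#include <functional>
#include <numeric>

int main() {
    const std::array<double, 3> rmax{12.57, 12.57, 12.57};  // roughly 2*pi/0.5 per axis
    // Seeding the product with -1.0 yields Q = -(rmax[0] * rmax[1] * ...).
    const double Q = std::reduce(rmax.begin(), rmax.end(), -1.0,
                                 std::multiplies<double>());
    return Q < 0 ? 0 : 1;
}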
P->nr_m = nr;
P->E_m.initialize(mesh, FL);
P->rho_m.initialize(mesh, FL);
P->initializeFields(mesh, FL);
bunch_type bunchBuffer(PL);
P->stype_m = argv[6];
P->initSolver();
P->time_m = 0.0;
P->loadbalancethreshold_m = std::atof(argv[7]);
P->loadbalancethreshold_m = std::atof(argv[arg++]);
bool isFirstRepartition;
@@ -269,26 +344,21 @@ int main(int argc, char* argv[]) {
isFirstRepartition = true;
const ippl::NDIndex<Dim>& lDom = FL.getLocalNDIndex();
const int nghost = P->rho_m.getNghost();
using mdrange_type = Kokkos::MDRangePolicy<Kokkos::Rank<3>>;
auto rhoview = P->rho_m.getView();
Kokkos::parallel_for(
"Assign initial rho based on PDF",
mdrange_type({nghost, nghost, nghost},
{rhoview.extent(0) - nghost, rhoview.extent(1) - nghost,
rhoview.extent(2) - nghost}),
KOKKOS_LAMBDA(const int i, const int j, const int k) {
using index_array_type = typename ippl::RangePolicy<Dim>::index_array_type;
ippl::parallel_for(
"Assign initial rho based on PDF", ippl::getRangePolicy<Dim>(rhoview, nghost),
KOKKOS_LAMBDA(const index_array_type& args) {
// local to global index conversion
const size_t ig = i + lDom[0].first() - nghost;
const size_t jg = j + lDom[1].first() - nghost;
const size_t kg = k + lDom[2].first() - nghost;
double x = (ig + 0.5) * hr[0] + origin[0];
double y = (jg + 0.5) * hr[1] + origin[1];
double z = (kg + 0.5) * hr[2] + origin[2];
Vector_t xvec = {x, y, z};
rhoview(i, j, k) = PDF(xvec, delta, kw);
Vector_t<Dim> xvec = args;
for (unsigned d = 0; d < Dim; d++) {
xvec[d] = (xvec[d] + lDom[d].first() - nghost + 0.5) * hr[d] + origin[d];
}
// ippl::apply<unsigned> accesses the view at the given indices and obtains a
// reference; see src/Expression/IpplOperations.h
ippl::apply<Dim>(rhoview, args) = PDF(xvec, delta, kw);
});
Kokkos::fence();
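The initial density is now assigned with a dimension-independent ippl::parallel_for over an index array instead of a hard-coded 3D MDRangePolicy; either way, each interior (non-ghost) cell's local index is shifted to its global position and mapped to a cell-centre coordinate before the PDF is evaluated. A minimal Kokkos sketch of that index-to-coordinate step (extents, spacing, and the placeholder density are illustrative, not the IPPL call):

#include <Kokkos_Core.hpp>

int main(int argc, char* argv[]) {
    Kokkos::initialize(argc, argv);
    {
        const int n = 16, nghost = 1;
        const double hr = 0.5, origin = 0.0;
        const int lDomFirst = 0;  // first global cell index owned by this rank
        Kokkos::View<double***> rho("rho", n, n, n);

        Kokkos::parallel_for(
            "Assign initial rho based on PDF",
            Kokkos::MDRangePolicy<Kokkos::Rank<3>>(
                {nghost, nghost, nghost}, {n - nghost, n - nghost, n - nghost}),
            KOKKOS_LAMBDA(const int i, const int j, const int k) {
                // local -> global index, then index -> cell-centre coordinate
                const double x = (i + lDomFirst - nghost + 0.5) * hr + origin;
                const double y = (j + lDomFirst - nghost + 0.5) * hr + origin;
                const double z = (k + lDomFirst - nghost + 0.5) * hr + origin;
                rho(i, j, k) = x + y + z;  // placeholder; the test evaluates PDF(xvec, delta, kw)
            });
        Kokkos::fence();
    }
    Kokkos::finalize();
    return 0;
}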
@@ -301,10 +371,10 @@ int main(int argc, char* argv[]) {
msg << "First domain decomposition done" << endl;
IpplTimings::startTimer(particleCreation);
typedef ippl::detail::RegionLayout<double, Dim, Mesh_t> RegionLayout_t;
typedef ippl::detail::RegionLayout<double, Dim, Mesh_t<Dim>> RegionLayout_t;
const RegionLayout_t& RLayout = PL.getRegionLayout();
const typename RegionLayout_t::host_mirror_type Regions = RLayout.gethLocalRegions();
Vector_t Nr, Dr, minU, maxU;
Vector_t<Dim> Nr, Dr, minU, maxU;
int myRank = Ippl::Comm->rank();
for (unsigned d = 0; d < Dim; ++d) {
Nr[d] = CDF(Regions(myRank)[d].max(), delta, kw[d], d)
@@ -314,7 +384,10 @@ int main(int argc, char* argv[]) {
maxU[d] = CDF(Regions(myRank)[d].max(), delta, kw[d], d);
}
double factorConf = (Nr[0] * Nr[1] * Nr[2]) / (Dr[0] * Dr[1] * Dr[2]);
double factorConf = 1;
for (unsigned d = 0; d < Dim; d++) {
factorConf *= Nr[d] / Dr[d];
}
double factorVelBulk = 1.0 - epsilon;
double factorVelBeam = 1.0 - factorVelBulk;
size_type nlocBulk = (size_type)(factorConf * factorVelBulk * totalP);
@@ -326,21 +399,33 @@ int main(int argc, char* argv[]) {
int rest = (int)(totalP - Total_particles);
if (Ippl::Comm->rank() < rest)
if (Ippl::Comm->rank() < rest) {
++nloc;
}
P->create(nloc);
PhaseDump<bunch_type> phase;
if constexpr (EnablePhaseDump) {
if (Ippl::Comm->size() != 1) {
msg << "Phase dump only supported on one rank" << endl;
IpplAbort();
}
phase.initialize(*std::max_element(nr.begin(), nr.end()),
*std::max_element(rmax.begin(), rmax.end()));
}
Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100 * Ippl::Comm->rank()));
Kokkos::parallel_for(nloc, generate_random<Vector_t, Kokkos::Random_XorShift64_Pool<>, Dim>(
P->R.getView(), P->P.getView(), rand_pool64, delta, kw, sigma,
muBulk, muBeam, nlocBulk, minU, maxU));
Kokkos::parallel_for(
nloc, generate_random<Vector_t<Dim>, Kokkos::Random_XorShift64_Pool<>, Dim>(
P->R.getView(), P->P.getView(), rand_pool64, delta, kw, sigma, muBulk, muBeam,
nlocBulk, minU, maxU));
Kokkos::fence();
Ippl::Comm->barrier();
IpplTimings::stopTimer(particleCreation);
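Particle sampling itself still uses a Kokkos random pool: each thread checks a generator out of the pool, draws uniform positions and Gaussian velocities, and returns it, with a per-rank seed offset (42 + 100 * rank above) to decorrelate ranks. A minimal standalone sketch of the pattern (views and parameters are illustrative, not the generate_random functor):

#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>

int main(int argc, char* argv[]) {
    Kokkos::initialize(argc, argv);
    {
        const int nloc = 1000;
        Kokkos::View<double*> x("x", nloc), v("v", nloc);
        Kokkos::Random_XorShift64_Pool<> pool(/* seed = */ 42);

        Kokkos::parallel_for(
            "sample particles", nloc, KOKKOS_LAMBDA(const int i) {
                auto gen = pool.get_state();
                x(i) = gen.drand(0.0, 1.0);   // uniform position in [0, 1)
                v(i) = gen.normal(0.0, 0.1);  // Gaussian velocity, sigma = 0.1
                pool.free_state(gen);         // return the generator to the pool
            });
        Kokkos::fence();
    }
    Kokkos::finalize();
    return 0;
}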
P->q = P->Q_m / totalP;
// P->dumpParticleData();
msg << "particles created and initial conditions assigned " << endl;
isFirstRepartition = false;
// The update after the particle creation is not needed as the
@@ -348,13 +433,13 @@ int main(int argc, char* argv[]) {
IpplTimings::startTimer(DummySolveTimer);
P->rho_m = 0.0;
P->solver_mp->solve();
P->runSolver();
IpplTimings::stopTimer(DummySolveTimer);
P->scatterCIC(totalP, 0, hr);
IpplTimings::startTimer(SolveTimer);
P->solver_mp->solve();
P->runSolver();
IpplTimings::stopTimer(SolveTimer);
P->gatherCIC();
@@ -404,7 +489,7 @@ int main(int argc, char* argv[]) {
// Field solve
IpplTimings::startTimer(SolveTimer);
P->solver_mp->solve();
P->runSolver();
IpplTimings::stopTimer(SolveTimer);
// gather E field
@@ -421,6 +506,10 @@ int main(int argc, char* argv[]) {
P->gatherStatistics(totalP);
IpplTimings::stopTimer(dumpDataTimer);
msg << "Finished time step: " << it + 1 << " time: " << P->time_m << endl;
if constexpr (EnablePhaseDump) {
phase.dump(it, P);
}
}
msg << TestName << ": End." << endl;
@@ -433,5 +522,19 @@ int main(int argc, char* argv[]) {
std::chrono::duration_cast<std::chrono::duration<double>>(end - start);
std::cout << "Elapsed time: " << time_chrono.count() << std::endl;
if constexpr (EnablePhaseDump) {
// clang-format off
msg << "--- Phase Space Parameters ---\n"
<< "Resolution: " << *std::max_element(nr.begin(), nr.end()) << "\n"
<< "Domain: " << rmax[Dim - 1] << "\n"
<< "Phase space axis: " << (Dim - 1)
<< "; range: [" << phase.minRecorded() << ", " << phase.maxRecorded() << "]\n"
<< "Particle count: " << totalP << "\n"
<< "Ranks: " << Ippl::Comm->size() << "\n"
<< "Timestep: " << dt << "\n"
<< "------------------------------" << endl;
// clang-format on
}
return 0;
}