Code indexing in gitaly is broken and leads to code not being visible to the user. We work on the issue with highest priority.

Skip to content
Snippets Groups Projects

Resolve "mixed-precision"

Merged montan_v requested to merge 146-mixed-precision into master
17 files
+ 1259
1286
Compare changes
  • Side-by-side
  • Inline
Files
17
+ 105
113
@@ -10,7 +10,7 @@
// stype = Field solver type (FFT and CG supported)
// lbthres = Load balancing threshold i.e., lbthres*100 is the maximum load imbalance
// percentage which can be tolerated and beyond which
// particle load balancing occurs. A value of 0.01 is good for many typical
// particle load balancing occurs. A value of 0.01 is good for many typical
// simulations.
// ovfactor = Over-allocation factor for the buffers used in the communication. Typical
// values are 1.0, 2.0. Value 1.0 means no over-allocation.
@@ -53,87 +53,86 @@ struct Newton1D {
double tol = 1e-12;
int max_iter = 20;
double pi = Kokkos::numbers::pi_v<double>;
T k, alpha, u;
KOKKOS_INLINE_FUNCTION
Newton1D() {}
KOKKOS_INLINE_FUNCTION
Newton1D(const T& k_, const T& alpha_,
const T& u_)
: k(k_), alpha(alpha_), u(u_) {}
KOKKOS_INLINE_FUNCTION
~Newton1D() {}
KOKKOS_INLINE_FUNCTION T f(T& x) {
T F;
F = x + (alpha * (Kokkos::sin(k * x) / k)) - u;
return F;
}
KOKKOS_INLINE_FUNCTION T fprime(T& x) {
T Fprime;
Fprime = 1 + (alpha * Kokkos::cos(k * x));
return Fprime;
}
KOKKOS_FUNCTION
void solve(T& x) {
int iterations = 0;
while (iterations < max_iter && Kokkos::fabs(f(x)) > tol) {
x = x - (f(x) / fprime(x));
iterations += 1;
}
}
};
template <typename T, class GeneratorPool, unsigned Dim>
struct generate_random {
T k, alpha, u;
using view_type = typename ippl::detail::ViewType<T, 1>::view_type;
using value_type = typename T::value_type;
// Output View for the random numbers
view_type x, v;
KOKKOS_INLINE_FUNCTION Newton1D() {}
// The GeneratorPool
GeneratorPool rand_pool;
KOKKOS_INLINE_FUNCTION Newton1D(const T& k_, const T& alpha_, const T& u_)
: k(k_)
, alpha(alpha_)
, u(u_) {}
value_type alpha;
KOKKOS_INLINE_FUNCTION ~Newton1D() {}
T k, minU, maxU;
KOKKOS_INLINE_FUNCTION T f(T& x) {
T F;
F = x + (alpha * (Kokkos::sin(k * x) / k)) - u;
return F;
}
// Initialize all members
generate_random(view_type x_, view_type v_, GeneratorPool rand_pool_,
value_type& alpha_, T& k_, T& minU_, T& maxU_)
: x(x_), v(v_), rand_pool(rand_pool_),
alpha(alpha_), k(k_), minU(minU_), maxU(maxU_) {}
KOKKOS_INLINE_FUNCTION T fprime(T& x) {
T Fprime;
Fprime = 1 + (alpha * Kokkos::cos(k * x));
return Fprime;
}
KOKKOS_INLINE_FUNCTION
void operator()(const size_t i) const {
// Get a random number state from the pool for the active thread
typename GeneratorPool::generator_type rand_gen = rand_pool.get_state();
KOKKOS_FUNCTION
void solve(T& x) {
int iterations = 0;
while (iterations < max_iter && Kokkos::fabs(f(x)) > tol) {
x = x - (f(x) / fprime(x));
iterations += 1;
}
}
};
value_type u;
for (unsigned d = 0; d < Dim; ++d) {
template <typename T, class GeneratorPool, unsigned Dim>
struct generate_random {
using view_type = typename ippl::detail::ViewType<T, 1>::view_type;
using value_type = typename T::value_type;
// Output View for the random numbers
view_type x, v;
// The GeneratorPool
GeneratorPool rand_pool;
value_type alpha;
T k, minU, maxU;
// Initialize all members
generate_random(view_type x_, view_type v_, GeneratorPool rand_pool_, value_type& alpha_, T& k_,
T& minU_, T& maxU_)
: x(x_)
, v(v_)
, rand_pool(rand_pool_)
, alpha(alpha_)
, k(k_)
, minU(minU_)
, maxU(maxU_) {}
KOKKOS_INLINE_FUNCTION void operator()(const size_t i) const {
// Get a random number state from the pool for the active thread
typename GeneratorPool::generator_type rand_gen = rand_pool.get_state();
value_type u;
for (unsigned d = 0; d < Dim; ++d) {
u = rand_gen.drand(minU[d], maxU[d]);
x(i)[d] = u / (1 + alpha);
Newton1D<value_type> solver(k[d], alpha, u);
solver.solve(x(i)[d]);
v(i)[d] = rand_gen.normal(0.0, 1.0);
}
u = rand_gen.drand(minU[d], maxU[d]);
x(i)[d] = u / (1 + alpha);
Newton1D<value_type> solver(k[d], alpha, u);
solver.solve(x(i)[d]);
v(i)[d] = rand_gen.normal(0.0, 1.0);
// Give the state back, which will allow another thread to acquire it
rand_pool.free_state(rand_gen);
}
// Give the state back, which will allow another thread to acquire it
rand_pool.free_state(rand_gen);
}
};
double CDF(const double& x, const double& alpha, const double& k) {
double cdf = x + (alpha / k) * std::sin(k * x);
return cdf;
double cdf = x + (alpha / k) * std::sin(k * x);
return cdf;
}
KOKKOS_FUNCTION
@@ -149,11 +148,11 @@ double PDF(const Vector_t<Dim>& xvec, const double& alpha, const Vector_t<Dim>&
const char* TestName = "LandauDamping";
int main(int argc, char *argv[]){
int main(int argc, char* argv[]) {
Ippl ippl(argc, argv);
Inform msg("LandauDamping");
Inform msg2all("LandauDamping",INFORM_ALL_NODES);
Inform msg2all("LandauDamping", INFORM_ALL_NODES);
auto start = std::chrono::high_resolution_clock::now();
@@ -179,18 +178,14 @@ int main(int argc, char *argv[]){
const size_type totalP = std::atoll(argv[arg++]);
const unsigned int nt = std::atoi(argv[arg++]);
msg << "Landau damping"
<< endl
<< "nt " << nt << " Np= "
<< totalP << " grid = " << nr
<< endl;
msg << "Landau damping" << endl << "nt " << nt << " Np= " << totalP << " grid = " << nr << endl;
using bunch_type = ChargedParticles<PLayout_t<Dim>, Dim, double>;
std::unique_ptr<bunch_type> P;
std::unique_ptr<bunch_type> P;
ippl::NDIndex<Dim> domain;
for (unsigned i = 0; i< Dim; i++) {
for (unsigned i = 0; i < Dim; i++) {
domain[i] = ippl::Index(nr[i]);
}
@@ -234,7 +229,7 @@ int main(int argc, char *argv[]){
if ((P->loadbalancethreshold_m != 1.0) && (Ippl::Comm->size() > 1)) {
msg << "Starting first repartition" << endl;
IpplTimings::startTimer(domainDecomposition);
isFirstRepartition = true;
isFirstRepartition = true;
const ippl::NDIndex<Dim>& lDom = FL.getLocalNDIndex();
const int nghost = P->rho_m.getNghost();
auto rhoview = P->rho_m.getView();
@@ -255,12 +250,12 @@ int main(int argc, char *argv[]){
});
Kokkos::fence();
P->initializeORB(FL, mesh);
P->repartition(FL, mesh, bunchBuffer, isFirstRepartition);
IpplTimings::stopTimer(domainDecomposition);
}
msg << "First domain decomposition done" << endl;
IpplTimings::startTimer(particleCreation);
@@ -282,30 +277,29 @@ int main(int argc, char *argv[]){
size_type nloc = (size_type)(factor * totalP);
size_type Total_particles = 0;
MPI_Allreduce(&nloc, &Total_particles, 1,
MPI_UNSIGNED_LONG, MPI_SUM, Ippl::getComm());
MPI_Allreduce(&nloc, &Total_particles, 1, MPI_UNSIGNED_LONG, MPI_SUM, Ippl::getComm());
int rest = (int) (totalP - Total_particles);
int rest = (int)(totalP - Total_particles);
if (Ippl::Comm->rank() < rest) {
++nloc;
}
P->create(nloc);
Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank()));
Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100 * Ippl::Comm->rank()));
Kokkos::parallel_for(nloc,
generate_random<Vector_t<Dim>, Kokkos::Random_XorShift64_Pool<>, Dim>(
P->R.getView(), P->P.getView(), rand_pool64, alpha, kw, minU, maxU));
Kokkos::fence();
Ippl::Comm->barrier();
IpplTimings::stopTimer(particleCreation);
P->q = P->Q_m/totalP;
IpplTimings::stopTimer(particleCreation);
P->q = P->Q_m / totalP;
msg << "particles created and initial conditions assigned " << endl;
isFirstRepartition = false;
//The update after the particle creation is not needed as the
//particles are generated locally
// The update after the particle creation is not needed as the
// particles are generated locally
IpplTimings::startTimer(DummySolveTimer);
P->rho_m = 0.0;
@@ -323,13 +317,12 @@ int main(int argc, char *argv[]){
IpplTimings::startTimer(dumpDataTimer);
P->dumpLandau();
P->gatherStatistics(totalP);
//P->dumpLocalDomains(FL, 0);
// P->dumpLocalDomains(FL, 0);
IpplTimings::stopTimer(dumpDataTimer);
// begin main timestep loop
msg << "Starting iterations ..." << endl;
for (unsigned int it=0; it<nt; it++) {
for (unsigned int it = 0; it < nt; it++) {
// LeapFrog time stepping https://en.wikipedia.org/wiki/Leapfrog_integration
// Here, we assume a constant charge-to-mass ratio of -1 for
// all the particles hence eliminating the need to store mass as
@@ -340,33 +333,32 @@ int main(int argc, char *argv[]){
P->P = P->P - 0.5 * dt * P->E;
IpplTimings::stopTimer(PTimer);
//drift
// drift
IpplTimings::startTimer(RTimer);
P->R = P->R + dt * P->P;
IpplTimings::stopTimer(RTimer);
// P->R.print();
//Since the particles have moved spatially update them to correct processors
IpplTimings::startTimer(updateTimer);
// Since the particles have moved spatially update them to correct processors
IpplTimings::startTimer(updateTimer);
PL.update(*P, bunchBuffer);
IpplTimings::stopTimer(updateTimer);
// Domain Decomposition
if (P->balance(totalP, it+1)) {
msg << "Starting repartition" << endl;
IpplTimings::startTimer(domainDecomposition);
P->repartition(FL, mesh, bunchBuffer, isFirstRepartition);
IpplTimings::stopTimer(domainDecomposition);
//IpplTimings::startTimer(dumpDataTimer);
//P->dumpLocalDomains(FL, it+1);
//IpplTimings::stopTimer(dumpDataTimer);
if (P->balance(totalP, it + 1)) {
msg << "Starting repartition" << endl;
IpplTimings::startTimer(domainDecomposition);
P->repartition(FL, mesh, bunchBuffer, isFirstRepartition);
IpplTimings::stopTimer(domainDecomposition);
// IpplTimings::startTimer(dumpDataTimer);
// P->dumpLocalDomains(FL, it+1);
// IpplTimings::stopTimer(dumpDataTimer);
}
// scatter the charge onto the underlying grid
P->scatterCIC(totalP, it + 1, hr);
//scatter the charge onto the underlying grid
P->scatterCIC(totalP, it+1, hr);
//Field solve
// Field solve
IpplTimings::startTimer(SolveTimer);
P->runSolver();
IpplTimings::stopTimer(SolveTimer);
@@ -374,0+366,0 @@
// gather E field
P->gatherCIC();
//kick
// kick
IpplTimings::startTimer(PTimer);
P->P = P->P - 0.5 * dt * P->E;
IpplTimings::stopTimer(PTimer);
@@ -384,7 +376,7 @@ int main(int argc, char *argv[]){
P->dumpLandau();
P->gatherStatistics(totalP);
IpplTimings::stopTimer(dumpDataTimer);
msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl;
msg << "Finished time step: " << it + 1 << " time: " << P->time_m << endl;
}
msg << "LandauDamping: End." << endl;
@@ -393,9 +385,9 @@ int main(int argc, char *argv[]){
IpplTimings::print(std::string("timing.dat"));
auto end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> time_chrono = std::chrono::duration_cast<std::chrono::duration<double>>(end - start);
std::chrono::duration<double> time_chrono =
std::chrono::duration_cast<std::chrono::duration<double>>(end - start);
std::cout << "Elapsed time: " << time_chrono.count() << std::endl;
return 0;
}
Loading