Compare revisions

ae5797af · ae5797af · ae5797af · ae5797af · ae5797af · ae5797af
--- a/src/Algorithms/DistributionMoments.h
+++ b/src/Algorithms/DistributionMoments.h
@@ -68,12 +68,14 @@ public:
    void computeMoments(ippl::ParticleAttrib<Vector_t<double,3>>::view_type& Rview,
                        ippl::ParticleAttrib<Vector_t<double,3>>::view_type& Pview,
                        ippl::ParticleAttrib<double>::view_type& Mview,
-                        size_t Np);
-    void computeMinMaxPosition(ippl::ParticleAttrib<Vector_t<double,3>>::view_type& Rview);
+                        size_t Np,
+                        size_t Nlocal);
+    void computeMinMaxPosition(ippl::ParticleAttrib<Vector_t<double,3>>::view_type& Rview, size_t Nlcoal);
    void computeMeanKineticEnergy();
    void computeDebyeLength(ippl::ParticleAttrib<Vector_t<double,3>>::view_type& Rview,
                        ippl::ParticleAttrib<Vector_t<double,3>>::view_type& Pview,
                        size_t Np,
+                        size_t Nlocal,
                        double density);
    void computePlasmaParameter(double);

@@ -121,7 +123,8 @@ public:
    void computeMeans(ippl::ParticleAttrib<Vector_t<double,3>>::view_type&  Rview,
                                         ippl::ParticleAttrib<Vector_t<double,3>>::view_type&  Pview,
                                         ippl::ParticleAttrib<double>::view_type&  Mview,
-                                         size_t Np);
+                                         size_t Np,
+                                         size_t Nlocal);
 private:
    bool isParticleExcluded(const OpalParticle&) const;


--- a/src/Algorithms/OrbitThreader.cpp
+++ b/src/Algorithms/OrbitThreader.cpp
@@ -134,14 +134,13 @@ void OrbitThreader::execute() {
    errorFlag_m = EVERYTHINGFINE;

    *gmsg << "OrbitThreader dt_m= " << dt_m << endl;
-
-
+    
    do {
        checkElementLengths(elementSet);
        if (containsCavity(elementSet)) {
            autophaseCavities(elementSet, visitedElements);
        }
-
+        
        double initialS              = pathLength_m;
        Vector_t<double, 3> initialR = r_m;
        Vector_t<double, 3> initialP = p_m;
@@ -149,6 +148,9 @@ void OrbitThreader::execute() {

        integrate(elementSet, maxDistance);

+        *gmsg << "OrbitThreader maxDistance= " << maxDistance << endl;
+        *gmsg << "OrbitThreader #elements  = " << elementSet.size() << endl;
+        
        registerElement(elementSet, initialS, initialR, initialP);

        if (errorFlag_m == HITMATERIAL) {
@@ -251,6 +253,7 @@ void OrbitThreader::integrate(const IndexMap::value_t& activeSet, double /*maxDr
        const Vector<double, 3> d = r_m - oldR;

        pathLength_m += std::copysign(euclidean_norm(d), dt_m);
+
        ++currentStep_m;
        time_m += dt_m;

@@ -439,7 +442,6 @@ void OrbitThreader::computeBoundingBox() {
    FieldList allElements         = itsOpalBeamline_m.getElementByType(ElementType::ANY);
    FieldList::iterator it        = allElements.begin();
    const FieldList::iterator end = allElements.end();
-
    for (; it != end; ++it) {
        if (it->getElement()->getType() == ElementType::MARKER) {
            continue;
@@ -452,26 +454,25 @@ void OrbitThreader::computeBoundingBox() {

 void OrbitThreader::updateBoundingBoxWithCurrentPosition() {
    Vector_t<double, 3> dR = Physics::c * dt_m * p_m / Util::getGamma(p_m);
-
-    /// \todo needs to be fixed
-    /*
-      for (const Vector_t<double, 3>& pos : {r_m - 10 * dR, r_m + 10 * dR}) {
+    std::array<Vector_t<double, 3>, 2> positions = {r_m - 10 * dR, r_m + 10 * dR};
+   
+    for (const Vector_t<double, 3>& pos : positions) {
        globalBoundingBox_m.enlargeToContainPosition(pos);
    }
-    */
+    
 }

 double OrbitThreader::computeDriftLengthToBoundingBox(
    const std::set<std::shared_ptr<Component>>& elements, const Vector_t<double, 3>& position,
    const Vector_t<double, 3>& direction) const {
-    /*
+
    if (elements.empty()
        || (elements.size() == 1 && (*elements.begin())->getType() == ElementType::DRIFT)) {
        boost::optional<Vector_t<double, 3>> intersectionPoint =
            globalBoundingBox_m.getIntersectionPoint(position, direction);
-
-        return intersectionPoint ? euclidean_norm(intersectionPoint.get() - position) : 10.0;
+        const Vector_t<double, 3> r = intersectionPoint.get() - position;
+        return intersectionPoint ? euclidean_norm(r) : 10.0;
    }
-    */
+
    return std::numeric_limits<double>::max();
 }
\ No newline at end of file
--- a/src/Algorithms/ParallelTracker.cpp
+++ b/src/Algorithms/ParallelTracker.cpp
@@ -71,14 +71,15 @@ ParallelTracker::ParallelTracker(
      zstart_m(0.0),
      dtCurrentTrack_m(0.0),
      minStepforReBin_m(-1),
-      repartFreq_m(-1),
+      repartFreq_m(0),
      emissionSteps_m(std::numeric_limits<unsigned int>::max()),
      numParticlesInSimulation_m(0),
      timeIntegrationTimer1_m(IpplTimings::getTimer("TIntegration1")),
      timeIntegrationTimer2_m(IpplTimings::getTimer("TIntegration2")),
      fieldEvaluationTimer_m(IpplTimings::getTimer("External field eval")),
      PluginElemTimer_m(IpplTimings::getTimer("PluginElements")),
-      BinRepartTimer_m(IpplTimings::getTimer("Binaryrepart")) {
+      BinRepartTimer_m(IpplTimings::getTimer("Binaryrepart")),
+      OrbThreader_m(IpplTimings::getTimer("OrbThreader")) {
 }

 ParallelTracker::ParallelTracker(
@@ -96,22 +97,21 @@ ParallelTracker::ParallelTracker(
      zstart_m(zstart),
      dtCurrentTrack_m(0.0),
      minStepforReBin_m(-1),
-      repartFreq_m(-1),
+      repartFreq_m(0),
      emissionSteps_m(std::numeric_limits<unsigned int>::max()),
      numParticlesInSimulation_m(0),
      timeIntegrationTimer1_m(IpplTimings::getTimer("TIntegration1")),
      timeIntegrationTimer2_m(IpplTimings::getTimer("TIntegration2")),
      fieldEvaluationTimer_m(IpplTimings::getTimer("External field eval")),
-      BinRepartTimer_m(IpplTimings::getTimer("Binaryrepart")) {
-
-    *gmsg << "* ParallelTracker zstop.size()= " << zstop.size() << endl;
-
-    for (unsigned int i = 0; i < zstop.size(); ++i) {
-        stepSizes_m.push_back(dt[i], zstop[i], maxSteps[i]);
-    }
+      BinRepartTimer_m(IpplTimings::getTimer("Binaryrepart")),
+      OrbThreader_m(IpplTimings::getTimer("OrbThreader")) {
+    
+      for (unsigned int i = 0; i < zstop.size(); ++i) {
+          stepSizes_m.push_back(dt[i], zstop[i], maxSteps[i]);
+      }

-    stepSizes_m.sortAscendingZStop();
-    stepSizes_m.resetIterator();
+      stepSizes_m.sortAscendingZStop();
+      stepSizes_m.resetIterator();
 }

 ParallelTracker::~ParallelTracker() {
@@ -348,13 +348,23 @@ void ParallelTracker::execute() {
    *gmsg << "itsBunch_m->RefPartR_m= " << itsBunch_m->RefPartR_m << endl;
    *gmsg << "itsBunch_m->RefPartP_m= " << itsBunch_m->RefPartP_m << endl;

+    IpplTimings::startTimer(OrbThreader_m);
+
+    bool const psDump0 = 0;
+    bool const statDump0 = 0;
+
+    writePhaseSpace(0, psDump0, statDump0);
+    msg << level2 << "Dump initial phase space" << endl;
+
+
    OrbitThreader oth(
        itsReference, itsBunch_m->RefPartR_m, itsBunch_m->RefPartP_m, pathLength_m, -rmin(2),
        itsBunch_m->getT(), (back_track ? -minTimeStep : minTimeStep), stepSizes_m,
        itsOpalBeamline_m);

    oth.execute();
-
+    IpplTimings::stopTimer(OrbThreader_m);
+    
    BoundingBox globalBoundingBox = oth.getBoundingBox();

    numParticlesInSimulation_m = itsBunch_m->getTotalNum();
@@ -368,7 +378,6 @@ void ParallelTracker::execute() {
    OPALTimer::Timer myt1;
    *gmsg << "* Track start at: " << myt1.time() << ", t= " << Util::getTimeString(time) << "; "
          << "zstart at: " << Util::getLengthString(pathLength_m) << endl;
-
    *gmsg << "* Executing ParallelTracker\n"
          << "* Initial dt = " << Util::getTimeString(itsBunch_m->getdT()) << "\n"
          << "* Max integration steps = " << stepSizes_m.getMaxSteps() << ", next step = " << step
@@ -383,8 +392,9 @@ void ParallelTracker::execute() {
    OpalData::getInstance()->setInPrepState(false);

    stepSizes_m.printDirect(*gmsg);
-
+    
    while (!stepSizes_m.reachedEnd()) {
+
        unsigned long long trackSteps = stepSizes_m.getNumSteps() + step;
        dtCurrentTrack_m              = stepSizes_m.getdT();
        changeDT(back_track);
@@ -394,23 +404,22 @@ void ParallelTracker::execute() {
            if (itsBunch_m->getTotalNum() > 0) {
                itsBunch_m->get_bounds(rmin, rmax);
            }
-
+            // ADA
            timeIntegration1(pusher);
-
+            
            computeSpaceChargeFields(step);
            
            // \todo for a drift we can neglect that 
            // computeExternalFields(oth);

            timeIntegration2(pusher);
-
+            
            selectDT(back_track);
            // \todo emitParticles(step);
            //selectDT(back_track);

            itsBunch_m->incrementT();

-            
            if (itsBunch_m->getT() > 0.0 || itsBunch_m->getdT() < 0.0) {
                updateReference(pusher);
            }
@@ -421,7 +430,7 @@ void ParallelTracker::execute() {
            }

            itsBunch_m->set_sPos(pathLength_m);
-            
+
            // if (hasEndOfLineReached(globalBoundingBox)) break;
  
            bool const psDump =
@@ -459,7 +468,7 @@ void ParallelTracker::execute() {

    writePhaseSpace((step + 1), psDump, statDump);

-    msg << level2 << "Dump phase space of last step" << endl;
+    *gmsg << "* Dump phase space of last step" << endl;

    itsOpalBeamline_m.switchElementsOff();

@@ -508,6 +517,7 @@ void ParallelTracker::timeIntegration2(BorisPusher& pusher) {
    // switchElements();
    pushParticles(pusher);

+    
    auto dtview  = itsBunch_m->getParticleContainer()->dt.getView();
    double newdT = itsBunch_m->getdT();

@@ -516,7 +526,7 @@ void ParallelTracker::timeIntegration2(BorisPusher& pusher) {
                         KOKKOS_LAMBDA(const int i) {
                             dtview(i) = newdT;
                         });                     
-
+    
    IpplTimings::stopTimer(timeIntegrationTimer2_m);
 }

@@ -572,6 +582,7 @@ void ParallelTracker::computeSpaceChargeFields(unsigned long long step) {

     */

+    
    const matrix_t                rot = referenceToBeamCSTrafo.getRotationMatrix();
    const ippl::Vector<double, 3> org = referenceToBeamCSTrafo.getOrigin();

@@ -627,7 +638,7 @@ void ParallelTracker::computeSpaceChargeFields(unsigned long long step) {
            prod_boost_vector(boost::numeric::ublas::trans(rotationMatrix_m)

     */
-
+    
    Kokkos::parallel_for(
                         "CSTrafo:transformTo", ippl::getRangePolicy(Rview),
                         KOKKOS_LAMBDA(const int i) {                           
@@ -753,18 +764,19 @@ void ParallelTracker::doBinaryRepartition() {

 void ParallelTracker::dumpStats(long long step, bool psDump, bool statDump) {
    OPALTimer::Timer myt2;
-    Inform msg("ParallelTracker ", *gmsg);

+    /*
    if (itsBunch_m->getGlobalTrackStep() % 1000 + 1 == 1000) {
-        msg << level1;
+        *gmsg << level1;
    } else if (itsBunch_m->getGlobalTrackStep() % 100 + 1 == 100) {
-        msg << level2;
+        *gmsg << level2;
    } else {
-        msg << level3;
+        *gmsg << level3;
    }
-
+    */
+    
    if (numParticlesInSimulation_m == 0) {
-        msg << myt2.time() << " "
+        *gmsg << "* " << myt2.time() << " "
            << "Step " << std::setw(6) << itsBunch_m->getGlobalTrackStep() << "; "
            << "   -- no emission yet --     "
            << "t= " << Util::getTimeString(itsBunch_m->getT()) << endl;
@@ -778,7 +790,7 @@ void ParallelTracker::dumpStats(long long step, bool psDump, bool statDump) {
            "ParallelTracker::dumpStats()",
            "there seems to be something wrong with the position of the bunch!");
    } else {
-        msg << myt2.time() << " "
+        *gmsg << "* " << myt2.time() << " "
            << "Step " << std::setw(6) << itsBunch_m->getGlobalTrackStep() << " "
            << "at " << Util::getLengthString(pathLength_m) << ", "
            << "t= " << Util::getTimeString(itsBunch_m->getT()) << ", "
@@ -790,7 +802,6 @@ void ParallelTracker::dumpStats(long long step, bool psDump, bool statDump) {

 void ParallelTracker::setOptionalVariables() {

-    Inform msg("ParallelTracker ", *gmsg);
    /*
    minStepforReBin_m = Options::minStepForRebin;
    RealVariable* br =
@@ -802,14 +813,14 @@ void ParallelTracker::setOptionalVariables() {

    // there is no point to do repartitioning with one node
    if (ippl::Comm->size() == 1) {
-        repartFreq_m = std::numeric_limits<unsigned int>::max();
+        repartFreq_m = std::numeric_limits<unsigned long long>::max();
    } else {
        repartFreq_m = Options::repartFreq * 100;
        RealVariable* rep =
            dynamic_cast<RealVariable*>(OpalData::getInstance()->find("REPARTFREQ"));
        if (rep)
            repartFreq_m = static_cast<int>(rep->getReal());
-        msg << level2 << "REPARTFREQ " << repartFreq_m << endl;
+        *gmsg  << "* REPARTFREQ " << repartFreq_m << endl;
    }
 }

@@ -833,8 +844,6 @@ void ParallelTracker::setTime() {
 }

 void ParallelTracker::writePhaseSpace(const long long /*step*/, bool psDump, bool statDump) {
-    extern Inform* gmsg;
-    Inform msg("OPAL ", *gmsg);
    Vector_t<double, 3> externalE, externalB;
    Vector_t<double, 3> FDext[2];  // FDext = {BHead, EHead, BRef, ERef, BTail, ETail}.

@@ -858,12 +867,14 @@ void ParallelTracker::writePhaseSpace(const long long /*step*/, bool psDump, boo

    if (statDump) {
        itsDataSink_m->dumpSDDS(itsBunch_m, FDext, -1.0);
-        msg << level3 << "* Wrote beam statistics." << endl;
+        *gmsg << "* Wrote beam statistics." << endl;
    }

-    /* \todo
-
    if (psDump && (itsBunch_m->getTotalNum() > 0)) {
+        // Write bunch to .h5 file.
+        itsDataSink_m->dumpH5(itsBunch_m, FDext);
+
+        /*
        // Write fields to .h5 file.
        const size_t localNum    = itsBunch_m->getLocalNum();
        double distToLastStop    = stepSizes_m.getFinalZStop() - pathLength_m;
@@ -910,9 +921,13 @@ void ParallelTracker::writePhaseSpace(const long long /*step*/, bool psDump, boo
        if (!statDump && !driftToCorrectPosition)
            itsBunch_m->calcBeamParameters();

-        msg << *itsBunch_m << endl;
+            msg << *itsBunch_m << endl;
+        
        itsDataSink_m->dumpH5(itsBunch_m, FDext);
-
+        */
+        
+        /*
+        
        if (driftToCorrectPosition) {
            if (localNum > 0) {
                itsBunch_m->R = stashedR;
@@ -925,9 +940,10 @@ void ParallelTracker::writePhaseSpace(const long long /*step*/, bool psDump, boo
            itsBunch_m->calcBeamParameters();
        }

-        msg << level2 << "* Wrote beam phase space." << endl;
+        */
+        
+        *gmsg << level2 << "* Wrote beam phase space." << endl;
    }
-    */
 }

 void ParallelTracker::updateReference(const BorisPusher& pusher) {
@@ -977,7 +993,7 @@ void ParallelTracker::updateReferenceParticle(const BorisPusher& pusher) {
 void ParallelTracker::transformBunch(const CoordinateSystemTrafo& trafo) {
    const unsigned int localNum = itsBunch_m->getLocalNum();
    for (unsigned int i = 0; i < localNum; ++i) {
-        /*
+        /* \todo host device .... 
        itsBunch_m->R[i]  = trafo.transformTo(itsBunch_m->R[i]);
        itsBunch_m->P[i]  = trafo.rotateTo(itsBunch_m->P[i]);
        itsBunch_m->Ef[i] = trafo.rotateTo(itsBunch_m->Ef[i]);
@@ -987,11 +1003,21 @@ void ParallelTracker::transformBunch(const CoordinateSystemTrafo& trafo) {
 }

 void ParallelTracker::updateRefToLabCSTrafo() {
-    itsBunch_m->toLabTrafo_m.transformFrom(itsBunch_m->RefPartR_m);
-    Vector_t<double, 3> R = itsBunch_m->RefPartR_m;
+    /*
+    Inform m("updateRefToLabCSTrafo ");
+    m << " initial RefPartR: " << itsBunch_m->RefPartR_m << endl;
+    m << " RefPartR after transformFrom( " << itsBunch_m->RefPartR_m << endl;
    itsBunch_m->toLabTrafo_m.rotateFrom(itsBunch_m->RefPartP_m);
    Vector_t<double, 3> P = itsBunch_m->RefPartP_m;
+    m << " CS " << itsBunch_m->toLabTrafo_m << endl;
+    m << " rotMat= " << itsBunch_m->toLabTrafo_m.getRotationMatrix() << endl;
+    m << " initial: path Lenght= " << pathLength_m << endl;
+    m << " dt= " << itsBunch_m->getdT() << " R=" << R << endl;
+    */

+    Vector_t<double, 3> R = itsBunch_m->toLabTrafo_m.transformFrom(itsBunch_m->RefPartR_m);
+    Vector_t<double, 3> P = itsBunch_m->toLabTrafo_m.transformFrom(itsBunch_m->RefPartP_m);
+    
    pathLength_m += std::copysign(1, itsBunch_m->getdT()) * euclidean_norm(R);

    CoordinateSystemTrafo update(R, getQuaternion(P, Vector_t<double, 3>(0, 0, 1)));

--- a/src/Algorithms/ParallelTracker.h
+++ b/src/Algorithms/ParallelTracker.h
@@ -95,7 +95,7 @@ class ParallelTracker : public Tracker {
    int minStepforReBin_m;

    // this variable controls the minimal number of steps until we repartition the particles
-    unsigned int repartFreq_m;
+    unsigned long long repartFreq_m;

    unsigned int emissionSteps_m;

@@ -107,7 +107,8 @@ class ParallelTracker : public Tracker {
    IpplTimings::TimerRef WakeFieldTimer_m;
    IpplTimings::TimerRef PluginElemTimer_m;
    IpplTimings::TimerRef BinRepartTimer_m;
-
+    IpplTimings::TimerRef OrbThreader_m;
+    
    std::set<ParticleMatterInteractionHandler*> activeParticleMatterInteractionHandlers_m;
    bool particleMatterStatus_m;

@@ -301,23 +302,70 @@ inline void ParallelTracker::visitTravelingWave(const TravelingWave& as) {
 }

 inline void ParallelTracker::kickParticles(const BorisPusher& pusher) {
-
+    
    auto Rview  = itsBunch_m->getParticleContainer()->R.getView();
    auto Pview  = itsBunch_m->getParticleContainer()->P.getView();
    auto dtview = itsBunch_m->getParticleContainer()->dt.getView();
    auto Efview = itsBunch_m->getParticleContainer()->E.getView();
    auto Bfview = itsBunch_m->getParticleContainer()->B.getView();

+
+    const double mass = itsReference.getM();
+    const double charge = itsReference.getQ();
+
    Kokkos::parallel_for(
                         "kickParticles", ippl::getRangePolicy(Rview),
                         KOKKOS_LAMBDA(const int i) {
-                             Vector_t<double, 3> x = {Rview(i)[0],Rview(i)[1],Rview(i)[2]};
                             Vector_t<double, 3> p = {Pview(i)[0],Pview(i)[1],Pview(i)[2]};

                             Vector_t<double, 3> e = {Efview(i)[0],Efview(i)[1],Efview(i)[2]};
                             Vector_t<double, 3> b = {Bfview(i)[0],Bfview(i)[1],Bfview(i)[2]};
                             double dt = dtview(i);
-                             pusher.kick(x,p,e,b,dt);
+
+                             // pusher.kick(x,p,e,b,dt);
+                             // Implementation follows chapter 4-4, p. 61 - 63 from
+                             // Birdsall, C. K. and Langdon, A. B. (1985). Plasma physics
+                             // via computer simulation.
+                             //
+                             // Up to finite precision effects, the new implementation is equivalent to the
+                             // old one, but uses less floating point operations.
+                             //
+                             // Relativistic variant implemented below is described in
+                             // chapter 15-4, p. 356 - 357.
+                             // However, since other units are used here, small
+                             // modifications are required. The relativistic variant can be derived
+                             // from the nonrelativistic one by replacing
+                             //     mass
+                             // by
+                             //     gamma * rest mass
+                             // and transforming the units.
+                             //
+                             // Parameters:
+                             //     R = x / (c * dt): Scaled position x, not used in here
+                             //     P = v / c * gamma: Scaled velocity v
+                             //     Ef: Electric field
+                             //     Bf: Magnetic field
+                             //     dt: Timestep
+                             //     mass = rest energy = rest mass * c * c
+                             //     charge
+                             
+                             // Half step E
+                             p += 0.5 * dt * charge * Physics::c / mass * e;
+
+                             // Full step B
+
+                             const double gamma          = Kokkos::sqrt(1.0 + dot(p, p));
+                             Vector_t<double, 3> const t = 0.5 * dt * charge * Physics::c * Physics::c / (gamma * mass) * b;
+                             Vector_t<double, 3> const w = p + cross(p, t);
+                             Vector_t<double, 3> const s = 2.0 / (1.0 + dot(t, t)) * t;
+                             p += cross(w, s);
+
+
+                             // Half step E
+                             p += 0.5 * dt * charge * Physics::c / mass * e;
+
+                             Pview(i) = p;
+                                 
                         });

    itsBunch_m->getParticleContainer()->update();
@@ -338,7 +386,19 @@ inline void ParallelTracker::pushParticles(const BorisPusher& pusher) {
                             Vector_t<double, 3> x = {Rview(i)[0],Rview(i)[1],Rview(i)[2]};
                             Vector_t<double, 3> p = {Pview(i)[0],Pview(i)[1],Pview(i)[2]};
                             double dt = dtview(i);
-                             pusher.push(x,p,dt);
+
+                                 /** \f[ \vec{x}_{n+1/2} = \vec{x}_{n} + \frac{1}{2}\vec{v}_{n-1/2}\quad (= \vec{x}_{n} +
+                                  * \frac{\Delta t}{2} \frac{\vec{\beta}_{n-1/2}\gamma_{n-1/2}}{\gamma_{n-1/2}}) \f]
+                                  *
+                                  * \code
+                                  * R[i] += 0.5 * P[i] * recpgamma;
+                                  * \endcode
+                                  */
+                             // \TODO check +-
+                             
+                             // pusher.push(x,p,dt);
+                             x = 0.5 * dt * p / Kokkos::sqrt(1.0 + dot(p));
+                             Rview(i) += x;
                         });



--- a/src/Algorithms/Tracker.cpp
+++ b/src/Algorithms/Tracker.cpp
@@ -63,7 +63,7 @@
 #include "Fields/BMultipoleField.h"

 // FIXME Remove headers and dynamic_cast in readOneBunchFromFile
-#include "PartBunch/PartBunch.hpp"
+#include "PartBunch/PartBunch.h"

 #include <cfloat>
 #include <cmath>

--- a/src/Algorithms/Tracker.h
+++ b/src/Algorithms/Tracker.h
@@ -63,7 +63,7 @@
 #define CLASSIC_Tracker_HH

 #include "Algorithms/AbstractTracker.h"
-#include "PartBunch/PartBunch.hpp"
+#include "PartBunch/PartBunch.h"
 #include "Algorithms/PartData.h"

 #include "Utilities/ClassicField.h"

--- a/src/BeamlineGeometry/Euclid3DGeometry.cpp
+++ b/src/BeamlineGeometry/Euclid3DGeometry.cpp
@@ -70,6 +70,10 @@ Euclid3D Euclid3DGeometry::getTransform(double /*fromS*/, double /*toS*/) const
    throw GeneralClassicException("Euclid3DGeometry::getTransform", "Not implemented");
 }

+Euclid3D Euclid3DGeometry::getTransform(double /*fromS*/) const {
+    throw GeneralClassicException("Euclid3DGeometry::getTransform", "Not implemented");
+}
+
 Euclid3D Euclid3DGeometry::getTotalTransform() const {
    return transformation_m;
 }
\ No newline at end of file
--- a/src/BeamlineGeometry/Euclid3DGeometry.h
+++ b/src/BeamlineGeometry/Euclid3DGeometry.h
@@ -70,6 +70,8 @@ class Euclid3DGeometry : public BGeometryBase {
    //  position [b]fromS[/b] to the position [b]toS[/b].
    virtual Euclid3D getTransform(double fromS, double toS) const;

+    virtual Euclid3D getTransform(double fromS) const;
+
    /// Get total transform from beginning to end
    //  Corresponds to the Euclid3D
    virtual Euclid3D getTotalTransform() const;

--- a/src/BeamlineGeometry/VarRadiusGeometry.cpp
+++ b/src/BeamlineGeometry/VarRadiusGeometry.cpp
@@ -45,3 +45,10 @@ Euclid3D VarRadiusGeometry::getTransform(double fromS, double toS) const {
    v.setZ(ref_to[1] - ref_from[1]);
    return v;
 }
+
+Euclid3D VarRadiusGeometry::getTransform(double fromS) const {
+    throw GeneralClassicException("Euclid3DGeometry::getTransform", "Not implemented");
+}
+
+
+
--- a/src/BeamlineGeometry/VarRadiusGeometry.h
+++ b/src/BeamlineGeometry/VarRadiusGeometry.h
@@ -116,6 +116,13 @@ public:
     *  to the entrance of the element
     *  Equivalent to getTransform(0.0, getEntrance())
     */
+    
+    virtual Euclid3D getTransform(double fromS) const;
+    /** Transform of the local coordinate system from the origin
+     *  to the entrance of the element
+     *  Equivalent to getTransform(0.0, getEntrance())
+     */
+    
    virtual Euclid3D getEntranceFrame() const;
    /** Transform of the local coordinate system from the origin
     *  to the exit of the element

--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -117,6 +117,8 @@ target_link_libraries( opalx
    z
    ${CMAKE_DL_LIBS}
    ${MPI_CXX_LIBRARIES}
+#    ubsan
+#    asan
    )

 message (STATUS "src/CmakeLists IPPL_LIBRARY : ${IPPL_LIBRARY}")

--- a/src/Distribution/CMakeLists.txt
+++ b/src/Distribution/CMakeLists.txt
 set (_SRCS
    Distribution.cpp
+    FlatTop.cpp
+    Gaussian.cpp
    LaserProfile.cpp
+    MultiVariateGaussian.cpp
    )

 include_directories (
@@ -11,8 +14,11 @@ add_opal_sources(${_SRCS})

 set (HDRS
    Distribution.h
+    FlatTop.h
+    Gaussian.h
    LaserProfile.h
    MapGenerator.h
+    MultiVariateGaussian.h
    matrix_vector_operation.h
    )


--- a/src/Distribution/Distribution.cpp
+++ b/src/Distribution/Distribution.cpp
@@ -60,7 +60,9 @@ using Base = ippl::ParticleBase<ippl::ParticleSpatialLayout<T, Dim>>;
 using view_type = typename ippl::detail::ViewType<ippl::Vector<double, Dim>, 1>::view_type;

 namespace DISTRIBUTION {
-    enum { TYPE, FNAME, SIGMAX, SIGMAY, SIGMAZ, SIGMAPX, SIGMAPY, SIGMAPZ, SIZE, CUTOFFPX, CUTOFFPY, CUTOFFPZ, CUTOFFX, CUTOFFY, CUTOFFLONG };
+    enum { TYPE, FNAME, SIGMAX, SIGMAY, SIGMAZ, SIGMAPX, SIGMAPY, SIGMAPZ, CORR,
+           CUTOFFPX, CUTOFFPY, CUTOFFPZ, CUTOFFX, CUTOFFY, CUTOFFLONG, CORRX, CORRY,
+           CORRZ, CORRT, SIGMAT, TPULSEFWHM, TRISE, TFALL, FTOSCAMPLITUDE, FTOSCPERIODS, EMITTED, SIZE };
 }

 /*
@@ -81,7 +83,7 @@ Distribution::Distribution()
        "The DISTRIBUTION statement defines data for the 6D particle distribution."),
      distrTypeT_m(DistributionType::NODIST) {
    itsAttr[DISTRIBUTION::TYPE] =
-        Attributes::makePredefinedString("TYPE", "Distribution type.", {"GAUSS", "FROMFILE"});
+        Attributes::makePredefinedString("TYPE", "Distribution type.", {"GAUSS", "MULTIVARIATEGAUSS", "FLATTOP", "FROMFILE"});

    itsAttr[DISTRIBUTION::FNAME] =
        Attributes::makeString("FNAME", "File for reading in 6D particle coordinates.", "");
@@ -94,6 +96,41 @@ Distribution::Distribution()
    itsAttr[DISTRIBUTION::SIGMAPY] = Attributes::makeReal("SIGMAPY", "SIGMApy", 0.0);
    itsAttr[DISTRIBUTION::SIGMAPZ] = Attributes::makeReal("SIGMAPZ", "SIGMApz", 0.0);

+    itsAttr[DISTRIBUTION::CORR] = Attributes::makeRealArray("CORR", "r correlation");
+
+    itsAttr[DISTRIBUTION::CUTOFFPX] = Attributes::makeReal("CUTOFFPX", "Distribution cutoff px dimension in units of sigma.", 3.0);
+    itsAttr[DISTRIBUTION::CUTOFFPY] = Attributes::makeReal("CUTOFFPY", "Distribution cutoff py dimension in units of sigma.", 3.0);
+    itsAttr[DISTRIBUTION::CUTOFFPZ] = Attributes::makeReal("CUTOFFPZ", "Distribution cutoff pz dimension in units of sigma.", 3.0);
+
+    itsAttr[DISTRIBUTION::CUTOFFX] = Attributes::makeReal("CUTOFFX", "Distribution cutoff x direction in units of sigma.", 3.0);
+    itsAttr[DISTRIBUTION::CUTOFFY] = Attributes::makeReal("CUTOFFY", "Distribution cutoff r direction in units of sigma.", 3.0);
+    itsAttr[DISTRIBUTION::CUTOFFLONG] = Attributes::makeReal("CUTOFFLONG", "Distribution cutoff z or t direction in units of sigma.", 3.0);
+
+    itsAttr[DISTRIBUTION::CORRX] = Attributes::makeReal("CORRX", "x/px correlation, (R12 in transport notation).", 0.0);
+    itsAttr[DISTRIBUTION::CORRY] = Attributes::makeReal("CORRY", "y/py correlation, (R34 in transport notation).", 0.0);
+    itsAttr[DISTRIBUTION::CORRZ] = Attributes::makeReal("CORRZ", "z/pz correlation, (R56 in transport notation).", 0.0);
+    itsAttr[DISTRIBUTION::CORRT] = Attributes::makeReal("CORRT", "t/pt correlation, (R56 in transport notation).", 0.0);
+
+    itsAttr[DISTRIBUTION::SIGMAT] = Attributes::makeReal("SIGMAT", "SIGMAt (m)", 0.0);
+    itsAttr[DISTRIBUTION::TPULSEFWHM] = Attributes::makeReal("TPULSEFWHM", "Pulse FWHM for emitted distribution.", 0.0);
+    itsAttr[DISTRIBUTION::TRISE] = Attributes::makeReal("TRISE", "Rise time for emitted distribution.", 0.0);
+    itsAttr[DISTRIBUTION::TFALL] = Attributes::makeReal("TFALL", "Fall time for emitted distribution.", 0.0);
+
+    itsAttr[DISTRIBUTION::FTOSCAMPLITUDE]
+        = Attributes::makeReal("FTOSCAMPLITUDE", "Amplitude of oscillations superimposed "
+                               "on flat top portion of emitted GAUSS "
+                               "distribtuion (in percent of flat top "
+                               "amplitude)",0.0);
+
+    itsAttr[DISTRIBUTION::FTOSCPERIODS]
+        = Attributes::makeReal("FTOSCPERIODS", "Number of oscillations superimposed on "
+                               "flat top portion of emitted GAUSS "
+                               "distribution", 0.0);
+
+    itsAttr[DISTRIBUTION::EMITTED]
+        = Attributes::makeBool("EMITTED", "Emitted beam, from cathode, as opposed to "
+                               "an injected beam.", false);
+
    registerOwnership(AttributeHandler::STATEMENT);
 }

@@ -163,7 +200,19 @@ Inform& Distribution::printInfo(Inform& os) const {
    if (OpalData::getInstance()->inRestartRun()) {
        os << "* In restart. Distribution read in from .h5 file." << endl;
    } else {
-        printDistGauss(os);
+        switch (distrTypeT_m) {
+            case DistributionType::GAUSS:
+                printDistGauss(os);
+                break;
+            case DistributionType::MULTIVARIATEGAUSS:
+                printDistMultiVariateGauss(os);
+                break;
+            case DistributionType::FLATTOP:
+                printDistFlatTop(os);
+                break;
+            default:
+                throw OpalException("Distribution Param", "Unknown \"TYPE\" of \"DISTRIBUTION\"");
+         }
        os << "* " << endl;
        os << "* Distribution is injected." << endl;
    }
@@ -178,6 +227,14 @@ void Distribution::setAvrgPz(double avrgpz){
    avrgpz_m = avrgpz;
 }

+void Distribution::setTEmission(double tEmission) {
+        tEmission_m = tEmission;
+}
+
+double Distribution::getTEmission() const {
+        return tEmission_m;
+}
+
 void Distribution::setDistParametersGauss() {
    /*
     * Set distribution parameters. Do all the necessary checks depending
@@ -205,8 +262,191 @@ void Distribution::setDistParametersGauss() {

    setSigmaR_m();
    setSigmaP_m();
+
    avrgpz_m = 0.0;
 }
+
+void Distribution::setDistParametersMultiVariateGauss() {
+
+    cutoffR_m = 3.;
+    cutoffP_m = 3.;
+
+    // initialize the covariance matrix to identity
+    for (unsigned int i = 0; i < 6; ++ i) {
+        for (unsigned int j = 0; j < 6; ++ j) {
+            if (i==j)
+               correlationMatrix_m[i][j] = 1.0;
+            else
+               correlationMatrix_m[i][j] = 0.0;
+        }
+    }
+
+    // set diagonal elements first
+    setSigmaR_m();
+    setSigmaP_m();
+
+    for (unsigned int i = 0; i < 3; ++ i){
+        correlationMatrix_m[2*i  ][2*i  ] = sigmaR_m[i]*sigmaR_m[i];
+        correlationMatrix_m[2*i+1][2*i+1] = sigmaP_m[i]*sigmaP_m[i];
+    }
+
+    std::vector<double> cr = Attributes::getRealArray(itsAttr[DISTRIBUTION::CORR]);
+
+    if (!cr.empty()) {
+            // read off-diagonal correlation matrix from input file
+            if (cr.size() == 15) {
+                *gmsg << "* Use r to specify correlations" << endl;
+                unsigned int k = 0;
+                for (unsigned int i = 0; i < 5; ++ i) {
+                    for (unsigned int j = i + 1; j < 6; ++ j, ++ k) {
+                        correlationMatrix_m[j][i] = cr.at(k)*cr.at(k);
+                        correlationMatrix_m[i][j] = correlationMatrix_m[j][i]; // impose symmetry
+                    }
+                }
+            }
+            else {
+                throw OpalException("Distribution::SetDistParametersGauss",
+                                    "Inconsistent set of correlations specified, check manual");
+            }
+    }
+
+    avrgpz_m = 0.0;
+}
+
+void Distribution::setDistParametersFlatTop() {
+
+    cutoffR_m = 3.;
+    cutoffP_m = 3.;
+
+    // set diagonal elements first
+    setSigmaR_m();
+    setSigmaP_m();
+
+    // initialize the covariance matrix to identity
+    for (unsigned int i = 0; i < 6; ++ i) {
+        for (unsigned int j = 0; j < 6; ++ j) {
+            if (i==j)
+               correlationMatrix_m[i][j] = 1.0;
+            else
+               correlationMatrix_m[i][j] = 0.0;
+        }
+    }
+
+    cutoffR_m = ippl::Vector<double, 3>(
+        std::abs(Attributes::getReal(itsAttr[DISTRIBUTION::CUTOFFX])),
+        std::abs(Attributes::getReal(itsAttr[DISTRIBUTION::CUTOFFY])),
+        std::abs(Attributes::getReal(itsAttr[DISTRIBUTION::CUTOFFLONG])));
+
+    correlationMatrix_m[1][0] = Attributes::getReal(itsAttr[DISTRIBUTION::CORRX]);
+    correlationMatrix_m[3][2] = Attributes::getReal(itsAttr[DISTRIBUTION::CORRY]);
+    correlationMatrix_m[5][4] = Attributes::getReal(itsAttr[DISTRIBUTION::CORRT]);
+
+    if (Attributes::getReal(itsAttr[DISTRIBUTION::CORRZ]) != 0.0)
+        correlationMatrix_m[5][4] = Attributes::getReal(itsAttr[DISTRIBUTION::CORRZ]);
+
+    emitting_m = Attributes::getBool(itsAttr[DISTRIBUTION::EMITTED]);
+
+    if (emitting_m) {
+        sigmaR_m[2] = 0.0;
+
+        sigmaTRise_m = std::abs(Attributes::getReal(itsAttr[DISTRIBUTION::SIGMAT]));
+        sigmaTFall_m = std::abs(Attributes::getReal(itsAttr[DISTRIBUTION::SIGMAT]));
+        tPulseLengthFWHM_m = std::abs(Attributes::getReal(itsAttr[DISTRIBUTION::TPULSEFWHM]));
+
+        FTOSCAmplitude_m = std::abs(Attributes::getReal(itsAttr[DISTRIBUTION::FTOSCAMPLITUDE]));
+        FTOSCPeriods_m = std::abs(Attributes::getReal(itsAttr[DISTRIBUTION::FTOSCPERIODS]));
+
+        // If TRISE and TFALL are defined > 0.0 then these attributes
+        // override SIGMAT.
+        //
+        if (std::abs(Attributes::getReal(itsAttr[DISTRIBUTION::TRISE])) > 0.0
+            || std::abs(Attributes::getReal(itsAttr[DISTRIBUTION::TFALL])) > 0.0) {
+
+            double timeRatio = std::sqrt(2.0 * std::log(10.0)) - std::sqrt(2.0 * std::log(10.0 / 9.0));
+
+            if (std::abs(Attributes::getReal(itsAttr[DISTRIBUTION::TRISE])) > 0.0)
+                sigmaTRise_m = std::abs(Attributes::getReal(itsAttr[DISTRIBUTION::TRISE]))
+                    / timeRatio;
+
+            if (std::abs(Attributes::getReal(itsAttr[DISTRIBUTION::TFALL])) > 0.0)
+                sigmaTFall_m = std::abs(Attributes::getReal(itsAttr[DISTRIBUTION::TFALL]))
+                    / timeRatio;
+        }
+
+        // For an emitted beam, the longitudinal cutoff >= 0.
+        cutoffR_m[2] = std::abs(cutoffR_m[2]);
+
+    }
+    /*
+    cutoffR_m = Vector_t(Attributes::getReal(itsAttr[Attrib::Distribution::CUTOFFX]),
+                         Attributes::getReal(itsAttr[Attrib::Distribution::CUTOFFY]),
+                         Attributes::getReal(itsAttr[Attrib::Distribution::CUTOFFLONG]));
+
+    correlationMatrix_m(1, 0) = Attributes::getReal(itsAttr[Attrib::Distribution::CORRX]);
+    correlationMatrix_m(3, 2) = Attributes::getReal(itsAttr[Attrib::Distribution::CORRY]);
+    correlationMatrix_m(5, 4) = Attributes::getReal(itsAttr[Attrib::Distribution::CORRT]);
+
+    // CORRZ overrides CORRT.
+    if (Attributes::getReal(itsAttr[Attrib::Distribution::CORRZ]) != 0.0)
+        correlationMatrix_m(5, 4) = Attributes::getReal(itsAttr[Attrib::Distribution::CORRZ]);
+
+    setSigmaR_m();
+    if (emitting_m) {
+        sigmaR_m[2] = 0.0;
+
+        sigmaTRise_m = std::abs(Attributes::getReal(itsAttr[Attrib::Distribution::SIGMAT]));
+        sigmaTFall_m = std::abs(Attributes::getReal(itsAttr[Attrib::Distribution::SIGMAT]));
+
+        tPulseLengthFWHM_m = std::abs(Attributes::getReal(itsAttr[Attrib::Distribution::TPULSEFWHM]));
+
+        //
+         // If TRISE and TFALL are defined > 0.0 then these attributes
+         // override SIGMAT.
+         //
+        if (std::abs(Attributes::getReal(itsAttr[Attrib::Distribution::TRISE])) > 0.0
+            || std::abs(Attributes::getReal(itsAttr[Attrib::Distribution::TFALL])) > 0.0) {
+
+            double timeRatio = std::sqrt(2.0 * std::log(10.0)) - std::sqrt(2.0 * std::log(10.0 / 9.0));
+
+            if (std::abs(Attributes::getReal(itsAttr[Attrib::Distribution::TRISE])) > 0.0)
+                sigmaTRise_m = std::abs(Attributes::getReal(itsAttr[Attrib::Distribution::TRISE]))
+                    / timeRatio;
+
+            if (std::abs(Attributes::getReal(itsAttr[Attrib::Distribution::TFALL])) > 0.0)
+                sigmaTFall_m = std::abs(Attributes::getReal(itsAttr[Attrib::Distribution::TFALL]))
+                    / timeRatio;
+
+        }
+
+        // For an emitted beam, the longitudinal cutoff >= 0.
+        cutoffR_m[2] = std::abs(cutoffR_m[2]);
+    }
+
+    // Set laser profile/
+    laserProfileFileName_m = Attributes::getString(itsAttr[Attrib::Distribution::LASERPROFFN]);
+    if (!(laserProfileFileName_m == std::string(""))) {
+        laserImageName_m = Attributes::getString(itsAttr[Attrib::Distribution::IMAGENAME]);
+        laserIntensityCut_m = std::abs(Attributes::getReal(itsAttr[Attrib::Distribution::INTENSITYCUT]));
+        short flags = 0;
+        if (Attributes::getBool(itsAttr[Attrib::Distribution::FLIPX])) flags |= LaserProfile::FLIPX;
+        if (Attributes::getBool(itsAttr[Attrib::Distribution::FLIPY])) flags |= LaserProfile::FLIPY;
+        if (Attributes::getBool(itsAttr[Attrib::Distribution::ROTATE90])) flags |= LaserProfile::ROTATE90;
+        if (Attributes::getBool(itsAttr[Attrib::Distribution::ROTATE180])) flags |= LaserProfile::ROTATE180;
+        if (Attributes::getBool(itsAttr[Attrib::Distribution::ROTATE270])) flags |= LaserProfile::ROTATE270;
+
+        laserProfile_m = new LaserProfile(laserProfileFileName_m,
+                                          laserImageName_m,
+                                          laserIntensityCut_m,
+                                          flags);
+    }
+
+    // Legacy for ASTRAFLATTOPTH.
+    if (distrTypeT_m == DistributionType::ASTRAFLATTOPTH)
+        tRise_m = std::abs(Attributes::getReal(itsAttr[Attrib::Distribution::TRISE]));
+*/
+
+}
+
 void Distribution::createDistributionGauss(size_t numberOfParticles, double massIneV, ippl::ParticleAttrib<ippl::Vector<double, 3>>& R, ippl::ParticleAttrib<ippl::Vector<double, 3>>& P, std::shared_ptr<ParticleContainer_t> &pc, std::shared_ptr<FieldContainer_t> &fc, Vector_t<double, 3> nr) {
    // moved to Gaussian.hpp
 }
@@ -225,9 +465,54 @@ void Distribution::printDistGauss(Inform& os) const {
    os << "* SIGMAPZ    = " << sigmaP_m[2] << " [Beta Gamma]" << endl;
 }

+void Distribution::printDistMultiVariateGauss(Inform& os)  const {
+    os << "* Distribution type: MULTIVARIATEGAUSS" << endl;
+    os << "* " << endl;
+    os << "* SIGMAX     = " << sigmaR_m[0] << " [m]" << endl;
+    os << "* SIGMAY     = " << sigmaR_m[1] << " [m]" << endl;
+    os << "* SIGMAZ     = " << sigmaR_m[2] << " [m]" << endl;
+    os << "* SIGMAPX    = " << sigmaP_m[0] << " [Beta Gamma]" << endl;
+    os << "* SIGMAPY    = " << sigmaP_m[1] << " [Beta Gamma]" << endl;
+    os << "* SIGMAPZ    = " << sigmaP_m[2] << " [Beta Gamma]" << endl;
+
+    os << "* input cov matrix = ";
+    for (unsigned int i = 0; i < 6; ++ i) {
+        for (unsigned int j = 0; j < 6; ++ j) {
+            os << correlationMatrix_m[i][j] << " ";
+        }
+        os << endl << "                     ";
+    }
+}
+
+void Distribution::printDistFlatTop(Inform& os)  const {
+    os << "* Distribution type: FLATTOP" << endl;
+    os << "* " << endl;
+    os << "* SIGMAX     = " << sigmaR_m[0] << " [m]" << endl;
+    os << "* SIGMAY     = " << sigmaR_m[1] << " [m]" << endl;
+
+    if (emitting_m) {
+            os << "* Sigma Time Rise               = " << sigmaTRise_m
+               << " [sec]" << endl;
+            os << "* TPULSEFWHM                    = " << tPulseLengthFWHM_m
+               << " [sec]" << endl;
+            os << "* Sigma Time Fall               = " << sigmaTFall_m
+               << " [sec]" << endl;
+            os << "* Longitudinal cutoff           = " << cutoffR_m[2]
+               << " [units of Sigma Time]" << endl;
+            //os << "* Flat top modulation amplitude = "
+            //   << Attributes::getReal(itsAttr[DISTRIBUTION::FTOSCAMPLITUDE])
+            //   << " [Percent of distribution amplitude]" << endl;
+            //os << "* Flat top modulation periods   = "
+            //   << std::abs(Attributes::getReal(itsAttr[DISTRIBUTION::FTOSCPERIODS]))
+            //   << endl;
+    }
+    else{
+        os << "* SIGMAZ                        = " << sigmaR_m[2] << " [m]" << endl;
+    }
+}
+
 void Distribution::setAttributes() {
-    setSigmaR_m();
-    setSigmaP_m();
+    setDist();
 }

 void Distribution::setDist() {
@@ -238,6 +523,12 @@ void Distribution::setDist() {
        case DistributionType::GAUSS:
            setDistParametersGauss();
            break;
+        case DistributionType::MULTIVARIATEGAUSS:
+            setDistParametersMultiVariateGauss();
+            break;
+        case DistributionType::FLATTOP:
+            setDistParametersFlatTop();
+            break;
        default:
            throw OpalException("Distribution Param", "Unknown \"TYPE\" of \"DISTRIBUTION\"");
    }
@@ -245,7 +536,11 @@ void Distribution::setDist() {

 void Distribution::setDistType() {
    static const std::map<std::string, DistributionType> typeStringToDistType_s = {
-        {"NODIST", DistributionType::NODIST}, {"GAUSS", DistributionType::GAUSS}};
+        {"NODIST", DistributionType::NODIST},
+        {"GAUSS", DistributionType::GAUSS},
+        {"MULTIVARIATEGAUSS", DistributionType::MULTIVARIATEGAUSS},
+        {"FLATTOP", DistributionType::FLATTOP}
+    };

    distT_m = Attributes::getString(itsAttr[DISTRIBUTION::TYPE]);


--- a/src/Distribution/Distribution.h
+++ b/src/Distribution/Distribution.h
@@ -49,7 +49,7 @@ class Beam;
 class Beamline;
 class H5PartWrapper;

-enum class DistributionType : short { NODIST = -1, GAUSS };
+enum class DistributionType : short { NODIST = -1, GAUSS, MULTIVARIATEGAUSS, FLATTOP, FROMFILE };

 using ParticleContainer_t = ParticleContainer<double, 3>;
 using FieldContainer_t = FieldContainer<double, 3>;
@@ -60,6 +60,8 @@ public:

    virtual ~Distribution();

+    using Matrix_t = ippl::Vector< ippl::Vector<double, 6>, 6>;
+
    virtual bool canReplaceBy(Object* object);
    virtual Distribution* clone(const std::string& name);
    virtual void execute();
@@ -88,6 +90,13 @@ public:
    ippl::Vector<double, 3> getSigmaR() const;
    ippl::Vector<double, 3> getSigmaP() const;

+    double getSigmaTRise() const;
+    double getSigmaTFall() const;
+    double getTPulseLengthFWHM() const;
+
+    double getFTOSCAmplitude() const;
+    double getFTOSCPeriods() const;
+
    void setDistType();

    void setDist();
@@ -99,6 +108,14 @@ public:
    ippl::Vector<double, 3> getCutoffR() const;
    ippl::Vector<double, 3> getCutoffP() const;

+    Matrix_t correlationMatrix_m;
+
+    bool emitting_m;                     /// Distribution is an emitted, and is currently
+                                         /// emitting, rather than an injected, beam.
+
+    double getTEmission() const;
+    void setTEmission(double tEmission);
+
 private:
    enum class EmissionModel : unsigned short { NONE, ASTRA, NONEQUIL };

@@ -166,11 +183,17 @@ private:
    // void initializeBeam(PartBunch_t* beam);
    void printDist(Inform& os, size_t numberOfParticles) const;
    void printDistGauss(Inform& os) const;
+    void printDistMultiVariateGauss(Inform& os) const;
+    void printDistFlatTop(Inform& os) const;

    void setAttributes();

    void setDistParametersGauss();

+    void setDistParametersMultiVariateGauss();
+
+    void setDistParametersFlatTop();
+
    void setSigmaR_m();

    void setSigmaP_m();
@@ -188,9 +211,18 @@ private:

    ippl::Vector<double, 3> pmean_m, xmean_m, sigmaR_m, sigmaP_m, cutoffR_m, cutoffP_m;

+    double sigmaTRise_m;
+    double sigmaTFall_m;
+    double tPulseLengthFWHM_m;
+
    DistributionType distrTypeT_m;

    double avrgpz_m;
+
+    double FTOSCAmplitude_m;
+    double FTOSCPeriods_m;
+
+    double tEmission_m;
 };

 inline Inform& operator<<(Inform& os, const Distribution& d) {
@@ -221,6 +253,26 @@ inline ippl::Vector<double, 3> Distribution::getCutoffP() const {
    return cutoffP_m;
 }

+inline double Distribution::getSigmaTRise() const {
+    return sigmaTRise_m;
+}
+
+inline double Distribution::getSigmaTFall() const {
+    return sigmaTFall_m;
+}
+
+inline double Distribution::getTPulseLengthFWHM() const {
+    return tPulseLengthFWHM_m;
+}
+
+inline double Distribution::getFTOSCAmplitude() const {
+    return FTOSCAmplitude_m;
+}
+
+inline double Distribution::getFTOSCPeriods() const {
+    return FTOSCPeriods_m;
+}
+
 inline DistributionType Distribution::getType() const {
    return distrTypeT_m;
 }

--- a/src/Distribution/FlatTop.cpp
+++ b/src/Distribution/FlatTop.cpp
+#include "Distribution.h"
+#include "SamplingBase.hpp"
+#include "FlatTop.h"
+#include <memory>
+#include <cmath>
+
+using ParticleContainer_t = ParticleContainer<double, 3>;
+using FieldContainer_t = FieldContainer<double, 3>;
+using Distribution_t = Distribution;
+using GeneratorPool = typename Kokkos::Random_XorShift64_Pool<>;
+using Dist_t = ippl::random::NormalDistribution<double, 3>;
+
+FlatTop::FlatTop(std::shared_ptr<ParticleContainer_t> &pc,
+                 std::shared_ptr<FieldContainer_t> &fc,
+                 std::shared_ptr<Distribution_t> &opalDist)
+    : SamplingBase(pc, fc, opalDist), rand_pool_m(determineRandInit()) {
+    setParameters(opalDist);
+}
+
+void FlatTop::setWithDomainDecomp(bool withDomainDecomp) {
+    withDomainDecomp_m = withDomainDecomp;
+}
+
+size_t FlatTop::determineRandInit() {
+    extern Inform* gmsg;
+    size_t randInit;
+    if (Options::seed == -1) {
+        randInit = 1234567;
+        *gmsg << "* Seed = " << randInit << " on all ranks" << endl;
+    } else {
+        randInit = static_cast<size_t>(Options::seed + 100 * ippl::Comm->rank());
+    }
+    return randInit;
+}
+
+void FlatTop::setParameters(const std::shared_ptr<Distribution_t> &opalDist) {
+    emitting_m = opalDist->emitting_m;
+    // time span of fall is [0, riseTime, riseTime+flattopTime, fallTime+flattopTime+riseTime ]
+    sigmaTFall_m = opalDist_m->getSigmaTFall();
+    sigmaTRise_m = opalDist_m->getSigmaTRise();
+    cutoffR_m = opalDist_m->getCutoffR();
+
+    fallTime_m = sigmaTFall_m * cutoffR_m[2]; // fall is [0, fallTime]
+    flattopTime_m = opalDist->getTPulseLengthFWHM()
+            - std::sqrt(2.0 * std::log(2.0)) * (sigmaTRise_m + sigmaTFall_m);
+    if (flattopTime_m < 0.0) {
+        flattopTime_m = 0.0;
+    }
+    riseTime_m = sigmaTRise_m * cutoffR_m[2];
+
+    emissionTime_m = fallTime_m + flattopTime_m + riseTime_m;
+    opalDist_m->setTEmission(emissionTime_m);
+
+    // These expression are take from the old OPAL
+    // I think normalizedFlankArea is int_0^{cutoff} exp(-(x/sigma)^2/2 ) / sigma
+    // Instead of int_0^{cutoff} exp(-(x/sigma)^2/2 ) / sqrt(2*pi) / sigma, which is strange!
+    // So the distribution of tails are exp(-(x/sigma)^2/2 ) and not Gaussian!
+    normalizedFlankArea_m = 0.5 * std::sqrt(Physics::two_pi) * std::erf(cutoffR_m[2] / std::sqrt(2.0));
+    distArea_m = flattopTime_m + (sigmaTRise_m + sigmaTFall_m) * normalizedFlankArea_m;
+
+    // make sure only z direction is decomposed
+    fc_m->setDecomp({false, false, true});
+}
+
+void FlatTop::generateUniformDisk(size_type nlocal, size_t nNew) {
+
+    GeneratorPool rand_pool = rand_pool_m;
+    Vector_t<double, 3> rmin;
+    Vector_t<double, 3> rmax;
+    Vector_t<double, 3> hr;
+
+    view_type Rview = pc_m->R.getView();
+    view_type Pview = pc_m->P.getView();
+
+    double pi = Physics::pi;
+    Vector_t<double, 3> sigmaR = opalDist_m->getSigmaR();
+    // Sample (Rx,Ry) on a unit ring, then scale with sigmaRx and sigmaRy, set Px=Py=0
+    Kokkos::parallel_for(
+               "unitDisk", Kokkos::RangePolicy<>(nlocal, nlocal+nNew), KOKKOS_LAMBDA(const size_t j) {
+                auto generator = rand_pool.get_state();
+                double r = Kokkos::sqrt( generator.drand(0., 1.) );
+                double theta = 2.0 * pi * generator.drand(0., 1.);
+                rand_pool.free_state(generator);
+
+                Rview(j)[0] = r * Kokkos::cos(theta) * sigmaR[0];
+                Rview(j)[1] = r * Kokkos::sin(theta) * sigmaR[1];
+                Rview(j)[2]  = 0.0;
+                Pview(j)[0] = 0.0;
+                Pview(j)[1] = 0.0;
+                Pview(j)[2] = 0.0;
+    });
+    Kokkos::fence();
+}
+
+void FlatTop::setNr(Vector_t<double, 3> nr){
+    nr_m = nr;
+}
+
+void FlatTop::generateParticles(size_t& numberOfParticles, Vector_t<double, 3> nr) {
+
+    setNr(nr);
+
+    // initial allocation is similar for both emitting and non-emitting cases
+    allocateParticles(numberOfParticles);
+
+    if(emitting_m){
+        // set nlocal to 0 for the very first time step, before sampling particles
+        //pc_m->setLocalNum(0);
+
+        Kokkos::View<bool*> tmp_invalid("tmp_invalid", 0);
+        // \todo might be abuse of semantics: maybe think about new pc_m->setTotalNum or pc_m->updateTotal function instead?
+        pc_m->destroy(tmp_invalid, pc_m->getLocalNum());
+    }
+}
+
+double FlatTop::FlatTopProfile(double t){
+    double t0;
+    if(t<riseTime_m){
+            t0 = riseTime_m;
+            return exp( -pow((t-t0)/sigmaTRise_m,2) /2. );
+            //  In the old opal, tails seem to be exp(-x^2/sigma^2/2) rather than Gaussian with normalizing factor.
+    }
+    else if( t>riseTime_m && t<riseTime_m + flattopTime_m){
+            return 1.;
+    }
+    else if(t>riseTime_m + flattopTime_m && t < fallTime_m + flattopTime_m + riseTime_m){
+        t0 = fallTime_m + flattopTime_m;
+        return exp( -pow((t-t0)/sigmaTFall_m,2)/2. );
+        //  In the old opal, tails seem to be exp(-x^2/sigma^2/2) rather than Gaussian with normalizing factor.
+    }
+    else
+        return 0.;
+}
+
+size_t FlatTop::computeNlocalUniformly(size_t nglobal){
+    MPI_Comm comm = MPI_COMM_WORLD;
+    int nranks;
+    int rank;
+    MPI_Comm_size(comm, &nranks);
+    MPI_Comm_rank(comm, &rank);
+
+    size_type nlocal = floor(nglobal/nranks);
+
+    size_t remaining = nglobal - nlocal*nranks;
+
+    if(remaining>0 && rank==0){
+       nlocal += remaining;
+    }
+
+    return nlocal;
+}
+
+double FlatTop::integrateTrapezoidal(double x1, double x2, double y1, double y2){
+    return 0.5 * (y1+y2) * fabs(x2-x1);
+}
+
+void FlatTop::initDomainDecomp(double BoxIncr) {
+    auto *mesh = &fc_m->getMesh();
+    auto *FL   = &fc_m->getFL();
+    Vector_t<double, 3> sigmaR = opalDist_m->getSigmaR();
+    ippl::Vector<double, 3> o;
+    ippl::Vector<double, 3> e;
+    double tol = 1e-15; // enlarge grid by tol to avoid missing particles on boundaries
+    o[0] = -sigmaR[0] - tol;
+    e[0] =  sigmaR[0] + tol;
+    o[1] = -sigmaR[1] - tol;
+    e[1] =  sigmaR[1] + tol;
+    o[2] = 0.0 - tol;
+    e[2] = Physics::c * emissionTime_m + tol;
+
+    ippl::Vector<double, 3> l = e - o;
+    hr_m = (1.0+BoxIncr/100.)*(l / nr_m);
+    mesh->setMeshSpacing(hr_m);
+    mesh->setOrigin(o-0.5*hr_m*BoxIncr/100.);
+    pc_m->getLayout().updateLayout(*FL, *mesh);
+}
+
+double FlatTop::countEnteringParticlesPerRank(double t0, double tf){
+    size_type nlocalNew=0;
+    double tArea = 0.0;
+    tArea = integrateTrapezoidal(t0, tf, FlatTopProfile(t0), FlatTopProfile(tf));
+    size_type totalNew = floor(totalN_m * tArea / distArea_m);
+    nlocalNew = 0;
+
+    if(totalNew>0){
+        if(!withDomainDecomp_m){
+            // the same number of particles per rank
+            nlocalNew = computeNlocalUniformly(totalNew);
+        }
+        else{
+            // select number of particles per rank using estimated domain decomposition at final emission time
+            // find min/max of particle positions for [t,t+dt]
+            Vector_t<double, 3> prmin, prmax;
+            Vector_t<double, 3> sigmaR = opalDist_m->getSigmaR();
+            prmin[0] = -sigmaR[0];
+            prmax[0] =  sigmaR[0];
+            prmin[1] = -sigmaR[1];
+            prmax[1] =  sigmaR[1];
+            prmin[2] = std::max(0.0, Physics::c * t0);
+            prmax[2] = Physics::c*tf;
+
+            double dx = prmax[0] - prmin[0];
+            double dy = prmax[1] - prmin[1];
+            double dz = prmax[2] - prmin[2];
+
+            if (dx <= 0 || dy <= 0 || dz <= 0) {
+                throw std::runtime_error("Invalid global particle volume: prmax must be greater than prmin.");
+            }
+
+            double globalpvolume = dx * dy * dz;
+
+            // find min/max of subdomains for the current rank
+            auto regions = pc_m->getLayout().getRegionLayout().gethLocalRegions();
+            int rank = ippl::Comm->rank();
+
+            if (rank < 0 || static_cast<size_t>(rank) >= regions.size()) {
+                throw std::runtime_error("Invalid rank index in gethLocalRegions()");
+            }
+
+            Vector_t<double, 3> locrmin, locrmax;
+            for (unsigned d = 0; d < Dim; ++d) {
+               locrmax[d] = regions(rank)[d].max();
+               locrmin[d] = regions(rank)[d].min();
+            }
+
+            if (prmax[0] >= locrmin[0] && prmin[0] <= locrmax[0] &&
+                prmax[1] >= locrmin[1] && prmin[1] <= locrmax[1] &&
+                prmax[2] >= locrmin[2] && prmin[2] <= locrmax[2]) {
+
+                double x1 = std::max(prmin[0], locrmin[0]);
+                double x2 = std::min(prmax[0], locrmax[0]);
+                double y1 = std::max(prmin[1], locrmin[1]);
+                double y2 = std::min(prmax[1], locrmax[1]);
+                double z1 = std::max(prmin[2], locrmin[2]);
+                double z2 = std::min(prmax[2], locrmax[2]);
+
+                if (x2 >= x1 && y2 >= y1 && z2 >= z1) {
+                    double locpvolume = (x2 - x1) * (y2 - y1) * (z2 - z1);
+                    if (globalpvolume > 0) {
+                        nlocalNew = static_cast<int>(totalNew * locpvolume / globalpvolume);
+                    }
+                    else{
+                        nlocalNew = 0;
+                    }
+                } else {
+                    nlocalNew = 0;
+                }
+            }
+        }
+    }
+    return nlocalNew;
+}
+
+void FlatTop::allocateParticles(size_t numberOfParticles){
+    totalN_m = numberOfParticles;
+
+    size_type nlocal;
+
+    nlocal = computeNlocalUniformly(totalN_m);
+
+    pc_m->create(nlocal);
+}
+
+void FlatTop::emitParticles(double t, double dt) {
+    extern Inform* gmsg;
+
+    // count number of new particles to be emitted
+    size_type nNew = countEnteringParticlesPerRank(t, t + dt);
+
+    // current number of particles per rank
+    size_type nlocal = pc_m->getLocalNum();
+
+    // extend particle container to accomodate new particles
+    pc_m->create(nNew);
+
+    // generate new particles on uniform disc
+    *gmsg << "* generate particles on a disc" << endl;
+    generateUniformDisk(nlocal, nNew);
+
+    *gmsg << "* new particles emmitted" << endl;
+}
+
+void FlatTop::testNumEmitParticles(size_type nsteps, double dt) {
+    size_type nNew;
+    int rank, numRanks;
+    double t = 0.0;
+    double c = Physics::c;
+
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &numRanks);
+
+    std::string filename = "timeNpart_" + std::to_string(rank) + ".txt";
+    std::ofstream file(filename);
+
+    // Check if the file opened successfully
+    if (!file.is_open()) {
+        throw std::runtime_error("Failed to open file: " + filename);
+    }
+
+    for(size_type i=0; i<nsteps; i++){
+        nNew = countEnteringParticlesPerRank(t, t + dt);
+
+        // current number of particles per rank
+        size_type nlocal = pc_m->getLocalNum();
+
+        // extend particle container to accomodate new particles
+        pc_m->create(nNew);
+
+        // generate new particles on uniform disc
+        generateUniformDisk(nlocal, nNew);
+
+        // write to a file
+        auto rViewDevice  = pc_m->R.getView();
+        auto rView = Kokkos::create_mirror_view(rViewDevice);
+        Kokkos::deep_copy(rView,rViewDevice);
+
+        for(size_type j=nlocal; j<nlocal+nNew; j++){
+            file << t << " " << (emissionTime_m-t)*c << " " << rView(j)[0] << " " << rView(j)[1] << "\n";
+        }
+        file.flush();  // Ensure data is written to disk
+
+        t = t + dt;
+    }
+    file.close();
+    ippl::Comm->barrier();
+}
+
+void FlatTop::testEmitParticles(size_type nsteps, double dt) {
+    double t = 0.0;
+
+    for(size_type i=0; i<nsteps; i++){
+        emitParticles(t, dt);
+
+        t = t + dt;
+    }
+}
--- a/src/Distribution/FlatTop.h
+++ b/src/Distribution/FlatTop.h
+/**
+ * @file FlatTop.h
+ * @brief Defines the FlatTop class used for sampling emitting particles.
+ */
+
+#ifndef IPPL_FLAT_TOP_H
+#define IPPL_FLAT_TOP_H
+
+#include "Distribution.h"
+#include "SamplingBase.hpp"
+#include <Kokkos_Random.hpp>
+#include "Ippl.h"
+#include "Utilities/Options.h"
+#include "OPALTypes.h"
+#include <memory>
+#include <cmath>
+
+using ParticleContainer_t = ParticleContainer<double, 3>;
+using FieldContainer_t = FieldContainer<double, 3>;
+using Distribution_t = Distribution;
+using GeneratorPool = typename Kokkos::Random_XorShift64_Pool<>;
+using Dist_t = ippl::random::NormalDistribution<double, 3>;
+
+/**
+ * @class FlatTop
+ * @brief Implements the sampling method for the flat-top distribution.
+ *        x and y coordinates are uniformly distributed inside a circle
+ *        and number of particles entering domain in [t, t+dt] follows flattop profile.
+ *
+ *        The FlatTop distribution is
+ *        f(t)/Z = exp[ -((t-riseTime_m)/sigma)^2/2 ]                            t < riseTime
+ *                 1.0                                                           riseTime < t < t<riseTime + flattopTime
+ *                 exp[ -((t-(fallTime_m + flattopTime_m))/sigmaTFall_m)^2/2 ]   t>riseTime + flattopTime
+ *         where Z is the normalizing factor.
+ */
+class FlatTop : public SamplingBase {
+public:
+    /**
+     * @brief Constructor for FlatTop.
+     * @param pc Shared pointer to ParticleContainer.
+     * @param fc Shared pointer to FieldContainer.
+     * @param opalDist Shared pointer to Distribution.
+     */
+    FlatTop(std::shared_ptr<ParticleContainer_t> &pc, std::shared_ptr<FieldContainer_t> &fc, std::shared_ptr<Distribution_t> &opalDist);
+
+    /**
+     * @brief Tests the number of emitted particles over a given number of steps.
+     * @param nsteps Number of time steps to simulate.
+     * @param dt Time step size.
+     */
+    void testNumEmitParticles(size_type nsteps, double dt) override;
+
+    /**
+     * @brief Tests particle emission over a given number of steps.
+     * @param nsteps Number of time steps to simulate.
+     * @param dt Time step size.
+     */
+    void testEmitParticles(size_type nsteps, double dt) override;
+
+private:
+    using size_type = ippl::detail::size_type;
+    GeneratorPool rand_pool_m;  ///< Random number generator pool.
+    double flattopTime_m;       ///< Time duration of when the time profile is flat.
+    double normalizedFlankArea_m; ///< Normalized area of the distribution flanks.
+    double distArea_m; ///< Total area of the flattop distribution.
+    double sigmaTFall_m; ///< Standard deviation for fall time profile.
+    double sigmaTRise_m; ///< Standard deviation for rise time profile.
+    Vector_t<double, 3> cutoffR_m; ///< Cutoff radius.
+    double fallTime_m; ///< Time duration for the fall phase.
+    double riseTime_m; ///< Time duration for the rise phase.
+    bool emitting_m; ///< Flag for particle emission status.
+    size_type totalN_m; ///< Total number of particles.
+    bool withDomainDecomp_m; ///< Flag for domain decomposition.
+    double emissionTime_m; ///< Total emission time.
+    Vector_t<double, 3> nr_m; ///< Number of grid points per direction.
+    Vector_t<double, 3> hr_m; ///< Grid spacing.
+
+    /**
+     * @brief Sets whether to use domain decomposition.
+     * @param withDomainDecomp Boolean flag for domain decomposition.
+     */
+    void setWithDomainDecomp(bool withDomainDecomp) override;
+
+    /**
+     * @brief Determines the random seed initialization.
+     * @return The seed value.
+     */
+    static size_t determineRandInit();
+
+    /**
+     * @brief Sets distribution parameters.
+     * @param opalDist Shared pointer to the distribution object.
+     */
+    void setParameters(const std::shared_ptr<Distribution_t> &opalDist);
+
+public:
+    /**
+     * @brief Generates particles (x,y) uniformly on a disk distribution.
+     * @param nlocal Number of local particles.
+     * @param nNew Number of new particles to generate.
+     */
+    void generateUniformDisk(size_type nlocal, size_t nNew);
+
+    /**
+     * @brief Sets the number of grid points per direction.
+     * @param nr Vector specifying the number of grid points.
+     */
+    void setNr(Vector_t<double, 3> nr);
+
+    /**
+     * @brief Generates particles with a given number and grid configuration.
+     * @param numberOfParticles Number of particles to generate.
+     * @param nr Number of grid points in each dimension.
+     */
+    void generateParticles(size_t& numberOfParticles, Vector_t<double, 3> nr) override;
+
+    /**
+     * @brief Computes the flat-top profile value at a given time.
+     * @param t Time value.
+     * @return Profile value at time t.
+     */
+    double FlatTopProfile(double t);
+
+    /**
+     * @brief Computes the local number of particles uniformly distributed among ranks.
+     * @param nglobal Total global number of particles.
+     * @return Number of local particles.
+     */
+    size_t computeNlocalUniformly(size_t nglobal);
+
+    /**
+     * @brief Integrates using the trapezoidal rule.
+     * @param x1 begining of interval [x1,x2].
+     * @param x2 end of interval [x1,x2].
+     * @param y1 value of f(x1).
+     * @param y2 value of f(x2).
+     * @return Integrated result.
+     */
+    double integrateTrapezoidal(double x1, double x2, double y1, double y2);
+
+    /**
+     * @brief Initializes the domain decomposition.
+     * @param BoxIncr Box increment factor.
+     */
+    void initDomainDecomp(double BoxIncr) override;
+
+    /**
+     * @brief Counts the number of particles entering per rank in a given time interval.
+     * @param t0 Start time.
+     * @param tf End time.
+     * @return Number of entering particles per rank.
+     */
+    double countEnteringParticlesPerRank(double t0, double tf);
+
+    /**
+     * @brief Allocates memory for a given number of particles.
+     * @param numberOfParticles Number of particles to allocate.
+     */
+    void allocateParticles(size_t numberOfParticles);
+
+    /**
+     * @brief Emits new particles within a given time interval.
+     * @param t Start time.
+     * @param dt Time step.
+     */
+    void emitParticles(double t, double dt) override;
+};
+
+#endif // IPPL_FLAT_TOP_H
+
--- a/src/Distribution/Gaussian.cpp
+++ b/src/Distribution/Gaussian.cpp
+#include "Distribution.h"
+#include "SamplingBase.hpp"
+#include "Gaussian.h"
+#include <memory>
+#include <cmath>
+
+/**
+ * @brief Constructs a Gaussian sampler.
+ *
+ * @param pc Shared pointer to the particle container.
+ * @param fc Shared pointer to the field container.
+ * @param opalDist Shared pointer to the distribution object.
+ */
+Gaussian::Gaussian(std::shared_ptr<ParticleContainer_t> &pc,
+                   std::shared_ptr<FieldContainer_t> &fc,
+                   std::shared_ptr<Distribution_t> &opalDist)
+    : SamplingBase(pc, fc, opalDist) {
+    samperTimer_m = IpplTimings::getTimer("SamplingTimer");
+}
+
+/**
+ * @brief Generates particles following a Gaussian distribution.
+ *
+ * @param numberOfParticles The total number of particles to generate.
+ * @param nr The number of grid points in each dimension (not used here).
+ */
+void Gaussian::generateParticles(size_t& numberOfParticles, Vector_t<double, 3> nr) {
+    extern Inform* gmsg;
+
+    IpplTimings::startTimer(samperTimer_m);
+
+    size_t randInit;
+
+    if (Options::seed == -1) {
+        randInit = 1234567;
+        *gmsg << "* Seed = " << randInit << " on all ranks" << endl;
+    } else {
+        randInit = static_cast<size_t>(Options::seed + 100 * ippl::Comm->rank());
+    }
+
+    GeneratorPool rand_pool64(randInit);
+
+    double mu[3], sd[3];
+
+    Vector_t<double, 3> rmin = -opalDist_m->getCutoffR();
+    Vector_t<double, 3> rmax = opalDist_m->getCutoffR();
+
+    for (int i = 0; i < 3; i++) {
+        mu[i] = 0.0;
+        sd[i] = opalDist_m->getSigmaR()[i];
+        rmin(i) = (rmin(i) + mu[i]) * opalDist_m->getSigmaR()[i];
+        rmax(i) = (rmax(i) + mu[i]) * opalDist_m->getSigmaR()[i];
+    }
+
+    view_type &Rview = pc_m->R.getView();
+    const double par[6] = {mu[0], sd[0], mu[1], sd[1], mu[2], sd[2]};
+
+    using Dist_t = ippl::random::NormalDistribution<double, 3>;
+    using sampling_t = ippl::random::InverseTransformSampling<double, 3, Kokkos::DefaultExecutionSpace, Dist_t>;
+    Dist_t dist(par);
+
+    MPI_Comm comm = MPI_COMM_WORLD;
+    int nranks, rank;
+    MPI_Comm_size(comm, &nranks);
+    MPI_Comm_rank(comm, &rank);
+
+    size_t nlocal = floor(numberOfParticles / nranks);
+    size_t remaining = numberOfParticles - nlocal * nranks;
+
+    if (remaining > 0 && rank == 0) {
+        nlocal += remaining;
+    }
+
+    sampling_t sampling(dist, rmax, rmin, rmax, rmin, nlocal);
+    nlocal = sampling.getLocalSamplesNum();
+    pc_m->create(nlocal);
+    sampling.generate(Rview, rand_pool64);
+
+    double meanR[3], loc_meanR[3];
+    for(int i=0; i<3; i++){
+        meanR[i] = 0.0;
+        loc_meanR[i] = 0.0;
+    }
+
+    Kokkos::parallel_reduce(
+            "calc moments of particle distr.", nlocal,
+            KOKKOS_LAMBDA(
+                    const int k, double& cent0, double& cent1, double& cent2) {
+                    cent0 += Rview(k)[0];
+                    cent1 += Rview(k)[1];
+                    cent2 += Rview(k)[2];
+            },
+            Kokkos::Sum<double>(loc_meanR[0]), Kokkos::Sum<double>(loc_meanR[1]), Kokkos::Sum<double>(loc_meanR[2]));
+    Kokkos::fence();
+
+    MPI_Allreduce(loc_meanR, meanR, 3, MPI_DOUBLE, MPI_SUM, ippl::Comm->getCommunicator());
+    ippl::Comm->barrier();
+
+    for(int i=0; i<3; i++){
+       meanR[i] = meanR[i]/(1.0*numberOfParticles);
+    }
+
+    Kokkos::parallel_for(
+                nlocal, KOKKOS_LAMBDA(
+                    const int k) {
+                    Rview(k)[0] -= meanR[0];
+                    Rview(k)[1] -= meanR[1];
+                    Rview(k)[2] -= meanR[2];
+                }
+    );
+    Kokkos::fence();
+
+    for (int i = 0; i < 3; i++) {
+        mu[i] = 0.0;
+        sd[i] = opalDist_m->getSigmaP()[i];
+    }
+
+    view_type &Pview = pc_m->P.getView();
+    Kokkos::parallel_for(
+        nlocal,
+        ippl::random::randn<double, 3>(Pview, rand_pool64, mu, sd)
+    );
+    Kokkos::fence();
+
+    double avrgpz = opalDist_m->getAvrgpz();
+    Kokkos::parallel_for(
+        nlocal,
+        KOKKOS_LAMBDA(const int k) {
+            Pview(k)[2] += avrgpz;
+        }
+    );
+    Kokkos::fence();
+
+    IpplTimings::stopTimer(samperTimer_m);
+}
+
--- a/src/Distribution/Gaussian.h
+++ b/src/Distribution/Gaussian.h
+#ifndef IPPL_GAUSSIAN_H
+#define IPPL_GAUSSIAN_H
+
+#include "Distribution.h"
+#include "SamplingBase.hpp"
+#include <Kokkos_Random.hpp>
+#include "Ippl.h"
+#include "Utilities/Options.h"
+#include "OPALTypes.h"
+#include <memory>
+#include <cmath>
+
+using ParticleContainer_t = ParticleContainer<double, 3>;
+using FieldContainer_t = FieldContainer<double, 3>;
+using Distribution_t = Distribution;
+using GeneratorPool = typename Kokkos::Random_XorShift64_Pool<>;
+using Dist_t = ippl::random::NormalDistribution<double, 3>;
+
+/**
+ * @class Gaussian Distribution
+ * @brief Generating particles following a Gaussian distribution.
+ *
+ * This function samples particle positions R and momenta P given following normal distributions
+ * a normal distribution
+ * R ~ N ( [0, 0, 0     ], sdR)
+ * P ~ N ( [0, 0, avrgpz], sdP)
+ * where sdR = [SigmaR[0] 0         0
+                0         SigmaR[1] 0
+                0         0         SigmaR[2]]
+ *
+ * and sdP =    [SigmaP[0] 0         0
+                0          SigmaP[1] 0
+                0          0         SigmaP[2]].
+ *
+ * Here, R is sampled in a bounded domains R \in [-CutoffR*SigmaR, CutoffR*SigmaR]^3
+ * and corrected by translation to ensure mean = [0,0,0].
+ *
+ * @param numberOfParticles The total number of particles to generate.
+ * @param nr The number of grid points in each dimension (not used here).
+ */
+class Gaussian : public SamplingBase {
+public:
+    /**
+     * @brief Timer for performance profiling.
+     */
+    IpplTimings::TimerRef samperTimer_m;
+
+    /**
+     * @brief Constructor for the Gaussian sampler.
+     * 
+     * @param pc Shared pointer to the particle container.
+     * @param fc Shared pointer to the field container.
+     * @param opalDist Shared pointer to the distribution object.
+     */
+    Gaussian(std::shared_ptr<ParticleContainer_t> &pc, 
+             std::shared_ptr<FieldContainer_t> &fc, 
+             std::shared_ptr<Distribution_t> &opalDist);
+
+    /**
+     * @brief Generates particles with a Gaussian distribution.
+     *
+     * @param numberOfParticles The total number of particles to generate.
+     * @param nr the number of grid cells in R (used in domain decomposition).
+     */
+    void generateParticles(size_t& numberOfParticles, Vector_t<double, 3> nr) override;
+};
+
+#endif // IPPL_GAUSSIAN_H
+
--- a/src/Distribution/Gaussian.hpp
+++ b/src/Distribution/Gaussian.hpp
-#ifndef IPPL_GAUSSIAN_H
-#define IPPL_GAUSSIAN_H
-
-#include "Distribution.h"
-#include "SamplingBase.hpp"
-#include <memory>
-
-using ParticleContainer_t = ParticleContainer<double, 3>;
-using FieldContainer_t = FieldContainer<double, 3>;
-using Distribution_t = Distribution;
-using GeneratorPool = typename Kokkos::Random_XorShift64_Pool<>;
-using Dist_t = ippl::random::NormalDistribution<double, 3>;
-
-class Gaussian : public SamplingBase {
-public:
-    Gaussian(std::shared_ptr<ParticleContainer_t> &pc, std::shared_ptr<FieldContainer_t> &fc, std::shared_ptr<Distribution_t> &opalDist)
-        : SamplingBase(pc, fc, opalDist) {}
-
-    void generateParticles(size_t& numberOfParticles, Vector_t<double, 3> nr) override {
-        GeneratorPool rand_pool64((size_t)(Options::seed + 100 * ippl::Comm->rank()));
-
-        double mu[3], sd[3];
-
-        // sample R
-        Vector_t<double, 3> rmin = -opalDist_m->getCutoffR();
-        Vector_t<double, 3> rmax =  opalDist_m->getCutoffR();
-        Vector_t<double, 3> hr;
-        for(int i=0; i<3; i++){
-            mu[i] = 0.0;
-       	    sd[i] = opalDist_m->getSigmaR()[i];
-            rmin(i) = (rmin(i) + mu[i])*opalDist_m->getSigmaR()[i];
-            rmax(i) = (rmax(i) + mu[i])*opalDist_m->getSigmaR()[i];
-        }
-        view_type &Rview = pc_m->R.getView();
-
-        const double par[6] = {mu[0], sd[0], mu[1], sd[1], mu[2], sd[2]};
-        using Dist_t = ippl::random::NormalDistribution<double, Dim>;
-        using sampling_t = ippl::random::InverseTransformSampling<double, Dim, Kokkos::DefaultExecutionSpace, Dist_t>;
-        Dist_t dist(par);
-
-        auto& mesh = fc_m->getMesh();
-        auto& FL = fc_m->getFL();
-
-        hr = (rmax-rmin) / nr;
-        mesh.setMeshSpacing(hr);
-        mesh.setOrigin(rmin);
-        pc_m->getLayout().updateLayout(FL, mesh);
-
-        ippl::detail::RegionLayout<double, Dim, Mesh_t<Dim>> rlayout;
-        rlayout = ippl::detail::RegionLayout<double, Dim, Mesh_t<Dim>>(FL, mesh);
-
-        sampling_t sampling(dist, rmax, rmin, rlayout, numberOfParticles);
-
-        size_type nlocal = sampling.getLocalSamplesNum();
-        pc_m->create(nlocal);
-
-        sampling.generate(Rview, rand_pool64);
-
-        double meanR[3], loc_meanR[3];
-        for(int i=0; i<3; i++){
-           meanR[i] = 0.0;
-           loc_meanR[i] = 0.0;
-        }
-
-        Kokkos::parallel_reduce(
-            "calc moments of particle distr.", numberOfParticles,
-            KOKKOS_LAMBDA(
-                    const int k, double& cent0, double& cent1, double& cent2) {
-                    cent0 += Rview(k)[0];
-                    cent1 += Rview(k)[1];
-                    cent2 += Rview(k)[2];
-            },
-            Kokkos::Sum<double>(loc_meanR[0]), Kokkos::Sum<double>(loc_meanR[1]), Kokkos::Sum<double>(loc_meanR[2]));
-        Kokkos::fence();
-        ippl::Comm->barrier();
-
-        MPI_Allreduce(loc_meanR, meanR, 3, MPI_DOUBLE, MPI_SUM, ippl::Comm->getCommunicator());
-
-        for(int i=0; i<3; i++){
-           meanR[i] = meanR[i]/(1.*numberOfParticles);
-        }
-
-        Kokkos::parallel_for(
-                numberOfParticles,KOKKOS_LAMBDA(
-                    const int k) {
-                    Rview(k)[0] -= meanR[0];
-                    Rview(k)[1] -= meanR[1];
-                    Rview(k)[2] -= meanR[2];
-            }
-        );
-        Kokkos::fence();
-        ippl::Comm->barrier();
-
-        // sample P
-        for(int i=0; i<3; i++){
-            mu[i] = 0.0;
-            sd[i] = opalDist_m->getSigmaP()[i];
-        }
-        view_type &Pview = pc_m->P.getView();
-        Kokkos::parallel_for(
-            nlocal, ippl::random::randn<double, 3>(Pview, rand_pool64, mu, sd)
-        );
-        Kokkos::fence();
-        ippl::Comm->barrier();
-
-        double meanP[3], loc_meanP[3];
-        for(int i=0; i<3; i++){
-           meanP[i] = 0.0;
-           loc_meanP[i] = 0.0;
-        }
-        Kokkos::parallel_reduce(
-            "calc moments of particle distr.", nlocal,
-            KOKKOS_LAMBDA(
-                    const int k, double& cent0, double& cent1, double& cent2) {
-                    cent0 += Pview(k)[0];
-                    cent1 += Pview(k)[1];
-                    cent2 += Pview(k)[2];
-            },
-	    Kokkos::Sum<double>(loc_meanP[0]), Kokkos::Sum<double>(loc_meanP[1]), Kokkos::Sum<double>(loc_meanP[2]));
-        Kokkos::fence();
-        ippl::Comm->barrier();
-
-        MPI_Allreduce(loc_meanP, meanP, 3, MPI_DOUBLE, MPI_SUM, ippl::Comm->getCommunicator());
-
-        for(int i=0; i<3; i++){
-           meanP[i] = meanP[i]/(1.*numberOfParticles);
-        }
-
-        Kokkos::parallel_for(
-            numberOfParticles,KOKKOS_LAMBDA(
-                    const int k) {
-                    Pview(k)[0] -= meanP[0];
-                    Pview(k)[1] -= meanP[1];
-                    Pview(k)[2] -= meanP[2];
-            }
-        );
-        Kokkos::fence();
-        ippl::Comm->barrier();
-
-        // correct the mean
-        double avrgpz = opalDist_m->getAvrgpz();
-        Kokkos::parallel_for(
-            numberOfParticles,KOKKOS_LAMBDA(
-                    const int k) {
-                    Pview(k)[2] += avrgpz;
-            }
-        );
-        Kokkos::fence();
-        ippl::Comm->barrier();
-    }
-};
-#endif
--- a/src/Distribution/MultiVariateGaussian.cpp
+++ b/src/Distribution/MultiVariateGaussian.cpp
No results found