Commit e45cd295 authored by Uldis Locans

New materials in CollimatorPhysics, DKS parts for FFTPoisson, Degrader and Integration

parent 7887b7ff
......@@ -59,6 +59,60 @@ if (Boost_INCLUDE_DIRS)
include_directories (${Boost_INCLUDE_DIRS})
endif ()
### Uldis Locans 2014.09.05 ###
# Build-time switches for the DKS accelerator back ends.
#   ENABLE_* : build against the corresponding back end (adds DKS_* defines/flags).
#   USE_*    : choose which back end IPPL uses by defining IPPL_DKS_* (see below).
OPTION (ENABLE_OPENCL "Enable OpenCL" OFF)
OPTION (ENABLE_CUDA "Enable CUDA" OFF)
OPTION (ENABLE_OPENMP "Enable OpenMP + offload" OFF)
OPTION (USE_OPENCL "Use OpenCL" OFF)
OPTION (USE_CUDA "Use CUDA" OFF)
OPTION (USE_MIC "Use MIC" OFF)

### Load DKS package ###
# The MIC back end is switched on through ENABLE_OPENMP (the declared option).
# ENABLE_MIC is not declared as an option; it is still honoured here so that an
# existing -DENABLE_MIC=ON on the command line keeps working. Without
# ENABLE_OPENMP in this condition, an OpenMP-only build would define
# IPPL_DKS/OPAL_DKS below but never locate the DKS package.
IF (ENABLE_OPENCL OR ENABLE_CUDA OR ENABLE_OPENMP OR ENABLE_MIC)
    FIND_PACKAGE(DKS REQUIRED HINTS $ENV{DKS_PREFIX} $ENV{DKS_DIR} $ENV{DKS})
    MESSAGE (STATUS "Found DKS library: ${DKS_LIBRARY}")
    MESSAGE (STATUS "Found DKS include dir: ${DKS_INCLUDE_DIR}")
    SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDKS_MPI")
ENDIF (ENABLE_OPENCL OR ENABLE_CUDA OR ENABLE_OPENMP OR ENABLE_MIC)

### OpenCL compiler flags ###
IF (ENABLE_OPENCL)
    SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lOpenCL -pthread -DDKS_OPENCL")
ENDIF (ENABLE_OPENCL)
### End OpenCL ###

### CUDA compiler flags ###
IF (ENABLE_CUDA)
    SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lcudart -lcufft -lcublas -lnvToolsExt -DDKS_CUDA")
ENDIF (ENABLE_CUDA)
### End Cuda ###

### MIC compiler flags ###
IF (ENABLE_OPENMP)
    SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -openmp -DDKS_MIC")
ENDIF (ENABLE_OPENMP)
### END MIC ###

### if any accelerator enabled set flag to use DKS ###
IF (ENABLE_OPENCL OR ENABLE_CUDA OR ENABLE_OPENMP)
    SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIPPL_DKS -DOPAL_DKS")
ENDIF (ENABLE_OPENCL OR ENABLE_CUDA OR ENABLE_OPENMP)

### select API for DKS (temporary - will be moved to runtime) ###
IF (USE_OPENCL)
    SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIPPL_DKS_OPENCL")
ENDIF (USE_OPENCL)
IF (USE_CUDA)
    SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIPPL_DKS_CUDA")
ENDIF (USE_CUDA)
IF (USE_MIC)
    SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIPPL_DKS_MIC")
ENDIF (USE_MIC)
### END DKS part ###
# Handle options
OPTION (ENABLE_AMR_SOLVER "Enable BoxLib based AMR solver" OFF)
......@@ -184,4 +238,3 @@ ADD_SUBDIRECTORY (src)
IF (BUILD_OPAL_UNIT_TESTS)
ADD_SUBDIRECTORY(tests)
ENDIF (BUILD_OPAL_UNIT_TESTS)
......@@ -1310,4 +1310,4 @@ Inform &operator<<(Inform &os, PartBunch &p) {
}
#endif // OPAL_PartBunch_HH
\ No newline at end of file
#endif // OPAL_PartBunch_HH
......@@ -64,6 +64,10 @@ include_directories (
${CMAKE_SOURCE_DIR}/src # libclassic should not have any dependencies on libopal
)
# Add the DKS headers whenever an accelerator back end is enabled.
# ENABLE_OPENMP is the option that the project declares for the MIC back end;
# ENABLE_MIC is kept in the condition only for backward compatibility with
# builds that pass it explicitly.
IF (ENABLE_OPENCL OR ENABLE_CUDA OR ENABLE_OPENMP OR ENABLE_MIC)
    include_directories (${DKS_INCLUDE_DIR})
ENDIF (ENABLE_OPENCL OR ENABLE_CUDA OR ENABLE_OPENMP OR ENABLE_MIC)
add_library( CLASSIC ${CLASSIC_SRCS} )
install (TARGETS CLASSIC DESTINATION "${CMAKE_INSTALL_PREFIX}/lib")
install (FILES Utilities/ClassicException.h DESTINATION "${CMAKE_INSTALL_PREFIX}/include/Utilities")
......@@ -16,11 +16,40 @@
#include "AbsBeamline/Degrader.h"
#include <gsl/gsl_rng.h>
#include "Utility/IpplTimings.h"
#ifdef OPAL_DKS
#include "DKSBase.h"
#endif
class ElementBase;
class PartBunch;
class LossDataSink;
class Inform;
#ifdef OPAL_DKS
// Per-particle record stored in locParts_m when OPAL is built with OPAL_DKS.
// The struct is declared with __align__(16); that macro is not defined in this
// header -- presumably it comes in through DKSBase.h or the CUDA headers
// (TODO confirm).
// NOTE(review): the "incol" suffix presumably means "in collimator"; Rincol and
// Pincol look like position and momentum, Bfincol and Efincol like the magnetic
// and electric field at the particle -- confirm against CollimatorPhysics.cpp.
// NOTE(review): the field order is presumably relied upon by device code; do not
// reorder without checking the DKS side.
typedef struct __align__(16) {
int label;
unsigned localID;
Vector_t Rincol;
Vector_t Pincol;
long IDincol;
int Binincol;
double DTincol;
double Qincol;
long LastSecincol;
Vector_t Bfincol;
Vector_t Efincol;
} PART;
// Reduced particle record used as the element type of dksParts_m in OPAL_DKS
// builds. It carries only label, localID, Rincol and Pincol -- the first four
// fields of PART, in the same order.
// NOTE(review): presumably this is the layout exchanged with the accelerator
// through DKS -- confirm before changing field order or types.
typedef struct {
int label;
unsigned localID;
Vector_t Rincol;
Vector_t Pincol;
} PART_DKS;
#else
typedef struct {
int label;
unsigned localID;
......@@ -34,6 +63,7 @@ typedef struct {
Vector_t Bfincol;
Vector_t Efincol;
} PART;
#endif
class CollimatorPhysics: public SurfacePhysicsHandler {
......@@ -56,10 +86,26 @@ private:
void CoulombScat(Vector_t &R, Vector_t &P, double &deltat);
void EnergyLoss(double &Eng, bool &pdead, double &deltat);
void Rot(double &px, double &pz, double &x, double &z, double xplane, double Norm_P, double thetacou, double deltas, int coord);
void Rot(double &px, double &pz, double &x, double &z, double xplane, double Norm_P,
double thetacou, double deltas, int coord);
void copyFromBunch(PartBunch &bunch);
void addBackToBunch(PartBunch &bunch, unsigned i);
#ifdef OPAL_DKS
void copyFromBunchDKS(PartBunch &bunch);
void addBackToBunchDKS(PartBunch &bunch, unsigned i);
void setupCollimatorDKS(PartBunch &bunch, Degrader *deg);
void clearCollimatorDKS();
void applyDKS();
void applyHost(PartBunch &bunch, Degrader *deg, Collimator *coll);
void deleteParticleFromLocalVectorDKS();
#endif
void deleteParticleFromLocalVector();
bool checkHit(Vector_t R, Vector_t P, double dt, Degrader *deg, Collimator *coll);
......@@ -105,10 +151,33 @@ private:
double Emax_m;
double Emin_m;
std::vector<PART> locParts_m;
std::unique_ptr<LossDataSink> lossDs_m;
#ifdef OPAL_DKS
DKSBase dksbase;
int curandInitSet;
int ierr;
int maxparticles;
int numparticles;
int numlocalparts;
void *par_ptr;
void *mem_ptr;
std::vector<PART_DKS> dksParts_m;
static const int numpar = 12;
#endif
IpplTimings::TimerRef DegraderApplyTimer_m;
IpplTimings::TimerRef DegraderLoopTimer_m;
IpplTimings::TimerRef DegraderInitTimer_m;
};
#endif //COLLIMATORPHYSICS_HH
......@@ -380,7 +380,6 @@ FFT<CCTransform, Dim, T>::transform(
return;
}
#else
template <unsigned Dim, class T>
void
FFT<CCTransform,Dim,T>::transform(
......@@ -910,7 +909,6 @@ FFT<RCTransform,Dim,T>::FFT(
#ifdef IPPL_DKS
#ifdef IPPL_DKS_OPENCL
INFOMSG("Init DKS base opencl" << endl);
//base = DKSBase();
base.setAPI("OpenCL", 6);
base.setDevice("-gpu", 4);
base.initDevice();
......@@ -922,6 +920,7 @@ FFT<RCTransform,Dim,T>::FFT(
base.setAPI("Cuda", 4);
base.setDevice("-gpu", 4);
base.initDevice();
base.setupFFT(0, NULL);
base.createStream(fftStreamId);
#endif
......@@ -1482,27 +1481,7 @@ FFT<RCTransform,Dim,T>::transform(
}
//call real to complex fft
//read real_ptr
/*
int sreal = NR_g[0] * NR_g[1] * NR_g[2];
double *tmpreal = new double[sreal];
base.readData<double>(real_ptr, tmpreal, sreal);
std::cout << "Real old:" << std::endl;
for (int i = 0; i < sreal; i++) std::cout << tmpreal[i] << "\t";
std::cout << std::endl;
*/
base.callR2CFFT(real_ptr, comp_ptr, nTransformDims, (int*)NR_g);
//read comp_ptr
/*
int scomp = NC_g[0] * NC_g[1] * NC_g[2];
std::complex<double> *tmpcomp = new std::complex<double>[scomp];
base.readData< complex<double> >(comp_ptr, tmpcomp, scomp);
std::cout << "Comp old:" << std::endl;
for (int i = 0; i < scomp; i++) std::cout << tmpcomp[i] << "\t";
std::cout << std::endl;
*/
//===
//if only one node is working do dksbase read otherwise use cuda aware mpi
if (Ippl::getNodes() > 1) {
......@@ -1551,8 +1530,8 @@ FFT<RCTransform,Dim,T>::transform(
}
// normalize:
if (direction == +1)
g = g * this->getNormFact();
//if (direction == +1)
// g = g * this->getNormFact();
// finish timing the whole mess
IpplTimings::stopTimer(tottimer);
......@@ -1965,11 +1944,10 @@ FFT<RCTransform,Dim,T>::transformDKSCR(
//call complex to real fft
dksbase.callC2RFFT(real_ptr, comp_ptr, nTransformDims, (int*)NR_g);
//cormalize
//normalize
if (direction == +1)
dksbase.callNormalizeC2RFFT(real_ptr, nTransformDims, (int*)NR_g);
if (Ippl::getNodes() > 1) {
dksbase.syncDevice();
MPI_Barrier(Ippl::getComm());
......@@ -2130,11 +2108,12 @@ FFT<RCTransform,Dim,T>::transform(
int sizecomp = NC_l[0]*NC_l[1]*NC_l[2];
int totalreal = tempR->getDomain().size();
int totalcomp = temp->getDomain().size();
//local vnodes get starting position for complex field subdomains
int *cidx = new int[Ippl::getNodes()];
int *cidy = new int[Ippl::getNodes()];
int *cidz = new int[Ippl::getNodes()];
/*
for (typename Layout_t::const_iterator_iv i_s = temp->getLayout().begin_iv(); i_s != temp->getLayout().end_iv(); ++i_s) {
Domain_t tmp = (*i_s).second->getDomain();
int node = (*i_s).second->getNode();
......@@ -2151,11 +2130,12 @@ FFT<RCTransform,Dim,T>::transform(
cidy[node] = tmp[1].min();
cidz[node] = tmp[2].min();
}
*/
//local vnodes get starting position for real field subdomains
int *idx = new int[Ippl::getNodes()];
int *idy = new int[Ippl::getNodes()];
int *idz = new int[Ippl::getNodes()];
/*
for (typename Layout_t::const_iterator_iv i_s = tempR->getLayout().begin_iv(); i_s != tempR->getLayout().end_iv(); ++i_s) {
Domain_t tmp = (*i_s).second->getDomain();
int node = (*i_s).second->getNode();
......@@ -2172,7 +2152,7 @@ FFT<RCTransform,Dim,T>::transform(
idy[node] = tmp[1].min();
idz[node] = tmp[2].min();
}
*/
//do the FFT on GPU
int ierr;
void *real_ptr, *comp_ptr;
......@@ -2194,6 +2174,8 @@ FFT<RCTransform,Dim,T>::transform(
//call complex to real fft
base.callC2RFFT(real_ptr, comp_ptr, nTransformDims, (int*)NR_g);
if (direction == -1)
base.callNormalizeC2RFFT(real_ptr, nTransformDims, (int*)NR_g);
if (Ippl::getNodes() > 1) {
MPI_Barrier(Ippl::getComm());
......@@ -2228,7 +2210,7 @@ FFT<RCTransform,Dim,T>::transform(
}
// Normalize:
if (direction == +1) g = g * this->getNormFact();
//if (direction == +1) g = g * this->getNormFact();
// finish timing the whole mess
IpplTimings::stopTimer(tottimer);
......
......@@ -60,7 +60,7 @@ DKSBase *IpplInfo::DKS = new DKSBase("OpenCL", "-gpu");
#endif
#ifdef IPPL_DKS_MIC
DKSBase *IpplInfo::DKS = new DKSBase("OpenCL", "-mic");
DKSBase *IpplInfo::DKS = new DKSBase("OpenMP", "-mic");
#endif
#endif
......
......@@ -55,7 +55,7 @@ public:
assertion( const char *cond, const char *file, int line );
assertion( const char *m );
assertion( const assertion& a );
~assertion() { delete[] msg; }
~assertion() throw() { delete[] msg; }
assertion& operator=( const assertion& a );
const char* what() const noexcept { return msg; }
};
......@@ -113,4 +113,4 @@ void insist( const char *cond, const char *msg, const char *file, int line );
* $RCSfile: PAssert.h,v $ $Author: adelmann $
* $Revision: 1.1.1.1 $ $Date: 2003/01/23 07:40:33 $
* IPPL_VERSION_ID: $Id: PAssert.h,v 1.1.1.1 2003/01/23 07:40:33 adelmann Exp $
***************************************************************************/
\ No newline at end of file
***************************************************************************/
......@@ -25,13 +25,17 @@ IF (ENABLE_OPENCL OR ENABLE_CUDA OR ENABLE_OPENMP)
#SET (IPPL_LIBS ${IPPL_LIBS} dks)
SET (IPPL_LIBS ${IPPL_LIBS} ${DKS_LIBRARY_DIR}/libdks.a)
ENDIF (ENABLE_OPENCL OR ENABLE_CUDA OR ENABLE_OPENMP)
add_executable (TestFFT TestFFT.cpp)
add_executable (TestRC TestRC.cpp)
add_executable (TestRCGPU TestRCGPU.cpp)
target_link_libraries (TestFFT ${IPPL_LIBS})
target_link_libraries (TestRC ${IPPL_LIBS})
target_link_libraries (TestRCGPU ${IPPL_LIBS})
......@@ -12,6 +12,7 @@
***************************************************************************/
#include "Ippl.h"
#include <typeinfo>
#ifdef IPPL_USE_STANDARD_HEADERS
#include <complex>
......@@ -29,14 +30,43 @@ bool Configure(int argc, char *argv[],
Inform msg("Configure ");
Inform errmsg("Error ");
string bc_str;
string dist_str;
*nx = 16;
*ny = 16;
*nz = 1024;
*nLoop = 1;
*serialDim = 0;
/*
string bc_str;
string dist_str;
*nx = 8;
*ny = 8;
*nz = 16;
*nLoop = 1;
*serialDim = 0;
if (*serialDim == 0)
msg << "Serial dimension is x" << endl;
else if (*serialDim == 1)
msg << "Serial dimension is y" << endl;
else if (*serialDim == 2)
msg << "Serial dimension is z" << endl;
else {
*serialDim = 0;
msg << "Serial dimension is x" << endl;
}
*processes = Ippl::getNodes();
*/
for (int i=1; i < argc; ++i) {
string s(argv[i]);
if (s == "-grid") {
*nx = atoi(argv[++i]);
*ny = atoi(argv[++i]);
*nz = atoi(argv[++i]);
} else if (s == "-Loop") {
*nLoop = atoi(argv[++i]);
} else if (s == "-Decomp") {
*serialDim = atoi(argv[++i]);
}
else {
errmsg << "Illegal format for or unknown option '" << s.c_str() << "'.";
errmsg << endl;
}
}
if (*serialDim == 0)
msg << "Serial dimension is x" << endl;
else if (*serialDim == 1)
......@@ -44,11 +74,13 @@ bool Configure(int argc, char *argv[],
else if (*serialDim == 2)
msg << "Serial dimension is z" << endl;
else {
*serialDim = 0;
msg << "Serial dimension is x" << endl;
msg << "All parallel" << endl;
*serialDim = -1;
}
*processes = Ippl::getNodes();
return true;
}
......@@ -57,6 +89,11 @@ int main(int argc, char *argv[])
Ippl ippl(argc,argv);
Inform testmsg(NULL,0);
static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("mainTimer");
static IpplTimings::TimerRef fftTimer = IpplTimings::getTimer("fftTimer");
IpplTimings::startTimer(mainTimer);
const unsigned D=3U;
bool compressTemps = false;
......@@ -143,31 +180,46 @@ int main(int argc, char *argv[])
CFieldPPStan[ndiStandard[0]][ndiStandard[1]][ndiStandard[2]] =
sfact * ( sin( (ndiStandard[0]+1) * kx * xfact +
ndiStandard[1] * ky * yfact +
ndiStandard[2] * kz * zfact ) +
ndiStandard[1] * ky * yfact +
ndiStandard[2] * kz * zfact ) +
sin( (ndiStandard[0]+1) * kx * xfact -
ndiStandard[1] * ky * yfact -
ndiStandard[2] * kz * zfact ) ) +
ndiStandard[1] * ky * yfact -
ndiStandard[2] * kz * zfact ) ) +
cfact * (-cos( (ndiStandard[0]+1) * kx * xfact +
ndiStandard[1] * ky * yfact +
ndiStandard[2] * kz * zfact ) +
cos( (ndiStandard[0]+1) * kx * xfact -
ndiStandard[1] * ky * yfact -
ndiStandard[2] * kz * zfact ) );
ndiStandard[1] * ky * yfact +
ndiStandard[2] * kz * zfact ) +
cos( (ndiStandard[0]+1) * kx * xfact -
ndiStandard[1] * ky * yfact -
ndiStandard[2] * kz * zfact ) );
cout << "TYPEINFO:" << endl;
cout << typeid(RFieldSPStan[0][0][0]).name() << endl;
cout << typeid(CFieldPPStan[0][0][0]).name() << endl;
// RC FFT tests
RFieldSPStan = 0;//real(CFieldPPStan);
CFieldSPStan0h = dcomplex(0.0,0.0);
Inform fo1(NULL,"realField.dat",Inform::OVERWRITE);
//RFieldSPStan = real(CFieldPPStan);
for(int x = ndiStandard[0].first(); x <= ndiStandard[0].last(); x++) {
for(int y = ndiStandard[1].first(); y <= ndiStandard[1].last(); y++) {
for(int z = ndiStandard[2].first(); z <= ndiStandard[2].last(); z++) {
fo1 << x << " " << y << " " << z << " " << RFieldSPStan[x][y][z].get() << endl;
RFieldSPStan[x][y][z] = real(CFieldPPStan[x][y][z].get());
}
}
}
CFieldSPStan0h = dcomplex(0.0,0.0);
/*
Inform fo1(NULL,"realField.dat",Inform::OVERWRITE);
for(int x = ndiStandard[0].first(); x <= ndiStandard[0].last(); x++) {
for(int y = ndiStandard[1].first(); y <= ndiStandard[1].last(); y++) {
for(int z = ndiStandard[2].first(); z <= ndiStandard[2].last(); z++) {
fo1 << x << " " << y << " " << z << " " << RFieldSPStan[x][y][z].get() << endl;
}
}
}
*/
// create RC FFT object
FFT<RCTransform,D,double> rcfft(ndiStandard, ndiStandard0h, compressTemps);
......@@ -179,29 +231,50 @@ int main(int argc, char *argv[])
Inform fo2(NULL,"FFTrealField.dat",Inform::OVERWRITE);
testmsg << "RC transform using layout with zeroth dim serial ..." << endl;
double total_time = 0;
rcfft.transform("forward", RFieldSPStan, CFieldSPStan0h, constInput);
rcfft.transform("inverse", CFieldSPStan0h, RFieldSPStan, constInput);
for (unsigned i=0; i<nLoop; i++) {
RFieldSPStan_save = RFieldSPStan;
timer.start();
rcfft.transform("forward", RFieldSPStan, CFieldSPStan0h, constInput);
for(int x = ndiStandard0h[0].first(); x <= ndiStandard0h[0].last(); x++) {
IpplTimings::startTimer(fftTimer);
rcfft.transform("forward", RFieldSPStan, CFieldSPStan0h, constInput);
/*
for(int x = ndiStandard0h[0].first(); x <= ndiStandard0h[0].last(); x++) {
for(int y = ndiStandard0h[1].first(); y <= ndiStandard0h[1].last(); y++) {
for(int z = ndiStandard0h[2].first(); z <= ndiStandard0h[2].last(); z++) {
fo2 << x << " " << y << " " << z << " " << real(CFieldSPStan0h[x][y][z].get()) << " " << imag(CFieldSPStan0h[x][y][z].get()) << endl;
}
for(int z = ndiStandard0h[2].first(); z <= ndiStandard0h[2].last(); z++) {
fo2 << x << " " << y << " " << z << " " << real(CFieldSPStan0h[x][y][z].get()) << " " << imag(CFieldSPStan0h[x][y][z].get()) << endl;
}
}
}
}
*/
rcfft.transform("inverse", CFieldSPStan0h, RFieldSPStan, constInput);
timer.stop();
IpplTimings::stopTimer(fftTimer);
//total_time+= timer.cpu_time();
/*
Inform fo2(NULL,"FFTrealResult.dat",Inform::OVERWRITE);
for(int x = ndiStandard[0].first(); x <= ndiStandard[0].last(); x++) {
for(int y = ndiStandard[1].first(); y <= ndiStandard[1].last(); y++) {
for(int z = ndiStandard[2].first(); z <= ndiStandard[2].last(); z++) {
fo2 << x << " " << y << " " << z << " " << RFieldSPStan[x][y][z].get() << endl;
}
}
}
*/
diffFieldSPStan = Abs(RFieldSPStan - RFieldSPStan_save);
realDiff = max(diffFieldSPStan);
testmsg << "fabs(realDiff) = " << fabs(realDiff) << endl;
}
testmsg << "CPU time used = " << timer.cpu_time() << " secs." << endl;
timer.clear();
IpplTimings::stopTimer(mainTimer);
IpplTimings::print();
IpplTimings::print(std::string("TestRC.timing"));
return 0;
}
......
This diff is collapsed.
......@@ -259,6 +259,25 @@ private:
Vector_t RefPartP_suv_m;
#ifdef OPAL_DKS
DKSBase dksbase;
void *r_ptr;
void *p_ptr;
void *x_ptr;
void *lastSec_ptr;
void *orient_ptr;
void *dt_ptr;
int ierr;
int stream1;
int stream2;
unsigned int numDeviceElements;
#endif
bool globalEOL_m;
bool wakeStatus_m;
......@@ -346,6 +365,11 @@ private:
IpplTimings::TimerRef BinRepartTimer_m;
IpplTimings::TimerRef WakeFieldTimer_m;
IpplTimings::TimerRef timeIntegrationTimer1Loop1_m;
IpplTimings::TimerRef timeIntegrationTimer1Loop2_m;
IpplTimings::TimerRef timeIntegrationTimer2Loop1_m;
IpplTimings::TimerRef timeIntegrationTimer2Loop2_m;
// 1 --- LF-2 (Boris-Buneman)
// 3 --- AMTS (Adaptive Boris-Buneman with multiple time stepping)
int timeIntegrator_m;
......@@ -705,4 +729,4 @@ inline void ParallelTTracker::kickReferenceParticle(const Vector_t &externalE, c
}
#endif // OPAL_ParallelTTracker_HH
\ No newline at end of file
#endif // OPAL_ParallelTTracker_HH
......@@ -86,6 +86,11 @@ link_directories (
${CCSE_LIBRARY_DIR}
)
# Make the DKS headers and library directory visible whenever an accelerator
# back end is enabled. ENABLE_OPENMP is the option that the project declares for
# the MIC back end; ENABLE_MIC is kept in the condition only for backward
# compatibility with builds that pass it explicitly.
IF (ENABLE_OPENCL OR ENABLE_CUDA OR ENABLE_OPENMP OR ENABLE_MIC)
    include_directories (${DKS_INCLUDE_DIR})
    link_directories (${DKS_LIBRARY_DIR})
ENDIF (ENABLE_OPENCL OR ENABLE_CUDA OR ENABLE_OPENMP OR ENABLE_MIC)
set (OPAL_LIBS
m
${GSL_LIBRARY}
......@@ -98,6 +103,12 @@ set (OPAL_LIBS
${Boost_LIBRARIES}
)
# Link against the DKS library whenever an accelerator back end is enabled.
# ENABLE_OPENMP is the option that the project declares for the MIC back end;
# ENABLE_MIC is kept in the condition only for backward compatibility with
# builds that pass it explicitly.
IF (ENABLE_OPENCL OR ENABLE_CUDA OR ENABLE_OPENMP OR ENABLE_MIC)
    set (OPAL_LIBS
        ${OPAL_LIBS}
        dks)
ENDIF (ENABLE_OPENCL OR ENABLE_CUDA OR ENABLE_OPENMP OR ENABLE_MIC)
add_library( OPALib ${OPAL_SRCS} opal.cpp )
set_target_properties( OPALib PROPERTIES OUTPUT_NAME OPAL )
......
......@@ -391,4 +391,4 @@ void OpalBeamline::removeElement(const std::string &ElName) {
}
}
#endif // OPAL_BEAMLINE_H
\ No newline at end of file
#endif // OPAL_BEAMLINE_H
This diff is collapsed.
......@@ -31,6 +31,12 @@
#include <memory>
//////////////////////////////////////////////////////////////
#include "PoissonSolver.h"
#ifdef OPAL_DKS
#include "DKSBase.h"
#include "nvToolsExt.h"
#endif
class PartBunch;
//////////////////////////////////////////////////////////////
......@@ -44,6 +50,10 @@ public:
~FFTPoissonSolver();
#ifdef OPAL_DKS
DKSBase dksbase;
#endif
// given a charge-density field rho and a set of mesh spacings hr,
// compute the scalar potential with image charges at -z
void computePotential(Field_t &rho, Vector_t hr, double zshift);
......@@ -92,6 +102,20 @@ private:
// domain3_m and mesh3_ are used
CxField_t grntr_m;
#ifdef OPAL_DKS
//pointer for Fourier transformed Green's function on GPU
void * grntr_m_ptr;
void * rho2_m_ptr;
void * tmpgreen_ptr;
void *rho2real_m_ptr;
void *rho2tr_m_ptr;
//stream id for calculating greens function
int streamGreens;
int streamFFT;
#endif