Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
D
DKS
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Code Review
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Uldis Locans
DKS
Commits
eee9dfd8
Commit
eee9dfd8
authored
Feb 28, 2017
by
Uldis Locans
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
separate OPAL DKS functions from base
parent
7c7c2e24
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
520 additions
and
557 deletions
+520
-557
src/DKSBase.cpp
src/DKSBase.cpp
+13
-350
src/DKSBase.h
src/DKSBase.h
+13
-207
src/DKSOPAL.cpp
src/DKSOPAL.cpp
+277
-0
src/DKSOPAL.h
src/DKSOPAL.h
+217
-0
No files found.
src/DKSBase.cpp
View file @
eee9dfd8
This diff is collapsed.
Click to expand it.
src/DKSBase.h
View file @
eee9dfd8
...
...
@@ -76,10 +76,6 @@ private:
bool
m_auto_tuning
;
bool
m_use_config
;
DKSFFT
*
dksfft
;
DKSCollimatorPhysics
*
dkscol
;
GreensFunction
*
dksgreens
;
#ifdef DKS_OPENCL
OpenCLBase
*
oclbase
;
OpenCLChiSquare
*
oclchi
;
...
...
@@ -140,6 +136,12 @@ protected:
}
#endif
#ifdef DKS_MIC
MICBase
*
getMICBase
()
{
return
micbase
;
}
#endif
/** Call OpenCL base to load specified kernel file.
*
*/
...
...
@@ -155,10 +157,6 @@ protected:
return
device_name
;
}
/** Private function to initialize objects based on the device used.
*
*/
int
setup
();
public:
...
...
@@ -179,6 +177,11 @@ public:
*/
~
DKSBase
();
/** Function to initialize objects based on the device used.
*
*/
int
setupDevice
();
/** Turn on auto tuning */
void
setAutoTuningOn
()
{
m_auto_tuning
=
true
;
}
...
...
@@ -891,184 +894,10 @@ public:
return
DKS_ERROR
;
}
///////////////////////////////////////////////
///////Function library part of dksbase////////
///////////////////////////////////////////////
/**
* Setup FFT function.
* Initializes parameters for fft execution. If ndim > 0 initializes handles for fft calls.
* If ffts of various sizes are needed setupFFT should be called with ndim 0, in this case
* each fft will do its own setup according to fft size and dimensions.
* TODO: opencl and mic implementations
*/
int
setupFFT
(
int
ndim
,
int
N
[
3
]);
//BENI:
int
setupFFTRC
(
int
ndim
,
int
N
[
3
],
double
scale
=
1.0
);
//BENI:
int
setupFFTCR
(
int
ndim
,
int
N
[
3
],
double
scale
=
1.0
);
/**
* Call complex-to-complex fft.
* Executes in place complex to complex fft on the device on data pointed by data_ptr.
* stream id can be specified to use other streams than default.
* TODO: mic implementation
*/
int
callFFT
(
void
*
data_ptr
,
int
ndim
,
int
dimsize
[
3
],
int
streamId
=
-
1
);
/**
* Call complex-to-complex ifft.
* Executes in place complex to complex ifft on the device on data pointed by data_ptr.
* stream id can be specified to use other streams than default.
* TODO: mic implementation.
*/
int
callIFFT
(
void
*
data_ptr
,
int
ndim
,
int
dimsize
[
3
],
int
streamId
=
-
1
);
/**
* Normalize complex to complex ifft.
* Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by
* fft size
* TODO: mic implementation.
*/
int
callNormalizeFFT
(
void
*
data_ptr
,
int
ndim
,
int
dimsize
[
3
],
int
streamId
=
-
1
);
/**
* Call real to complex FFT.
* Executes out of place real to complex fft, real_ptr points to real data, comp_ptr - points
* to complex data, ndim - dimension of data, dimsize size of each dimension. real_ptr size
* should be dimsize[0]*dimsize[1]*dimsize[2], comp_ptr size should be at least
* (dimsize[0]/2+1)*dimsize[1]*dimsize[2]
* TODO: opencl and mic implementations
*/
int
callR2CFFT
(
void
*
real_ptr
,
void
*
comp_ptr
,
int
ndim
,
int
dimsize
[
3
],
int
streamId
=
-
1
);
/**
* Call complex to real iFFT.
* Executes out of place complex to real ifft, real_ptr points to real data, comp_ptr - points
* to complex data, ndim - dimension of data, dimsize size of each dimension. real_ptr size
* should be dimsize[0]*dimsize[1]*dimsize[2], comp_ptr size should be at least
* (dimsize[0]/2+1)*dimsize[1]*dimsize[2]
* TODO: opencl and mic implementations.
*/
int
callC2RFFT
(
void
*
real_ptr
,
void
*
comp_ptr
,
int
ndim
,
int
dimsize
[
3
],
int
streamId
=
-
1
);
/**
* Normalize complex to real ifft.
* Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by
* fft size.
* TODO: opencl and mic implementations.
*/
int
callNormalizeC2RFFT
(
void
*
real_ptr
,
int
ndim
,
int
dimsize
[
3
],
int
streamId
=
-
1
);
/**
* Integrated greens function from OPAL FFTPoissonsolver.cpp put on device.
* For specifics check OPAL docs.
* TODO: opencl and mic implementations.
*/
int
callGreensIntegral
(
void
*
tmp_ptr
,
int
I
,
int
J
,
int
K
,
int
NI
,
int
NJ
,
double
hz_m0
,
double
hz_m1
,
double
hz_m2
,
int
streamId
=
-
1
);
/**
* Integrated greens function from OPAL FFTPoissonsolver.cpp put on device.
* For specifics check OPAL docs.
* TODO: opencl and mic implementations.
*/
int
callGreensIntegration
(
void
*
mem_ptr
,
void
*
tmp_ptr
,
int
I
,
int
J
,
int
K
,
int
streamId
=
-
1
);
/**
* Integrated greens function from OPAL FFTPoissonsolver.cpp put on device.
* For specifics check OPAL docs.
* TODO: opencl and mic implementations.
*/
int
callMirrorRhoField
(
void
*
mem_ptr
,
int
I
,
int
J
,
int
K
,
int
streamId
=
-
1
);
/**
* Element by element multiplication.
* Multiplies each element of mem_ptr1 with corresponding element of mem_ptr2, size specifies
* the number of elements in mem_ptr1 and mem_ptr2 to use. Results are put in mem_ptr1.
* TODO: opencl and mic implementations.
*/
int
callMultiplyComplexFields
(
void
*
mem_ptr1
,
void
*
mem_ptr2
,
int
size
,
int
streamId
=
-
1
);
/**
* Chi square for parameter fitting on device.
* mem_data - measurement data, mem_par - pointer to parameter set, mem_chisq - pointer for
* intermediate results. Chi square results are put in &results
*/
int
callPHistoTFFcn
(
void
*
mem_data
,
void
*
mem_par
,
void
*
mem_chisq
,
double
fTimeResolution
,
double
fRebin
,
int
sensors
,
int
length
,
int
numpar
,
double
&
result
);
/**
* max-log-likelihood for parameter fitting on device.
* mem_data - measurement data, mem_t0 - pointer to time 0 for each sensor,
* mem_par - pointer to parameter set, mem_results - pointer for
* intermediate results. Chi square results are put in &results.
* TODO: opencl and mic implementations.
*/
int
callSingleGaussTF
(
void
*
mem_data
,
void
*
mem_t0
,
void
*
mem_par
,
void
*
mem_result
,
double
fTimeResolution
,
double
fRebin
,
double
fGoodBinOffser
,
int
sensors
,
int
length
,
int
numpar
,
double
&
result
);
/**
* max-log-likelihood for parameter fitting on device.
* mem_data - measurement data, mem_t0 - pointer to time 0 for each sensor,
* mem_par - pointer to parameter set, mem_results - pointer for
* intermediate results. Chi square results are put in &results.
* TODO: opencl and mic implementations.
*/
int
callDoubleLorentzTF
(
void
*
mem_data
,
void
*
mem_t0
,
void
*
mem_par
,
void
*
mem_result
,
double
fTimeResolution
,
double
fRebin
,
double
fGoodBinOffser
,
int
sensors
,
int
length
,
int
numpar
,
double
&
result
);
/**
* Monte carlo code for the degrader from OPAL classic/5.0/src/Solvers/CollimatorPhysics.cpp on device.
* For specifics check OPAL docs and CudaCollimatorPhysics class documentation.
* TODO: opencl and mic implementations.
*/
int
callCollimatorPhysics
(
void
*
mem_ptr
,
void
*
par_ptr
,
int
numparticles
,
int
numparams
,
int
&
numaddback
,
int
&
numdead
);
/**
* Monte carlo code for the degrader from OPAL classic/5.0/src/Solvers/CollimatorPhysics.cpp on device.
* For specifics check OPAL docs and CudaCollimatorPhysics class documentation.
* TODO: opencl and mic implementations.
*/
int
callCollimatorPhysics2
(
void
*
mem_ptr
,
void
*
par_ptr
,
int
numparticles
);
/**
* Monte carlo code for the degrader from OPAL classic/5.0/src/Solvers/CollimatorPhysics.cpp on device.
* For specifics check OPAL docs and CudaCollimatorPhysics class documentation.
* Test function for the MIC to test SoA layout vs AoS layout used in previous versions
*/
int
callCollimatorPhysicsSoA
(
void
*
label_ptr
,
void
*
localID_ptr
,
void
*
rx_ptr
,
void
*
ry_ptr
,
void
*
rz_ptr
,
void
*
px_ptr
,
void
*
py_ptr
,
void
*
pz_ptr
,
void
*
par_ptr
,
int
numparticles
);
/**
* Monte carlo code for the degrader from OPAL classic/5.0/src/Solvers/CollimatorPhysics.cpp on device.
* For specifics check OPAL docs and CudaCollimatorPhysics class documentation.
* TODO: opencl and mic implementations.
*/
int
callCollimatorPhysicsSort
(
void
*
mem_ptr
,
int
numparticles
,
int
&
numaddback
);
/**
* Monte carlo code for the degrader from OPAL classic/5.0/src/Solvers/CollimatorPhysics.cpp on device.
* For specifics check OPAL docs and CudaCollimatorPhysics class documentation.
* TODO: opencl and mic implementations.
* Create random numbers on the device and fill mem_data array
*/
int
callCollimatorPhysicsSortSoA
(
void
*
label_ptr
,
void
*
localID_ptr
,
void
*
rx_ptr
,
void
*
ry_ptr
,
void
*
rz_ptr
,
void
*
px_ptr
,
void
*
py_ptr
,
void
*
pz_ptr
,
void
*
par_ptr
,
int
numparticles
,
int
&
numaddback
);
int
callCreateRandomNumbers
(
void
*
mem_ptr
,
int
size
);
/**
* Init random number states and save for reuse on device.
...
...
@@ -1076,29 +905,6 @@ public:
*/
int
callInitRandoms
(
int
size
);
/**
* Integration code from ParallelTTracker from OPAL.
* For specifics check OPAL docs and CudaCollimatorPhysics class docs
*/
int
callParallelTTrackerPush
(
void
*
r_ptr
,
void
*
p_ptr
,
int
npart
,
void
*
dt_ptr
,
double
dt
,
double
c
,
bool
usedt
=
false
,
int
streamId
=
-
1
);
/**
* Integration code from ParallelTTracker from OPAL.
* For specifics check OPAL docs and CudaCollimatorPhysics class docs
*/
int
callParallelTTrackerPushTransform
(
void
*
x_ptr
,
void
*
p_ptr
,
void
*
lastSec_ptr
,
void
*
orient_ptr
,
int
npart
,
int
nsec
,
void
*
dt_ptr
,
double
dt
,
double
c
,
bool
usedt
=
false
,
int
streamId
=
-
1
);
/**
* Create random numbers on the device and fill mem_data array
*/
int
callCreateRandomNumbers
(
void
*
mem_ptr
,
int
size
);
/**
* Print memory information on device (total, used, available)
* TODO: opencl and mic implementation
...
...
src/DKSOPAL.cpp
0 → 100644
View file @
eee9dfd8
#include "DKSOPAL.h"
/**
 * Construct a DKSOPAL object with no algorithm objects attached yet.
 * The FFT, collimator physics and Greens function handles all start out
 * as null pointers; setupOPAL() is what assigns them.
 */
DKSOPAL::DKSOPAL()
  : dksfft(nullptr),
    dkscol(nullptr),
    dksgreens(nullptr)
{
}
/**
 * Release the algorithm objects owned by this instance.
 * Handles that were never assigned are still null, and deleting a null
 * pointer does nothing, so no explicit checks are needed.
 */
DKSOPAL::~DKSOPAL()
{
  delete dksfft;     // FFT implementation
  delete dkscol;     // collimator physics implementation
  delete dksgreens;  // Greens function implementation
}
/**
 * Create the FFT, collimator physics and Greens function objects for the
 * API that is currently selected (OpenCL, CUDA or OpenMP/MIC).
 *
 * All object creation goes through the *_SAFEINIT / *_SAFECALL macros, so
 * the `new` expressions must stay inside the macro arguments.
 *
 * @return the value produced by the matching *_SAFECALL(DKS_SUCCESS) macro,
 *         or DKS_ERROR when none of the API checks matches.
 */
int DKSOPAL::setupOPAL()
{
  // Remains DKS_ERROR unless one of the API branches below overwrites it.
  int status = DKS_ERROR;

  if (apiOpenCL()) {
    status = OPENCL_SAFECALL( DKS_SUCCESS );
    //TODO: only enable if AMD libraries are available
    dksfft    = OPENCL_SAFEINIT_AMD( new OpenCLFFT(getOpenCLBase()) );
    dkscol    = OPENCL_SAFEINIT_AMD( new OpenCLCollimatorPhysics(getOpenCLBase()) );
    dksgreens = OPENCL_SAFEINIT_AMD( new OpenCLGreensFunction(getOpenCLBase()) );
  } else if (apiCuda()) {
    status = CUDA_SAFECALL( DKS_SUCCESS );
    dksfft    = CUDA_SAFEINIT( new CudaFFT(getCudaBase()) );
    dkscol    = CUDA_SAFEINIT( new CudaCollimatorPhysics(getCudaBase()) );
    dksgreens = CUDA_SAFEINIT( new CudaGreensFunction(getCudaBase()) );
  } else if (apiOpenMP()) {
    status = MIC_SAFECALL( DKS_SUCCESS );
    dksfft    = MIC_SAFEINIT( new MICFFT(getMICBase()) );
    dkscol    = MIC_SAFEINIT( new MICCollimatorPhysics(getMICBase()) );
    dksgreens = MIC_SAFEINIT( new MICGreensFunction(getMICBase()) );
  }

  return status;
}
int
DKSOPAL
::
initDevice
()
{
int
ierr
=
setupDevice
();
if
(
ierr
==
DKS_ERROR
)
ierr
=
setupOPAL
();
return
ierr
;
}
/**
 * Set up FFT plans so they can be reused when several FFTs of the same
 * size are needed.
 *
 * - CUDA:   forwards to dksfft->setupFFT(ndim, N).
 * - OpenCL: sets up the complex-to-complex, real-to-complex and
 *           complex-to-real plans; any failure yields DKS_ERROR.
 * - OpenMP: sets up the real-to-complex plan with scale 1 and the
 *           complex-to-real plan with scale 1/(N[0]*N[1]*N[2]); the first
 *           failing error code is returned.
 *
 * @param ndim number of dimensions of the transform
 * @param N    size of each dimension (all three entries are read on the
 *             OpenMP path)
 * @return DKS_SUCCESS on success, an error code otherwise; DKS_ERROR when
 *         no API matches.
 */
int DKSOPAL::setupFFT(int ndim, int N[3])
{
  if (apiCuda()) {
    return dksfft->setupFFT(ndim, N);
  }

  if (apiOpenCL()) {
    int ierr1 = dksfft->setupFFT(ndim, N);
    int ierr2 = dksfft->setupFFTRC(ndim, N);
    int ierr3 = dksfft->setupFFTCR(ndim, N);
    if (ierr1 != DKS_SUCCESS || ierr2 != DKS_SUCCESS || ierr3 != DKS_SUCCESS)
      return DKS_ERROR;

    return DKS_SUCCESS;
  }

  if (apiOpenMP()) {
    //BENI: setting up RC and CR transformations on MIC
    // Form the grid size in double precision: the product of three ints
    // overflows (undefined behaviour) for large grids such as 2048^3.
    const double gridSize = static_cast<double>(N[0]) * N[1] * N[2];

    int ierr1 = dksfft->setupFFTRC(ndim, N, 1.);
    int ierr2 = dksfft->setupFFTCR(ndim, N, 1. / gridSize);
    if (ierr1 != DKS_SUCCESS)
      return ierr1;
    if (ierr2 != DKS_SUCCESS)
      return ierr2;

    return DKS_SUCCESS;
  }

  return DKS_ERROR;
}
//BENI:
/**
 * Set up the real-to-complex FFT plan for the active API.
 *
 * CUDA forwards to the generic setupFFT of the FFT object, OpenCL calls
 * setupFFTRC without an explicit scale, and OpenMP passes `scale` through.
 *
 * @param ndim  number of dimensions
 * @param N     size of each dimension
 * @param scale scaling factor; only forwarded on the OpenMP path
 * @return result of the FFT object's setup call, or DKS_ERROR when no API
 *         matches.
 */
int DKSOPAL::setupFFTRC(int ndim, int N[3], double scale)
{
  int ierr = DKS_ERROR;

  if (apiCuda()) {
    ierr = dksfft->setupFFT(ndim, N);
  } else if (apiOpenCL()) {
    ierr = dksfft->setupFFTRC(ndim, N);
  } else if (apiOpenMP()) {
    ierr = dksfft->setupFFTRC(ndim, N, scale);
  }

  return ierr;
}
//BENI:
/**
 * Set up the complex-to-real FFT plan for the active API.
 *
 * CUDA forwards to the generic setupFFT of the FFT object, OpenCL calls
 * setupFFTCR without an explicit scale, and OpenMP passes `scale` through.
 *
 * @param ndim  number of dimensions
 * @param N     size of each dimension
 * @param scale scaling factor; only forwarded on the OpenMP path
 * @return result of the FFT object's setup call, or DKS_ERROR when no API
 *         matches.
 */
int DKSOPAL::setupFFTCR(int ndim, int N[3], double scale)
{
  int ierr = DKS_ERROR;

  if (apiCuda()) {
    ierr = dksfft->setupFFT(ndim, N);
  } else if (apiOpenCL()) {
    ierr = dksfft->setupFFTCR(ndim, N);
  } else if (apiOpenMP()) {
    ierr = dksfft->setupFFTCR(ndim, N, scale);
  }

  return ierr;
}
/**
 * Run the forward FFT of the FFT object for the selected platform.
 *
 * OpenCL and OpenMP call executeFFT without a stream id; CUDA additionally
 * forwards `streamId`.
 *
 * @param data_ptr pointer to the data handed to the FFT object
 * @param ndim     number of dimensions
 * @param dimsize  size of each dimension
 * @param streamId stream id, only used on the CUDA path
 * @return result of executeFFT, or DKS_ERROR when no API matches.
 */
int DKSOPAL::callFFT(void * data_ptr, int ndim, int dimsize[3], int streamId)
{
  if (apiOpenCL() || apiOpenMP()) {
    return dksfft->executeFFT(data_ptr, ndim, dimsize);
  }

  if (apiCuda()) {
    return dksfft->executeFFT(data_ptr, ndim, dimsize, streamId);
  }

  DEBUG_MSG("No implementation for selected platform");
  return DKS_ERROR;
}
/**
 * Run the inverse FFT of the FFT object for the selected platform.
 *
 * OpenCL and OpenMP call executeIFFT without a stream id; CUDA additionally
 * forwards `streamId`.
 *
 * @param data_ptr pointer to the data handed to the FFT object
 * @param ndim     number of dimensions
 * @param dimsize  size of each dimension
 * @param streamId stream id, only used on the CUDA path
 * @return result of executeIFFT, or DKS_ERROR when no API matches.
 */
int DKSOPAL::callIFFT(void * data_ptr, int ndim, int dimsize[3], int streamId)
{
  if (apiOpenCL() || apiOpenMP()) {
    return dksfft->executeIFFT(data_ptr, ndim, dimsize);
  }

  if (apiCuda()) {
    return dksfft->executeIFFT(data_ptr, ndim, dimsize, streamId);
  }

  DEBUG_MSG("No implementation for selected platform");
  return DKS_ERROR;
}
/**
 * Call the FFT object's normalizeFFT for the selected platform.
 *
 * On OpenCL the FFT kernel file is loaded first; if loading fails the call
 * returns DKS_ERROR without normalizing. CUDA forwards `streamId`, OpenCL
 * and OpenMP do not.
 *
 * @param data_ptr pointer to the data handed to the FFT object
 * @param ndim     number of dimensions
 * @param dimsize  size of each dimension
 * @param streamId stream id, only used on the CUDA path
 * @return result of normalizeFFT, or DKS_ERROR on kernel load failure or
 *         when no API matches.
 */
int DKSOPAL::callNormalizeFFT(void * data_ptr, int ndim, int dimsize[3], int streamId)
{
  if (apiOpenCL()) {
    // The normalization kernel has to be loaded before it can be launched.
    if (loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLFFT.cl") != DKS_SUCCESS) {
      return DKS_ERROR;
    }
    return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
  }

  if (apiCuda()) {
    return dksfft->normalizeFFT(data_ptr, ndim, dimsize, streamId);
  }

  if (apiOpenMP()) {
    return dksfft->normalizeFFT(data_ptr, ndim, dimsize);
  }

  DEBUG_MSG("No implementation for selected platform");
  return DKS_ERROR;
}
/**
 * Run the real-to-complex FFT of the FFT object for the selected platform.
 *
 * CUDA forwards `streamId`; OpenCL and OpenMP call executeRCFFT without it.
 *
 * @param real_ptr pointer to the real data
 * @param comp_ptr pointer to the complex data
 * @param ndim     number of dimensions
 * @param dimsize  size of each dimension
 * @param streamId stream id, only used on the CUDA path
 * @return result of executeRCFFT, or DKS_ERROR when no API matches.
 */
int DKSOPAL::callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId)
{
  if (apiCuda()) {
    return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
  }

  if (apiOpenCL() || apiOpenMP()) {
    return dksfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize);
  }

  DEBUG_MSG("No implementation for selected platform");
  return DKS_ERROR;
}
/**
 * Run the complex-to-real FFT of the FFT object for the selected platform.
 *
 * CUDA forwards `streamId`; OpenCL and OpenMP call executeCRFFT without it.
 *
 * @param real_ptr pointer to the real data
 * @param comp_ptr pointer to the complex data
 * @param ndim     number of dimensions
 * @param dimsize  size of each dimension
 * @param streamId stream id, only used on the CUDA path
 * @return result of executeCRFFT, or DKS_ERROR when no API matches.
 */
int DKSOPAL::callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId)
{
  if (apiCuda()) {
    return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize, streamId);
  }

  if (apiOpenCL() || apiOpenMP()) {
    return dksfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize);
  }

  DEBUG_MSG("No implementation for selected platform");
  return DKS_ERROR;
}
/**
 * Normalize the result of a complex-to-real inverse FFT.
 *
 * Only CUDA forwards to normalizeCRFFT. OpenCL and OpenMP return DKS_ERROR
 * directly, without emitting the debug message.
 *
 * @param real_ptr pointer to the real data
 * @param ndim     number of dimensions
 * @param dimsize  size of each dimension
 * @param streamId stream id forwarded on the CUDA path
 * @return result of normalizeCRFFT on CUDA, DKS_ERROR otherwise.
 */
int DKSOPAL::callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId)
{
  if (apiCuda()) {
    return dksfft->normalizeCRFFT(real_ptr, ndim, dimsize, streamId);
  }

  // Known APIs without an implementation: fail quietly.
  if (apiOpenCL() || apiOpenMP()) {
    return DKS_ERROR;
  }

  DEBUG_MSG("No implementation for selected platform");
  return DKS_ERROR;
}
/**
 * Forward to the Greens function object's greensIntegral.
 * All arguments are passed through unchanged, regardless of the active API.
 *
 * @return the value returned by dksgreens->greensIntegral.
 */
int DKSOPAL::callGreensIntegral(void * tmp_ptr, int I, int J, int K, int NI, int NJ,
                                double hz_m0, double hz_m1, double hz_m2, int streamId)
{
  const int ierr = dksgreens->greensIntegral(tmp_ptr, I, J, K, NI, NJ,
                                             hz_m0, hz_m1, hz_m2, streamId);
  return ierr;
}
/**
 * Forward to the Greens function object's integrationGreensFunction.
 * All arguments are passed through unchanged, regardless of the active API.
 *
 * @return the value returned by dksgreens->integrationGreensFunction.
 */
int DKSOPAL::callGreensIntegration(void * mem_ptr, void * tmp_ptr,
                                   int I, int J, int K, int streamId)
{
  const int ierr = dksgreens->integrationGreensFunction(mem_ptr, tmp_ptr, I, J, K, streamId);
  return ierr;
}
/**
 * Forward to the Greens function object's mirrorRhoField.
 * All arguments are passed through unchanged, regardless of the active API.
 *
 * @return the value returned by dksgreens->mirrorRhoField.
 */
int DKSOPAL::callMirrorRhoField(void * mem_ptr, int I, int J, int K, int streamId)
{
  const int ierr = dksgreens->mirrorRhoField(mem_ptr, I, J, K, streamId);
  return ierr;
}
/**
 * Forward to the Greens function object's multiplyCompelxFields
 * (the callee name is spelled that way in the interface).
 * All arguments are passed through unchanged, regardless of the active API.
 *
 * @return the value returned by dksgreens->multiplyCompelxFields.
 */
int DKSOPAL::callMultiplyComplexFields(void * mem_ptr1, void * mem_ptr2, int size, int streamId)
{
  const int ierr = dksgreens->multiplyCompelxFields(mem_ptr1, mem_ptr2, size, streamId);
  return ierr;
}
/**
 * Forward to the collimator physics object's CollimatorPhysics.
 *
 * Only mem_ptr, par_ptr and numparticles are passed on.
 * NOTE(review): numparams is ignored and the numaddback / numdead output
 * references are never written here - confirm callers do not rely on them.
 *
 * @return the value returned by dkscol->CollimatorPhysics.
 */
int DKSOPAL::callCollimatorPhysics(void * mem_ptr, void * par_ptr,
                                   int numparticles, int numparams,
                                   int & numaddback, int & numdead)
{
  const int ierr = dkscol->CollimatorPhysics(mem_ptr, par_ptr, numparticles);
  return ierr;
}
/**
 * Forward to the collimator physics object's CollimatorPhysics.
 * All arguments are passed through unchanged.
 *
 * @return the value returned by dkscol->CollimatorPhysics.
 */
int DKSOPAL::callCollimatorPhysics2(void * mem_ptr, void * par_ptr, int numparticles)
{
  const int ierr = dkscol->CollimatorPhysics(mem_ptr, par_ptr, numparticles);
  return ierr;
}
/**
 * Forward to the collimator physics object's CollimatorPhysicsSoA, which
 * takes the particle data as separate arrays (label, local id, r and p
 * components) instead of one combined buffer.
 * All arguments are passed through unchanged.
 *
 * @return the value returned by dkscol->CollimatorPhysicsSoA.
 */
int DKSOPAL::callCollimatorPhysicsSoA(void * label_ptr, void * localID_ptr,
                                      void * rx_ptr, void * ry_ptr, void * rz_ptr,
                                      void * px_ptr, void * py_ptr, void * pz_ptr,
                                      void * par_ptr, int numparticles)
{
  const int ierr = dkscol->CollimatorPhysicsSoA(label_ptr, localID_ptr,
                                                rx_ptr, ry_ptr, rz_ptr,
                                                px_ptr, py_ptr, pz_ptr,
                                                par_ptr, numparticles);
  return ierr;
}
/**
 * Forward to the collimator physics object's CollimatorPhysicsSort.
 * numaddback is handed to the callee by reference.
 *
 * @return the value returned by dkscol->CollimatorPhysicsSort.
 */
int DKSOPAL::callCollimatorPhysicsSort(void * mem_ptr, int numparticles, int & numaddback)
{
  const int ierr = dkscol->CollimatorPhysicsSort(mem_ptr, numparticles, numaddback);
  return ierr;
}
/**
 * Forward to the collimator physics object's CollimatorPhysicsSortSoA.
 *
 * The call is wrapped in MIC_SAFECALL, so the macro decides whether the
 * callee is actually invoked; the call expression must stay inside it.
 *
 * @return the value produced by MIC_SAFECALL for the wrapped call.
 */
int DKSOPAL::callCollimatorPhysicsSortSoA(void * label_ptr, void * localID_ptr,
                                          void * rx_ptr, void * ry_ptr, void * rz_ptr,
                                          void * px_ptr, void * py_ptr, void * pz_ptr,
                                          void * par_ptr, int numparticles,
                                          int & numaddback)
{
  const int ierr = MIC_SAFECALL(dkscol->CollimatorPhysicsSortSoA(label_ptr, localID_ptr,
                                                                 rx_ptr, ry_ptr, rz_ptr,
                                                                 px_ptr, py_ptr, pz_ptr,
                                                                 par_ptr, numparticles,
                                                                 numaddback));
  return ierr;
}
/**
 * Forward to the collimator physics object's ParallelTTrackerPush.
 * All arguments are passed through unchanged, regardless of the active API.
 *
 * @return the value returned by dkscol->ParallelTTrackerPush.
 */
int DKSOPAL::callParallelTTrackerPush(void * r_ptr, void * p_ptr, int npart,
                                      void * dt_ptr, double dt, double c,
                                      bool usedt, int streamId)
{
  const int ierr = dkscol->ParallelTTrackerPush(r_ptr, p_ptr, npart, dt_ptr,
                                                dt, c, usedt, streamId);
  return ierr;
}
/**
 * Forward to the collimator physics object's ParallelTTrackerPushTransform.
 * All arguments are passed through unchanged, regardless of the active API.
 *
 * @return the value returned by dkscol->ParallelTTrackerPushTransform.
 */
int DKSOPAL::callParallelTTrackerPushTransform(void * x_ptr, void * p_ptr,
                                               void * lastSec_ptr, void * orient_ptr,
                                               int npart, int nsec,
                                               void * dt_ptr, double dt, double c,
                                               bool usedt, int streamId)
{
  const int ierr = dkscol->ParallelTTrackerPushTransform(x_ptr, p_ptr,
                                                         lastSec_ptr, orient_ptr,
                                                         npart, nsec,
                                                         dt_ptr, dt, c,
                                                         usedt, streamId);
  return ierr;
}
src/DKSOPAL.h
0 → 100644
View file @
eee9dfd8
#ifndef H_DKS_OPAL
#define H_DKS_OPAL
#include <iostream>
#include "AutoTuning/DKSAutoTuning.h"
#include "DKSBase.h"
#include "DKSDefinitions.h"
#include "Algorithms/GreensFunction.h"
#include "Algorithms/CollimatorPhysics.h"
#include "Algorithms/FFT.h"
#ifdef DKS_AMD
#include "OpenCL/OpenCLFFT.h"
#include "OpenCL/OpenCLGreensFunction.h"
#include "OpenCL/OpenCLCollimatorPhysics.h"
#endif
#ifdef DKS_CUDA
#include "CUDA/CudaFFT.cuh"
#include "CUDA/CudaGreensFunction.cuh"
#include "CUDA/CudaCollimatorPhysics.cuh"
#endif
#ifdef DKS_MIC
#include "MIC/MICFFT.h"
#include "MIC/MICGreensFunction.hpp"
#include "MIC/MICCollimatorPhysics.h"
#endif
class
DKSOPAL
:
public
DKSBase
{
private:
DKSFFT
*
dksfft
;
DKSCollimatorPhysics
*
dkscol
;
GreensFunction
*
dksgreens
;
int
setupOPAL
();
public:
DKSOPAL
();
~
DKSOPAL
();
int
initDevice
();
///////////////////////////////////////////////
///////Function library part of dksbase////////
///////////////////////////////////////////////
/**
* Setup FFT function.
* Initializes parameters for fft execution. If ndim > 0 initializes handles for fft calls.
* If ffts of various sizes are needed setupFFT should be called with ndim 0, in this case
* each fft will do its own setup according to fft size and dimensions.
* TODO: opencl and mic implementations
*/
int
setupFFT
(
int
ndim
,
int
N
[
3
]);
//BENI:
int
setupFFTRC
(
int
ndim
,
int
N
[
3
],
double
scale
=
1.0
);
//BENI:
int
setupFFTCR
(
int
ndim
,
int
N
[
3
],
double
scale
=
1.0
);
/**
* Call complex-to-complex fft.
* Executes in place complex to complex fft on the device on data pointed by data_ptr.
* stream id can be specified to use other streams than default.
* TODO: mic implementation
*/
int
callFFT
(
void
*
data_ptr
,
int
ndim
,
int
dimsize
[
3
],
int
streamId
=
-
1
);
/**
* Call complex-to-complex ifft.
* Executes in place complex to complex ifft on the device on data pointed by data_ptr.
* stream id can be specified to use other streams than default.
* TODO: mic implementation.
*/
int
callIFFT
(
void
*
data_ptr
,
int
ndim
,
int
dimsize
[
3
],
int
streamId
=
-
1
);
/**
* Normalize complex to complex ifft.
* Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by
* fft size
* TODO: mic implementation.
*/
int
callNormalizeFFT
(
void
*
data_ptr
,
int
ndim
,
int
dimsize
[
3
],
int
streamId
=
-
1
);
/**
* Call real to complex FFT.
* Executes out of place real to complex fft, real_ptr points to real data, comp_ptr - points
* to complex data, ndim - dimension of data, dimsize size of each dimension. real_ptr size
* should be dimsize[0]*dimsize[1]*dimsize[2], comp_ptr size should be at least
* (dimsize[0]/2+1)*dimsize[1]*dimsize[2]
* TODO: opencl and mic implementations
*/
int
callR2CFFT
(
void
*
real_ptr
,
void
*
comp_ptr
,
int
ndim
,
int
dimsize
[
3
],
int
streamId
=
-
1
);
/**
* Call complex to real iFFT.
* Executes out of place complex to real ifft, real_ptr points to real data, comp_ptr - points
* to complex data, ndim - dimension of data, dimsize size of each dimension. real_ptr size
* should be dimsize[0]*dimsize[1]*dimsize[2], comp_ptr size should be at least
* (dimsize[0]/2+1)*dimsize[1]*dimsize[2]
* TODO: opencl and mic implementations.
*/
int
callC2RFFT
(
void
*
real_ptr
,
void
*
comp_ptr
,
int
ndim
,
int
dimsize
[
3
],
int
streamId
=
-
1
);
/**
* Normalize complex to real ifft.
* Cuda, mic and OpenCL implementations return ifft unscaled, this function divides each element by
* fft size.
* TODO: opencl and mic implementations.
*/
int
callNormalizeC2RFFT
(
void
*
real_ptr
,
int
ndim
,
int
dimsize
[
3
],
int
streamId
=
-
1
);
/**
* Integrated greens function from OPAL FFTPoissonsolver.cpp put on device.
* For specifics check OPAL docs.
* TODO: opencl and mic implementations.
*/
int
callGreensIntegral
(
void
*
tmp_ptr
,
int
I
,
int
J
,
int
K
,
int
NI
,
int
NJ
,
double
hz_m0
,
double
hz_m1
,
double
hz_m2
,
int
streamId
=
-
1
);
/**
* Integrated greens function from OPAL FFTPoissonsolver.cpp put on device.
* For specifics check OPAL docs.
* TODO: opencl and mic implementations.
*/
int
callGreensIntegration
(
void
*
mem_ptr
,
void
*
tmp_ptr
,
int
I
,
int
J
,
int
K
,
int
streamId
=
-
1
);
/**
* Integrated greens function from OPAL FFTPoissonsolver.cpp put on device.
* For specifics check OPAL docs.
* TODO: opencl and mic implementations.
*/
int
callMirrorRhoField
(
void
*
mem_ptr
,
int
I
,
int
J
,
int
K
,
int
streamId
=
-
1
);
/**
* Element by element multiplication.
* Multiplies each element of mem_ptr1 with corresponding element of mem_ptr2, size specifies
* the number of elements in mem_ptr1 and mem_ptr2 to use. Results are put in mem_ptr1.
* TODO: opencl and mic implementations.
*/
int
callMultiplyComplexFields
(
void
*
mem_ptr1
,
void
*
mem_ptr2
,
int
size
,
int
streamId
=
-
1
);
/**
* Monte carlo code for the degrader from OPAL classic/5.0/src/Solvers/CollimatorPhysics.cpp on device.
* For specifics check OPAL docs and CudaCollimatorPhysics class documentation.
* TODO: opencl and mic implementations.
*/