Commit e32f9aaf authored by Uldis Locans

include FFT in DKSOPAL and DKSBaseMuSR

parent f3527969
...@@ -28,4 +28,7 @@ IF (ENABLE_OPAL) ...@@ -28,4 +28,7 @@ IF (ENABLE_OPAL)
TARGET_LINK_LIBRARIES(testPushKick dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES}) TARGET_LINK_LIBRARIES(testPushKick dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
ENDIF(ENABLE_OPAL) ENDIF(ENABLE_OPAL)
ADD_EXECUTABLE(testFFT testFFT.cpp)
TARGET_LINK_LIBRARIES(testFFT dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
#include <cmath>
#include <complex>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
#include "Utility/TimeStamp.h"
#include "DKSFFT.h"
#include "DKSOPAL.h"
#include "DKSBaseMuSR.h"
using namespace std;
// Forward declarations of the helpers used by main(); the definitions follow main().

// Prints the summed absolute difference (real and imaginary parts) between
// two complex arrays. N is the extent used for every axis, dim the number of axes.
void compareData(complex<double>* data1, complex<double>* data2, int N, int dim);
// Same comparison for two real arrays.
void compareData(double* data1, double *data2, int N, int dim);
// Fills a complex array with (sin(k), 0), k being the index along dimsize[0].
void initData(complex<double> *data, int dimsize[3], int dim);
// Fills a real array with sin(k), k being the index along dimsize[0].
void initData(double *data, int dimsize[3], int dim);
// Parses the command line into the grid sizes, the dimension and the API/device
// names. main() exits when this returns true.
bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &dim,
char *api_name, char *device_name);
// NOTE(review): declared only -- no definition or call is visible in this file view.
void printHelp();
int main(int argc, char *argv[]) {
int ierr;
int N1 = 8;
int N2 = 8;
int N3 = 8;
int dim = 3;
char *api_name = new char[10];
char *device_name = new char[10];
if ( readParams(argc, argv, N1, N2, N3, dim, api_name, device_name) )
return 0;
cout << "Use api: " << api_name << ", " << device_name << endl;
int dimsize[3] = {N1, N2, N3};
int sizereal = dimsize[0] * dimsize[1] * dimsize[2];
int sizecomp = (dimsize[0]/2+1) * dimsize[1] *dimsize[2];
double *rdata = new double[sizereal];
double *ordata = new double[sizereal];
complex<double> *cdata = new complex<double>[sizereal];
complex<double> *codata = new complex<double>[sizereal];
initData(rdata, dimsize, 3);
initData(cdata, dimsize, 3);
/* init DKSBase */
cout << "Init device and set function" << endl;
DKSBaseMuSR base;
base.setAPI(api_name, strlen(api_name));
base.setDevice(device_name, strlen(device_name));
cout << "init device" << endl;
base.initDevice();
cout << "setup fft" << endl;
base.setupFFT(dim, dimsize);
//Test RC FFT -> CR FFT
void *real_ptr, *comp_ptr, *res_ptr;
cout << "allocate memory" << endl;
real_ptr = base.allocateMemory<double>(sizereal, ierr);
res_ptr = base.allocateMemory<double>(sizereal, ierr);
comp_ptr = base.allocateMemory< complex<double> >(sizecomp, ierr);
cout << "write data" << endl;
base.writeData<double>(real_ptr, rdata, sizereal);
cout << "perform fft" << endl;
base.callR2CFFT(real_ptr, comp_ptr, dim, dimsize);
base.callC2RFFT(res_ptr, comp_ptr, dim, dimsize);
base.callNormalizeC2RFFT(res_ptr, dim, dimsize);
cout << "read data" << endl;
base.readData<double>(res_ptr, ordata, sizereal);
compareData(rdata, ordata, N1, 3);
base.freeMemory<double>(real_ptr, sizereal);
base.freeMemory<double>(res_ptr, sizereal);
base.freeMemory< complex<double> >(comp_ptr, sizecomp);
//Test CC FFT
void *mem_ptr;
mem_ptr = base.allocateMemory< complex<double> >(sizereal, ierr);
base.writeData< complex<double> >(mem_ptr, cdata, sizereal);
base.callFFT(mem_ptr, 3, dimsize);
base.callIFFT(mem_ptr, 3, dimsize);
base.callNormalizeFFT(mem_ptr, 3, dimsize);
base.readData< complex<double> >(mem_ptr, codata, sizereal);
compareData(cdata, codata, N1, 3);
base.freeMemory< complex<double> > (mem_ptr, sizereal);
delete[] rdata;
delete[] ordata;
delete[] cdata;
delete[] codata;
}
/**
 * Print the accumulated absolute difference between two complex arrays.
 *
 * The real and the imaginary parts contribute separately to the sum. The
 * innermost axis always has N entries; the second axis is visited only for
 * dim > 1 and the third only for dim > 2, each with extent N as well.
 *
 * @param data1 first array
 * @param data2 second array
 * @param N     extent used for every visited axis
 * @param dim   number of axes to visit
 */
void compareData(complex<double>* data1, complex<double>* data2, int N, int dim) {
  const int planes = (dim > 2) ? N : 1;
  const int rows   = (dim > 1) ? N : 1;
  const int cols   = N;

  double total = 0;
  for (int p = 0; p < planes; ++p) {
    for (int r = 0; r < rows; ++r) {
      // Flat offset of the first element of this row.
      const int rowStart = p*planes*planes + r*rows;
      for (int c = 0; c < cols; ++c) {
        const std::complex<double> &lhs = data1[rowStart + c];
        const std::complex<double> &rhs = data2[rowStart + c];
        total += std::fabs(lhs.real() - rhs.real());
        total += std::fabs(lhs.imag() - rhs.imag());
      }
    }
  }

  std::cout << "Size " << N << " CC <--> CC diff: " << total << std::endl;
}
/**
 * Print the accumulated absolute difference between two real arrays.
 *
 * The innermost axis always has N entries; the second axis is visited only
 * for dim > 1 and the third only for dim > 2, each with extent N as well.
 *
 * @param data1 first array
 * @param data2 second array
 * @param N     extent used for every visited axis
 * @param dim   number of axes to visit
 */
void compareData(double* data1, double* data2, int N, int dim) {
  const int planes = (dim > 2) ? N : 1;
  const int rows   = (dim > 1) ? N : 1;
  const int cols   = N;

  double total = 0;
  for (int p = 0; p < planes; ++p) {
    for (int r = 0; r < rows; ++r) {
      // Flat offset of the first element of this row.
      const int rowStart = p*planes*planes + r*rows;
      for (int c = 0; c < cols; ++c) {
        const double delta = data1[rowStart + c] - data2[rowStart + c];
        total += std::fabs(delta);
      }
    }
  }

  std::cout << "Size " << N << " RC <--> CR diff: " << total << std::endl;
}
/**
 * Fill a complex array with (sin(k), 0), where k is the index along the
 * fastest axis (extent dimsize[0]).
 *
 * dim == 3 fills dimsize[2] x dimsize[1] x dimsize[0] entries, dim == 2 fills
 * dimsize[1] x dimsize[0] entries, any other value fills dimsize[0] entries.
 *
 * @param data    destination array
 * @param dimsize extents per axis, fastest axis first
 * @param dim     number of axes to fill
 */
void initData(complex<double> *data, int dimsize[3], int dim) {
  // Axes that do not take part get extent 1, so a single loop nest serves all cases.
  const int slabs = (dim == 3) ? dimsize[2] : 1;
  const int rows  = (dim == 3 || dim == 2) ? dimsize[1] : 1;

  for (int s = 0; s < slabs; ++s) {
    for (int r = 0; r < rows; ++r) {
      const int cols = dimsize[0];
      for (int c = 0; c < cols; ++c) {
        const double value = std::sin(static_cast<double>(c));
        data[s*rows*cols + r*cols + c] = std::complex<double>(value, 0.0);
      }
    }
  }
}
/**
 * Fill a real array with sin(k), where k is the index along the fastest axis
 * (extent dimsize[0]).
 *
 * dim == 3 fills dimsize[2] x dimsize[1] x dimsize[0] entries, dim == 2 fills
 * dimsize[1] x dimsize[0] entries, any other value fills dimsize[0] entries.
 *
 * @param data    destination array
 * @param dimsize extents per axis, fastest axis first
 * @param dim     number of axes to fill
 */
void initData(double *data, int dimsize[3], int dim) {
  // Axes that do not take part get extent 1, so a single loop nest serves all cases.
  const int slabs = (dim == 3) ? dimsize[2] : 1;
  const int rows  = (dim == 3 || dim == 2) ? dimsize[1] : 1;

  for (int s = 0; s < slabs; ++s) {
    for (int r = 0; r < rows; ++r) {
      const int cols = dimsize[0];
      for (int c = 0; c < cols; ++c) {
        data[s*rows*cols + r*cols + c] = std::sin(static_cast<double>(c));
      }
    }
  }
}
/**
 * Parse the command line of the FFT test.
 *
 * Recognised options:
 *   -dim <d>            number of FFT dimensions
 *   -grid <n1> <n2> <n3> grid extents
 *   -cuda               api "Cuda",   device "-gpu"
 *   -opencl             api "OpenCL", device "-gpu"
 *   -mic                api "OpenMP", device "-mic"
 *   -cpu                api "OpenCL", device "-cpu"
 * Unknown arguments are ignored; when several backend flags are given the
 * last one wins. Outputs that are not mentioned on the command line keep the
 * value they had on entry.
 *
 * @param argc,argv    command line as passed to main()
 * @param N1,N2,N3     grid extents (updated by -grid)
 * @param dim          number of dimensions (updated by -dim)
 * @param api_name     buffer receiving the API name (needs room for 7 chars)
 * @param device_name  buffer receiving the device name (needs room for 5 chars)
 * @return true when the caller should stop because an option is missing its
 *         value(s), false otherwise.
 */
bool readParams(int argc, char *argv[], int &N1, int &N2, int &N3, int &dim,
		char *api_name, char *device_name)
{
  // Backend flag -> API / device name written to the output buffers.
  struct Backend {
    const char *flag;
    const char *api;
    const char *device;
  };
  static const Backend backends[] = {
    {"-cuda",   "Cuda",   "-gpu"},
    {"-opencl", "OpenCL", "-gpu"},
    {"-mic",    "OpenMP", "-mic"},
    {"-cpu",    "OpenCL", "-cpu"}
  };
  const int nbackends = sizeof(backends) / sizeof(backends[0]);

  for (int i = 1; i < argc; i++) {
    const std::string arg(argv[i]);

    if (arg == "-dim") {
      // The value must exist: argv[argc] is a null pointer and must not be parsed.
      if (i + 1 >= argc) {
        std::cerr << "Option -dim requires a value" << std::endl;
        return true;
      }
      dim = std::atoi(argv[i + 1]);
      i += 1;   // skip the consumed value
      continue;
    }

    if (arg == "-grid") {
      if (i + 3 >= argc) {
        std::cerr << "Option -grid requires three values" << std::endl;
        return true;
      }
      N1 = std::atoi(argv[i + 1]);
      N2 = std::atoi(argv[i + 2]);
      N3 = std::atoi(argv[i + 3]);
      i += 3;   // skip the consumed values
      continue;
    }

    for (int b = 0; b < nbackends; b++) {
      if (arg == backends[b].flag) {
        std::strcpy(api_name, backends[b].api);
        std::strcpy(device_name, backends[b].device);
        break;
      }
    }
  }

  return false;
}
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
#include "../DKSDefinitions.h" #include "../DKSDefinitions.h"
class DKSFFT { class FFT {
protected: protected:
int defaultN[3]; int defaultN[3];
...@@ -22,7 +22,7 @@ protected: ...@@ -22,7 +22,7 @@ protected:
public: public:
virtual ~DKSFFT() { } virtual ~FFT() { }
virtual int setupFFT(int ndim, int N[3]) = 0; virtual int setupFFT(int ndim, int N[3]) = 0;
virtual int setupFFTRC(int ndim, int N[3], double scale = 1.0) = 0; virtual int setupFFTRC(int ndim, int N[3], double scale = 1.0) = 0;
......
...@@ -35,12 +35,12 @@ ENDMACRO () ...@@ -35,12 +35,12 @@ ENDMACRO ()
SET (DKS_BASEDIR_HDRS SET (DKS_BASEDIR_HDRS
DKSBase.h DKSBase.h
DKSDefinitions.h DKSDefinitions.h
DKSOPAL.h DKSFFT.h
) )
SET (DKS_BASEDIR_SRCS SET (DKS_BASEDIR_SRCS
DKSBase.cpp DKSBase.cpp
DKSOPAL.cpp DKSFFT.cpp
) )
#add opal to DKS if enable_opal is set #add opal to DKS if enable_opal is set
......
SET (_HDRS CudaBase.cuh) SET (_HDRS CudaBase.cuh CudaFFT.cuh)
SET (_SRCS CudaBase.cu) SET (_SRCS CudaBase.cu CudaFFT.cu)
IF (ENABLE_OPAL) IF (ENABLE_OPAL)
SET (_HDRS ${_HDRS} CudaFFT.cuh CudaGreensFunction.cuh CudaCollimatorPhysics.cuh) SET (_HDRS ${_HDRS} CudaGreensFunction.cuh CudaCollimatorPhysics.cuh)
SET (_SRCS ${_SRCS} CudaFFT.cu CudaGreensFunction.cu CudaCollimatorPhysics.cu) SET (_SRCS ${_SRCS} CudaGreensFunction.cu CudaCollimatorPhysics.cu)
ENDIF (ENABLE_OPAL) ENDIF (ENABLE_OPAL)
IF (ENABLE_MUSR) IF (ENABLE_MUSR)
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
#include "../Algorithms/FFT.h" #include "../Algorithms/FFT.h"
#include "CudaBase.cuh" #include "CudaBase.cuh"
class CudaFFT : public DKSFFT{ class CudaFFT : public FFT {
private: private:
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include "AutoTuning/DKSAutoTuningTester.h" #include "AutoTuning/DKSAutoTuningTester.h"
#include "DKSBase.h" #include "DKSBase.h"
#include "DKSFFT.h"
#include "Algorithms/ChiSquareRuntime.h" #include "Algorithms/ChiSquareRuntime.h"
...@@ -19,7 +20,7 @@ ...@@ -19,7 +20,7 @@
#include "OpenCL/OpenCLChiSquareRuntime.h" #include "OpenCL/OpenCLChiSquareRuntime.h"
#endif #endif
class DKSBaseMuSR : public DKSBase { class DKSBaseMuSR : public DKSFFT {
private: private:
......
...@@ -4,7 +4,7 @@ DKSFFT::DKSFFT() { ...@@ -4,7 +4,7 @@ DKSFFT::DKSFFT() {
dksfft = nullptr; dksfft = nullptr;
} }
~DKSFFT::DKSFFT() { DKSFFT::~DKSFFT() {
delete dksfft; delete dksfft;
} }
...@@ -12,8 +12,10 @@ DKSFFT::DKSFFT() { ...@@ -12,8 +12,10 @@ DKSFFT::DKSFFT() {
int DKSFFT::setupFFT(int ndim, int N[3]) { int DKSFFT::setupFFT(int ndim, int N[3]) {
if (apiCuda()) { if (apiCuda()) {
dksfft = CUDA_SAFEINIT( new CudaFFT(getCudaBase()) );
return dksfft->setupFFT(ndim, N); return dksfft->setupFFT(ndim, N);
} else if (apiOpenCL()) { } else if (apiOpenCL()) {
dksfft = OPENCL_SAFEINIT_AMD( new OpenCLFFT(getOpenCLBase()) );
int ierr1 = dksfft->setupFFT(ndim, N); int ierr1 = dksfft->setupFFT(ndim, N);
int ierr2 = dksfft->setupFFTRC(ndim, N); int ierr2 = dksfft->setupFFTRC(ndim, N);
int ierr3 = dksfft->setupFFTCR(ndim, N); int ierr3 = dksfft->setupFFTCR(ndim, N);
...@@ -24,6 +26,7 @@ int DKSFFT::setupFFT(int ndim, int N[3]) { ...@@ -24,6 +26,7 @@ int DKSFFT::setupFFT(int ndim, int N[3]) {
} else if (apiOpenMP()) { } else if (apiOpenMP()) {
//micbase.mic_setupFFT(ndim, N); //micbase.mic_setupFFT(ndim, N);
//BENI: setting up RC and CR transformations on MIC //BENI: setting up RC and CR transformations on MIC
dksfft = MIC_SAFEINIT( new MICFFT(getMICBase()) );
int ierr1 = dksfft->setupFFTRC(ndim, N, 1.); int ierr1 = dksfft->setupFFTRC(ndim, N, 1.);
int ierr2 = dksfft->setupFFTCR(ndim, N, 1./(N[0]*N[1]*N[2])); int ierr2 = dksfft->setupFFTCR(ndim, N, 1./(N[0]*N[1]*N[2]));
if (ierr1 != DKS_SUCCESS) if (ierr1 != DKS_SUCCESS)
......
#ifndef H_DKS_FFT #ifndef H_DKSBASE_FFT
#define H_DKS_FFT #define H_DKSBASE_FFT
#include <iostream> #include <iostream>
#include "AutoTuning/DKSAutoTuning.h" #include "AutoTuning/DKSAutoTuning.h"
...@@ -18,7 +18,6 @@ ...@@ -18,7 +18,6 @@
#ifdef DKS_CUDA #ifdef DKS_CUDA
#include "CUDA/CudaFFT.cuh" #include "CUDA/CudaFFT.cuh"
#endif #endif
#ifdef DKS_MIC #ifdef DKS_MIC
...@@ -29,7 +28,7 @@ class DKSFFT : public DKSBase { ...@@ -29,7 +28,7 @@ class DKSFFT : public DKSBase {
private: private:
DKSFFT *dksfft; FFT *dksfft;
int initFFT(); int initFFT();
...@@ -105,3 +104,5 @@ public: ...@@ -105,3 +104,5 @@ public:
}; };
#endif
...@@ -11,7 +11,6 @@ DKSOPAL::DKSOPAL(const char* api_name, const char* device_name) { ...@@ -11,7 +11,6 @@ DKSOPAL::DKSOPAL(const char* api_name, const char* device_name) {
} }
DKSOPAL::~DKSOPAL() { DKSOPAL::~DKSOPAL() {
delete dksfft;
delete dkscol; delete dkscol;
delete dksgreens; delete dksgreens;
} }
...@@ -21,17 +20,14 @@ int DKSOPAL::setupOPAL() { ...@@ -21,17 +20,14 @@ int DKSOPAL::setupOPAL() {
if (apiOpenCL()) { if (apiOpenCL()) {
ierr = OPENCL_SAFECALL( DKS_SUCCESS ); ierr = OPENCL_SAFECALL( DKS_SUCCESS );
//TODO: only enable if AMD libraries are available //TODO: only enable if AMD libraries are available
dksfft = OPENCL_SAFEINIT_AMD( new OpenCLFFT(getOpenCLBase()) );
dkscol = OPENCL_SAFEINIT_AMD( new OpenCLCollimatorPhysics(getOpenCLBase()) ); dkscol = OPENCL_SAFEINIT_AMD( new OpenCLCollimatorPhysics(getOpenCLBase()) );
dksgreens = OPENCL_SAFEINIT_AMD( new OpenCLGreensFunction(getOpenCLBase()) ); dksgreens = OPENCL_SAFEINIT_AMD( new OpenCLGreensFunction(getOpenCLBase()) );
} else if (apiCuda()) { } else if (apiCuda()) {
ierr = CUDA_SAFECALL( DKS_SUCCESS ); ierr = CUDA_SAFECALL( DKS_SUCCESS );
dksfft = CUDA_SAFEINIT( new CudaFFT(getCudaBase()) );
dkscol = CUDA_SAFEINIT( new CudaCollimatorPhysics(getCudaBase()) ); dkscol = CUDA_SAFEINIT( new CudaCollimatorPhysics(getCudaBase()) );
dksgreens = CUDA_SAFEINIT( new CudaGreensFunction(getCudaBase()) ); dksgreens = CUDA_SAFEINIT( new CudaGreensFunction(getCudaBase()) );
} else if (apiOpenMP()) { } else if (apiOpenMP()) {
ierr = MIC_SAFECALL( DKS_SUCCESS ); ierr = MIC_SAFECALL( DKS_SUCCESS );
dksfft = MIC_SAFEINIT( new MICFFT(getMICBase()) );
dkscol = MIC_SAFEINIT( new MICCollimatorPhysics(getMICBase()) ); dkscol = MIC_SAFEINIT( new MICCollimatorPhysics(getMICBase()) );
dksgreens = MIC_SAFEINIT( new MICGreensFunction(getMICBase()) ); dksgreens = MIC_SAFEINIT( new MICGreensFunction(getMICBase()) );
} else { } else {
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include "AutoTuning/DKSAutoTuning.h" #include "AutoTuning/DKSAutoTuning.h"
#include "DKSBase.h" #include "DKSBase.h"
#include "DKSFFT.h"
#include "DKSDefinitions.h" #include "DKSDefinitions.h"
...@@ -32,11 +33,10 @@ ...@@ -32,11 +33,10 @@
#include "MIC/MICCollimatorPhysics.h" #include "MIC/MICCollimatorPhysics.h"
#endif #endif
class DKSOPAL : public DKSBase { class DKSOPAL : public DKSFFT {
private: private:
DKSFFT *dksfft;
DKSCollimatorPhysics *dkscol; DKSCollimatorPhysics *dkscol;
GreensFunction *dksgreens; GreensFunction *dksgreens;
......
SET (_SRCS MICBase.cpp) SET (_SRCS MICBase.cpp MICFFT.cpp)
SET (_HDRS MICBase.h) SET (_HDRS MICBase.h MICFFT.h)
IF (ENABLE_OPAL) IF (ENABLE_OPAL)
SET (_SRCS SET (_SRCS
${_SRCS} ${_SRCS}
MICChiSquare.cpp MICChiSquare.cpp
MICFFT.cpp
MICGreensFunction.cpp MICGreensFunction.cpp
MICCollimatorPhysics.cpp MICCollimatorPhysics.cpp
) )
...@@ -13,7 +12,6 @@ IF (ENABLE_OPAL) ...@@ -13,7 +12,6 @@ IF (ENABLE_OPAL)
SET (_HDRS SET (_HDRS
${_HDRS} ${_HDRS}
MICChiSquare.h MICChiSquare.h
MICFFT.h
MICCollimatorPhysics.h MICCollimatorPhysics.h
MICGreensFunction.hpp MICGreensFunction.hpp
MICMergeSort.h MICMergeSort.h
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
#include "../Algorithms/FFT.h" #include "../Algorithms/FFT.h"
#include "MICBase.h" #include "MICBase.h"
class MICFFT : public DKSFFT { class MICFFT : public FFT {
private: private:
......
...@@ -4,6 +4,25 @@ SET (_HDRS OpenCLBase.h) ...@@ -4,6 +4,25 @@ SET (_HDRS OpenCLBase.h)
SET (_SRCS OpenCLBase.cpp) SET (_SRCS OpenCLBase.cpp)
SET (_KERNELS "") SET (_KERNELS "")
IF (ENABLE_AMD)
SET (_SRCS
${_SRCS}
OpenCLFFT.cpp
)
SET (_HDRS
${_HDRS}
OpenCLFFT.h
)
SET (_KERNELS
${_KERNELS}
OpenCLKernels/OpenCLFFT.cl
OpenCLKernels/OpenCLFFTStockham.cl
OpenCLKernels/OpenCLTranspose.cl
)
ENDIF (ENABLE_AMD)
IF (ENABLE_MUSR) IF (ENABLE_MUSR)
SET (_HDRS ${_HDRS} OpenCLChiSquareRuntime.h) SET (_HDRS ${_HDRS} OpenCLChiSquareRuntime.h)
SET (_SRCS ${_SRCS} OpenCLChiSquareRuntime.cpp) SET (_SRCS ${_SRCS} OpenCLChiSquareRuntime.cpp)
...@@ -13,23 +32,18 @@ ENDIF (ENABLE_MUSR) ...@@ -13,23 +32,18 @@ ENDIF (ENABLE_MUSR)
IF (ENABLE_AMD AND ENABLE_OPAL) IF (ENABLE_AMD AND ENABLE_OPAL)
SET (_SRCS SET (_SRCS
${_SRCS} ${_SRCS}
OpenCLFFT.cpp
OpenCLCollimatorPhysics.cpp OpenCLCollimatorPhysics.cpp
OpenCLGreensFunction.cpp OpenCLGreensFunction.cpp
) )
SET (_HDRS SET (_HDRS
${_HDRS} ${_HDRS}
OpenCLFFT.h
OpenCLCollimatorPhysics.h OpenCLCollimatorPhysics.h
OpenCLGreensFunction.h OpenCLGreensFunction.h
) )
SET (_KERNELS SET (_KERNELS
${_KERNELS} ${_KERNELS}
OpenCLKernels/OpenCLFFT.cl
OpenCLKernels/OpenCLFFTStockham.cl
OpenCLKernels/OpenCLTranspose.cl
OpenCLKernels/OpenCLCollimatorPhysics.cl OpenCLKernels/OpenCLCollimatorPhysics.cl
OpenCLKernels/OpenCLGreensFunction.cl OpenCLKernels/OpenCLGreensFunction.cl
) )
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#include "clFFT.h" #include "clFFT.h"
class OpenCLFFT : public DKSFFT { class OpenCLFFT : public FFT {
private: private:
...@@ -112,10 +112,9 @@ public: ...@@ -112,10 +112,9 @@ public:
int streamId = -1); int streamId = -1);
int executeCRFFT(void * real_ptr, void * comp_ptr, int ndim, int N[3], int executeCRFFT(void * real_ptr, void * comp_ptr, int ndim, int N[3],
int streamId = -1); int streamId = -1);
int normalizeCRFFT(void *real_ptr, int ndim, int N[3], int streamId = -1) int normalizeCRFFT(void *real_ptr, int ndim, int N[3], int streamId = -1) {
{ return DKS_ERROR;
return DKS_ERROR; }
}
//void printData3DN4(cl_double2* &data, int N); //void printData3DN4(cl_double2* &data, int N);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment