Commit 027bdc01 authored by Uldis Locans
Browse files

FFT for OpenCL using clFFT library

parent e8386869
......@@ -38,10 +38,17 @@ IF (Boost_FOUND)
LINK_DIRECTORIES(${Boost_LIBRARY_DIRS})
ENDIF (Boost_FOUND)
#find clFFT
# Locate the clFFT package; REQUIRED makes configuration fail when it is not found.
# The CLFFT_PREFIX, CLFFT_DIR and CLFFT environment variables are passed as search hints.
# NOTE(review): clFFT_USE_STATIC_LIBS presumably selects shared vs. static clFFT - confirm against the clFFT package config.
SET (clFFT_USE_STATIC_LIBS OFF)
FIND_PACKAGE(clFFT REQUIRED HINTS $ENV{CLFFT_PREFIX} $ENV{CLFFT_DIR} $ENV{CLFFT})
MESSAGE (STATUS "Found clFFT library: ${CLFFT_LIBRARIES}")
MESSAGE (STATUS "Found clFFT include dir: ${CLFFT_INCLUDE_DIRS}")
INCLUDE_DIRECTORIES (${CLFFT_INCLUDE_DIRS})
# NOTE(review): LINK_DIRECTORIES expects directories, but CLFFT_LIBRARIES is reported above as the library itself - confirm this is intended.
LINK_DIRECTORIES (${CLFFT_LIBRARIES})
#enable UQTK
OPTION (USE_UQTK "Use UQTK" OFF)
#intel icpc compiler specific flags
IF (${CMAKE_C_COMPILER_ID} STREQUAL "Intel" OR USE_INTEL)
......
......@@ -3,17 +3,17 @@ LINK_DIRECTORIES( ${CMAKE_SOURCE_DIR}/src )
#chi square kernel tests
ADD_EXECUTABLE(testChiSquareRT testChiSquareRT.cpp)
TARGET_LINK_LIBRARIES(testChiSquareRT dks ${Boost_LIBRARIES})
TARGET_LINK_LIBRARIES(testChiSquareRT dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
ADD_EXECUTABLE(testChiSquareRTRandom testChiSquareRTRandom.cpp)
TARGET_LINK_LIBRARIES(testChiSquareRTRandom dks ${Boost_LIBRARIES})
TARGET_LINK_LIBRARIES(testChiSquareRTRandom dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
IF (USE_UQTK)
ADD_EXECUTABLE(testChiSquareRTUQTK testChiSquareRTUQTK.cpp)
TARGET_LINK_LIBRARIES(testChiSquareRTUQTK dks ${Boost_LIBRARIES} lreg UQTk quad bcs uqtktools cvode-2.6.0 dsfmt lbfgs uqtklapack uqtkslatec uqtkblas gfortran)
TARGET_LINK_LIBRARIES(testChiSquareRTUQTK dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES} lreg UQTk quad bcs uqtktools cvode-2.6.0 dsfmt lbfgs uqtklapack uqtkslatec uqtkblas gfortran)
ENDIF (USE_UQTK)
#TARGET_LINK_LIBRARIES(testChiSquareRTUQTK dks ${Boost_LIBRARIES})
#test to verify search functions
ADD_EXECUTABLE(testSearch testSearch.cpp)
TARGET_LINK_LIBRARIES(testSearch dks ${Boost_LIBRARIES})
TARGET_LINK_LIBRARIES(testSearch dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
......@@ -189,6 +189,7 @@ DKSBase::~DKSBase() {
delete oclbase;
#endif
#ifdef DKS_MIC
delete micfft;
delete miccol;
......@@ -461,6 +462,14 @@ int DKSBase::setupFFT(int ndim, int N[3]) {
if (apiCuda()) {
return CUDA_SAFECALL( cfft->setupFFT(ndim, N) );
} else if (apiOpenCL()) {
int ierr1 = OPENCL_SAFECALL( oclfft->setupFFT(ndim, N) );
int ierr2 = OPENCL_SAFECALL( oclfft->setupFFTRC(ndim, N) );
int ierr3 = OPENCL_SAFECALL( oclfft->setupFFTCR(ndim, N) );
if (ierr1 != DKS_SUCCESS || ierr2 != DKS_SUCCESS || ierr3 != DKS_SUCCESS)
return DKS_ERROR;
return DKS_SUCCESS;
} else if (apiOpenMP()) {
//micbase.mic_setupFFT(ndim, N);
//BENI: setting up RC and CR transformations on MIC
......@@ -481,6 +490,8 @@ int DKSBase::setupFFTRC(int ndim, int N[3], double scale) {
if (apiCuda())
return CUDA_SAFECALL(cfft->setupFFT(ndim, N));
if (apiOpenCL())
return OPENCL_SAFECALL(oclfft->setupFFTRC(ndim, N));
else if (apiOpenMP())
return MIC_SAFECALL(micfft->setupFFTRC(ndim, N, scale));
......@@ -493,6 +504,8 @@ int DKSBase::setupFFTCR(int ndim, int N[3], double scale) {
if (apiCuda())
return CUDA_SAFECALL(cfft->setupFFT(ndim, N));
if (apiOpenCL())
return OPENCL_SAFECALL(oclfft->setupFFTCR(ndim, N));
else if (apiOpenMP())
return MIC_SAFECALL(micfft->setupFFTCR(ndim, N, scale));
......@@ -559,6 +572,8 @@ int DKSBase::callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[
if (apiCuda())
return CUDA_SAFECALL( cfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize, streamId) );
else if (apiOpenCL())
return OPENCL_SAFECALL( oclfft->executeRCFFT(real_ptr, comp_ptr, ndim, dimsize) );
else if (apiOpenMP())
return MIC_SAFECALL( micfft->executeRCFFT(real_ptr,comp_ptr, ndim, dimsize) );
......@@ -570,6 +585,8 @@ int DKSBase::callR2CFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[
int DKSBase::callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[3], int streamId) {
if (apiCuda())
return CUDA_SAFECALL( cfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize, streamId) );
else if (apiOpenCL())
return OPENCL_SAFECALL( oclfft->executeCRFFT(real_ptr, comp_ptr, ndim, dimsize) );
else if (apiOpenMP())
return MIC_SAFECALL( micfft->executeCRFFT(comp_ptr,real_ptr, ndim, dimsize) );
......@@ -581,25 +598,15 @@ int DKSBase::callC2RFFT(void * real_ptr, void * comp_ptr, int ndim, int dimsize[
/*
  Normalize the result of a complex-to-real inverse FFT.
  Only the CUDA backend calls a normalization routine (cfft->normalizeCRFFT);
  the OpenCL and OpenMP backends report success without doing any work.
  Any other platform logs a debug message and also returns DKS_SUCCESS.
*/
int DKSBase::callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId) {

  if (apiCuda())
    return CUDA_SAFECALL( cfft->normalizeCRFFT(real_ptr, ndim, dimsize, streamId) );

  /* nothing to do on these backends */
  if (apiOpenCL() || apiOpenMP())
    return DKS_SUCCESS;

  DEBUG_MSG("No implementation for selected platform");
  return DKS_SUCCESS;
}
/* normalize complex to real iFFT */
/*
  Transpose an array on the device; only implemented for the OpenCL backend.
  Loads the OpenCL transpose kernel source and, if that succeeds, calls
  oclfft->ocl_executeTranspose with the given arguments.
  Returns DKS_ERROR when the kernel cannot be loaded or when the selected
  platform is not OpenCL.
*/
int DKSBase::callTranspose(void *mem_ptr, int N[3], int ndim, int dim) {

  if (!apiOpenCL()) {
    DEBUG_MSG("No implementation for selected platform");
    return DKS_ERROR;
  }

  if (loadOpenCLKernel("OpenCL/OpenCLKernels/OpenCLTranspose.cl") != DKS_SUCCESS)
    return DKS_ERROR;

  return OPENCL_SAFECALL(oclfft->ocl_executeTranspose(mem_ptr, N, ndim, dim));
}
int DKSBase::callGreensIntegral(void *tmp_ptr, int I, int J, int K, int NI, int NJ,
double hz_m0, double hz_m1, double hz_m2, int streamId) {
......
......@@ -405,7 +405,7 @@ public:
} else if (apiOpenMP()) {
#ifdef DKS_MIC
void * mem_ptr = NULL;
mem_ptr = micbase.mic_allocateMemory<T>(elements);
mem_ptr = micbase->mic_allocateMemory<T>(elements);
return mem_ptr;
#endif
}
......@@ -498,7 +498,7 @@ public:
return CUDA_SAFECALL(cbase->cuda_writeData((T*)mem_ptr, data, size, offset));
} else if (apiOpenMP()) {
return MIC_SAFECALL(micbase.mic_writeData<T>(mem_ptr, data, elements, offset));
return MIC_SAFECALL(micbase->mic_writeData<T>(mem_ptr, data, elements, offset));
}
......@@ -532,7 +532,7 @@ public:
size_t size = sizeof(T)*elements;
return CUDA_SAFECALL(cbase->cuda_writeDataAsync((T*)mem_ptr, data, size, streamId, offset));
} else if (apiOpenMP()) {
return MIC_SAFECALL(micbase.mic_writeDataAsync<T>(mem_ptr, data, elements, streamId, offset));
return MIC_SAFECALL(micbase->mic_writeDataAsync<T>(mem_ptr, data, elements, streamId, offset));
}
return DKS_ERROR;
......@@ -832,7 +832,7 @@ public:
size_t size = sizeof(T)*elements;
return CUDA_SAFECALL(cbase->cuda_readData((T*)mem_ptr, out_data, size, offset));
} else if (apiOpenMP()) {
return MIC_SAFECALL(micbase.mic_readData<T>(mem_ptr, out_data, elements, offset));
return MIC_SAFECALL(micbase->mic_readData<T>(mem_ptr, out_data, elements, offset));
}
return DKS_ERROR;
......@@ -860,7 +860,7 @@ public:
size_t size = sizeof(T)*elements;
return CUDA_SAFECALL(cbase->cuda_readDataAsync((T*)mem_ptr, out_data, size, streamId, offset));
} else if (apiOpenMP()) {
return MIC_SAFECALL(micbase.mic_readDataAsync<T>(mem_ptr, out_data, elements,
return MIC_SAFECALL(micbase->mic_readDataAsync<T>(mem_ptr, out_data, elements,
streamId, offset));
}
......@@ -880,7 +880,7 @@ public:
else if (apiCuda())
return CUDA_SAFECALL(cbase->cuda_freeMemory(mem_ptr));
else if (apiOpenMP())
return MIC_SAFECALL(micbase.mic_freeMemory<T>(mem_ptr, elements));
return MIC_SAFECALL(micbase->mic_freeMemory<T>(mem_ptr, elements));
return DKS_ERROR;
}
......@@ -955,12 +955,6 @@ public:
*/
int callNormalizeC2RFFT(void * real_ptr, int ndim, int dimsize[3], int streamId = -1);
/**
* Transpose 2D and 3D arrays, OpenCL implementation
* N - size of dimensions, ndim - number of dimensions, dim - dim to transpose
*/
int callTranspose(void *mem_ptr, int N[3], int ndim, int dim);
/**
* Integrated greens function from OPAL FFTPoissonsolver.cpp put on device.
* For specifics check OPAL docs.
......
......@@ -52,9 +52,6 @@ class OpenCLBase {
private:
static cl_context m_context;
static cl_command_queue m_command_queue;
static cl_platform_id m_platform_id;
static cl_device_id m_device_id;
......@@ -118,6 +115,9 @@ protected:
public:
static cl_context m_context;
static cl_command_queue m_command_queue;
/*
constructor
......
......@@ -89,26 +89,82 @@ int OpenCLFFT::ocl_callBitReverseKernel(cl_mem &data, int cdim, int ndim, int N)
call fft execution on device for every dimension
*/
int OpenCLFFT::executeFFT(void *data, int ndim, int N[3], int streamId, bool forward) {
int ierr;
int dkserr = DKS_SUCCESS;
cl_int ierr;
cl_mem inout = (cl_mem)data;
int n = N[0];
for (int dim = 0; dim < ndim; dim++) {
ierr = ocl_callBitReverseKernel(inout, dim, ndim, n);
if (ierr != OCL_SUCCESS) {
DEBUG_MSG("Error executing bit reverse");
return OCL_ERROR;
}
if (forward)
ierr = clfftEnqueueTransform(planHandleZ2Z, CLFFT_FORWARD, 1, &m_oclbase->m_command_queue,
0, NULL, NULL, &inout, NULL, NULL);
else
ierr = clfftEnqueueTransform(planHandleZ2Z, CLFFT_BACKWARD, 1, &m_oclbase->m_command_queue,
0, NULL, NULL, &inout, NULL, NULL);
if (ierr != OCL_SUCCESS) {
dkserr = DKS_ERROR;
DEBUG_MSG("Error executing cfFFT\n");
if (ierr == CLFFT_INVALID_PLAN)
std::cout << "Invlalid plan" << std::endl;
else
std::cout << "CLFFT error" << std::endl;
}
ierr = ocl_callFFTKernel(inout, dim, ndim, n, forward);
if (ierr != OCL_SUCCESS) {
DEBUG_MSG("Error executing fft reverse");
return OCL_ERROR;
}
return dkserr;
}
/*
  Execute the real-to-complex FFT on the device using clFFT.

  Enqueues a forward transform of planHandleD2Z (the plan created in
  setupFFTRC) on the OpenCL command queue. real_ptr is used as the input
  buffer and comp_ptr as the output buffer; both are cast to cl_mem.
  ndim, N and streamId are not used by this implementation.

  Returns DKS_SUCCESS when the transform was enqueued and DKS_ERROR when
  clFFT reported an error.
*/
int OpenCLFFT::executeRCFFT(void *real_ptr, void *comp_ptr, int ndim, int N[3], int streamId) {

  std::cout << "execute RCFFT" << std::endl;

  int dkserr = DKS_SUCCESS;
  cl_int ierr;
  cl_mem real_in = (cl_mem)real_ptr;
  cl_mem comp_out = (cl_mem)comp_ptr;

  ierr = clfftEnqueueTransform(planHandleD2Z, CLFFT_FORWARD, 1, &m_oclbase->m_command_queue,
                               0, NULL, NULL, &real_in, &comp_out, NULL);

  if (ierr != OCL_SUCCESS) {
    dkserr = DKS_ERROR;
    DEBUG_MSG("Error executing cfFFT\n");
    if (ierr == CLFFT_INVALID_PLAN)
      std::cout << "Invlalid plan" << std::endl;
    else
      std::cout << "CLFFT error" << std::endl;
  }

  /* propagate the enqueue status so callers can detect a failed transform */
  return dkserr;
}
/*
call rcfft execution on device for every dimension
*/
int OpenCLFFT::executeCRFFT(void *real_ptr, void *comp_ptr, int ndim, int N[3], int streamId) {
std::cout << "execute CRFFT" << std::endl;
int dkserr = DKS_SUCCESS;
cl_int ierr;
cl_mem real_in = (cl_mem)real_ptr;
cl_mem comp_out = (cl_mem)comp_ptr;
ierr = clfftEnqueueTransform(planHandleZ2D, CLFFT_BACKWARD, 1, &m_oclbase->m_command_queue,
0, NULL, NULL, &comp_out, &real_in, NULL);
if (ierr != OCL_SUCCESS) {
dkserr = DKS_ERROR;
DEBUG_MSG("Error executing cfFFT\n");
if (ierr == CLFFT_INVALID_PLAN)
std::cout << "Invlalid plan" << std::endl;
else
std::cout << "CLFFT error" << std::endl;
}
return dkserr;
}
/*
......@@ -120,10 +176,11 @@ int OpenCLFFT::executeIFFT(void *data, int ndim, int N[3], int streamId) {
}
/*
call kernel to normalize fft. The clFFT inverse transform already includes the scaling, so this is disabled.
*/
int OpenCLFFT::normalizeFFT(void *data, int ndim, int N[3], int streamId) {
/*
cl_mem inout = (cl_mem)data;
int n = N[0];
......@@ -150,132 +207,143 @@ int OpenCLFFT::normalizeFFT(void *data, int ndim, int N[3], int streamId) {
DEBUG_MSG("Error executing kernel");
return OCL_ERROR;
}
*/
return OCL_SUCCESS;
}
int OpenCLFFT::ocl_executeFFTStockham(void* &src, int ndim, int N, bool forward) {
int ierr;
int size = sizeof(cl_double2)*pow(N,ndim);
cl_mem mem_tmp;
cl_mem mem_src = (cl_mem)src;
cl_mem mem_dst = (cl_mem)m_oclbase->ocl_allocateMemory(size, ierr);
int OpenCLFFT::setupFFT(int ndim, int N[3]) {
//set the number of work items in each dimension
size_t work_items[3];
int p = 1;
int threads = N / 2;
int f = (forward) ? -1 : 1;
//execute kernel
int n = (int)log2(N);
for (int i = 0; i < ndim; i++) {
int dim = i+1;
p = 1;
work_items[0] = (dim == 1) ? N/2 : N;
work_items[1] = (dim == 2) ? N/2 : N;
work_items[2] = (dim == 3) ? N/2 : N;
//transpose array if calculating dimension larger than 1
//if (dim > 1)
// ocl_executeTranspose(mem_src, N, ndim, dim);
//create kernel and set kernel arguments
if (m_oclbase->ocl_createKernel("fft3d_radix2") != OCL_SUCCESS)
return OCL_ERROR;
for (int t = 1; t <= log2(N); t++) {
m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &mem_src);
m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &mem_dst);
m_oclbase->ocl_setKernelArg(2, sizeof(int), &p);
m_oclbase->ocl_setKernelArg(3, sizeof(int), &threads);
m_oclbase->ocl_setKernelArg(4, sizeof(int), &dim);
m_oclbase->ocl_setKernelArg(5, sizeof(int), &f);
if (m_oclbase->ocl_executeKernel(ndim, work_items) != OCL_SUCCESS)
return OCL_ERROR;
cl_int err;
mem_tmp = mem_src;
mem_src = mem_dst;
mem_dst = mem_tmp;
p = 2*p;
}
//transpose array back if calculating dimension larger than 1
//if (dim > 1)
// ocl_executeTranspose(mem_src, N, ndim, dim);
}
if (ndim*n % 2 == 1) {
m_oclbase->ocl_copyData(mem_src, mem_dst, size);
mem_tmp = mem_src;
mem_src = mem_dst;
mem_dst = mem_tmp;
clfftDim dim = CLFFT_3D;
size_t clLength[3] = {(size_t)N[0], (size_t)N[1], (size_t)N[2]};
/* Create 3D fft plan*/
err = clfftCreateDefaultPlan(&planHandleZ2Z, m_oclbase->m_context, dim, clLength);
/* Set plan parameters */
err = clfftSetPlanPrecision(planHandleZ2Z, CLFFT_DOUBLE);
if (err != CL_SUCCESS)
std::cout << "Error setting precision" << std::endl;
err = clfftSetLayout(planHandleZ2Z, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED);
if (err != CL_SUCCESS)
std::cout << "Error setting layout" << std::endl;
err = clfftSetResultLocation(planHandleZ2Z, CLFFT_INPLACE);
if (err != CL_SUCCESS)
std::cout << "Error setting result location" << std::endl;
/* Bake the plan */
err = clfftBakePlan(planHandleZ2Z, 1, &m_oclbase->m_command_queue, NULL, NULL);
if (err != CL_SUCCESS) {
DEBUG_MSG("Error creating Complex-to-complex plan");
return DKS_ERROR;
}
m_oclbase->ocl_freeMemory(mem_dst);
return OCL_SUCCESS;
return DKS_SUCCESS;
}
int OpenCLFFT::ocl_executeFFTStockham2(void* &src, int ndim, int N, bool forward) {
int OpenCLFFT::setupFFTRC(int ndim, int N[3], double scale) {
cl_int err;
clfftDim dim = CLFFT_3D;
size_t clLength[3] = {(size_t)N[0], (size_t)N[1], (size_t)N[2]};
cl_mem mem_src = (cl_mem)src;
size_t work_items[3] = { (size_t)N/2, (size_t)N, (size_t)N};
size_t work_group_size[3] = {(size_t)N/2, 1, 1};
m_oclbase->ocl_createKernel("fft_batch3D");
m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &mem_src);
m_oclbase->ocl_setKernelArg(1, sizeof(cl_double2)*N, NULL);
m_oclbase->ocl_setKernelArg(2, sizeof(cl_double2)*N, NULL);
m_oclbase->ocl_setKernelArg(3, sizeof(cl_double2), NULL);
m_oclbase->ocl_setKernelArg(4, sizeof(int), &N);
for (int dim = 1; dim < ndim+1; dim++) {
m_oclbase->ocl_setKernelArg(5, sizeof(int), &dim);
m_oclbase->ocl_executeKernel(3, work_items, work_group_size);
/* Create 3D fft plan*/
err = clfftCreateDefaultPlan(&planHandleD2Z, m_oclbase->m_context, dim, clLength);
/* Set plan parameters */
err = clfftSetPlanPrecision(planHandleD2Z, CLFFT_DOUBLE);
err = clfftSetLayout(planHandleD2Z, CLFFT_REAL, CLFFT_HERMITIAN_INTERLEAVED);
err = clfftSetResultLocation(planHandleD2Z, CLFFT_OUTOFPLACE);
/* Bake the plan */
err = clfftBakePlan(planHandleD2Z, 1, &m_oclbase->m_command_queue, NULL, NULL);
if (err != CL_SUCCESS) {
DEBUG_MSG("Error creating Real-to-complex plan");
return DKS_ERROR;
}
return OCL_SUCCESS;
return DKS_SUCCESS;
}
int OpenCLFFT::ocl_executeTranspose(void *src, int N[3], int ndim, int dim) {
cl_mem mem_src = (cl_mem)src;
if (ndim == 1)
return OCL_SUCCESS;
size_t work_items[3];
work_items[0] = N[0];
work_items[1] = N[1];
work_items[2] = 1;
int OpenCLFFT::setupFFTCR(int ndim, int N[3], double scale) {
cl_int err;
clfftDim dim = CLFFT_3D;
size_t clLength[3] = {(size_t)N[0], (size_t)N[1], (size_t)N[2]};
size_t work_group_size[3];
work_group_size[0] = N[0];
work_group_size[1] = N[1];
work_group_size[2] = 1;
/* Create 3D fft plan*/
err = clfftCreateDefaultPlan(&planHandleZ2D, m_oclbase->m_context, dim, clLength);
size_t local_size = work_group_size[0] * work_group_size[1] * work_group_size[2];
m_oclbase->ocl_createKernel("transpose");
m_oclbase->ocl_setKernelArg(0, sizeof(cl_mem), &mem_src);
m_oclbase->ocl_setKernelArg(1, sizeof(cl_mem), &mem_src);
m_oclbase->ocl_setKernelArg(2, sizeof(int), &N[0]);
m_oclbase->ocl_setKernelArg(3, sizeof(int), &N[1]);
m_oclbase->ocl_setKernelArg(4, sizeof(cl_double2)*local_size, NULL);
m_oclbase->ocl_executeKernel(ndim, work_items, work_group_size);
/* Set plan parameters */
err = clfftSetPlanPrecision(planHandleZ2D, CLFFT_DOUBLE);
err = clfftSetLayout(planHandleZ2D, CLFFT_HERMITIAN_INTERLEAVED, CLFFT_REAL);
err = clfftSetResultLocation(planHandleZ2D, CLFFT_OUTOFPLACE);
/* Bake the plan */
err = clfftBakePlan(planHandleZ2D, 1, &m_oclbase->m_command_queue, NULL, NULL);
if (err != CL_SUCCESS) {
DEBUG_MSG("Error creating Complex-to-real plan");
return DKS_ERROR;
}
return DKS_SUCCESS;
}
int OpenCLFFT::destroyFFT() {
clfftDestroyPlan(&planHandleZ2Z);
clfftDestroyPlan(&planHandleD2Z);
clfftDestroyPlan(&planHandleZ2D);
clfftTeardown();
return DKS_SUCCESS;
}
/*
  Print a message describing a clFFT status code to stdout.

  Nothing is printed when err equals CL_SUCCESS. Otherwise a header line with
  the numeric code is printed, followed by a short name for the clFFT-specific
  codes handled below; any other value is printed as "other: <code>".
  The function is void, so it must not return a value.
*/
void OpenCLFFT::printError(clfftStatus err) {

  if (err != CL_SUCCESS) {
    std::cout << "Error creating default plan " << err << std::endl;
    switch(err) {
    case CLFFT_BUGCHECK:
      std::cout << "bugcheck" << std::endl;
      break;
    case CLFFT_NOTIMPLEMENTED:
      std::cout << "not implemented" << std::endl;
      break;
    case CLFFT_TRANSPOSED_NOTIMPLEMENTED:
      std::cout << "transposed not implemented" << std::endl;
      break;
    case CLFFT_FILE_NOT_FOUND:
      std::cout << "file not found" << std::endl;
      break;
    case CLFFT_FILE_CREATE_FAILURE:
      std::cout << "file create failure" << std::endl;
      break;
    case CLFFT_VERSION_MISMATCH:
      std::cout << "version missmatch" << std::endl;
      break;
    case CLFFT_INVALID_PLAN:
      std::cout << "invalid plan" << std::endl;
      break;
    case CLFFT_DEVICE_NO_DOUBLE:
      std::cout << "no double" << std::endl;
      break;
    case CLFFT_DEVICE_MISMATCH:
      std::cout << "device missmatch" << std::endl;
      break;
    case CLFFT_ENDSTATUS:
      std::cout << "end status" << std::endl;
      break;
    default:
      /* not a clFFT-specific code; the numeric value is printed as-is */
      std::cout << "other: " << err << std::endl;
      break;
    }
  }
}
/*
......
......@@ -20,12 +20,19 @@
#include "../Algorithms/FFT.h"
#include "OpenCLBase.h"
#include "clFFT.h"
class OpenCLFFT : public DKSFFT {
private:
OpenCLBase *m_oclbase;
clfftSetupData fftSetup;
clfftPlanHandle planHandleZ2Z;
clfftPlanHandle planHandleD2Z;
clfftPlanHandle planHandleZ2D;
/*
Info: call fft kernels to execute FFT of the given domain,
data - devevice memory ptr, cdim - current dim to transform,
......@@ -42,15 +49,31 @@ private:
*/
int ocl_callBitReverseKernel(cl_mem &data, int cdim, int ndim, int N);
/** Get clfftStatus and print the corresponding error message.
* clfftStatus is returned from all clFFT library functions, print error displays the
* corresponding error message. If "other" is printed then error code corresponds to
* OpenCL error code and not specifically to clFFT library, then OpenCL error codes should
* be checked to determine the reason for the error.
*/
void printError(clfftStatus err);
public:
/* constructor - currently does nothing*/
OpenCLFFT(OpenCLBase *base) {