Commit 4fa529aa authored by Uldis Locans's avatar Uldis Locans
Browse files

snapshot of svn

parents
# Top-level build script of the Dynamic Kernel Scheduler (DKS).
cmake_minimum_required(VERSION 3.2)
project(DKS)

# Version components of the library.
set(DKS_VERSION_MAJOR 1)
set(DKS_VERSION_MINOR 0.1)

# Package description variables. The escaped quotes are part of the values,
# i.e. every variable expands to a double-quoted string.
set(PACKAGE \"dks\")
# NOTE(review): the spelling of this address looks unusual - confirm.
set(PACKAGE_BUGREPORT \"locagoons.uldis@psi.ch\")
set(PACKAGE_NAME \"DKS\")
set(PACKAGE_STRING \"DKS\ 1.0.1\")
set(PACKAGE_TARNAME \"dks\")
set(PACKAGE_VERSION \"1.0.1\")
set(VERSION \"1.0.1\")

# Add the project's cmake/Modules/ directory to the module search path.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/Modules/")
# Bare name of the C++ compiler executable: everything up to and including
# the last "/" of the full compiler path is stripped.
string(REGEX REPLACE ".*/" "" COMPILER_NAME ${CMAKE_CXX_COMPILER})

# Report the detected toolchain (name, full path and compiler IDs).
message(STATUS "Your compiler is: ${COMPILER_NAME}")
message(STATUS "Your compiler is: ${CMAKE_CXX_COMPILER}")
message(STATUS "C compiler: ${CMAKE_C_COMPILER_ID}")
message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER_ID}")

# OpenCL and CUDA kernel files live in the include directory below the
# install prefix; the location is handed to the sources as a compile
# definition whose value is a quoted string.
set(OPENCL_KERNELS -DOPENCL_KERNELS=\\"${CMAKE_INSTALL_PREFIX}/include/\\")
message(STATUS "OpenCL kernel files: ${OPENCL_KERNELS}")
# Locate Boost (filesystem and system components, shared libraries).
# The BOOST_DIR environment variable is forwarded as the search root.
set(BOOSTROOT $ENV{BOOST_DIR})
set(Boost_USE_STATIC_LIBS OFF)
set(Boost_USE_STATIC_RUNTIME OFF)
find_package(Boost 1.55.0 REQUIRED COMPONENTS filesystem system)

if(Boost_FOUND)
  message(STATUS "Found boost include dir: ${Boost_INCLUDE_DIRS}")
  message(STATUS "Found boost library dir: ${Boost_LIBRARY_DIRS}")
  message(STATUS "Found boost libraries: ${Boost_LIBRARIES}")

  # Make the Boost headers and library directories available to all targets.
  include_directories(${Boost_INCLUDE_DIRS})
  link_directories(${Boost_LIBRARY_DIRS})
endif()
# Optional UQTK support; switched off unless the user enables it.
option(USE_UQTK "Use UQTK" OFF)
# Intel compiler specific configuration. Entered when the C compiler is
# identified as Intel or when USE_INTEL is set.
# The compiler ID is quoted so that an empty value cannot produce a malformed
# if() expression and the value is never re-interpreted as a variable name.
# NOTE(review): the C compiler ID is tested although only C++ flags are set.
if("${CMAKE_C_COMPILER_ID}" STREQUAL "Intel" OR USE_INTEL)
  # For the Intel compiler turn on OpenCL and MIC offload, leave CUDA off.
  option(USE_OPENCL "Use OpenCL" ON)
  option(USE_CUDA "Use CUDA" OFF)
  option(USE_MIC "Use intel MIC" ON)

  # Use xiar for offload builds on MIC. The archive rules carry the
  # xiar-only option -qoffload-build, so they are installed only when xiar
  # was actually found; otherwise CMake's default archive rules stay active.
  find_program(XIAR xiar)
  if(XIAR)
    message(STATUS "xiar found: ${XIAR}")
    set(CMAKE_AR "${XIAR}")
    set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> rcs -qoffload-build <TARGET> <LINK_FLAGS> <OBJECTS>")
    set(CMAKE_C_ARCHIVE_CREATE "<CMAKE_AR> rcs -qoffload-build <TARGET> <LINK_FLAGS> <OBJECTS>")
  endif()
  mark_as_advanced(XIAR)

  # Use xild as linker when it is available.
  find_program(XILD xild)
  if(XILD)
    set(CMAKE_LINKER "${XILD}")
  endif()
  mark_as_advanced(XILD)

  # Flags for OpenMP, MIC offload and OpenCL. Flags already present in
  # CMAKE_CXX_FLAGS (e.g. supplied by the user) are kept, matching the
  # behaviour of the GNU/Clang configuration.
  #TODO: check which opencl to use: nvidia, amd, intel, apple
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDEBUG -O3 -Wall -offload -mkl -openmp -lOpenCL -lpthread -DDKS_MIC -DDKS_OPENCL -qopt-report=5 -qopt-report-phase=vec -std=c++11")

  # Enable the MPI code path when an MPI compiler wrapper is used.
  if("${COMPILER_NAME}" STREQUAL "mpicxx" OR "${COMPILER_NAME}" STREQUAL "mpiicpc")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDKS_MPI")
  endif()
endif()
# GNU / Clang compiler specific configuration; skipped when USE_INTEL is set.
# The compiler ID is quoted so that an empty value cannot produce a malformed
# if() expression and the value is never re-interpreted as a variable name.
# NOTE(review): Apple's clang reports the ID "AppleClang" under the policies
# selected by this file, so it does not enter this branch - confirm intent.
if(("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") AND NOT USE_INTEL)
  option(USE_OPENCL "Use OpenCL" ON)
  option(USE_CUDA "Use CUDA" OFF)
  option(USE_MIC "Use intel MIC" OFF)

  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDEBUG -O3 -Wall -fopenmp -std=c++11 -D__wsu")

  find_package(CUDA)
  if(CUDA_FOUND)
    # CUDA is available: switch it on and set up include/link paths and flags.
    set(USE_CUDA ON)
    include_directories(${CUDA_INCLUDE_DIRS})
    link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64)
    message(STATUS "cuda include: ${CUDA_INCLUDE_DIRS}")
    message(STATUS "cuda libs: ${CUDA_TOOLKIT_ROOT_DIR}/lib64")
    message(STATUS "cuda version: ${CUDA_VERSION}")

    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lcudart -lcufft -lcublas -lnvToolsExt -DDKS_CUDA")
    set(CUDA_NVCC_FLAGS "-arch=sm_35 -DDEBUG -lcufft -lcublas -lcudart -fmad=false")
    set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} ${OPENCL_KERNELS}")

    # If the CUDA version is >= 7.0 add the runtime compilation libraries.
    if(NOT CUDA_VERSION VERSION_LESS "7.0")
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lnvrtc -lcuda")
    endif()

    message(STATUS "nvcc flags: ${CUDA_NVCC_FLAGS}")
    set(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE OFF)
    #set(CUDA_SEPARABLE_COMPILATION ON)
    set(BUILD_SHARED_LIBS OFF)
  else()
    message(STATUS "CUDA not found, looking for OpenCL")
    find_package(OpenCL)
    if(OpenCL_FOUND)
      # OpenCL_LIBRARY is the path of the library itself; link_directories()
      # needs the directory that contains it.
      get_filename_component(OpenCL_LIBRARY_DIR "${OpenCL_LIBRARY}" DIRECTORY)
      message(STATUS "OpenCL version : ${OpenCL_VERSION_STRING}")
      message(STATUS "OpenCL include dir: ${OpenCL_INCLUDE_DIR}")
      message(STATUS "OpenCL library dir: ${OpenCL_LIBRARY_DIR}")
      include_directories(${OpenCL_INCLUDE_DIR})
      link_directories("${OpenCL_LIBRARY_DIR}")
    endif()
  endif()

  # Select exactly one set of OpenCL flags. The branches are mutually
  # exclusive so that -lOpenCL is never added next to the macOS framework
  # flags and -DDKS_OPENCL is defined only once.
  if(CUDA_FOUND)
    # CUDA found: use the OpenCL library together with CUDA.
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lOpenCL -lpthread -DDKS_OPENCL")
  elseif(APPLE)
    # macOS without CUDA: use the OpenCL framework.
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -framework opencl -lpthread -DDKS_OPENCL")
  elseif(OpenCL_FOUND)
    # No CUDA, but an OpenCL installation was found.
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lOpenCL -lpthread -DDKS_OPENCL")
  endif()

  # Enable the MPI code path when the MPI compiler wrapper is used.
  if("${COMPILER_NAME}" STREQUAL "mpicxx")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDKS_MPI")
  endif()
endif()
# Append the kernel-location definition to the C++ flags and report the result.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENCL_KERNELS}")
message(STATUS "Compiler flags: ${CMAKE_CXX_FLAGS}")

# The src and auto-tuning directories are always processed; the test
# directory only when ENABLE_TESTS evaluates to true (the variable is not
# declared as an option in this file).
add_subdirectory(src)
if(ENABLE_TESTS)
  add_subdirectory(test)
endif()
add_subdirectory(auto-tuning)
### write configure files ###
# Generate the package configuration file from its template.
configure_file(
  "${CMAKE_CURRENT_SOURCE_DIR}/cmake/${PROJECT_NAME}Config.cmake.in"
  "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config_install.cmake"
)

### install files ###
# Install the generated file as <prefix>/lib/cmake/<project>/<project>Config.cmake.
# The destination is relative, so CMake resolves it against
# CMAKE_INSTALL_PREFIX itself; this is the same location as before but keeps
# the install rule relocatable (e.g. for package generators).
install(
  FILES "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config_install.cmake"
  DESTINATION "lib/cmake/${PROJECT_NAME}"
  RENAME "${PROJECT_NAME}Config.cmake"
)
##################################################################
#
# Name: Dynamic Kernel Scheduler
# Version: 1.0
# Author: Uldis Locans
# Contacts: locans.uldis@psi.ch
#
##################################################################
Dynamic Kernel Scheduler is a library that provides a software layer between the host application
and hardware accelerators. DKS handles communication between host and device and schedules task
execution using predefined algorithms written using CUDA and OpenCL for GPUs, and OpenMP with
offload pragmas for IntelMIC. See the DKSBase class documentation for the full list of functions
provided by DKS.
#####Requirements#####
OpenMPI (CUDA-aware OpenMPI enabled for full compatibility)
g++ or icpc compiler
Cuda 7.0 or higher (optional)
Nvidia or Intel OpenCL SDK (optional)
Intel MIC compilers (optional)
######Install######
#check out DKS
svn co svn+ssh://YOURLOGIN@savannah02.psi.ch/repos/amas/users/adelmann/Ph.D-students/Locans/work/DKS/trunk DKS
#set compilers to use
#supported c++ compilers: g++, icpc, mpicxx with g++
#supported c compilers: gcc, icc, mpicc with gcc
export CXX_COMPILER=cpp_compiler_name
export CC_COMPILER=c_compiler_name
#set dks root directory
cd DKS
export DKS_ROOT=$PWD
#set build directory (DKS_BUILD_DIR must already point to the desired build location)
mkdir $DKS_BUILD_DIR
cd $DKS_BUILD_DIR
#set install directory
export DKS_INSTALL_DIR=$DKS_BUILD_DIR #default is /usr/local/
CXX=$CXX_COMPILER CC=$CC_COMPILER cmake -DCMAKE_INSTALL_PREFIX=$DKS_BUILD_DIR $DKS_ROOT
make
make install
######DKS usage######
Make install copies the include files and library files to the $DKS_BUILD_DIR/build folder. The lib
folder in the build directory contains libdks.a and libdksshared.so; one of these libraries can be
used to link with DKS. All the necessary include files are located in $DKS_BUILD_DIR/build/include.
Additional flags needed for CUDA and OpenCL mode:
-lcudart -lcufft -lcublas -lnvToolsExt -lOpenCL -lnvrtc -lcuda -DDKS_CUDA -DDKS_OPENCL
Additional flags needed for IntelMIC and OpenCL mode:
-offload -mkl -openmp -lOpenCL -DDKS_MIC -DDKS_OPENCL
Note: always run make install; at runtime OpenCL and CUDA search for the kernel files in the
$DKS_INSTALL_DIR/build/include directory for runtime compilation.
######Running DKS######
#running with cuda
#nvidia multi process service started for better CUDA and MPI execution
#to start mps service (if multiple users use DKS start MPS as root)
nvidia-cuda-mps-control -d
#to stop mps service
echo quit | nvidia-cuda-mps-control
#running dks with MIC
#Intel Manycore Platform Software Stack (mpss) service started
#to start mpss
service mpss start
# Headers and library search path: the src directory of the source tree.
include_directories("${CMAKE_SOURCE_DIR}/src")
link_directories("${CMAKE_SOURCE_DIR}/src")

# Chi square kernel tests.
add_executable(testChiSquareRT testChiSquareRT.cpp)
target_link_libraries(testChiSquareRT dks ${Boost_LIBRARIES})

add_executable(testChiSquareRTRandom testChiSquareRTRandom.cpp)
target_link_libraries(testChiSquareRTRandom dks ${Boost_LIBRARIES})

# The UQTK based chi square test is only built when USE_UQTK is enabled.
if(USE_UQTK)
  add_executable(testChiSquareRTUQTK testChiSquareRTUQTK.cpp)
  target_link_libraries(testChiSquareRTUQTK
    dks
    ${Boost_LIBRARIES}
    lreg UQTk quad bcs uqtktools cvode-2.6.0 dsfmt lbfgs
    uqtklapack uqtkslatec uqtkblas gfortran)
endif()

# Test to verify the search functions.
add_executable(testSearch testSearch.cpp)
target_link_libraries(testSearch dks ${Boost_LIBRARIES})
#include <iostream>
#include <cstdlib>
#include <string>
#include <cmath>
#include <fstream>
#include "DKSBaseMuSR.h"
#include "Utility/DKSTimer.h"
// Mathematical constants: pi, 2*pi and the degree-to-radian factor (pi/180).
// TWO_PI converts frequency*time into a phase angle and DEG_TO_RAD converts
// the phase parameters before they are passed to cos()/j0().
#define PI 3.14159265358979323846
#define TWO_PI 6.283185307179586231996
#define DEG_TO_RAD 1.7453292519943295474371681e-2
// Constants of the single-histogram model used by cpuChiSq:
// theo = N0 * exp(-t/TAU) * (1 + f(t)) + BKG. The same values are sent to
// the device with callSetConsts(N0, TAU, BKG).
// NOTE(review): TAU presumably is the muon lifetime in microseconds - confirm.
#define N0 0.25
#define TAU 2.197019
#define BKG 1.0
// Constants of the asymmetry model used by cpuChiSqAsym; sent to the device
// with callSetConsts(ALPHA, BETA).
#define ALPHA 1.0
#define BETA 1.0
using namespace std;
/**
 * Fill the first N entries of data with pseudo-random numbers.
 * Each value is rand() scaled to the range [0, scale].
 * @param data  output array with room for at least N values
 * @param N     number of values to generate
 * @param scale upper bound of the generated values (default 1)
 */
void randData(double *data, int N, int scale = 1) {
    int idx = 0;
    while (idx < N) {
        data[idx] = (static_cast<double>(rand()) / RAND_MAX) * scale;
        ++idx;
    }
}
/** MusrFit predefined functions.
* Predefined functions from MusrFit that can be used to define the theory function.
* The first parameter of every function is always the time t; the remaining parameters
* depend on the function.
*/
/** Exponential decay: exp(-lamda * t). */
double se(double t, double lamda) {
    const double exponent = -lamda*t;
    return std::exp(exponent);
}
/** Stretched exponential decay: exp(-(lamda * t)^beta). */
double ge(double t, double lamda, double beta) {
    const double stretched = std::pow(lamda*t, beta);
    return std::exp(-stretched);
}
/** Gaussian decay: exp(-(sigma * t)^2 / 2). */
double sg(double t, double sigma) {
    const double sigmatsq = std::pow(sigma*t, 2);
    return std::exp(-0.5 * sigmatsq);
}
/** Evaluates 1/3 + 2/3 * (1 - s) * exp(-s/2) with s = (sigma * t)^2. */
double stg(double t, double sigma) {
    const double s = std::pow(sigma*t, 2);
    const double damping = std::exp(-0.5 * s);
    return (1.0/3.0) + (2.0/3.0)*(1.0 - s) * damping;
}
/** Evaluates 1/3 + 2/3 * (1 - x) * exp(-x) with x = lambda * t. */
double sekt(double t, double lambda) {
    const double x = lambda*t;
    const double damping = std::exp(-x);
    return (1.0/3.0) + (2.0/3.0)*(1.0 - x) * damping;
}
/**
 * Evaluates 1/3 + 2/3 * (1 - x - s) * exp(-x - s/2)
 * with x = lambda * t and s = (sigma * t)^2.
 */
double lgkt(double t, double lambda, double sigma) {
    const double x = lambda*t;
    const double s = std::pow(sigma*t, 2.0);
    const double damping = std::exp(-x - 0.5*s);
    return (1.0/3.0) + (2.0/3.0)*(1.0 - x - s) * damping;
}
/**
 * Evaluates 1/3 + 2/3 * (1 - s) * exp(-s / beta) with s = (sigma * t)^beta.
 * Returns 0 when beta is below 1e-3, which avoids dividing by a vanishing beta.
 */
double skt(double t, double sigma, double beta) {
    if (!(beta < 1.0e-3)) {
        const double s = std::pow(sigma*t, beta);
        return (1.0/3.0) + (2.0/3.0)*(1.0 - s) * std::exp(-s/beta);
    }
    return 0.0;
}
/**
 * Evaluates 1/3 * exp(-rateL) + 2/3 * (1 - lamt2q / rateT) * exp(-rateT) with
 *   lamt2q = t^2 * lambda^2 * q,
 *   rate2  = 4 * lambda^2 * (1 - q) * t / gamma,
 *   rateL  = sqrt(|rate2|),  rateT = sqrt(|rate2| + lamt2q).
 *
 * When rateT is zero (e.g. t == 0 or lambda == 0) the ratio lamt2q / rateT
 * would be 0/0; both exponents vanish in that case, so the function value is
 * its analytic limit 1.
 *
 * @param t      time
 * @param lambda rate parameter
 * @param gamma  divisor of rate2; not checked against zero
 * @param q      weight between the two rate contributions
 */
double spg(double t, double lambda, double gamma, double q) {
    const double lam2 = lambda*lambda;
    const double lamt2q = t*t*lam2*q;
    const double rate2 = 4.0*lam2*(1.0-q)*t/gamma;
    const double rateL = std::sqrt(std::fabs(rate2));
    const double rateT = std::sqrt(std::fabs(rate2)+lamt2q);

    // rateT == 0 implies rate2 == 0 and lamt2q == 0: return the limit
    // instead of evaluating 0/0.
    if (rateT == 0.0)
        return 1.0;

    return (1.0/3.0)*std::exp(-rateL) + (2.0/3.0)*(1.0 - lamt2q / rateT)*std::exp(-rateT);
}
/**
 * Evaluates
 *   1/6 * (1 - nu*t/2) * exp(-nu*t/2)
 * + 1/3 * (1 - nu*t/4) * exp(-(nu*t + 2.44949 * lambda*t) / 4).
 */
double rahf(double t, double nu, double lambda) {
    const double x = nu*t;
    const double halfX = nu*t/2.0;
    const double y = lambda*t;
    const double first = (1.0/6.0)*(1.0-halfX)*std::exp(-halfX);
    const double second = (1.0/3.0)*(1.0-x/4.0)*std::exp(-0.25*(x+2.44949*y));
    return first + second;
}
/**
 * Cosine oscillation: cos(2*pi*nu*t + phi), where phi is given in degrees
 * and converted to radians with DEG_TO_RAD.
 */
double tf(double t, double phi, double nu) {
    const double angle = TWO_PI*nu*t;
    const double phase = DEG_TO_RAD * phi;
    return std::cos(angle + phase);
}
/**
 * Evaluates
 *   alpha * cos(2*pi*nu*t + phi) * exp(-lambdaT*t) + (1 - alpha) * exp(-lambdaL*t),
 * where phi is given in degrees and converted with DEG_TO_RAD.
 */
double ifld(double t, double alpha, double phi, double nu, double lambdaT, double lambdaL) {
    const double angle = TWO_PI*nu*t;
    const double phase = DEG_TO_RAD*phi;
    const double oscillating = alpha*std::cos(angle+phase)*std::exp(-lambdaT*t);
    const double relaxing = (1.0-alpha)*std::exp(-lambdaL*t);
    return oscillating + relaxing;
}
/**
 * Bessel function j0 evaluated at 2*pi*nu*t + phi, where phi is given in
 * degrees and converted with DEG_TO_RAD.
 */
double b(double t, double phi, double nu) {
    const double argument = TWO_PI*nu*t + DEG_TO_RAD*phi;
    return j0(argument);
}
/**
 * Evaluates
 *   alpha * j0(2*pi*nu*t + phi) * exp(-lambdaT*t) + (1 - alpha) * exp(-lambdaL*t),
 * where phi is given in degrees and converted with DEG_TO_RAD.
 */
double ib(double t, double alpha, double phi, double nu, double lambdaT, double lambdaL) {
    const double angle = TWO_PI * nu * t;
    const double phase = DEG_TO_RAD * phi;
    const double oscillating = alpha*j0(angle+phase)*std::exp(-lambdaT*t);
    const double relaxing = (1.0-alpha)*std::exp(-lambdaL*t);
    return oscillating + relaxing;
}
/**
 * Evaluates exp(-(sigma/gamma)^2 * (exp(-gamma*t) - 1 + gamma*t)).
 * gamma is used as a divisor and is not checked against zero.
 */
double ab(double t, double sigma, double gamma) {
    const double x = gamma*t;
    const double ratioSq = std::pow(sigma/gamma,2.0);
    return std::exp(-ratioSq*(std::exp(-x) - 1.0 + x));
}
/**
 * Evaluates 1/3 + 2/3 * a^1.5 * (1 - d*a) * exp(-d*a/2)
 * with d = (Delta0 * t)^2 and a = 1 / (1 + Rb^2 * d).
 */
double snkzf(double t, double Delta0, double Rb) {
    const double d = std::pow(Delta0*t, 2.0);
    const double a = 1.0/(1.0+std::pow(Rb,2.0)*d);
    return (1.0/3.0) + (2.0/3.0)*std::pow(a,1.5)*(1.0-d*a)*std::exp(-0.5*d*a);
}
/**
 * Evaluates sqrt(a) * exp(-d*a/2) * cos(2*pi*nu*t + phi)
 * with d = (Delta0 * t)^2 and a = 1 / (1 + Rb^2 * d).
 * phi is given in degrees and converted with DEG_TO_RAD.
 */
double snktf(double t, double phi, double nu, double Delta0, double Rb) {
    const double angle = TWO_PI*nu*t;
    const double phase = DEG_TO_RAD*phi;
    const double d = std::pow(Delta0*t, 2.0);
    const double a = 1.0/(1.0+std::pow(Rb,2.0)*d);
    return std::sqrt(a)*std::exp(-0.5*d*a)*std::cos(angle+phase);
}
/**
 * Evaluates sqrt(a) * exp(-2 * Delta0^2 * theta * a) with
 *   theta = (exp(-nuc*t) - 1 + nuc*t) / nuc^2,
 *   a     = 1 / (1 + 4 * (Rb*Delta0)^2 * theta).
 *
 * theta uses "+ nuc*t" (the same form as in ab()), which keeps theta
 * non-negative and gives the limit t^2/2 for nuc -> 0. With the opposite
 * sign theta becomes negative, so the denominator of a can cross zero.
 * For nuc == 0 the limit t^2/2 is used directly instead of evaluating 0/0.
 *
 * @param t      time
 * @param Delta0 width parameter
 * @param Rb     ratio parameter entering through (Rb*Delta0)^2
 * @param nuc    rate parameter of theta
 */
double dnkzf(double t, double Delta0, double Rb, double nuc) {
    const double nuct = nuc*t;
    double theta;
    if (nuc == 0.0)
        theta = 0.5*t*t;   // analytic limit of theta for nuc -> 0
    else
        theta = (std::exp(-nuct) - 1.0 + nuct)/std::pow(nuc, 2.0);
    const double aa = 1.0/(1.0+4.0*std::pow(Rb*Delta0,2.0)*theta);
    return std::sqrt(aa)*std::exp(-2.0*Delta0*Delta0*theta*aa);
}
/**
 * Evaluates sqrt(a) * exp(-Delta0^2 * theta * a) * cos(2*pi*nu*t + phi) with
 *   theta = (exp(-nuc*t) - 1 + nuc*t) / nuc^2,
 *   a     = 1 / (1 + 2 * (Rb*Delta0)^2 * theta).
 * phi is given in degrees and converted with DEG_TO_RAD.
 *
 * theta uses "+ nuc*t" (the same form as in ab()), which keeps theta
 * non-negative and gives the limit t^2/2 for nuc -> 0. With the opposite
 * sign theta becomes negative, so the denominator of a can cross zero.
 * For nuc == 0 the limit t^2/2 is used directly instead of evaluating 0/0.
 */
double dnktf(double t, double phi, double nu, double Delta0, double Rb, double nuc) {
    const double wt = TWO_PI*nu*t;
    const double ph = DEG_TO_RAD*phi;
    const double nuct = nuc*t;
    double theta;
    if (nuc == 0.0)
        theta = 0.5*t*t;   // analytic limit of theta for nuc -> 0
    else
        theta = (std::exp(-nuct) - 1.0 + nuct)/std::pow(nuc, 2.0);
    const double aa = 1.0/(1.0+2.0*std::pow(Rb*Delta0,2.0)*theta);
    return std::sqrt(aa)*std::exp(-Delta0*Delta0*theta*aa)*std::cos(wt+ph);
}
/**
 * CPU reference for the single-histogram fit.
 * For every bin i the time is t = timeStart + i*timeStep and the model is
 *   theo = N0 * exp(-t/TAU) * (1 + p[0]*f[0]*sg(t, p[1])*tf(t, p[2], f[1])) + BKG.
 * The data value of a bin is also used as its error.
 *
 * @param data      histogram values (Ndata entries)
 * @param p         parameters; entries 0..2 are read
 * @param f         function values; entries 0..1 are read
 * @param Ndata     number of bins
 * @param Npar      not used by this function
 * @param Nfnc      not used by this function
 * @param timeStart time of the first bin
 * @param timeStep  time difference between neighbouring bins
 * @param mlh       accumulate the log-likelihood term instead of chi square
 * @return the accumulated sum over all bins
 */
double cpuChiSq(double *data, double *p, double *f, int Ndata, int Npar, int Nfnc,
                double timeStart, double timeStep, bool mlh = false)
{
    double sum = 0.0;

    for (int bin = 0; bin < Ndata; ++bin) {
        const double t = timeStart + bin*timeStep;
        const double value = data[bin];
        const double error = data[bin];

        const double fTheory = p[0] * f[0] * sg(t, p[1]) * tf(t, p[2], f[1]);
        const double theo = N0 * exp(-t/TAU) * (1.0 + fTheory) + BKG;

        if (!mlh) {
            // chi square term; bins with zero error contribute theo^2
            sum += (error != 0.0)
                ? ( (theo - value) * (theo - value) ) / (error * error)
                : theo * theo;
            continue;
        }

        // log-likelihood term; the logarithm is skipped for vanishing values
        const bool useLog = (value > 1.0e-9) && (fabs(theo) > 1.0e-9);
        if (useLog)
            sum += 2.0 * ((theo - value) + value * log(value / theo));
        else
            sum += 2.0 * (theo - value);
    }

    return sum;
}
/**
 * CPU reference for the asymmetry fit.
 * For every bin i the time is t = timeStart + i*timeStep and, with
 * A = p[0]*f[0]*sg(t, p[1])*tf(t, p[2], f[1]) and ab = ALPHA*BETA, the model is
 *   theo = ((ab + 1)*A - (ALPHA - 1)) / ((ALPHA + 1) - (ab - 1)*A).
 * The data value of a bin is also used as its error.
 *
 * @param data      asymmetry values (Ndata entries)
 * @param p         parameters; entries 0..2 are read
 * @param f         function values; entries 0..1 are read
 * @param Ndata     number of bins
 * @param Npar      not used by this function
 * @param Nfnc      not used by this function
 * @param timeStart time of the first bin
 * @param timeStep  time difference between neighbouring bins
 * @param mlh       when true nothing is accumulated (log max likelihood is
 *                  not defined for this fit type)
 * @return the accumulated chi square
 */
double cpuChiSqAsym(double *data, double *p, double *f, int Ndata, int Npar, int Nfnc,
                    double timeStart, double timeStep, bool mlh = false)
{
    double sum = 0.0;

    for (int bin = 0; bin < Ndata; ++bin) {
        const double t = timeStart + bin*timeStep;
        const double value = data[bin];
        const double error = data[bin];

        const double theoVal = p[0] * f[0] * sg(t, p[1]) * tf(t, p[2], f[1]);
        const double alphaBeta = ALPHA * BETA;
        const double theo = ((alphaBeta+1.0)*theoVal - (ALPHA-1.0))/((ALPHA+1.0) - (alphaBeta-1.0)*theoVal);

        if (mlh) {
            sum += 0.0; //log max likelihood not defined here
            continue;
        }

        // chi square term; bins with zero error contribute theo^2
        if (error != 0.0)
            sum += ( (theo - value) * (theo - value) ) / (error * error);
        else
            sum += theo * theo;
    }

    return sum;
}
int runTest(const char *api_name, const char *device_name, bool autotune, bool mlh, bool asym) {
int ierr;
/*
* Histogram size used in tests. If autotune run kernes with sizes from 1e5 to 1e6.
* If autotune is off just run the test once (used for debuging to test the kernel)
*/
int Nstart = 1e5;
int Nstep = 1e5;
int Nend = (autotune) ? 1e6 : 1e5;
//parameter, function and map sizes used in tests
int Npar = 66;
int Nfnc = 2;
int Nmap = 5;
//print test info
cout << "=========================BEGIN TEST=========================" << endl;
cout << "Use api: " << api_name << "\t" << device_name << endl;
cout << "Max log likelihood: " << std::boolalpha << mlh << endl;
cout << "Asymetry fit: " << std::boolalpha << asym << endl;
DKSBaseMuSR dksbase;
dksbase.setAPI(api_name);
dksbase.setDevice(device_name);
ierr = dksbase.initDevice();
if (ierr != DKS_SUCCESS) {
std::cout << "Device not supported!" << std::endl;
return DKS_ERROR;
}
//get the list of different devices
std::vector<int> devices;
dksbase.getDeviceList(devices);
std::cout << "Unique devices: " << devices.size() << std::endl;
//create the function string to use in test
string sFnc = "p[m[0]] * f[m[1]] * sg(t, p[m[2]]) * tf(t, p[m[3]], f[m[4]])";
int map[5] = {0, 0, 1, 2, 1};
//runt tests from 100k to 1mil data points
for (unsigned int device = 0; device < devices.size(); device++) {
for (int Ndata = Nstart; Ndata <= Nend; Ndata += Nstep) {
dksbase.setDefaultDevice(device);
std::cout << "Ndata: " << Ndata << std::endl;
//init the chi square calculations
dksbase.initChiSquare(Ndata, Npar, Nfnc, Nmap);
//create random arrays for data, parameter and function storage
double *data = new double[Ndata];
double *par = new double[Npar];
double *fnc = new double[Nfnc];
randData(data, Ndata);
randData(par, Npar);
randData(fnc, Nfnc, 100);
//allocate memory on device
void *data_ptr = dksbase.allocateMemory<double>(Ndata, ierr);
//write data, params, functions and maps to the device
dksbase.writeData<double>(data_ptr, data, Ndata);
dksbase.writeParams(par, Npar);
dksbase.writeFunctions(fnc, Nfnc);
dksbase.writeMaps(map, Nmap);
//set musrfit constants
dksbase.callSetConsts(N0, TAU, BKG);
dksbase.callSetConsts(ALPHA, BETA);
//compile the program created with the function string
dksbase.callCompileProgram(sFnc, mlh);
//set autotuning on/off
if (autotune)
dksbase.setAutoTuningOn();
//tmp values to store results and tmp values for time steps and start time
double result_gpu = 0.0;
double result_cpu = 0.0;
double dt = 1e-12;
double ts = 1e-7;
//execute kernel on the GPU and execute the same function on the cpu
if (!asym) {
dksbase.callLaunchChiSquare(1, data_ptr, data_ptr, Ndata, Npar, Nfnc,
Nmap, ts, dt, result_gpu);
result_cpu = cpuChiSq(data, par, fnc, Ndata, Npar, Nfnc, ts, dt, mlh);
} else {
dksbase.callLaunchChiSquare(2, data_ptr, data_ptr, Ndata, Npar, Nfnc,
Nmap, ts, dt, result_gpu);
result_cpu = cpuChiSqAsym(data, par, fnc, Ndata, Npar, Nfnc, ts, dt, mlh);
}
//check the results
cout << "DKS: " << result_gpu << endl;
cout << "CPU: " << result_cpu << endl;
//free CPU and GPU memory
dksbase.freeMemory<double>(data_ptr, Ndata);
dksbase.freeChiSquare();
delete[] data;
delete[] par;
delete[] fnc;
cout << "------------------------------------------------------------" << endl;
}
}
return DKS_SUCCESS;
}
<