Commit b5c5da29 authored by Uldis Locans's avatar Uldis Locans
Browse files

add function to generate list of random numbers with cuda and opencl on the device

parent 3a74d6cd
......@@ -46,6 +46,16 @@ MESSAGE (STATUS "Found clFFT include dir: ${CLFFT_INCLUDE_DIRS}")
INCLUDE_DIRECTORIES (${CLFFT_INCLUDE_DIRS})
LINK_DIRECTORIES (${CLFFT_LIBRARIES})
#find clRNG
#SET (clRNG_USE_STATIC_LIBS OFF)
#FIND_PACKAGE(clRng REQUIRED HINTS &ENV{CLRNG_PREFIX} $ENV{CLRNG_DIR} $ENV{CLRNG})
#MESSAGE (STATUS "Found clRNG library: ${CLRNG_LIBRARIES}")
#MESSAGE (STATUS "Found clRNG include dir: ${CLRNG_INCLUDE_DIRS}")
#INCLUDE_DIRECTORIES (${CLFFT_INCLUDE_DIRS})
#LINK_DIRECTORIES (${CLRNG_LIBRARIES})
#find_package(PkgConfig)
#pkg_check_modules(clRng REQUIRED)
#enable UQTK
OPTION (USE_UQTK "Use UQTK" OFF)
......
......@@ -13,6 +13,13 @@ __global__ void initcuRandState(curandState *state, int size, int seed = 0) {
}
__global__ void kernelCreateRandNumbers(curandState *state, double *data, int size) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < size)
data[idx] = curand_uniform_double(&state[idx]);
}
//=====================================//
//==========Private functions==========//
......@@ -68,6 +75,15 @@ int CudaBase::cuda_deleteCurandStates() {
return DKS_SUCCESS;
}
int CudaBase::cuda_createRandomNumbers(void *mem_ptr, int size) {
int threads = BLOCK_SIZE;
int blocks = size / threads + 1;
kernelCreateRandNumbers<<<blocks, threads>>>(defaultRndState, (double *)mem_ptr, size);
return DKS_SUCCESS;
}
curandState* CudaBase::cuda_getCurandStates() {
return defaultRndState;
}
......
......@@ -15,6 +15,8 @@
#include <nvToolsExt.h>
#include <time.h>
#define BLOCK_SIZE 128
class CudaBase {
private:
......@@ -50,6 +52,11 @@ public:
*/
int cuda_deleteCurandStates();
/** Create 'size' random numbers on the device and save in mem_ptr array
*
*/
int cuda_createRandomNumbers(void *mem_ptr, int size);
/** Get a pointer to curand states
*
*/
......
......@@ -878,3 +878,12 @@ int DKSBase::callParallelTTrackerPushTransform(void *x_ptr, void *p_ptr,
return DKS_ERROR;
}
int DKSBase::callCreateRandomNumbers(void *mem_ptr, int size) {
if (apiCuda())
return CUDA_SAFECALL(cbase->cuda_createRandomNumbers(mem_ptr, size));
if (apiOpenCL())
return OPENCL_SAFECALL(oclbase->ocl_createRandomNumbers(mem_ptr, size));
return DKS_ERROR;
}
......@@ -1090,6 +1090,11 @@ public:
double dt, double c, bool usedt = false,
int streamId = -1);
/**
* Create random numbers on the device and fille mem_data array
*/
int callCreateRandomNumbers(void *mem_ptr, int size);
/**
* Print memory information on device (total, used, available)
* TODO: opencl and mic imlementation
......
......@@ -7,21 +7,13 @@ cl_device_id OpenCLBase::m_device_id = NULL;
cl_event OpenCLBase::m_last_event = NULL;
OpenCLBase::OpenCLBase() {
//m_context = NULL;
//m_command_queue = NULL;
m_program = NULL;
m_kernel = NULL;
//m_device_id = NULL;
//m_platform_id = NULL;
m_kernel_file = NULL;
m_last_event = NULL;
//m_events = new cl_event[500];
//m_num_events = 0;
defaultRndSet = 0;
}
OpenCLBase::~OpenCLBase() {
......@@ -41,11 +33,11 @@ int OpenCLBase::ocl_createRndStates(int size) {
strcat(kernel_file, "OpenCL/OpenCLKernels/OpenCLCollimatorPhysics.cl");
ocl_loadKernel(kernel_file);
delete[] kernel_file;
//allocate memory for rand states
int ierr;
defaultRndState = ocl_allocateMemory(sizeof(RNDState)*size, ierr);
//exec kernel
int seed = 0;
ocl_createKernel("initRand");
......@@ -55,13 +47,34 @@ int OpenCLBase::ocl_createRndStates(int size) {
size_t work_items = size;
size_t work_group_size = 1;
ocl_executeKernel(1, &work_items, &work_group_size);
defaultRndSet = 1;
return DKS_SUCCESS;
}
return OCL_SUCCESS;
int OpenCLBase::ocl_createRandomNumbers(void *mem_ptr, int size) {
//load kernel
char * kernel_file = new char[500];
kernel_file[0] = '\0';
strcat(kernel_file, OPENCL_KERNELS);
strcat(kernel_file, "OpenCL/OpenCLKernels/OpenCLCollimatorPhysics.cl");
ocl_loadKernel(kernel_file);
delete[] kernel_file;
//set kernel variables
cl_mem tmp_data = (cl_mem) mem_ptr;
ocl_createKernel("createRandoms");
ocl_setKernelArg(0, sizeof(cl_mem), &defaultRndState);
ocl_setKernelArg(1, sizeof(cl_mem), &tmp_data);
ocl_setKernelArg(2, sizeof(int), &size);
size_t work_size = 128;
size_t work_items = (size % work_size + 1) * work_size;
ocl_executeKernel(1, &work_items, &work_size);
return DKS_SUCCESS;
}
/* destroy rnd states */
......@@ -70,7 +83,7 @@ int OpenCLBase::ocl_deleteRndStates() {
ocl_freeMemory(defaultRndState);
defaultRndSet = 0;
return OCL_SUCCESS;
return DKS_SUCCESS;
}
......
......@@ -30,11 +30,20 @@
#include <CL/cl_ext.h>
#endif
#include "clRNG/clRNG.h"
#include "clRNG/mrg31k3p.h"
#include "../DKSDefinitions.h"
/* struct for random number state */
typedef struct {
double s10;
double s11;
double s12;
double s20;
double s21;
double s22;
double z;
bool gen;
} RNDState;
class OpenCLBase {
private:
......@@ -122,6 +131,11 @@ public:
*/
int ocl_createRndStates(int size);
/* Create an array of random numbers on the device
*
*/
int ocl_createRandomNumbers(void *mem_ptr, int size);
/*
Destroy rnd states
Return: success or error code
......
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#pragma OPENCL EXTENSION
/******Random numbers********/
......@@ -89,13 +87,14 @@ __kernel void initRand(__global RNDState *s, unsigned int seed, int N) {
if (id < N) {
RNDState tmp;
int tmp_seed = id;// * 0x100000000ULL;
int tmp_seed = 2*id;// * 0x100000000ULL;
tmp.s10 = 12345 + tmp_seed;
tmp.s11 = 12345 + tmp_seed;
tmp.s12 = 123 + tmp_seed;
tmp.s12 = 12345 + tmp_seed;
tmp.s20 = 12345 + tmp_seed;
tmp.s21 = 12345 + tmp_seed;
tmp.s22 = 123 + tmp_seed;
tmp.s22 = 12345 + tmp_seed;
tmp.z = 0;
tmp.gen = true;
......@@ -105,6 +104,19 @@ __kernel void initRand(__global RNDState *s, unsigned int seed, int N) {
}
/* create random numbers and fill an array */
__kernel void createRandoms(__global RNDState *states, __global double *data, int size) {
int idx = get_global_id(0);
if (idx < size) {
RNDState s = states[idx];
data[idx] = rand_uniform(&s);
states[idx] = s;
}
}
/**********Degrader**********/
enum PARAMS { POSITION,
......
......@@ -22,6 +22,7 @@ ADD_EXECUTABLE(testFFT3DRC testFFT3DRC.cpp)
#ADD_EXECUTABLE(testGather testGather.cpp)
#ADD_EXECUTABLE(testGatherAsync testGatherAsync.cpp)
#ADD_EXECUTABLE(testTranspose testTranspose.cpp)
ADD_EXECUTABLE(testRandom testRandom.cpp)
ADD_EXECUTABLE(testCollimatorPhysics testCollimatorPhysics.cpp)
ADD_EXECUTABLE(testCollimatorPhysicsSoA testCollimatorPhysicsSoA.cpp)
#ADD_EXECUTABLE(testPush testPush.cpp)
......@@ -53,6 +54,7 @@ TARGET_LINK_LIBRARIES(testFFT3DRC dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
#TARGET_LINK_LIBRARIES(testGather dks)
#TARGET_LINK_LIBRARIES(testGatherAsync dks)
#TARGET_LINK_LIBRARIES(testTranspose dks)
TARGET_LINK_LIBRARIES(testRandom dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
TARGET_LINK_LIBRARIES(testCollimatorPhysics dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
TARGET_LINK_LIBRARIES(testCollimatorPhysicsSoA dks ${Boost_LIBRARIES} ${CLFFT_LIBRARIES})
#TARGET_LINK_LIBRARIES(testPush dks)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment