Commit ccc4329b authored by Uldis Locans's avatar Uldis Locans

updated documentation

parent 7ca93a3a
......@@ -15,6 +15,9 @@
class DKSBaseMuSR;
/**
* Interface to implement ChiSquareRuntime class for musrfit.
*/
class ChiSquareRuntime {
friend class DKSBaseMuSR;
......@@ -63,23 +66,54 @@ public:
/** Default constructor */
//ChiSquareRuntime();
/** Default destructor */
/** Default destructor. */
virtual ~ChiSquareRuntime() { };
/**
* Compile GPU program generated at runtime.
*/
virtual int compileProgram(std::string function, bool mlh = false) = 0;
/**
* Launch the compiled chiSquare kernel.
*/
virtual int launchChiSquare(int fitType, void *mem_data, void *mem_err, int length,
int numpar, int numfunc, int nummap,
double timeStart, double timeStep,
double &result) = 0;
/**
* Write the parameter values to the GPU.
*/
virtual int writeParams(const double *params, int numparams) = 0;
/**
* Write the function values to the GPU.
*/
virtual int writeFunc(const double *func, int numfunc) = 0;
/**
* Write map values to the GPU.
*/
virtual int writeMap(const int *map, int nummap) = 0;
/**
* Allocate temporary memory needed for the chi square calculations on the device.
*/
virtual int initChiSquare(int size_data, int size_param, int size_func, int size_map) = 0;
/**
* Free device memory allocated for chi square calculations.
*/
virtual int freeChiSquare() = 0;
/**
* Check if available device can run the chi square GPU code.
*/
virtual int checkChiSquareKernels(int fitType, int &threadsPerBlock) = 0;
/** Set N0, tau and bgk values to use for the kernel.
/**
* Set N0, tau and bgk values to use for the kernel.
* If the values change between data sets, this needs to be called before
* every kernel call. Returns DKS_SUCCESS.
*/
......@@ -91,7 +125,8 @@ public:
return DKS_SUCCESS;
}
/** Set alpha and beta values to use for the kernel.
/**
* Set alpha and beta values to use for the kernel.
* If the values change between data sets, this needs to be called before
* every kernel call. Returns DKS_SUCCESS.
*/
......@@ -101,8 +136,9 @@ public:
return DKS_SUCCESS;
}
/** Set number of blocks and threads.
* Used to set parameters obtained from auto-tuning
/**
* Set number of blocks and threads.
* Used to set parameters obtained from auto-tuning
*/
int setKernelParams(int numBlocks, int blockSize) {
int ierr = DKS_ERROR;
......@@ -118,8 +154,9 @@ public:
return ierr;
}
/** Get the number of operations in compiled kernel.
* Count the number of operation in the ptx file for the compiled program.
/**
* Get the number of operations in compiled kernel.
* Count the number of operations in the ptx file for the compiled program.
*/
int getOperations(int &oper) {
......
......@@ -5,6 +5,9 @@
#include <string>
#include "../DKSDefinitions.h"
/**
* Interface to implement particle matter interaction for OPAL.
*/
class DKSCollimatorPhysics {
protected:
......@@ -15,29 +18,61 @@ protected:
public:
virtual ~DKSCollimatorPhysics() { }
/**
* Execute collimator physics kernel.
*
*/
virtual int CollimatorPhysics(void *mem_ptr, void *par_ptr, int numpartices,
bool enableRutherforScattering = true) = 0;
/**
* Special case CollimatorPhysics kernel that uses SoA instead of AoS.
* Used only on the MIC side, was not implemented on the GPU.
*/
virtual int CollimatorPhysicsSoA(void *label_ptr, void *localID_ptr,
void *rx_ptr, void *ry_ptr, void *rz_ptr,
void *px_ptr, void *py_ptr, void *pz_ptr,
void *par_ptr, int numparticles) = 0;
/**
* Sort particle array on GPU.
* Count particles that are dead (label -1) or leaving material (label -2) and sort particle
* array so these particles are at the end of array
*/
virtual int CollimatorPhysicsSort(void *mem_ptr, int numparticles, int &numaddback) = 0;
/**
* Special case CollimatorPhysicsSort kernel that uses SoA instead of AoS.
* Used only on the MIC side, was not implemented on the GPU.
*/
virtual int CollimatorPhysicsSortSoA(void *label_ptr, void *localID_ptr,
void *rx_ptr, void *ry_ptr, void *rz_ptr,
void *px_ptr, void *py_ptr, void *pz_ptr,
void *par_ptr, int numparticles, int &numaddback) = 0;
/**
* BorisPusher push function for integration from OPAL.
* ParallelTTracker integration from OPAL implemented in cuda.
* For more details see the ParallelTTracker documentation in OPAL.
*/
virtual int ParallelTTrackerPush(void *r_ptr, void *p_ptr, int npart, void *dt_ptr,
double dt, double c, bool usedt = false, int streamId = -1) = 0;
/**
* BorisPusher kick function for integration from OPAL.
* ParallelTTracker integration from OPAL implemented in cuda.
* For more details see the ParallelTTracker documentation in OPAL.
*/
virtual int ParallelTTrackerKick(void *r_ptr, void *p_ptr, void *ef_ptr,
void *bf_ptr, void *dt_ptr, double charge,
double mass, int npart, double c, int streamId = -1) = 0;
/**
* BorisPusher push function with the transformto function from OPAL.
* ParallelTTracker integration from OPAL implemented in cuda.
* For more details see the ParallelTTracker documentation in OPAL.
*/
virtual int ParallelTTrackerPushTransform(void *x_ptr, void *p_ptr, void *lastSec_ptr,
void *orient_ptr, int npart, int nsec, void *dt_ptr,
double dt, double c, bool usedt = false,
......
......@@ -6,12 +6,21 @@
#include "../DKSDefinitions.h"
/**
* Abstract class defining methods for DKS FFT class.
* Used by CudaFFT, OpenCLFFT and MICFFT to create device specific FFT classes.
*/
class BaseFFT {
protected:
int defaultN[3];
int defaultNdim;
/**
* Check if FFT plan is created for the needed dimension and FFT size.
* Returns true if the plan has been created and false if no plan for specified dimension
* and size exists.
*/
bool useDefaultPlan(int ndim, int N[3]) {
if (ndim != defaultNdim)
return false;
......@@ -24,18 +33,57 @@ public:
virtual ~BaseFFT() { }
/** Setup FFT - init FFT library used by chosen device. */
virtual int setupFFT(int ndim, int N[3]) = 0;
/** Setup real to complex FFT - init FFT library used by chosen device. */
virtual int setupFFTRC(int ndim, int N[3], double scale = 1.0) = 0;
/** Setup complex to real FFT - init FFT library used by chosen device. */
virtual int setupFFTCR(int ndim, int N[3], double scale = 1.0) = 0;
/** Clean up. */
virtual int destroyFFT() = 0;
/**
* Execute C2C FFT.
* mem_ptr - memory ptr on the device for complex data.
* Performs in place FFT.
*/
virtual int executeFFT(void * mem_ptr, int ndim, int N[3],
int streamId = -1, bool forward = true) = 0;
/**
* Execute inverse C2C FFT.
* mem_ptr - memory ptr on the device for complex data.
* Performs in place FFT.
*/
virtual int executeIFFT(void * mem_ptr, int ndim, int N[3], int streamId = -1) = 0;
/**
* Normalize the FFT or IFFT.
* mem_ptr - memory to complex data.
*/
virtual int normalizeFFT(void * mem_ptr, int ndim, int N[3], int streamId = -1) = 0;
/**
* Execute R2C FFT.
* real_ptr - real input data for FFT, comp_ptr - memory on the device where
* results for the FFT are stored as complex numbers.
*/
virtual int executeRCFFT(void * real_ptr, void * comp_ptr, int ndim, int N[3],
int streamId = -1) = 0;
/**
* Execute C2R FFT.
* real_ptr - real output data from the C2R FFT, comp_ptr - complex input data for the FFT.
*/
virtual int executeCRFFT(void * real_ptr, void * comp_ptr, int ndim, int N[3],
int streamId = -1) = 0;
/**
* Normalize CR FFT.
*/
virtual int normalizeCRFFT(void *real_ptr, int ndim, int N[3], int streamId = -1) = 0;
};
......
......@@ -4,24 +4,27 @@
#include <iostream>
#include <cmath>
/**
* Interface to implement Greens function calculations for OPAL.
* All operations are pure virtual and must be provided by the implementing class.
* Every method takes a streamId that defaults to -1
* (presumably "no specific stream" - TODO confirm against the implementations) and
* returns an int status code (presumably DKS_SUCCESS or DKS_ERROR - TODO confirm).
*/
class GreensFunction {
public:
/** Virtual destructor so implementations can be destroyed through this interface. */
virtual ~GreensFunction() { }
/**
* Calculate the Greens integral, as defined in OPAL.
* tmpgreen - buffer used for the integral (assumed to be device memory - TODO confirm).
* I, J, K, NI, NJ - integer sizes of the problem; see OPAL for their exact meaning.
* hr_m0, hr_m1, hr_m2 - three double values; see OPAL for their exact meaning.
*/
virtual int greensIntegral(void *tmpgreen, int I, int J, int K, int NI, int NJ,
double hr_m0, double hr_m1, double hr_m2, int streamId = -1) = 0;
/**
* Integration of rho2_m, see OPAL for more details.
* rho2_m and tmpgreen are opaque pointers; I, J, K are integer sizes.
*/
virtual int integrationGreensFunction(void * rho2_m, void *tmpgreen, int I, int J, int K,
int streamId = -1) = 0;
/**
* Mirror the rho2_m field.
* rho2_m is an opaque pointer to the field; I, J, K are integer sizes.
*/
virtual int mirrorRhoField(void *rho2_m, int I, int J, int K, int streamId = -1) = 0;
/**
* Multiply two complex fields from device memory.
* ptr1, ptr2 - the two fields; size - their length
* (presumably the number of complex elements - TODO confirm).
* NOTE(review): the method name is misspelled ("Compelx"); it is kept as is because
* renaming it would break every implementer and caller.
*/
virtual int multiplyCompelxFields(void *ptr1, void *ptr2, int size, int streamId = -1) = 0;
};
......
......@@ -5,17 +5,22 @@
#define BLOCK_SIZE 128
/**
* Struct to hold voxel position for PET image.
* Plain aggregate of three single-precision coordinates.
* NOTE(review): field order is presumably relied on by device code - do not reorder
* without checking.
*/
struct VoxelPosition {
float x; // x coordinate of the voxel
float y; // y coordinate of the voxel
float z; // z coordinate of the voxel
};
/**
* Struct that holds the pair of detectors that registered an event.
* Both members are 16-bit wide bit-fields of type unsigned.
*/
struct ListEvent {
unsigned detA : 16; // first detector of the pair
unsigned detB : 16; // second detector of the pair
};
/**
* Interface to implement PET image reconstruction.
*/
class ImageReconstruction {
protected:
......@@ -25,7 +30,8 @@ public:
virtual ~ImageReconstruction() { }
/** Caluclate source.
/**
* Calculate source.
* Places a sphere at each voxel position and calculate the avg value and std value of pixels
* that are inside this sphere. All the sphere used have the same diameter.
*/
......@@ -33,7 +39,8 @@ public:
void *avg, void *std, float diameter, int total_voxels,
int total_sources, int start = 0) = 0;
/** Calculate background.
/**
* Calculate background.
* Places two sphere at each voxel position, calculates the avg value and std value of pixels
* that are inside the larger sphere, but are outside of the smaller sphere. The diameter of the
* smaller sphere is given by parameter diameter, diameter of the larger sphere is 2*diameter.
......@@ -42,7 +49,8 @@ public:
void *avg, void *std, float diameter, int total_voxels,
int total_sources, int start = 0) = 0;
/** Caluclate source using differente sources.
/**
* Calculate source using different sources.
* Places two sphere at each voxel position, calculates the avg value and std value of pixels
* that are inside the larger sphere, but are outside of the smaller sphere. The diameter of the
* each sphere is given by *diameter array.
......@@ -52,7 +60,7 @@ public:
int total_sources, int start = 0) = 0;
/**
* Places two sphere at each voxel position, calculates the avg value and std value of pixels
* Places two spheres at each voxel position, calculates the avg value and std value of pixels
* that are inside the larger sphere, but are outside of the smaller sphere. The diameter of the
* smaller sphere is given by *diameter array, diameter of the larger sphere is 2*diameter of the
* smaller sphere.
......@@ -61,7 +69,8 @@ public:
void *avg, void *std, void *diameter, int total_voxels,
int total_sources, int start = 0) = 0;
/** Generate normalization.
/**
* Generate normalization.
* Goes through detector pairs and, if a detector pair crosses the image, launches a separate kernel
* that updates voxel values in the image on the slope between these two detectors.
*/
......@@ -69,14 +78,16 @@ public:
void *det_position, int total_det) = 0;
/** Calculate forward projection.
/**
* Calculate forward projection.
* For image reconstruction calculates forward projections.
* see recon.cpp for details
*/
virtual int forwardProjection(void *correction, void *recon, void *list_data, void *det_position,
void *image_position, int num_events) = 0;
/** Calculate backward projection.
/**
* Calculate backward projection.
* For image reconstruction calculates backward projections.
* see recon.cpp for details
*/
......@@ -84,29 +95,29 @@ public:
void *det_position, void *image_position,
int num_events, int num_voxels) = 0;
/** Set the voxel dimensins on device.
*
/**
* Set the voxel dimensions on device.
*/
virtual int setDimensions(int voxel_x, int voxel_y, int voxel_z, float voxel_size) = 0;
/** Set the image edge variables on the device.
*
/**
* Set the image edge variables on the device.
*/
virtual int setEdge(float x_edge, float y_edge, float z_edge) = 0;
/** Set the image edge1 on the device.
*
/**
* Set the image edge1 on the device.
*/
virtual int setEdge1(float x_edge1, float y_edge1, float z_edge1, float z_edge2) = 0;
/** Set the minimum crystan in one ring values on the device.
*
/**
* Set the minimum crystal in one ring values on the device.
*/
virtual int setMinCrystalInRing(float min_CrystalDist_InOneRing,
float min_CrystalDist_InOneRing1) = 0;
/** Set all other required parameters for reconstruction.
*
/**
* Set all other required parameters for reconstruction.
*/
virtual int setParams(float matrix_distance_factor, float phantom_diameter,
float atten_per_mm, float ring_diameter) = 0;
......
......@@ -18,6 +18,17 @@
typedef std::vector<Parameter> Parameters;
typedef std::vector<State> States;
/**
* DKS autotuning class, allows to auto-tune the defined function.
* Executes the defined function for auto-tuning and searches for optimal parameters to improve
* the function execution time. The function that is auto-tuned, the parameters and their ranges
* need to be set. Includes multiple search methods that search the parameter space to find
* the optimal solution:
* 1) exhaustive search
* 2) line search
* 3) hill climbing
* 4) simulated annealing
*/
class DKSAutoTuning {
private:
......@@ -36,12 +47,13 @@ private:
int loops_m;
/** Update parameters from a state */
/** Update parameters from a state. */
int setParameterValues(States states);
/** Evaluate the function and set execution time
* Returns DKS_ERROR if errors occured during function execution.
* Returns DKS_SUCCESS if function executed as planned.
/**
* Evaluate the function and set execution time
* Returns DKS_ERROR if errors occurred during function execution.
* Returns DKS_SUCCESS if function executed as planned.
*/
int evaluateFunction(double &value);
......@@ -50,12 +62,13 @@ public:
/** Constructor */
DKSAutoTuning(DKSBase *base, std::string api, std::string device, int loops = 100);
/** Destructor */
/** Destructor. */
~DKSAutoTuning();
/** Set function to auto tune.
* Caller of setFunction is responsible to bind the correct parameters
* to the function with std::bind.
/**
* Set function to auto tune.
* Caller of setFunction is responsible to bind the correct parameters
* to the function with std::bind.
*/
void setFunction(std::function<int()> f, std::string name, bool evaluate_time = true) {
f_m = f;
......@@ -63,15 +76,21 @@ public:
evaluate_time_m = evaluate_time;
}
/**
* Set a function returning double as the function to auto tune.
* The callable is stored together with its name; the caller is responsible for
* binding the correct parameters to the function beforehand (e.g. with std::bind).
* evaluate_time defaults to false for this overload (presumably the returned double
* is then used as the tuning metric instead of the execution time - TODO confirm).
*/
void setFunction(std::function<double()> f, std::string name, bool evaluate_time = false) {
  // The three assignments are independent of each other.
  evaluate_time_m = evaluate_time;
  function_name_m = name;
  fd_m = f;
}
/** Set parameter for auto tuning.
* Provide a pointer to a parameter that will be changed during auto-tuning
* and a min-max value for this element
/**
* Set parameter for auto tuning.
* Provide a pointer to a parameter that will be changed during auto-tuning
* and a min-max value for this element
*/
template <typename T1>
void addParameter(T1 *value, T1 min, T1 max, T1 step, std::string name) {
......@@ -85,9 +104,9 @@ public:
/** Perform exhaustive search evaluating all the parameter configurations */
void exaustiveSearch();
/** Perform auto-tuning.
* Perform line-search auto-tuning by variying parameters one at a time and keeping other
* parameters constant.
/**
* Perform line-search auto-tuning by varying parameters one at a time.
* After one parameter is auto-tuned the next one is varied.
*/
void lineSearch();
......
......@@ -4,6 +4,7 @@
#include <iostream>
#include <cmath>
/** Tester class for auto-tuning search algorithms. */
class DKSAutoTuningTester {
friend class DKSBaseMuSR;
......
/** Class to save and load DKS autotunning configs.
* Autotuning settings are saved and loaded from $HOME/.config/DKS/autotuning.xml.
* Uses boost xml_parser to read and write the xml file and boost property tree to store
* the xml content.
*/
#ifndef DKS_CONFIG
#define DKS_CONFIG
......@@ -29,6 +23,13 @@ namespace pt = boost::property_tree;
// Directory of the DKS configuration; the leading '/' suggests it is appended to a
// base directory (presumably $HOME, giving $HOME/.config/DKS - TODO confirm in DKSConfig).
const std::string config_dir = "/.config/DKS";
// File name of the auto-tuning configuration inside config_dir (leading '/' included).
const std::string config_file = "/autotuning.xml";
/** Class to save and load DKS autotuning configs.
* Autotuning settings are saved and loaded from $HOME/.config/DKS/autotuning.xml.
* Uses boost xml_parser to read and write the xml file and boost property tree to store
* the xml content.
* TODO: needs an update; boost::filesystem is disabled at the moment, so no configuration file
* is saved and the auto-tuning has no effect.
*/
class DKSConfig {
private:
......
......@@ -9,6 +9,9 @@
enum VALUE_TYPE { DKS_INT, DKS_DOUBLE };
/**
* Parameter class allows to change the searchable parameters during the auto-tuning.
*/
class Parameter {
private:
......@@ -64,6 +67,10 @@ public:
};
/**
* Struct to hold an auto-tuning state.
* Holds the current value, min, max and a step by which a state can change.
*/
struct State {
double value;
double min;
......@@ -74,6 +81,12 @@ struct State {
typedef std::vector<Parameter> Parameters;
typedef std::vector<State> States;
/**
* Used by auto-tuning search algorithms to move between parameter configurations.
* Allows to move from one parameter state to another, get neighboring states,
* move to neighboring states and save state information. Print functions are available
* for debugging purposes, to follow how the algorithm moves between states.
*/
class DKSSearchStates {
private:
......
......@@ -342,62 +342,3 @@ int CudaBase::cuda_freeHostMemory(void * mem_ptr) {
return DKS_SUCCESS;
}
/*
Info: allcate memory and write data (push)
Return: pointer to memory object
*/
/*
void * CudaBase::cuda_pushData(const void * in_data, size_t size, int &ierr) {
void * mem_ptr;
mem_ptr = cuda_allocateMemory(size, ierr);
if (ierr == DKS_SUCCESS)
ierr = cuda_writeData(mem_ptr, in_data, size);
return mem_ptr;
}
*/
/*
Info: read data and free memory (pull)
Return: success or error code
*/
/*
int CudaBase::cuda_pullData(void * mem_ptr, void * out_data, size_t size, int &ierr) {
ierr = cuda_readData(mem_ptr, out_data, size);
if (ierr == DKS_SUCCESS)
ierr = cuda_freeMemory(mem_ptr);
else
return DKS_ERROR;
if (ierr == DKS_SUCCESS)
return DKS_SUCCESS;
else
return DKS_ERROR;
}
*/
/*
  Info: placeholder, only prints "Execute function" to stdout
  Return: always DKS_SUCCESS
*/
int CudaBase::cuda_executeFunction() {
  const char *const message = "Execute function";
  std::cout << message << std::endl;
  return DKS_SUCCESS;
}
/*
  Info: placeholder, only prints "clean up" to stdout
  Return: always DKS_SUCCESS
*/
int CudaBase::cuda_cleanUp() {
  const char *const message = "clean up";
  std::cout << message << std::endl;
  return DKS_SUCCESS;
}
......@@ -16,6 +16,11 @@
#define BLOCK_SIZE 128
/**
* CUDA base class handles device setup and basic communication with the device.
* Handles device setup, memory management, data transfers and stream setup for
* asynchronous data transfers and kernel executions.
*/
class CudaBase {
private:
......@@ -48,17 +53,17 @@ public:
/**
* Delete curandState.
* Delete curandState array on the GPU and free memory.
* Return success or error code
* Return success or error code
*/
int cuda_deleteCurandStates();
/** Create 'size' random numbers on the device and save in mem_ptr array
*
/**
* Create 'size' random numbers on the device and save in mem_ptr array.
*/
int cuda_createRandomNumbers(void *mem_ptr, int size);
/** Get a pointer to curand states
*
/**
* Get a pointer to curand states.
*/
curandState* cuda_getCurandStates();
......@@ -75,93 +80,98 @@ public:
int cuda_addStream(cudaStream_t tmpStream, int &streamId);
/**
* delete cuda stream
* delete cuda stream.
* success or error code
*/
int cuda_deleteStream(int id);
/**
* delete all streams
* delete all streams.
* success or error code
*/
int cuda_deleteStreams();
/**
* set stream to use
* set stream to use.
* success or error code
*/
int cuda_setStream(int id);
/**
* Info: get stream that is used
* Return: return id of curretn stream
* get stream that is used.
* Return: return id of current stream
*/