// DKSBaseMuSR.h
#ifndef H_DKS_BASEMUSR
#define H_DKS_BASEMUSR

#include <iostream>
#include <string>
#include <vector>

#include "AutoTuning/DKSAutoTuning.h"
#include "AutoTuning/DKSAutoTuningTester.h"

#include "DKSBase.h"
#include "DKSFFT.h"

#include "Algorithms/ChiSquareRuntime.h"

#ifdef DKS_CUDA
#include "CUDA/CudaChiSquareRuntime.cuh"
#endif

#ifdef DKS_OPENCL
#include "OpenCL/OpenCLChiSquareRuntime.h"
#endif

class DKSBaseMuSR : public DKSFFT {
Uldis Locans's avatar
Uldis Locans committed
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138

private:

  ChiSquareRuntime *chiSq;

  int chiSquareSize_m;

public:

  DKSBaseMuSR();

  ~DKSBaseMuSR();

  /** Compile the program with kernels to be run.
   * String function contains the string that will be added to the code to compile in the
   * function: __device__ double fTheory(double t, double *p, double *f, int *m);
   * Function string must be a valid C math expression. It can contain operators, math functions
   * and predefined functions listed in:
   * http://lmu.web.psi.ch/musrfit/user/MUSR/MusrFit.html#A_4.3_The_THEORY_Block
   * Predifined functions can be accessed by the abbreviation given in the table
   * Parameters can be accesed in form p[idx] or p[m[idx]] - where p represents parameter array
   * m represents map array and idx is the index to use from the maps. Precalculated function 
   * values can be accessed the same way - f[idx] or f[m[idx]]. Returns DKS_SUCCESS if everythin 
   * runs successfully, otherwise returns DKS_ERROR. If DKS is compiled with debug flag enabled 
   * prints DKS error message in case something fails
   */
  int callCompileProgram(std::string function, bool mlh = false);

  /** Launch chi square calculation on data set writen in mem_data memory on device.
   * mem_par, mem_map and mem_func hold pointers to parameter, function and map values
   * for this data set (parameter array is one for all the data sets, maps and functions
   * change between data sets). Resulting chi square value for this dataset will be put in
   * result variable. Returns DKS_SUCCESS if everythin runs successfully, otherwise returns
   * DKS_ERROR. If DKS is compiled with debug flag enabled prints DKS error message in case
   * something fails
   */
  int callLaunchChiSquare(int fitType,
			  void *mem_data, void *mem_err, int length,
			  int numpar, int numfunc, int nummap,
			  double timeStart, double timeStep,
			  double &result);

  /** Launch auto-tuning of chisquare function for the selected device.
   *  Creates a function pointer to callLaunchChiSquare with necessary arguments bind to
   *  function call. CUDA and OpenCL version - gives AutoTuning class access to numThreads
   *  parameter which is varied to find the optimal value by AutoTuning class. Uses brute force
   *  method to test all the values.
   */
  int callAutoTuningChiSquare(int fitType, void *mem_data, void *mem_err, int length, 
			      int numpar, int numfunc, int nummap,
			      double timeStart, double timeStep,
			      double &result, std::vector<int> &config);

  /** Set N0, tau and BKG values for the run.
   * Needs to be called before kernel launch if these values are changing
   */
  int callSetConsts(double N0, double tau, double bkg);

  /** Set alpha and beta values for the run.
   * Needs to be called before kernel launch if these values are changing
   */
  int callSetConsts(double alpha, double beta);

  /** Init chisquare calculations.
   * Size is the maximum number of elements in any of the data sets used.
   */
  int initChiSquare(int size_data, int size_param, int size_func, int size_map);

  /** Free temporary device storage allocated for chi^2 kernel.
   * Return error code if freeing the device fails.
   */
  int freeChiSquare();

  /** Write params to device.
   * Write pramas from double array to device, params device memory is managed by DKS.
   */
  int writeParams(const double *params, int numparams);

  /** Write function values to device.
   * Write precalculated function values to device, memory for functions on device is handled
   * by DKS.
   */
  int writeFunctions(const double *func, int numfunc);

  /** Write map indexes to device.
   * Write map indexes to use in defined theory function to devive. Memory for map indexes is
   * handeld by DKS.
   */
  int writeMaps(const int *map, int numfunc);

  /** Check if device can run necessary kernels.
   * Check selected device properties to see if device
   * suports double precision and if device can run the
   * necessary number of work_items / work_groups to successfully
   * execute CUDA/OpenCL kernels.
   */
  int checkMuSRKernels(int fitType);

  /** Perform the same check as checkMuSRKernels(int fitType) and return max threads per block.
   * Used for autotuning to check what is the device limit for threads per block to correctly
   * set the upper bound when searching the parameter space.
   */
  int checkMuSRKernels(int fitType, int &threadsPerBlock);

  /** Debug function to test auto-tuning search functions
   */
  int testAutoTuning();

  /** Get the number of operations in compiled kernel.
   */
  int getOperations(int &oper);

};

#endif