Commit 87cdf52f authored by Uldis Locans's avatar Uldis Locans
Browse files

Collimator physics for MIC fix

parent 027bdc01
......@@ -30,7 +30,7 @@ public:
/* destructor */
~CudaGreensFunction();
/*
/**
Info: calc itegral on device memory (taken from OPAL src code)
Return: success or error code
*/
......@@ -38,20 +38,20 @@ public:
double hr_m0, double hr_m1, double hr_m2,
int streamId = -1);
/*
/**
Info: integration of rho2_m field (taken from OPAL src code)
Return: success or error code
*/
int cuda_IntegrationGreensFunction(void *rho2_m, void *tmpgreen, int I, int J, int K,
int streamId = -1);
/*
/**
Info: mirror rho field (taken from OPAL src code)
Return: succes or error code
*/
int cuda_MirrorRhoField(void *mem_ptr, int I, int J, int K, int streamId = -1);
/*
/**
Info: multiply complex fields already on the GPU memory, result will be put in ptr1
Return: success or error code
*/
......
......@@ -18,30 +18,28 @@ int MICBase::mic_createRandStreams(int size) {
int seed = time(NULL);
#pragma offload target(mic:m_device_id) inout(defaultRndSet) in(seed)
int numThreads = 0;
#pragma offload target(mic:m_device_id) inout(numThreads)
{
//get the number of threads
int numThreads;
#pragma omp parallel
numThreads = omp_get_num_threads();
}
//if default rnd stream already allocated delete the array
if (defaultRndSet == 1)
delete[] defaultRndStream;
//allocate defaultRndStream array
defaultRndStream = new VSLStreamStatePtr[numThreads];
defaultRndStream = mic_allocateMemory<VSLStreamStatePtr>(numThreads);
VSLStreamStatePtr *tmpRndStream = (VSLStreamStatePtr*) defaultRndStream;
maxThreads = numThreads;
#pragma offload target(mic:m_device_id) \
in(tmpRndStream:length(0) DKS_REUSE DKS_RETAIN) \
in(seed)
{
//create stream states for each thread
#pragma omp parallel for
for (int i = 0; i < omp_get_num_threads(); i++)
vslNewStream(&defaultRndStream[i], VSL_BRNG_MT2203, seed + i);
defaultRndSet = 1;
vslNewStream(&tmpRndStream[i], VSL_BRNG_MT2203, seed + i);
}
defaultRndSet = 1;
return DKS_SUCCESS;
}
......@@ -49,15 +47,8 @@ int MICBase::mic_createRandStreams(int size) {
//delete default rand streams
int MICBase::mic_deleteRandStreams() {
#pragma offload target(mic:m_device_id) inout(defaultRndSet)
{
if (defaultRndSet == 1) {
delete[] defaultRndStream;
defaultRndSet = -1;
}
}
return DKS_ERROR;
//mic_freeMemory<VSLStreamStatePtr>(defaultRndStream, 236);
return DKS_SUCCESS;
}
//create a new signal for the mic
......
......@@ -30,14 +30,19 @@ class MICBase {
private:
std::vector<int> micStreams;
int maxThreads;
protected:
int defaultRndSet;
public:
VSLStreamStatePtr *defaultRndStream;
//#pragma offload_attribute(push,target(mic))
void *defaultRndStream; //VSLSStreamStatePtr
void *testPtr;
//#pragma offload_attribute(pop)
int m_device_id;
/* constructor */
......@@ -202,7 +207,6 @@ public:
#pragma offload_transfer target(mic:m_device_id) nocopy(tmp_ptr:length(totalsize) DKS_REUSE DKS_FREE)
{
}
return DKS_SUCCESS;
}
......
......@@ -292,7 +292,7 @@ void energyLoss(double &Eng, int &pdead, double *par, VSLStreamStatePtr &stream)
const double deltas = par[DT_M] * beta * C;
const double deltasrho = deltas * 100 * par[RHO_M];
const double sigma_E = sqrt(K * eM_E * par[RHO_M] * (Z_M / par[A_M]) * deltas * 1E5);
const double sigma_E = sqrt(K * eM_E * par[RHO_M] * (par[Z_M] / par[A_M]) * deltas * 1E5);
if ( (Eng > 0.00001) && (Eng < 0.0006) ) {
const double Ts = (Eng * 1E6) / 1.0073;
......@@ -338,7 +338,7 @@ void energyLoss(double &Eng, double &dEdx, double *par, double *randv, int ri) {
const double deltas = par[DT_M] * beta * C;
const double deltasrho = deltas * 100 * par[RHO_M];
const double sigma_E = sqrt(K * eM_E * par[RHO_M] * (Z_M / par[A_M]) * deltas * 1E5);
const double sigma_E = sqrt(K * eM_E * par[RHO_M] * (par[Z_M] / par[A_M]) * deltas * 1E5);
if ( (Eng > 0.00001) && (Eng < 0.0006) ) {
const double Ts = (Eng * 1E6) / 1.0073;
......@@ -373,16 +373,18 @@ int MICCollimatorPhysics::CollimatorPhysics(void *mem_ptr, void *par_ptr, int nu
//cast device memory pointers to appropriate types
MIC_PART_SMALL *data = (MIC_PART_SMALL*) mem_ptr;
double *par = (double*) par_ptr;
VSLStreamStatePtr *streamArr = (VSLStreamStatePtr*) m_micbase->defaultRndStream;
#pragma offload target(mic:m_micbase->m_device_id) \
inout(data:length(0) DKS_RETAIN DKS_REUSE) \
in(par:length(0) DKS_RETAIN DKS_REUSE) \
in(streamArr:length(0) DKS_RETAIN DKS_REUSE) \
in(numparticles)
{
#pragma omp parallel
{
VSLStreamStatePtr stream = m_micbase->defaultRndStream[omp_get_thread_num()];
VSLStreamStatePtr stream = streamArr[omp_get_thread_num()];
//for loop trough particles if not checkhit set label to -2 and update R.x
......@@ -459,6 +461,8 @@ int MICCollimatorPhysics::CollimatorPhysicsSoA(void *label_ptr, void *localID_pt
int padding = numparticles % MIC_WIDTH;
int totalpart = numparticles + padding;
VSLStreamStatePtr *streamArr = (VSLStreamStatePtr*) m_micbase->defaultRndStream;
#pragma offload target (mic:0) \
in(label:length(0) DKS_REUSE DKS_RETAIN) \
in(localID:length(0) DKS_REUSE DKS_RETAIN) \
......@@ -469,14 +473,16 @@ int MICCollimatorPhysics::CollimatorPhysicsSoA(void *label_ptr, void *localID_pt
in(py:length(0) DKS_REUSE DKS_RETAIN) \
in(pz:length(0) DKS_REUSE DKS_RETAIN) \
in(par:length(0) DKS_RETAIN DKS_REUSE) \
in(streamArr:length(0) DKS_RETAIN DKS_REUSE) \
in(totalpart)
{
#pragma omp parallel
{
//every thread gets its own rnd stream state
VSLStreamStatePtr stream = m_micbase->defaultRndStream[omp_get_thread_num()];
//VSLStreamStatePtr stream = m_micbase->defaultRndStream[omp_get_thread_num()];
VSLStreamStatePtr stream = streamArr[omp_get_thread_num()];
#pragma omp for nowait
for (int ii = 0; ii < totalpart; ii += MIC_WIDTH) {
......@@ -512,9 +518,11 @@ int MICCollimatorPhysics::CollimatorPhysicsSoA(void *label_ptr, void *localID_pt
double Eng = (sq - 1) * M_P;
double dEdx = 0;
if (label[i] == 0) {
energyLoss(Eng, dEdx, par, randv, i - ii);
}
if (Eng > 1e-4 && dEdx < 0) {
double ptot = sqrt((M_P + Eng) * (M_P + Eng) - (M_P * M_P)) / M_P;
......@@ -526,11 +534,12 @@ int MICCollimatorPhysics::CollimatorPhysicsSoA(void *label_ptr, void *localID_pt
if (Eng < 1e-4 || dEdx > 0)
label[i] = -1;
} //end inner energy loss loop
} //end outer energy loss loop
} //end outer energy loss loop
//vectorize coulomb scattering as much as possible
#pragma omp for nowait
for (int ii = 0; ii < totalpart; ii += MIC_WIDTH) {
......@@ -540,7 +549,7 @@ int MICCollimatorPhysics::CollimatorPhysicsSoA(void *label_ptr, void *localID_pt
} //end omp parallel
} //end offload
return DKS_SUCCESS;
}
......
......@@ -26,7 +26,7 @@ typedef struct {
} MIC_PART_SMALL;
class MICCollimatorPhysics : DKSAlogorithms{
class MICCollimatorPhysics : public DKSCollimatorPhysics {
private:
......@@ -38,7 +38,7 @@ public:
m_micbase = base;
};
~MICCollimatorPhysics() { };
~MICCollimatorPhysics() { };
int CollimatorPhysics(void *mem_ptr, void *par_ptr, int numparticles);
......
......@@ -6,13 +6,16 @@
MICFFT::MICFFT(MICBase *base) {
m_micbase = base;
m_fftsetup = false;
}
MICFFT::~MICFFT() {
if (m_fftsetup) {
#pragma offload target(mic:0)
{
DftiFreeDescriptor(&FFTHandle_m);
DftiFreeDescriptor(&handle);
{
DftiFreeDescriptor(&FFTHandle_m);
DftiFreeDescriptor(&handle);
}
}
}
......@@ -35,7 +38,7 @@ int MICFFT::setupFFT(int ndim, int N[3]) {
}
m_fftsetup = true;
return DKS_SUCCESS;
}
//BENI:
......@@ -122,8 +125,8 @@ int MICFFT::executeFFT(void *mem_ptr, int ndim, int N[3], int streamId, bool for
}
//execute iFFT
int MICFFT::executeIFFT(void *mem_ptr, int ndim, int N[3]) {
return mic_executeFFT(mem_ptr, ndim, N, -1, false);
int MICFFT::executeIFFT(void *mem_ptr, int ndim, int N[3], int streamId) {
return executeFFT(mem_ptr, ndim, N, -1, false);
}
//execute REAL->COMPLEX FFT
......
......@@ -7,13 +7,14 @@
#include <offload.h>
#include <mkl_dfti.h>
#include "../Algorithm/DKSFFT.h"
#include "../Algorithms/FFT.h"
#include "MICBase.h"
class MICFFT : public DKSFFT {
private:
bool m_fftsetup;
MICBase *m_micbase;
/// Internal FFT object for performing serial FFTs.
......@@ -74,6 +75,18 @@ public:
/* normalize IFFT on MIC */
int normalizeFFT(void *mem_ptr, int ndim, int N[3], int streamId = -1);
/**
* Info: destroy default FFT plans
* Return: success or error code
*/
int destroyFFT() { return DKS_SUCCESS; }
/*
Info: execute normalize for complex to real iFFT
Return: success or error code
*/
int normalizeCRFFT(void *real_ptr, int ndim, int N[3], int streamId = -1) { return DKS_SUCCESS; }
};
#endif
......@@ -129,7 +129,9 @@ int main(int argc, char *argv[]) {
//init random
base.callInitRandoms(numpart);
//**test collimator physics and sort***//
void *label_ptr, *localID_ptr, *rx_ptr, *ry_ptr, *rz_ptr, *px_ptr, *py_ptr, *pz_ptr, *param_ptr;
//allocate memory for particles
......@@ -210,8 +212,8 @@ int main(int argc, char *argv[]) {
base.freeMemory<double>(pz_ptr, numpart);
base.freeMemory<double>(param_ptr, 12);
/*
/*
std::cout << std::fixed << std::setprecision(4);
for (int i = 0; i < 10; i++) {
std::cout << p.label[i] << "\t" << p.rx[i]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment