Commit 3747d34b authored by snuverink_j
Browse files

bugfix in makeDataSetInterpolated: first interpolate then dropna; avoid duplication and renaming

parent 6ab2402c
......@@ -163,10 +163,10 @@ def plotModelPerf2(m, modelDesc, X_RS, y_RSscaled, y_var_list, figDir):
errDf.loc[len(errDf)] = err
return errDf
def makeDataSetInterpolated(directory, excelFn, doInterpolate=True):
def makeDataSetInterpolated(directory, excelFn, doInterpolate=True, dropBadPulses=True, verbose=False):
first = True
for filename in os.listdir(directory):
data = []
for filename in sorted(os.listdir(directory)):
if filename.endswith(".csv"):
fntmp = re.sub(r'.*dp', '', filename)
expNumber = re.sub(r'-nomeans.csv', '', fntmp)
......@@ -174,41 +174,45 @@ def makeDataSetInterpolated(directory, excelFn, doInterpolate=True):
multVoltag = file_excel.iloc[int(expNumber)]['XeMultVoltag']
try:
dp = pd.read_csv(directory+filename, sep=";")
if doInterpolate:
dpco = dp[['SARFE10-PBIG050-EVR0:CALCT.value',
'SARFE10-PBIG050-EVR0:CALCS.value',
'SARFE10-PSSS059:SPECTRUM_CENTER.value',
'SARFE10-PBPG050:PHOTON-ENERGY-PER-PULSE-AVG.value']]
dpco.columns = ['CALCT','CALCS','SPECTRUM_CENTER','PHOTON-ENERGY-PER-PULSE-AVG']
dpa = dpco.dropna(subset=['CALCT', 'CALCS','SPECTRUM_CENTER']);
dpa = dpa.interpolate(method='linear', limit_direction='forward', axis=0).dropna()
# condition for bad pulse
validT = dpa['CALCT'] < -50.
validS = dpa['CALCS'] < -50.
dpc = dpa[validT & validS]
else:
dpc = dp[['SARFE10-PBIG050-EVR0:CALCT.value',
'SARFE10-PBIG050-EVR0:CALCS.value',
'SARFE10-PSSS059:SPECTRUM_CENTER.value',
'SARFE10-PBPG050:PHOTON-ENERGY-PER-PULSE-AVG.value']].dropna()
dpc.columns = ['CALCT','CALCS','SPECTRUM_CENTER','PHOTON-ENERGY-PER-PULSE-AVG']
dpc['XeMultVoltag'] = multVoltag
dpc['rawDataFiele'] = filename
if first:
data = dpc
first = False
else:
data = data.append(dpc,ignore_index=True)
except:
print ("Can not read " + directory+filename)
print ("Can not read " + directory + filename)
continue
dp = dp[['SARFE10-PBIG050-EVR0:CALCT.value',
'SARFE10-PBIG050-EVR0:CALCS.value',
'SARFE10-PSSS059:SPECTRUM_CENTER.value',
'SARFE10-PBPG050:PHOTON-ENERGY-PER-PULSE-AVG.value']]
dp.columns = ['CALCT','CALCS','SPECTRUM_CENTER','PHOTON-ENERGY-PER-PULSE-AVG']
if doInterpolate:
dp['PHOTON-ENERGY-PER-PULSE-AVG'].interpolate(method='linear',
inplace=True,
limit_direction='forward',
axis=0)
dp = dp.dropna();
# condition for bad pulse
if dropBadPulses:
validT = dp['CALCT'] < -50.
validS = dp['CALCS'] < -50.
dp = dp[validT & validS]
dp['XeMultVoltag'] = multVoltag
dp['rawDataFile'] = filename
if first:
data = dp
first = False
else:
data = data.append(dp,ignore_index=True)
if verbose:
print("Datapoint", expNumber, "gave", len(dp), "values")
data.reset_index(inplace=True)
data.dropna()
return data
# +
def build_ff_mdl_small(in_dim = 2, out_dim = 1, l1 = 8, l2 = 6, l3 = 4, l4= 4,
opt = 'adam', loss = 'mse', act = 'tanh', l2reg = 0.00, gn=0.001):
......@@ -250,7 +254,8 @@ data = makeDataSetInterpolated(directory,xlsxFn)
data
#nr files actually used
data['rawDataFile'].nunique()
data['SPECTRUM_CENTER'].hist(bins=100)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment