Commit 603fcb4a authored by adelmann's avatar adelmann 🎗
Browse files

fixed most of the stuff discussed yesterday

parent 70101184
This diff is collapsed.
......@@ -97,6 +97,7 @@ class PlotLosses(keras.callbacks.Callback):
plt.plot(self.x, self.losses, label="loss")
plt.plot(self.x, self.val_losses, label="val_loss")
plt.legend()
plt.yscale('log')
plt.ylabel('error')
plt.xlabel('epoch')
plt.show();
......@@ -163,7 +164,8 @@ def plotModelPerf2(m, modelDesc, X_RS, y_RSscaled, y_var_list, figDir):
errDf.loc[len(errDf)] = err
return errDf
def makeDataSetInterpolated(directory, excelFn, doInterpolate=True, dropBadPulses=True, verbose=False):
def makeDataSetInterpolated(directory, excelFn, doInterpolate=True, dropBadPulses=True, verbose=False,
CALCTthreshold=-50, CALCSthreshold=-50):
first = True
data = []
for filename in sorted(os.listdir(directory)):
......@@ -182,10 +184,10 @@ def makeDataSetInterpolated(directory, excelFn, doInterpolate=True, dropBadPulse
'SARFE10-PBIG050-EVR0:CALCS.value',
'SARFE10-PSSS059:SPECTRUM_CENTER.value',
'SARFE10-PBPG050:PHOTON-ENERGY-PER-PULSE-AVG.value']]
dp.columns = ['CALCT','CALCS','SPECTRUM_CENTER','PHOTON-ENERGY-PER-PULSE-AVG']
dp.columns = ['CALCT','CALCS','SPECTRUM_CENTER','PHOTON-ENERGY-PER-PULSE']
if doInterpolate:
dp['PHOTON-ENERGY-PER-PULSE-AVG'].interpolate(method='linear',
dp['PHOTON-ENERGY-PER-PULSE'].interpolate(method='linear',
inplace=True,
limit_direction='forward',
axis=0)
......@@ -194,8 +196,8 @@ def makeDataSetInterpolated(directory, excelFn, doInterpolate=True, dropBadPulse
# condition for bad pulse
if dropBadPulses:
validT = dp['CALCT'] < -50.
validS = dp['CALCS'] < -50.
validT = dp['CALCT'] < CALCTthreshold
validS = dp['CALCS'] < CALCSthreshold
dp = dp[validT & validS]
dp['XeMultVoltag'] = multVoltag
......@@ -246,35 +248,40 @@ def build_ff_mdl_smallnn(in_dim = 2, out_dim = 1, l1 = 8, l2 = 6, l3 = 4, l4= 4,
# -
# # Make dataset (Can not read /psi/home/adelmann/SwissFEL-Gas-1/cleaned/dp41-nomeans.csv is ok)
# # Make dataset
#
(The message "Can not read /psi/home/adelmann/SwissFEL-Gas-1/cleaned/dp41-nomeans.csv" is expected and can be ignored.)
directory = "/psi/home/adelmann/SwissFEL-Gas-1/cleaned/"
xlsxFn = '/psi/home/adelmann/SwissFEL-Gas-1/XeX77.xlsx'
data = makeDataSetInterpolated(directory,xlsxFn)
directory = "/psi/home/adelmann/data/ml-gasmon/cleaned/"
xlsxFn = '/psi/home/adelmann/data/ml-gasmon/XeX77.xlsx'
data = makeDataSetInterpolated(directory,xlsxFn,CALCTthreshold=-50,CALCSthreshold=-50,verbose=False)
data
data.tail()
# number of raw data files actually used
data['rawDataFile'].nunique()
data['SPECTRUM_CENTER'].hist(bins=100)
# +
#data['SPECTRUM_CENTER'].hist(bins=100)
data['PHOTON-ENERGY-PER-PULSE-AVG'].hist(bins=100)
# +
#data['PHOTON-ENERGY-PER-PULSE'].hist(bins=100)
data['XeMultVoltag'].hist(bins=100)
# +
#data['XeMultVoltag'].hist(bins=100)
# -
# # Training/Test Split
# +
train, validate, test = np.split(data.sample(frac=1), [int(.6*len(data)), int(.8*len(data))])
x_train = train[['PHOTON-ENERGY-PER-PULSE-AVG','XeMultVoltag']]
y_train = train[['SPECTRUM_CENTER']]
x_validate = validate[['PHOTON-ENERGY-PER-PULSE-AVG','XeMultVoltag']]
y_validate = validate[['SPECTRUM_CENTER']]
x_test = test[['PHOTON-ENERGY-PER-PULSE-AVG','XeMultVoltag']]
y_test = test[['SPECTRUM_CENTER']]
y_train.head()
x_train = train[['SPECTRUM_CENTER','XeMultVoltag','CALCT','CALCS']]
y_train = train[['PHOTON-ENERGY-PER-PULSE']]
x_validate = validate[['SPECTRUM_CENTER','XeMultVoltag','CALCT','CALCS']]
y_validate = validate[['PHOTON-ENERGY-PER-PULSE']]
x_test = test[['SPECTRUM_CENTER','XeMultVoltag','CALCT','CALCS']]
y_test = test[['PHOTON-ENERGY-PER-PULSE']]
# -
# # SVM to see what a linear model can do
......@@ -290,8 +297,12 @@ print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_p
# ## Make a scaler and scale to -1 ... 1
transformer_x = MinMaxScaler(feature_range=(-1, 1)).fit(data[['PHOTON-ENERGY-PER-PULSE-AVG','XeMultVoltag']].values)
transformer_y = MinMaxScaler(feature_range=(-1, 1)).fit(data[['SPECTRUM_CENTER']].values)
transformer_x = 0
transformer_y = 0
transformer_x = MinMaxScaler(feature_range=(-1, 1)).fit(data[['SPECTRUM_CENTER','XeMultVoltag','CALCT','CALCS']].values)
transformer_y = MinMaxScaler(feature_range=(-1, 1)).fit(data[['PHOTON-ENERGY-PER-PULSE']].values)
# +
x_train = transformer_x.transform(x_train)
......@@ -302,23 +313,20 @@ y_train = transformer_y.transform(y_train)
y_validate = transformer_y.transform(y_validate)
y_test = transformer_y.transform(y_test)
# -
model_0 = build_ff_mdl_small(in_dim = x_train.shape[1],
out_dim = y_train.shape[1])
#mc = keras.callbacks.ModelCheckpoint('best_model_1.h5', monitor='val_loss', mode='min', save_best_only=True)
#es = keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=100)
es = keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=100)
hist_0 = model_0.fit(x=x_train, y=y_train,
validation_data=(x_validate,y_validate),
batch_size=250, shuffle='true',epochs=2000,
verbose='false', callbacks=[plot_losses])
verbose='false', callbacks=[plot_losses,es])
y_pred=model_0.predict(x_test)
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
plotModelPerf2(model_0, 'Model 0', x_test, y_test, test[['SPECTRUM_CENTER']].columns, '.')
plotModelPerf2(model_0, 'Model 0', x_test, y_test, test[['PHOTON-ENERGY-PER-PULSE']].columns, '.')
# # The code below is not yet tested
......@@ -584,19 +592,62 @@ y.dropna().plot()
def readDataSetInterpolated(directory, excelFn, doInterpolate=True, dropBadPulses=True, verbose=False):
    """Read every ``*.csv`` file in *directory* into one raw DataFrame.

    File names are expected to look like ``...dp<N>-nomeans.csv``. ``<N>`` is
    used as a positional row index into the Excel sheet *excelFn* to look up
    the ``XeMultVoltag`` value that belongs to that file.

    Parameters
    ----------
    directory : str
        Directory that is scanned (non-recursively) for ``.csv`` files.
    excelFn : str
        Path of the Excel file providing the ``XeMultVoltag`` column.
    doInterpolate, dropBadPulses : bool
        Accepted for signature compatibility; this reader does not use them.
    verbose : bool
        If true, print the number of rows obtained from each file.

    Returns
    -------
    pandas.DataFrame
        All readable files concatenated, with the extra columns
        ``XeMultVoltag`` and ``rawDataFile`` (source file name). The row
        index is additionally kept as a column by ``reset_index``. An empty
        DataFrame is returned when no CSV file could be read.

    Notes
    -----
    Rows containing NaN are kept on purpose: the previous trailing
    ``data.dropna()`` discarded its result and therefore never removed
    anything, so callers already rely on the unfiltered data.
    """
    csv_files = [fn for fn in sorted(os.listdir(directory)) if fn.endswith(".csv")]
    if not csv_files:
        # Nothing to read; also avoids touching the Excel file needlessly.
        return pd.DataFrame()

    # The sheet does not change between files, so load it only once.
    file_excel = pd.read_excel(excelFn)

    frames = []
    for filename in csv_files:
        fntmp = re.sub(r'.*dp', '', filename)
        expNumber = re.sub(r'-nomeans.csv', '', fntmp)
        multVoltag = file_excel.iloc[int(expNumber)]['XeMultVoltag']

        path = os.path.join(directory, filename)
        try:
            dp = pd.read_csv(path, sep=";")
        except (OSError, ValueError):
            # pandas' EmptyDataError / ParserError and decoding errors are
            # ValueError subclasses; unreadable files are skipped best-effort.
            print("Can not read " + path)
            continue

        dp['XeMultVoltag'] = multVoltag
        dp['rawDataFile'] = filename
        frames.append(dp)
        if verbose:
            print("Datapoint", expNumber, "gave", len(dp), "values")

    if not frames:
        return pd.DataFrame()

    # DataFrame.append was removed in pandas 2.0; one concat is equivalent
    # to the repeated append(..., ignore_index=True) and avoids O(n^2) copies.
    data = pd.concat(frames, ignore_index=True)
    data.reset_index(inplace=True)
    return data
# Load the raw (unfiltered) data set: `directory` is scanned for .csv files and
# `xlsxFn` is the Excel sheet that supplies the 'XeMultVoltag' values.
directory = "/psi/home/adelmann/data/ml-gasmon/cleaned/"
xlsxFn = '/psi/home/adelmann/data/ml-gasmon/XeX77.xlsx'
data = readDataSetInterpolated(directory,xlsxFn)
# List every column name available in the combined frame.
for col in data.columns:
print(col)
# Plot the three CALC* channels together; the legend is anchored at
# (0.0, -0.3), presumably to place it below the axes.
ax=data[['SARFE10-PBIG050-EVR0:CALCI.value','SARFE10-PBIG050-EVR0:CALCS.value',
'SARFE10-PBIG050-EVR0:CALCT.value']].plot()
ax.legend(loc='center left', bbox_to_anchor=(0.0, -0.3))
# Plot ENERGY, SPECTRUM_CENTER and the multiplier voltage in one figure.
ax=data[['SARFE10-PBPG050:ENERGY.value','SARFE10-PSSS059:SPECTRUM_CENTER.value','XeMultVoltag']].plot()
ax.legend(loc='center left', bbox_to_anchor=(0.0, -0.3))
# Plot the AVG, DS and US variants of PHOTON-ENERGY-PER-PULSE in one figure.
ax=data[['SARFE10-PBPG050:PHOTON-ENERGY-PER-PULSE-AVG.value','SARFE10-PBPG050:PHOTON-ENERGY-PER-PULSE-DS.value',
'SARFE10-PBPG050:PHOTON-ENERGY-PER-PULSE-US.value']].plot()
ax.legend(loc='center left', bbox_to_anchor=(0.0, -0.3))
# Individual plots of single channels.
data['SARFE10-PBPG050:PHOTON-ENERGY-PER-PULSE-DS.value'].plot()
data['SARFE10-PBPG050:PHOTON-ENERGY-PER-PULSE-US.value'].plot()
data['SARFE10-PBPG050:ENERGY.value'].plot()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment