Commit a9d9602c authored by feichtinger

Restructured for my initial scan tests

parent 603fcb4a
#!/bin/bash
#SBATCH --partition=general
#SBATCH --ntasks=1
#SBATCH --time=48:00:00
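# load the talos_py36 conda environment and run the single-configuration training script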
module use unstable
module load anaconda/2019.07
conda activate talos_py36
cd /psi/home/feichtinger/jupyterhub/ml-gasmon
python swissfell-gas-df-scanbest.py
#!/bin/bash
#SBATCH --partition=general
#SBATCH --ntasks=1
#SBATCH --time=48:00:00
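# load the talos_py36 conda environment and run the scan script (swissfell-gas-df1.py presumably runs the Talos scan)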
module use unstable
module load anaconda/2019.07
conda activate talos_py36
cd /psi/home/feichtinger/jupyterhub/ml-gasmon
python swissfell-gas-df1.py
#!/bin/env python
# ---
# jupyter:
#   jupytext:
#     formats: ipynb,py:light
#     text_representation:
#       extension: .py
#       format_name: light
#       format_version: '1.4'
#     jupytext_version: 1.2.4
#   kernelspec:
#     display_name: Python [conda env:talos_py36]
#     language: python
#     name: conda-env-talos_py36-py
# ---
import pandas as pd
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
import scipy.stats
import numpy as np
import os
import re
import keras
from keras.models import Sequential, Model, load_model
from keras.layers import Input, Dense, Conv1D, Dropout, Activation, GaussianNoise
from keras import regularizers
from keras.activations import relu, elu
from sklearn.model_selection import train_test_split
from sklearn import metrics, svm
from sklearn.metrics import mean_squared_error, mean_absolute_error
from IPython.display import clear_output
from sklearn.preprocessing import RobustScaler, MinMaxScaler
from keras.optimizers import SGD
import tensorflow
import talos as ta
import json
import array
import random
import pickle
from math import sqrt
import time
# CONFIG ###################################
topdir = "/psi/home/feichtinger/jupyterhub/ml-gasmon"
directory = os.path.join(topdir, "cleaned/")
xlsxFn = os.path.join(topdir, "XeX77.xlsx")
# Particular configuration for this single run
params = {
    'mult_neuron': 4,
    'activation': 'tanh',
    'batch_size': 50,
    'noise': 0.001
}
############################################
def print_model_err(model, xt, yt):
    y_pred = model.predict(xt)
    print('Mean Absolute Error:', metrics.mean_absolute_error(yt, y_pred))
    print('Mean Squared Error:', metrics.mean_squared_error(yt, y_pred))
    print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(yt, y_pred)))
def makeDataSetInterpolated(directory, excelFn, doInterpolate=True, dropBadPulses=True, verbose=False,
                            CALCTthreshold=-50, CALCSthreshold=-50):
    first = True
    data = []
    # the experiment table is read once for all CSV files
    file_excel = pd.read_excel(excelFn)
    for filename in sorted(os.listdir(directory)):
        if filename.endswith(".csv"):
            fntmp = re.sub(r'.*dp', '', filename)
            expNumber = re.sub(r'-nomeans.csv', '', fntmp)
            multVoltag = file_excel.iloc[int(expNumber)]['XeMultVoltag']
            try:
                dp = pd.read_csv(directory + filename, sep=";")
            except Exception:
                print("Cannot read " + directory + filename)
                continue
            dp = dp[['SARFE10-PBIG050-EVR0:CALCT.value',
                     'SARFE10-PBIG050-EVR0:CALCS.value',
                     'SARFE10-PSSS059:SPECTRUM_CENTER.value',
                     'SARFE10-PBPG050:PHOTON-ENERGY-PER-PULSE-AVG.value']]
            dp.columns = ['CALCT', 'CALCS', 'SPECTRUM_CENTER', 'PHOTON-ENERGY-PER-PULSE']
            if doInterpolate:
                dp['PHOTON-ENERGY-PER-PULSE'].interpolate(method='linear',
                                                          inplace=True,
                                                          limit_direction='forward',
                                                          axis=0)
            dp = dp.dropna()
            # a pulse is bad unless both gas detector signals lie below their thresholds
            if dropBadPulses:
                validT = dp['CALCT'] < CALCTthreshold
                validS = dp['CALCS'] < CALCSthreshold
                dp = dp[validT & validS].copy()
            dp['XeMultVoltag'] = multVoltag
            dp['rawDataFile'] = filename
            if first:
                data = dp
                first = False
            else:
                data = data.append(dp, ignore_index=True)
            if verbose:
                print("Datapoint", expNumber, "gave", len(dp), "values")
    data.reset_index(inplace=True)
    data = data.dropna()
    return data
data = makeDataSetInterpolated(directory, xlsxFn, CALCTthreshold=-50, CALCSthreshold=-50, verbose=False)
# number of files actually used
# data['rawDataFile'].nunique()
# # Training/Test Split
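# shuffle the rows, then split them 60% / 20% / 20% into train / validate / test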
train, validate, test = np.split(data.sample(frac=1), [int(.6*len(data)), int(.8*len(data))])
x_train = train[['SPECTRUM_CENTER','XeMultVoltag','CALCT','CALCS']]
y_train = train[['PHOTON-ENERGY-PER-PULSE']]
x_validate = validate[['SPECTRUM_CENTER','XeMultVoltag','CALCT','CALCS']]
y_validate = validate[['PHOTON-ENERGY-PER-PULSE']]
x_test = test[['SPECTRUM_CENTER','XeMultVoltag','CALCT','CALCS']]
y_test = test[['PHOTON-ENERGY-PER-PULSE']]
# # Train DNN
# ## Make a scaler and scale to -1 ... 1
transformer_x = MinMaxScaler(feature_range=(-1, 1)).fit(data[['SPECTRUM_CENTER','XeMultVoltag','CALCT','CALCS']].values)
transformer_y = MinMaxScaler(feature_range=(-1, 1)).fit(data[['PHOTON-ENERGY-PER-PULSE']].values)
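# note: both scalers are fit on the complete data set, so min/max information
# from the validation and test rows flows into the scaling of the training data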
# +
x_train = transformer_x.transform(x_train)
x_validate = transformer_x.transform(x_validate)
x_test = transformer_x.transform(x_test)
y_train = transformer_y.transform(y_train)
y_validate = transformer_y.transform(y_validate)
y_test = transformer_y.transform(y_test)
# -
def build_ff_mdl_smallA(params, in_dim=2, out_dim=1,
                        l1=8, l2=6, l3=4, l4=4, opt='adam', loss='mse', l2reg=0.00, gn=0.001):
    layer_0 = Input(shape=(in_dim,))
    layer_1 = Dense(l1, activation=params['activation'],
                    activity_regularizer=regularizers.l2(l2reg))(layer_0)
    layer_1 = GaussianNoise(gn)(layer_1)
    layer_2 = Dense(l2, activation=params['activation'],
                    activity_regularizer=regularizers.l2(l2reg))(layer_1)
    layer_2 = GaussianNoise(gn)(layer_2)
    layer_3 = Dense(l3, activation=params['activation'],
                    activity_regularizer=regularizers.l2(l2reg))(layer_2)
    layer_4 = Dense(l4, activation=params['activation'],
                    activity_regularizer=regularizers.l2(l2reg))(layer_3)
    layer_4 = GaussianNoise(gn)(layer_4)
    layer_5 = Dense(out_dim, activation='linear')(layer_4)
    model = Model(inputs=layer_0, outputs=layer_5)
    model.compile(loss=loss, optimizer=opt, metrics=['mse'])
    return model
def modelA(x_train, y_train, x_val, y_val, params):
    model = build_ff_mdl_smallA(params,
                                in_dim=x_train.shape[1],
                                out_dim=y_train.shape[1],
                                l1=params['mult_neuron']*8, l2=params['mult_neuron']*6,
                                l3=params['mult_neuron']*4, l4=params['mult_neuron']*4,
                                gn=params['noise'])
    # talos expects the fit history object and the model to be returned
    out = model.fit(x=x_train,
                    y=y_train,
                    validation_data=(x_val, y_val),
                    epochs=2000, shuffle=True,
                    batch_size=params['batch_size'],
                    verbose=0)  # callbacks=[plot_losses]
    return out, model
history, mymodel = modelA(x_train, y_train, x_validate, y_validate, params)
mymodel.save('model-scanbest.h5')
print_model_err(mymodel, x_test, y_test)
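# a minimal sanity check (sketch): reload the saved model and verify that the
# test error is reproduced; for genuinely new data the MinMaxScaler objects
# would also have to be persisted (e.g. with pickle) to transform the inputs
reloaded = load_model('model-scanbest.h5')
print_model_err(reloaded, x_test, y_test)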
#!/bin/env python
# ---
# jupyter:
#   jupytext:
#     formats: ipynb,py:light
#     text_representation:
#       extension: .py
#       format_name: light
#       format_version: '1.4'
#     jupytext_version: 1.2.4
#   kernelspec:
#     display_name: Python [conda env:talos_py36]
#     language: python
#     name: conda-env-talos_py36-py
# ---
import pandas as pd
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
import scipy.stats
import numpy as np
import os
import re
import keras
from keras.models import Sequential, Model, load_model
from keras.layers import Input, Dense, Conv1D, Dropout, Activation, GaussianNoise
from keras import regularizers
from keras.activations import relu, elu
from sklearn.model_selection import train_test_split
from sklearn import metrics, svm
from sklearn.metrics import mean_squared_error, mean_absolute_error
from IPython.display import clear_output
from sklearn.preprocessing import RobustScaler, MinMaxScaler
from keras.optimizers import SGD
import tensorflow
import talos as ta
import json
import array
import random
import pickle
from math import sqrt
import time
# CONFIG ###################################
topdir = "/psi/home/feichtinger/jupyterhub/ml-gasmon"
directory = os.path.join(topdir, "cleaned/")
xlsxFn = os.path.join(topdir, "XeX77.xlsx")
params = {
    'mult_neuron': [1, 2, 4],
    'activation': ['relu', 'elu', 'tanh'],
    'batch_size': [10, 25, 50, 100],
    'noise': [0.1, 0.01, 0.001]
}
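# the grid above spans 3 x 3 x 4 x 3 = 108 configurations for the Talos scan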
############################################
def makeDataSetInterpolated(directory, excelFn, doInterpolate=True, dropBadPulses=True, verbose=False,
                            CALCTthreshold=-50, CALCSthreshold=-50):
    first = True
    data = []
    # the experiment table is read once for all CSV files
    file_excel = pd.read_excel(excelFn)
    for filename in sorted(os.listdir(directory)):
        if filename.endswith(".csv"):
            fntmp = re.sub(r'.*dp', '', filename)
            expNumber = re.sub(r'-nomeans.csv', '', fntmp)
            multVoltag = file_excel.iloc[int(expNumber)]['XeMultVoltag']
            try:
                dp = pd.read_csv(directory + filename, sep=";")
            except Exception:
                print("Cannot read " + directory + filename)
                continue
            dp = dp[['SARFE10-PBIG050-EVR0:CALCT.value',
                     'SARFE10-PBIG050-EVR0:CALCS.value',
                     'SARFE10-PSSS059:SPECTRUM_CENTER.value',
                     'SARFE10-PBPG050:PHOTON-ENERGY-PER-PULSE-AVG.value']]
            dp.columns = ['CALCT', 'CALCS', 'SPECTRUM_CENTER', 'PHOTON-ENERGY-PER-PULSE']
            if doInterpolate:
                dp['PHOTON-ENERGY-PER-PULSE'].interpolate(method='linear',
                                                          inplace=True,
                                                          limit_direction='forward',
                                                          axis=0)
            dp = dp.dropna()
            # a pulse is bad unless both gas detector signals lie below their thresholds
            if dropBadPulses:
                validT = dp['CALCT'] < CALCTthreshold
                validS = dp['CALCS'] < CALCSthreshold
                dp = dp[validT & validS].copy()
            dp['XeMultVoltag'] = multVoltag
            dp['rawDataFile'] = filename
            if first:
                data = dp
                first = False
            else:
                data = data.append(dp, ignore_index=True)
            if verbose:
                print("Datapoint", expNumber, "gave", len(dp), "values")
    data.reset_index(inplace=True)
    data = data.dropna()
    return data
data = makeDataSetInterpolated(directory, xlsxFn, CALCTthreshold=-50, CALCSthreshold=-50, verbose=False)
# number of files actually used
# data['rawDataFile'].nunique()
# # Training/Test Split
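# shuffle the rows, then split them 60% / 20% / 20% into train / validate / test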
train, validate, test = np.split(data.sample(frac=1), [int(.6*len(data)), int(.8*len(data))])
x_train = train[['SPECTRUM_CENTER','XeMultVoltag','CALCT','CALCS']]
y_train = train[['PHOTON-ENERGY-PER-PULSE']]
x_validate = validate[['SPECTRUM_CENTER','XeMultVoltag','CALCT','CALCS']]
y_validate = validate[['PHOTON-ENERGY-PER-PULSE']]
x_test = test[['SPECTRUM_CENTER','XeMultVoltag','CALCT','CALCS']]
y_test = test[['PHOTON-ENERGY-PER-PULSE']]
# # Train DNN
# ## Make a scaler and scale to -1 ... 1
transformer_x = MinMaxScaler(feature_range=(-1, 1)).fit(data[['SPECTRUM_CENTER','XeMultVoltag','CALCT','CALCS']].values)
transformer_y = MinMaxScaler(feature_range=(-1, 1)).fit(data[['PHOTON-ENERGY-PER-PULSE']].values)
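# note: both scalers are fit on the complete data set, so min/max information
# from the validation and test rows flows into the scaling of the training data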
# +
x_train = transformer_x.transform(x_train)
x_validate = transformer_x.transform(x_validate)
x_test = transformer_x.transform(x_test)
y_train = transformer_y.transform(y_train)
y_validate = transformer_y.transform(y_validate)
y_test = transformer_y.transform(y_test)
# -
def build_ff_mdl_smallA(params, in_dim=2, out_dim=1,
                        l1=8, l2=6, l3=4, l4=4, opt='adam', loss='mse', l2reg=0.00, gn=0.001):
    layer_0 = Input(shape=(in_dim,))
    layer_1 = Dense(l1, activation=params['activation'],
                    activity_regularizer=regularizers.l2(l2reg))(layer_0)
    layer_1 = GaussianNoise(gn)(layer_1)
    layer_2 = Dense(l2, activation=params['activation'],
                    activity_regularizer=regularizers.l2(l2reg))(layer_1)
    layer_2 = GaussianNoise(gn)(layer_2)
    layer_3 = Dense(l3, activation=params['activation'],
                    activity_regularizer=regularizers.l2(l2reg))(layer_2)
    layer_4 = Dense(l4, activation=params['activation'],
                    activity_regularizer=regularizers.l2(l2reg))(layer_3)
    layer_4 = GaussianNoise(gn)(layer_4)
    layer_5 = Dense(out_dim, activation='linear')(layer_4)
    model = Model(inputs=layer_0, outputs=layer_5)
    model.compile(loss=loss, optimizer=opt, metrics=['mse'])
    return model
def modelA(x_train, y_train, x_val, y_val, params):
    model = build_ff_mdl_smallA(params,
                                in_dim=x_train.shape[1],
                                out_dim=y_train.shape[1],
                                l1=params['mult_neuron']*8, l2=params['mult_neuron']*6,
                                l3=params['mult_neuron']*4, l4=params['mult_neuron']*4,
                                gn=params['noise'])
    # talos expects the fit history object and the model to be returned
    out = model.fit(x=x_train,
                    y=y_train,
                    validation_data=(x_val, y_val),
                    epochs=2000, shuffle=True,
                    batch_size=params['batch_size'],
                    verbose=0)  # callbacks=[plot_losses]
    return out, model
scanObj = ta.Scan(x=x_train, y=y_train, x_val=x_validate, y_val=y_validate,
                  params=params, model=modelA, experiment_no='1', dataset_name='swissfel-modelA-df')
modelAEval = ta.Evaluate(scanObj)
print(modelAEval.data.sort_values(by=['mean_squared_error']))
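# a minimal sketch for persisting the full result table for offline comparison;
# this assumes the Scan object exposes its results as a pandas DataFrame via
# .data (as Evaluate above suggests), and the CSV file name is arbitrary
scanObj.data.to_csv('swissfel-modelA-df-scan-results.csv', index=False)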