Commit dce0461c authored by feichtinger's avatar feichtinger
Browse files

introduced more functions and visualized the new data sets

parent f5e5f724
This diff is collapsed.
......@@ -23,16 +23,20 @@ import seaborn as sns
import os
# ## helper functions
# TODO: At some point this should probably get wrapped into a class or a module
def sfel_gasmon_h5read(fname):
varmap = { 'SARFE10-PBIG050-EVR0:CALCI': 'CALCI',
varmap = {'SARFE10-PBIG050-EVR0:CALCI': 'CALCI',
'SARFE10-PBIG050-EVR0:CALCS': 'CALCS',
'SARFE10-PBIG050-EVR0:CALCT': 'CALCT',
'SARFE10-PBPG050:FELPHOTENE': 'Ephot',
'SARFE10-PBPG050:HAMP-HV-DS': 'Voltage_t',
'SARFE10-PBPG050:HAMP-HV-US': 'Voltage_s',
'SARFE10-PBPG050:MKS-PRESSURE': 'pressure',
'SARFE10-PBPG050:PHOTON-ENERGY-PER-PULSE-AVG': 'PEPavg',
'pulse_id': 'pulse_id'}
'SARFE10-PBIG050-EVR0:CALCT': 'CALCT',
'SARFE10-PBPG050:FELPHOTENE': 'Ephot',
'SARFE10-PBPG050:HAMP-HV-DS': 'Voltage_t',
'SARFE10-PBPG050:HAMP-HV-US': 'Voltage_s',
'SARFE10-PBPG050:MKS-PRESSURE': 'pressure',
'SARFE10-PBPG050:PHOTON-ENERGY-PER-PULSE-AVG': 'PEPavg',
'pulse_id': 'pulse_id'}
f = h5py.File(fname,'r')
df = pd.DataFrame()
......@@ -52,7 +56,11 @@ def sfel_gasmon_h5preproc(df, calc_thresh=50.0, interpolate=True):
if interpolate:
for col in ['Ephot', 'Voltage_s', 'Voltage_t', 'PEPavg','pressure']:
df[col] = df[col].interpolate(method='linear')
df[col] = df[col].interpolate(method='linear') \
.fillna(method='backfill')
# sometimes the initial rows of non-CALC columns contain NaN. This then
# is missed by the previous interpolation. We backfill them in addition
# to the interpolation
# CALCS and CALCT are negative in raw data
# TODO: base all calculations on positive CALC... I leave it for now
......@@ -71,29 +79,185 @@ def sfel_gasmon_h5preproc(df, calc_thresh=50.0, interpolate=True):
# we want to have Ephot in eV, not in keV
df.Ephot *= 1000
return df
return df[['CALCS', 'CALCT', 'PEPavg', 'Voltage_s', 'Voltage_t', 'Ephot', 'pressure']]
def sfel_gasmon_applymodel(df, model=None, s_or_t='s'):
    """Evaluate a PEP prediction model on one channel of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the ``Ephot`` column plus ``CALCS``/``Voltage_s``
        (for ``s_or_t='s'``) or ``CALCT``/``Voltage_t`` (for ``s_or_t='t'``).
    model : callable
        Called as ``model(calc, voltage, ephot)`` with the three selected
        columns, e.g. a function built by ``mk_pep_pred_EMgain``.
    s_or_t : {'s', 't'}
        Selects which CALC/voltage column pair is fed to the model.

    Returns
    -------
    Whatever ``model`` returns for the selected columns.

    Raises
    ------
    TypeError
        If ``model`` is not callable (this includes the ``None`` default).
    ValueError
        If ``s_or_t`` is neither ``'s'`` nor ``'t'``.
    """
    # The None default only exists to keep the keyword-style call signature;
    # fail early with a readable message instead of "NoneType is not callable".
    if not callable(model):
        raise TypeError(
            f'model must be a callable taking (calc, voltage, ephot), got {model!r}')

    channels = {
        's': ('CALCS', 'Voltage_s'),
        't': ('CALCT', 'Voltage_t'),
    }
    try:
        calc, voltage = channels[s_or_t]
    except (KeyError, TypeError):
        # Previously every value other than 's' silently meant 't', which
        # hides typos such as 'S' or 'side'.
        raise ValueError(f"s_or_t must be 's' or 't', got {s_or_t!r}") from None

    return model(df[calc], df[voltage], df.Ephot)
def sfel_gasmon_plot_pepcompare(df, model=None, s_or_t='s'):
    """Plot the model prediction and the measured average PEP in two panels.

    A new 12 x 12 figure is created with two stacked axes: the upper one
    shows ``sfel_gasmon_applymodel(df, model, s_or_t)``, the lower one shows
    ``df.PEPavg``. Both are drawn as faint dots without connecting lines.
    Nothing is returned.
    """
    # Shared point style: markers only, strongly transparent so that dense
    # regions remain distinguishable.
    dot_style = dict(linestyle='', marker='.', alpha=0.05)

    figure = plt.figure(figsize=(12, 6*2))

    upper = figure.add_subplot(2, 1, 1)
    predicted = sfel_gasmon_applymodel(df, model=model, s_or_t=s_or_t)
    upper.plot(predicted, **dot_style)
    upper.set_ylabel('predicted PEP')

    lower = figure.add_subplot(2, 1, 2)
    lower.plot(df.PEPavg, **dot_style)
    lower.set_ylabel('avg. PEP based on current')
def sfel_gasmon_plot_cols(df, columns=None):
    """Plot each requested column of ``df`` in its own stacked panel.

    ``columns`` defaults to all columns of ``df``. The figure is 12 wide and
    6 high per panel; every column is drawn as faint dots without connecting
    lines and labelled with the column name. Nothing is returned.
    """
    names = df.columns if columns is None else columns
    n_panels = len(names)

    figure = plt.figure(figsize=(12, 6*n_panels))
    # Subplot positions are 1-based, hence start=1.
    for position, name in enumerate(names, start=1):
        axis = figure.add_subplot(n_panels, 1, position)
        axis.plot(df[name], linestyle='', marker='.', alpha=0.05)
        axis.set_ylabel(name)
def sfel_gasmon_data_info(df, columns=None):
    """Tabulate the minimum and maximum of selected columns of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to summarize; must contain every requested column.
    columns : iterable of str, optional
        Columns to summarize. Defaults to the voltage, photon energy,
        pressure, PEPavg and CALC columns that were previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per requested column (in the requested order) with the
        columns ``min`` and ``max``.

    Raises
    ------
    KeyError
        If a requested column is missing from ``df``.
    """
    if columns is None:
        columns = ['Voltage_s', 'Voltage_t', 'Ephot', 'pressure',
                   'PEPavg', 'CALCS', 'CALCT']

    ranges = {col: [df[col].min(), df[col].max()] for col in columns}

    # The dict yields one frame column per data column with rows 0 (min) and
    # 1 (max); transpose so data columns become rows, then name the two
    # resulting columns.
    return pd.DataFrame(ranges).transpose().rename(columns={0: 'min', 1: 'max'})
# ## configuration
# +
# Directory containing the HDF5 input files; joined with a file name below.
datadir = 'pavledata'
# Candidate input files for the initial load. Exactly one `filename`
# assignment is left active; the others are kept commented out for switching.
#filename='test1.h5'
#filename='MLData_20200520_1.h5' # problematic - scale seems off
#filename='MLData_20200527_1.h5'
#filename='MLData_20200603_1.h5'
filename='MLData_20200608_1.h5'
# Data files that are addressed by index (datafiles[0] .. datafiles[3]) in
# the test and analysis sections further down.
datafiles = ['MLData_20200520_1.h5',
'MLData_20200527_1.h5',
'MLData_20200603_1.h5',
'MLData_20200608_1.h5'
]
# -
# Load the currently selected file into `df`.
df = sfel_gasmon_h5read(os.path.join(datadir,filename))
# ## Models
# ### Model factories
# Classic EM gain model
#
# \begin{aligned} \text{CALC} = N_{\text{ph}} \cdot A \cdot V^{B} \end{aligned}
#
# \begin{aligned}
# N_{\text{ph, pulse}} = \frac{{\text {PEP}}_{\text {avg}}} {\hbar \omega}
# \end{aligned}
#
# \begin{aligned}
# \text{PEP}_{\text{pred}} = \frac{\text{CALC} \cdot E_{\text{beam}}}{A V^{B}}
# \end{aligned}
#
# classic EM gain model
def mk_pep_pred_EMgain(A, B):
    """Build a PEP predictor for the classic EM gain model.

    The returned function takes ``(calc, voltage, ebeam)`` and evaluates
    ``(1/6.241509e12) * ebeam * calc / (A * voltage**B)``. Scalars and
    numpy/pandas array-likes are both accepted because the power is computed
    with ``np.power``.

    NOTE(review): 6.241509e12 looks like the number of eV per microjoule,
    i.e. a conversion of the result to microjoule - confirm.
    """
    def pep_pred(calc, voltage, ebeam):
        # Gain term A * V^B of the model.
        gain = A * np.power(voltage, B)
        return (1/6.241509e12) * ebeam * calc / gain

    return pep_pred
# EM gain model modulated by (1 + C * Ephot)
#
# \begin{aligned} \text{CALC} = (1 + C \cdot E_{\text{beam}}) \cdot N_{\text{ph}} \cdot A \cdot V^{B} \end{aligned}
#
# \begin{aligned}
# N_{\text{ph, pulse}} = \frac{{\text {PEP}}_{\text {avg}}} {\hbar \omega}
# \end{aligned}
#
# \begin{aligned}
# \text{PEP}_{\text{pred}} = \frac{\text{CALC} \cdot E_{\text{beam}}}{(1 + C \cdot E_{\text{beam}}) A V^{B}}
# \end{aligned}
#
# EM gain model modulated by (1 + C * Ephot)
def mk_pep_pred_EMgain_mod(A, B, C):
    """Build a PEP predictor for the EM gain model modulated by (1 + C * ebeam).

    The returned function takes ``(calc, voltage, ebeam)`` and evaluates
    ``(1/6.241509e12) * ebeam * calc / (1 + C * ebeam) / (A * voltage**B)``.
    Scalars and numpy/pandas array-likes are both accepted because the power
    is computed with ``np.power``.

    NOTE(review): 6.241509e12 looks like the number of eV per microjoule,
    i.e. a conversion of the result to microjoule - confirm.
    """
    def pep_pred(calc, voltage, ebeam):
        # Empirical energy-dependent correction factor.
        modulation = 1 + C * ebeam
        # Gain term A * V^B of the model.
        gain = A * np.power(voltage, B)
        return (1/6.241509e12) * ebeam * calc / modulation / gain

    return pep_pred
# ### Specific model implementations
# Model 1: Based on the cleaned calibration run data set and the classic EM gain model. Parametrized for CALCS.
# popt1 holds the parameters in the order [A, B] that is passed to mk_pep_pred_EMgain.
popt1 = [-8.12506079e-29, 6.54642197e+00]
pep_pred_fn1 = mk_pep_pred_EMgain(popt1[0], popt1[1])
# Model 2: Based on the cleaned calibration run data set and the EM gain model empirically modulated by (1 + C * Ephot). Parametrized for CALCS.
# popt2 holds the parameters in the order [A, B, C] that is passed to mk_pep_pred_EMgain_mod.
popt2 = [-1.11290561e-29, 6.97740548e+00, -7.66814987e-05]
pep_pred_fn2 = mk_pep_pred_EMgain_mod(popt2[0], popt2[1], popt2[2])
# ## test
# Run the read -> preprocess -> plot pipeline once on a single data file.
testfilename = datafiles[1]
df = sfel_gasmon_h5read(os.path.join(datadir,testfilename))
# This is the RAW data
df.describe()
df = sfel_gasmon_h5preproc(df)
# this is the preprocessed data
df.describe()
# NOTE(review): `df` was already replaced by its preprocessed version above,
# so this call runs the preprocessing a second time on its own output.
# df2 is only used again for a describe() call in analysis section d.
# Confirm that this is intended.
df2 = sfel_gasmon_h5preproc(df)
sfel_gasmon_plot_cols(df)
# Compare both parametrized models against PEPavg, using the CALCS/Voltage_s columns.
sfel_gasmon_plot_pepcompare(df, model=pep_pred_fn1, s_or_t='s')
sfel_gasmon_plot_pepcompare(df, model=pep_pred_fn2, s_or_t='s')
# # Analysis of new data beginning in May 2020
# Each section below handles one entry of `datafiles`: read the file,
# preprocess it, tabulate the min/max of the main columns and plot the
# prediction of model 1 (pep_pred_fn1, CALCS/Voltage_s columns) against PEPavg.
# ## a
fname = datafiles[0]
print(f'Datafile: {fname}')
df = sfel_gasmon_h5read(os.path.join(datadir,fname))
df = sfel_gasmon_h5preproc(df)
sfel_gasmon_data_info(df)
# Only this section additionally plots every column on its own.
sfel_gasmon_plot_cols(df)
sfel_gasmon_plot_pepcompare(df, model=pep_pred_fn1, s_or_t='s')
# ## b
fname = datafiles[1]
print(f'Datafile: {fname}')
df = sfel_gasmon_h5read(os.path.join(datadir,fname))
df = sfel_gasmon_h5preproc(df)
sfel_gasmon_data_info(df)
sfel_gasmon_plot_pepcompare(df, model=pep_pred_fn1, s_or_t='s')
# ## c
fname = datafiles[2]
print(f'Datafile: {fname}')
df = sfel_gasmon_h5read(os.path.join(datadir,fname))
df = sfel_gasmon_h5preproc(df)
sfel_gasmon_data_info(df)
sfel_gasmon_plot_pepcompare(df, model=pep_pred_fn1, s_or_t='s')
# ## d
fname = datafiles[3]
print(f'Datafile: {fname}')
df = sfel_gasmon_h5read(os.path.join(datadir,fname))
df = sfel_gasmon_h5preproc(df)
# NOTE(review): df2 was created in the test section from datafiles[1], not
# from this section's file - presumably `df.describe()` was meant; confirm.
df2.describe()
sfel_gasmon_data_info(df)
# TODO: it seems that the first row(s) of the non-CALC columns sometimes contain NaN. This is then
# not interpolated.... need to reverse interpolate or backwards fill
# NOTE(review): sfel_gasmon_h5preproc appears to backfill after the
# interpolation in this commit, so the TODO above may already be resolved;
# the isna() check below can be used to confirm.
df.Voltage_s.isna()
sfel_gasmon_plot_pepcompare(df, model=pep_pred_fn1, s_or_t='s')
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment