Commit 9db95c9d authored by feichtinger's avatar feichtinger
Browse files

added some visualizations of the original data

parent 344079bb
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -15,7 +15,7 @@
# + {"toc": true, "cell_type": "markdown"}
# <h1>Table of Contents<span class="tocSkip"></span></h1>
# <div class="toc"><ul class="toc-item"><li><span><a href="#Configuration" data-toc-modified-id="Configuration-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Configuration</a></span></li><li><span><a href="#Support-Routines" data-toc-modified-id="Support-Routines-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Support Routines</a></span><ul class="toc-item"><li><span><a href="#Visualization" data-toc-modified-id="Visualization-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Visualization</a></span></li></ul></li><li><span><a href="#Dataset-creation" data-toc-modified-id="Dataset-creation-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Dataset creation</a></span><ul class="toc-item"><li><span><a href="#Dataset-reading-and-preprocessing-definition" data-toc-modified-id="Dataset-reading-and-preprocessing-definition-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Dataset reading and preprocessing definition</a></span></li><li><span><a href="#Make-dataset" data-toc-modified-id="Make-dataset-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>Make dataset</a></span></li><li><span><a href="#Training/Test-Split" data-toc-modified-id="Training/Test-Split-3.3"><span class="toc-item-num">3.3&nbsp;&nbsp;</span>Training/Test Split</a></span></li><li><span><a href="#Data-scaling-for-DNN-training" data-toc-modified-id="Data-scaling-for-DNN-training-3.4"><span class="toc-item-num">3.4&nbsp;&nbsp;</span>Data scaling for DNN training</a></span></li></ul></li><li><span><a href="#DNN-Model-definitions" data-toc-modified-id="DNN-Model-definitions-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>DNN Model definitions</a></span><ul class="toc-item"><li><span><a href="#L2reg-and-gaussian-noise" data-toc-modified-id="L2reg-and-gaussian-noise-4.1"><span class="toc-item-num">4.1&nbsp;&nbsp;</span>L2reg and gaussian noise</a></span></li><li><span><a href="#Model-with-Dropout" data-toc-modified-id="Model-with-Dropout-4.2"><span class="toc-item-num">4.2&nbsp;&nbsp;</span>Model with Dropout</a></span></li></ul></li><li><span><a href="#DNN-Training-runs-(data-not-corrected-for-zeroes)" data-toc-modified-id="DNN-Training-runs-(data-not-corrected-for-zeroes)-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>DNN Training runs (data not corrected for zeroes)</a></span><ul class="toc-item"><li><span><a href="#Andi's-initial-DNN-using-gn" data-toc-modified-id="Andi's-initial-DNN-using-gn-5.1"><span class="toc-item-num">5.1&nbsp;&nbsp;</span>Andi's initial DNN using gn</a></span></li><li><span><a href="#without-any-regularization" data-toc-modified-id="without-any-regularization-5.2"><span class="toc-item-num">5.2&nbsp;&nbsp;</span>without any regularization</a></span><ul class="toc-item"><li><span><a href="#Investigation-of-model-performance-errors" data-toc-modified-id="Investigation-of-model-performance-errors-5.2.1"><span class="toc-item-num">5.2.1&nbsp;&nbsp;</span>Investigation of model performance errors</a></span></li><li><span><a href="#outlier-investigation-after-answer-by-Jochem/Pavle" data-toc-modified-id="outlier-investigation-after-answer-by-Jochem/Pavle-5.2.2"><span class="toc-item-num">5.2.2&nbsp;&nbsp;</span>outlier investigation after answer by Jochem/Pavle</a></span></li></ul></li><li><span><a href="#Trying-to-reproduce-best-hyperscan-run" data-toc-modified-id="Trying-to-reproduce-best-hyperscan-run-5.3"><span class="toc-item-num">5.3&nbsp;&nbsp;</span>Trying to reproduce best hyperscan run</a></span></li><li><span><a href="#Try-out-DNN-with-dropout" data-toc-modified-id="Try-out-DNN-with-dropout-5.4"><span class="toc-item-num">5.4&nbsp;&nbsp;</span>Try out DNN with dropout</a></span></li></ul></li><li><span><a href="#Hyperparameter-scans-(data-not-corrected-for-zeroes)" data-toc-modified-id="Hyperparameter-scans-(data-not-corrected-for-zeroes)-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Hyperparameter scans (data not corrected for zeroes)</a></span><ul class="toc-item"><li><span><a href="#Test:-Make-a-hyperparameter-scan-A" data-toc-modified-id="Test:-Make-a-hyperparameter-scan-A-6.1"><span class="toc-item-num">6.1&nbsp;&nbsp;</span>Test: Make a hyperparameter scan A</a></span></li><li><span><a href="#Offline-batch-parameter-scan" data-toc-modified-id="Offline-batch-parameter-scan-6.2"><span class="toc-item-num">6.2&nbsp;&nbsp;</span>Offline batch parameter scan</a></span></li><li><span><a href="#TODO-ModelB" data-toc-modified-id="TODO-ModelB-6.3"><span class="toc-item-num">6.3&nbsp;&nbsp;</span>TODO ModelB</a></span></li><li><span><a href="#TODO:-Model-C:-scan-regulatisation-and-noise" data-toc-modified-id="TODO:-Model-C:-scan-regulatisation-and-noise-6.4"><span class="toc-item-num">6.4&nbsp;&nbsp;</span>TODO: Model C: scan regulatisation and noise</a></span></li></ul></li><li><span><a href="#DNN-runs-with-data-set-cleaned-for-zero-energy-measurements" data-toc-modified-id="DNN-runs-with-data-set-cleaned-for-zero-energy-measurements-7"><span class="toc-item-num">7&nbsp;&nbsp;</span>DNN runs with data set cleaned for zero energy measurements</a></span><ul class="toc-item"><li><span><a href="#without-any-regularization" data-toc-modified-id="without-any-regularization-7.1"><span class="toc-item-num">7.1&nbsp;&nbsp;</span>without any regularization</a></span></li></ul></li><li><span><a href="#SVM-to-see-what-a-linear-model-can-do" data-toc-modified-id="SVM-to-see-what-a-linear-model-can-do-8"><span class="toc-item-num">8&nbsp;&nbsp;</span>SVM to see what a linear model can do</a></span></li></ul></div>
# <div class="toc"><ul class="toc-item"><li><span><a href="#Configuration" data-toc-modified-id="Configuration-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Configuration</a></span></li><li><span><a href="#Support-Routines" data-toc-modified-id="Support-Routines-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Support Routines</a></span><ul class="toc-item"><li><span><a href="#Visualization" data-toc-modified-id="Visualization-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Visualization</a></span></li></ul></li><li><span><a href="#Dataset-creation" data-toc-modified-id="Dataset-creation-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Dataset creation</a></span><ul class="toc-item"><li><span><a href="#Dataset-reading-and-preprocessing-definition" data-toc-modified-id="Dataset-reading-and-preprocessing-definition-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Dataset reading and preprocessing definition</a></span></li><li><span><a href="#Make-dataset" data-toc-modified-id="Make-dataset-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>Make dataset</a></span></li><li><span><a href="#Examining-data" data-toc-modified-id="Examining-data-3.3"><span class="toc-item-num">3.3&nbsp;&nbsp;</span>Examining data</a></span></li><li><span><a href="#Training/Test-Split" data-toc-modified-id="Training/Test-Split-3.4"><span class="toc-item-num">3.4&nbsp;&nbsp;</span>Training/Test Split</a></span></li><li><span><a href="#Data-scaling-for-DNN-training" data-toc-modified-id="Data-scaling-for-DNN-training-3.5"><span class="toc-item-num">3.5&nbsp;&nbsp;</span>Data scaling for DNN training</a></span></li></ul></li><li><span><a href="#DNN-Model-definitions" data-toc-modified-id="DNN-Model-definitions-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>DNN Model definitions</a></span><ul class="toc-item"><li><span><a href="#L2reg-and-gaussian-noise" data-toc-modified-id="L2reg-and-gaussian-noise-4.1"><span class="toc-item-num">4.1&nbsp;&nbsp;</span>L2reg and gaussian noise</a></span></li><li><span><a href="#Model-with-Dropout" data-toc-modified-id="Model-with-Dropout-4.2"><span class="toc-item-num">4.2&nbsp;&nbsp;</span>Model with Dropout</a></span></li></ul></li><li><span><a href="#DNN-Training-runs-(data-not-corrected-for-zeroes)" data-toc-modified-id="DNN-Training-runs-(data-not-corrected-for-zeroes)-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>DNN Training runs (data not corrected for zeroes)</a></span><ul class="toc-item"><li><span><a href="#Andi's-initial-DNN-using-gn" data-toc-modified-id="Andi's-initial-DNN-using-gn-5.1"><span class="toc-item-num">5.1&nbsp;&nbsp;</span>Andi's initial DNN using gn</a></span></li><li><span><a href="#without-any-regularization" data-toc-modified-id="without-any-regularization-5.2"><span class="toc-item-num">5.2&nbsp;&nbsp;</span>without any regularization</a></span><ul class="toc-item"><li><span><a href="#Investigation-of-model-performance-errors" data-toc-modified-id="Investigation-of-model-performance-errors-5.2.1"><span class="toc-item-num">5.2.1&nbsp;&nbsp;</span>Investigation of model performance errors</a></span></li><li><span><a href="#outlier-investigation-after-answer-by-Jochem/Pavle" data-toc-modified-id="outlier-investigation-after-answer-by-Jochem/Pavle-5.2.2"><span class="toc-item-num">5.2.2&nbsp;&nbsp;</span>outlier investigation after answer by Jochem/Pavle</a></span></li></ul></li><li><span><a href="#Trying-to-reproduce-best-hyperscan-run" data-toc-modified-id="Trying-to-reproduce-best-hyperscan-run-5.3"><span class="toc-item-num">5.3&nbsp;&nbsp;</span>Trying to reproduce best hyperscan run</a></span></li><li><span><a href="#Try-out-DNN-with-dropout" data-toc-modified-id="Try-out-DNN-with-dropout-5.4"><span class="toc-item-num">5.4&nbsp;&nbsp;</span>Try out DNN with dropout</a></span></li></ul></li><li><span><a href="#Hyperparameter-scans-(data-not-corrected-for-zeroes)" data-toc-modified-id="Hyperparameter-scans-(data-not-corrected-for-zeroes)-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Hyperparameter scans (data not corrected for zeroes)</a></span><ul class="toc-item"><li><span><a href="#Test:-Make-a-hyperparameter-scan-A" data-toc-modified-id="Test:-Make-a-hyperparameter-scan-A-6.1"><span class="toc-item-num">6.1&nbsp;&nbsp;</span>Test: Make a hyperparameter scan A</a></span></li><li><span><a href="#Offline-batch-parameter-scan" data-toc-modified-id="Offline-batch-parameter-scan-6.2"><span class="toc-item-num">6.2&nbsp;&nbsp;</span>Offline batch parameter scan</a></span></li><li><span><a href="#TODO-ModelB" data-toc-modified-id="TODO-ModelB-6.3"><span class="toc-item-num">6.3&nbsp;&nbsp;</span>TODO ModelB</a></span></li><li><span><a href="#TODO:-Model-C:-scan-regulatisation-and-noise" data-toc-modified-id="TODO:-Model-C:-scan-regulatisation-and-noise-6.4"><span class="toc-item-num">6.4&nbsp;&nbsp;</span>TODO: Model C: scan regulatisation and noise</a></span></li></ul></li><li><span><a href="#DNN-runs-with-data-set-cleaned-for-zero-energy-measurements" data-toc-modified-id="DNN-runs-with-data-set-cleaned-for-zero-energy-measurements-7"><span class="toc-item-num">7&nbsp;&nbsp;</span>DNN runs with data set cleaned for zero energy measurements</a></span><ul class="toc-item"><li><span><a href="#without-any-regularization" data-toc-modified-id="without-any-regularization-7.1"><span class="toc-item-num">7.1&nbsp;&nbsp;</span>without any regularization</a></span></li></ul></li><li><span><a href="#SVM-to-see-what-a-linear-model-can-do" data-toc-modified-id="SVM-to-see-what-a-linear-model-can-do-8"><span class="toc-item-num">8&nbsp;&nbsp;</span>SVM to see what a linear model can do</a></span></li></ul></div>
# +
import os
......@@ -306,7 +306,7 @@ def makeDataSetInterpolated2(directory, excelFn, doInterpolate=True, dropBadPuls
for filename in sorted(glob.glob(os.path.join(directory,'dp*-nomeans.csv'))):
mat = re.match('^dp(\d+).*', os.path.basename(filename))
if mat is None:
print(f"WARNING: Could not parse number part of file {os.path.basename(filename)}")
print(f"WARNING: Could not parse number part of filename {os.path.basename(filename)}")
continue
expNumber = int(mat.group(1))
file_excel = pd.read_excel(excelFn)
......@@ -366,11 +366,51 @@ data.tail()
#nr files actually used
data['rawDataFile'].nunique()
data['SPECTRUM_CENTER'].hist(bins=100)
# Some data files contain almost no measurement points
data['PHOTON-ENERGY-PER-PULSE'].hist(bins=100)
fig, ax = plt.subplots(figsize=(16,5))
data['rawDataFile'].value_counts().sort_index().plot(kind='bar', ax=ax)
tmp = ax.set_xticklabels(ax.get_xticklabels(),rotation=45,horizontalalignment='right')
# ## Examining data
fig = plt.figure(figsize=(12,12))
fchoice = "01,30,55,76"
#colname = "XeMultVoltag"
colname = "CALCS"
nfigrows = int(sqrt(len(fchoice.split(","))))
for i,fnum in enumerate(fchoice.split(",")):
fname = f"dp{fnum}-nomeans.csv"
ax = fig.add_subplot(nfigrows,nfigrows,i+1)
ax.plot(data.loc[data["rawDataFile"] == fname, colname])
ax.set_xlabel(colname)
# +
# fig,ax = plt.subplots(figsize=(12,12))
fchoice = "01,02,03,08,09,10,34,35,36,64,65,66,76"
#colname = "XeMultVoltag"
colname = ["PHOTON-ENERGY-PER-PULSE","XeMultVoltag","SPECTRUM_CENTER","CALCS","CALCT"]
flist = [f'rawDataFile == "dp{fnum}-nomeans.csv"' for fnum in fchoice.split(",")]
#data.query(" | ".join(flist)).plot(y=colname, marker='o', linestyle='')
tmpdata = data.query(" | ".join(flist)) \
.drop(labels="index", axis=1) \
.reset_index(drop=True).reset_index()
for col in colname:
sns.lmplot(x="index", y=col, hue="rawDataFile",
data=tmpdata,
fit_reg=False, size=4, aspect=3)
# -
fig = plt.figure(figsize=(16,8))
colname = ["PHOTON-ENERGY-PER-PULSE","XeMultVoltag","SPECTRUM_CENTER","CALCS","CALCT"]
for num,col in enumerate(colname):
ax = fig.add_subplot(2,3,num+1)
data[col].hist(bins=100)
ax.set_xlabel(col)
data['XeMultVoltag'].hist(bins=100)
# XePhotEnergyL = 6000. # Ev
# XePhotEnergyH = 12500.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment