Commit f10627b7 authored by bellotti_r's avatar bellotti_r
Browse files

Merge branch 'master' into 'master'

Master

See merge request !4
parents 8c2dc855 3efb909f
import hashlib import hashlib
"""
Simple path name generator that ensures that actual path lengths do not exceed
the UNIX 255 chars.
Directory names that are longer than 255 chars will be shortened to a sha
hash. The mapping can later be writte to stdout or file to have access to the
original filename.
"""
class PathNameGenerator: class PathNameGenerator:
"""
Ensures that actual path lengths do not exceed the UNIX 255 chars.
def __init__(self): Directory names that are longer than 255 chars will be shortened to a sha
hash. The mapping can later be written to stdout or file to have access to
the original filename.
"""
def __init__(self):
self.mapping = {} self.mapping = {}
self.max_path_length = 160 self.max_path_length = 160
def __str__(self): def __str__(self):
mapping = "" mapping = ""
for hash_value, dir_name in self.mapping.items(): for hash_value, dir_name in self.mapping.items():
mapping += hash_value + " => " + dir_name + "\n" mapping += hash_value + " => " + dir_name + "\n"
return mapping return mapping
def compress(self, path_name): def compress(self, path_name):
if len(path_name) < self.max_path_length: if len(path_name) < self.max_path_length:
return path_name return path_name
...@@ -37,5 +32,3 @@ class PathNameGenerator: ...@@ -37,5 +32,3 @@ class PathNameGenerator:
self.mapping[path_name_hex] = path_name self.mapping[path_name_hex] = path_name
return path_name_hex return path_name_hex
from .simulation import Simulation
from .opaldict import OpalDict
from .slurmjob import SlurmJob
from .opalrunner import OpalRunner
from PathNameGenerator import PathNameGenerator from .PathNameGenerator import PathNameGenerator
from decimal import Decimal from decimal import Decimal
from ast import literal_eval from ast import literal_eval
import sys import sys
...@@ -9,9 +9,20 @@ OpalDictionary class ...@@ -9,9 +9,20 @@ OpalDictionary class
@author: Yves Ineichen @author: Yves Ineichen
@version: 0.1 @version: 0.1
""" """
class OpalDict: class OpalDict:
'''
This file contains values from a .data file, plus some user provided values.
'''
def __init__(self, template): def __init__(self, template):
'''
Parameters
==========
template: str
Path to the .data file.
'''
self.dict = {} self.dict = {}
self.rangevars = {} self.rangevars = {}
self.uservars = [] self.uservars = []
...@@ -25,7 +36,7 @@ class OpalDict: ...@@ -25,7 +36,7 @@ class OpalDict:
def __setitem__(self, key, value): def __setitem__(self, key, value):
scalevars = {} scalevars = {}
scalevars['GUNSOLB'] = 1.0 scalevars['GUNSOLB'] = 1.0
try: try:
self.dict[key] = value * scalevars[key] self.dict[key] = value * scalevars[key]
...@@ -39,13 +50,19 @@ class OpalDict: ...@@ -39,13 +50,19 @@ class OpalDict:
return self.dict.items() return self.dict.items()
def fillDictionary(self, fileName): def fillDictionary(self, fileName):
fp = open(fileName,"r") '''
Read the given .data file and add the key-value pairs to self.
'''
fp = open(fileName, "r")
for line in fp: for line in fp:
if not line == "\n": if not line == "\n":
li = line.strip() li = line.strip()
# ignore outcommented lines
if not li.startswith("#"): if not li.startswith("#"):
# cut off comments at the end of the line
aline = line.split("#")[0] aline = line.split("#")[0]
name,val = aline.split() # the name-value pairs are separated by whitespace
name, val = aline.split()
self.dict[name.rstrip()] = val.lstrip().rstrip() self.dict[name.rstrip()] = val.lstrip().rstrip()
fp.close() fp.close()
...@@ -69,7 +86,7 @@ class OpalDict: ...@@ -69,7 +86,7 @@ class OpalDict:
#if self.dict.has_key(var): #if self.dict.has_key(var):
self.dict[var] = float(self.dict[var])*scaleWith self.dict[var] = float(self.dict[var])*scaleWith
def getType(self,s): def getType(self, s):
try: try:
return int(s) return int(s)
except ValueError: except ValueError:
...@@ -85,17 +102,25 @@ class OpalDict: ...@@ -85,17 +102,25 @@ class OpalDict:
self.scaleDictVar('GUNSOLB', 1.) self.scaleDictVar('GUNSOLB', 1.)
def addUserValues(self, argv): def addUserValues(self, argv):
'''
Add user-provided key-value pairs to those from the .data file.
Parameters
==========
argv: str
Command line arguments to runOPAL.py
'''
for arg in argv: for arg in argv:
if arg.find("=") > 0: if arg.find("=") > 0:
data = str(arg.split(" ")) # arguments are separated by spaces data = str(arg.split(" ")) # arguments are separated by spaces
eqsidx = data.find("=") # idx of = eqsidx = data.find("=") # idx of =
var = data[2:eqsidx] var = data[2:eqsidx]
rhs = data[eqsidx+1:len(data)-2] rhs = data[eqsidx+1:len(data)-2]
if var in self.dict: if var in self.dict:
#if self.dict.has_key(var): #if self.dict.has_key(var):
#check if we have a range # check if we have a range
if rhs.find(':') > 0: if rhs.find(':') > 0:
range = rhs.split(":") range = rhs.split(":")
if len(range) == 3: if len(range) == 3:
...@@ -105,18 +130,18 @@ class OpalDict: ...@@ -105,18 +130,18 @@ class OpalDict:
self.rangevars[var] = rvar self.rangevars[var] = rvar
self.numRanges = self.numRanges + 1 self.numRanges = self.numRanges + 1
else: else:
print( "OpalDict: Range has to be of the form from:to:step!") print("OpalDict: Range has to be of the form from:to:step!")
sys.exit(1) sys.exit(1)
else: else:
try: try:
val = literal_eval(rhs) val = literal_eval(rhs)
if (isinstance(val, int) or isinstance(val, float)): if (isinstance(val, int) or isinstance(val, float)):
self.uservars.append( (var, Decimal(rhs)) ) self.uservars.append((var, Decimal(rhs)))
self.dict[var] = Decimal(rhs) #self.getType(rhs) self.dict[var] = Decimal(rhs) #self.getType(rhs)
except: # add string except: # add string
self.uservars.append( (var, rhs) ) self.uservars.append( (var, rhs) )
self.dict[var] = rhs self.dict[var] = rhs
else: else:
if var.find("--") < 0: # not a regular option if var.find("--") < 0: # not a regular option
print( 'OpalDict: Key (' + var + ')not found can not add to dictionary, check the OPAL template file') print(f'OpalDict: Key ({var}) not found cannot add to dictionary, check the OPAL template file')
sys.exit(1) sys.exit(1)
import os
import json
import numpy as np
import pandas as pd
from scipy.interpolate import interp1d
from runOPAL import OpalDict, Simulation, SlurmJob
from mllib.data.opal_stat_file_to_dataframe import StatFile
class OpalRunner:
    '''
    Enqueue OPAL simulations on SLURM for a set of design variable
    configurations and evaluate the resulting `.stat` files.
    '''

    def __init__(self,
                 input_directory,
                 output_directory,
                 fieldmap_directory,
                 base_name,
                 hyperthreading=0,
                 quiet=True,
                 partition='hourly',
                 slurm_time='00:59:59',
                 slurm_ram='16'):
        '''
        Initialise the runner.

        Parameters
        ==========
        input_directory: str
            Directory where the `<base_name>.data` file is stored.
            Must also contain a file `tmpl/<base_name>.tmpl`.
        output_directory: str
            Directory where all output files are written to.
            If multiple design variables are given, the output of each is
            written to a subdirectory of `output_directory`. The name of the
            subdirectory is the row index of the design variable
            configuration.
        fieldmap_directory: str
            Directory where the fieldmaps are stored.
        base_name: str
            Name of the .data file without the extension.
            The template file has `base_name` as its base name, too.
        hyperthreading: int (optional)
            Defines the number of Hyper-Threads used. Default: 0
        quiet: bool (optional)
            Whether to silence output. Default: True
        partition: str (optional)
            SLURM partition to run the jobs in. Default: 'hourly'
        slurm_time: str (optional)
            Maximum runtime of the job on SLURM.
            Must be in the format 'HH:MM:ss'.
            Default: '00:59:59'
        slurm_ram: str (optional)
            How much RAM [GB] to allocate for a single job. Default: '16'
        '''
        self._input_dir = input_directory
        self._total_output_dir = output_directory
        self._fieldmap_dir = fieldmap_directory
        self._base_name = base_name

        self._tmpl_file = f'{input_directory}/tmpl/{base_name}.tmpl'
        self._data_file = f'{input_directory}/{base_name}.data'

        self._hyperthreading = hyperthreading
        self._quiet = quiet
        self._partition = partition
        self._slurm_time = slurm_time
        self._slurm_ram = slurm_ram

    def run_configurations(self, design_variables):
        '''
        Enqueues OPAL simulations for the given design variables.

        The output of each run is written to a separate subdirectory.
        Additional to the OPAL output, a file `dvar_values.json` representing
        the design values is written to each subdirectory.

        Parameters
        ==========
        design_variables: pandas.DataFrame
            A DataFrame containing the input variables.
            Each row is a configuration. The column names are the names of the
            design values as they would be put in the .data file.

        Returns
        =======
        list
            A list containing whatever `Simulation.run()` returns for each
            row (presumably the SLURM job IDs -- confirm against
            `Simulation`). The jobs have just been submitted, they have not
            necessarily run yet.
        '''
        # Fixed options forwarded positionally to Simulation.run().
        do_test = False
        do_keep = False
        do_no_batch = False
        do_optimise = False
        info = 6  # NOTE(review): presumably the OPAL info level -- confirm

        # commands to execute before running OPAL (identical for every row)
        pre_cmd = '\n'.join([
            'module use /afs/psi.ch/project/amas/modulefiles',
            'module load opal-toolchain/master',
        ])

        launched_jobs = []

        for row, dvars in design_variables.iterrows():
            output_path = f'{self._total_output_dir}/{row}'
            # exist_ok avoids the check-then-create race of a manual
            # os.path.exists() test.
            os.makedirs(output_path, exist_ok=True)

            input_file = f'{output_path}/{self._base_name}.in'

            # Log the design variable configuration.
            dvar_values = dvars.to_dict()
            with open(f'{output_path}/dvar_values.json', 'w') as json_file:
                json.dump(dvar_values, json_file, indent=4)

            # Collect the values from the .data file.
            parameters = OpalDict(self._data_file)

            # Add the design variables to the parameters that will be
            # substituted in the template file.
            for key, val in dvar_values.items():
                parameters[key] = val

            os.environ['FIELDMAPS'] = self._fieldmap_dir
            os.environ['SLURM_TIME'] = self._slurm_time
            os.environ['SLURM_PARTITION'] = self._partition
            # Environment values must be strings; accept e.g. slurm_ram=16.
            os.environ['SLURM_RAM'] = str(self._slurm_ram)

            # commands to execute after running OPAL
            post_cmd = '\n'.join([
                f'rm {output_path}/*.lbal',
                f'rm {output_path}/*.h5',
            ])

            # Queue the simulation.
            sim = Simulation(parameters)
            job_ID = sim.run(row, self._base_name, self._input_dir,
                             self._tmpl_file, input_file,
                             do_test, do_keep, do_no_batch, do_optimise,
                             info, self._partition, self._hyperthreading,
                             self._quiet,
                             preCommand=pre_cmd,
                             postCommand=post_cmd)

            launched_jobs.append(job_ID)

        return launched_jobs

    def run_configurations_blocking(self, design_variables):
        '''
        Run the design variable configurations in a blocking way.

        Calls self.run_configurations(design_variables) and waits for
        completion of all jobs.

        Parameters
        ==========
        design_variables: pandas.DataFrame
            Passed on unchanged to run_configurations().

        Returns
        =======
        IDs: list
            The return value of run_configurations().
        '''
        IDs = self.run_configurations(design_variables)

        for ID in IDs:
            SlurmJob(ID).wait_for_completion()

        return IDs

    class Result:
        '''
        Callable that evaluates one function per configuration at a given
        position and collects the results in a pandas.DataFrame.
        '''

        def __init__(self, functions, columns, index=None):
            '''
            Parameters
            ==========
            functions: list of callable
                One callable per configuration. Each is called with the
                position and must return one value per remaining column.
            columns: list of str
                Column names including 'Path length'; the first occurrence
                of 'Path length' is removed from a copy of the list.
            index: list (optional)
                Row labels of the resulting DataFrame, one per function.
                Default: None, i.e. a default integer index.
            '''
            self._functions = functions

            # Work on a copy so that the caller's list is not modified.
            columns = list(columns)
            columns.remove('Path length')
            self._columns = columns

            self._index = None if index is None else list(index)

        def __call__(self, s):
            '''
            Evaluate all functions at `s`.

            Returns
            =======
            pandas.DataFrame
                One row per function, one column per quantity.
            '''
            if not self._functions:
                # np.vstack() cannot stack an empty sequence.
                return pd.DataFrame(columns=self._columns, index=self._index)

            rows = [f(s) for f in self._functions]

            return pd.DataFrame(data=np.vstack(rows),
                                columns=self._columns,
                                index=self._index)

    def get_quantities_of_interest(self, stat_file_columns, dvar_IDs,
                                   kind='slinear'):
        '''
        Returns a function that allows to evaluate the quantities of interest.

        This function assumes that all jobs have already finished
        successfully.

        Parameters
        ==========
        stat_file_columns: list of str
            Columns of the .stat files that are interesting.
            The list is not modified.
        dvar_IDs: list
            Must be the indices of a pandas.DataFrame that was used as input
            to run_configurations() or run_configurations_blocking() earlier.
        kind: str
            Which kind of interpolation to perform.
            Must be a valid `kind` parameter for `scipy.interpolate.interp1d`.

        Returns
        =======
        callable(float)
            The callable takes the longitudinal position as its only argument.
            It returns a pandas.DataFrame whose column names are
            the `stat_file_columns` (without 'Path length').
            The indices are the `dvar_IDs`.
            The function interpolates the .stat file values of the given
            columns, and returns the values at the desired position.
        '''
        # Copy, so that adding the helper column does not leak to the caller.
        columns = list(stat_file_columns)
        if 'Path length' not in columns:
            columns.append('Path length')

        dvar_IDs = list(dvar_IDs)
        functions = []

        for ID in dvar_IDs:
            # get the path to the .stat file
            output_dir = f'{self._total_output_dir}/{ID}'
            output_path = f'{output_dir}/{self._base_name}.stat'

            # load the relevant content
            df = StatFile(output_path).getDataFrame()
            df = df[columns]

            # interpolate all columns at once along the path length
            s_fix = df['Path length'].values
            y_fix = df.drop(columns='Path length').values

            f = interp1d(
                s_fix, y_fix,
                axis=0,
                kind=kind,
                bounds_error=False,
                fill_value='extrapolate')

            functions.append(f)

        return self.Result(functions, columns, index=dvar_IDs)
...@@ -9,28 +9,41 @@ Script that launches OPAL simulations ...@@ -9,28 +9,41 @@ Script that launches OPAL simulations
""" """
import sys,os,shutil,glob import sys
import subprocess import os
import shutil
import glob
from simulation import Simulation from simulation import Simulation
from opaldict import OpalDict from opaldict import OpalDict
def getPaths(path, pattern, name): def getTemplatePaths(parentDir, pattern):
result = glob.glob(os.path.join(path,pattern)) '''
Return the paths to template files.
The template files are files in the parent directory
that follow the given pattern.
Returns
=======
The paths to the template files.
'''
result = glob.glob(os.path.join(parentDir, pattern))
if not result: if not result:
print('No '+name+' file ('+pattern+') found') print(f'No template file ({pattern}) found')
sys.exit() sys.exit()
return result return result
def getBaseName(inputfilePath): def getBaseName(inputfilePath):
templates = getPaths(inputfilePath, '*.tmpl', 'template') templates = getTemplatePaths(inputfilePath, '*.tmpl')
name = templates[0].split('/')[-1][:-5] #NOTE: choose first (alphanumeric order) *.tmpl file by default # NOTE: choose first (alphanumeric order) *.tmpl file by default
if os.path.isfile(os.path.join('.',name+'.data')): name = templates[0].split('/')[-1][:-5]
if os.path.isfile(os.path.join('.', name + '.data')):
return name return name
print('Template and data filename do not match, '+name+'.data expected') print(f'Template and data filename do not match, {name}.data expected')
sys.exit() sys.exit()
...@@ -57,8 +70,9 @@ def printUsage(): ...@@ -57,8 +70,9 @@ def printUsage():
print("Important: runOPAL is currently not compatible with the command SAMPLE") print("Important: runOPAL is currently not compatible with the command SAMPLE")
def checkCompat(tmplFile, incompatible): #NOTE: SAMPLE command not compatible with runOPAL (issue #8) # NOTE: SAMPLE command not compatible with runOPAL (issue #8)
templateFile = open(tmplFile,'r') def checkCompat(tmplFile, incompatible):
templateFile = open(tmplFile, 'r')
for line in templateFile: for line in templateFile:
if line.startswith('//'): if line.startswith('//'):
continue continue
...@@ -70,52 +84,53 @@ def checkCompat(tmplFile, incompatible): #NOTE: SAMPLE command not compatible wi ...@@ -70,52 +84,53 @@ def checkCompat(tmplFile, incompatible): #NOTE: SAMPLE command not compatible wi
def traverseRanges(list, opaldict, args, doNobatch): def traverseRanges(list, opaldict, args, doNobatch):
""" """
Traverse all possible combinations of range variable values. Start simulation Traverse all possible combinations of range variable values.
once all range variables are fixed to a value. A list entry has the following Start simulation once all range variables are fixed to a value.
structure: A list entry has the following structure:
['name of var', start_value, end_value, step_value] ['name of var', start_value, end_value, step_value]
""" """
head = list[0] head = list[0]
tail = list[1:] tail = list[1:]
curval = head[1][0] curval = head[1][0]
endval = head[1][1] endval = head[1][1]
step = head[1][2] step = head[1][2]
qid = -1 qid = -1
if curval > endval: if curval > endval:
print('range is empty, start value',curval,'needs to be higher than end value',endval) print('range is empty, start value', curval,
'needs to be higher than end value', endval)
while curval <= endval: while curval <= endval:
opaldict[head[0]] = curval opaldict[head[0]] = curval
if len(tail) == 0: if len(tail) == 0:
#run simulation # run simulation
sim = Simulation(opaldict) sim = Simulation(opaldict)
qid = sim.run(*args) qid = sim.run(*args)
if doNobatch: if doNobatch:
print("... finished!\n") print("... finished!\n")
else:
print("SGE-ID= {}\n".format(qid))
else: else:
traverseRanges(tail, opaldict, args, doNobatch) print("SGE-ID= {}\n".format(qid))
curval = curval + step else:
traverseRanges(tail, opaldict, args, doNobatch)
curval = curval + step
def main(argv): def main(argv):
""" """
main method main method
""" """
N = -1 # a running number; if given use it to label directory! N = -1 # a running number; if given use it to label directory!
quiet = False quiet = False
doTest = False doTest = False
doKeep = False doKeep = False
doNobatch = False