Commit f10627b7 authored by bellotti_r's avatar bellotti_r
Browse files

Merge branch 'master' into 'master'

Master

See merge request !4
parents 8c2dc855 3efb909f
import hashlib
"""
Simple path name generator that ensures that actual path lengths do not exceed
the UNIX 255 chars.
Directory names that are longer than 255 chars will be shortened to a sha
hash. The mapping can later be written to stdout or file to have access to the
original filename.
"""
class PathNameGenerator:
"""
Ensures that actual path lengths do not exceed the UNIX 255 chars.
def __init__(self):
Directory names that are longer than 255 chars will be shortened to a sha
hash. The mapping can later be written to stdout or file to have access to
the original filename.
"""
def __init__(self):
self.mapping = {}
self.max_path_length = 160
def __str__(self):
mapping = ""
for hash_value, dir_name in self.mapping.items():
mapping += hash_value + " => " + dir_name + "\n"
return mapping
def compress(self, path_name):
if len(path_name) < self.max_path_length:
return path_name
......@@ -37,5 +32,3 @@ class PathNameGenerator:
self.mapping[path_name_hex] = path_name
return path_name_hex
from .simulation import Simulation
from .opaldict import OpalDict
from .slurmjob import SlurmJob
from .opalrunner import OpalRunner
from PathNameGenerator import PathNameGenerator
from .PathNameGenerator import PathNameGenerator
from decimal import Decimal
from ast import literal_eval
import sys
......@@ -9,9 +9,20 @@ OpalDictionary class
@author: Yves Ineichen
@version: 0.1
"""
class OpalDict:
'''
This file contains values from a .data file, plus some user provided values.
'''
def __init__(self, template):
'''
Parameters
==========
template: str
Path to the .data file.
'''
self.dict = {}
self.rangevars = {}
self.uservars = []
......@@ -39,13 +50,19 @@ class OpalDict:
return self.dict.items()
def fillDictionary(self, fileName):
    '''
    Read the given .data file and add its key-value pairs to self.dict.

    Every line that is neither empty nor commented out must consist of a
    name and a value separated by whitespace. Everything after a `#` is
    treated as a comment and ignored. Values are stored as strings.

    Parameters
    ==========
    fileName: str
        Path to the .data file.

    Raises
    ======
    ValueError
        If a line does not consist of exactly one name and one value.
    '''
    # the context manager closes the file even if a line is malformed
    with open(fileName, "r") as fp:
        for lineNumber, line in enumerate(fp, start=1):
            # cut off comments; a fully outcommented line becomes empty
            content = line.split("#")[0]
            # the name-value pairs are separated by whitespace
            tokens = content.split()
            if not tokens:
                # empty, whitespace-only or comment-only line
                continue
            if len(tokens) != 2:
                raise ValueError(
                    f'{fileName}:{lineNumber}: expected "name value", '
                    f'got {content.strip()!r}')
            name, val = tokens
            self.dict[name] = val
......@@ -69,7 +86,7 @@ class OpalDict:
#if self.dict.has_key(var):
self.dict[var] = float(self.dict[var])*scaleWith
def getType(self,s):
def getType(self, s):
try:
return int(s)
except ValueError:
......@@ -85,6 +102,14 @@ class OpalDict:
self.scaleDictVar('GUNSOLB', 1.)
def addUserValues(self, argv):
'''
Add user-provided key-value pairs to those from the .data file.
Parameters
==========
argv: str
Command line arguments to runOPAL.py
'''
for arg in argv:
if arg.find("=") > 0:
......@@ -95,7 +120,7 @@ class OpalDict:
if var in self.dict:
#if self.dict.has_key(var):
#check if we have a range
# check if we have a range
if rhs.find(':') > 0:
range = rhs.split(":")
if len(range) == 3:
......@@ -105,18 +130,18 @@ class OpalDict:
self.rangevars[var] = rvar
self.numRanges = self.numRanges + 1
else:
print( "OpalDict: Range has to be of the form from:to:step!")
print("OpalDict: Range has to be of the form from:to:step!")
sys.exit(1)
else:
try:
val = literal_eval(rhs)
if (isinstance(val, int) or isinstance(val, float)):
self.uservars.append( (var, Decimal(rhs)) )
self.uservars.append((var, Decimal(rhs)))
self.dict[var] = Decimal(rhs) #self.getType(rhs)
except: # add string
self.uservars.append( (var, rhs) )
self.dict[var] = rhs
else:
if var.find("--") < 0: # not a regular option
print( 'OpalDict: Key (' + var + ')not found can not add to dictionary, check the OPAL template file')
print(f'OpalDict: Key ({var}) not found cannot add to dictionary, check the OPAL template file')
sys.exit(1)
import os
import json
import numpy as np
import pandas as pd
from scipy.interpolate import interp1d
from runOPAL import OpalDict, Simulation, SlurmJob
from mllib.data.opal_stat_file_to_dataframe import StatFile
class OpalRunner:
    '''
    Enqueues OPAL simulations for tables of design variables and gives
    access to the interpolated .stat file output of the finished runs.
    '''

    def __init__(self,
                 input_directory,
                 output_directory,
                 fieldmap_directory,
                 base_name,
                 hyperthreading=0,
                 quiet=True,
                 partition='hourly',
                 slurm_time='00:59:59',
                 slurm_ram='16'):
        '''
        Initialise the runner.

        Parameters
        ==========
        input_directory: str
            Directory where the `<base_name>.data` file is stored.
            Must also contain a file `tmpl/<base_name>.tmpl`.
        output_directory: str
            Directory where all output files are written to.
            If multiple design variables are given, the output of each is
            written to a subdirectory of `output_directory`. The name of the
            subdirectory is the row index of the design variable configuration.
        fieldmap_directory: str
            Directory where the fieldmaps are stored.
        base_name: str
            Name of the .data file without the extension.
            The template file has `base_name` as its base name, too.
        hyperthreading: int (optional)
            Defines the number of Hyper-Threads used. Default: 0
        quiet: bool (optional)
            Whether to silence output. Default: True
        partition: str (optional)
            SLURM partition to run the jobs in. Default: 'hourly'
        slurm_time: str (optional)
            Maximum runtime of the job on SLURM.
            Must be in the format 'HH:MM:ss'.
            Default: '00:59:59'
        slurm_ram: str (optional)
            How much RAM [GB] to allocate for a single job. Default: '16'
        '''
        self._input_dir = input_directory
        self._total_output_dir = output_directory
        self._fieldmap_dir = fieldmap_directory
        self._base_name = base_name
        self._tmpl_file = f'{input_directory}/tmpl/{base_name}.tmpl'
        self._data_file = f'{input_directory}/{base_name}.data'
        self._hyperthreading = hyperthreading
        self._quiet = quiet
        self._partition = partition
        self._slurm_time = slurm_time
        self._slurm_ram = slurm_ram

    def run_configurations(self, design_variables):
        '''
        Enqueues OPAL simulations for the given design variables.

        The output of each run is written to a separate subdirectory.
        Additional to the OPAL output, a file `dvar_values.json` representing
        the design values is written to each subdirectory.

        Parameters
        ==========
        design_variables: pandas.DataFrame
            A DataFrame containing the input variables.
            Each row is a configuration. The column names are the names of the
            design values as they would be put in the .data file.

        Returns
        =======
        list of str
            A list containing the SLURM IDs of the enqueued jobs.
            The jobs have just been submitted to SLURM, they have not
            necessarily run yet.
        '''
        # fixed settings that are forwarded unchanged to Simulation.run()
        do_test = False
        do_keep = False
        do_no_batch = False
        do_optimise = False
        info = 6

        # commands to execute before running OPAL (identical for every row)
        pre_cmd = '\n'.join([
            'module use /afs/psi.ch/project/amas/modulefiles',
            'module load opal-toolchain/master',
        ])

        launched_jobs = []

        for row, dvars in design_variables.iterrows():
            output_path = f'{self._total_output_dir}/{row}'
            # exist_ok avoids the race between checking and creating
            os.makedirs(output_path, exist_ok=True)

            input_file = f'{output_path}/{self._base_name}.in'

            # Log the design variable configuration.
            dvar_values = dvars.to_dict()
            with open(f'{output_path}/dvar_values.json', 'w') as file:
                json.dump(dvar_values, file, indent=4)

            # Collect the values from the .data file.
            parameters = OpalDict(self._data_file)

            # Add the design variables to the parameters that will be
            # substituted in the template file.
            for key, val in dvar_values.items():
                parameters[key] = val

            # os.environ only accepts strings; str() keeps e.g. slurm_ram=16
            # from raising a TypeError.
            # NOTE(review): presumably these are read when the job is set
            # up by Simulation.run() -- confirm against simulation.py.
            os.environ['FIELDMAPS'] = str(self._fieldmap_dir)
            os.environ['SLURM_TIME'] = str(self._slurm_time)
            os.environ['SLURM_PARTITION'] = str(self._partition)
            os.environ['SLURM_RAM'] = str(self._slurm_ram)

            # commands to execute after running OPAL
            post_cmd = '\n'.join([
                f'rm {output_path}/*.lbal',
                f'rm {output_path}/*.h5',
            ])

            # Queue the simulation.
            sim = Simulation(parameters)
            job_ID = sim.run(row, self._base_name, self._input_dir,
                             self._tmpl_file, input_file,
                             do_test, do_keep, do_no_batch, do_optimise,
                             info, self._partition, self._hyperthreading,
                             self._quiet,
                             preCommand=pre_cmd,
                             postCommand=post_cmd)

            launched_jobs.append(job_ID)

        return launched_jobs

    def run_configurations_blocking(self, design_variables):
        '''
        Run the design variable configurations in a blocking way.

        Calls self.run_configurations(design_variables) and waits for
        completion of all jobs.

        Parameters
        ==========
        design_variables: pandas.DataFrame
            Passed on unchanged to run_configurations().

        Returns
        =======
        IDs: list of str
            The job IDs returned by run_configurations().
        '''
        IDs = self.run_configurations(design_variables)

        for ID in IDs:
            SlurmJob(ID).wait_for_completion()

        return IDs

    class Result:
        '''
        Callable that evaluates a list of functions at one position and
        collects their results as the rows of a pandas.DataFrame.
        '''

        def __init__(self, functions, columns, index=None):
            '''
            Parameters
            ==========
            functions: list of callable
                Each function is called with the position and must return
                one value per column.
            columns: list of str
                Column names including 'Path length'. The list is copied;
                the 'Path length' entry is removed from the copy.
            index: list (optional)
                Row labels of the returned DataFrame, one per function.
                Default: None (rows are numbered from 0).

            Raises
            ======
            ValueError
                If 'Path length' is not contained in `columns`.
            '''
            self._functions = functions

            # copy, so that the list of the caller stays untouched
            columns = list(columns)
            columns.remove('Path length')
            self._columns = columns

            self._index = None if index is None else list(index)

        def __call__(self, s):
            '''
            Evaluate all functions at `s`.

            Returns
            =======
            pandas.DataFrame
                One row per function result, stacked in function order.
            '''
            rows = [f(s) for f in self._functions]

            if not rows:
                # np.vstack() refuses an empty sequence
                return pd.DataFrame(columns=self._columns)

            result = np.vstack(rows)

            # Only label the rows if there is exactly one label per row.
            # An array-valued `s` yields several rows per function; in that
            # case the default numbering is kept.
            index = self._index
            if index is not None and len(index) != result.shape[0]:
                index = None

            return pd.DataFrame(data=result, columns=self._columns,
                                index=index)

    def get_quantities_of_interest(self, stat_file_columns, dvar_IDs,
                                   kind='slinear'):
        '''
        Returns a function that allows to evaluate the quantities of interest.

        This function assumes that all jobs have already finished successfully.

        Parameters
        ==========
        stat_file_columns: list of str
            Columns of the .stat files that are interesting.
            The list is not modified.
        dvar_IDs: list
            Must be the indices of a pandas.DataFrame that was used as input
            to run_configurations() or run_configurations_blocking() earlier.
        kind: str
            Which kind of interpolation to perform.
            Must be a valid `kind` parameter for `scipy.interpolate.interp1d`.

        Returns
        =======
        callable(float)
            The callable takes the longitudinal position as its only argument.
            It returns a pandas.DataFrame whose column names are
            the `stat_file_columns` (without 'Path length').
            The indices are the `dvar_IDs`.
            The function interpolates the .stat file values of the given
            columns, and returns the values at the desired position.
        '''
        # 'Path length' is needed as the interpolation axis. Work on a copy
        # so the list of the caller does not grow as a side effect.
        columns = list(stat_file_columns)
        if 'Path length' not in columns:
            columns.append('Path length')

        dvar_IDs = list(dvar_IDs)
        functions = []

        for ID in dvar_IDs:
            # get the path to the .stat file
            output_dir = f'{self._total_output_dir}/{ID}'
            output_path = f'{output_dir}/{self._base_name}.stat'

            # load the relevant content
            df = StatFile(output_path).getDataFrame()
            df = df[columns]

            # interpolate
            s_fix = df['Path length'].values
            y_fix = df.drop(columns='Path length').values

            f = interp1d(
                    s_fix, y_fix,
                    axis=0,
                    kind=kind,
                    bounds_error=False,
                    fill_value='extrapolate')

            functions.append(f)

        return self.Result(functions, columns, index=dvar_IDs)
......@@ -9,28 +9,41 @@ Script that launches OPAL simulations
"""
import sys,os,shutil,glob
import subprocess
import sys
import os
import shutil
import glob
from simulation import Simulation
from opaldict import OpalDict
def getPaths(path, pattern, name):
    '''
    Return the paths of the files in `path` that match `pattern`.

    Parameters
    ==========
    path: str
        Directory to search in.
    pattern: str
        Glob pattern the file names must match, e.g. '*.tmpl'.
    name: str
        Description of the file kind, only used in the error message.

    Returns
    =======
    list of str
        The matching paths in sorted order. glob itself returns them in
        arbitrary order; sorting makes "the first match" well-defined.

    If no file matches, a message is printed and the program exits
    with status 1.
    '''
    result = sorted(glob.glob(os.path.join(path, pattern)))

    if not result:
        print('No ' + name + ' file (' + pattern + ') found')
        # non-zero status, so calling scripts can detect the failure
        sys.exit(1)

    return result


def getTemplatePaths(parentDir, pattern):
    '''
    Return the paths to template files.

    The template files are files in the parent directory
    that follow the given pattern.

    Returns
    =======
    The paths to the template files, in sorted order.
    Exits with status 1 if there is none.
    '''
    return getPaths(parentDir, pattern, 'template')
def getBaseName(inputfilePath):
    '''
    Guess the common base name of the template and the data file.

    Parameters
    ==========
    inputfilePath: str
        Directory that contains the *.tmpl files.

    Returns
    =======
    str
        File name of the chosen template without the '.tmpl' extension.

    If there is no file `<name>.data` in the current working directory,
    a message is printed and the program exits with status 1.
    '''
    templates = getTemplatePaths(inputfilePath, '*.tmpl')

    # NOTE: choose first (alphanumeric order) *.tmpl file by default.
    # min() makes this independent of the order the paths arrive in.
    first = min(templates)
    name = os.path.splitext(os.path.basename(first))[0]

    if os.path.isfile(os.path.join('.', name + '.data')):
        return name

    print(f'Template and data filename do not match, {name}.data expected')
    # non-zero status, so calling scripts can detect the failure
    sys.exit(1)
......@@ -57,8 +70,9 @@ def printUsage():
print("Important: runOPAL is currently not compatible with the command SAMPLE")
def checkCompat(tmplFile, incompatible): #NOTE: SAMPLE command not compatible with runOPAL (issue #8)
templateFile = open(tmplFile,'r')
# NOTE: SAMPLE command not compatible with runOPAL (issue #8)
def checkCompat(tmplFile, incompatible):
templateFile = open(tmplFile, 'r')
for line in templateFile:
if line.startswith('//'):
continue
......@@ -70,9 +84,9 @@ def checkCompat(tmplFile, incompatible): #NOTE: SAMPLE command not compatible wi
def traverseRanges(list, opaldict, args, doNobatch):
"""
Traverse all possible combinations of range variable values. Start simulation
once all range variables are fixed to a value. A list entry has the following
structure:
Traverse all possible combinations of range variable values.
Start simulation once all range variables are fixed to a value.
A list entry has the following structure:
['name of var', start_value, end_value, step_value]
"""
head = list[0]
......@@ -82,11 +96,12 @@ def traverseRanges(list, opaldict, args, doNobatch):
step = head[1][2]
qid = -1
if curval > endval:
print('range is empty, start value',curval,'needs to be higher than end value',endval)
print('range is empty, start value', curval,
'needs to be higher than end value', endval)
while curval <= endval:
opaldict[head[0]] = curval
if len(tail) == 0:
#run simulation
# run simulation
sim = Simulation(opaldict)
qid = sim.run(*args)
if doNobatch:
......@@ -107,7 +122,7 @@ def main(argv):
doTest = False
doKeep = False
doNobatch = False
doOptimize = True #NOTE: this flag is opposite of --noopt
doOptimize = True # NOTE: this flag is opposite of --noopt
queue = ""
info = 6
hypert = 0
......@@ -139,7 +154,7 @@ def main(argv):
elif arg.startswith("--hypert"):
hypert = int(arg.split("=")[1])
elif arg.startswith("-"):
print(arg,'is not a valid option, see --help for the available options')
print(arg, 'is not a valid option, see --help for the available options')
exit()
# safety check
......@@ -149,37 +164,37 @@ def main(argv):
# determine what kind of job should be run, simulation by default
if doOptimize and os.environ.get('OPTIMIZER'):
if quiet == False:
if not quiet:
print('job type: OPTIMIZATION')
inputfilePath = os.environ.get('OPTIMIZER')
if not (inputfilePath and glob.glob(os.path.join(inputfilePath,'*.tmpl'))):
if quiet == False:
if not (inputfilePath and glob.glob(os.path.join(inputfilePath, '*.tmpl'))):
if not quiet:
print('job type: SIMULATION')
doOptimize = False
if os.environ.get('TEMPLATES'):
inputfilePath = os.environ.get('TEMPLATES')
elif (glob.glob(os.path.join('.','*.tmpl'))):
elif (glob.glob(os.path.join('.', '*.tmpl'))):
inputfilePath = '../'
else:
print('Template file unknown -> exiting ...')
sys.exit()
#check that tmpl and data files can be found or guessed
# check that tmpl and data files can be found or guessed
if not baseFileName:
baseFileName = getBaseName(inputfilePath)
elif not os.path.isfile(os.path.join(inputfilePath,baseFileName+'.tmpl')):
print(baseFileName+'.tmpl cannot be found! Check if it exists in '+inputfilePath)
elif not os.path.isfile(os.path.join(inputfilePath, baseFileName+'.tmpl')):
print(f'{baseFileName}.tmpl cannot be found! Check if it exists in {inputfilePath}')
sys.exit()
if quiet == False:
if not quiet:
print('baseFileName = '+baseFileName)
dataFile = baseFileName + '.data'
tmplFile = os.path.join(inputfilePath,baseFileName+'.tmpl')
tmplFile = os.path.join(inputfilePath, baseFileName + '.tmpl')
oinpFile = baseFileName + '.in' # the resulting OPAL input file
checkCompat(tmplFile, ['SAMPLE']) # check compatibility
#create the dictionary
# create the dictionary
opaldict = OpalDict(dataFile)
# check if template values must be changed
# if so add update the dictionary with the default values
......@@ -188,16 +203,18 @@ def main(argv):
if not opaldict.hasRanges():
sim = Simulation(opaldict)
qid = sim.run(N, baseFileName, inputfilePath, tmplFile, oinpFile, doTest, doKeep, doNobatch, doOptimize, info, queue, hypert, quiet)
qid = sim.run(N, baseFileName, inputfilePath, tmplFile, oinpFile,
doTest, doKeep, doNobatch, doOptimize,
info, queue, hypert, quiet)
if doNobatch:
if quiet == False:
print( "... finished!\n")
if not quiet:
print("... finished!\n")
#else:
# print( "SGE-ID= {}\n".format(qid))
else:
ranges = opaldict.Range()
#create range toplevel dir
# create range toplevel dir
dirname = baseFileName
for p in opaldict.uservars:
dirname += "_" + str(p[0]) + "=" + str(p[1])
......@@ -206,9 +223,9 @@ def main(argv):
# If there's already a directory remove it...
if os.path.isdir(dirname):
if doKeep:
print( 'KEEP existing directory ', dirname)
print('KEEP existing directory ', dirname)
else:
print( 'REMOVE existing directory', dirname)
print('REMOVE existing directory', dirname)
shutil.rmtree(dirname)
# create directory and change to the directory
os.mkdir(dirname)
......@@ -218,8 +235,10 @@ def main(argv):
os.chdir(dirname)
print(ranges)
#run simulations of all possible combinations
args = [N, baseFileName, inputfilePath, tmplFile, oinpFile, doTest, doKeep, doNobatch, doOptimize, info, queue, hypert, quiet]
# run simulations of all possible combinations
args = [N, baseFileName, inputfilePath, tmplFile, oinpFile,
doTest, doKeep, doNobatch, doOptimize,
info, queue, hypert, quiet]
traverseRanges(list(ranges.items()), opaldict, args, doNobatch)
# clean up
......@@ -227,6 +246,6 @@ def main(argv):
os.chdir("..")
#call main
# call main
if __name__ == "__main__":
main(sys.argv[1:])
This diff is collapsed.
import subprocess
import time
class SlurmJob:
'''
Class representing a SLURM job.
'''
def __init__(self, ID):
'''
Parameters
==========
ID: int or str
Identification number of the slurm job.
'''
self._ID = ID
@property
def status(self):
'''
Returns the current job status.
'''
cmd = 'sacct -j {} -o state'.format(self._ID).<