Commit d1187540 authored by Renato Bellotti
Browse files

Reformatted code to agree more with the Python conventions and be more readable

parent 8c2dc855
......@@ -6,21 +6,24 @@ Simulation class handles batch job related things
@version: 0.1
"""
import sys,os,shutil, subprocess
#import numpy as np
import sys
import os
import shutil
import subprocess
### Helper methods
# Helper methods
def isInDirectory(filepath, directory):
    """Check whether ``filepath`` lies strictly inside ``directory``.

    Both arguments are resolved with ``os.path.realpath`` before they are
    compared, so symbolic links and relative components are taken into
    account. A path is not considered to be inside itself.

    Adapted from https://stackoverflow.com/questions/3812849/how-to-check-whether-a-directory-is-a-sub-directory-of-another-directory

    Args:
        filepath: Path to test.
        directory: Candidate parent directory.

    Returns:
        True if ``filepath`` is located below ``directory``, else False.
    """
    real_file = os.path.realpath(filepath)
    real_dir = os.path.realpath(directory)
    # join(..., '') appends a trailing separator only when one is missing.
    # Blindly adding os.sep turned the root directory into '//', which no
    # real path starts with, so nothing was ever reported as inside '/'.
    prefix = os.path.join(real_dir, '')
    return real_file != real_dir and real_file.startswith(prefix)
def linkDirectory(path, name=''):
'''Make files available in working directory with recursive symbolic links'''
# Check for recursiveness
if isInDirectory(os.getcwd(),path):
print (name + ' directory is subdirectory of working directory! runOPAL cannot handle this.. bye!')
if isInDirectory(os.getcwd(), path):
print(name + ' directory is subdirectory of working directory! runOPAL cannot handle this.. bye!')
sys.exit()
# lndir and if fails try cp
if os.system('lndir '+path) != 0:
......@@ -28,20 +31,22 @@ def linkDirectory(path, name=''):
if os.listdir(path):
os.system('cp -rs '+path+'/* .')
def linkFile(path, name):
    """Make a file available in the working directory via a symbolic link.

    Args:
        path: Directory that contains the file.
        name: File name inside ``path``.

    Raises:
        SystemExit: If ``path/name`` is not an existing regular file.
    """
    source = os.path.join(path, name)
    if not os.path.isfile(source):
        print(f'{name} cannot be found')
        # Exit with a non-zero status so callers can detect the failure.
        sys.exit(1)
    # Pass an argument list instead of a concatenated shell string so that
    # paths with spaces or shell metacharacters are linked correctly.
    # As before, a failing ``ln`` (e.g. link already exists) is not fatal.
    subprocess.call(['ln', '-s', source, '.'])
def extractStr(line, name):
    """Return the first quoted string that follows ``name`` in ``line``.

    The search for the opening quote starts at the position where ``name``
    begins. Single and double quotes are both accepted, and the string ends
    at the next quote of the same kind as the opening one.

    Args:
        line: Text to search.
        name: Keyword that must precede the quoted string.

    Returns:
        The text between the quotes, or None if ``name`` does not occur in
        ``line`` or is not followed by a complete quoted string.
    """
    zero = line.find(name)
    if zero < 0:
        return None
    candidates = [pos
                  for pos in (line.find('"', zero), line.find("'", zero))
                  if pos >= 0]
    if not candidates:
        # No quote at all after the keyword (previously a ValueError).
        return None
    start = min(candidates)
    # Close on the same quote character that opened the string so that
    # values such as "it's.tmpl" are not cut at the apostrophe.
    end = line.find(line[start], start + 1)
    if end < 0:
        return None
    return line[start + 1:end]
......@@ -58,7 +63,7 @@ class Simulation:
print(self.dirname)
return False
else:
if quiet == False:
if not quiet:
print('REMOVE existing directory {}'.format(self.dirname))
shutil.rmtree(self.dirname)
......@@ -66,74 +71,77 @@ class Simulation:
os.mkdir(self.dirname)
return True
def run(self,N, baseFileName, inputfilePath, tmplFile, oinpFile, doTest, doKeep, doNobatch, doOptimize, info, queue, hypert, quiet):
def run(self, N, baseFileName, inputfilePath, tmplFile, oinpFile,
doTest, doKeep, doNobatch, doOptimize, info, queue, hypert, quiet):
# make directory name indicating changed values
self.dirname = baseFileName
if N >= 0:
self.dirname += str(N)
self.dirname += self.opaldict.generateDirectoryName()
try:
CORES = self.opaldict['CORES']
except KeyError:
print("CORES not set bye bye")
sys.exit(1)
if self.createDirectory(self.dirname, doKeep, quiet) == False:
print( "Simulation results already exist")
if not self.createDirectory(self.dirname, doKeep, quiet):
print("Simulation results already exist")
return
os.chdir(self.dirname)
# Linking magnet and RF files
if (os.environ.get('FIELDMAPS')):
fieldmapPath = os.environ.get('FIELDMAPS')
else:
fieldmapPath = '../fieldmaps'
if not (os.path.isdir(fieldmapPath)):
print( 'Fieldmap directory unknown exiting ...')
print('Fieldmap directory unknown exiting ...')
sys.exit()
linkDirectory(fieldmapPath,'Fieldmap')
linkDirectory(fieldmapPath, 'Fieldmap')
# Link distribution directory if present
if (os.environ.get('DISTRIBUTIONS')):
distributionPath = os.environ.get('DISTRIBUTIONS')
if os.path.isdir(distributionPath):
linkDirectory(distributionPath,'Distribution')
linkDirectory(distributionPath, 'Distribution')
# Read in the file
filedata = None
with open(tmplFile, 'r') as file :
with open(tmplFile, 'r') as file:
filedata = file.read()
# do the replacements in the templatefile
for s,value in self.opaldict.items():
for s, value in self.opaldict.items():
# Replace the target string
filedata = filedata.replace('_'+s+'_', str(value))
# Write the file out again
with open(oinpFile, 'w') as file:
file.write(filedata)
#NOTE: What's the best place to link tmpl file? $TEMPLATES, _TEMPLATEDIR_, or parisng?
# NOTE:
# What's the best place to link tmpl file?
# $TEMPLATES, _TEMPLATEDIR_, or parsing?
if doOptimize:
flag = False
tmplDir = None
tmplIn = None
templateFile = open(oinpFile,'r')
tmplIn = None
templateFile = open(oinpFile, 'r')
for line in templateFile:
if not line.startswith('//'):
if 'OPTIMIZE' in line:
flag = True
if flag and not tmplDir:
tmplDir = extractStr(line,'TEMPLATEDIR')
tmplDir = extractStr(line, 'TEMPLATEDIR')
if flag and not tmplIn:
tmplIn = extractStr(line,'INPUT').split('/')[-1]
tmplIn = extractStr(line, 'INPUT').split('/')[-1]
templateFile.close()
linkFile('..', tmplIn[:-5]+'.data')
os.mkdir(tmplDir)
os.chdir(tmplDir)
linkFile(os.path.join('../..',tmplDir), tmplIn)
linkFile(os.path.join('../..', tmplDir), tmplIn)
os.chdir('..')
if os.environ.get('OPAL_EXE_PATH'):
if doNobatch:
opalexe = os.environ.get('OPAL_EXE_PATH') + '/opal'
......@@ -141,96 +149,106 @@ class Simulation:
opalexe = '$OPAL_EXE_PATH/opal'
else:
opalexe = 'opal'
if quiet == False:
print( 'Simulation directory is {} using OPAL at {}'.format(self.dirname, os.environ.get('OPAL_EXE_PATH')))
print( 'Using templatefile at ' + inputfilePath)
print( 'Using fieldmaps at ' + fieldmapPath)
print( 'Parameter set in ' + oinpFile + ' are:')
for s, value in sorted(self.opaldict.items()): #EDIT: fixed indentation
if quiet == False:
print( ' :::: ' + s + ' = ' + str(value))
if not quiet:
print(f'Simulation directory is {self.dirname} using OPAL at {os.environ.get("OPAL_EXE_PATH")}')
print('Using templatefile at ' + inputfilePath)
print('Using fieldmaps at ' + fieldmapPath)
print('Parameter set in ' + oinpFile + ' are:')
for s, value in sorted(self.opaldict.items()):
if not quiet:
print(' :::: ' + s + ' = ' + str(value))
if not doNobatch:
#hostname = commands.getoutput("hostname")
hostname = (subprocess.check_output('hostname').decode('utf-8')).strip()
if quiet == False:
if not quiet:
print("On host {}".format(hostname))
if os.getenv("SGE_TIME"):
print( "You use deprecated environment variable SGE_TIME. Please use in the future TIME")
print("You use deprecated environment variable SGE_TIME. Please use in the future TIME")
time = os.getenv("SGE_TIME")
else:
#print('You did not set a time limit. Using default: s_rt=23:59:00,h_rt=24:00:00')
time = os.getenv("TIME", "s_rt=23:59:00,h_rt=24:00:00")
if os.getenv("SGE_RAM"):
print( "You use deprecated environment variable SGE_RAM. Please use in the future RAM")
print("You use deprecated environment variable SGE_RAM. Please use in the future RAM")
ram = os.getenv("SGE_RAM")
else:
ram = os.getenv("RAM", "4")
if not queue:
try:
queue = os.environ.get('QUEUE')
try:
queue = os.environ.get('QUEUE')
except:
queue = os.getenv("SGE_QUEUE", "prime_bd.q")
# Merlin6
if (hostname.startswith("merlin-l")):
batchsys = 'SLURM'
runfile = 'run.merlin6'
time = os.getenv("SLURM_TIME", "24:00:00")
ram = os.getenv("SLURM_RAM", "36")
batchsys = 'SLURM'
runfile = 'run.merlin6'
time = os.getenv("SLURM_TIME", "24:00:00")
ram = os.getenv("SLURM_RAM", "36")
partition = os.getenv("SLURM_PARTITION", "general")
self.WriteMerlin6(opalexe, oinpFile, CORES, time, ram, info, runfile, partition)
self.WriteMerlin6(opalexe, oinpFile, CORES, time,
ram, info, runfile, partition)
# ANL theta.alcf.anl.gov
elif (hostname.startswith("theta")):
batchsys = 'COBALT'
runfile = 'run.sh'
self.WriteTheta(opalexe, oinpFile, CORES, time, ram, info, queue, hypert)
runfile = 'run.sh'
self.WriteTheta(opalexe, oinpFile, CORES, time,
ram, info, queue, hypert)
# ANL blues.lcrc.anl.gov
elif (hostname.startswith("blogin")):
batchsys = 'PBS'
runfile = 'run.blues'
self.WritePBSBlues(opalexe, oinpFile, CORES, time, ram, info, queue)
runfile = 'run.blues'
self.WritePBSBlues(opalexe, oinpFile, CORES, time,
ram, info, queue)
# ANL Bebop
elif (hostname.startswith("bebop") or hostname.startswith("bdw") or hostname.startswith("knl")):
elif (hostname.startswith("bebop")
or hostname.startswith("bdw")
or hostname.startswith("knl")):
batchsys = 'SLURM'
runfile = 'run.bebop'
time = os.environ["TIME"]
self.WriteBebop(opalexe, oinpFile, CORES, time, ram, info, runfile, queue, hypert, quiet)
runfile = 'run.bebop'
time = os.environ["TIME"]
self.WriteBebop(opalexe, oinpFile, CORES, time,
ram, info, runfile, queue, hypert, quiet)
# NERSC Cori Haswell
elif (hostname.startswith("cori")):
batchsys = 'SLURM'
runfile = 'run.cori'
self.WriteCori(opalexe, oinpFile, CORES, time, ram, info, runfile)
runfile = 'run.cori'
self.WriteCori(opalexe, oinpFile, CORES, time,
ram, info, runfile)
# NERSC Edison
elif (hostname.startswith("edison")):
batchsys = 'SLURM'
runfile = 'run.edison'
self.WriteEdison(opalexe, oinpFile, CORES, time, ram, info, runfile)
runfile = 'run.edison'
self.WriteEdison(opalexe, oinpFile, CORES, time,
ram, info, runfile)
# CSCS Piz-Daint
elif (hostname.startswith("daint")):
batchsys = 'SLURM'
runfile = 'run.daint'
runfile = 'run.daint'
time = os.getenv("SLURM_TIME", "00:01:00")
ram = os.getenv("SLURM_RAM", "36")
ram = os.getenv("SLURM_RAM", "36")
partition = os.getenv("SLURM_PARTITION", "normal")
account = os.getenv("SLURM_ACCOUNT", "psi07")
self.WritePizDaint(opalexe, oinpFile, CORES, time, ram, info, runfile, partition, account)
self.WritePizDaint(opalexe, oinpFile, CORES, time,
ram, info, runfile, partition, account)
elif (hostname.startswith("eofe")):
batchsys = 'SLURM'
runfile = 'run.engaging'
time = os.getenv("SLURM_TIME", "24:00:00")
ram = os.getenv("SLURM_RAM", "120")
self.WriteEngaging(opalexe, oinpFile, CORES, time, ram, info, runfile)
ram = os.getenv("SLURM_RAM", "120")
self.WriteEngaging(opalexe, oinpFile, CORES, time,
ram, info, runfile)
else:
print("Hostname not known bye bye")
......@@ -239,21 +257,23 @@ class Simulation:
qid = -1
if doTest:
if quiet == False:
print( 'Done with setup of the OPAL simulation but not submitting the job (--test) \n\n\n')
if not quiet:
print('Done with setup of the OPAL simulation but not submitting the job (--test) \n\n\n')
elif doNobatch:
if quiet == False:
print( 'Done with setup of the OPAL simulation and executing the job on {} cores...\n\n\n'.format(CORES))
if not quiet:
print(f'Done with setup of the OPAL simulation and executing the job on {CORES} cores...\n\n\n')
ofn, fileExtension = os.path.splitext(oinpFile)
if quiet == False:
print( 'STD output is written to {}.out'.format(ofn))
if not quiet:
print('STD output is written to {}.out'.format(ofn))
#execommand = 'mpirun -np ' + str(CORES) + ' ' + opalexe + ' ' + oinpFile + ' 2>&1 | tee ' + ofn + '.out'
outfileName = ofn +'.out'
outfileName = ofn + '.out'
# Currently not writing to screen anymore
# There is a solution described at https://stackoverflow.com/questions/15535240/python-popen-write-to-stdout-and-log-file-simultaneously
with open(outfileName,'w') as outfile:
qid = subprocess.call(['mpirun', '-np', str(CORES), opalexe, oinpFile], stdout=outfile, stderr=outfile)
with open(outfileName, 'w') as outfile:
qid = subprocess.call(['mpirun', '-np', str(CORES), opalexe, oinpFile],
stdout=outfile,
stderr=outfile)
else:
if batchsys == 'SLURM' or batchsys == 'COBALT':
......@@ -262,50 +282,48 @@ class Simulation:
elif batchsys == 'COBALT':
command = 'qsub'
qid = subprocess.call([command, runfile, '|', 'awk','\'{print $3}\''])
if quiet == False:
print( 'Done with setup of the OPAL simulation and submitting the job with {} cores \n\n\n'.format(CORES))
qid = subprocess.call([command, runfile, '|', 'awk', "\'{print $3}\'"])
if not quiet:
print(f'Done with setup of the OPAL simulation and submitting the job with {CORES} cores \n\n\n')
elif batchsys == 'PBS':
if quiet == False:
print( 'Done with setup of the OPAL simulation, please submit the job yourself')
if not quiet:
print('Done with setup of the OPAL simulation, please submit the job yourself')
else:
print("Batch system", batchsys, "not known!")
os.chdir('..')
return qid
### Write for host
# Write for host
def WriteCori(self, opalexe, oinpFile, cores, time, ram, info, name):
    """Write the SLURM run file used for hosts whose name starts with "cori".

    The job title is the part of ``oinpFile`` before its first dot. The
    ``opalexe``, ``cores``, ``ram`` and ``info`` arguments are accepted but
    not used by this writer; the final ``srun`` line is a placeholder.

    Args:
        opalexe: Path of the OPAL executable (unused).
        oinpFile: Name of the OPAL input file; provides the job title.
        cores: Requested number of cores (unused).
        time: Time limit string inserted after ``#SBATCH -t``.
        ram: Requested memory (unused).
        info: OPAL info level (unused).
        name: Path of the run file to write.
    """
    title = oinpFile.partition(".")[0]
    script = [
        "#!/bin/bash -l \n",
        "#SBATCH -p regular \n",
        "#SBATCH -N 1 \n",
        # NOTE(review): the trailing "G" looks like a leftover from a memory
        # option and is probably not a valid time limit - confirm and remove.
        "#SBATCH -t " + time + "G\n",
        "#SBATCH -J " + title + "\n",
        "#SBATCH --qos=premium \n",
        "srun -n 1 .... \n",
    ]
    # The context manager closes the file even if the write fails.
    with open(name, 'w') as myfile:
        myfile.write("".join(script))
def WriteEngaging(self, opalexe, oinpFile, cores, time, ram, info, name):
print("Writing SLURM run file for Engaging cluster at MIT")
cores = int(cores)
coresPerNode = 32
partition = os.getenv("SLURM_PARTITION", "sched_mit_psfc")
if ((cores%coresPerNode) is 0):
if ((cores % coresPerNode) is 0):
nodes = int(cores/coresPerNode)
else:
nodes = int(cores/coresPerNode) + 1
with open(name, 'w') as outfile:
outfile.write("#!/bin/bash\n"
outfile.write("#!/bin/bash\n"
"# submit with sbatch {}\n"
"# commandline arguments may instead by supplied with #SBATCH <flag> <value>\n"
"# commandline arguments override these values\n"
......@@ -339,42 +357,41 @@ class Simulation:
outfile.write("# BEGIN DEBUG\n")
outfile.write("# Print the SLURM environment on master host: \n")
outfile.write("####################################################\n")
outfile.write("echo '=== Slurm job JOB_NAME=$JOB_NAME JOB_ID=$JOB_ID'\n")
outfile.write("echo '=== Slurm job JOB_NAME=$JOB_NAME JOB_ID=$JOB_ID'\n")
outfile.write("####################################################\n")
outfile.write("echo DATE=`date`\n")
outfile.write("echo HOSTNAME=`hostname`\n")
outfile.write("echo HOSTNAME=`hostname`\n")
outfile.write("echo PWD=`pwd`\n")
outfile.write("####################################################\n")
outfile.write("echo 'Running environment:' \n")
outfile.write("env \n")
outfile.write("####################################################\n")
outfile.write("echo 'Loaded environment modules:' \n")
outfile.write("module list 2>&1\n")
outfile.write("module list 2>&1\n")
outfile.write("echo \n")
outfile.write("# END DEBUG\n")
outfile.write("# END DEBUG\n")
outfile.write("####################################################\n")
outfile.write("\n")
outfile.write("#Finally, the command to execute.\n")
outfile.write("#The job starts in the directory it was submitted from.\n")
outfile.write("#Note that mpirun knows from SLURM how many processor we have\n")
outfile.write("mpirun {} {} --info {} --warn 6\n".format(opalexe, oinpFile, info))
outfile.write(f"mpirun {opalexe} {oinpFile} --info {info} --warn 6\n")
def WriteEdison(self, opalexe, oinpFile, cores, time, ram, info, name):
title=oinpFile.partition(".")[0]
title = oinpFile.partition(".")[0]
coresPerNode = 24
cores = int(cores)
if cores % coresPerNode == 0:
nodes = int(cores / coresPerNode)
else:
nodes = int(cores / coresPerNode) + 1
s1 = "#!/bin/bash -l \n"
s1 += "#SBATCH -q regular \n"
s1 += "#SBATCH -N " + str(nodes) + " \n"
s1 += "#SBATCH -t " + time + "\n"
s1 += "#SBATCH -t " + time + "\n"
s1 += "#SBATCH -J " + title + "\n"
s1 += "#SBATCH -o " + title + ".o%j\n"
s1 += "#SBATCH -L SCRATCH \n"
......@@ -383,14 +400,15 @@ class Simulation:
myfile = open(name, 'w')
myfile.write(s1)
myfile.close()
def WriteMerlin6(self, opalexe, oinpFile, cores, time, ram, info, name, partition):
# ADA this is for the new PSI Merlin6
def WriteMerlin6(self, opalexe, oinpFile, cores, time,
ram, info, name, partition):
# ADA this is for the new PSI Merlin6
title = oinpFile.partition(".")[0]
myfile = open(name, 'w')
s1 = "#!/bin/bash -l \n"
s1 = "#!/bin/bash -l \n"
s1 += "#SBATCH --job-name=" + title + "\n"
s1 += "#SBATCH --output=" + title + ".o%j\n"
s1 += "#SBATCH --output=" + title + ".o%j\n"
s1 += "#SBATCH --time=" + time + "\n"
s1 += "#SBATCH --ntasks=" + str(cores) + "\n"
s1 += "#SBATCH --ntasks-per-core=1 \n"
......@@ -404,18 +422,19 @@ class Simulation:
# s1 += "#SBATCH --exclude=merlin-c-001 \n"
s1 += "#SBATCH --cores-per-socket=22 \n"
s1 += "#SBATCH --sockets-per-node=2 \n"
s1 += "mpirun " + opalexe + " " + oinpFile + " --info " + str(info) + "\n"
s1 += f"mpirun {opalexe} {oinpFile} --info {str(info)}\n"
myfile.write(s1)
myfile.close()
def WritePizDaint(self, opalexe, oinpFile, cores, time, ram, info, name, partition, account):
def WritePizDaint(self, opalexe, oinpFile, cores, time,
ram, info, name, partition, account):
# XC40 Compute Nodes
# Intel Xeon E5-2696 v4 @ 2.10GHz (2x18 cores, 64/128 GB RAM)
# http://user.cscs.ch/computing_systems/piz_daint/index.html
coresPerNode = 36
title = oinpFile.partition(".")[0]
myfile = open(name, 'w')
s1 = "#!/bin/bash -l \n"
s1 = "#!/bin/bash -l \n"
s1 += "#SBATCH --job-name=" + title + "\n"
s1 += "#SBATCH --time=" + time + "\n"
s1 += "#SBATCH --ntasks=" + str(cores) + "\n"
......@@ -431,13 +450,12 @@ class Simulation:
s1 += "srun " + opalexe + " " + oinpFile + "\n"
myfile.write(s1)
myfile.close()
def WritePBSBlues(self, opalexe, oinpFile, cores, time, ram, info, queue):
# time <- export SGE_TIME="walltime=0:20:00"
# cores <- export CORES="nodes=1:ppn=16"
title=oinpFile.partition(".")[0]
myfile = open('run.pbs','w')
title = oinpFile.partition(".")[0]
myfile = open('run.pbs', 'w')
s1 = "#!/bin/sh \n"
s1 += "#PBS -o " + title + "_log \n"
s1 += "#PBS -r n \n"
......@@ -463,38 +481,38 @@ class Simulation:
s1 += "####################################################\n"
s1 += "CMD=$OPAL_EXE_PATH/opal \n"
s1 += "echo $CMD\n"
s1 += "ARGS=" + "\"" + oinpFile + " --info " + str(info) + " --warn 6 \"\n"
s1 += f'ARGS="{oinpFile} --info {str(info)} --warn 6 "\n'
s1 += "####################################################\n"
s1 += "MPICMD=\"mpirun -np $NSLOTS $CMD $ARGS\" \n"
s1 += "echo $MPICMD\n"
s1 += "$MPICMD \n"
s1 += "####################################################\n"
myfile.write(s1)
myfile.close()
def WriteBebop(self, opalexe, oinpFile, cores, time, ram, info, name, queue, hypert, quiet):
myfile.close()
def WriteBebop(self, opalexe, oinpFile, cores, time,
ram, info, name, queue, hypert, quiet):
# BDW and KNL Compute Nodes at ANL
# http://www.lcrc.anl.gov/for-users/using-lcrc/running-jobs/running-jobs-on-bebop/
if type(cores) is str:
cores = int(cores)
else:
cores = int(cores)
#Checking that a valid queue is selected
#Adjusting number of cores for specified queue
if (queue=='bdw' or queue=='bdwall' or queue=='bdwd'):
if quiet == False:
print('Running on BDW')
coresPerNode = 36 * (hypert+1) # hypert == 0 -> no hyper threading
elif (queue=='knl' or queue=='knlall' or queue=='knld'):
if quiet == False:
# Checking that a valid queue is selected
# Adjusting number of cores for specified queue
if (queue == 'bdw' or queue == 'bdwall' or queue == 'bdwd'):
if not quiet:
print('Running on BDW')
coresPerNode = 36 * (hypert+1) # hypert == 0 -> no hyper threading
elif (queue == 'knl' or queue == 'knlall' or queue == 'knld'):
if not quiet:
print('Running on KNL')
coresPerNode = 64 * (hypert+1)
else:
print('You have picked a non-valid queue!! Your run will fail!!')
#Calculating # of nodes needed, and # of tasks per node
# Only calc tasks per node if total core number
# Calculating # of nodes needed, and # of tasks per node
# Only calc tasks per node if total core number
# is not evenly divisible by # of nodes
if (cores % coresPerNode) is 0:
if (cores < coresPerNode):
......@@ -503,19 +521,19 @@ class Simulation:
nodes = cores / coresPerNode
tasks_per_node = cores/nodes
else:
while((cores % coresPerNode) != 0):
while((cores % coresPerNode) != 0):
coresPerNode -= 1
nodes = cores/coresPerNode
nodes = cores/coresPerNode
tasks_per_node = cores/nodes
#print(nodes,cores, tasks_per_node)
title = oinpFile.partition(".")[0]
myfile = open(name, 'w')
s1 = "#!/bin/bash -l \n"
s1 = "#!/bin/bash -l \n"
s1 += "#SBATCH --job-name=" + title + "\n"
s1 += "#SBATCH -o " + title + ".%j.%N.out \n"
s1 += "#SBATCH -o " + title + ".%j.%N.out \n"
s1 += "#SBATCH -e " + title + ".%j.%N.error \n"
s1 += "#SBATCH -p " + queue + " \n"
s1 += "#SBATCH --time=" + time + "\n"
......@@ -524,30 +542,30 @@ class Simulation:
s1 += "cd $SLURM_SUBMIT_DIR \n"
#s1 += "export I_MPI_SLURM_EXT=0 \n"
s1 += "export I_MPI_FABRICS=shm:tmi \n"
if (queue=='knl' or queue=='knlall' or queue=='knld'):
if (queue == 'knl' or queue == 'knlall' or queue == 'knld'):
s1 += "#SBATCH -C knl,quad,cache \n"