"""
Simulation class handles batch-job related tasks: simulation directory setup, template substitution, and job submission

@author: Andreas Adelmann <andreas.adelmann@psi.ch>
@author: Yves Ineichen
@version: 0.1
"""

import sys, os, shutil, subprocess
#import numpy as np
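
# A minimal usage sketch (illustrative, not part of this module; runOPAL
# normally drives this class, and opaldict is assumed to be an OpalDict-like
# mapping that also provides generateDirectoryName()):
#
#   sim = Simulation(opaldict)
#   qid = sim.run(N=0, baseFileName='myrun', inputfilePath='.',
#                 tmplFile='myrun.tmpl', oinpFile='myrun.in',
#                 doTest=True, doKeep=False, doNobatch=False, doOptimize=False,
#                 info=1, queue=None, hypert=0, quiet=False)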


### Helper methods
def isInDirectory(filepath, directory):
    # From https://stackoverflow.com/questions/3812849/how-to-check-whether-a-directory-is-a-sub-directory-of-another-directory
    ''' Check if filepath is inside directory '''
    return os.path.realpath(filepath).startswith(os.path.realpath(directory) + os.sep)
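# e.g. isInDirectory('/home/user/run/a.txt', '/home/user') -> True
#      isInDirectory('/home/other/a.txt',    '/home/user') -> False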

def linkDirectory(path, name=''):
    '''Make files available in working directory with recursive symbolic links'''
    # Check for recursiveness
    if isInDirectory(os.getcwd(),path):
        print(name + ' directory is a subdirectory of the working directory! runOPAL cannot handle this... bye!')
        sys.exit()
    # try lndir; if that fails, fall back to ln -rs
    if os.system('lndir '+path) != 0:
        print("lndir failed (possibly doesn't exist on this system), using ln -rs... \n") #EDIT: '\n' added
        if os.listdir(path):
            os.system('ln -rs '+path+'/* .') #EDIT: changed from cp to ln

def linkFile(path, name): #EDIT: new helper function for convenience
    '''Make a file available in working directory with a symbolic link'''
    path = os.path.join(path,name)
    if not os.path.isfile(path):
        print (name+' cannot be found')
        sys.exit()
    os.system('ln -s '+path+' .')

def extractStr(line, name):
    zero = line.find(name)
    if zero < 0:
        return None
    start = min(x for x in [line.find('"',zero ), line.find("'", zero )] if x > 0) +1
    end   = min(x for x in [line.find('"',start), line.find("'", start)] if x > 0)
    return line[start:end]
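# For example (illustrative): extractStr('INPUT="tmpl/opt.tmpl";', 'INPUT')
# returns 'tmpl/opt.tmpl'; it returns None if name does not occur in line.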


class Simulation:
    def __init__(self, opaldict):
        self.opaldict = opaldict
        self.dirname = ""

    def createDirectory(self, dirname, doKeep, quiet):
        # If there's already a directory remove it...
        if os.path.isdir(self.dirname):
            if doKeep:
                print('KEEP existing directory {}'.format(self.dirname))
                return False
            else:
                if not quiet:
                    print('REMOVE existing directory {}'.format(self.dirname))
                shutil.rmtree(self.dirname)

        # create directory
        os.mkdir(self.dirname)
        return True

    def run(self,N, baseFileName, inputfilePath, tmplFile, oinpFile, doTest, doKeep, doNobatch, doOptimize, info, queue, hypert, quiet):
        # make directory name indicating changed values
        self.dirname = baseFileName
        if N >= 0:
            self.dirname += str(N)
        self.dirname += self.opaldict.generateDirectoryName()
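        # e.g. N=0 and baseFileName='myrun' give 'myrun0' plus whatever suffix
        # the opaldict's generateDirectoryName() builds from the scanned
        # parameter values (illustrative; the exact suffix depends on opaldict)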
        
        try:
            CORES = self.opaldict['CORES']
        except KeyError:
            print("CORES not set bye bye")
            sys.exit(1)

        if not self.createDirectory(self.dirname, doKeep, quiet):
            print( "Simulation results already exist")
            return
        os.chdir(self.dirname)
        
        # Linking magnet and RF files
        if (os.environ.get('FIELDMAPS')):
            fieldmapPath = os.environ.get('FIELDMAPS')
        else:
            fieldmapPath = '../fieldmaps'
            if not (os.path.isdir(fieldmapPath)):
                print('Fieldmap directory unknown, exiting ...')
                sys.exit()
        linkDirectory(fieldmapPath,'Fieldmap')
        
        # Link distribution directory if present
        if (os.environ.get('DISTRIBUTIONS')):
            distributionPath = os.environ.get('DISTRIBUTIONS')
            if os.path.isdir(distributionPath):
                linkDirectory(distributionPath,'Distribution')
        
        # Read in the file
        filedata = None
        with open(tmplFile, 'r') as file:
            filedata = file.read()
        # do the replacements in the template file
        for s,value in self.opaldict.items():
            # Replace the target string
            filedata = filedata.replace('_'+s+'_', str(value))
        # Write the file out again
        with open(oinpFile, 'w') as file:
            file.write(filedata)
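        # Placeholder convention (illustrative): an opaldict entry CORES=8
        # replaces every occurrence of _CORES_ in the template with 8.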
        
        #EDIT: Link .tmpl and .dat files for OPTIMIZE
        #NOTE: What's the best place to link the tmpl file? $TEMPLATES, _TEMPLATEDIR_, or parsing?
        if doOptimize:
            flag = False
            tmplDir = None
            tmplIn  = None
            with open(oinpFile, 'r') as templateFile:
                for line in templateFile:
                    if not line.startswith('//'):
                        if 'OPTIMIZE' in line:
                            flag = True
                        if flag and not tmplDir:
                            tmplDir = extractStr(line, 'TEMPLATEDIR')
                        if flag and not tmplIn:
                            inputStr = extractStr(line, 'INPUT')
                            if inputStr:  # guard: extractStr returns None if INPUT is absent
                                tmplIn = inputStr.split('/')[-1]

            linkFile('..', tmplIn[:-5] + '.data')  # strip the '.tmpl' extension
            os.mkdir(tmplDir)
            os.chdir(tmplDir)
            linkFile(os.path.join('../..', tmplDir), tmplIn)
            os.chdir('..')
        
        if os.environ.get('OPAL_EXE_PATH'):
            if doNobatch:
                opalexe = os.environ.get('OPAL_EXE_PATH') + '/opal'
            else:
                opalexe = '$OPAL_EXE_PATH/opal'
        else:
            opalexe = 'opal'
        if not quiet:
            print('Simulation directory is {} using OPAL at {}'.format(self.dirname, os.environ.get('OPAL_EXE_PATH')))
            print('Using templatefile at ' + inputfilePath)
            print('Using fieldmaps at    ' + fieldmapPath)
            print('Parameters set in ' + oinpFile + ' are:')
            for s, value in sorted(self.opaldict.items()): #EDIT: fixed indentation
                print(' :::: ' + s + ' = ' + str(value))

        if not doNobatch:
            #hostname = commands.getoutput("hostname")
            hostname = (subprocess.check_output('hostname').decode('utf-8')).strip()
            if not quiet:
                print("On host {}".format(hostname))

            if os.getenv("SGE_TIME"):
                print("You are using the deprecated environment variable SGE_TIME. Please use TIME in the future.")
                time = os.getenv("SGE_TIME")
            else:
                #print('You did not set a time limit. Using default: s_rt=23:59:00,h_rt=24:00:00')
                time = os.getenv("TIME", "s_rt=23:59:00,h_rt=24:00:00")

            if os.getenv("SGE_RAM"):
                print("You are using the deprecated environment variable SGE_RAM. Please use RAM in the future.")
                ram = os.getenv("SGE_RAM")
            else:
                ram = os.getenv("RAM", "4")

            if not queue:
                # os.environ.get() never raises, so the old try/except could
                # leave queue as None; resolve QUEUE with an explicit fallback
                # to the deprecated SGE_QUEUE instead.
                queue = os.getenv('QUEUE', os.getenv('SGE_QUEUE', 'prime_bd.q'))

            # Merlin5
            if (hostname.startswith("merlin-l")):
                batchsys  = 'SLURM'
                runfile   = 'run.merlin5'
                time      = os.getenv("SLURM_TIME", "24:00:00")
                ram       = os.getenv("SLURM_RAM",  "36")
                partition = os.getenv("SLURM_PARTITION", "merlin")
                self.WriteMerlin5(opalexe, oinpFile, CORES, time, ram, info, runfile, partition)
            
            # ANL theta.alcf.anl.gov
            elif (hostname.startswith("theta")):
                batchsys = 'COBALT'
                runfile  = 'run.sh'
                self.WriteTheta(opalexe, oinpFile, CORES, time, ram, info, queue, hypert)

            # ANL blues.lcrc.anl.gov
            elif (hostname.startswith("blogin")):
                batchsys = 'PBS'
                runfile  = 'run.blues'
                self.WritePBSBlues(opalexe, oinpFile, CORES, time, ram, info, queue)

            # ANL Bebop
            elif (hostname.startswith("bebop") or hostname.startswith("bdw") or hostname.startswith("knl")):
                batchsys = 'SLURM'
                runfile  = 'run.bebop'
                time     = os.getenv("TIME", time)  # keep the earlier default if TIME is unset
                self.WriteBebop(opalexe, oinpFile, CORES, time, ram, info, runfile, queue, hypert, quiet)

            # NERSC Cori Haswell
            elif (hostname.startswith("cori")):
                batchsys = 'SLURM'
                runfile  = 'run.cori'
                self.WriteCori(opalexe, oinpFile, CORES, time, ram, info, runfile)

            # NERSC Edison
            elif (hostname.startswith("edison")):
                batchsys = 'SLURM'
                runfile  = 'run.edison'
                self.WriteEdison(opalexe, oinpFile, CORES, time, ram, info, runfile)

            # CSCS Piz-Daint
            elif (hostname.startswith("daint")):
                batchsys = 'SLURM'
                runfile  = 'run.daint'
                time = os.getenv("SLURM_TIME", "24:00:00")
                ram  = os.getenv("SLURM_RAM", "36")
                partition = os.getenv("SLURM_PARTITION", "normal")
                self.WritePizDaint(opalexe, oinpFile, CORES, time, ram, info, runfile, partition)

            elif (hostname.startswith("eofe")):
                batchsys = 'SLURM'
                runfile = 'run.engaging'
                time = os.getenv("SLURM_TIME", "24:00:00")
                ram  = os.getenv("SLURM_RAM", "120")            
                self.WriteEngaging(opalexe, oinpFile, CORES, time, ram, info, runfile)

            else:
                print("Hostname not known bye bye")
                sys.exit(1)

        qid = -1

        if doTest:
            if not quiet:
                print('Done with setup of the OPAL simulation but not submitting the job (--test) \n\n\n')

        elif doNobatch:
            if not quiet:
                print('Done with setup of the OPAL simulation and executing the job on {} cores...\n\n\n'.format(CORES))
            ofn, fileExtension = os.path.splitext(oinpFile)
            print( 'STD output is written to {}.out'.format(ofn))
            #execommand = 'mpirun -np ' + str(CORES)  + ' ' + opalexe + ' ' + oinpFile + '  2>&1 | tee ' + ofn + '.out'
            outfileName = ofn +'.out'
            # Currently not writing to screen anymore
            # There is a solution described at https://stackoverflow.com/questions/15535240/python-popen-write-to-stdout-and-log-file-simultaneously
            with open(outfileName,'w') as outfile:
                qid = subprocess.call(['mpirun', '-np', str(CORES), opalexe, oinpFile], stdout=outfile, stderr=outfile)

        else:
            if batchsys in ('SLURM', 'COBALT'):
                command = 'sbatch' if batchsys == 'SLURM' else 'qsub'

                # A plain argv list is not run through a shell, so '|' and awk
                # would be passed to sbatch/qsub as literal arguments; run the
                # pipeline through a shell so the job id is actually extracted.
                qid = subprocess.call(command + ' ' + runfile + " | awk '{print $3}'", shell=True)
                if not quiet:
                    print('Done with setup of the OPAL simulation and submitting the job with {} cores \n\n\n'.format(CORES))

            elif batchsys == 'PBS':
                if not quiet:
                    print('Done with setup of the OPAL simulation, please submit the job yourself')

            else:
                print("Batch system", batchsys, "not known!")

        os.chdir('..')
        return qid
    
    
    ### Write the run file for a given host
    def WriteCori(self, opalexe, oinpFile, cores, time, ram, info, name):
        title=oinpFile.partition(".")[0]
        myfile = open(name,'w')
        s1 = "#!/bin/bash -l \n"
        s1 += "#SBATCH -p regular \n"
        s1 += "#SBATCH -N 1 \n"
        s1 += "#SBATCH -t " + time + "\n"
        s1 += "#SBATCH -J " + title + "\n"
        s1 += "#SBATCH --qos=premium \n"
        s1 += "srun -n 1 .... \n"
        myfile.write(s1)
        myfile.close()
    
    
    def WriteEngaging(self, opalexe, oinpFile, cores, time, ram, info, name):
        print("Writing SLURM run file for Engaging cluster at MIT")
        
        cores = int(cores)
        coresPerNode = 32
        partition = os.getenv("SLURM_PARTITION", "sched_mit_psfc")
        
        if (cores % coresPerNode) == 0:
            nodes = int(cores / coresPerNode)
        else:
            nodes = int(cores / coresPerNode) + 1

        with open(name, 'w') as outfile:
            outfile.write("#!/bin/bash\n"
                          "# submit with sbatch {}\n"
                          "# commandline arguments may instead be supplied with #SBATCH <flag> <value>\n"
                          "# commandline arguments override these values\n"
                          "\n"
                          "# Number of nodes\n".format(name))
            outfile.write("#SBATCH -N {}\n".format(nodes))
            outfile.write("# Number of total processor cores \n")
            outfile.write("#SBATCH -n {}\n".format(cores))
            outfile.write("# Memory (MB) \n")
            outfile.write("#SBATCH --mem {}\n".format(int(ram) * 1000))
            outfile.write("# specify how long your job needs.\n")
            outfile.write("#SBATCH --time={}\n".format(time))
            outfile.write("# which partition or queue the jobs runs in\n")
            outfile.write("#SBATCH -p {}\n".format(partition))
            outfile.write("#customize the name of the stderr/stdout file. %j is the job number\n")
            outfile.write("#SBATCH -o {}.o%j".format(os.path.splitext(oinpFile)[0]))
            outfile.write("\n")
#            outfile.write("#load default system modules\n")
#            outfile.write(". /etc/profile.d/modules.sh")
#            outfile.write("\n")
#            outfile.write("#load modules your job depends on.\n")
#            outfile.write("#better here than in your $HOME/.bashrc to make "
#                         "debugging and requirements easier to track.\n")
#            outfile.write("module load gcc/4.8.4\n")
#            outfile.write("module load engaging/openmpi/1.8.8\n")
#            outfile.write("module load engaging/cmake/3.5.2\n")
#            outfile.write("module load engaging/boost/1.56.0\n")
#            outfile.write("module load engaging/gsl/2.2.1\n")
#            outfile.write("\n")
            outfile.write("####################################################\n")
            outfile.write("# BEGIN DEBUG\n")
            outfile.write("# Print the SLURM environment on master host: \n")
            outfile.write("####################################################\n")
            outfile.write("echo '=== Slurm job  JOB_NAME=$JOB_NAME  JOB_ID=$JOB_ID'\n") 
            outfile.write("####################################################\n")
            outfile.write("echo DATE=`date`\n")
            outfile.write("echo HOSTNAME=`hostname`\n") 
            outfile.write("echo PWD=`pwd`\n")
            outfile.write("####################################################\n")
            outfile.write("echo 'Running environment:' \n")
            outfile.write("env \n")
            outfile.write("####################################################\n")
            outfile.write("echo 'Loaded environment modules:' \n")
            outfile.write("module list 2>&1\n") 
            outfile.write("echo \n")
            outfile.write("# END DEBUG\n") 
            outfile.write("####################################################\n")
            outfile.write("\n")
            outfile.write("#Finally, the command to execute.\n")
            outfile.write("#The job starts in the directory it was submitted from.\n")
            outfile.write("#Note that mpirun knows from SLURM how many processors we have\n")
            outfile.write("mpirun {} {} --info {} --warn 6\n".format(opalexe, oinpFile, info))
    
    
    def WriteEdison(self, opalexe, oinpFile, cores, time, ram, info, name):
        title=oinpFile.partition(".")[0]
        
        coresPerNode = 24
        cores = int(cores)
        
        if cores % coresPerNode == 0:
            nodes = int(cores / coresPerNode)
        else:
            nodes = int(cores / coresPerNode) + 1
        
        s1 = "#!/bin/bash -l \n"
        s1 += "#SBATCH -q regular \n"
        s1 += "#SBATCH -N " + str(nodes) + " \n"
        s1 += "#SBATCH -t " + time + "\n" 
        s1 += "#SBATCH -J " + title + "\n"
        s1 += "#SBATCH -o " + title + ".o%j\n"
        s1 += "#SBATCH -L SCRATCH \n"
        s1 += "srun -n " + str(cores) + " " + opalexe + " " + oinpFile + "\n"

        myfile = open(name, 'w')
        myfile.write(s1)
        myfile.close()
        
    
    def WriteMerlin5(self, opalexe, oinpFile, cores, time, ram, info, name, partition):
        # ADA this is for the new PSI Merlin5     
        title = oinpFile.partition(".")[0]
        myfile = open(name, 'w')
        s1 =  "#!/bin/bash -l \n"
        s1 += "#SBATCH --job-name=" + title + "\n"
        s1 += "#SBATCH --output="   + title + ".o%j\n"
        s1 += "#SBATCH --time=" + time + "\n"
        s1 += "#SBATCH --ntasks=" + str(cores) + "\n"
        # Discussed in https://gitlab.psi.ch/OPAL/runOPAL/issues/7:
        if (int(cores) > 16):
            s1 += "#SBATCH --ntasks-per-node=16 \n"
        else:
            s1 += "#SBATCH --nodes=1 \n"

#        s1 += "#SBATCH --mem=" + str(ram) + "GB \n"
        s1 += "#SBATCH --partition=" + str(partition) + " \n"
        s1 += "mpirun " + opalexe + " " + oinpFile + " --info " + str(info) + "\n"
        myfile.write(s1)
        myfile.close()
    
    
    def WritePizDaint(self, opalexe, oinpFile, cores, time, ram, info, name, partition): 
        # XC40 Compute Nodes
        # Intel Xeon E5-2696 v4 @ 2.10GHz (2x18 cores, 64/128 GB RAM)
        # http://user.cscs.ch/computing_systems/piz_daint/index.html
        coresPerNode = 36
        title = oinpFile.partition(".")[0]
        myfile = open(name, 'w')
        s1 =  "#!/bin/bash -l \n"
        s1 += "#SBATCH --job-name=" + title + "\n"
        s1 += "#SBATCH --time=" + time + "\n"
        s1 += "#SBATCH --ntasks=" + str(cores) + "\n"
        s1 += "#SBATCH --ntasks-per-node=" + str(coresPerNode) + " \n"
        s1 += "#SBATCH --ntasks-per-core=1 \n"
        s1 += "#SBATCH --cpus-per-task=1 \n"
        s1 += "#SBATCH --constraint=mc \n"
        s1 += "#SBATCH --mem=" + str(ram) + "GB \n"
        s1 += "#SBATCH --partition=" + str(partition) + " \n"
        s1 += "#SBATCH --account=psi07 \n"
        s1 += "export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK \n"
        s1 += "module load daint-mc \n"
        s1 += "srun " + opalexe + " " + oinpFile + "\n"
        myfile.write(s1)
        myfile.close()
    
    
    def WritePBSBlues(self, opalexe, oinpFile, cores, time, ram, info, queue):
        # time  <- export SGE_TIME="walltime=0:20:00"
        # cores <- export CORES="nodes=1:ppn=16"
        title=oinpFile.partition(".")[0]
        myfile = open('run.pbs','w')
        s1 = "#!/bin/sh \n"
        s1 += "#PBS -o " + title + "_log  \n"
        s1 += "#PBS -r n \n"
        s1 += "#PBS -j oe \n"
        s1 += "#PBS -N " + title + "\n"
        s1 += "#PBS -m aeb \n"
        s1 += "#PBS -M nneveu@anl.gov \n"
        s1 += "#PBS -l " + time + " \n"
        s1 += "#PBS -l " + cores + " \n"
        s1 += "#PBS -q " + queue + " \n"
        try:
            v = os.environ["OPAL_EXE_PATH"]
        except KeyError:
            print("OPAL_EXE_PATH not set bye bye")
            sys.exit(1)
        s1 += "cd $PBS_O_WORKDIR \n"
        s1 += "####################################################\n"
        s1 += "echo DATE=`date`\n"
        s1 += "echo HOSTNAME=`hostname` \n"
        s1 += "echo PWD=`pwd`\n"
        s1 += "cat $PBS_NODEFILE\n"
        s1 += "NSLOTS=$(wc -l < $PBS_NODEFILE)\n"
        s1 += "####################################################\n"
        s1 += "CMD=$OPAL_EXE_PATH/opal \n"
        s1 += "echo $CMD\n"
        s1 += "ARGS=" + "\"" + oinpFile + " --info " + str(info) + " --warn 6 \"\n"
        s1 += "####################################################\n"
        s1 += "MPICMD=\"mpirun -np $NSLOTS $CMD $ARGS\" \n"
        s1 += "echo $MPICMD\n"
        s1 += "$MPICMD \n"
        s1 += "####################################################\n"
        myfile.write(s1)
        myfile.close()              
    
    
    def WriteBebop(self, opalexe, oinpFile, cores, time, ram, info, name, queue, hypert, quiet):
        # BDW and KNL Compute Nodes at ANL
        # http://www.lcrc.anl.gov/for-users/using-lcrc/running-jobs/running-jobs-on-bebop/
        cores = int(cores)
        #Checking that a valid queue is selected
        #Adjusting number of cores for specified queue 
        if (queue=='bdw' or queue=='bdwall' or queue=='bdwd'):
            if not quiet:
                print('Running on BDW')
            coresPerNode = 36 * (hypert+1)     # hypert == 0 -> no hyper threading 
        elif (queue=='knl' or queue=='knlall' or queue=='knld'):
            if not quiet:
                print('Running on KNL')
            coresPerNode = 64 * (hypert+1)
        else:
            print('You have picked an invalid queue!! Your run will fail!!')

        #Calculating # of nodes needed, and # of tasks per node 
        #  Only calc tasks per node if total core number 
        #  is not evenly divisible by # of nodes
        if (cores % coresPerNode) == 0:
            if cores < coresPerNode:
                nodes = 1
            else:
                nodes = int(cores / coresPerNode)
                tasks_per_node = int(cores / nodes)
        else:
            # find the largest per-node task count that divides cores evenly
            while (cores % coresPerNode) != 0:
                coresPerNode -= 1
            nodes = int(cores / coresPerNode)
            tasks_per_node = int(cores / nodes)
            #print(nodes,cores, tasks_per_node)
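        # Worked example (illustrative): cores=72 on 'bdw' with hypert=0
        # (coresPerNode=36) gives nodes=2 and tasks_per_node=36; cores=50
        # lowers coresPerNode to 25, giving nodes=2 and tasks_per_node=25.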

        title = oinpFile.partition(".")[0]
        myfile = open(name, 'w')
        
        s1 =  "#!/bin/bash -l \n"
        s1 += "#SBATCH --job-name=" + title + "\n"
        s1 += "#SBATCH -o " + title + ".%j.%N.out \n"
        s1 += "#SBATCH -e " + title + ".%j.%N.error \n"
        s1 += "#SBATCH -p " + queue + " \n"
        s1 += "#SBATCH --time=" + time + "\n"
        s1 += "#SBATCH --ntasks=" + str(cores) + "\n"
        # All #SBATCH directives must precede the first shell command,
        # otherwise SLURM ignores them.
        if (queue=='knl' or queue=='knlall' or queue=='knld'):
            s1 += "#SBATCH -C knl,quad,cache \n"
        if int(nodes) > 1:
            s1 += "#SBATCH --ntasks-per-node=" + str(tasks_per_node) + " \n"
        else:
            s1 += "#SBATCH --ntasks-per-node=" + str(coresPerNode) + "\n"
        s1 += "cd $SLURM_SUBMIT_DIR \n"
        #s1 += "export I_MPI_SLURM_EXT=0 \n"
        s1 += "export I_MPI_FABRICS=shm:tmi \n"
        s1 += "mpirun -n $SLURM_NTASKS " + opalexe + " " + oinpFile + "\n"
        #s1 += "#SBATCH --mem=" + ram + "GB \n"
        #s1 += "export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK \n"
        #s1 += "--hint=nomultithread " + opalexe + " " + oinpFile + "\n"
       
        myfile.write(s1)
        myfile.close()
    
    
    def WriteTheta(self, opalexe, oinpFile, cores, time, ram, info, queue, hypert):
        # queue = default, debug-cache-quad, debug-flat-quad
        # cores = min of 8 nodes for default queue 
        try:
            v = os.environ["OPAL_EXE_PATH"]
        except KeyError:
            print("OPAL_EXE_PATH not set bye bye")
            sys.exit(1)
              
        cores        = int(cores)
        coresPerNode = 64 * (hypert+1)

        if (cores % coresPerNode) == 0:
            if cores < coresPerNode:
                nodes = 1
            else:
                nodes = int(cores / coresPerNode)
                tasks_per_node = int(cores / nodes)
        else:
            # find the largest per-node rank count that divides cores evenly
            while (cores % coresPerNode) != 0:
                coresPerNode -= 1
            nodes = int(cores / coresPerNode)
            tasks_per_node = int(cores / nodes)
            #print(nodes,cores, tasks_per_node)
   
        if cores < 512:
            queue = 'debug-cache-quad'
            time  = '00:59:00'
        #elif cores > 512: 
        #nodes = np.ceil(cores/64)

        total_mpi_ranks = int(nodes*coresPerNode)
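        # Illustrative: cores=128 with hypert=0 gives coresPerNode=64,
        # nodes=2 and total_mpi_ranks=128; with hypert=1 (two hyperthreads
        # per core) the same 128 ranks fit on a single node.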

        title=oinpFile.partition(".")[0]
        myfile = open('run.sh','w')
        s1 =  "#!/bin/bash  \n"
        s1 += "#COBALT -t " + time + " \n"
        s1 += "#COBALT -n " + str(nodes) + " \n"
        s1 += "#COBALT -q " + queue + " \n"
        s1 += "#COBALT --attrs mcdram=cache:numa=quad \n"
        s1 += "#COBALT -A awa \n"
        s1 += 'echo "Starting Cobalt job script"\n'
        s1 += "export n_nodes=$COBALT_JOBSIZE \n"
        s1 += "export n_mpi_ranks_per_node=" + str(coresPerNode)+ " \n"
        s1 += "export n_mpi_ranks=" + str(total_mpi_ranks) + "\n"
        #s1 += "export n_openmp_threads_per_rank=4"
        if hypert > 0:       
            s1 += "export n_hyperthreads_per_core=2 \n"
        #s1 += "export n_hyperthreads_skipped_between_ranks=4"
        s1 += "####################################################\n"
        s1 += "ARGS=" + "\"" + oinpFile + " --info " + str(info) + " --warn 6 \"\n"
        s1 += "CMD=$OPAL_EXE_PATH/opal \n"
        if hypert > 0:
            s1 += "MPICMD=\"aprun -n $n_mpi_ranks -N $n_mpi_ranks_per_node -j $n_hyperthreads_per_core $CMD $ARGS\" \n"
        else:
            s1 += "MPICMD=\"aprun -n $n_mpi_ranks -N $n_mpi_ranks_per_node $CMD $ARGS\" \n"
        s1 += "echo $MPICMD\n"
        s1 += "$MPICMD \n"
        s1 += "####################################################\n"
        myfile.write(s1)
        myfile.close()              
        os.chmod("run.sh", 0o775)