"""
Simulation class handles batch job related things

@author: Andreas Adelmann <andreas.adelmann@psi.ch>
@author: Yves Ineichen
@version: 0.1
"""

import sys, os, shutil, subprocess
#import numpy as np


# Helper methods
def isInDirectory(filepath, directory):
    # From https://stackoverflow.com/questions/3812849/how-to-check-whether-a-directory-is-a-sub-directory-of-another-directory
    ''' Check if filepath is inside directory '''
    return os.path.realpath(filepath).startswith(os.path.realpath(directory) + os.sep)
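# For illustration (hypothetical paths, assuming no symlinks are involved):
#   isInDirectory('/home/user/run/fieldmaps', '/home/user/run')  -> True
#   isInDirectory('/home/user', '/home/user/run')                -> False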

def linkDirectory(path, name=''):
    '''Make files available in working directory with recursive symbolic links'''
    # Check for recursiveness
    if isInDirectory(os.getcwd(), path):
        print(name + ' directory is a subdirectory of the working directory! runOPAL cannot handle this... bye!')
        sys.exit()
    # try lndir first; if it fails, fall back to cp -rs
    if os.system('lndir ' + path) != 0:
        #print("lndir failed (possibly doesn't exist on this system), using cp -rs..."),
        if os.listdir(path):
            os.system('cp -rs ' + path + '/* .')
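# Typical call (mirrors its use in Simulation.run below):
#   linkDirectory('../fieldmaps', 'Fieldmap')
# makes every file of the fieldmap directory available in the current working
# directory via symbolic links (lndir, or 'cp -rs' as a fallback).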


class Simulation:

    def __init__(self, opaldict):
        self.opaldict = opaldict
        self.dirname = ""

    def createDirectory(self, dirname, doKeep, quiet):
        # If there's already a directory remove it...
        if os.path.isdir(self.dirname):
            if doKeep:
                print('KEEP existing directory {}'.format(self.dirname))
                return False
            else:
                if not quiet:
                    print('REMOVE existing directory {}'.format(self.dirname))
                shutil.rmtree(self.dirname)

        # create directory
        os.mkdir(self.dirname)
        return True

    def run(self,N, baseFileName, inputfilePath, tmplFile, oinpFile, doTest, doKeep, doNobatch, info, queue, hypert, quiet):
        # make directory name indicating changed values
        self.dirname = baseFileName
        if N >= 0:
            self.dirname += str(N)

        self.dirname += self.opaldict.generateDirectoryName()
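        # e.g. baseFileName='sim' and N=0 give 'sim0', followed by the parameter
        # suffix produced by OpalDict.generateDirectoryName() (illustrative names only)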

        try:
            CORES = self.opaldict['CORES']
        except KeyError:
            print("CORES not set bye bye")
            sys.exit(1)

        if not self.createDirectory(self.dirname, doKeep, quiet):
            print( "Simulation results already exist")
            return 
        os.chdir(self.dirname)

        # Linking magnet and RF files
        if (os.environ.get('FIELDMAPS')):
            fieldmapPath = os.environ.get('FIELDMAPS')
        else:
            fieldmapPath = '../fieldmaps'
            if not (os.path.isdir(fieldmapPath)):
                print('Fieldmap directory unknown, exiting ...')
                sys.exit()

        linkDirectory(fieldmapPath,'Fieldmap')

        # Link distribution directory if present
        if (os.environ.get('DISTRIBUTIONS')):
            distributionPath = os.environ.get('DISTRIBUTIONS')
            if os.path.isdir(distributionPath):
                linkDirectory(distributionPath,'Distribution')

        # Read in the file
        filedata = None
        with open(inputfilePath + tmplFile, 'r') as file:
            filedata = file.read()

        # do the replacements in the template file
        for s,value in self.opaldict.items():
            # Replace the target string
            filedata = filedata.replace('_'+s+'_', str(value))

        # Write the file out again
        with open(oinpFile, 'w') as file:
            file.write(filedata)
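        # Illustrative example: a template token '_CORES_' is replaced by
        # str(self.opaldict['CORES']), so for CORES=32 every occurrence of
        # '_CORES_' in the template ends up as '32' in the OPAL input file.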

        if os.environ.get('OPAL_EXE_PATH'):
            if doNobatch:
                opalexe = os.environ.get('OPAL_EXE_PATH') + '/opal'
            else:
                opalexe = '$OPAL_EXE_PATH/opal'
        else:
            opalexe = 'opal'
        if not quiet:
            print('Simulation directory is {} using OPAL at {}'.format(self.dirname, os.environ.get('OPAL_EXE_PATH')))
            print('Using template file at ' + inputfilePath)
            print('Using fieldmaps at    ' + fieldmapPath)
            print('Parameters set in ' + oinpFile + ' are:')

        for s, value in sorted(self.opaldict.items()):
            if not quiet:
                print(' :::: ' + s + ' = ' + str(value))

        if not doNobatch:
            #hostname = commands.getoutput("hostname")
            hostname = (subprocess.check_output('hostname').decode('utf-8')).strip()
            if not quiet:
                print("On host {}".format(hostname))

            if os.getenv("SGE_TIME"):
                print( "You use deprecated environment variable SGE_TIME. Please use in the future TIME")
                time = os.getenv("SGE_TIME")
            else:
                #print('You did not set a time limit. Using default: s_rt=23:59:00,h_rt=24:00:00')
                time = os.getenv("TIME", "s_rt=23:59:00,h_rt=24:00:00")

            if os.getenv("SGE_RAM"):
                print( "You use deprecated environment variable SGE_RAM. Please use in the future RAM")
                ram = os.getenv("SGE_RAM")
            else:
                ram = os.getenv("RAM", "4")

            if not queue:
                try:
                    queue = os.environ['QUEUE']
                except KeyError:
                    queue = os.getenv("SGE_QUEUE", "prime_bd.q")

            # Merlin5
            if (hostname.startswith("merlin-l")):
                batchsys  = 'SLURM'
                runfile   = 'run.merlin5'
                time      = os.getenv("SLURM_TIME", "24:00:00")
                ram       = os.getenv("SLURM_RAM",  "36")
                partition = os.getenv("SLURM_PARTITION", "merlin")
                self.WriteMerlin5(opalexe, oinpFile, CORES, time, ram, info, runfile, partition)
            
            # ANL theta.alcf.anl.gov
            elif (hostname.startswith("theta")):
                batchsys = 'COBALT'
                runfile  = 'run.sh'
                self.WriteTheta(opalexe, oinpFile, CORES, time, ram, info, queue, hypert)

            # ANL blues.lcrc.anl.gov
            elif (hostname.startswith("blogin")):
                batchsys = 'PBS'
                runfile  = 'run.blues'
                self.WritePBSBlues(opalexe, oinpFile, CORES, time, ram, info, queue)

            # ANL Bebop
            elif (hostname.startswith("bebop") or hostname.startswith("bdw") or hostname.startswith("knl")):
                batchsys = 'SLURM'
                runfile  = 'run.bebop'
                time     = os.environ["TIME"]
                self.WriteBebop(opalexe, oinpFile, CORES, time, ram, info, runfile, queue, hypert, quiet)

            # NERSC Cori Haswell
            elif (hostname.startswith("cori")):
                batchsys = 'SLURM'
                runfile  = 'run.cori'
                self.WriteCori(opalexe, oinpFile, CORES, time, ram, info, runfile)

            # NERSC Edison
            elif (hostname.startswith("edison")):
                batchsys = 'SLURM'
                runfile  = 'run.edison'
                self.WriteEdison(opalexe, oinpFile, CORES, time, ram, info, runfile)

            # CSCS Piz-Daint
            elif (hostname.startswith("daint")):
                batchsys = 'SLURM'
                runfile  = 'run.daint'
                time = os.getenv("SLURM_TIME", "24:00:00")
                ram  = os.getenv("SLURM_RAM", "36")
                partition = os.getenv("SLURM_PARTITION", "normal")
                self.WritePizDaint(opalexe, oinpFile, CORES, time, ram, info, runfile, partition)

            elif (hostname.startswith("eofe")):
                batchsys = 'SLURM'
                runfile = 'run.engaging'
                time = os.getenv("SLURM_TIME", "24:00:00")
                ram  = os.getenv("SLURM_RAM", "120")            
                self.WriteEngaging(opalexe, oinpFile, CORES, time, ram, info, runfile)

            else:
                print("Hostname not known bye bye")
                sys.exit(1)

        qid = -1

        if doTest:
            if not quiet:
                print('Done with setup of the OPAL simulation but not submitting the job (--test) \n\n\n')

        elif doNobatch:
            if not quiet:
                print('Done with setup of the OPAL simulation and executing the job on {} cores...\n\n\n'.format(CORES))
            ofn, fileExtension = os.path.splitext(oinpFile)
            print( 'STD output is written to {}.out'.format(ofn))
            #execommand = 'mpirun -np ' + str(CORES)  + ' ' + opalexe + ' ' + oinpFile + '  2>&1 | tee ' + ofn + '.out'
            outfileName = ofn +'.out'
            # Currently not writing to screen anymore
            # There is a solution described at https://stackoverflow.com/questions/15535240/python-popen-write-to-stdout-and-log-file-simultaneously
            with open(outfileName,'w') as outfile:
                qid = subprocess.call(['mpirun', '-np', str(CORES), opalexe, oinpFile], stdout=outfile, stderr=outfile)

        else:
            if batchsys == 'SLURM' or batchsys == 'COBALT':
                if batchsys == 'SLURM':
                    command = 'sbatch'
                elif batchsys == 'COBALT':
                    command = 'qsub'

                qid = subprocess.call(command + ' ' + runfile + " | awk '{print $3}'", shell=True)
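                # Note: subprocess.call() returns the submit command's exit status,
                # not the scheduler job id. A sketch for capturing the id instead
                # (output format differs between sbatch and qsub, so treat this as an assumption):
                #   out = subprocess.check_output([command, runfile]).decode()
                #   qid = out.strip().split()[-1]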
                if not quiet:
                    print('Done with setup of the OPAL simulation and submitting the job with {} cores \n\n\n'.format(CORES))

            elif batchsys == 'PBS':
                if not quiet:
                    print('Done with setup of the OPAL simulation, please submit the job yourself')

            else:
                print("Batch system", batchsys, "not known!")

        os.chdir('..')
        return qid

    def WriteCori(self, opalexe, oinpFile, cores, time, ram, info, name):
        title=oinpFile.partition(".")[0]
        myfile = open(name,'w')
        s1 = "#!/bin/bash -l \n"
        s1 += "#SBATCH -p regular \n"
        s1 += "#SBATCH -N 1 \n"
        s1 += "#SBATCH -t " + time + "G\n" 
        s1 += "#SBATCH -J " + title + "\n"
        s1 += "#SBATCH --qos=premium \n"
        s1 += "srun -n 1 .... \n"
        myfile.write(s1)
        myfile.close()

    def WriteEngaging(self, opalexe, oinpFile, cores, time, ram, info, name):
        print("Writing SLURM run file for Engaging cluster at MIT")

        cores = int(cores)
        coresPerNode = 32
        partition = os.getenv("SLURM_PARTITION", "sched_mit_psfc")

        if (cores % coresPerNode) == 0:
            nodes = int(cores/coresPerNode)
        else:
            nodes = int(cores/coresPerNode) + 1

        with open(name, 'w') as outfile:
            outfile.write("#!/bin/bash\n" 
                          "# submit with sbatch {}\n"
                          "# commandline arguments may instead by supplied with #SBATCH <flag> <value>\n"
                          "# commandline arguments override these values\n"
                          "\n"
                          "# Number of nodes\n".format(name))
            outfile.write("#SBATCH -N {}\n".format(nodes))
            outfile.write("# Number of total processor cores \n")
            outfile.write("#SBATCH -n {}\n".format(cores))
            outfile.write("# Memory (MB) \n")
            outfile.write("#SBATCH --mem {}\n".format(int(ram) * 1000))
            outfile.write("# specify how long your job needs.\n")
            outfile.write("#SBATCH --time={}\n".format(time))
            outfile.write("# which partition or queue the jobs runs in\n")
            outfile.write("#SBATCH -p {}\n".format(partition))
            outfile.write("#customize the name of the stderr/stdout file. %j is the job number\n")
            outfile.write("#SBATCH -o {}.o%j".format(os.path.splitext(oinpFile)[0]))
            outfile.write("\n")
#            outfile.write("#load default system modules\n")
#            outfile.write(". /etc/profile.d/modules.sh")
#            outfile.write("\n")
#            outfile.write("#load modules your job depends on.\n")
#            outfile.write("#better here than in your $HOME/.bashrc to make "
#                         "debugging and requirements easier to track.\n")
#            outfile.write("module load gcc/4.8.4\n")
#            outfile.write("module load engaging/openmpi/1.8.8\n")
#            outfile.write("module load engaging/cmake/3.5.2\n")
#            outfile.write("module load engaging/boost/1.56.0\n")
#            outfile.write("module load engaging/gsl/2.2.1\n")
#            outfile.write("\n")
            outfile.write("####################################################\n")
            outfile.write("# BEGIN DEBUG\n")
            outfile.write("# Print the SLURM environment on master host: \n")
            outfile.write("####################################################\n")
            outfile.write("echo '=== Slurm job  JOB_NAME=$JOB_NAME  JOB_ID=$JOB_ID'\n") 
            outfile.write("####################################################\n")
            outfile.write("echo DATE=`date`\n")
            outfile.write("echo HOSTNAME=`hostname`\n") 
            outfile.write("echo PWD=`pwd`\n")
            outfile.write("####################################################\n")
            outfile.write("echo 'Running environment:' \n")
            outfile.write("env \n")
            outfile.write("####################################################\n")
            outfile.write("echo 'Loaded environment modules:' \n")
            outfile.write("module list 2>&1\n") 
            outfile.write("echo \n")
            outfile.write("# END DEBUG\n") 
            outfile.write("####################################################\n")
            outfile.write("\n")
            outfile.write("#Finally, the command to execute.\n")
            outfile.write("#The job starts in the directory it was submitted from.\n")
            outfile.write("#Note that mpirun knows from SLURM how many processor we have\n")
            outfile.write("mpirun {} {} --info {} --warn 6\n".format(opalexe, oinpFile, info))

    def WriteEdison(self, opalexe, oinpFile, cores, time, ram, info, name):
        title=oinpFile.partition(".")[0]

        coresPerNode = 24
        cores = int(cores)

        if cores % coresPerNode == 0:
            nodes = int(cores / coresPerNode)
        else:
            nodes = int(cores / coresPerNode) + 1
        
        s1 = "#!/bin/bash -l \n"
        s1 += "#SBATCH -q regular \n"
        s1 += "#SBATCH -N " + str(nodes) + " \n"
        s1 += "#SBATCH -t " + time + "\n" 
        s1 += "#SBATCH -J " + title + "\n"
        s1 += "#SBATCH -o " + title + ".o%j\n"
        s1 += "#SBATCH -L SCRATCH \n"
        s1 += "srun -n " + str(cores) + " " + opalexe + " " + oinpFile + "\n"

        myfile = open(name, 'w')
        myfile.write(s1)
        myfile.close()
        

    def WriteMerlin5(self, opalexe, oinpFile, cores, time, ram, info, name, partition):

        # 
        # ADA this is for the new PSI Merlin5 
        # 
        
        title = oinpFile.partition(".")[0]
        myfile = open(name, 'w')
        s1 =  "#!/bin/bash -l \n"
        s1 += "#SBATCH --job-name=" + title + "\n"
        s1 += "#SBATCH --output="   + title + ".o%j\n"
        s1 += "#SBATCH --time=" + time + "\n"
        s1 += "#SBATCH --ntasks=" + str(cores) + "\n"
        # Discussed in https://gitlab.psi.ch/OPAL/runOPAL/issues/7:
        if (int(cores) > 16):
            s1 += "#SBATCH --ntasks-per-node=16 \n"
        else:
            s1 += "#SBATCH --nodes=1 \n"

#        s1 += "#SBATCH --mem=" + str(ram) + "GB \n"
        s1 += "#SBATCH --partition=" + str(partition) + " \n"
        s1 += "mpirun " + opalexe + " " + oinpFile + " --info " + str(info) + "\n"
        myfile.write(s1)
        myfile.close()

    def WritePizDaint(self, opalexe, oinpFile, cores, time, ram, info, name, partition):
        
        # XC40 Compute Nodes
        # Intel Xeon E5-2696 v4 @ 2.10GHz (2x18 cores, 64/128 GB RAM)
        # http://user.cscs.ch/computing_systems/piz_daint/index.html

        coresPerNode = 36
        title = oinpFile.partition(".")[0]
        myfile = open(name, 'w')
        s1 =  "#!/bin/bash -l \n"
        s1 += "#SBATCH --job-name=" + title + "\n"
        s1 += "#SBATCH --time=" + time + "\n"
        s1 += "#SBATCH --ntasks=" + str(cores) + "\n"
        s1 += "#SBATCH --ntasks-per-node=" + str(coresPerNode) + " \n"
        s1 += "#SBATCH --ntasks-per-core=1 \n"
        s1 += "#SBATCH --cpus-per-task=1 \n"
        s1 += "#SBATCH --constraint=mc \n"
        s1 += "#SBATCH --mem=" + str(ram) + "GB \n"
        s1 += "#SBATCH --partition=" + str(partition) + " \n"
        s1 += "#SBATCH --account=psi07 \n"
        s1 += "export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK \n"
        s1 += "module load daint-mc \n"
        s1 += "srun " + opalexe + " " + oinpFile + "\n"
        myfile.write(s1)
        myfile.close()
    
    def WritePBSBlues(self, opalexe, oinpFile, cores, time, ram, info, queue):
        # time  <- export SGE_TIME="walltime=0:20:00"
        # cores <- export CORES="nodes=1:ppn=16"
        title=oinpFile.partition(".")[0]
        myfile = open('run.pbs','w')
        s1 = "#!/bin/sh \n"
        s1 += "#PBS -o " + title + "_log  \n"
        s1 += "#PBS -r n \n"
        s1 += "#PBS -j oe \n"
        s1 += "#PBS -N " + title + "\n"
        s1 += "#PBS -m aeb \n"
        s1 += "#PBS -M nneveu@anl.gov \n"
        s1 += "#PBS -l " + time + " \n"
        s1 += "#PBS -l " + cores + " \n"
        s1 += "#PBS -q " + queue + " \n"
        try:
            v = os.environ["OPAL_EXE_PATH"]
        except KeyError:
            print("OPAL_EXE_PATH not set bye bye")
            sys.exit(1)
        s1 += "cd $PBS_O_WORKDIR \n"
        s1 += "####################################################\n"
        s1 += "echo DATE=`date`\n"
        s1 += "echo HOSTNAME=`hostname` \n"
        s1 += "echo PWD=`pwd`\n"
        s1 += "cat $PBS_NODEFILE\n"
        s1 += "NSLOTS=$(wc -l < $PBS_NODEFILE)\n"
        s1 += "####################################################\n"
        s1 += "CMD=$OPAL_EXE_PATH/opal \n"
        s1 += "echo $CMD\n"
        s1 += "ARGS=" + "\"" + oinpFile + " --info " + str(info) + " --warn 6 \"\n"
        s1 += "####################################################\n"
        s1 += "MPICMD=\"mpirun -np $NSLOTS $CMD $ARGS\" \n"
        s1 += "echo $MPICMD\n"
        s1 += "$MPICMD \n"
        s1 += "####################################################\n"
        myfile.write(s1)
        myfile.close()              
   
    def WriteBebop(self, opalexe, oinpFile, cores, time, ram, info, name, queue, hypert, quiet):
        # BDW and KNL Compute Nodes at ANL
        # http://www.lcrc.anl.gov/for-users/using-lcrc/running-jobs/running-jobs-on-bebop/
        cores = int(cores)
        #Checking that a valid queue is selected
        #Adjusting number of cores for specified queue 
        if (queue=='bdw' or queue=='bdwall' or queue=='bdwd'):
            if not quiet:
                print('Running on BDW')
            coresPerNode = 36 * (hypert+1)     # hypert == 0 -> no hyper threading 
        elif (queue=='knl' or queue=='knlall' or queue=='knld'):
            if not quiet:
                print('Running on KNL')
            coresPerNode = 64 * (hypert+1)
        else:
            print('You have picked an invalid queue!! Your run will fail!!')

        #Calculating # of nodes needed, and # of tasks per node 
        #  Only calc tasks per node if total core number 
        #  is not evenly divisible by # of nodes
        if (cores % coresPerNode) == 0:
            if (cores < coresPerNode):
                nodes = 1
            else:
                nodes = cores // coresPerNode
                tasks_per_node = cores // nodes
        else:
            while (cores % coresPerNode) != 0:
                coresPerNode -= 1
                nodes = cores // coresPerNode

            tasks_per_node = cores // nodes
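            # Illustrative example: cores=40 on BDW with hypert=0 (coresPerNode=36):
            # 40 % 36 != 0, so coresPerNode is lowered until it divides 40 evenly
            # (coresPerNode=20), giving nodes=2 and tasks_per_node=20.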
            #print(nodes,cores, tasks_per_node)

        title = oinpFile.partition(".")[0]
        myfile = open(name, 'w')
        
        s1 =  "#!/bin/bash -l \n"
        s1 += "#SBATCH --job-name=" + title + "\n"
        s1 += "#SBATCH -o " + title + ".%j.%N.out \n" 
        s1 += "#SBATCH -e " + title + ".%j.%N.error \n"
        s1 += "#SBATCH -p " + queue + " \n"
        s1 += "#SBATCH --time=" + time + "\n"
        s1 += "#SBATCH --ntasks=" + str(cores) + "\n"
        s1 += "#SBATCH --ntasks-per-node=" + str(coresPerNode) + "\n"
        s1 += "cd $SLURM_SUBMIT_DIR \n"
        #s1 += "export I_MPI_SLURM_EXT=0 \n"
        s1 += "export I_MPI_FABRICS=shm:tmi \n"
        if (queue=='knl' or queue=='knlall' or queue=='knld'):
            s1 += "#SBATCH -C knl,quad,cache \n"
        if int(nodes) > 1:
            s1 += "#SBATCH --ntasks-per-node=" + str(tasks_per_node) + " \n"
            s1 += "mpirun -n $SLURM_NTASKS "+ opalexe + " " + oinpFile + "\n"
        else:
            s1 += "mpirun -n $SLURM_NTASKS " + opalexe + " " + oinpFile + "\n"
        #s1 += "#SBATCH --mem=" + ram + "GB \n"
        #s1 += "export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK \n"
        #s1 += "--hint=nomultithread " + opalexe + " " + oinpFile + "\n"
       
        myfile.write(s1)
        myfile.close()

    def WriteTheta(self, opalexe, oinpFile, cores, time, ram, info, queue, hypert):
        # queue = default, debug-cache-quad, debug-flat-quad
        # cores = min of 8 nodes for default queue 
        try:
            v = os.environ["OPAL_EXE_PATH"]
        except KeyError:
            print("OPAL_EXE_PATH not set bye bye")
            sys.exit(1)
              
        cores        = int(cores)
        coresPerNode = 64 * (hypert+1)

        if (cores % coresPerNode) == 0:
            if (cores < coresPerNode):
                nodes = int(1)
            else:
                nodes = int(cores / coresPerNode)
                tasks_per_node = int(cores/nodes)
        else:
            while((cores % coresPerNode) != 0): 
                coresPerNode -= int(1)
                nodes = int(cores/coresPerNode) 

            tasks_per_node = int(cores / nodes)
            #print(nodes,cores, tasks_per_node)
   
        if cores < 512:
            queue = 'debug-cache-quad'
            time  = '00:59:00'
        #elif cores > 512: 
        #nodes = np.ceil(cores/64)

        total_mpi_ranks = int(nodes*coresPerNode)

        title=oinpFile.partition(".")[0]
        myfile = open('run.sh','w')
        s1 =  "#!/bin/bash  \n"
        s1 += "#COBALT -t " + time + " \n"
        s1 += "#COBALT -n " + str(nodes) + " \n"
        s1 += "#COBALT -q " + queue + " \n"
        s1 += "#COBALT --attrs mcdram=cache:numa=quad \n"
        s1 += "#COBALT -A awa \n"
        s1 += 'echo "Starting Cobalt job script"\n'
        s1 += "export n_nodes=$COBALT_JOBSIZE \n"
        s1 += "export n_mpi_ranks_per_node=" + str(coresPerNode)+ " \n"
        s1 += "export n_mpi_ranks=" + str(total_mpi_ranks) + "\n"
        #s1 += "export n_openmp_threads_per_rank=4"
        if hypert > 0:       
            s1 += "export n_hyperthreads_per_core=2 \n"
        #s1 += "export n_hyperthreads_skipped_between_ranks=4"
        s1 += "####################################################\n"
        s1 += "ARGS=" + "\"" + oinpFile + " --info " + str(info) + " --warn 6 \"\n"
        s1 += "CMD=$OPAL_EXE_PATH/opal \n"
        if hypert > 0:
            s1 += "MPICMD=\"aprun -n $n_mpi_ranks -N $n_mpi_ranks_per_node -j $n_hyperthreads_per_core $CMD $ARGS\" \n"
        else:
            s1 += "MPICMD=\"aprun -n $n_mpi_ranks -N $n_mpi_ranks_per_node $CMD $ARGS\" \n"
        s1 += "echo $MPICMD\n"
        s1 += "$MPICMD \n"
        s1 += "####################################################\n"
        myfile.write(s1)
        myfile.close()              
        os.chmod("run.sh", 0o775)
#aprun -n $n_mpi_ranks -N $n_mpi_ranks_per_node \
#--env OMP_NUM_THREADS=$n_openmp_threads_per_rank -cc depth \
#  -d $n_hyperthreads_skipped_between_ranks \
#  -j $n_hyperthreads_per_core \
#  <executable> <executable args>