Commit de821cc7 authored by bellotti_r's avatar bellotti_r
Browse files

Added class that is able to wait for a job to finish and track its state

parent 0c8ce134
from .simulation import Simulation from .simulation import Simulation
from .opaldict import OpalDict from .opaldict import OpalDict
from .slurmjob import SlurmJob
import subprocess
import time
class SlurmJob:
    '''
    Class representing a SLURM job.

    The job state is obtained by running the ``sacct`` command line tool as
    a subprocess, so ``sacct`` has to be available on the ``PATH``.
    '''

    def __init__(self, ID):
        '''
        Parameters
        ==========
        ID: int or str
            Identification number of the slurm job.
        '''
        self._ID = ID

    @staticmethod
    def _parse_state(output):
        '''
        Extract the job state from the text printed by ``sacct -o state``.

        Format of output:

                 State
            ----------
            CANCELLED+

        i.e. a header line, a ruler line and then the data rows. Only the
        first data row is used.

        Parameters
        ==========
        output: str
            Raw standard output of the ``sacct`` call.

        Returns
        =======
        str
            Content of the first data row without surrounding whitespace,
            or '' if the output contains no data row at all.
        '''
        lines = output.split('\n')
        # Header + ruler occupy the first two lines; anything shorter
        # (e.g. empty output) has no state to report.
        if len(lines) < 3:
            return ''
        return lines[2].strip()

    @property
    def status(self):
        '''
        Returns the current job status.

        Returns
        =======
        str
            State string as printed by ``sacct`` (e.g. 'RUNNING'), or '' if
            ``sacct`` did not list the job.

        Raises
        ======
        RuntimeError
            If ``sacct`` exited with a non-zero return code and printed
            no state.
        '''
        # Build the argument list directly instead of splitting a formatted
        # string, so an ID containing whitespace cannot add extra arguments.
        cmd = ['sacct', '-j', str(self._ID), '-o', 'state']
        completed_process = subprocess.run(cmd,
                                           stdout=subprocess.PIPE,
                                           encoding='utf-8')

        state = self._parse_state(completed_process.stdout)

        # An empty state is only legitimate if sacct itself succeeded;
        # otherwise report the failure instead of silently polling on.
        if state == '' and completed_process.returncode != 0:
            raise RuntimeError(
                f'sacct failed for job {self._ID} '
                f'(return code {completed_process.returncode})')

        return state

    def wait_for_completion(self, timeout=None, poll_interval=3):
        '''
        Wait until the job has finished.

        The job state is polled repeatedly. Prints a message once when the
        job is first seen in state 'RUNNING'.

        Parameters
        ==========
        timeout: int or None
            Maximum time to wait (in s). If None: Wait forever for completion.
        poll_interval: int or float
            Time to sleep between two state queries (in s).

        Returns
        =======
        True if the job completed successfully, False if it failed

        Raises
        ======
        TimeoutError
            If the timeout is exceeded
        RuntimeError
            If the job state is not in
            ['PENDING', 'RUNNING', 'COMPLETED', 'FAILED']
            (an empty state is tolerated and polled again),
            or if the state query itself failed.
        '''
        # Monotonic clock: the elapsed time must not be affected by
        # adjustments of the system wall clock.
        start_time = time.monotonic()
        has_started = False

        while True:
            if (timeout is not None) \
                    and (time.monotonic() - start_time > timeout):
                raise TimeoutError(f'Job {self._ID} has timed out!')

            state = self.status

            if state == 'COMPLETED':
                return True
            if state == 'FAILED':
                return False

            if state == 'RUNNING':
                if not has_started:
                    print('Job is running...')
                    has_started = True
            elif state not in ('', 'PENDING'):
                # '' (no row reported, presumably the job is not yet known
                # to the accounting) and 'PENDING' just mean: keep waiting.
                raise RuntimeError(f'Unknown job state: {state}')

            time.sleep(poll_interval)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment