Commit de821cc7 authored by bellotti_r's avatar bellotti_r
Browse files

Added class that is able to wait for a job to finish and track its state

parent 0c8ce134
from .simulation import Simulation
from .opaldict import OpalDict
from .slurmjob import SlurmJob
import subprocess
import time
class SlurmJob:
    '''
    Class representing a SLURM job.

    The job state is not cached: every access to `status` runs the
    `sacct` command line tool to query the accounting data.
    '''

    def __init__(self, ID):
        '''
        Parameters
        ==========
        ID: int or str
            Identification number of the slurm job.
        '''
        self._ID = ID

    @staticmethod
    def _parse_state(output):
        '''
        Extract the job state from the text printed by `sacct -o state`.

        Parameters
        ==========
        output: str
            Standard output of the `sacct` call.

        Returns
        =======
        The stripped content of the third line of `output`, or an empty
        string if `output` has fewer than three lines.
        '''
        # Format of output:
        #
        #      State
        # ----------
        # CANCELLED+
        #
        # The first two lines are the header and its underline, so the
        # state of the job is on the third line.
        lines = output.split('\n')
        if len(lines) < 3:
            # Nothing beyond the header (e.g. sacct printed nothing at
            # all): report "no state yet" instead of raising IndexError.
            return ''
        return lines[2].strip()

    @property
    def status(self):
        '''
        Returns the current job status.

        The status is the state string exactly as printed by `sacct`
        (e.g. 'RUNNING' or 'CANCELLED+'). An empty string is returned if
        `sacct` did not print a state line.
        '''
        # Build the argument list directly instead of splitting a formatted
        # string, so the ID is always passed as exactly one argument.
        cmd = ['sacct', '-j', str(self._ID), '-o', 'state']
        completed_process = subprocess.run(cmd,
                                           stdout=subprocess.PIPE,
                                           encoding='utf-8')
        return self._parse_state(completed_process.stdout)

    def wait_for_completion(self, timeout=None, poll_interval=3):
        '''
        Wait until the job has finished.

        Parameters
        ==========
        timeout: int or None
            Maximum time to wait (in s). If None: Wait forever for completion.
        poll_interval: int or float
            Time to sleep between two status queries (in s).

        Returns
        =======
        True if the job completed successfully, False if it failed

        Raises
        ======
        TimeoutError
            If the timeout is exceeded
        RuntimeError
            If the job state is not in
            ['PENDING', 'RUNNING', 'COMPLETED', 'FAILED']
        '''
        # Use the monotonic clock: unlike time.time() it is not affected by
        # adjustments of the system clock while we are waiting.
        start_time = time.monotonic()
        has_started = False

        while True:
            if timeout is not None and time.monotonic() - start_time > timeout:
                raise TimeoutError(f'Job {self._ID} has timed out!')

            state = self.status

            if state == 'COMPLETED':
                return True
            if state == 'FAILED':
                return False

            if state == 'RUNNING':
                # Announce the start of the job only once.
                if not has_started:
                    print('Job is running...')
                    has_started = True
            elif state not in ('', 'PENDING'):
                # An empty state means sacct has not reported anything for
                # the job yet; like 'PENDING' it just means "keep waiting".
                # Everything else is unexpected.
                raise RuntimeError(f'Unknown job state: {state}')

            time.sleep(poll_interval)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment