Job#

class nnodes.job.DTN(job: dict, state: list)#

Oak Ridge National Lab Data Transfer Node.

class nnodes.job.Job(job: dict, state: list)#

Base class for clusters.

property aborted#

Any task failed twice during execution.

create(dst: Optional[str] = None)#

Creates a directory as job workspace.

property failed#

Any task failed during execution.

property inqueue: bool#

Job is allocated from scheduler (enables automatic requeue and mpiexec timeout).

mpiexec(cmd: str, nprocs: int, cpus_per_proc: int = 1, gpus_per_proc: int = 0, mps: Optional[int] = None) str#

Run an MPI task.

property paused#

Job paused due to insufficient time.

property remaining: float#

Remaining walltime in minutes.

requeue()#

Resubmit current job.

write(cmd: str, dst: str)#

Write job submission script to target directory.

class nnodes.job.LSF(job: dict, state: list)#

LSF-based cluster.

property inqueue#

Job is allocated from scheduler (enables automatic requeue and mpiexec timeout).

mpiexec(cmd: str, nprocs: int, cpus_per_proc: int = 1, gpus_per_proc: int = 0, mps: Optional[int] = None)#

Get the command to call MPI.

requeue()#

Run current job again.

write(cmd, dst)#

Write job submission script to target directory.

class nnodes.job.Local(job: dict, state: list)#

Local computer using multiprocessing instead of MPI.

class nnodes.job.LocalMPI(job: dict, state: list)#

Local computer with MPI installed.

mpiexec(cmd: str, nprocs: int, *_)#

Get the command to call MPI.

class nnodes.job.Slurm(job: dict, state: list)#

Slurm-based cluster.

property inqueue#

Job is allocated from scheduler (enables automatic requeue and mpiexec timeout).

mpiexec(cmd: str, nprocs: int, cpus_per_proc: int = 1, gpus_per_proc: int = 0, mps: Optional[int] = None)#

Get the command to call MPI.

requeue()#

Run current job again.

write(cmd, dst)#

Write job submission script to target directory.

class nnodes.job.Summit(job: dict, state: list)#
class nnodes.job.Tiger(job: dict, state: list)#

Princeton TigerGPU.

class nnodes.job.Traverse(job: dict, state: list)#

Princeton Traverse.