... | ... | @@ -476,6 +476,89 @@ An explanation of some state follows: |
|
|
| R | RUNNING | Job currently has an allocation. |
|
|
|
| S | SUSPENDED | Job has an allocation, but execution has been suspended and CPUs have been released for other jobs. |
|
|
|
|
|
|
#### scontrol
|
|
|
With the Slurm command **scontrol** you can get a more detailed overview of your running jobs, the node hardware, and the partitions:
|
|
|
|
|
|
```
|
|
|
[user@res-hpc-lo01 ~]$ scontrol show job 260
|
|
|
JobId=260 JobName=IMB
|
|
|
UserId=user(225812) GroupId=Domain Users(513) MCS_label=N/A
|
|
|
Priority=35603 Nice=0 Account=dnst-ict QOS=normal
|
|
|
JobState=RUNNING Reason=None Dependency=(null)
|
|
|
Requeue=1 Restarts=0 BatchFlag=1 Reboot=0 ExitCode=0:0
|
|
|
RunTime=00:00:13 TimeLimit=00:30:00 TimeMin=N/A
|
|
|
SubmitTime=2020-01-23T10:27:45 EligibleTime=2020-01-23T10:27:45
|
|
|
AccrueTime=2020-01-23T10:27:45
|
|
|
StartTime=2020-01-23T10:27:45 EndTime=2020-01-23T10:57:45 Deadline=N/A
|
|
|
SuspendTime=None SecsPreSuspend=0 LastSchedEval=2020-01-23T10:27:45
|
|
|
Partition=all AllocNode:Sid=res-hpc-ma01:46428
|
|
|
ReqNodeList=(null) ExcNodeList=(null)
|
|
|
NodeList=res-hpc-exe[013-014]
|
|
|
BatchHost=res-hpc-exe013
|
|
|
NumNodes=2 NumCPUs=32 NumTasks=32 CPUs/Task=1 ReqB:S:C:T=0:0:*:*
|
|
|
TRES=cpu=32,mem=64G,node=2,billing=32
|
|
|
Socks/Node=* NtasksPerN:B:S:C=16:0:*:* CoreSpec=*
|
|
|
MinCPUsNode=16 MinMemoryCPU=2G MinTmpDiskNode=0
|
|
|
Features=(null) DelayBoot=00:00:00
|
|
|
OverSubscribe=OK Contiguous=0 Licenses=(null) Network=(null)
|
|
|
Command=/home/user/Software/imb/mpi-benchmarks/imb.slurm
|
|
|
WorkDir=/home/user/Software/imb/mpi-benchmarks
|
|
|
StdErr=/home/user/Software/imb/mpi-benchmarks/job.%J.err
|
|
|
StdIn=/dev/null
|
|
|
StdOut=/home/user/Software/imb/mpi-benchmarks/job.%J.out
|
|
|
Power=
|
|
|
MailUser=user@gmail.com MailType=BEGIN,END,FAIL
|
|
|
|
|
|
[user@res-hpc-lo01 ~]$ scontrol show node res-hpc-exe013
|
|
|
NodeName=res-hpc-exe013 Arch=x86_64 CoresPerSocket=8
|
|
|
CPUAlloc=16 CPUTot=16 CPULoad=0.00
|
|
|
AvailableFeatures=(null)
|
|
|
ActiveFeatures=(null)
|
|
|
Gres=(null)
|
|
|
NodeAddr=res-hpc-exe013 NodeHostName=res-hpc-exe013 Version=20.02.0-0pre1
|
|
|
OS=Linux 4.18.0-80.11.2.el8_0.x86_64 #1 SMP Tue Sep 24 11:32:19 UTC 2019
|
|
|
RealMemory=128800 AllocMem=32768 FreeMem=121945 Sockets=2 Boards=1
|
|
|
State=ALLOCATED ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
|
|
|
Partitions=all
|
|
|
BootTime=2019-12-11T11:50:20 SlurmdStartTime=2020-01-14T15:35:41
|
|
|
CfgTRES=cpu=16,mem=128800M,billing=16
|
|
|
AllocTRES=cpu=16,mem=32G
|
|
|
CapWatts=n/a
|
|
|
CurrentWatts=0 AveWatts=0
|
|
|
ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s
|
|
|
|
|
|
[user@res-hpc-lo01 ~]$ scontrol show node res-hpc-exe014
|
|
|
NodeName=res-hpc-exe014 Arch=x86_64 CoresPerSocket=12
|
|
|
CPUAlloc=16 CPUTot=24 CPULoad=0.00
|
|
|
AvailableFeatures=(null)
|
|
|
ActiveFeatures=(null)
|
|
|
Gres=(null)
|
|
|
NodeAddr=res-hpc-exe014 NodeHostName=res-hpc-exe014 Version=20.02.0-0pre1
|
|
|
OS=Linux 4.18.0-80.11.2.el8_0.x86_64 #1 SMP Tue Sep 24 11:32:19 UTC 2019
|
|
|
RealMemory=386800 AllocMem=32768 FreeMem=380208 Sockets=2 Boards=1
|
|
|
State=MIXED ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
|
|
|
Partitions=all
|
|
|
BootTime=2019-12-11T11:51:40 SlurmdStartTime=2020-01-14T15:36:20
|
|
|
CfgTRES=cpu=24,mem=386800M,billing=24
|
|
|
AllocTRES=cpu=16,mem=32G
|
|
|
CapWatts=n/a
|
|
|
CurrentWatts=0 AveWatts=0
|
|
|
ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s
|
|
|
|
|
|
[user@res-hpc-lo01 ~]$ scontrol show partition all
|
|
|
PartitionName=all
|
|
|
AllowGroups=ALL AllowAccounts=ALL AllowQos=ALL
|
|
|
AllocNodes=ALL Default=YES QoS=N/A
|
|
|
DefaultTime=01:00:00 DisableRootJobs=NO ExclusiveUser=NO GraceTime=0 Hidden=NO
|
|
|
MaxNodes=UNLIMITED MaxTime=UNLIMITED MinNodes=0 LLN=NO MaxCPUsPerNode=UNLIMITED
|
|
|
Nodes=res-hpc-exe[013-014]
|
|
|
PriorityJobFactor=1 PriorityTier=1 RootOnly=NO ReqResv=NO OverSubscribe=NO
|
|
|
OverTimeLimit=NONE PreemptMode=OFF
|
|
|
State=UP TotalCPUs=40 TotalNodes=2 SelectTypeParameters=NONE
|
|
|
JobDefaults=(null)
|
|
|
DefMemPerCPU=2048 MaxMemPerNode=UNLIMITED
|
|
|
```
|
|
|
|
|
|
### Submitting jobs
|
|
|
|
|
|
```
|
... | ... | |