Site defaults for the example slurm.conf (text before the file header
below is skipped by patch).

What this patch changes in etc/slurm.conf.example:
  ClusterName becomes Beastie and ControlMachine becomes head.
  PID files move under /var/run/slurm, log files under /var/log/slurm.
  PropagateResourceLimits is set to NONE.
  TaskPlugin=task/affinity is enabled with TaskPluginParam=cores.
  DefMemPerCPU=256 is added and SelectTypeParameters becomes CR_Core_Memory.
  JobAcctGatherType is set to jobacct_gather/none; accounting storage is
  set to accounting_storage/filetxt at /home/slurm/Accounting.
  A commented-out power-saving template (Suspend*/Resume* settings) is added.
  The linux[1-32]/debug node and partition examples are replaced by
  compute-[001-002] in a default partition named batch.

NOTE(review): the slurm.conf man page lists ControlMachine as deprecated in
favour of SlurmctldHost; verify against the targeted Slurm release before
switching, since the removed lines must keep matching the upstream file.

--- etc/slurm.conf.example.orig	2020-03-26 21:44:05 UTC
+++ etc/slurm.conf.example
@@ -8,8 +8,9 @@
 #
 # See the slurm.conf man page for more information.
 #
-ClusterName=linux
-ControlMachine=linux0
+ClusterName=Beastie
+# Short hostname of the head node
+ControlMachine=head
 #ControlAddr=
 #BackupController=
 #BackupAddr=
@@ -25,8 +26,8 @@ StateSaveLocation=/var/spool/slurm/ctld
 SlurmdSpoolDir=/var/spool/slurm/d
 SwitchType=switch/none
 MpiDefault=none
-SlurmctldPidFile=/var/run/slurmctld.pid
-SlurmdPidFile=/var/run/slurmd.pid
+SlurmctldPidFile=/var/run/slurm/slurmctld.pid
+SlurmdPidFile=/var/run/slurm/slurmd.pid
 ProctrackType=proctrack/pgid
 #PluginDir=
 #FirstJobId=
@@ -34,7 +35,7 @@ ReturnToService=0
 #MaxJobCount=
 #PlugStackConfig=
 #PropagatePrioProcess=
-#PropagateResourceLimits=
+PropagateResourceLimits=NONE
 #PropagateResourceLimitsExcept=
 #Prolog=
 #Epilog=
@@ -42,7 +43,9 @@ ReturnToService=0
 #SrunEpilog=
 #TaskProlog=
 #TaskEpilog=
-#TaskPlugin=
+TaskPlugin=task/affinity
+TaskPluginParam=cores
+# For debugging: TaskPluginParam=cores,verbose
 #TrackWCKey=no
 #TreeWidth=50
 #TmpFS=
@@ -57,10 +60,11 @@ KillWait=30
 Waittime=0
 #
 # SCHEDULING
+DefMemPerCPU=256
 SchedulerType=sched/backfill
 #SchedulerAuth=
 SelectType=select/cons_tres
-SelectTypeParameters=CR_Core
+SelectTypeParameters=CR_Core_Memory
 #PriorityType=priority/multifactor
 #PriorityDecayHalfLife=14-0
 #PriorityUsageResetPeriod=14-0
@@ -72,22 +76,58 @@ SelectTypeParameters=CR_Core
 #
 # LOGGING
 SlurmctldDebug=info
-SlurmctldLogFile=/var/log/slurmctld.log
+SlurmctldLogFile=/var/log/slurm/slurmctld
 SlurmdDebug=info
-SlurmdLogFile=/var/log/slurmd.log
+SlurmdLogFile=/var/log/slurm/slurmd
 JobCompType=jobcomp/none
 #JobCompLoc=
 #
 # ACCOUNTING
-#JobAcctGatherType=jobacct_gather/linux
+JobAcctGatherType=jobacct_gather/none
 #JobAcctGatherFrequency=30
 #
-#AccountingStorageType=accounting_storage/slurmdbd
+AccountingStorageType=accounting_storage/filetxt
+AccountingStorageLoc=/home/slurm/Accounting
+AccountingStoreJobComment=YES
 #AccountingStorageHost=
 #AccountingStorageLoc=
 #AccountingStoragePass=
 #AccountingStorageUser=
+
+############################################################################
+# Enable power saving if remote IPMI power-on is available on compute nodes.
+# If unavailable on some nodes, list them in SuspendExcNodes.
+# SlurmUser must be a member of operator and wheel and have a valid
+# login shell in order to execute shutdown on compute nodes.
+# If you prefer to control power manually, see the following scripts
+# from the SPCM port:
 #
+# auto-ipmi-remote-power
+# cluster-power-saver
+# cluster-power-waster
+# cluster-ipmi-power-on
+############################################################################
+
+# SuspendProgram=/usr/local/etc/spcm/slurm-node-suspend
+# SuspendTime should be >= SuspendTimeout + ResumeTimeout.
+# SuspendTime=600
+# SuspendTimeout=60
+#
+# ResumeProgram=/usr/local/etc/spcm/slurm-node-resume
+# ResumeTimeout=300
+# BatchStartTimeout=300
+#
+# Exempt compute nodes that double as file servers or don't have IPMI
+# remote power-on enabled.
+#
+# SuspendExcNodes=compute-001
+
+#
 # COMPUTE NODES
-NodeName=linux[1-32] Procs=1 State=UNKNOWN
-PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP
+# Set RealMemory < avail memory in /var/run/dmesg.boot
+# Note that it may change slightly following freebsd-update
+NodeName=compute-[001-002] Sockets=2 CoresPerSocket=6 RealMemory=30000 State=UNKNOWN
+# NodeName=compute-256g-[001-002] Sockets=2 CoresPerSocket=6 RealMemory=250000 State=UNKNOWN
+# PartitionName=debug Nodes=ALL Default=NO MaxTime=INFINITE State=UP
+PartitionName=batch Nodes=compute-[001-002] Default=YES MaxTime=INFINITE State=UP
+# PartitionName=256g Nodes=compute-256g-[001-002] Default=NO MaxTime=INFINITE State=UP