Installing Slurm
Data sharing: setting up an NFS server
Server-side configuration
sudo apt install nfs-kernel-server
sudo mkdir /mnt/NFS
sudo chmod 777 /mnt/NFS
sudo vim /etc/exports
------------------
/mnt/NFS 192.168.1.0/24(rw,no_root_squash)
------------------
sudo /etc/init.d/nfs-kernel-server restart
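To confirm the directory is actually being exported, re-export and list the active exports:
sudo exportfs -ra
sudo exportfs -v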
Client-side configuration
sudo apt install nfs-common
sudo mkdir /mnt/NFS
sudo chmod 777 /mnt/NFS
sudo mount -t nfs 192.168.1.2:/mnt/NFS /mnt/NFS
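To make the mount survive reboots, an /etc/fstab entry along these lines should work (assuming the server address 192.168.1.2 used above):
------------------
192.168.1.2:/mnt/NFS /mnt/NFS nfs defaults 0 0
------------------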
Installing Slurm
Creating the munge and slurm users
Accounts on this cluster are managed in LDAP, so both users are added with an LDIF file. Save the following as add-slurm-munge.ldif:
# Group: slurm
dn: cn=slurm,ou=Groups,dc=example,dc=com
objectClass: posixGroup
cn: slurm
gidNumber: 5000
# User: slurm
dn: uid=slurm,ou=People,dc=example,dc=com
objectClass: inetOrgPerson
objectClass: posixAccount
objectClass: shadowAccount
uid: slurm
sn: slurm
cn: slurm
uidNumber: 5000
gidNumber: 5000
homeDirectory: /var/lib/slurm
loginShell: /sbin/nologin
userPassword: {CRYPT}x
# Group: munge
dn: cn=munge,ou=Groups,dc=example,dc=com
objectClass: posixGroup
cn: munge
gidNumber: 5001
# User: munge
dn: uid=munge,ou=People,dc=example,dc=com
objectClass: inetOrgPerson
objectClass: posixAccount
objectClass: shadowAccount
uid: munge
sn: munge
cn: munge
uidNumber: 5001
gidNumber: 5001
homeDirectory: /var/lib/munge
loginShell: /bin/bash
userPassword: {CRYPT}x
ldapadd -x -D "cn=admin,dc=example,dc=com" -W -f add-slurm-munge.ldif
ldappasswd -x -D cn=admin,dc=example,dc=com -W -S uid=slurm,ou=People,dc=example,dc=com
ldappasswd -x -D cn=admin,dc=example,dc=com -W -S uid=munge,ou=People,dc=example,dc=com
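If the nodes are already set up as LDAP clients (e.g. via sssd), the new accounts should resolve on every node; a quick check:
getent passwd slurm munge
id slurm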
Installing munge
Installing prerequisites
OpenSSL
wget https://www.openssl.org/source/openssl-3.3.0.tar.gz
tar -xf openssl-3.3.0.tar.gz
cd openssl-3.3.0/
./Configure
make
sudo make install
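OpenSSL installs under /usr/local by default, so running ldconfig afterwards helps the dynamic linker find the freshly installed libraries:
sudo ldconfig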
Other packages
sudo apt install bzip2 pkgconf autoconf automake libtool libz-dev -y
Building munge
$ git clone https://github.com/dun/munge.git
$ cd munge
$ ./bootstrap
$ ./configure \
--prefix=/usr \
--sysconfdir=/etc \
--localstatedir=/var \
--runstatedir=/run
$ make
$ make check
$ sudo make install
sudo chown -R munge: /etc/munge/ /var/log/munge/ /var/lib/munge/
sudo chmod 0700 /etc/munge/ /var/log/munge/ /var/lib/munge/
sudo systemctl enable munge.service
sudo systemctl start munge.service
Creating the key (management node)
sudo -u munge /usr/sbin/mungekey --verbose
Copying the key (compute nodes)
The key is placed on the NFS share in the "Sharing munge.key and slurm.conf" step below; on each compute node, copy it into place:
sudo cp /mnt/NFS/munge.key /etc/munge/
sudo chown munge:munge /etc/munge/munge.key
sudo chmod 400 /etc/munge/munge.key
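At this point munge authentication can be sanity-checked: a credential generated locally should decode both locally and on another node (the remote form assumes SSH access between nodes; node2 stands in for any other node):
munge -n | unmunge
munge -n | ssh node2 unmunge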
Installing MariaDB (management node only)
sudo apt install mariadb-server
Creating the slurm DB user and granting privileges
Open the MariaDB shell with sudo mysql and run the following (replace 'password' with a real one; it must match StoragePass in slurmdbd.conf later):
grant all on slurm_acct_db.* TO 'slurm'@'localhost' identified by 'password' with grant option;
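The Slurm accounting documentation also recommends raising a few InnoDB settings before slurmdbd first creates its tables. A sketch, with the file name being my choice (/etc/mysql/mariadb.conf.d/99-slurm.cnf):
------------------
[mysqld]
innodb_buffer_pool_size=1024M
innodb_log_file_size=64M
innodb_lock_wait_timeout=900
------------------
Restart MariaDB afterwards: sudo systemctl restart mariadb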
Building and installing Slurm
Installing prerequisites
sudo apt install build-essential fakeroot devscripts libdbus-1-dev -y
Building the accounting plugin against MariaDB also needs the development headers; on Ubuntu these come from libmariadb-dev (plus libmariadb-dev-compat for the mysql_config name).
wget https://download.schedmd.com/slurm/slurm-23.11.6.tar.bz2
tar -xaf slurm*tar.bz2
cd slurm-23.11.6/
./configure --with-mysql_config=/usr/bin/
make
sudo make install
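make install may not install the systemd unit files; configure generates them under etc/ in the source tree, so if they are missing they can be copied by hand (a sketch, assuming the 23.11.6 tarball layout):
sudo cp etc/slurmctld.service etc/slurmd.service etc/slurmdbd.service /etc/systemd/system/
sudo systemctl daemon-reload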
Edit etc/slurm.conf.example from the source tree and save the result as /usr/local/etc/slurm.conf (the default sysconfdir for this build):
#
# Example slurm.conf file. Please run configurator.html
# (in doc/html) to build a configuration file customized
# for your environment.
#
#
# slurm.conf file generated by configurator.html.
# Put this file on all nodes of your cluster.
# See the slurm.conf man page for more information.
#
ClusterName=cluster
SlurmctldHost=node1
#SlurmctldHost=
#
#DisableRootJobs=NO
#EnforcePartLimits=NO
#Epilog=
#EpilogSlurmctld=
#FirstJobId=1
#MaxJobId=67043328
#GresTypes=
#GroupUpdateForce=0
#GroupUpdateTime=600
#JobFileAppend=0
#JobRequeue=1
#JobSubmitPlugins=lua
#KillOnBadExit=0
#LaunchType=launch/slurm
#Licenses=foo*4,bar
#MailProg=/bin/mail
#MaxJobCount=10000
#MaxStepCount=40000
#MaxTasksPerNode=512
MpiDefault=none
#MpiParams=ports=#-#
#PluginDir=
#PlugStackConfig=
#PrivateData=jobs
ProctrackType=proctrack/cgroup
#Prolog=
#PrologFlags=
#PrologSlurmctld=
#PropagatePrioProcess=0
#PropagateResourceLimits=
#PropagateResourceLimitsExcept=
#RebootProgram=
ReturnToService=1
SlurmctldPidFile=/var/run/slurmctld.pid
SlurmctldPort=6817
SlurmdPidFile=/var/run/slurmd.pid
SlurmdPort=6818
SlurmdSpoolDir=/var/spool/slurmd
SlurmUser=slurm
#SlurmdUser=root
#SrunEpilog=
#SrunProlog=
StateSaveLocation=/var/spool/slurmctld
SwitchType=switch/none
#TaskEpilog=
TaskPlugin=task/affinity
#TaskProlog=
#TopologyPlugin=topology/tree
#TmpFS=/tmp
#TrackWCKey=no
#TreeWidth=
#UnkillableStepProgram=
#UsePAM=0
#
#
# TIMERS
#BatchStartTimeout=10
#CompleteWait=0
#EpilogMsgTime=2000
#GetEnvTimeout=2
#HealthCheckInterval=0
#HealthCheckProgram=
InactiveLimit=0
KillWait=30
#MessageTimeout=10
#ResvOverRun=0
MinJobAge=300
#OverTimeLimit=0
SlurmctldTimeout=120
SlurmdTimeout=300
#UnkillableStepTimeout=60
#VSizeFactor=0
Waittime=0
#
#
# SCHEDULING
#DefMemPerCPU=0
#MaxMemPerCPU=0
#SchedulerTimeSlice=30
SchedulerType=sched/backfill
SelectType=select/cons_tres
#
#
# JOB PRIORITY
#PriorityFlags=
#PriorityType=priority/multifactor
#PriorityDecayHalfLife=
#PriorityCalcPeriod=
#PriorityFavorSmall=
#PriorityMaxAge=
#PriorityUsageResetPeriod=
#PriorityWeightAge=
#PriorityWeightFairshare=
#PriorityWeightJobSize=
#PriorityWeightPartition=
#PriorityWeightQOS=
#
#
# LOGGING AND ACCOUNTING
#AccountingStorageEnforce=0
AccountingStorageHost=node1
AccountingStoragePass=/var/run/munge/munge.socket.2
AccountingStoragePort=6819
#AccountingStorageType=accounting_storage/none
AccountingStorageType=accounting_storage/slurmdbd
#AccountingStorageUser=
#AccountingStoreFlags=
#JobCompHost=
#JobCompLoc=
#JobCompPass=
#JobCompPort=
JobCompType=jobcomp/none
#JobCompUser=
#JobContainerType=
JobAcctGatherFrequency=30
JobAcctGatherType=jobacct_gather/none
SlurmctldDebug=info
SlurmctldLogFile=/var/log/slurm/slurmctld.log
SlurmdDebug=info
SlurmdLogFile=/var/log/slurm/slurmd.log
#SlurmSchedLogFile=
#SlurmSchedLogLevel=
#DebugFlags=
#
#
# POWER SAVE SUPPORT FOR IDLE NODES (optional)
#SuspendProgram=
#ResumeProgram=
#SuspendTimeout=
#ResumeTimeout=
#ResumeRate=
#SuspendExcNodes=
#SuspendExcParts=
#SuspendRate=
#SuspendTime=
#
#
# COMPUTE NODES
NodeName=node[1-4] CPUs=2 State=UNKNOWN
PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP OverSubscribe=YES
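The CPUs= value (and the other hardware fields) for the NodeName line can be taken from each node itself; slurmd prints a ready-made line:
slurmd -C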
Creating /usr/local/etc/slurmdbd.conf
#
# Example slurmdbd.conf file.
#
# See the slurmdbd.conf man page for more information.
#
# Archive info
#ArchiveJobs=yes
#ArchiveDir="/tmp"
#ArchiveSteps=yes
#ArchiveScript=
#JobPurge=12
#StepPurge=1
#
# Authentication info
AuthType=auth/munge
#AuthInfo=/var/run/munge/munge.socket.2
#
# slurmDBD info
DbdAddr=localhost
DbdHost=localhost
DbdPort=6819
SlurmUser=slurm
#MessageTimeout=300
DebugLevel=verbose
#DefaultQOS=normal,standby
LogFile=/var/log/slurm/slurmdbd.log
PidFile=/run/slurm/slurmdbd.pid
#PluginDir=/usr/lib/slurm
#PrivateData=accounts,users,usage,jobs
#TrackWCKey=yes
#
# Database info
StorageType=accounting_storage/mysql
StorageHost=localhost
#StoragePort=1234
StoragePass=password
StorageUser=slurm
StorageLoc=slurm_acct_db
chown slurm:slurm /usr/local/etc/slurmdbd.conf
chmod 600 /usr/local/etc/slurmdbd.conf
mkdir /var/spool/slurmd
chown slurm:slurm /var/spool/slurmd
chmod 755 /var/spool/slurmd
mkdir /var/spool/slurmctld
chown slurm:slurm /var/spool/slurmctld
chmod 755 /var/spool/slurmctld
mkdir /var/log/slurm
touch /var/log/slurm/slurmctld.log
touch /var/log/slurm/slurm_jobacct.log /var/log/slurm/slurm_jobcomp.log
chown -R slurm:slurm /var/log/slurm/
chmod 755 /var/log/slurm
Not really sure why, but apparently this needs to be set
echo CgroupMountpoint=/sys/fs/cgroup >> /usr/local/etc/cgroup.conf
Configure /run/slurm to be created automatically
sudo vim /etc/tmpfiles.d/slurm.conf
---------
d /run/slurm 0770 slurm slurm -
---------
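With the configuration in place, the daemons on the management node can be started (a sketch, assuming the unit files were installed as above; systemd-tmpfiles applies the /run/slurm rule immediately instead of waiting for a reboot):
sudo systemd-tmpfiles --create /etc/tmpfiles.d/slurm.conf
sudo systemctl enable --now slurmdbd
sudo systemctl enable --now slurmctld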
Sharing munge.key and slurm.conf
cp /etc/munge/munge.key /mnt/NFS
cp /usr/local/etc/slurm.conf /mnt/NFS
Compute nodes
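On each compute node, copy the shared config into place, create the directories slurmd needs, and start it (a sketch; it assumes Slurm and munge were built and installed on the compute nodes the same way as on the management node):
sudo cp /mnt/NFS/slurm.conf /usr/local/etc/
sudo mkdir -p /var/spool/slurmd /var/log/slurm
sudo chown slurm:slurm /var/spool/slurmd /var/log/slurm
sudo systemctl enable --now slurmd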
Verifying job execution
Write a job script along these lines. (I haven't verified that this script is correct, so fix it up as needed.) It's just to give the general idea.
#!/bin/bash
#SBATCH --job-name=mpi_job           # job name
#SBATCH --output=mpi_job_%j.out      # file for stdout and stderr
#SBATCH --nodes=4                    # number of nodes to use
#SBATCH --ntasks=8                   # total number of tasks (processes)
#SBATCH --ntasks-per-node=2          # tasks per node
#SBATCH --oversubscribe              # allow oversubscribing the nodes
#SBATCH --time=00:30:00              # maximum run time (e.g. 30 minutes)
#SBATCH --partition=your_partition   # partition to use
module load mpi                      # load the required MPI module
mpirun /mnt/NFS/mpicode/a.out        # run the MPI program
a.out is something like this:
#include <stdio.h>
#include <mpi.h>

int main(int argc, char **argv){
    int rank, size;
    MPI_Init(&argc, &argv);                // initialize MPI
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);  // get this process's rank
    MPI_Comm_size(MPI_COMM_WORLD, &size);  // get the total number of ranks
    printf("Hello World, I am %d of %d\n", rank, size);
    MPI_Finalize();                        // shut down MPI
    return 0;
}
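The binary has to live on the NFS share so that every node sees the same path. Assuming an MPI implementation with the usual mpicc wrapper is installed and the source above is saved as hello.c:
mkdir -p /mnt/NFS/mpicode
mpicc hello.c -o /mnt/NFS/mpicode/a.out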
sbatch ./myjob.sh
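While the job runs, squeue shows the queue and sinfo shows node and partition state:
squeue
sinfo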
If it works, you should get output like this.
(a photo from when, overcome with emotion, I posted it to X)