Slurmのインストール:計算ノード
NFSの設定
sudo apt install nfs-common
sudo mkdir /mnt/NFS
sudo chmod 777 /mnt/NFS
sudo mount -t nfs 192.168.1.2:/mnt/NFS /mnt/NFS
mungeのインストール
必要パッケージのインストール
- openssl
wget https://www.openssl.org/source/openssl-3.3.0.tar.gz
tar -xf openssl-3.3.0.tar.gz
cd openssl-3.3.0/
./Configure
make
sudo make install
sudo apt install bzip2 pkgconf autoconf automake libtool libz-dev -y
mungeのビルド
git clone https://github.com/dun/munge.git
cd munge
./bootstrap
./configure \
  --prefix=/usr \
  --sysconfdir=/etc \
  --localstatedir=/var \
  --runstatedir=/run
make
make check
sudo make install
sudo chown -R munge: /etc/munge/ /var/log/munge/ /var/lib/munge/
sudo chmod 0700 /etc/munge/ /var/log/munge/ /var/lib/munge/
鍵のコピー(計算ノード)
sudo scp /mnt/NFS/munge.key /etc/munge/
sudo chown munge:munge /etc/munge/munge.key
sudo chmod 400 /etc/munge/munge.key
sudo systemctl enable munge.service
sudo systemctl start munge.service
sudo systemctl status munge
Slurmのインストール
sudo apt install build-essential fakeroot devscripts libdbus-1-dev -y
wget https://download.schedmd.com/slurm/slurm-23.11.6.tar.bz2
tar -xaf slurm*tar.bz2
cd slurm-23.11.6/
./configure
make
sudo make install
sudo scp /mnt/NFS/slurm.conf /usr/local/etc/slurm.conf
※ 以下のコマンドにはsudoが付いていないため、root権限(例: sudo -i)で実行すること
mkdir /var/spool/slurmd
chown slurm: /var/spool/slurmd
chmod 755 /var/spool/slurmd
mkdir /var/log/slurm/
touch /var/log/slurm/slurmd.log
chown -R slurm:slurm /var/log/slurm/slurmd.log
chmod 755 /var/log/slurm
mkdir /run/slurm
chown slurm /run/slurm
chown slurm:slurm /run/slurm
chmod -R 770 /run/slurm
slurmd.serviceの編集
以下の行のコメントを外す(行頭の # を削除して有効化する):
ConditionPathExists=/usr/local/etc/slurm.conf
編集したslurmd.service(ビルドディレクトリ slurm-23.11.6/ の etc/slurmd.service)をコピーする:
cp etc/slurmd.service /usr/lib/systemd/system/slurmd.service
echo CgroupMountpoint=/sys/fs/cgroup >> /usr/local/etc/cgroup.conf
systemctl enable slurmd.service
systemctl start slurmd.service
systemctl status slurmd.service
参考資料
https://www.youtube.com/watch?v=Fm5RIE3NSN8&t=131s
https://qiita.com/believeriver/items/9993c3730fed2a777d4a
https://web.chaperone.jp/w/index.php?slurm