More than 5 years have passed since last update.

Radeon RX 570でROCmを使えるようにする

Last updated at 2020-02-25 / Posted at 2020-02-23

ROCmを有効にしてPyTorchをCMakeでビルドする

環境

OS : Ubuntu-server-18.04
CPU :  AMD Ryzen 7 3700X
GPU : Radeon RX 470
メモリ : 16GB

カーネルの更新

# Refresh the package index and upgrade the installed packages first.
$ sudo apt update
$ sudo apt dist-upgrade
# Install the Ubuntu 18.04 hardware-enablement (HWE) kernel and X.org packages;
# the uname output shown afterwards reports a 5.3 kernel.
$ sudo apt install --install-recommends linux-generic-hwe-18.04 xserver-xorg-hwe-18.04
# Reboot so that the newly installed kernel is the one running.
$ sudo reboot

Ubuntu 18.04 ではカーネル 5 系が必要なため、最初にカーネルを更新します。

$ uname -a
Linux ryzen 5.3.0-40-generic #32~18.04.1-Ubuntu SMP Mon Feb 3 14:05:59 UTC 2020 x86_64 x86_64 x86_64 GNU/Linux

ドライバのインストール

# Download and unpack the AMDGPU-PRO 19.50 driver bundle for Ubuntu 18.04.
$ wget https://drivers.amd.com/drivers/linux/19.50/amdgpu-pro-19.50-967956-ubuntu-18.04.tar.xz
$ tar Jxvf amdgpu-pro-19.50-967956-ubuntu-18.04.tar.xz
# --headless / --no-dkms / --no-32 are chosen because this is a server install
# and the ROCm runtime is installed separately in a later step.
$ ./amdgpu-pro-19.50-967956-ubuntu-18.04/amdgpu-install --headless --no-dkms --no-32

Ubuntu Server であること、また後から ROCm のランタイム (rocm-dkms) を入れることから、上記のオプションを指定しています。

開発ツールとROCmライブラリのインストール

# Fetch the repository signing key over HTTPS: this key is the trust anchor for
# every package installed from the repository, so it must not be fetched in clear text.
$ curl -fsSL https://repo.radeon.com/rocm/apt/debian/rocm.gpg.key | sudo apt-key add -
# Quote the whole source line so the shell cannot glob-expand "[arch=amd64]",
# and write the file through `sudo tee` instead of a root shell redirect.
$ echo 'deb [arch=amd64] http://repo.radeon.com/rocm/apt/debian/ xenial main' | sudo tee /etc/apt/sources.list.d/rocm.list
# Pick up the package lists of the newly added repository.
$ sudo apt update

# Install the build toolchain, the Python 3 tooling and the ROCm development
# packages (rocm-dev, MIOpen, RCCL, rocBLAS, rocFFT, ...) in a single transaction.
# -y answers prompts automatically; --no-install-recommends skips packages that
# are merely recommended by the ones listed.
$ sudo apt-get install -y --no-install-recommends \
    sudo \
    libelf1 \
    build-essential \
    bzip2 \
    ca-certificates \
    cmake \
    ccache \
    ssh \
    apt-utils \
    pkg-config \
    g++-multilib \
    gdb \
    git \
    less \
    libunwind-dev \
    libfftw3-dev \
    libelf-dev \
    libncurses5-dev \
    libomp-dev \
    libpthread-stubs0-dev \
    make \
    miopen-hip \
    python3-dev \
    python3-future \
    python3-yaml \
    python3-pip \
    libssl-dev \
    libboost-dev \
    libboost-system-dev \
    libboost-filesystem-dev \
    libopenblas-dev \
    rpm \
    net-tools \
    iputils-ping \
    libnuma-dev \
    rccl \
    rocm-dev \
    rocm-profiler \
    roctracer-dev \
    rocrand \
    rocblas \
    rocfft \
    hipcub \
    rocthrust \
    hipsparse

全ユーザーで python が python3 として動くようにする

# Register python2.7 (priority 1) and python3.6 (priority 2) as alternatives for
# /usr/bin/python; in automatic mode the higher priority entry, python3.6, wins.
$ sudo update-alternatives --install /usr/bin/python python /usr/bin/python2.7 1
$ sudo update-alternatives --install /usr/bin/python python /usr/bin/python3.6 2
# Likewise make /usr/bin/pip resolve to pip3.
$ sudo update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 2

ROCm ランタイムライブラリのインストール

# Install the ROCm runtime package (rocm-dkms).
$ sudo apt-get install -y --no-install-recommends rocm-dkms
# Add the current user to the "video" group.
# NOTE(review): presumably required for access to the GPU device nodes - confirm.
$ sudo usermod -a -G video $LOGNAME

# Configure adduser so that users created later are placed in "video" as well.
$ echo 'ADD_EXTRA_GROUPS=1' | sudo tee -a /etc/adduser.conf
$ echo 'EXTRA_GROUPS=video' | sudo tee -a /etc/adduser.conf

# Reboot so the changes above take effect.
$ sudo reboot

Fix the capitalization of the HIP package name (hip -> HIP) in the CMake config files of some ROCm libraries.

$ sudo sed -i 's/find_dependency(hip)/find_dependency(HIP)/g' /opt/rocm/rocsparse/lib/cmake/rocsparse/rocsparse-config.cmake
$ sudo sed -i 's/find_dependency(hip)/find_dependency(HIP)/g' /opt/rocm/rocfft/lib/cmake/rocfft/rocfft-config.cmake
$ sudo sed -i 's/find_dependency(hip)/find_dependency(HIP)/g' /opt/rocm/miopen/lib/cmake/miopen/miopen-config.cmake
$ sudo sed -i 's/find_dependency(hip)/find_dependency(HIP)/g' /opt/rocm/rocblas/lib/cmake/rocblas/rocblas-config.cmake

python で作られたビルドツールで使われているモジュールのインストール

# Python modules used by PyTorch's Python-based build tooling.
# typing and enum34 are intentionally not installed: both are backports of
# modules that the Python 3.6 configured above already ships in its standard
# library, and enum34 shadows the built-in enum module, which can break imports.
$ pip install setuptools numpy scipy hypothesis

環境変数の設定

.bashrc

#
# ROCm-related settings
#
# Append the ROCm, ROCm profiler and OpenCL tool directories to the search path.
export PATH=$PATH:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64
# Locations of the CMake package files for RCCL and HIP.
# NOTE(review): presumably consumed by find_package() during the PyTorch configure step - confirm.
export RCCL_DIR=/opt/rocm/rccl/lib/cmake/rccl
export hip_DIR=/opt/rocm/hip/cmake
# NOTE(review): presumably selects the hcc backend of HIP - confirm.
export HIP_PLATFORM='hcc'
# Force a UTF-8 English locale for the build.
export LC_ALL='en_US.UTF-8'
export LC_CTYPE='en_US.UTF-8'
# GPU architecture to build PyTorch for; use the identifier that rocminfo
# reports in the agent's "Name:" field (gfx803 for the card used here).
export PYTORCH_ROCM_ARCH='gfx803'

$ source ~/.bashrc

PyTorchのインストール

# Check out the v1.4.0 tag together with all of its submodules.
$ git clone --recursive -b v1.4.0 https://github.com/pytorch/pytorch.git
$ cd pytorch/
# Run PyTorch's AMD build preparation script before configuring.
# NOTE(review): presumably this translates the CUDA sources to HIP - confirm.
$ ./tools/amd_build/build_amd.py
# Configure out-of-tree in a dedicated build directory.
$ mkdir build
$ cd build
# C++-only release build with ROCm enabled; the Python bindings, Caffe2
# operators, Caffe2 mobile build, CUDA and tests are all switched off.
$ cmake \
    -DBUILD_PYTHON=OFF \
    -DBUILD_CAFFE2_OPS=OFF \
    -DBUILD_CAFFE2_MOBILE=OFF \
    -DUSE_CUDA=OFF \
    -DUSE_ROCM=ON \
    -DBUILD_TEST=OFF \
    -DCMAKE_BUILD_TYPE=Release \
    ..
# Build and install with 12 parallel jobs rather than all 16 hardware threads,
# which the article reports fails during the source-generation phase.
$ make -j 12
$ sudo make -j 12 install

make に全 16 スレッドを使うとソース生成部分でエラーになるので、少なめのスレッド数を指定したほうが安全です。

動作確認

sample /
    CMakeLists.txt
    main.cc

CMakeLists.txt

# Minimal project that builds the libtorch smoke test in main.cc.
#
# CMake has deprecated compatibility with versions older than 3.5, so require
# 3.5 instead of 3.0 to keep configuring cleanly on current CMake releases.
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)

project(torch-sample)

# Compile as strict ISO C++14 (-std=c++14 rather than -std=gnu++14).
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Locate the installed libtorch package; configuration stops if it is missing.
find_package(Torch REQUIRED)

# If libtorch was installed to a custom location, point find_package at it
# instead (TORCH_INSTALL_PREFIX must then be supplied by the user, e.g. with -D):
# find_package(Torch REQUIRED PATHS ${TORCH_INSTALL_PREFIX}/lib)

add_executable(sample-bin ${CMAKE_CURRENT_SOURCE_DIR}/main.cc)
# PRIVATE: libtorch is only needed to build this executable. Always giving a
# visibility keyword also avoids the legacy keyword-less link signature.
target_link_libraries(sample-bin PRIVATE ${TORCH_LIBRARIES})
# The target is called sample-bin, but the produced binary is named "sample".
set_target_properties(sample-bin PROPERTIES OUTPUT_NAME sample)

main.cc

#include <iostream>
#include <torch/torch.h>

// Smoke test for a ROCm-enabled libtorch build: creates a 2x3 tensor of ones
// on the GPU and prints it. The GPU is addressed through the CUDA device type
// (the printed tensor type is CUDAFloatType even on a Radeon card).
//
// Returns 0 on success, 1 when libtorch reports no usable GPU device.
int main()
{
    // Without this check torch::ones() throws an exception that nobody
    // catches when no GPU is visible, and the program aborts without a
    // clear explanation.
    if (!torch::cuda::is_available())
    {
        std::cerr << "No CUDA/ROCm device is available to libtorch." << std::endl;
        return 1;
    }

    const torch::Device device(torch::kCUDA);

    const torch::Tensor tensor = torch::ones({2, 3}, device);
    std::cout << tensor << std::endl;

    return 0;
}

$ mkdir build
$ cd build
# Generate the Makefile from ../CMakeLists.txt first; a freshly created build
# directory contains nothing for make to run.
$ cmake ..
$ make -j
# CMakeLists.txt sets OUTPUT_NAME to "sample", so that is the binary to run.
$ ./sample
 1  1  1
 1  1  1
[ CUDAFloatType{2,3} ]

付録

LLVM Clang のインストール

ひょっとしたら必須かもしれませんが、おそらく任意です。

# Trust the signing key of the apt.llvm.org repository.
$ curl -fsSL https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
# Use the bionic suite, which matches the Ubuntu 18.04 system used in this
# article; the xenial suite is built for Ubuntu 16.04.
# The source lines are quoted so the shell cannot glob-expand "[arch=amd64]",
# and are written through `sudo tee` (-a appends the deb-src line).
$ echo 'deb [arch=amd64] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-10 main' | sudo tee /etc/apt/sources.list.d/llvm10.list
$ echo 'deb-src http://apt.llvm.org/bionic/ llvm-toolchain-bionic-10 main' | sudo tee -a /etc/apt/sources.list.d/llvm10.list
$ sudo apt-get update
$ sudo apt-get install -y --no-install-recommends clang-10

# Register clang-10 / clang++-10 as alternatives for the gcc and g++ commands
# (priority 5), so that invoking gcc or g++ runs clang.
# NOTE(review): anything that calls gcc expecting GNU GCC will get clang after
# this - confirm that this is intended before applying it system-wide.
$ sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/clang-10 5
$ sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/clang++-10 5

ドライバの確認

$ /opt/rocm/bin/rocminfo
        :
    N/A
*******
Agent 2
*******
  Name:                    gfx803
  Marketing Name:          Ellesmere [Radeon RX 470/480/570/570X/580/580X]
  Vendor Name:             AMD
  Feature:                 KERNEL_DISPATCH
        :

中央部に表示される GPU の識別名 (Name: gfx803) を環境変数 PYTORCH_ROCM_ARCH へセットする

$ /opt/rocm/bin/rocm-smi
========================ROCm System Management Interface========================
================================================================================
GPU  Temp   AvgPwr   SCLK    MCLK    Fan     Perf  PwrCap  VRAM%  GPU%
0    34.0c  33.095W  300Mhz  300Mhz  23.92%  auto  120.0W    0%   0%
================================================================================
==============================End of ROCm SMI Log ==============================

Pythonのsetup.py でインストール

# The wheel package provides the bdist_wheel command used on the next line.
$ pip install wheel
# Build a wheel with the Caffe2 operators disabled (BUILD_CAFFE2_OPS=0).
# NOTE(review): bdist_wheel only builds the package; installing the resulting
# wheel appears to be a separate step that is not shown here - confirm.
$ BUILD_CAFFE2_OPS=0 python setup.py bdist_wheel

スレッド数を指定する時は上限を指定しない方がよい様です。

感想

とにかくコンパイルに時間がかかります。
SSDに展開しているはずですが、メモリが足りない？
Juman++をコンパイルした場合、Ryzen7(3700X)はとんでもなく早かったです。
Intel i7 の第６世代と比べた場合です。

BUILD_CAFFE2_OPS を有効にするとエラーになり、原因はわかりませんでした。
とりあえず libtorch が動くまでをアップしました。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up