メモです。
SageMakerノートブックインスタンスのライフサイクル設定で、MeCab + mecab-ipadic-NEologd をインストールするスクリプトを作成
.sh
#!/bin/bash
# SageMaker notebook lifecycle configuration script.
#
# Installs a few Python packages into the python3 environment, builds MeCab
# and the mecab-ipadic dictionary from source, and installs the
# mecab-ipadic-NEologd dictionary under /opt/mecab.
#
# Everything inside the heredoc runs as ec2-user in a login shell; the quoted
# 'EOF' delimiter prevents the outer shell from expanding anything in it.
sudo -u ec2-user -i <<'EOF'
# Activate the python3 environment before running pip.
source activate python3

# System packages needed by the pip installs below.
sudo yum install -y ca-certificates
sudo yum install -y python-devel
sudo yum install -y mysql-devel

pip install mysqlclient
pip install lightgbm
pip install mecab-python3
# ... (further steps were elided here in the original note; a bare "..." is
# not a valid command, so it is kept only as a comment)

# Fetch the sources into explicit locations under the home directory so the
# clone targets always match the `cd` paths used below. Skip the clone when
# the directory already exists so a re-run does not fail.
[[ -d ~/mecab ]] || git clone --depth 1 https://github.com/taku910/mecab.git ~/mecab
[[ -d ~/mecab-ipadic-neologd ]] || git clone --depth 1 https://github.com/neologd/mecab-ipadic-neologd.git ~/mecab-ipadic-neologd

sudo mkdir -p /opt/mecab

# Make binaries installed under /usr/local/bin visible to the later steps.
PATH="${PATH}:/usr/local/bin"

# Build and install MeCab, then the ipadic dictionary. `sudo make install` is
# chained with && so it never runs after a failed cd/configure/make (which
# could otherwise execute it in the wrong directory).
cd ~/mecab/mecab && ./configure --enable-utf8-only && make && sudo make install
cd ~/mecab/mecab-ipadic && ./configure --with-charset=utf8 && make && sudo make install

# Install the NEologd dictionary into the /opt/mecab tree.
cd ~/mecab-ipadic-neologd && ./bin/install-mecab-ipadic-neologd -n -y -p /opt/mecab/lib/mecab/dic/neologd

# NOTE(review): presumably exposes cc1plus on PATH for later C++ builds —
# confirm whether this needs to run before the builds above.
# -f keeps the command idempotent if the link already exists.
sudo ln -sf /usr/libexec/gcc/x86_64-amazon-linux/4.8.5/cc1plus /usr/local/bin/

source deactivate
EOF
スクリプトの実行ログはCloudWatch Logsで確認できます。
ロググループ: /aws/sagemaker/NotebookInstances → ログストリーム: notebook名/LifecycleConfigOnStart