OpenCV
docker
python3
tesseract-ocr

Python3 + OpenCV3 + Tesseract4.0alpha(jpn)なdockerコンテナ

別のdockerイメージに Python3 + OpenCV3 + Tesseract4.0alpha(jpn) を追加した時のメモ

* `pip install opencv-python` はパッケージが見つからないと言われる（Alpine は musl libc のため、PyPI の manylinux 用 wheel が使えないのが原因と思われる）ので、ソースからビルドしてインストールする
* イメージのサイズは一切考慮できていない・・・
# Base image: the Alpine variant of the OpenJDK 8 JDK image.
FROM openjdk:8-jdk-alpine

# repos mirror(たまに繋がらないのでその時は下記のどれかに変更する)
# http://dl-cdn.alpinelinux.org/alpine/
# http://nl.alpinelinux.org/alpine/
# http://dl-2.alpinelinux.org/alpine/
# http://dl-3.alpinelinux.org/alpine/
# http://dl-4.alpinelinux.org/alpine/
# http://dl-5.alpinelinux.org/alpine/
# http://dl-6.alpinelinux.org/alpine/
# http://dl-8.alpinelinux.org/alpine/
# http://distrib-coffee.ipsl.jussieu.fr/pub/linux/alpine/alpine/
# http://mirror.yandex.ru/mirrors/alpine/
# http://mirrors.gigenet.com/alpinelinux/
# http://repos.lax-noc.com/alpine/
# http://repos.dfw.lax-noc.com/alpine/
# http://repos.mia.lax-noc.com/alpine/
# http://mirror1.hs-esslingen.de/pub/Mirrors/alpine/
# http://liskamm.alpinelinux.uk/
# http://mirrors.2f30.org/alpine/
# http://mirror.leaseweb.com/alpine/
# http://repository.fit.cvut.cz/mirrors/alpine/
# http://alpine.mirror.far.fi/
# http://lasca.ic.unicamp.br/pub/alpine/
# http://alpinelinux.c3sl.ufpr.br/

# Install the CA certificate bundle; --no-cache avoids leaving an apk index cache in the layer.
# NOTE(review): presumably needed for the HTTPS downloads performed by later steps — confirm.
RUN apk add --no-cache ca-certificates

# tesseract-ocr
# Recipe adapted from:
#   https://github.com/gnkm/docker-alpine-tesseract-jpn/blob/master/Dockerfile
#   https://hub.docker.com/r/gnkm/alpine-tesseract-jpn/~/dockerfile/
# The trained data files below are stored in /usr/share/tessdata.
ENV TESSDATA_PREFIX=/usr/share
RUN set -x \
  # temporary helper packages so that wget can download from https sites
  && apk add --update --no-cache --virtual wget-dependencies \
       openssl \
       tar \
       xz \
  # tesseract itself is only available from the edge/testing repository
  && apk add --no-cache --repository http://dl-cdn.alpinelinux.org/alpine/edge/testing tesseract-git --allow-untrusted \
  # fetch the English, Japanese and vertical-Japanese models; stop at the first failed download
  && for model in eng jpn jpn_vert; do \
       wget -q -P /usr/share/tessdata/ "https://github.com/tesseract-ocr/tessdata_best/raw/master/${model}.traineddata" \
         || exit 1; \
     done \
  # the download helpers are no longer needed
  && apk del wget-dependencies


# python
# Builds CPython ${PYTHON_VERSION} from the signed source tarball and installs it under /usr/local.
# https://hub.docker.com/r/frolvlad/alpine-python3/~/dockerfile/
ENV PATH=$PATH:/usr/local/bin
# ENV LANG C.UTF-8
# Fingerprint of the key the source tarball signature is verified against.
ENV GPG_KEY=0D96DF4D4110E5C43FBFB17F2D347EA6AA65421D
ENV PYTHON_VERSION=3.6.3
RUN set -ex \
    # tools needed only to download and verify the tarball
    && apk add --no-cache --virtual .fetch-deps \
        gnupg \
        libressl \
        tar \
        xz \
    && wget -O python.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-${PYTHON_VERSION}.tar.xz" \
    && wget -O python.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-${PYTHON_VERSION}.tar.xz.asc" \
    # use a throwaway keyring so nothing from the verification step stays in the image
    && export GNUPGHOME="$(mktemp -d)" \
    # the SKS keyserver pool (ha.pool.sks-keyservers.net) has been shut down; use keys.openpgp.org
    && gpg --batch --keyserver hkps://keys.openpgp.org --recv-keys "${GPG_KEY}" \
    && gpg --batch --verify python.tar.xz.asc python.tar.xz \
    # stop gpg-agent/dirmngr first so that removing the keyring directory cannot race with them
    && { command -v gpgconf > /dev/null && gpgconf --kill all || :; } \
    && rm -rf "${GNUPGHOME}" python.tar.xz.asc \
    && mkdir -p /usr/src/python \
    && tar -xJC /usr/src/python --strip-components=1 -f python.tar.xz \
    && rm -rf python.tar.xz \
    # compiler toolchain and headers, removed again once the build is done
    && apk add --no-cache --virtual .build-deps  \
        bzip2-dev \
        coreutils \
        dpkg-dev dpkg \
        expat-dev \
        gcc \
        gdbm-dev \
        libc-dev \
        libffi-dev \
        linux-headers \
        make \
        ncurses-dev \
        libressl \
        libressl-dev \
        pax-utils \
        readline-dev \
        sqlite-dev \
        tcl-dev \
        tk \
        tk-dev \
        xz-dev \
        zlib-dev \
    # add build deps before removing fetch deps in case there's overlap
    && apk del .fetch-deps \
    && cd /usr/src/python \
    && gnuArch="$(dpkg-architecture --query DEB_BUILD_GNU_TYPE)" \
    && ./configure \
        --build="$gnuArch" \
        --enable-loadable-sqlite-extensions \
        --enable-shared \
        --with-system-expat \
        --with-system-ffi \
        --without-ensurepip \
    && make -j "$(nproc)" \
    && make install \
    # collect the shared libraries needed by what was installed under /usr/local,
    # skipping those that live in /usr/local/lib itself, and pin them as "so:" dependencies
    # so that removing .build-deps does not take them away
    && runDeps="$( \
        scanelf --needed --nobanner --format '%n#p' --recursive /usr/local \
            | tr ',' '\n' \
            | sort -u \
            | awk 'system("[ -e /usr/local/lib/" $1 " ]") == 0 { next } { print "so:" $1 }' \
    )" \
    # $runDeps is intentionally unquoted: each entry must become a separate argument
    && apk add --virtual .python-rundeps $runDeps \
    && apk del .build-deps \
    # drop test directories and compiled bytecode files
    && find /usr/local -depth \
        \( \
            \( -type d -a \( -name test -o -name tests \) \) \
            -o \
            \( -type f -a \( -name '*.pyc' -o -name '*.pyo' \) \) \
        \) -exec rm -rf '{}' + \
    && rm -rf /usr/src/python

# Expose the unversioned command names (idle, pydoc, python, python-config)
# as relative symlinks placed next to their versioned counterparts.
RUN ln -s idle3 /usr/local/bin/idle \
    && ln -s pydoc3 /usr/local/bin/pydoc \
    && ln -s python3 /usr/local/bin/python \
    && ln -s python3-config /usr/local/bin/python-config

# Bootstrap pip at a pinned version for the interpreter installed under /usr/local.
# if this is called "PIP_VERSION", pip explodes with "ValueError: invalid truth value '<VERSION>'"
ENV PYTHON_PIP_VERSION=9.0.1
RUN set -ex; \
    # temporary helper package for the https download, removed right after it
    apk add --no-cache --virtual .fetch-deps libressl; \
    # the unversioned https://bootstrap.pypa.io/get-pip.py refuses to run on Python 3.6,
    # so fetch the bootstrap script published for that interpreter version
    wget -O get-pip.py 'https://bootstrap.pypa.io/pip/3.6/get-pip.py'; \
    apk del .fetch-deps; \
    python get-pip.py \
        --disable-pip-version-check \
        --no-cache-dir \
        "pip==$PYTHON_PIP_VERSION" \
    ; \
    # fail the build early if pip is not runnable
    pip --version; \
    # drop test directories and compiled bytecode files
    find /usr/local -depth \
        \( \
            \( -type d -a \( -name test -o -name tests \) \) \
            -o \
            \( -type f -a \( -name '*.pyc' -o -name '*.pyo' \) \) \
        \) -exec rm -rf '{}' +; \
    rm -f get-pip.py
# SSL backend setting read by pycurl when it is built.
ENV PYCURL_SSL_LIBRARY=openssl
# Install the Python packages. Compiler and header packages go into the
# .build-dependencies virtual group and are removed at the end of this layer.
# The shared libraries the compiled extensions load at import time (libcurl for
# pycurl, libjpeg for the imaging dependency) are installed as regular packages
# so that `apk del .build-dependencies` does not remove them.
# NOTE(review): python-dev and py-pip look like Alpine's system Python packages
# rather than the /usr/local interpreter built above — confirm whether they are still needed.
RUN apk add --no-cache \
        libcurl \
        libjpeg \
    && apk add --no-cache --virtual .build-dependencies \
        openssl-dev \
        build-base \
        python-dev \
        py-pip \
        curl-dev \
        libjpeg-turbo-dev \
        zlib-dev \
    # --no-cache-dir keeps pip's download cache out of the layer
    && pip install --no-cache-dir \
        influxdb \
        pycurl \
        numpy \
        ngram \
        pyocr \
    && apk del .build-dependencies


# opencv
# Recipe adapted from:
#   https://github.com/julianbei/alpine-opencv-microimage/blob/master/python3/3.3.0/Dockerfile
#   https://github.com/sho-soar/alpine-python-opencv/blob/master/default/Dockerfile
# Append four tagged edge repositories to apk's repository list; packages can
# then be requested from one of them with the `name@tag` syntax.
RUN printf '%s\n' \
      '@edgunity http://nl.alpinelinux.org/alpine/edge/community' \
      '@edge http://nl.alpinelinux.org/alpine/edge/main' \
      '@testing http://nl.alpinelinux.org/alpine/edge/testing' \
      '@community http://dl-cdn.alpinelinux.org/alpine/edge/community' \
    >> /etc/apk/repositories

# Toolchain, image-format headers and TBB for the OpenCV build.
# They are installed as regular packages (the `--virtual .build-deps` grouping
# is deliberately left disabled), so they stay in the final image.
RUN apk add --no-cache \
      build-base \
      clang-dev \
      cmake \
      jasper-dev \
      libjpeg \
      libjpeg-turbo-dev \
      libpng-dev \
      libtbb-dev@testing \
      libtbb@testing \
      libwebp-dev \
      linux-headers \
      openblas-dev \
      tiff-dev \
      unzip \
      wget

# Point CC/CXX at clang and record the OpenCV release to download.
ENV CC=/usr/bin/clang \
    CXX=/usr/bin/clang++ \
    OPENCV_VERSION=3.3.0

# Download, build and install OpenCV ${OPENCV_VERSION} with Python 3 bindings.
# Everything happens in one RUN so the unpacked source tree is deleted in the
# same layer that created it and never ends up in the image.
RUN mkdir -p /opt && cd /opt && \
  # -p above: plain `mkdir /opt` aborts the build when /opt already exists
  wget "https://github.com/opencv/opencv/archive/${OPENCV_VERSION}.zip" && \
  unzip "${OPENCV_VERSION}.zip" && \
  rm -rf "${OPENCV_VERSION}.zip" && \
  mkdir -p "/opt/opencv-${OPENCV_VERSION}/build" && \
  cd "/opt/opencv-${OPENCV_VERSION}/build" && \
  # bindings are built for the interpreter installed under /usr/local;
  # Python 2 bindings, examples, docs, FFmpeg, IPP and OpenEXR are switched off
  cmake \
  -D CMAKE_BUILD_TYPE=RELEASE \
  -D CMAKE_INSTALL_PREFIX=/usr/local \
  -D WITH_FFMPEG=NO \
  -D WITH_IPP=NO \
  -D WITH_OPENEXR=NO \
  -D WITH_TBB=YES \
  -D BUILD_EXAMPLES=NO \
  -D BUILD_ANDROID_EXAMPLES=NO \
  -D INSTALL_PYTHON_EXAMPLES=NO \
  -D BUILD_DOCS=NO \
  -D BUILD_opencv_python2=NO \
  -D BUILD_opencv_python3=ON \
  -D PYTHON3_EXECUTABLE=/usr/local/bin/python \
  -D PYTHON3_INCLUDE_DIR=/usr/local/include/python3.6m/ \
  -D PYTHON3_LIBRARY=/usr/local/lib/libpython3.so \
  -D PYTHON_LIBRARY=/usr/local/lib/libpython3.so \
  -D PYTHON3_PACKAGES_PATH=/usr/local/lib/python3.6/site-packages/ \
  -D PYTHON3_NUMPY_INCLUDE_DIRS=/usr/local/lib/python3.6/site-packages/numpy/core/include/ \
  .. && \
  # a single parallel build; a preceding serial `make` would already compile everything
  make -j "$(nproc)" && \
  make install && \
  rm -rf "/opt/opencv-${OPENCV_VERSION}"