More than 3 years have passed since last update.

ElixirAdvent Calendar 2021

@the_haigo(Yuisho Takafuji)

YOLO(cvlib)を使用してLiveView上で物体検知をやってみた

Last updated at 2021-12-11Posted at 2021-12-11

はじめに

この記事はelixir Advent Calendar 2021の12日目の記事です。11日目は @koga1020 さんのphx.gen.releaseを試して爆速でfly.ioにデプロイしてみようでした。

本記事はこちらを参考にLiveView上でYOLO(cvlib)を動かして物体検知を行います

環境

Operating System: macOS
CPU Information: Apple M1
Elixir 1.12.3
Erlang 24.1.5
Phoenix 1.6.2

install

python周りはこちらを参考にセットアップ
YOLOを簡単に扱えるcvlibと関連するものをインストールします

pip install tensorflow opencv-python cvlib

setup

DBは使わないので--no-ectoオプションでphoenixプロジェクトを作成します

mix phx.new live_yolo --no-ecto
cd live_yolo

ライブラリにuuidを追加します

mix.exs

defmodule LiveYolo.MixProject do
...
  # Project dependencies. Elixir lists do not accept a trailing comma, so the
  # last entry must not be followed by one.
  defp deps do
    [
      ...
      {:plug_cowboy, "~> 2.5"},
      # Added: provides the UUID module used to tag detection requests.
      {:uuid, "~> 1.1"}
    ]
  end
...
end

ファイル構成

detect.py -> elixirから画像のバイナリ文字列を受け取って、YOLOで物体検知を行ってラベルと検知範囲を返す
worker.ex -> detect.pyをportで開いて待ち受けるGenServer
page_live.ex -> LiveViewでCanvasにアップロードした画像を表示、detectボタンでYOLOの結果を反映
hooks.js -> Canvasの操作、画像の表示、YOLOのBBoxを描画

なぜPortなのか？

pythonのスクリプトを実行するならばSystem.cmdで問題ないのではと思う
だがSystem.cmdだとpythonを起動して必要なライブラリとYOLOのモデルのメモリへの読み込みが完了してやっと物体検知のコードが実行される。だいたい数秒かかるもので、これは現実的ではない。

Portはpythonを起動して必要なライブラリを読み込みElixirとは別のOSのプロセスとして立ち上げ、stdin/stdoutを介してデータをやり取りします
PortとPythonでのデータのやり取りを簡単な例で慣れたい場合は元記事のElixir Portsを試してみましょう

Worker

PythonのスクリプトをSupervisorで常時起動しつつ、LiveViewから実行できるインターフェースとしてworkerを作成します

lib/live_yolo/worker.ex

defmodule LiveYolo.Worker do
  @moduledoc """
  GenServer that keeps a Python detection script running behind a port.

  Each request is sent to the script as `image_id <> image_data` (the port adds
  a 4-byte length prefix). The script answers with `image_id <> json`, and the
  decoded result is forwarded to the process that made the request as
  `{:detected, image_id, result}`.
  """
  use GenServer

  # Size in bytes of the binary UUID that prefixes every packet.
  @uuid4_size 16

  @default_config [
    python: "python",
    detect_script: "lib/python/detect.py",
    model: "yolov4"
  ]

  ## Client API

  @doc """
  Starts the worker. `opts` are passed to `GenServer.start_link/3`
  (for example `name: LiveYolo.Worker`).
  """
  def start_link(opts \\ []) do
    GenServer.start_link(__MODULE__, :ok, opts)
  end

  @doc """
  Returns the effective configuration as a map with the keys `:python`,
  `:detect_script` and `:model`.

  Defaults are merged with the `LiveYolo.Worker` entry of the `:live_yolo`
  application environment. A value of the form `{:system, "ENV_VAR"}` is read
  from the environment (falling back to the default for that option), and the
  `:python` value is then resolved with `System.find_executable/1`, which
  yields `nil` when no such executable exists.
  """
  def config do
    @default_config
    |> Keyword.merge(Application.get_env(:live_yolo, __MODULE__, []))
    |> Map.new(&resolve_option/1)
  end

  @doc """
  Asks the worker `pid` to run detection on `image` and returns the generated
  16-byte image id. Pass that id to `await/2` to obtain the result.
  """
  def request_detection(pid, image) do
    image_id = UUID.uuid4() |> UUID.string_to_binary!()
    request_detection(pid, image_id, image)
  end

  @doc """
  Same as `request_detection/2` but with a caller-supplied `image_id`, which
  must be exactly #{@uuid4_size} bytes long.
  """
  def request_detection(pid, image_id, image)
      when byte_size(image_id) == @uuid4_size do
    GenServer.call(pid, {:detect, image_id, image})
  end

  @doc """
  Waits up to `timeout` milliseconds for the result of `image_id`.

  Must be called from the process that issued the request. Returns the result
  map (`%{shape: ..., objects: ...}`) or `{:timeout, image_id}`.
  """
  def await(image_id, timeout \\ 5_000) do
    receive do
      {:detected, ^image_id, result} -> result
    after
      timeout -> {:timeout, image_id}
    end
  end

  @doc """
  Pairs each label with its box and converts the box from
  `[x, y, bottom_right_x, bottom_right_y]` to a map with `:x`, `:y`, `:w`, `:h`.
  """
  def get_objects(labels, boxes) do
    Enum.zip(labels, boxes)
    |> Enum.map(fn {label, [x, y, bottom_right_x, bottom_right_y]} ->
      w = bottom_right_x - x
      h = bottom_right_y - y
      %{label: label, x: x, y: y, w: w, h: h}
    end)
  end

  ## GenServer callbacks

  @impl true
  def init(:ok) do
    case config() do
      %{python: nil} ->
        # Stop with a readable reason instead of letting Port.open/2 fail on a
        # nil executable path.
        {:stop, :python_executable_not_found}

      config ->
        port =
          Port.open(
            {:spawn_executable, config.python},
            [
              :binary,
              :nouse_stdio,
              {:packet, 4},
              args: [config.detect_script, config.model]
            ]
          )

        # `requests` maps image_id => pid of the process waiting for it.
        {:ok, %{port: port, requests: %{}}}
    end
  end

  @impl true
  def handle_call({:detect, image_id, image_data}, {from_pid, _}, worker) do
    Port.command(worker.port, [image_id, image_data])
    worker = put_in(worker, [:requests, image_id], from_pid)
    {:reply, image_id, worker}
  end

  @impl true
  def handle_info(
        {port, {:data, <<image_id::binary-size(@uuid4_size), json_string::binary>>}},
        %{port: port} = worker
      ) do
    case pop_in(worker, [:requests, image_id]) do
      {nil, worker} ->
        # Nobody is registered for this id; drop the packet rather than
        # crashing on send(nil, ...).
        {:noreply, worker}

      {from_pid, worker} ->
        send(from_pid, {:detected, image_id, get_result!(json_string)})
        {:noreply, worker}
    end
  end

  ## Helpers

  # Resolve {:system, var} first so that it also works for the :python option.
  defp resolve_option({option, {:system, env_variable}}) do
    resolve_option({option, System.get_env(env_variable, @default_config[option])})
  end

  defp resolve_option({:python, path}), do: {:python, System.find_executable(path)}
  defp resolve_option(option), do: option

  # Decodes the JSON sent by the script; raises if it is not valid JSON.
  defp get_result!(json_string) do
    result = Jason.decode!(json_string)

    %{
      shape: %{width: result["shape"]["width"], height: result["shape"]["height"]},
      objects: get_objects(result["labels"], result["boxes"])
    }
  end
end

各関数の補足をしていきます

start_link -> supervisorでGenServerアプリを起動したときに実行される関数
init
start_linkで実行される関数
config関数で実行するPythonのコマンド、実行するスクリプト、使用するモデルを取得します
Portを以下のオプションで開きます ref

spawn_executable -> 指定したパスの外部実行ファイル(ここではpython)をOSプロセスとして起動します。
binary -> バイナリデータオブジェクトを送信する
nouse_stdio -> ElixirとPythonとの通信にファイルディスクリプター3と4を使用する
packet, 4 -> 送信するバイナリデータオブジェクトの長さを先頭に４bytesで追加する

GenServerのstateとしてPortと物体検知を行う画像のUUIDの入れ物を返します

config -> デフォルト設定とアプリケーション設定をマージし、実行可能なpythonコマンドのパスを探す
request_detection -> LiveViewから実行されるトリガー UUIDを生成して request_detectionを実行
request_detection/3 -> UUIDのバイト数(16bytes)をチェックして call :detectを実行
handle_call(:detect, from, state) -> initで開いたPythonスクリプト　PortにUUIDと画像データを送信
handle_info(port, {:data, data}, state)
Pythonスクリプト側でflushが実行された際に呼ばれる
dataを解析して使いやすい形に変換(get_result & get_objects)
uuidを元にrequestsマップから削除
結果をawait関数のreceiveに送信
get_result! -> 7のdataがJSON形式の文字列なのでdecodeする
get_objects -> 8のdecodeしたデータから %{label, x, y, width, height}の形式に変換
await -> request_detectionの結果を待ち受ける関数

workerができたので supervisorに追加します

lib/live_yolo/application.ex

defmodule LiveYolo.Application do
...
  def start(_type, _args) do
    children = [
      # Start the Telemetry supervisor
      LiveYoloWeb.Telemetry,
      # Start the PubSub system
      {Phoenix.PubSub, name: LiveYolo.PubSub},
      # Added: start the YOLO worker under the name LiveYolo.Worker. It is
      # listed before the endpoint so it is already running when the first
      # request is served.
      {LiveYolo.Worker, [name: LiveYolo.Worker]},
      # Start the Endpoint (http/https)
      LiveYoloWeb.Endpoint
    ]

    # See https://hexdocs.pm/elixir/Supervisor.html
    # for other strategies and supported options
    opts = [strategy: :one_for_one, name: LiveYolo.Supervisor]
    Supervisor.start_link(children, opts)
  end
...
end

Detect

標準入力の代わりにファイルディスクリプター3でバイナリデータを受け取り、YOLOで物体検知を行いその結果をバイナリデータに変換して、ファイルディスクリプター4経由でElixir側に返すPythonスクリプトです

lib/python/detect.py

import os, sys
from struct import unpack, pack
import numpy as np
import cv2
import cvlib as cv
import json
from cvlib.object_detection import YOLO

UUID4_SIZE = 16

def setup_io():
    """Open the two file descriptors used to talk to the Elixir port.

    File descriptor 3 is opened for binary reading (used instead of stdin)
    and file descriptor 4 for binary writing (used instead of stdout).

    Returns:
        A ``(reader, writer)`` tuple of binary file objects.
    """
    reader = os.fdopen(3, "rb")
    writer = os.fdopen(4, "wb")
    return reader, writer

def read_message(input_f):
    """Read one length-prefixed request from ``input_f``.

    Frame layout: a 4-byte big-endian unsigned length, followed by
    ``UUID4_SIZE`` bytes of image id, followed by the encoded image bytes.
    The length counts the id plus the image bytes.

    Args:
        input_f: binary file object to read from.

    Returns:
        ``None`` when the stream has ended or the frame is truncated or
        shorter than an image id; otherwise ``{'id': bytes, 'image': image}``.
        ``image`` is the decoded OpenCV image, or ``None`` when there are no
        image bytes or OpenCV could not decode them.
    """
    header = input_f.read(4)
    if len(header) != 4:
        return None  # EOF

    (total_msg_size,) = unpack("!I", header)
    image_size = total_msg_size - UUID4_SIZE
    if image_size < 0:
        # A negative size would make read() consume the stream until EOF.
        return None

    image_id = input_f.read(UUID4_SIZE)
    image_data = input_f.read(image_size)
    if len(image_id) != UUID4_SIZE or len(image_data) != image_size:
        return None  # stream closed in the middle of a frame

    if not image_data:
        # Nothing to decode; report "no image" instead of passing an empty
        # buffer to OpenCV.
        return {'id': image_id, 'image': None}

    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    nparr = np.frombuffer(image_data, np.uint8)
    image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)

    return {'id': image_id, 'image': image}

def detect(image, model):
    """Detect common objects in ``image`` with cvlib.

    Args:
        image: image passed straight to ``cv.detect_common_objects``.
        model: model name forwarded as the ``model`` keyword argument.

    Returns:
        A ``(boxes, labels)`` tuple; the third value returned by cvlib is
        discarded.
    """
    # Alternative kept from the original, using custom weights:
    #   yolo = YOLO("lib/python/yolov4-custom_2000.weights","lib/python/yolov4-custom.cfg","lib/python/yolov4-custom.names")
    #   boxes, labels, _conf = yolo.detect_objects(image)
    found_boxes, found_labels, _scores = cv.detect_common_objects(image, model=model)
    return (found_boxes, found_labels)

def write_result(output, image_id, image_shape, boxes, labels):
    """Send one detection result frame to ``output`` and flush it.

    The frame is a 4-byte big-endian length (JSON size plus ``UUID4_SIZE``),
    then ``image_id``, then the ASCII-encoded JSON object with the keys
    ``shape``, ``boxes`` and ``labels``.
    """
    payload = {'shape': image_shape, 'boxes': boxes, 'labels': labels}
    body = json.dumps(payload).encode("ascii")
    length_prefix = pack("!I", len(body) + UUID4_SIZE)

    for chunk in (length_prefix, image_id, body):
        output.write(chunk)
    output.flush()

def run(model):
    """Serve detection requests until the input stream ends.

    Reads requests with ``read_message``, runs ``detect`` on each image and
    answers with ``write_result``.

    Args:
        model: model name handed to ``detect`` for every request.
    """
    input_f, output_f = setup_io()

    while True:
        msg = read_message(input_f)
        if msg is None:
            break

        image = msg["image"]
        if image is None:
            # The payload was not a decodable image. Answer with an empty
            # detection so the requester gets a reply and this process keeps
            # serving, instead of failing on ``None.shape``.
            write_result(output_f, msg["id"], {'width': 0, 'height': 0}, [], [])
            continue

        # image shape
        height, width, _ = image.shape
        shape = {'width': width, 'height': height}

        # detect objects
        boxes, labels = detect(image, model)

        # send the result back to Elixir
        write_result(output_f, msg["id"], shape, boxes, labels)

if __name__ == "__main__":
    # The first command-line argument, when given, selects the model;
    # otherwise "yolov4" is used.
    model = sys.argv[1] if len(sys.argv) > 1 else "yolov4"
    run(model)

実行される順番と各関数の補足をしていきます

__main__ -> detect.pyで実行される
run -> main ループ
setup_io -> stdin/stdoutの代わりにファイルディスクリプター3(read binary) と 4(write binary)を開く
read_message
ElixirのPortからバイナリデータが送信してくる際に 4byteで表現されたファイルサイズ + 16byteで表現されたUUIDが先頭についてくるので、分割した後にnumpyとopenCVで画像ファイルとして復元させます
detect　-> 4の画像データでYOLOの物体検知を行う
write_result
5の結果としてラベルとラベルの対象がどの位置いるかのデータが返ってくるのでバイナリに変換
バイナリデータサイズ+UUID+バイナリデータにしてElixir側に返す

これでYOLOを実行する部分ができたので次はLiveViewの方を実装します

準備

スタイリングにはbulmaを使います
milligramと競合する箇所があるのでphoenix.cssの9行目を削除しておくこと

assets/css/app.css

@import "./phoenix.css";
@import "https://cdn.jsdelivr.net/npm/bulma@0.9.3/css/bulma.min.css"; /* Added: Bulma */
...

page_live.ex

最初にブランクページを作ります

lib/live_yolo_web/live/page_live.ex

defmodule LiveYoloWeb.PageLive do
  @moduledoc """
  Minimal LiveView: mounting adds nothing to the socket.
  """
  use LiveYoloWeb, :live_view

  # Hand the socket back unchanged.
  @impl true
  def mount(_params, _session, socket), do: {:ok, socket}
end

lib/live_yolo_web/live/page_live.html.heex

<div>
    page live
</div>

lib/live_yolo_web/router.ex

defmodule LiveYoloWeb.Router do
...
  scope "/", LiveYoloWeb do
    pipe_through :browser

    # Route "/" to LiveYoloWeb.PageLive with the :index live action.
    live "/", PageLive, :index
  end
...
end

ファイルアップロード

ファイルアップロードに必要な項目は３つで
mount時にファイルアップロードの設定を行うallow_upload
LiveView用のfile inputの live_file_input
formの変更検知を行うvalidateイベント

lib/live_yolo_web/live/page_live.ex

defmodule LiveYoloWeb.PageLive do
  @moduledoc """
  LiveView that accepts an upload named `:image` of any file type.
  """
  use LiveYoloWeb, :live_view

  # Register the :image upload on the socket.
  @impl true
  def mount(_params, _session, socket) do
    socket = allow_upload(socket, :image, accept: :any)
    {:ok, socket}
  end

  # Form change events need a handler; nothing is validated here.
  @impl true
  def handle_event("validate", _params, socket), do: {:noreply, socket}
end

allow_uploadを追加するとファイルアップロード関連の変数が@uploadsでアクセスできます
@uploads.image.entriesでformにセットされた画像にアクセスできるので live_img_previewで表示することができます
またphx-drop-targetを使うことでdrag and dropでのアップロードもできます

lib/live_yolo_web/live/page_live.html.heex

<div>
  <div class="columns is-centered" style={ if @uploads.image.entries != [], do: "display:none" }>
    <form phx-change="validate" >
        <div class="file is-boxed" phx-drop-target={ @uploads.image.ref }>
          <label class="file-label">
            <%# Only the LiveView file input belongs here; a second plain file input would sit on top of it and take the clicks. %>
            <%= live_file_input @uploads.image, class: "file-input" %>
            <span class="file-cta">
              <span class="file-label p-6">
                Choose a file…
              </span>
            </span>
          </label>
        </div>
    </form>
  </div>
  <%= for entry <- @uploads.image.entries do %>
    <figure>
      <%= live_img_preview entry %>
      <figcaption><%= entry.client_name %></figcaption>
    </figure>
  <% end %>
</div>

このままだとYOLOの結果の反映ができないのでCanvasで表示します

アップロードした画像をCanvasで描画

通常サーバーサイド側からJSを実行するのはめんどくさいのですが、LiveViewではJS Hooksという機能があり
Elixir側からpush_eventという関数を実行する事によって簡単にJSを実行することができます

LiveViewマウント時にcanvasオブジェクトとコンテキストを作成して
Elixir側からdrawイベントを実行された際にDataURL形式で画像を生成してCanvasに描画します

assets/js/hooks.js

let Hooks = {};

/**
 * Hook for the element that wraps the <canvas>.
 * On "draw" it loads the received base64 image and paints it on the canvas,
 * resizing the canvas to the image's dimensions.
 */
Hooks.Canvas = {
  mounted() {
    const canvas = this.el.firstElementChild;
    const context = canvas.getContext("2d");
    const img = new Image();

    // Keep references on the hook instance.
    this.canvas = canvas;
    this.context = context;

    this.handleEvent("draw", ({ mime, src }) => {
      img.src = `data:${mime};base64,${src}`;
      img.onload = () => {
        // Match the canvas size to the image before drawing it.
        canvas.width = img.width;
        canvas.height = img.height;
        context.drawImage(img, 0, 0);
      };
    });
  },
};
export default Hooks;

hooksを参照できるように liveSocketに追加します

assets/js/app.js

...
import Hooks from "./hooks"
let csrfToken = document.querySelector("meta[name='csrf-token']").getAttribute("content")
let liveSocket = new LiveSocket("/live", Socket, {hooks: Hooks, params: {_csrf_token: csrfToken}})
...

id属性を付けた要素に使用するphx-hookを指定します
canvasやsvgはlive_viewに変更検知されて再レンダリングされないようにphx-update="ignore"を追加する必要があります

lib/live_yolo_web/live/page_live.html.heex

<div>
...
  <div id="canvas" phx-hook="Canvas">
    <canvas phx-update="ignore"></canvas>
  </div>
</div>

LiveView側
allow_upload option

chunk_size -> progress内でゴニョゴニョする際にデフォルトだと足りないため x100 詳細はドキュメント読んでもよくわからなかった
progress -> upload時に実行する関数
auto_upload -> ファイルが選択 or dndされた時点でアップロードされ progressで指定した関数が実行されます

handle_progress
progressで指定する関数
アップロードしたファイルは一時ファイルでprogressの関数の実行後すぐ削除されるのでconsume_uploaded_entries関数内で File.cp!なりクラウドストレージにアップロードする必要がある
今回はバイナリデータとしてsocketにアサインしています
最後にpush_event("draw")で Canvasにbase64 encodeした画像データを送信して描画しています

lib/live_yolo_web/live/page_live.ex

defmodule LiveYoloWeb.PageLive do
  @moduledoc """
  LiveView that auto-uploads an image, keeps its bytes in the `:upload_file`
  assign and asks the client to draw it through the "draw" event.
  """
  use LiveYoloWeb, :live_view

  @impl true
  def mount(_params, _session, socket) do
    {
      :ok,
      socket
      |> assign(:upload_file, nil)
      |> allow_upload(
        :image,
        accept: :any,
        chunk_size: 6_400_000,
        progress: &handle_progress/3,
        auto_upload: true
      )
    }
  end

  @doc """
  Progress callback for the `:image` upload.

  Does nothing until the entry reports `done?`; entries that are still in
  progress cannot be consumed. Once done, the uploaded file is read into
  memory, stored in `:upload_file`, and pushed to the client base64-encoded
  together with its MIME type.
  """
  def handle_progress(:image, entry, socket) do
    if entry.done? do
      {upload_file, mime} =
        consume_uploaded_entries(socket, :image, fn %{path: path}, entry ->
          # NOTE(review): newer LiveView versions expect {:ok, value} from this
          # callback — confirm against the LiveView version in use.
          {File.read!(path), entry.client_type}
        end)
        |> List.first()

      {
        :noreply,
        socket
        |> assign(:upload_file, upload_file)
        |> push_event("draw", %{src: Base.encode64(upload_file), mime: mime})
      }
    else
      {:noreply, socket}
    end
  end

  @impl true
  def handle_event("validate", _params, socket) do
    {:noreply, socket}
  end
end

file_inputの表示はauto_uploadの場合はentriesが空でない状態が一瞬なので upload_fileの有無で判断するようにします

lib/live_yolo_web/live/page_live.html.heex

<div>
  <div class="columns is-centered" style={ if @upload_file != nil, do: "display:none" }>
    <form phx-change="validate" >
      ...
    </form>
  </div>
  <div id="canvas" phx-hook="Canvas">
    <canvas phx-update="ignore"></canvas>
  </div>
</div>

結果は上の動画と同じなので割愛

detect

サイドバー領域を確保し、detectとremoveボタンを追加

lib/live_yolo_web/live/page_live.html.heex

<div class="columns">
  <aside class="column is-2 menu">
    <p class="menu-label">Actions</p>
    <ul class="menu-list">
      <li><button class="button is-fullwidth mb-3" phx-click="detect">detect</button></li>
      <li><button class="button is-fullwidth" phx-click="remove">remove</button></li>
    </ul>
  </aside>

  <div class="column is-10">
    <div class="columns is-centered" style={ if @upload_file != nil, do: "display:none" }>
      <form phx-change="validate" >
      ...
      </form>
    </div>
    <div id="canvas" phx-hook="Canvas">
      <canvas phx-update="ignore"></canvas>
    </div>
  </div>
</div>

detectとremove実装
detectはbinaryデータをWorker.request_detectionに渡して、await()で待ち受けます
removeはassignを削除してcanvasも初期化します

lib/live_yolo_web/live/page_live.ex

defmodule LiveYoloWeb.PageLive do
  use LiveYoloWeb, :live_view

  ...
  # Nothing has been uploaded yet, so there is nothing to send to the worker.
  @impl true
  def handle_event("detect", _params, %{assigns: %{upload_file: nil}} = socket) do
    {:noreply, socket}
  end

  # Runs detection on the uploaded image and pushes the result to the client.
  # A timeout is reported as a flash message: the {:timeout, id} tuple returned
  # by await/1 cannot be encoded as an event payload.
  def handle_event("detect", _params, socket) do
    image_id =
      LiveYolo.Worker.request_detection(LiveYolo.Worker, socket.assigns.upload_file)

    case LiveYolo.Worker.await(image_id) do
      {:timeout, ^image_id} ->
        {:noreply, put_flash(socket, :error, "Detection timed out. Please try again.")}

      detection ->
        {
          :noreply,
          socket
          |> assign(:detect, detection)
          |> push_event("detect", %{detect: detection})
        }
    end
  end

  # Forgets the uploaded image and detection result and clears the canvas.
  @impl true
  def handle_event("remove", _params, socket) do
    {
      :noreply,
      socket
      |> assign(upload_file: nil, detect: nil)
      |> push_event("remove", %{})
    }
  end
end

detectの反映は結果が{:label, :x, :y, :w, :h}[]なJSONなので
eachで回して左上にラベルを表示して、青線で囲うようにしています

assets/js/hooks.js

let Hooks = {};
Hooks.Canvas = {
  mounted() {
    ...
    this.handleEvent("detect", (path) => {
      path.detect.objects.forEach((d) => {
        context.fillStyle = "blue";
        context.font = "30px";
        context.textAlign = "left";
        context.textBaseline = "top";
        context.fillText(d.label, d.x, d.y - 10, 20);
        context.strokeStyle = "rgb(0, 0, 255)";
        context.strokeRect(d.x, d.y, d.w, d.h);
      });
    });

    this.handleEvent("remove", () => {
      context.clearRect(0, 0, canvas.width, canvas.height);
    });
  },
};
export default Hooks;

これで完成になります
初回実行時はYOLOの学習データ取得でタイムアウトするかもしれませんので注意してください

おまけ

検知した物体を別画像にくり抜くclipを実装します
最初にclipボタンと描画領域を追加

lib/live_yolo_web/live/page_live.html.heex

<div class="columns">
  <aside class="column is-2 menu">
    <p class="menu-label">Actions</p>
    <ul class="menu-list">
      <li><button class="button is-fullwidth mb-3" phx-click="detect">detect</button></li>
      <li><button class="button is-fullwidth mb-3" phx-click="clip">clip</button></li>
      <li><button class="button is-fullwidth" phx-click="remove">remove</button></li>
    </ul>
  </aside>

  <div class="column is-10" style={ if @clip_images != [], do: "display:none" }>
  ...
  </div>
  <div class="columns column is-10">
    <%= for image <- @clip_images do %>
      <div class="card column is-3 m-2">
        <header class="card-header">
          <p class="card-header-title"><%= image.label %></p>
        </header>
        <div class="card-image">
          <img src={ image.src }>
        </div>
      </div>
    <% end %>
  </div>
</div>

<div style="display:none;">
  <canvas id="clip"></canvas>
</div>

hooksでは以下のような処理をしています

くり抜き画像を描画する canvas id="clip"を取得して
画像サイズを大きい方に合わせる
clip canvasを初期化
画像を表示して、くり抜く領域の画像データを取得
putImageDataで上書き
dataURLに変換
pushEventでElixir側に送信

assets/js/hooks.js

let Hooks = {};
Hooks.Canvas = {
  mounted() {
    ...
    this.handleEvent("clip", (path) => {
      let clip = document.getElementById("clip");
      let ctx = clip.getContext("2d");
      const data = path.detect.objects.map((d) => {
        let w = d.w > d.h ? d.w : d.h;
        let h = d.h > d.w ? d.h : d.w;
        clip.width = canvas.width;
        clip.height = canvas.height;
        ctx.clearRect(0, 0, clip.width, clip.height);
        let a = context.getImageData(d.x, d.y, w, h);
        clip.width = w;
        clip.height = h;
        ctx.putImageData(a, 0, 0);
        return { src: clip.toDataURL(), label: d.label };
      });
      this.pushEvent("cliped", data);
    });
  },
};
export default Hooks;

JS側を実行する handle_event("clip")と JS側から実行される handle_event("clipped")を実装します

lib/live_yolo_web/live/page_live.ex

defmodule LiveYoloWeb.PageLive do
  use LiveYoloWeb, :live_view

  ....
  # Nothing has been uploaded yet, so there is nothing to clip.
  @impl true
  def handle_event("clip", _params, %{assigns: %{upload_file: nil}} = socket) do
    {:noreply, socket}
  end

  # Runs detection and asks the client to cut out every detected object.
  # A timeout is reported as a flash message: the {:timeout, id} tuple returned
  # by await/1 cannot be encoded as an event payload.
  def handle_event("clip", _params, socket) do
    image_id =
      LiveYolo.Worker.request_detection(LiveYolo.Worker, socket.assigns.upload_file)

    case LiveYolo.Worker.await(image_id) do
      {:timeout, ^image_id} ->
        {:noreply, put_flash(socket, :error, "Detection timed out. Please try again.")}

      detection ->
        {
          :noreply,
          socket
          |> assign(:detect, detection)
          |> push_event("clip", %{detect: detection})
        }
    end
  end

  # Receives the cut-outs from the client: a list of maps with the string keys
  # "label" and "src".
  @impl true
  def handle_event("clipped", params, socket) do
    images =
      Enum.map(params, fn param ->
        %{label: param["label"], src: param["src"]}
      end)

    {:noreply, assign(socket, :clip_images, images)}
  end
end

最後に

いかがでしたでしょうか？
PhoenixとLiveViewを使うことで　YOLOを使ったwebアプリケーションが簡単(?)に作ることができました
こちらを参考にカスタムデータセットで物体検知したり
これを応用して検知した人の顔写真を保存したり、クリップした画像を画像分類に掛けて更に詳細なラベルをつけるなど色々できそうですね
元記事にはweb camを使ったリアルタイム物体検知もあるので是非試してみてください

次は @Papillon6814 さんの Elixir始めたてのころからのコードの書き方変遷です

今回のコード

参考ページ

https://github.com/arunponnusamy/cvlib
https://github.com/poeticoding/yolo_example
https://www.poeticoding.com/real-time-object-detection-with-phoenix-and-python/
https://edge.sincar.jp/web/base64-inline-image/
https://qiita.com/geekduck/items/2db28daa9e27df9b861d
https://qiita.com/c60evaporator/items/aef6cc1581d2c4676504
https://bulma.io/documentation/
http://www.htmq.com/canvas/putImageData.shtml
https://developer.mozilla.org/en-US/docs/Web/API/CanvasRenderingContext2D

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up