iPhone 上の物体検出を ElixirDesktop + Evision で実装する

Last updated at 2024-11-18 / Posted at 2024-11-18

はじめに

前回の記事で iPhone 上の画像処理を ElixirDesktop + Evision で実装しました

本記事では更に Evision による物体検出を実装します

Evision の導入、画像処理の実装、 iOS アプリの起動までは前回の記事通りに進めているものとします

モデルの用意

YOLO 公式で公開している YOLOv3 モデルの重み、設定、ラベルデータをそれぞれダウンロードし、 priv/models 配下に保存します（後述のコードから読み込むため、ファイル名はそれぞれ yolov3.weights 、 yolov3.cfg 、 labels.txt とします）

モデルの読込処理

lib/elixir_desktop_evision/store.ex を以下の内容で作成します

defmodule ElixirDesktopEvision.Store do
  @moduledoc """
  Agent that keeps the detection model and its label list in memory.

  The state is a map with the keys `:model` and `:label_list`; read a value
  with `get/1`.
  """

  use Agent

  require Logger

  @doc """
  Loads the model and the labels from the application's `priv/models`
  directory and starts the Agent registered under this module's name.

  The options argument is ignored.
  """
  def start_link(_opts) do
    # All model files live under the application's priv directory.
    base_dir = List.to_string(:code.priv_dir(:elixir_desktop_evision))

    config_file = base_dir <> "/models/yolov3.cfg"
    weights_file = base_dir <> "/models/yolov3.weights"
    labels_path = base_dir <> "/models/labels.txt"

    Logger.info("Load labels from #{labels_path}")

    detector = load_model(weights_file, config_file)
    labels = load_labels(labels_path)

    # Hand the loaded values to the Agent as its initial state.
    initial_state = %{model: detector, label_list: labels}

    Agent.start_link(fn -> initial_state end, name: __MODULE__)
  end

  @doc """
  Returns the value stored in the Agent state under `key`
  (`:model` or `:label_list`).
  """
  def get(key) do
    Agent.get(__MODULE__, fn state -> Map.get(state, key) end)
  end

  # Builds the detection model from the weights and config files and sets its input parameters.
  defp load_model(weights_file, config_file) do
    detector = Evision.DNN.DetectionModel.detectionModel(weights_file, config: config_file)

    Evision.DNN.DetectionModel.setInputParams(
      detector,
      scale: 1.0 / 255.0,
      size: {608, 608},
      swapRB: true,
      crop: false
    )
  end

  # Reads the labels file line by line, trimming surrounding whitespace from each line.
  defp load_labels(labels_file) do
    for line <- File.stream!(labels_file), do: String.trim(line)
  end
end

lib/elixir_desktop_evision.ex を以下のように編集します

...
    children = [
      {Phoenix.PubSub, name: ElixirDesktopEvision.PubSub},
      {Finch, name: ElixirDesktopEvision.Finch},
+     ElixirDesktopEvision.Store,
      ElixirDesktopEvisionWeb.Endpoint
    ]
...

これにより、アプリケーション起動時にモデルが読み込まれるようになります

物体検出処理の実装

lib/elixir_desktop_evision/worker.ex を以下の内容で作成します

defmodule ElixirDesktopEvision.Worker do
  @moduledoc """
  Object detection helpers built on the model and label list held by
  `ElixirDesktopEvision.Store`.
  """

  alias ElixirDesktopEvision.Store

  @doc """
  Runs object detection on an encoded image binary.

  Returns `{predictions, drawed}`, where `predictions` is the result of
  `predict/1` and `drawed` is the input image with the predictions drawn on
  it, encoded as PNG.
  """
  def detect(binary) do
    mat = to_mat(binary)

    predictions = predict(mat)

    drawed =
      mat
      |> draw_predictions(predictions)
      |> then(&Evision.imencode(".png", &1))
      |> IO.iodata_to_binary()

    {predictions, drawed}
  end

  @doc """
  Calls the zero-arity `function`, prints its elapsed wall-clock time in
  milliseconds to standard output and returns the function's result.
  """
  @spec measure((-> result)) :: result when result: var
  def measure(function) do
    # :timer.tc/1 reports microseconds; convert so the value matches the "ms" suffix.
    {microseconds, result} = :timer.tc(function)
    milliseconds = :erlang.float_to_binary(microseconds / 1000, decimals: 3)

    IO.puts("Time: #{milliseconds}ms")
    result
  end

  @doc """
  Decodes an encoded image binary with `Evision.imdecode/2` in color mode.
  """
  def to_mat(binary) do
    Evision.imdecode(binary, Evision.Constant.cv_IMREAD_COLOR())
  end

  @doc """
  Runs the stored detection model on `img`.

  Returns a list of maps with the keys `:box`, `:score` (rounded to two
  decimal places) and `:class` (the label at the detected class index).
  The list is also printed with `IO.inspect/2`.
  """
  def predict(img) do
    label_list = Store.get(:label_list)

    # Run detection with the model that was loaded at application start.
    Store.get(:model)
    |> Evision.DNN.DetectionModel.detect(img, confThreshold: 0.8, nmsThreshold: 0.7)
    |> then(fn {class_ids, scores, boxes} ->
      Enum.zip_with([class_ids, scores, boxes], fn [class_id, score, box] ->
        %{
          box: box,
          score: Float.round(score, 2),
          class: Enum.at(label_list, class_id)
        }
      end)
    end)
    |> IO.inspect(label: "Predictions")
  end

  @doc """
  Draws a rectangle and the class label for every prediction onto `mat`.

  Each prediction's `:box` is read as `{left, top, width, height}`.
  """
  def draw_predictions(mat, predictions) do
    # Surround every detected object with a rectangle and write its label inside.
    Enum.reduce(predictions, mat, fn prediction, drawed_mat ->
      {left, top, width, height} = prediction.box

      drawed_mat
      |> Evision.rectangle(
        {left, top},
        {left + width, top + height},
        {255, 0, 0},
        thickness: 4
      )
      |> Evision.putText(
        prediction.class,
        {left + 6, top + 26},
        Evision.Constant.cv_FONT_HERSHEY_SIMPLEX(),
        0.8,
        {0, 0, 255},
        thickness: 2
      )
    end)
  end
end

lib/elixir_desktop_evision_web/live/camera_live.ex を以下のように編集します

  # 写真撮影時の処理
  @impl true
  def handle_event("take", %{"image" => base64}, socket) do
    "data:image/jpeg;base64," <> raw = base64

-  image =
+  {_results, processed_image} =
      raw
      |> Base.decode64!()
-     |> Evision.imdecode(Evision.Constant.cv_IMREAD_COLOR())
+     |> ElixirDesktopEvision.Worker.detect()
-
-   {_dims, [height, width]} = Evision.Mat.size(image)
-   affine = Evision.getRotationMatrix2D({width / 2, height / 2}, 30, 1)
-
-   processed_image =
-     image
-     # 四角形の描画
-     |> Evision.rectangle(
-       # 左上座標{x, y}
-       {50, 30},
-       # 右下座標{x, y}
-       {80, 70},
-       # 色{R, G, B}
-       {0, 0, 255},
-       # 線の太さ
-       thickness: 5,
-       # 線の引き方（角がギザギザになる）
-       lineType: Evision.Constant.cv_LINE_4()
-     )
-     # 回転
-     |> Evision.warpAffine(affine, {width, height})
-     # 文字列の描画
-     |> Evision.putText(
-       # 文字列
-       "Hello",
-       # 文字の左下座標{x, y}
-       {100, 100},
-       # フォント種類
-       Evision.Constant.cv_FONT_HERSHEY_SIMPLEX(),
-       # フォントサイズ
-       1,
-       # 文字色
-       {0, 0, 255},
-       # 文字太さ
-       thickness: 2
-     )
-     |> then(&Evision.imencode(".jpg", &1))

    {:noreply, assign(socket, processed_image: processed_image)}
  end
end

"Take a picture" クリック時に物体検出が実行されるようになります

実行結果

iOS 上で実行すると、物体検出できることが確認できます

ただし、 GPU を使えていないので実行速度は遅いです

まとめ

Evision を使うことで、 YOLOv3 による物体検出を簡単にモバイル上でも実行できました

NxIREE などで iOS 上の GPU が使えるようになれば高速処理もできるかもしれません

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up