More than 1 year has passed since last update.

Custom Visionでリアルタイム判別

Posted at 2023-01-17

Custom Visionでリアルタイム判別

今回は、Microsoft の Cognitive Services のひとつである Custom Vision を使用し、カメラを起動して、その映像をリアルタイムに判別するプログラムを作成しました。Custom Vision API 自体の説明は本記事では省きますが、実際のリアルタイムでの処理についてまとめておきたいと思います。

事前準備

事前に、今回のプログラムで使用するモデルを Custom Vision で作成します。
作成後、Prediction URL を発行しておきます。

プログラムについて

カメラの起動

カメラの起動には、MediaDevices.getUserMedia() を使用する。
プログラムについては下記の通りとなっています。


// Constraints handed to getUserMedia(): no audio, and a video track that asks
// for the 'environment' facing camera with width and height both set to
// videoWidth.
const constraints = {
  audio: false,
  video: {
    facingMode: 'environment',
    width: videoWidth,
    height: videoWidth,
  },
};

// Open the camera described by `constraints` and feed its stream into the
// first <video> element on the page.
navigator.mediaDevices.getUserMedia(constraints)
  .then(function(mediaStream) {
    const video = document.querySelector('video');
    video.srcObject = mediaStream;
    video.onloadedmetadata = function() {
      // play() returns a Promise in current browsers; it rejects when playback
      // cannot start. Log the reason instead of leaving the rejection
      // unhandled.
      const playback = video.play();
      if (playback !== undefined) {
        playback.catch(function(err) {
          console.log(err.name + ": " + err.message);
        });
      }
    };
  })
  .catch(function(err) {
    // Covers getUserMedia() rejections as well as errors thrown by the
    // handler above (for example when no <video> element is found).
    console.log(err.name + ": " + err.message);
  });

フレームごとの処理

カメラ映像のフレームごとに処理を行う。
カメラ映像から取り出したフレームの画像を DataURL → Blob → File オブジェクトの順に変換し、API を呼び出す関数へと渡す。下記のコードでは、20 フレームごとにその関数を呼ぶようにしている（後述の「プログラムの全体」の script.js では 10 フレームごと）。

/**
 * Frame pump: copies the camera <video> onto canvas #c1 and hands every 20th
 * frame to getFaceInfo() as a PNG File.
 *
 * Uses two names that are defined outside this object: the frame counter
 * `count` and the function `getFaceInfo(file)`.
 */
let processor = {
  /**
   * Processes one frame and schedules the next pass.
   * Does nothing (and stops rescheduling) while the video is paused or ended;
   * the "play" listener registered in doLoad() starts the loop again.
   */
  timerCallback: function() {
    if (this.video.paused || this.video.ended) {
      return;
    }
    this.computeFrame();
    // One pass per browser repaint. A zero-delay timer would spin as fast as
    // timers allow, which also drives how often the prediction API is called.
    requestAnimationFrame(() => this.timerCallback());
  },

  /**
   * Looks up the <video> element and the #c1 canvas, and starts the frame
   * loop whenever the video begins playing.
   */
  doLoad: function() {
    this.video = document.querySelector("video");
    this.c1 = document.getElementById("c1");
    this.ctx1 = this.c1.getContext("2d");

    this.video.addEventListener("play", () => {
      // Frames are drawn at the size of the browser window.
      this.width = window.innerWidth;
      this.height = window.innerHeight;
      this.timerCallback();
    }, false);
  },

  /**
   * Per-frame work: draw the current video frame onto the canvas and, on
   * every 20th call, convert the canvas to a File and pass it to
   * getFaceInfo().
   */
  computeFrame: function() {
    this.ctx1.drawImage(this.video, 0, 0, this.width, this.height);

    count++;
    if (count % 20 !== 0) {
      // Skip the PNG encoding below for frames that are not sent.
      return;
    }

    // DataURL -> bytes -> Blob -> File.
    const dataURI = this.c1.toDataURL();
    const byteString = atob(dataURI.split(",")[1]);
    const content = Uint8Array.from(byteString, (ch) => ch.charCodeAt(0));

    const blob = new Blob([content], {
      type: "application/octet-stream",
    });
    const file = new File([blob], "file1.png", { type: "application/octet-stream" });

    getFaceInfo(file);
  }
};

// Initialise the processor once the document has been parsed, since
// processor.doLoad() looks up elements in the page.
document.addEventListener("DOMContentLoaded", function onDomReady() {
  processor.doLoad();
});

APIの呼び出し

フレームごとの処理から下記の関数で API を呼び出し、返ってきた結果をコンソールに出力している。

/**
 * Image analysis: POSTs one captured frame to the Custom Vision prediction
 * endpoint and logs the `predictions` field of the JSON response.
 *
 * @param {File|Blob} file - Image data; sent unchanged as the request body.
 * @returns {void} Nothing. Results and failures are reported on the console
 *   once the request completes.
 */
function getFaceInfo(file) {

  // Set the Custom Vision key and URL.
  // Copy the URL and key shown on the subscription screen.
  const predictionKey = "<Custom Vision の Prediction Key を入力>";
  const endpoint = "EndPointのURLを入れる";

  // URL used for the Custom Vision call.
  const webSvcUrl = endpoint;

  // Set the parameters for the Custom Vision call and send the request.
  const xmlHttp = new XMLHttpRequest();
  xmlHttp.open("POST", webSvcUrl, true);
  xmlHttp.setRequestHeader("Prediction-Key", predictionKey);
  xmlHttp.setRequestHeader("Content-Type", "application/octet-stream");

  // The handler is attached before send() so it is in place for every state
  // change of the request.
  xmlHttp.onreadystatechange = function() {
    // readyState 4 means the request has finished.
    if (xmlHttp.readyState !== 4) {
      return;
    }
    if (xmlHttp.status !== 200) {
      console.error("Custom Vision request failed with status " + xmlHttp.status);
      return;
    }

    let json;
    try {
      json = JSON.parse(xmlHttp.responseText);
    } catch (err) {
      console.error("Custom Vision response is not valid JSON: " + err.message);
      return;
    }
    console.log(json.predictions);
  };
  xmlHttp.send(file);
}

プログラムの全体

今回のプログラムについて、全体像を示す。

index.html

<!DOCTYPE html>
<html lang="ja">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1.0">
<title>Webカメラから画像処理</title>
<!-- Styles belong inside <head>; elements placed after </html> are invalid markup. -->
<style>
    * {
      margin: 0;
      padding: 0;
    }
    canvas {
      display: block;
    }
    html, body, #wrapper {
      width: 100%;
      height: 100%;
    }
</style>
</head>
<body onload="init()">
    <!-- Hidden camera feed; script.js draws its frames onto the canvas below. -->
    <video autoplay muted playsinline style="display: none;"></video>
    <div id="wrapper">
      <canvas id="c1"></canvas>
    </div>

    <script>
      // Called from <body onload>: size the canvas to match its wrapper.
      function init() {
        let wpr = document.getElementById("wrapper");
        let cvs = document.getElementById("c1");
        cvs.width = wpr.offsetWidth;
        cvs.height = wpr.offsetHeight;
      }
    </script>
    <script src="script.js"></script>
</body>
</html>

script.js

// Edge length in pixels, used below for both width and height.
const videoWidth = 1600;

// NOTE(review): this element is created and sized here, but nothing visible in
// this script inserts it into the document; the rest of the script looks up
// the page's <video> via document.querySelector() instead. Confirm whether it
// is still needed.
let video = document.createElement('video');
video.width = videoWidth;
video.height = videoWidth;

// Running frame counter, incremented in processor.computeFrame().
let count = 0;

// Constraints handed to getUserMedia(): no audio, and a video track that asks
// for the 'environment' facing camera with width and height both set to
// videoWidth.
const constraints = {
  audio: false,
  video: {
    facingMode: 'environment',
    width: videoWidth,
    height: videoWidth,
  },
};

// https://developer.mozilla.org/ja/docs/Web/API/MediaDevices/getUserMedia
// Camera launch: open the camera described by `constraints` and feed its
// stream into the first <video> element on the page.

navigator.mediaDevices.getUserMedia(constraints)
  .then(function(mediaStream) {
    const video = document.querySelector('video');
    video.srcObject = mediaStream;
    video.onloadedmetadata = function() {
      // play() returns a Promise in current browsers; it rejects when playback
      // cannot start. Log the reason instead of leaving the rejection
      // unhandled.
      const playback = video.play();
      if (playback !== undefined) {
        playback.catch(function(err) {
          console.log(err.name + ": " + err.message);
        });
      }
    };
  })
  .catch(function(err) {
    // Always check for errors at the end. This covers getUserMedia()
    // rejections as well as errors thrown by the handler above.
    console.log(err.name + ": " + err.message);
  });


/**
 * Frame pump: copies the camera <video> onto canvas #c1 and hands every 10th
 * frame to getFaceInfo() as a PNG File.
 *
 * Uses two names defined elsewhere in this script: the frame counter `count`
 * and the function `getFaceInfo(file)`.
 */
let processor = {
  /**
   * Processes one frame and schedules the next pass.
   * Does nothing (and stops rescheduling) while the video is paused or ended;
   * the "play" listener registered in doLoad() starts the loop again.
   */
  timerCallback: function() {
    if (this.video.paused || this.video.ended) {
      return;
    }
    this.computeFrame();
    // One pass per browser repaint. A zero-delay timer would spin as fast as
    // timers allow, which also drives how often the prediction API is called.
    requestAnimationFrame(() => this.timerCallback());
  },

  /**
   * Looks up the <video> element and the #c1 canvas, and starts the frame
   * loop whenever the video begins playing.
   */
  doLoad: function() {
    this.video = document.querySelector("video");
    this.c1 = document.getElementById("c1");
    this.ctx1 = this.c1.getContext("2d");

    this.video.addEventListener("play", () => {
      // Frames are drawn at the size of the browser window.
      this.width = window.innerWidth;
      this.height = window.innerHeight;
      this.timerCallback();
    }, false);
  },

  /**
   * Per-frame work: draw the current video frame onto the canvas and, on
   * every 10th call, convert the canvas to a File and pass it to
   * getFaceInfo().
   */
  computeFrame: function() {
    this.ctx1.drawImage(this.video, 0, 0, this.width, this.height);

    // Throttle: only every 10th frame is sent to the API.
    count++;
    if (count % 10 !== 0) {
      // Skip the PNG encoding below for frames that are not sent.
      return;
    }

    // DataURL -> bytes -> Blob -> File.
    const dataURI = this.c1.toDataURL();
    const byteString = atob(dataURI.split(",")[1]);
    const content = Uint8Array.from(byteString, (ch) => ch.charCodeAt(0));

    const blob = new Blob([content], {
      type: "application/octet-stream",
    });
    const file = new File([blob], "file1.png", { type: "application/octet-stream" });

    getFaceInfo(file);
  }
};

// Initialise the processor once the document has been parsed, since
// processor.doLoad() looks up elements in the page.
document.addEventListener("DOMContentLoaded", function onDomReady() {
  processor.doLoad();
});


/**
 * Image analysis: POSTs one captured frame to the Custom Vision prediction
 * endpoint and logs the `predictions` field of the JSON response.
 *
 * @param {File|Blob} file - Image data; sent unchanged as the request body.
 * @returns {void} Nothing. Results and failures are reported on the console
 *   once the request completes.
 */
function getFaceInfo(file) {

  // Set the Custom Vision key and URL.
  // Copy the URL and key shown on the subscription screen.
  const predictionKey = "<Custom Vision の Prediction Key を入力>";
  const endpoint = "EndPointのURLを入れる";

  // URL used for the Custom Vision call.
  const webSvcUrl = endpoint;

  // Set the parameters for the Custom Vision call and send the request.
  const xmlHttp = new XMLHttpRequest();
  xmlHttp.open("POST", webSvcUrl, true);
  xmlHttp.setRequestHeader("Prediction-Key", predictionKey);
  xmlHttp.setRequestHeader("Content-Type", "application/octet-stream");

  // The handler is attached before send() so it is in place for every state
  // change of the request.
  xmlHttp.onreadystatechange = function() {
    // readyState 4 means the request has finished.
    if (xmlHttp.readyState !== 4) {
      return;
    }
    if (xmlHttp.status !== 200) {
      console.error("Custom Vision request failed with status " + xmlHttp.status);
      return;
    }

    let json;
    try {
      json = JSON.parse(xmlHttp.responseText);
    } catch (err) {
      console.error("Custom Vision response is not valid JSON: " + err.message);
      return;
    }
    console.log(json.predictions);
  };
  xmlHttp.send(file);
}

注意事項

今回のプログラムは、実行中に高頻度で Custom Vision API を呼び出すため、従量課金に十分注意した上で使用してください。

参考

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up