More than 3 years have passed since last update.

4.9.7 単眼カメラによるVisual SLAM(ORB-SLAM)

Posted at 2020-06-09

目次へのリンク

MATLABによる画像処理・コンピュータービジョン入門目次

概要

MATLABによるVisual SLAMの例題をご紹介します。

ORB-SLAMを用いて動画からカメラ軌跡と点群マップの推定を行います。

特徴点マッチングによるカメラのトラッキングや地図生成、バンドル調整など使われているテクニックを学びながらご自身のアルゴリズムを開発できます。

対応ファイル

I4_09_7_ORB_SLAM.m

Code

% 実行に必要なサポート関数へのパスを追加
addpath(fullfile(matlabroot,'examples\vision\main'));

RGB-Dのデータセットのダウンロード

Code

baseDownloadURL = 'https://vision.in.tum.de/rgbd/dataset/freiburg3/rgbd_dataset_freiburg3_long_office_household.tgz'; 
dataFolder      = fullfile('D:\Dataset', 'tum_rgbd_dataset', filesep); 
options         = weboptions('Timeout', Inf);
tgzFileName     = [dataFolder, 'fr3_office.tgz'];
folderExists    = exist(dataFolder, 'dir');

% データのダウンロード
if ~folderExists  
    mkdir(dataFolder); 
    disp('Downloading fr3_office.tgz (1.38 GB). This download can take a few minutes.') 
    websave(tgzFileName, baseDownloadURL, options); 
    
    % Extract contents of the downloaded file
    disp('Extracting fr3_office.tgz (1.38 GB) ...') 
    untar(tgzFileName, dataFolder); 
end
imageFolder   = [dataFolder,'rgbd_dataset_freiburg3_long_office_household/rgb/'];
imds          = imageDatastore(imageFolder);

% 最初のフレーム画像の確認
currFrameIdx = 1;
currI = readimage(imds, currFrameIdx);
himage = imshow(currI);

マップの初期化

最初のフレームを1番目のキーフレームとし、カメラ姿勢推定に十分なマッチング点が得られるまでフレームを読み込み続けます。

十分なマッチング点が得られたフレームを2番目のキーフレームとします。

Code

rng(123); % ランダムシードの設定

% カメラ内部パラメータの設定（読み込んだ画像は全て補正済み）
focalLength    = [535.4, 539.2];    % in units of pixels
principalPoint = [320.1, 247.6];    % in units of pixels
imageSize      = size(currI,[1 2]);  % in units of pixels
intrinsics     = cameraIntrinsics(focalLength, principalPoint, imageSize);

% ORB特徴検出と特徴量抽出
[preFeatures, prePoints] = helperDetectAndExtractFeatures(currI); 

currFrameIdx = currFrameIdx + 1;
firstI       = currI; % 最初のフレームを保存

isMapInitialized  = false;

while ~isMapInitialized && hasdata(imds)
    
    % 1フレーム読み込み
    currI = readimage(imds, currFrameIdx);
    
    % ORB特徴量の検出と抽出
    [currFeatures, currPoints] = helperDetectAndExtractFeatures(currI); 
    
    % インデックスを一つ進める
    currFrameIdx = currFrameIdx + 1;
    
    % 特徴のマッチング推定
    indexPairs = matchFeatures(preFeatures, currFeatures, 'Unique', true, ...
        'MaxRatio', 0.7, 'MatchThreshold', 70);
    
    % 十分なマッチング点数が得られなかった場合は次フレームの処理へ
    minMatches = 100;
    if size(indexPairs, 1) < minMatches
        continue
    end
    
    % 十分なマッチング点数が得られたら、前後のマッチング点を登録
    preMatchedPoints  = prePoints(indexPairs(:,1),:);
    currMatchedPoints = currPoints(indexPairs(:,2),:);
    
    % ホモグラフィーの計算と再構築の評価
    [tformH, scoreH, inliersIdxH] = helperComputeHomography(preMatchedPoints, currMatchedPoints);

    % 基礎行列の計算と再構築の評価
    [tformF, scoreF, inliersIdxF] = helperComputeFundamentalMatrix(preMatchedPoints, currMatchedPoints);
    
    % スコアが高い方の手法を変換行列に使用
    ratio = scoreH/(scoreH + scoreF);
    ratioThreshold = 0.45;
    if ratio > ratioThreshold
        inlierTformIdx = inliersIdxH;
        tform          = tformH;
    else
        inlierTformIdx = inliersIdxF;
        tform          = tformF;
    end

    % カメラ位置の計算（処理速度向上の為、半分のインライアを使用）
    inlierPrePoints  = preMatchedPoints(inlierTformIdx);
    inlierCurrPoints = currMatchedPoints(inlierTformIdx);
    [relOrient, relLoc, validFraction] = relativeCameraPose(tform, intrinsics, ...
        inlierPrePoints(1:2:end), inlierCurrPoints(1:2:end));
    
    % 十分なインライアが得られなかった場合は次フレームの処理へ
    if validFraction < 0.7 || numel(size(relOrient))==3
        continue
    end
    
    % 三角法で3Dマップ点を計算
    relPose = rigid3d(relOrient, relLoc);
    [isValid, xyzWorldPoints, inlierTriangulationIdx] = helperTriangulateTwoFrames(...
        rigid3d, relPose, inlierPrePoints, inlierCurrPoints, intrinsics);
    
    if ~isValid
        continue
    end
    
    % 2つのキーフレームにおける特徴点のインデックスを取得
    indexPairs = indexPairs(inlierTformIdx(inlierTriangulationIdx),:);
    
    isMapInitialized = true;
    
    disp(['Map initialized with frame 1 and frame ', num2str(currFrameIdx-1)])
end

Output

Map initialized with frame 1 and frame 39

Code


if isMapInitialized

    close(himage.Parent.Parent); % 前回のFigureを閉じる

    % マッチした特徴点を表示
    hfeature = showMatchedFeatures(firstI, currI, prePoints(indexPairs(:,1)), ...
        currPoints(indexPairs(:, 2)), 'Montage');
else
    error('Unable to initialize map.')
end

キーフレームとマップ点の管理用オブジェクトの作成

Code

% imageviewsetオブジェクトを使用してキーフレームを管理
vSetKeyFrames = imageviewset;

% 3Dマップ点を保存するhelperMapPointSetオブジェクトを作成
mapPointSet   = helperMapPointSet;

% 1番目のキーフレームを保存
preViewId     = 1;
vSetKeyFrames = addView(vSetKeyFrames, preViewId, rigid3d, 'Points', prePoints,...
    'Features', preFeatures.Features);

% 2番目のキーフレームを保存
currViewId    = 2;
vSetKeyFrames = addView(vSetKeyFrames, currViewId, relPose, 'Points', currPoints,...
    'Features', currFeatures.Features);

% 1番目と2番目のキーフレームの接続情報を追加
vSetKeyFrames = addConnection(vSetKeyFrames, preViewId, currViewId, relPose, 'Matches', indexPairs);

% 3Dマップ点の追加
[mapPointSet, newPointIdx] = addMapPoint(mapPointSet, xyzWorldPoints);

% マップ点の観測情報を追加
preLocations   = prePoints.Location;
currLocations  = currPoints.Location;
preScales      = prePoints.Scale;
currScales     = currPoints.Scale;

% 1番目のキーフレームにおける3Dマップ点に一致する画像点を追加
mapPointSet   = addObservation(mapPointSet, newPointIdx, preViewId, indexPairs(:,1), ....
    preLocations(indexPairs(:,1),:), preScales(indexPairs(:,1)));

% 2番目のキーフレームにおける3Dマップ点に一致する画像点を追加
mapPointSet   = addObservation(mapPointSet, newPointIdx, currViewId, indexPairs(:,2), ...
    currLocations(indexPairs(:,2),:), currScales(indexPairs(:,2)));

最初のバンドル調整と結果の可視化

Code

% ここまでで得られた2つのキーフレームでバンドル調整を実行
tracks       = findTracks(vSetKeyFrames);
cameraPoses  = poses(vSetKeyFrames);

[refinedPoints, refinedAbsPoses] = bundleAdjustment(xyzWorldPoints, tracks, ...
    cameraPoses, intrinsics, 'FixedViewIDs', 1, ...
    'PointsUndistorted', true, 'AbsoluteTolerance', 1e-7,...
    'RelativeTolerance', 1e-15, 'MaxIteration', 50);

% マップ点の奥行きの中央値を使ってマップとカメラ位置をスケーリング
medianDepth   = median(vecnorm(refinedPoints.'));
refinedPoints = refinedPoints / medianDepth;

refinedAbsPoses.AbsolutePose(currViewId).Translation = ...
    refinedAbsPoses.AbsolutePose(currViewId).Translation / medianDepth;
relPose.Translation = relPose.Translation/medianDepth;

% 調整後の位置でキーフレームを更新
vSetKeyFrames = updateView(vSetKeyFrames, refinedAbsPoses);
vSetKeyFrames = updateConnection(vSetKeyFrames, preViewId, currViewId, relPose);

% 調整後の位置でマップ点を更新
mapPointSet = updateLocation(mapPointSet, refinedPoints);

% 向きと奥行きの更新 
mapPointSet = updateViewAndRange(mapPointSet, vSetKeyFrames.Views, newPointIdx);

% 現在のフレームでマッチした特徴点を可視化
close(hfeature.Parent.Parent);
featurePlot = helperVisualizeMatchedFeatures(currI, currPoints(indexPairs(:,2)));

Code


% 初期のマップ点とカメラの軌跡を可視化
mapPlot     = helperVisualizeMotionAndStructure(vSetKeyFrames, mapPointSet);
showLegend(mapPlot);

トラッキング・地図作成と局所的な最適化（Local Mapping）・Loop Closure

Code

% 現在のキーフレームのViewId
currKeyFrameId    = currViewId;

% 最後のキーフレームのViewId
lastKeyFrameId    = currViewId;

% 共視認性が最も高いマップ点を保持した参照キーフレームのViewId
refKeyFrameId     = currViewId;

% 入力画像シーケンスにおける最後のキーフレームのインデックス
lastKeyFrameIdx   = currFrameIdx - 1; 

% 入力画像シーケンスにおける全てのキーフレームのインデックス
addedFramesIdx    = [1; lastKeyFrameIdx];

isLoopClosed      = false;

% Main loop
while ~isLoopClosed && hasdata(imds)   
    
    % ==============
    % Tracking
    % ==============   
    currI = readimage(imds, currFrameIdx);

    % ORB特徴量の検出と抽出
    [currFeatures, currPoints] = helperDetectAndExtractFeatures(currI);

    % 最後のキーフレームのトラック
    % mapPointsIdx:   現在のフレームで観測されたマップ点のインデックス
    % featureIdx:     現在のフレームと一致する特徴のインデックス
    [currPose, mapPointsIdx, featureIdx] = helperTrackLastKeyFrame(mapPointSet, ...
        vSetKeyFrames.Views, currFeatures, currPoints, lastKeyFrameId, intrinsics);
    
    % ローカルマップのトラック
    % refKeyFrameId:      現在のフレームとの共視認性が最も高い参照キーフレームのViewId
    % localKeyFrameIds:   現在のフレームと接続するキーフレームのViewId
    [refKeyFrameId, localKeyFrameIds, currPose, mapPointsIdx, featureIdx] = ...
        helperTrackLocalMap(mapPointSet, vSetKeyFrames, mapPointsIdx, ...
        featureIdx, currPose, currFeatures, currPoints, intrinsics);
    
    % 現在のフレームがキーフレームか確認 
    % ※下記の2条件を同時に満たす場合にキーフレームとする
    %
    % 1. 最後のキーフレームから少なくとも20フレーム経過しているか、
    %    80個のマップ点より現在のフレームのトラック数が少ない。 
    %
    % 2. 現在のフレームでトラックしたマップ点の数が、
    %    参照キーフレームによるトラックの数の90%より少ない。
    isKeyFrame = helperIsKeyFrame(mapPointSet, refKeyFrameId, lastKeyFrameIdx, ...
        currFrameIdx, mapPointsIdx);
    
    % マッチした特徴点の可視化
    updatePlot(featurePlot, currI, currPoints(featureIdx));
    
    % キーフレームでなかった場合は次のフレーム処理へ
    if ~isKeyFrame
        currFrameIdx = currFrameIdx + 1;
        continue
    end
    
    % 現在のキーフレームIDの更新
    currKeyFrameId  = currKeyFrameId + 1;

    % ==============
    % Local Mapping
    % ==============
    
    % 新しいキーフレームの追加 
    [mapPointSet, vSetKeyFrames] = helperAddNewKeyFrame(mapPointSet, vSetKeyFrames, ...
        currPose, currFeatures, currPoints, mapPointsIdx, featureIdx, localKeyFrameIds);
    
    % 方向と奥行きの更新
    mapPointSet = updateViewAndRange(mapPointSet, vSetKeyFrames.Views, mapPointsIdx);
    
    % 3キーフレーム未満で観察されたマップ点の外れ値を削除
    mapPointSet = helperCullRecentMapPoints(mapPointSet, vSetKeyFrames, newPointIdx);
    
    % 三角法により新しいマップ点を作成
    [mapPointSet, vSetKeyFrames, newPointIdx] = helperCreateNewMapPoints(mapPointSet, vSetKeyFrames, ...
        currKeyFrameId, intrinsics);

    % 局所的なバンドル調整
    [mapPointSet, vSetKeyFrames] = helperLocalBundleAdjustment(mapPointSet, vSetKeyFrames, ...
        currKeyFrameId, intrinsics); 
    
    % カメラの軌跡を3次元上で可視化
    updatePlot(mapPlot, vSetKeyFrames, mapPointSet);

    % ==============
    % Loop Closure
    % ==============
    
    % ループクロージャーのデータベースを初期化
    if currKeyFrameId == 3
        % 別途用意しておいたBag Of Featuresのデータを読み込み
        %bofData         = load('bagOfFeaturesData.mat');
        %loopDatabase    = invertedImageIndex(bofData.bof);
        
        % シーン一致の検出にBag Of Featuresで得られたVisual Wordsを使用
        setDir  = fullfile(toolboxdir('vision'),'visiondata','imageSets');
        subds = imageDatastore(setDir,'IncludeSubfolders',true,'LabelSource','foldernames');
        bof = bagOfFeatures(subds,'CustomExtractor',@helperSURFFeatureExtractorFunction);
        loopDatabase    = invertedImageIndex(bof);
        loopCandidates  = [1; 2];
        
        
    % いくつかのキーフレームの作成後、ループクロージャを確認  
    elseif currKeyFrameId > 20
        
        % ループクロージャ可能なキーフレーム候補の検出
        [isDetected, validLoopCandidates] = helperCheckLoopClosure(vSetKeyFrames, currKeyFrameId, ...
            loopDatabase, currI, loopCandidates);
        
        if isDetected 
            % ループクロージャの接続を追加
            [isLoopClosed, mapPointSet, vSetKeyFrames] = helperAddLoopConnections(...
                mapPointSet, vSetKeyFrames, validLoopCandidates, ...
                currKeyFrameId, currFeatures, currPoints, intrinsics);
        end
    end
    
    % ループクロージャが検出できなかった場合はデータベースに画像を追加
    if ~isLoopClosed
        currds = imageDatastore(imds.Files{currFrameIdx});
        addImages(loopDatabase, currds, 'Verbose', false);
        loopCandidates= [loopCandidates; currKeyFrameId];
    end
    
    % 各種IDとインデックスを更新
    lastKeyFrameId  = currKeyFrameId;
    lastKeyFrameIdx = currFrameIdx;
    addedFramesIdx  = [addedFramesIdx; currFrameIdx]; %#ok<AGROW>
    currFrameIdx  = currFrameIdx + 1;
end

Output

Creating Bag-Of-Features.
-------------------------
* Image category 1: books
* Image category 2: cups
* Extracting features using a custom feature extraction function: helperSURFFeatureExtractorFunction.

* Extracting features from 12 images...done. Extracted 5219 features.

* Keeping 80 percent of the strongest features from each category.

* Balancing the number of features across all image categories to improve clustering.
** Image category 2 has the least number of strongest features: 1207.
** Using the strongest 1207 features from each of the other image categories.

* Using K-Means clustering to create a 500 word visual vocabulary.
* Number of features          : 2414
* Number of clusters (K)      : 500

* Initializing cluster centers...100.00%.
* Clustering...completed 9/100 iterations (~0.04 seconds/iteration)...converged in 9 iterations.

* Finished creating Bag-Of-Features

Output

Loop edge added between keyframe: 2 and 115

全てのキーフレームで最適化

Code

% ポーズグラフの最適化
minNumMatches      = 40;
vSetKeyFramesOptim = optimizePoses(vSetKeyFrames, minNumMatches, 'Tolerance', 1e-16, 'Verbose', true);

Output

Iteration 1, residual error 0.016110
Iteration 2, residual error 0.016005
Iteration 3, residual error 0.016005
Iteration 4, residual error 0.016005
Iteration 5, residual error 0.016005
Iteration 6, residual error 0.016005
Iteration 7, residual error 0.016005
Iteration 8, residual error 0.016005
Iteration 9, residual error 0.016005
Iteration 10, residual error 0.016005
Iteration 11, residual error 0.016005
Solver stopped because change in function value was less than specified function tolerance.

Code


% 最適化されたカメラ軌跡のプロット
optimizedPoses  = poses(vSetKeyFramesOptim);
plotOptimizedTrajectory(mapPlot, optimizedPoses)
showLegend(mapPlot);

Ground Truthデータとの比較

データの読み込み

Code

gTruthData = load('orbslamGroundTruth.mat');
gTruth     = gTruthData.gTruth;

% 実際のカメラ軌跡をプロット 
plotActualTrajectory(mapPlot, gTruth(addedFramesIdx), optimizedPoses);
showLegend(mapPlot);

Code


% トラッキング精度を評価
helperEstimateTrajectoryError(gTruth(addedFramesIdx), optimizedPoses);

Output

Absolute RMSE for key frame trajectory (m): 0.20831

まとめ

MATLABによるVisual SLAMの例題をご紹介しました。

ORB-SLAMのカメラのトラッキング、マップ生成とバンドル調整までコードを確認しながら実行できます。

自身で撮影した動画などに切り替えて試してみてください。

参考

Monocular Visual Simultaneous Localization and Mapping

謝辞

本記事は @eigs さんのlivescript2markdownを使わせていただいてます。

目次へのリンク

MATLABによる画像処理・コンピュータービジョン入門目次

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up