画像特徴量から縮尺・回転角を推定する - 徒然なる日々を送るソフトウェアデベロッパーの記録（２）

カメラの被写体が元画像に対してどんな角度・縮尺で写っているのか
知りたいことがあります。
が、OpenCV には剛体の回転・拡大/縮小を行う関数がありませんので
新たに作成してみました。

手順

1. ORB, SIFT といった画像特徴量を抽出すると、ノルムが最小に近い
点のペアの集合を見つけることができます。
ペアの集合から任意の２組を選び、アフィン変換行列および逆行列
を求めます。(剛体の場合、回転、平行移動、縮尺の４つのパラメータ
で変換が規定されるので、必要な方程式は４つとなる。)
具体的には
${\displaystyle \vec{x'} = \mu D_\theta \vec{x} + \vec{T} }$
${\displaystyle D_\theta = \begin{pmatrix} \cos{\theta} && \sin{\theta} \\ -\sin{\theta} && \cos{\theta} \end{pmatrix} }$
を解くことになります。

解法: ２つの点 $\vec{x_1}, \vec{x_2}$ の差分を $\vec{\Delta x}$ 、
$\vec{x'_1}, \vec{x'_2}$ の差分を $\vec{\Delta x'}$ とすると $\vec{T}$ を
消去できます。２つの式に書き下すと
${\displaystyle \Delta x'_x = \mu \cos{\theta} \Delta x_x + \mu \sin{\theta} \Delta x_y }$
${\displaystyle \Delta x'_y = -\mu \sin(\theta) \Delta x_x + \mu \cos{\theta} \Delta x_y }$
これから $\mu, \theta$ について解くと
${\displaystyle \mu = \sqrt{\frac{\Delta x'^2_x + \Delta x'^2_y}{\Delta x^2_x + \Delta x^2_y}} }$
${\displaystyle \tan{\theta} = \frac{\Delta x'_x \Delta x_y - \Delta x'_y \Delta x_x}{\Delta x'_x \Delta x_x + \Delta y'_y \Delta y_y} }$
$\vec{T}$ は２つのうちの一方の点のペアから算出できます。もう１つのペアを
検算に使うことができます。

これからアフィン変換行列
${\displaystyle A = \begin{pmatrix} \mu \cos\theta && \mu \sin\theta && T_x \\ -\mu \sin\theta && \mu \cos\theta && T_y \\ 0 && 0 && 1 \end{pmatrix} }$
を作成します。逆変換は invertAffineTransformation() で計算することができます。

2. 手順1で見つかった全てのペアに対して、変換後の値の誤差 $|x'-Ax|$ を調べます。
誤差が一定値以下のペアの割合が例えば80%以上であればそのアフィン変換を採用します。
そうでなければ別の２組のペアを選択し、同じ手順を繰り返します。

プログラム

ORB インスタンスを作成し、detector, extractor, matcher を構成します。
matcher.match() を呼び出し vector を算出します。
この中からノルムが最小に近い（例えば３倍以下）ペアのvectorを算出します。
（ここまでは他の方のブログに記述があるので割愛します。）

#include <opencv2/opencv2.hpp>
#include <algorithm>
#include <vector>

using namespace std;
using namespace cv;

// ２つのキーポイントの最小距離を求める
static double getMinimumDistance(vector<KeyPoint> &kp, vector<DMatch> &m, vector
<int> &indexes, int k) {
  double dmin = DBL_MAX;
  Point2f pk = kp[m[k].queryIdx].pt;
  for (int i = 0; i < (int)indexes.size(); ++i) {
    Point2f pt = kp[m[indexes[i]].queryIdx].pt;
    double d = fabs(pt.x - pk.x) + fabs(pt.y - pk.y);
    if (d < dmin) dmin = d;
  }
  return dmin;
}

// 二乗和の計算
static inline float dist2(Point2f &p) {
  return p.x * p.x + p.y * p.y;
}

// 剛体のアフィン行列の算出
static bool getRigidTransform(Point2f *xs, Point2f *xd, Mat &mat, float &theta, float &mu) {
  Point2f ds(xs[0].x - xs[1].x, xs[0].y - xs[1].y);
  Point2f dd(xd[0].x - xd[1].x, xd[0].y - xd[1].y);
  if (dist2(ds) < 1e-10 || dist2(dd) < 1e-10) {
    return false;
  }
  mu = sqrt(dist2(dd)/dist2(ds));
  float ct, st;
  float y =  ds.y * dd.y + ds.x * dd.x;
  float x = -ds.x * dd.y + ds.y * dd.x;
  if (y < 1e-8) {
    ct = 0.0f;
    st = 1.0f;
    theta = M_PI / 2;
    if (y > 0.0f) {
      if (x < 0.0f) {
        theta = -theta;
      }
    } else {
      if (x > 0.0f) {
        theta = -theta;
      }
    }
  } else {
    float tt = x / y;
    theta = atan(tt);
    st = sqrt(tt * tt / (1.0f + tt * tt));
    ct = sqrt(1.0f / (1.0f + tt * tt));
    if (x < 0.0f) st = -st;
    if (y < 0.0f) st = -st;
  }
  
  mat = (Mat_<double>(2, 3)
    <<  (mu * ct), (mu * st), (xd[0].x - mu * ( ct * xs[0].x + st * xs[0].y)),
    (-mu * st), (mu * ct), (xd[0].y - mu * (-st * xs[0].x + ct * xs[0].y)));
  return true;
}

// Affine 変換を推定する
bool deferAffine(vector<DMatch> &matches_good, vector<KeyPoint> &kpA, vec
tor<KeyPoint> &kpB, Mat &af, Mat &invaf, float &mu, float &theta) {
  int count = 0;
  vector<int> indexs;

  // matches_good が少ない時はエラー
  if (matches_good.size() < 2) {
    return false;
  }

  // 乱数発生
  vector<int> seeds;
  for (int i = 0; i < (int)matches_good.size(); ++i) {
    seeds.push_back(i);
  }
  random_shuffle(seeds.begin(), seeds.end());
  vector<int>::iterator ps = seeds.begin();

  do {
    // index を追加する
    indexs.clear();
    int k = 0;
    for ( ; indexs.size() < 2; ++ps) {
      if (ps == seeds.end()) {
        random_shuffle(seeds.begin(), seeds.end());
        ps = seeds.begin();
        ++count;
      }
      int j = *ps;
      if (indexs.size() == 0) {
        indexs.push_back(j);
      } else {
        double d = getMinimumDistance(kpA, matches_good, indexs, j);
        if (d > orb_neighborThreshold) {
          indexs.push_back(j);
        }
      }
    }

    // 2点以下だったら失敗
    if (indexs.size() < 2) {
      break;
    }

    // 仮に affine 変換を求める
    Point2f pntSrc[2], pntDst[2];
    for (int i = 0; i < 2; ++i) {
      pntSrc[i] = kpA[matches_good[indexs[i]].queryIdx].pt;
      pntDst[i] = kpB[matches_good[indexs[i]].trainIdx].pt;
    }
    mu = 0.0f;
    theta = 0.0f;
    if (!getRigidTransform(pntSrc, pntDst, af, theta, mu)) {
      if (count < 3) {
        continue;
      }
      break;
    }
    Mat invaf2mat;
    invertAffineTransform(af, invaf2mat);
    int voteCount[2] = { 0, 0 };
    for (int i = 0; i < (int)matches_good.size(); ++i) {
      Point2f pntS = kpA[matches_good[i].queryIdx].pt;
      Point2f pntD = kpB[matches_good[i].trainIdx].pt;
      Mat m = (Mat_<double>(3, 1) << pntD.x, pntD.y, 1.0);
      Mat mr = invaf2mat * m;
      double dist = fabs(mr.at<double>(0, 0) - pntS.x) + fabs(mr.at<double>(1, 0) - pntS.y);
      voteCount[ (dist < orb_voteDist) ? 0 : 1 ]++;
    }
    double vc = 1.0 * (double)voteCount[0] / (double)(voteCount[0] + voteCount[1]);
    if (vc > orb_voteWinner) {
      // affine 変換を返す
      invaf = invaf2mat;
      return true;
    }
  } while(count < orb_repeatCount);
  return false;
}