Rebased Feature: Implement keypoint filtering with cv::Mat #65

Open · wants to merge 5 commits into base: master

10 changes: 5 additions & 5 deletions include/Frame.h
@@ -59,19 +59,19 @@ class Frame
Frame(const Frame &frame);

// Constructor for stereo cameras.
Frame(const cv::Mat &imLeft, const cv::Mat &imRight, const double &timeStamp, ORBextractor* extractorLeft, ORBextractor* extractorRight, ORBVocabulary* voc, cv::Mat &K, cv::Mat &distCoef, const float &bf, const float &thDepth, GeometricCamera* pCamera,Frame* pPrevF = static_cast<Frame*>(NULL), const IMU::Calib &ImuCalib = IMU::Calib());
Frame(const cv::Mat &imLeft, const cv::Mat &imRight, const cv::Mat *mask, const double &timeStamp, ORBextractor* extractorLeft, ORBextractor* extractorRight, ORBVocabulary* voc, cv::Mat &K, cv::Mat &distCoef, const float &bf, const float &thDepth, GeometricCamera* pCamera,Frame* pPrevF = static_cast<Frame*>(NULL), const IMU::Calib &ImuCalib = IMU::Calib());

// Constructor for RGB-D cameras.
Frame(const cv::Mat &imGray, const cv::Mat &imDepth, const double &timeStamp, ORBextractor* extractor,ORBVocabulary* voc, cv::Mat &K, cv::Mat &distCoef, const float &bf, const float &thDepth, GeometricCamera* pCamera,Frame* pPrevF = static_cast<Frame*>(NULL), const IMU::Calib &ImuCalib = IMU::Calib());
Frame(const cv::Mat &imGray, const cv::Mat &imDepth, const cv::Mat *mask, const double &timeStamp, ORBextractor* extractor,ORBVocabulary* voc, cv::Mat &K, cv::Mat &distCoef, const float &bf, const float &thDepth, GeometricCamera* pCamera,Frame* pPrevF = static_cast<Frame*>(NULL), const IMU::Calib &ImuCalib = IMU::Calib());

// Constructor for Monocular cameras.
Frame(const cv::Mat &imGray, const double &timeStamp, ORBextractor* extractor,ORBVocabulary* voc, GeometricCamera* pCamera, cv::Mat &distCoef, const float &bf, const float &thDepth, Frame* pPrevF = static_cast<Frame*>(NULL), const IMU::Calib &ImuCalib = IMU::Calib());
Frame(const cv::Mat &imGray, const cv::Mat *mask, const double &timeStamp, ORBextractor* extractor,ORBVocabulary* voc, GeometricCamera* pCamera, cv::Mat &distCoef, const float &bf, const float &thDepth, Frame* pPrevF = static_cast<Frame*>(NULL), const IMU::Calib &ImuCalib = IMU::Calib());

// Destructor
// ~Frame();

// Extract ORB on the image. 0 for left image and 1 for right image.
void ExtractORB(int flag, const cv::Mat &im, const int x0, const int x1);
void ExtractORB(int flag, const cv::Mat &im, const int x0, const int x1, const cv::Mat * mask = nullptr);

// Compute Bag of Words representation.
void ComputeBoW();
@@ -343,7 +343,7 @@ class Frame
//Grid for the right image
std::vector<std::size_t> mGridRight[FRAME_GRID_COLS][FRAME_GRID_ROWS];

Frame(const cv::Mat &imLeft, const cv::Mat &imRight, const double &timeStamp, ORBextractor* extractorLeft, ORBextractor* extractorRight, ORBVocabulary* voc, cv::Mat &K, cv::Mat &distCoef, const float &bf, const float &thDepth, GeometricCamera* pCamera, GeometricCamera* pCamera2, Sophus::SE3f& Tlr,Frame* pPrevF = static_cast<Frame*>(NULL), const IMU::Calib &ImuCalib = IMU::Calib());
Frame(const cv::Mat &imLeft, const cv::Mat &imRight, const cv::Mat *mask, const double &timeStamp, ORBextractor* extractorLeft, ORBextractor* extractorRight, ORBVocabulary* voc, cv::Mat &K, cv::Mat &distCoef, const float &bf, const float &thDepth, GeometricCamera* pCamera, GeometricCamera* pCamera2, Sophus::SE3f& Tlr,Frame* pPrevF = static_cast<Frame*>(NULL), const IMU::Calib &ImuCalib = IMU::Calib());

//Stereo fisheye
void ComputeStereoFishEyeMatches();
2 changes: 1 addition & 1 deletion include/ORBextractor.h
@@ -53,7 +53,7 @@ class ORBextractor {
// Compute the ORB features and descriptors on an image.
// ORB are dispersed on the image using an octree.
// Mask is ignored in the current implementation.
int operator()(cv::InputArray _image, cv::InputArray _mask,
int operator()(cv::InputArray _image, const cv::Mat *_mask,
std::vector<cv::KeyPoint> &_keypoints,
cv::OutputArray _descriptors, std::vector<int> &vLappingArea);

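
The extractor's mask parameter changes from a cv::InputArray that the comment above notes was ignored to a const cv::Mat pointer that is only dereferenced when non-null, so a null pointer means "no filtering". A minimal call sketch (illustration only; the constructor arguments and image path are assumptions, not values from this PR):

#include <vector>
#include <opencv2/opencv.hpp>
#include "ORBextractor.h"   // ORB_SLAM3

int main() {
    cv::Mat image = cv::imread("frame.png", cv::IMREAD_GRAYSCALE);  // hypothetical input image
    cv::Mat mask;                                                   // left empty here: filtering disabled
    // Constructor arguments are illustrative: nfeatures, scaleFactor, nlevels, iniThFAST, minThFAST.
    ORB_SLAM3::ORBextractor extractor(1000, 1.2f, 8, 20, 7);
    std::vector<cv::KeyPoint> keypoints;
    cv::Mat descriptors;
    std::vector<int> lappingArea = {0, 0};                          // no stereo lapping area
    int nMono = extractor(image, mask.empty() ? nullptr : &mask,
                          keypoints, descriptors, lappingArea);
    (void)nMono;  // count of keypoints outside the lapping area
    return 0;
}
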
3 changes: 3 additions & 0 deletions include/System.h
@@ -107,17 +107,20 @@ class System
// Process the given stereo frame. Images must be synchronized and rectified.
// Input images: RGB (CV_8UC3) or grayscale (CV_8U). RGB is converted to grayscale.
// Returns the camera pose (empty if tracking fails).
Sophus::SE3f TrackStereo(const cv::Mat &imLeft, const cv::Mat &imRight, const cv::Mat *mask, const double &timestamp, const vector<IMU::Point>& vImuMeas = vector<IMU::Point>(), string filename="");
Sophus::SE3f TrackStereo(const cv::Mat &imLeft, const cv::Mat &imRight, const double &timestamp, const vector<IMU::Point>& vImuMeas = vector<IMU::Point>(), string filename="");

// Process the given rgbd frame. Depthmap must be registered to the RGB frame.
// Input image: RGB (CV_8UC3) or grayscale (CV_8U). RGB is converted to grayscale.
// Input depthmap: Float (CV_32F).
// Returns the camera pose (empty if tracking fails).
Sophus::SE3f TrackRGBD(const cv::Mat &im, const cv::Mat &depthmap, const cv::Mat *mask, const double &timestamp, const vector<IMU::Point>& vImuMeas = vector<IMU::Point>(), string filename="");
Sophus::SE3f TrackRGBD(const cv::Mat &im, const cv::Mat &depthmap, const double &timestamp, const vector<IMU::Point>& vImuMeas = vector<IMU::Point>(), string filename="");

// Process the given monocular frame and optionally IMU data.
// Input images: RGB (CV_8UC3) or grayscale (CV_8U). RGB is converted to grayscale.
// Returns the camera pose (empty if tracking fails).
Sophus::SE3f TrackMonocular(const cv::Mat &im, const cv::Mat *mask, const double &timestamp, const vector<IMU::Point>& vImuMeas = vector<IMU::Point>(), string filename="");
Sophus::SE3f TrackMonocular(const cv::Mat &im, const double &timestamp, const vector<IMU::Point>& vImuMeas = vector<IMU::Point>(), string filename="");


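At the System level each tracking call gains an overload with the mask placed just before the timestamp; the mask-free signatures remain and forward to the new ones with a null mask, so existing callers keep compiling. A caller-side sketch for the monocular case, assuming a CV_8UC1 mask of the same size as the image in which 255 marks "keep keypoints here" (the vocabulary path, settings file, and masked region are placeholders, not part of this PR):

#include <opencv2/opencv.hpp>
#include "System.h"   // ORB_SLAM3

int main() {
    ORB_SLAM3::System SLAM("ORBvoc.txt", "mono_settings.yaml",
                           ORB_SLAM3::System::MONOCULAR, true);
    cv::Mat im = cv::imread("frame.png", cv::IMREAD_GRAYSCALE);     // hypothetical input frame
    // 255 = keep keypoints, anything else = discard (the extractor tests for == 255).
    cv::Mat mask(im.size(), CV_8UC1, cv::Scalar(255));
    mask(cv::Rect(0, im.rows / 2, im.cols, im.rows / 2)).setTo(0);  // e.g. ignore the lower half
    double timestamp = 0.0;
    Sophus::SE3f Tcw = SLAM.TrackMonocular(im, &mask, timestamp);
    (void)Tcw;
    SLAM.Shutdown();
    return 0;
}
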
6 changes: 4 additions & 2 deletions include/Tracking.h
@@ -71,11 +71,13 @@ class Tracking {
// matching.
Sophus::SE3f GrabImageStereo(const cv::Mat& imRectLeft,
const cv::Mat& imRectRight,
const cv::Mat *mask,
const double& timestamp, string filename);
Sophus::SE3f GrabImageRGBD(const cv::Mat& imRGB, const cv::Mat& imD,
const cv::Mat *mask,
const double& timestamp, string filename);
Sophus::SE3f GrabImageMonocular(const cv::Mat& im, const double& timestamp,
string filename);
Sophus::SE3f GrabImageMonocular(const cv::Mat& im, const cv::Mat *mask,
const double& timestamp, string filename);

void GrabImuData(const IMU::Point& imuMeasurement);

28 changes: 15 additions & 13 deletions src/Frame.cc
@@ -146,7 +146,7 @@ Frame::Frame(const Frame &frame)
#endif
}

Frame::Frame(const cv::Mat &imLeft, const cv::Mat &imRight,
Frame::Frame(const cv::Mat &imLeft, const cv::Mat &imRight, const cv::Mat *mask,
const double &timeStamp, ORBextractor *extractorLeft,
ORBextractor *extractorRight, ORBVocabulary *voc, cv::Mat &K,
cv::Mat &distCoef, const float &bf, const float &thDepth,
@@ -190,8 +190,8 @@ Frame::Frame(const cv::Mat &imLeft, const cv::Mat &imRight,
std::chrono::steady_clock::time_point time_StartExtORB =
std::chrono::steady_clock::now();
#endif
thread threadLeft(&Frame::ExtractORB, this, 0, imLeft, 0, 0);
thread threadRight(&Frame::ExtractORB, this, 1, imRight, 0, 0);
thread threadLeft(&Frame::ExtractORB, this, 0, imLeft, 0, 0, mask);
thread threadRight(&Frame::ExtractORB, this, 1, imRight, 0, 0, mask);
threadLeft.join();
threadRight.join();
#ifdef REGISTER_TIMES
@@ -271,7 +271,7 @@ Frame::Frame(const cv::Mat &imLeft, const cv::Mat &imRight,
AssignFeaturesToGrid();
}

Frame::Frame(const cv::Mat &imGray, const cv::Mat &imDepth,
Frame::Frame(const cv::Mat &imGray, const cv::Mat &imDepth, const cv::Mat *mask,
const double &timeStamp, ORBextractor *extractor,
ORBVocabulary *voc, cv::Mat &K, cv::Mat &distCoef, const float &bf,
const float &thDepth, GeometricCamera *pCamera, Frame *pPrevF,
@@ -314,7 +314,7 @@ Frame::Frame(const cv::Mat &imGray, const cv::Mat &imDepth,
std::chrono::steady_clock::time_point time_StartExtORB =
std::chrono::steady_clock::now();
#endif
ExtractORB(0, imGray, 0, 0);
ExtractORB(0, imGray, 0, 0, mask);
#ifdef REGISTER_TIMES
std::chrono::steady_clock::time_point time_EndExtORB =
std::chrono::steady_clock::now();
@@ -382,7 +382,7 @@ Frame::Frame(const cv::Mat &imGray, const cv::Mat &imDepth,
AssignFeaturesToGrid();
}

Frame::Frame(const cv::Mat &imGray, const double &timeStamp,
Frame::Frame(const cv::Mat &imGray, const cv::Mat *mask, const double &timeStamp,
ORBextractor *extractor, ORBVocabulary *voc,
GeometricCamera *pCamera, cv::Mat &distCoef, const float &bf,
const float &thDepth, Frame *pPrevF, const IMU::Calib &ImuCalib)
@@ -424,7 +424,7 @@ Frame::Frame(const cv::Mat &imGray, const double &timeStamp,
std::chrono::steady_clock::time_point time_StartExtORB =
std::chrono::steady_clock::now();
#endif
ExtractORB(0, imGray, 0, 1000);
ExtractORB(0, imGray, 0, 1000, mask);
#ifdef REGISTER_TIMES
std::chrono::steady_clock::time_point time_EndExtORB =
std::chrono::steady_clock::now();
@@ -527,13 +527,13 @@ void Frame::AssignFeaturesToGrid() {
}

void Frame::ExtractORB(int flag, const cv::Mat &im, const int x0,
const int x1) {
const int x1, const cv::Mat *mask) {
vector<int> vLapping = {x0, x1};
if (flag == 0)
monoLeft =
(*mpORBextractorLeft)(im, cv::Mat(), mvKeys, mDescriptors, vLapping);
(*mpORBextractorLeft)(im, mask, mvKeys, mDescriptors, vLapping);
else
monoRight = (*mpORBextractorRight)(im, cv::Mat(), mvKeysRight,
monoRight = (*mpORBextractorRight)(im, mask, mvKeysRight,
mDescriptorsRight, vLapping);
}

@@ -1089,7 +1089,7 @@ void Frame::setIntegrated() {
mbImuPreintegrated = true;
}

Frame::Frame(const cv::Mat &imLeft, const cv::Mat &imRight,
Frame::Frame(const cv::Mat &imLeft, const cv::Mat &imRight, const cv::Mat *mask,
const double &timeStamp, ORBextractor *extractorLeft,
ORBextractor *extractorRight, ORBVocabulary *voc, cv::Mat &K,
cv::Mat &distCoef, const float &bf, const float &thDepth,
@@ -1139,11 +1139,13 @@ Frame::Frame(const cv::Mat &imLeft, const cv::Mat &imRight,
#endif
thread threadLeft(&Frame::ExtractORB, this, 0, imLeft,
static_cast<KannalaBrandt8 *>(mpCamera)->mvLappingArea[0],
static_cast<KannalaBrandt8 *>(mpCamera)->mvLappingArea[1]);
static_cast<KannalaBrandt8 *>(mpCamera)->mvLappingArea[1],
mask);
thread threadRight(
&Frame::ExtractORB, this, 1, imRight,
static_cast<KannalaBrandt8 *>(mpCamera2)->mvLappingArea[0],
static_cast<KannalaBrandt8 *>(mpCamera2)->mvLappingArea[1]);
static_cast<KannalaBrandt8 *>(mpCamera2)->mvLappingArea[1],
mask);
threadLeft.join();
threadRight.join();
#ifdef REGISTER_TIMES
43 changes: 36 additions & 7 deletions src/ORBextractor.cc
@@ -1003,7 +1003,7 @@ static void computeDescriptors(const Mat& image, vector<KeyPoint>& keypoints,
descriptors.ptr((int)i));
}

int ORBextractor::operator()(InputArray _image, InputArray _mask,
int ORBextractor::operator()(InputArray _image, const cv::Mat *_mask,
vector<KeyPoint>& _keypoints,
OutputArray _descriptors,
std::vector<int>& vLappingArea) {
@@ -1013,6 +1013,13 @@ int ORBextractor::operator()(InputArray _image, InputArray _mask,
Mat image = _image.getMat();
assert(image.type() == CV_8UC1);

Mat mask;
if (_mask != nullptr){
mask = *_mask;
assert(image.size() == mask.size());
assert(mask.type() == CV_8UC1 );
}

// Pre-compute the scale pyramid
ComputePyramid(image);

@@ -1038,7 +1045,7 @@ int ORBextractor::operator()(InputArray _image, InputArray _mask,

int offset = 0;
// Modified for speeding up stereo fisheye matching
int monoIndex = 0, stereoIndex = nkeypoints - 1;
int monoIndex = 0, stereoIndexFromBack = 1;
for (int level = 0; level < nlevels; ++level) {
vector<KeyPoint>& keypoints = allKeypoints[level];
int nkeypointsLevel = (int)keypoints.size();
@@ -1056,8 +1063,30 @@ int ORBextractor::operator()(InputArray _image, InputArray _mask,

offset += nkeypointsLevel;

float scale =
mvScaleFactor[level]; // getScale(level, firstLevel, scaleFactor);
// Filter keypoints using the mask: keep a keypoint only if the mask value at
// its position is 255. Keypoints are stored in the coordinates of their
// pyramid level, so scale them back to full-resolution image coordinates
// before the lookup, and skip points that land outside the mask (scaled
// coordinates can fall slightly outside the image, which previously caused
// out-of-bounds reads).
float scale = mvScaleFactor[level];

if (_mask != nullptr){
vector<cv::KeyPoint> newPoints;
std::copy_if(keypoints.begin(), keypoints.end(), std::back_inserter(newPoints),
[&mask, &scale, &nkeypoints](const cv::KeyPoint &p){
const int x = cvRound(p.pt.x * scale);
const int y = cvRound(p.pt.y * scale);
const bool keep = x >= 0 && x < mask.cols && y >= 0 && y < mask.rows &&
mask.at<uint8_t>(y, x) == 255;
if (!keep)
--nkeypoints;  // shrink the total only for rejected keypoints
return keep;
});

keypoints = newPoints;
}
int i = 0;
for (vector<KeyPoint>::iterator keypoint = keypoints.begin(),
keypointEnd = keypoints.end();
@@ -1069,9 +1098,9 @@ int ORBextractor::operator()(InputArray _image, InputArray _mask,

if (keypoint->pt.x >= vLappingArea[0] &&
keypoint->pt.x <= vLappingArea[1]) {
_keypoints.at(stereoIndex) = (*keypoint);
desc.row(i).copyTo(descriptors.row(stereoIndex));
stereoIndex--;
_keypoints.at(nkeypoints - stereoIndexFromBack) = (*keypoint);
desc.row(i).copyTo(descriptors.row(nkeypoints - stereoIndexFromBack));
++stereoIndexFromBack;
} else {
_keypoints.at(monoIndex) = (*keypoint);
desc.row(i).copyTo(descriptors.row(monoIndex));
47 changes: 44 additions & 3 deletions src/System.cc
@@ -261,6 +261,13 @@ Sophus::SE3f System::TrackStereo(const cv::Mat& imLeft, const cv::Mat& imRight,
const double& timestamp,
const vector<IMU::Point>& vImuMeas,
string filename) {
return TrackStereo(imLeft, imRight, nullptr, timestamp, vImuMeas, filename);
}

Sophus::SE3f System::TrackStereo(const cv::Mat& imLeft, const cv::Mat& imRight,
const cv::Mat *mask, const double& timestamp,
const vector<IMU::Point>& vImuMeas,
string filename) {
if (mSensor != STEREO && mSensor != IMU_STEREO) {
cerr << "ERROR: you called TrackStereo but input sensor was not set to "
"Stereo nor Stereo-Inertial."
@@ -269,6 +276,10 @@ Sophus::SE3f System::TrackStereo(const cv::Mat& imLeft, const cv::Mat& imRight,
}

cv::Mat imLeftToFeed, imRightToFeed;
cv::Mat maskToFeed;
if (mask != nullptr){
maskToFeed = mask->clone();
}
if (settings_ && settings_->needToRectify()) {
cv::Mat M1l = settings_->M1l();
cv::Mat M2l = settings_->M2l();
@@ -280,6 +291,9 @@ Sophus::SE3f System::TrackStereo(const cv::Mat& imLeft, const cv::Mat& imRight,
} else if (settings_ && settings_->needToResize()) {
cv::resize(imLeft, imLeftToFeed, settings_->newImSize());
cv::resize(imRight, imRightToFeed, settings_->newImSize());
if (mask != nullptr){
cv::resize(*mask,maskToFeed,settings_->newImSize());
}
} else {
imLeftToFeed = imLeft.clone();
imRightToFeed = imRight.clone();
@@ -324,7 +338,7 @@ Sophus::SE3f System::TrackStereo(const cv::Mat& imLeft, const cv::Mat& imRight,
mpTracker->GrabImuData(vImuMeas[i_imu]);

// std::cout << "start GrabImageStereo" << std::endl;
Sophus::SE3f Tcw = mpTracker->GrabImageStereo(imLeftToFeed, imRightToFeed,
Sophus::SE3f Tcw = mpTracker->GrabImageStereo(imLeftToFeed, imRightToFeed, mask == nullptr ? nullptr : &maskToFeed,
timestamp, filename);

// std::cout << "out grabber" << std::endl;
Expand All @@ -341,6 +355,13 @@ Sophus::SE3f System::TrackRGBD(const cv::Mat& im, const cv::Mat& depthmap,
const double& timestamp,
const vector<IMU::Point>& vImuMeas,
string filename) {
return TrackRGBD(im, depthmap, nullptr, timestamp, vImuMeas, filename);
}

Sophus::SE3f System::TrackRGBD(const cv::Mat& im, const cv::Mat& depthmap,
const cv::Mat *mask, const double& timestamp,
const vector<IMU::Point>& vImuMeas,
string filename) {
if (mSensor != RGBD && mSensor != IMU_RGBD) {
cerr << "ERROR: you called TrackRGBD but input sensor was not set to RGBD."
<< endl;
@@ -349,12 +370,18 @@ Sophus::SE3f System::TrackRGBD(const cv::Mat& im, const cv::Mat& depthmap,

cv::Mat imToFeed = im.clone();
cv::Mat imDepthToFeed = depthmap.clone();
cv::Mat maskToFeed;
if (mask != nullptr){
maskToFeed = mask->clone();
}
if (settings_ && settings_->needToResize()) {
cv::Mat resizedIm;
cv::resize(im, resizedIm, settings_->newImSize());
imToFeed = resizedIm;

cv::resize(depthmap, imDepthToFeed, settings_->newImSize());
if (mask != nullptr)
cv::resize(*mask, maskToFeed, settings_->newImSize());
}

// Check mode change
@@ -396,7 +423,7 @@ Sophus::SE3f System::TrackRGBD(const cv::Mat& im, const cv::Mat& depthmap,
mpTracker->GrabImuData(vImuMeas[i_imu]);

Sophus::SE3f Tcw =
mpTracker->GrabImageRGBD(imToFeed, imDepthToFeed, timestamp, filename);
mpTracker->GrabImageRGBD(imToFeed, imDepthToFeed, mask == nullptr ? nullptr : &maskToFeed, timestamp, filename);

unique_lock<mutex> lock2(mMutexState);
mTrackingState = mpTracker->mState;
@@ -408,6 +435,12 @@ Sophus::SE3f System::TrackRGBD(const cv::Mat& im, const cv::Mat& depthmap,
Sophus::SE3f System::TrackMonocular(const cv::Mat& im, const double& timestamp,
const vector<IMU::Point>& vImuMeas,
string filename) {
return TrackMonocular(im, nullptr, timestamp, vImuMeas, filename);
}

Sophus::SE3f System::TrackMonocular(const cv::Mat& im, const cv::Mat *mask, const double& timestamp,
const vector<IMU::Point>& vImuMeas,
string filename) {
{
unique_lock<mutex> lock(mMutexReset);
if (mbShutDown) return Sophus::SE3f();
@@ -421,10 +454,18 @@ Sophus::SE3f System::TrackMonocular(const cv::Mat& im, const double& timestamp,
}

cv::Mat imToFeed = im.clone();
cv::Mat maskToFeed;
if (mask != nullptr) {
maskToFeed = mask->clone();
}
if (settings_ && settings_->needToResize()) {
cv::Mat resizedIm;
cv::resize(im, resizedIm, settings_->newImSize());
imToFeed = resizedIm;

if (mask != nullptr) {
cv::resize(*mask, maskToFeed, settings_->newImSize());
}
}

// Check mode change
@@ -467,7 +508,7 @@ Sophus::SE3f System::TrackMonocular(const cv::Mat& im, const double& timestamp,
mpTracker->GrabImuData(vImuMeas[i_imu]);

Sophus::SE3f Tcw =
mpTracker->GrabImageMonocular(imToFeed, timestamp, filename);
mpTracker->GrabImageMonocular(imToFeed, mask == nullptr ? nullptr : &maskToFeed, timestamp, filename);

unique_lock<mutex> lock2(mMutexState);
mTrackingState = mpTracker->mState;
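
One design note on the three resize branches above: the mask is resized with cv::resize's default bilinear interpolation, but the extractor keeps only pixels whose value is exactly 255, so interpolated values along mask borders silently discard keypoints near the edge of the allowed region. A nearest-neighbour resize keeps the mask binary; a possible variant (an assumption about intent, not code from this PR):

if (mask != nullptr) {
    // INTER_NEAREST preserves the 0/255 values the "== 255" test in ORBextractor expects.
    cv::resize(*mask, maskToFeed, settings_->newImSize(), 0, 0, cv::INTER_NEAREST);
}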