alicevision · servantftransperfect · Jun 20, 2025
diff --git a/meshroom/aliceVision/SfMPoseInjecting.py b/meshroom/aliceVision/SfMPoseInjecting.py
@@ -11,7 +11,67 @@ class SfMPoseInjecting(desc.AVCommandLineNode):
     size = desc.DynamicNodeSize("input")
 
     category = "Utils"
-    documentation = """Use a JSON file to inject poses inside the SfMData."""
+    documentation = """
+Use a JSON file to inject poses inside the SfMData.
+
+[
+    {
+        'frame_no': 45,
+        'rx': 1.2,
+        'ry': -0.3,
+        'rz': 0.1
+        'tx': 5,
+        'ty': 6,
+        'tz': -2
+    }
+]
+
+or
+
+[
+    {
+        'path': "image_filename",
+        'rx': 1.2,
+        'ry': -0.3,
+        'rz': 0.1
+        'tx': 5,
+        'ty': 6,
+        'tz': -2
+    }
+]
+
+or
+
+[
+    {
+        'path': "/path/to/image",
+        'rx': 1.2,
+        'ry': -0.3,
+        'rz': 0.1
+        'tx': 5,
+        'ty': 6,
+        'tz': -2
+    }
+]
+
+
+frame_no is the detected frameId which is set by the number found in the image filename.
+
+Let's say you have a point with coordinates in the *camera frame*. The coordinates in the common world frame are given by the rotation matrix world_R_camera and the translation vector world_t_camera such that 
+
+worldFramePoint = world_R_camera * cameraFramePoint + world_t_camera
+
+world_t_camera is defined by the triplet [tx, ty, tz] in the json file
+world_R_camera is defined by the triplet [rx, ry, rz] (in degrees) in the json file.
+
+The matrix world_R_camera is built from the triplet, transformed using a function R depending on rotationFormat.
+
+If rotationFormat is EulerZXY, then world_R_camera = R_y(ry) * R_x(rx) * R_z(rz)
+Where R_x(rx) is the rotation along the x axis of rx degrees, R_y(ry) is the rotation along the y axis of ry degrees, R_z(rz) is the rotation along the z axis of rz degrees. It is the ZXY euler representation.
+
+Meshroom assumes that the axis x, y and z of the geometric frame in which you define the rotation and the translation is in Right hand coordinates where X points to the right, Y points downward, and Z points away from the camera. But you can change this using geometricFrame. 
+
+"""
 
     inputs = [
         desc.File(
@@ -34,6 +94,15 @@ class SfMPoseInjecting(desc.AVCommandLineNode):
             values=["EulerZXY"],
             value="EulerZXY",
         ),
+        desc.ChoiceParam(
+            name="geometricFrame",
+            label="Geometric Frame",
+            description="Defines the geometric frame for the input poses:\n"
+                        " - RHXrYbZf : Right hand coordinates, X right, Y down, Z far"
+                        " - RHXrYtZb : Right hand coordinates, X right, Y up, Z behind",
+            values=["RHXrYbZf", "RHXrYtZb"],
+            value="RHXrYbZf",
+        ),
         desc.ChoiceParam(
             name="verboseLevel",
             label="Verbose Level",

diff --git a/src/software/utils/main_sfmPoseInjecting.cpp b/src/software/utils/main_sfmPoseInjecting.cpp
@@ -18,6 +18,7 @@
 #include <boost/json.hpp>
 
 #include <fstream>
+#include <filesystem>
 
 // These constants define the current software version.
 // They must be updated when the command line is changed.
@@ -30,6 +31,7 @@
 struct PoseInput
 {
     IndexT frameId;
+    std::filesystem::path path;
     Eigen::Matrix4d T;
 };
 
@@ -76,52 +78,141 @@
     return in;
 }
 
+/**
+ * I/O for Rotation format choice
+ */
+
+enum class EGeometricFrameFormat
+{
+    RHXrYbZf,
+    RHXrYtZb
+};
+
+inline std::string EGeometricFrameFormat_enumToString(EGeometricFrameFormat format)
+{
+    switch (format)
+    {
+        case EGeometricFrameFormat::RHXrYbZf:
+        {
+            return "RHXrYbZf";
+        }
+        case EGeometricFrameFormat::RHXrYtZb:
+        {
+            return "RHXrYtZb";
+        }
+    }
+    throw std::out_of_range("Invalid Geometric format type Enum: " + std::to_string(int(format)));
+}
+
+inline EGeometricFrameFormat EGeometricFrameFormat_stringToEnum(const std::string& format)
+{
+    if (format == "RHXrYbZf")
+    {
+        return EGeometricFrameFormat::RHXrYbZf;
+    }
+
+    if (format == "RHXrYtZb")
+    {
+        return EGeometricFrameFormat::RHXrYtZb;
+    }
+
+    throw std::out_of_range("Invalid Geometric frame format type Enum: " + format);
+}
+
+inline std::ostream& operator<<(std::ostream& os, EGeometricFrameFormat s)
+{
+    return os << EGeometricFrameFormat_enumToString(s);
+}
+
+inline std::istream& operator>>(std::istream& in, EGeometricFrameFormat& s)
+{
+    std::string token(std::istreambuf_iterator<char>(in), {});
+    s = EGeometricFrameFormat_stringToEnum(token);
+    return in;
+}
+
+
+
 /**
  * @brief get a pose from a boost json object (assume the file format is ok)
  * @param obj the input json object
- * @param format the required rotation format to transform to rotation matrix
+ * @param rotationFormat the required rotation format to transform to rotation matrix
+ * @param geometricFrameFormat the geometric frame format used to interpret the camera pose
  * @param readPose the output pose information
  * @return false if the process failed
  */
-bool getPoseFromJson(const boost::json::object& obj, ERotationFormat format, PoseInput& readPose)
+bool getPoseFromJson(const boost::json::object& obj, ERotationFormat rotationFormat, EGeometricFrameFormat & geometricFormat, PoseInput& readPose)
 {
     Eigen::Matrix3d R = Eigen::Matrix3d::Identity();
 
-    if (format == ERotationFormat::EulerZXY)
+    if (rotationFormat == ERotationFormat::EulerZXY)
     {
-        // Reading information from lineup
-        const double rx = degreeToRadian(boost::json::value_to<double>(obj.at("rx")));
-        const double ry = degreeToRadian(boost::json::value_to<double>(obj.at("ry")));
-        const double rz = degreeToRadian(boost::json::value_to<double>(obj.at("rz")));
+        try
+        {
+            const double rx = degreeToRadian(boost::json::value_to<double>(obj.at("rx")));
+            const double ry = degreeToRadian(boost::json::value_to<double>(obj.at("ry")));
+            const double rz = degreeToRadian(boost::json::value_to<double>(obj.at("rz")));
 
-        Eigen::AngleAxisd Rx(rx, Eigen::Vector3d::UnitX());
-        Eigen::AngleAxisd Ry(ry, Eigen::Vector3d::UnitY());
-        Eigen::AngleAxisd Rz(rz, Eigen::Vector3d::UnitZ());
+            Eigen::AngleAxisd Rx(rx, Eigen::Vector3d::UnitX());
+            Eigen::AngleAxisd Ry(ry, Eigen::Vector3d::UnitY());
+            Eigen::AngleAxisd Rz(rz, Eigen::Vector3d::UnitZ());
 
-        R = Ry.toRotationMatrix() * Rx.toRotationMatrix() * Rz.toRotationMatrix();
+            R = Ry.toRotationMatrix() * Rx.toRotationMatrix() * Rz.toRotationMatrix();
+        }
+        catch (boost::system::system_error error)
+        {
+            ALICEVISION_LOG_ERROR("Incorrect rotation parameters");
+            return false;
+        }
     }
     else
     {
+        ALICEVISION_LOG_ERROR("Unsupported rotation parameters");
         return false;
     }
 
-    readPose.frameId = boost::json::value_to<IndexT>(obj.at("frame_no"));
-
     Eigen::Vector3d t;
-    t.x() = boost::json::value_to<double>(obj.at("tx"));
-    t.y() = boost::json::value_to<double>(obj.at("ty"));
-    t.z() = boost::json::value_to<double>(obj.at("tz"));
+    try
+    {
+        t.x() = boost::json::value_to<double>(obj.at("tx"));
+        t.y() = boost::json::value_to<double>(obj.at("ty"));
+        t.z() = boost::json::value_to<double>(obj.at("tz"));
+    }
+    catch (boost::system::system_error error)
+    {
+        //If no tx,ty,tz we assume 0 translation
+    }
+
+    readPose.frameId = UndefinedIndexT;
+    if (obj.if_contains("frame_no"))
+    {
+        readPose.frameId = boost::json::value_to<IndexT>(obj.at("frame_no"));
+    }
+    else if (obj.if_contains("path"))
+    {
+        readPose.path = boost::json::value_to<std::string>(obj.at("filename"));
+    }
 
     Eigen::Matrix4d world_T_camera = Eigen::Matrix4d::Identity();
     world_T_camera.block<3, 3>(0, 0) = R;
     world_T_camera.block<3, 1>(0, 3) = t;
 
     //Get transform in av coordinates
     Eigen::Matrix4d aliceTinput = Eigen::Matrix4d::Identity();
-    aliceTinput(1, 1) = -1;
-    aliceTinput(1, 2) = 0;
-    aliceTinput(2, 1) = 0;
-    aliceTinput(2, 2) = -1;
+
+    switch (geometricFormat)
+    {
+        case EGeometricFrameFormat::RHXrYtZb:
+        {
+            aliceTinput(1, 1) = -1;
+            aliceTinput(1, 2) = 0;
+            aliceTinput(2, 1) = 0;
+            aliceTinput(2, 2) = -1;
+            break;
+        }
+        default:
+            break;
+    }
 
 
     world_T_camera = aliceTinput * world_T_camera * aliceTinput.inverse();
@@ -135,10 +226,11 @@
  * The JSON file contains an array of objects. Each object describes a frameId, a rotation and a translation.
  * @param posesFilename the input JSON filename
  * @param format the required rotation format to transform to rotation matrix
+ * @param geometricFrameFormat the geometric frame format used to interpret the camera pose
  * @param readPose the output poses vector
  * @return false if the process failed, true otherwise
  */
-bool getPosesFromJson(const std::string& posesFilename, ERotationFormat format, std::vector<PoseInput>& readPoses)
+bool getPosesFromJson(const std::string& posesFilename, ERotationFormat format, EGeometricFrameFormat & geometricFormat, std::vector<PoseInput>& readPoses)
 {
     std::ifstream inputfile(posesFilename);
     if (!inputfile.is_open())
@@ -162,7 +254,7 @@
         const boost::json::object& obj = item.as_object();
 
         PoseInput input;
-        if (getPoseFromJson(obj, format, input))
+        if (getPoseFromJson(obj, format, geometricFormat, input))
         {
             readPoses.push_back(input);
         }
@@ -171,81 +263,118 @@
    return true;
 }

 int aliceVision_main(int argc, char** argv)
 {
    // command-line parameters
    std::string sfmDataFilename;
     std::string sfmDataOutputFilename;
     std::string posesFilename;
     ERotationFormat format;
+    EGeometricFrameFormat gframe;
 
     // clang-format off
     po::options_description requiredParams("Required parameters");
    requiredParams.add_options()
        ("input,i", po::value<std::string>(&sfmDataFilename)->required(),
         "Input SfMData file.")
         ("output,o", po::value<std::string>(&sfmDataOutputFilename)->required(),
          "SfMData output file with the injected poses.")
         ("rotationFormat,r", po::value<ERotationFormat>(&format)->required(),
-         "Rotation format for the input poses: EulerZXY.");
+         "Rotation format for the input poses: EulerZXY.")
+        ("geometricFrame, g", po::value<EGeometricFrameFormat>(&gframe)->required(),
+        "Defines the geometric frame for the input poses");
 
     po::options_description optionalParams("Optional parameters");
     optionalParams.add_options()
        ("posesFilename,p", po::value<std::string>(&posesFilename)->default_value(posesFilename),
        "JSON file containing the poses to inject.");
    // clang-format on

    CmdLine cmdline("AliceVision SfM Pose injecting");

    cmdline.add(requiredParams);
    cmdline.add(optionalParams);
    if (!cmdline.execute(argc, argv))
    {
        return EXIT_FAILURE;
    }


    // Set maxThreads
    HardwareContext hwc = cmdline.getHardwareContext();
    omp_set_num_threads(hwc.getMaxThreads());

    if (posesFilename.empty())
    {
        ALICEVISION_LOG_INFO("Nothing to do.");
        return EXIT_SUCCESS;
    }

    // Load input SfMData scene
    sfmData::SfMData sfmData;
    if (!sfmDataIO::load(sfmData, sfmDataFilename, sfmDataIO::ESfMData::ALL))
    {
        ALICEVISION_LOG_ERROR("The input SfMData file '" + sfmDataFilename + "' cannot be read.");
        return EXIT_FAILURE;
     }
 
     std::vector<PoseInput> readPoses;
-    if (!getPosesFromJson(posesFilename, format, readPoses))
+    if (!getPosesFromJson(posesFilename, format, gframe, readPoses))
     {
         ALICEVISION_LOG_ERROR("Cannot read the poses");
         return EXIT_FAILURE;
    }

    // Set the pose for all the views with frame IDs found in the JSON file
    for (const auto& [id, pview] : sfmData.getViews())
     {
         for (const auto& rpose : readPoses)
         {
-            if (pview->getFrameId() == rpose.frameId)
+            //Check if the frameid is the same than described
+            bool found = false;
+            if (rpose.frameId != UndefinedIndexT)
             {
-                geometry::Pose3 pose(rpose.T);
-                sfmData::CameraPose cpose(pose, false);
-                cpose.setRemovable(false);
-                sfmData.setAbsolutePose(id, cpose);
+                if (pview->getFrameId() == rpose.frameId)
+                {
+                    found = true;
+                }
             }
+
+            if (!found && !rpose.path.empty())
+            {
+                //Check if this view has the same filename
+                std::filesystem::path viewPath(pview->getImage().getImagePath());
+                if (rpose.path.has_relative_path())
+                {
+                    if (std::filesystem::canonical(viewPath) == std::filesystem::canonical(rpose.path))
+                    {
+                        found = true;
+                    }
+                }
+                else 
+                {
+                    if (viewPath.filename() == rpose.path.filename())
+                    {
+                        found = true;
+                    }
+                }
+            }
+
+            if (!found)
+            {
+                continue;
+            }
+
+            ALICEVISION_LOG_INFO("Pose assigned to image with path " << pview->getImage().getImagePath());
+
+            geometry::Pose3 pose(rpose.T);
+            sfmData::CameraPose cpose(pose, false);
+            cpose.setRemovable(false);
+            sfmData.setAbsolutePose(id, cpose);
         }
     }
 
    sfmDataIO::save(sfmData, sfmDataOutputFilename, sfmDataIO::ESfMData::ALL);

    return EXIT_SUCCESS;
 }