Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

General: C++ API updates #19

Closed
wants to merge 10 commits into from
49 changes: 31 additions & 18 deletions include/backend/backend_engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,13 @@ class nixlBackendEngine {
}
}

std::string getInitParam(const std::string &key) {
if (customParams->count(key)==0)
return "";
else
return (*customParams)[key];
// Looks up `key` among the custom init parameters.
// On a hit, the stored string is copied into `value` and NIXL_SUCCESS is
// returned. On a miss, `value` is left untouched and NIXL_ERR_INVALID_PARAM
// is returned.
nixl_status_t getInitParam(const std::string &key, std::string &value) {
    const auto entry = customParams->find(key);
    if (entry == customParams->end())
        return NIXL_ERR_INVALID_PARAM;

    value = entry->second;
    return NIXL_SUCCESS;
}

public:
Expand Down Expand Up @@ -83,14 +85,16 @@ class nixlBackendEngine {
// Determines if a backend supports progress thread.
virtual bool supportsProgTh () const = 0;

virtual nixl_status_t supportedMemTypes (std::vector<nixl_mem_t> &mems) const = 0;


// *** Pure virtual methods that need to be implemented by any backend *** //

// Register and deregister local memory
virtual nixl_status_t registerMem (const nixlStringDesc &mem,
const nixl_mem_t &nixl_mem,
nixlBackendMD* &out) = 0;
virtual void deregisterMem (nixlBackendMD* meta) = 0;
virtual nixl_status_t deregisterMem (nixlBackendMD* meta) = 0;

// Make connection to a remote node identified by the name into loaded conn infos
// Child might just return 0, if making proactive connections is not necessary.
Expand All @@ -101,28 +105,37 @@ class nixlBackendEngine {
// Remove loaded local or remote metadata for target
virtual nixl_status_t unloadMD (nixlBackendMD* input) = 0;

// Posting a request, which returns populates the async handle.
virtual nixl_xfer_state_t postXfer (const nixl_meta_dlist_t &local,
const nixl_meta_dlist_t &remote,
const nixl_xfer_op_t &operation,
const std::string &remote_agent,
const std::string &notif_msg,
nixlBackendReqH* &handle) = 0;
// Preparing a request, which populates the async handle as desired
virtual nixl_status_t prepXfer (const nixl_xfer_op_t &operation,
const nixl_meta_dlist_t &local,
const nixl_meta_dlist_t &remote,
const std::string &remote_agent,
nixlBackendReqH* &handle,
const nixl_blob_t &notif_msg=NIXL_NO_MSG)=0;

// Posting a request, which completes the async handle creation and posts it
virtual nixl_status_t postXfer (const nixl_xfer_op_t &operation,
const nixl_meta_dlist_t &local,
const nixl_meta_dlist_t &remote,
const std::string &remote_agent,
nixlBackendReqH* &handle,
const nixl_blob_t &notif_msg=NIXL_NO_MSG)=0;

// Use a handle to progress backend engine and see if a transfer is completed or not
virtual nixl_xfer_state_t checkXfer(nixlBackendReqH* handle) = 0;
virtual nixl_status_t checkXfer(nixlBackendReqH* handle) = 0;

//Backend aborts the transfer if necessary, and destructs the relevant objects
virtual void releaseReqH(nixlBackendReqH* handle) = 0;
virtual nixl_status_t releaseReqH(nixlBackendReqH* handle) = 0;


// *** Needs to be implemented if supportsRemote() is true *** //

// Gets serialized form of public metadata
virtual std::string getPublicData (const nixlBackendMD* meta) const { return ""; };
// Default implementation; backends for which supportsRemote() is true are
// expected to override it. It produces no serialized metadata: `str` is
// left unmodified and NIXL_ERR_BACKEND is reported, in line with the other
// default stubs in this class that return a status.
virtual nixl_status_t getPublicData (const nixlBackendMD* meta,
                                     std::string &str) const {
    return NIXL_ERR_BACKEND;
}

// Provide the required connection info for remote nodes, should be non-empty
virtual std::string getConnInfo() const { return ""; }
// Default implementation; backends for which supportsRemote() is true are
// expected to override it and fill `str` with non-empty connection info.
// Here `str` is left unmodified and NIXL_ERR_BACKEND is reported.
virtual nixl_status_t getConnInfo(std::string &str) const {
    return NIXL_ERR_BACKEND;
}

// Deserialize from string the connection info for a remote node, if supported
// The generated data should be deleted in nixlBackendEngine destructor
Expand Down Expand Up @@ -152,7 +165,7 @@ class nixlBackendEngine {
// *** Needs to be implemented if supportsNotif() is true *** //

// Populate an empty received notif list. Elements are released within backend then.
virtual int getNotifs(notif_list_t &notif_list) { return NIXL_ERR_BACKEND; }
virtual nixl_status_t getNotifs(notif_list_t &notif_list) { return NIXL_ERR_BACKEND; }

// Generates a standalone notification, not bound to a transfer.
virtual nixl_status_t genNotif(const std::string &remote_agent, const std::string &msg) {
Expand Down
3 changes: 1 addition & 2 deletions include/internal/transfer_request.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,8 @@ class nixlXferReqH {

class nixlXferSideH {
private:
nixl_meta_dlist_t* descs;
std::unordered_map<nixl_backend_t, nixl_meta_dlist_t*> descs;

nixlBackendEngine* engine;
std::string remoteAgent;
bool isLocal;

Expand Down
170 changes: 106 additions & 64 deletions include/nixl.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,95 +35,137 @@ class nixlAgent {
nixlAgent (const std::string &name, const nixlAgentConfig &cfg);
~nixlAgent ();

// Prints the supported configs with their default or initialized values
nixl_b_params_t getBackendOptions (const nixl_backend_t &type);
// Returns the available plugins found in the paths.
nixl_status_t getAvailPlugins (std::vector<nixl_backend_t> &plugins);

// Instantiate BackendEngine objects, based on corresponding params
nixlBackendH* createBackend (const nixl_backend_t &type,
const nixl_b_params_t &params);
// Returns the supported configs with their default values
nixl_status_t getPluginOptions (
const nixl_backend_t &type,
nixl_b_params_t &params) const;

// Register with the backend and populate memory_section
nixl_status_t registerMem (const nixl_reg_dlist_t &descs,
nixlBackendH* backend);
// Deregister and remove from memory section
nixl_status_t deregisterMem (const nixl_reg_dlist_t &descs,
nixlBackendH* backend);
// Instantiate BackendEngine objects, based on corresponding params
nixl_status_t createBackend (
const nixl_backend_t &type,
const nixl_b_params_t &params,
nixlBackendH* &backend);

// Returns the backend parameters after instantiation
nixl_status_t getBackendOptions (
const nixlBackendH* &backend,
nixl_b_params_t &params) const;

// Register a memory with NIXL. User can provide a list of backends
// to specify which backends are targeted for a memory, otherwise
// NIXL will register with all backends that support the memory type.
nixl_status_t registerMem (
const nixl_reg_dlist_t &descs,
const nixl_xfer_params_t* extra_params = nullptr) const;

// Deregister the memory from NIXL
nixl_status_t deregisterMem (
const nixl_reg_dlist_t &descs,
nixlBackendH* backend);

// Make connection proactively, instead of at transfer time
nixl_status_t makeConnection (const std::string &remote_agent);


/*** Transfer Request Handling ***/

// Creates a transfer request, with automatic backend selection if null.
nixl_status_t createXferReq (const nixl_xfer_dlist_t &local_descs,
const nixl_xfer_dlist_t &remote_descs,
const std::string &remote_agent,
const std::string &notif_msg,
const nixl_xfer_op_t &operation,
nixlXferReqH* &req_handle,
const nixlBackendH* backend = nullptr) const;

// Submit a transfer request, which populates the req async handler.
nixl_xfer_state_t postXferReq (nixlXferReqH* req);
/*** Transfer Request Preparation ***/

// Method 1, for when memory addresses of the transfer are not known
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why phrasing it this way?
Maybe it's the opposite - all the addresses are known a priori, and you create this handle at app initialization and use it throughout. It's actually the best scenario that may happen!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, it's both; I should update the comment: known a priori or at the time of transfer. But that kind of sounds like all the time. I'll think about how to make it say there are no blocks.

// beforehand, and the transfer request is prepared with information
// from both sides. The backend is selected automatically by NIXL,
// while user can provide a list of backends as hints in extra_params.
// The notification message can be given in optional extra_params too.
nixl_status_t prepXferFull (
const nixl_xfer_op_t &operation,
const nixl_xfer_dlist_t &local_descs,
const nixl_xfer_dlist_t &remote_descs,
const std::string &remote_agent,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the agent stays a string? We had discussions about turning it into a blob as well.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Kapil made this comment in the meeting that for logging, string makes more sense, and that's what users want. And if they pass something that has \0, anyways we process it properly. That's more intuitive too.

nixlXferReqH* &req_handle,
const nixl_xfer_params_t* extra_params = nullptr) const;

// Method 2, for when memory blocks used in transfers are pre-known, but
// selection of blocks for transfers are determined in run time.
// There are two steps, initial preparations for each side, followed by a
// selection by indices to prepare a full transfer request.

// Prepares descriptors for one side of a transfer request. For local
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd suggest to add some text formatting here to improve readability. I.e.:

Prepares descriptors for one side of a transfer request.
* For local initiator side, remote_agent should be passed as NIXL_INIT_AGENT.
* For local target side in local transfers agent's own name is passed as remote_agent.

Also, I don't follow the description here. What is local target or initiator side?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed about formatting, will do.
So for ucx we should pass local metadata for the initiator side, and remote metadata for the target, even if target is the same agent. You mentioned better to do it once, instead of each time parse the local metadata to extract the remote metadata.

// initiator side, remote_agent should be passed as NIXL_INIT_AGENT.
// For local target side in local transfers agent's own name is passed as
// remote_agent. User can provide a list of backends as hints in
// extra_params to limit preparations to those backends.
nixl_status_t prepXferSide (
const nixl_xfer_dlist_t &descs,
const std::string &remote_agent,
nixlXferSideH* &side_handle,
const nixl_xfer_params_t* extra_params = nullptr) const;

// Makes a full transfer request by selecting indices from already prepared sides.
// NIXL automatically determines the backend that can perform the transfer.
// The notification message can be given in optional extra_params.
nixl_status_t selectXferSides (
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't like select word - there are 2 sides passed into this function and they are already "selected"
I think it should be something like "buildXferReq"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure about this myself either. Previously it was makeXferReq, very similar to buildXferReq. But the point is that these are preparations; the post actually makes it. So not sure what else to call it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Select sounded like there are some preps, we're selecting from them, it's still prep. But agreed there should be a better word.

const nixl_xfer_op_t &operation,
const nixlXferSideH* local_side,
const std::vector<int> &local_indices,
const nixlXferSideH* remote_side,
const std::vector<int> &remote_indices,
nixlXferReqH* &req_handle,
const nixl_xfer_params_t* extra_params = nullptr) const;


/*** Operations on prepared Transfer Request ***/

// Submit a transfer request, which enables async checks on the transfer.
// Operation and notification message can be updated per post through
// the extra_params.
nixl_status_t postXferReq (
nixlXferReqH* req,
const nixl_xfer_params_t* extra_params = nullptr) const;

// Check the status of transfer requests
nixl_xfer_state_t getXferStatus (nixlXferReqH* req);
nixl_status_t getXferStatus (nixlXferReqH* req);

// Invalidate transfer request if we no longer need it.
// Will also abort a running transfer.
void invalidateXferReq (nixlXferReqH* req);
// User can ask for backend used in a nixlXferReqH. For example to use the
// same backend for genNotif, or to know the decision after a prepXferFull.
nixl_status_t getXferBackend (
const nixlXferReqH* req_handle,
nixlBackendH* &backend) const;

// Invalidate (free) transfer request if we no longer need it.
// Tries to abort a running transfer, or return error if couldn't
nixl_status_t invalidateXferReq (nixlXferReqH* req);

/*** Alternative method to create transfer handle manually ***/
// Frees a side handle object
nixl_status_t invalidateXferSide (nixlXferSideH* side_handle) const;

// User can ask for backend chosen for a XferReq to use it for prepXferSide.
nixlBackendH* getXferBackend(const nixlXferReqH* req_handle) const;

// Prepares descriptors for one side of a transfer with given backend.
// Empty string for remote_agent means it's local side.
nixl_status_t prepXferSide (const nixl_xfer_dlist_t &descs,
const std::string &remote_agent,
const nixlBackendH* backend,
nixlXferSideH* &side_handle) const;

// Makes a transfer request from already prepared side transfer handles.
nixl_status_t makeXferReq (const nixlXferSideH* local_side,
const std::vector<int> &local_indices,
const nixlXferSideH* remote_side,
const std::vector<int> &remote_indices,
const std::string &notif_msg,
const nixl_xfer_op_t &operation,
nixlXferReqH* &req_handle) const;

void invalidateXferSide (nixlXferSideH* side_handle) const;

/*** Notification Handling ***/

// Add entries to the passed received notifications list (can be
// non-empty), and return number of added entries, or -1 if there was
// an error. Elements are released within the Agent after this call.
int getNotifs (nixl_notifs_t &notif_map);
// Populates an empty notification map, and releases them within the agent.
nixl_status_t getNotifs (nixl_notifs_t &notif_map);

// Generate a notification, not bound to a transfer, e.g., for control.
// Can be used after the remote metadata is exchanged. Will be received
// in notif list. Nixl will choose a backend if null is passed.
nixl_status_t genNotif (const std::string &remote_agent,
const std::string &msg,
nixlBackendH* backend = nullptr);

// in notif list. Providing a backend in extra_params is optional, as
// nixl can automatically decide.
nixl_status_t genNotif (
const std::string &remote_agent,
const nixl_blob_t &msg,
const nixl_xfer_params_t* extra_params = nullptr);

/*** Metadata handling through side channel ***/

// Get nixl_metadata for this agent. Empty string means error.
// The std::string used for serialized MD can have \0 values.
std::string getLocalMD () const;
// Get nixl metadata blob for this agent.
nixl_status_t getLocalMD (nixl_blob_t &str) const;

// Load other agent's metadata and unpack it internally.
// Returns the found agent name in metadata, or "" in case of error.
std::string loadRemoteMD (const std::string &remote_metadata);
// Takes the other agent's metadata as a nixl_blob_t (it may contain \0
// bytes). The received agent name can be checked through agent_name.
nixl_status_t loadRemoteMD (
    const nixl_blob_t &remote_metadata,
    std::string &agent_name);

// Invalidate the remote section information cached locally
// Invalidate the remote agent metadata cached locally, and disconnect from it.
nixl_status_t invalidateRemoteMD (const std::string &remote_agent);
};

Expand Down
2 changes: 1 addition & 1 deletion include/nixl_descriptors.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class nixlBasicDesc {
// String next to each BasicDesc, used for extra info for memory registration
class nixlStringDesc : public nixlBasicDesc {
public:
std::string metaInfo;
nixl_blob_t metaInfo;

// Reuse parent constructor without the extra info
using nixlBasicDesc::nixlBasicDesc;
Expand Down
33 changes: 27 additions & 6 deletions include/nixl_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,29 +22,50 @@

typedef std::unordered_map<std::string, std::string> nixl_b_params_t;
typedef std::unordered_map<std::string, std::vector<std::string>> nixl_notifs_t;

typedef std::string nixl_backend_t;

//FILE_SEG must be last
typedef enum {DRAM_SEG, VRAM_SEG, BLK_SEG, FILE_SEG} nixl_mem_t;
// std::string supports \0 natively, as long as c_str() is not called.
// So it can be treated as a void* of data with a specified length.
// Giving it a new name makes this clear in the API and keeps users from
// assuming it is a C-style string and calling c_str().
typedef std::string nixl_blob_t;

#define NIXL_NO_MSG stringWrapper("")
#define NIXL_INIT_AGENT ""

typedef enum {NIXL_XFER_INIT, NIXL_XFER_PROC,
NIXL_XFER_DONE, NIXL_XFER_ERR} nixl_xfer_state_t;
// FILE_SEG must be last
typedef enum {DRAM_SEG, VRAM_SEG, BLK_SEG, OBJ_SEG, FILE_SEG} nixl_mem_t;

typedef enum {NIXL_READ, NIXL_RD_NOTIF,
NIXL_WRITE, NIXL_WR_NOTIF} nixl_xfer_op_t;

typedef enum {
NIXL_IN_PROG = 1,
NIXL_SUCCESS = 0,
NIXL_ERR_INVALID_PARAM = -1,
NIXL_ERR_BACKEND = -2,
NIXL_ERR_NOT_FOUND = -3,
NIXL_ERR_NYI = -4,
NIXL_ERR_MISMATCH = -5,
NIXL_ERR_BAD = -6,
NIXL_ERR_NOT_ALLOWED = -7
NIXL_ERR_NOT_ALLOWED = -7,
NIXL_NOT_POSTED = -8
} nixl_status_t;


// Optional arguments for the nixlAgent calls that accept `extra_params`.
// Every payload field is paired with a validity flag, so a
// default-constructed object carries no hints and no overrides.
typedef struct {
    // Suggestion to limit the list of backends to be explored, e.g. in
    // registerMem/prepXferFull/prepXferSide/genNotif.
    std::vector<nixlBackendH*> suggestedBackends;

    // Transfer operation; only meaningful when validOp is true.
    // Given a defined default so the field is never read uninitialized.
    nixl_xfer_op_t operation = NIXL_READ;
    bool validOp = false;

    // Notification message; only meaningful when validNotif is true.
    // Held by value on purpose: a reference member without an initializer
    // would delete the default constructor and copy assignment, forcing
    // callers to bind a dummy blob even when no notification is wanted.
    nixl_blob_t notif_msg;
    bool validNotif = false;
} nixl_xfer_params_t;


class nixlSerDes;
class nixlBackendH;
class nixlXferReqH;
Expand Down
2 changes: 2 additions & 0 deletions src/nixl_agent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ nixlAgent::~nixlAgent() {
nixl_b_params_t nixlAgent::getBackendOptions (const nixl_backend_t &type) {
nixl_b_params_t params;

// TODO: unify to uppercase/lowercase and do ltrim/rtrim

// First try to get options from a loaded plugin
auto& plugin_manager = nixlPluginManager::getInstance();
auto plugin_handle = plugin_manager.getPlugin(type);
Expand Down