diff --git a/README.md b/README.md index 5d9f6b277..89d36ca64 100644 --- a/README.md +++ b/README.md @@ -115,6 +115,44 @@ target_link_libraries(<your_executable_or_library> PUBLIC ${IR2VEC_INSTALL_DIR}/ And then pass the location of IR2Vec's install prefix to `DIR2VEC_INSTALL_DIR` during cmake. +The following example snippet shows how to query the exposed vector representations. + +```c++ +#include "IR2Vec.h" + +// Creating object to generate FlowAware representation +auto ir2vec = + IR2Vec::Embeddings(<LLVM Module>, IR2Vec::IR2VecMode::FlowAware, + "./vocabulary/seedEmbeddingVocab-300-llvm10.txt"); + +// Getting Instruction vectors corresponding to the instructions in <LLVM Module> +auto instVecMap = ir2vec.getInstVecMap(); +// Access the generated vectors +for (auto instVec : instVecMap) { + outs() << "Instruction : "; + instVec.first->print(outs()); + outs() << ": "; + + for (auto val : instVec.second) + outs() << val << "\t"; +} + +// Getting vectors corresponding to the functions in <LLVM Module> +auto funcVecMap = ir2vec.getFunctionVecMap(); +// Access the generated vectors +for (auto funcVec : funcVecMap) { + outs() << "Function : " << funcVec.first->getName() << "\n"; + for (auto val : funcVec.second) + outs() << val << "\t"; + } + +// Getting the program vector +auto pgmVec = ir2vec.getProgramVector(); +// Access the generated vector +for (auto val : pgmVec) + outs() << val << "\t"; +``` + ## Experiments ### Note diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 929e8ddb2..94523060b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.4.3) +cmake_minimum_required(VERSION 3.13) project(ir2vec VERSION 1.0.0) set(IR2VEC_LIB "IR2Vec") diff --git a/src/include/IR2Vec.h b/src/include/IR2Vec.h index 385dcae75..a791f0ddf 100644 --- a/src/include/IR2Vec.h +++ b/src/include/IR2Vec.h @@ -18,7 +18,7 @@ using Vector = llvm::SmallVector<double, DIM>; enum IR2VecMode { FlowAware, Symbolic }; -class IR2VecTy { 
+class Embeddings { int generateEncodings(llvm::Module &M, IR2VecMode mode, std::string vocab, char level = '\0', std::ostream *o = nullptr, int cls = -1, float WO = 1, float WA = 0.2, @@ -29,26 +29,36 @@ class IR2VecTy { Vector pgmVector; public: - IR2VecTy(llvm::Module &M, IR2VecMode mode, std::string vocab, float WO = 1, - float WA = 0.2, float WT = 0.5) { + Embeddings(llvm::Module &M, IR2VecMode mode, std::string vocab, float WO = 1, + float WA = 0.2, float WT = 0.5) { generateEncodings(M, mode, vocab, '\0', nullptr, -1, WO, WA, WT); } - IR2VecTy(llvm::Module &M, IR2VecMode mode, std::string vocab, char level, - std::ostream *o, float WO = 1, float WA = 0.2, float WT = 0.5) { + // Use this constructor if the representations ought to be written to a file. + // Analogous to the command line options used by the IR2Vec binary. + Embeddings(llvm::Module &M, IR2VecMode mode, std::string vocab, char level, + std::ostream *o, float WO = 1, float WA = 0.2, float WT = 0.5) { generateEncodings(M, mode, vocab, level, o, -1, WO, WA, WT); } + // Returns a map containing instructions and the corresponding vector + // representations for a given module corresponding to the IR2VecMode and + // other configurations that are set in the constructor. llvm::SmallMapVector<const llvm::Instruction *, Vector, 128> & getInstVecMap() { return instVecMap; } + // Returns a map containing functions and the corresponding vector + // representations for a given module corresponding to the IR2VecMode and + // other configurations that are set in the constructor. llvm::SmallMapVector<const llvm::Function *, Vector, 16> & getFunctionVecMap() { return funcVecMap; } + // Returns the program vector for a module corresponding to the IR2VecMode + // and other configurations that are set in the constructor. Vector &getProgramVector() { return pgmVector; } }; diff --git a/src/libIR2Vec.cpp b/src/libIR2Vec.cpp index be5fcb2bd..87a05756b 100644 --- a/src/libIR2Vec.cpp +++ b/src/libIR2Vec.cpp @@ -12,11 +12,11 @@ 
#include "llvm/IR/Module.h" -int IR2Vec::IR2VecTy::generateEncodings(llvm::Module &M, - IR2Vec::IR2VecMode mode, - std::string vocab, char level, - std::ostream *o, int cls, float WO, - float WA, float WT) { +int IR2Vec::Embeddings::generateEncodings(llvm::Module &M, + IR2Vec::IR2VecMode mode, + std::string vocab, char level, + std::ostream *o, int cls, float WO, + float WA, float WT) { IR2Vec::vocab = vocab; IR2Vec::level = level; IR2Vec::cls = cls;