diff --git a/apex/apex.cpp b/apex/apex.cpp index d46ad23..7e6ef89 100644 --- a/apex/apex.cpp +++ b/apex/apex.cpp @@ -7,6 +7,8 @@ // This file implements APEX as LLVM Pass (APEXPass). See build_and_run.sh for // information about how to run APEXPass. +// TODO: Create aliases for long type defs. + #include "apex.h" /// Running on each module. @@ -15,11 +17,16 @@ bool APEXPass::runOnModule(Module &M) { APEXDependencyGraph apex_dg; LLVMDependenceGraph dg; - std::vector>> call_graph; - // std::vector path; + + // @path is the representation of computed execution path. + // It holds pair of function and dependency block from function through which + // is the execution "flowing". + std::vector> path; + std::string source_function_id = "main"; // TODO: Figure out entry automatically? std::string target_function_id = ""; // Will be properly initialized later. + std::vector target_instructions; std::map>> function_dependency_blocks; @@ -52,219 +59,31 @@ bool APEXPass::runOnModule(Module &M) { apexDgPrintDataDependenciesCompact(apex_dg); } - // logPrintUnderline( - // "Building callgraph with root at function: " + source_function_id + - // "."); - // createCallGraphOrDie(M, source_function_id, call_graph); - // - // if (VERBOSE_DEBUG) { - // logPrintUnderline("Printing callgraph."); - // printCallGraph(call_graph); - // } - // - // logPrintUnderline("Finding path from @" + source_function_id + " to @" + - // target_function_id); - // findPathOrDie(call_graph, source_function_id, target_function_id, path); - // - // logPrintUnderline("Printing path from @" + source_function_id + " to @" + - // target_function_id); - // printPath(path, source_function_id, target_function_id); - logPrintUnderline("Constructing function dependency blocks."); apexDgComputeFunctionDependencyBlocks(M, apex_dg, function_dependency_blocks); - // if (VERBOSE_DEBUG) { - logPrintUnderline("Printing calculated function dependency blocks."); - apexDgPrintFunctionDependencyBlocks(function_dependency_blocks); - // } + if (VERBOSE_DEBUG) { + logPrintUnderline("Printing calculated function dependency blocks."); + apexDgPrintFunctionDependencyBlocks(function_dependency_blocks); + } logPrintUnderline("Constructing dependency blocks to functions callgraph."); apexDgConstructBlocksFunctionsCallgraph(function_dependency_blocks, blocks_functions_callgraph); - // if (VERBOSE_DEBUG) { - logPrintUnderline("Printing dependency block to functions callgraph."); - apexDgPrintBlocksFunctionsCallgraph(blocks_functions_callgraph); - // } + if (VERBOSE_DEBUG) { + logPrintUnderline("Printing dependency block to functions callgraph."); + apexDgPrintBlocksFunctionsCallgraph(blocks_functions_callgraph); + } logPrintUnderline("Finding path from @" + source_function_id + " to @" + target_function_id + "."); - // @path is the representation of computed execution path - // It holds pair of function and dependency block from function through which - // is the execution "flowing". - std::vector> path; - - // @block_path is used to store path from @source_function to each node. - std::map *, std::vector>> - block_path; - { - std::vector *> queue; - std::vector *> visited; - - // Initially push blocks from source function into the @queue. - for (auto &block : - function_dependency_blocks[M.getFunction(source_function_id)]) { - queue.push_back(&block); - } - - while (false == queue.empty()) { - const auto current_ptr = queue.back(); - queue.pop_back(); - visited.push_back(current_ptr); - - // Find if there are calls to other functions that come from the current - // block. - for (const auto &called_function : - blocks_functions_callgraph[*current_ptr]) { - // Get blocks for those functions that are called. - for (auto &neighbour_block : - function_dependency_blocks[called_function]) { - // What we do here is that we copy the path from the current - // block, append current node to it and assign this path to - // each neighbour of current. - block_path[&neighbour_block] = block_path[current_ptr]; - block_path[&neighbour_block].push_back(*current_ptr); - - bool block_already_visited = false; - for (const auto &visited_block_ptr : visited) { - if (visited_block_ptr == &neighbour_block) { - block_already_visited = true; - break; - } - } - // Add non-visited blocks from called function into the queue. - if (false == block_already_visited) { - queue.push_back(&neighbour_block); - } - } - } - } - } - - // Finding target block. That is, block where target instruction are. - // We use heuristic and take last instruction from the @target_instructions. - // The reason is that inside @target_instructions may be more instructions - // than present in target block (especially at the beginning), so - // TODO: This is segfaulting when @target_block is pointer. - // TODO: Leaving it as a value for now. - std::vector target_block; - for (auto block : - function_dependency_blocks[M.getFunction(target_function_id)]) { - for (auto node_ptr : block) { - if (target_instructions.back() == node_ptr->getValue()) { - // Check the target line, just in case. - if (std::to_string( - target_instructions.back()->getDebugLoc().getLine()) == - ARG_LINE) { - target_block = block; - } - } - } - } - if (target_block.empty()) { - logPrint("ERROR: Could not find target block! Make sure target " - "instructions are not dead code (e.g. in the uncalled function"); - exit(FATAL_ERROR); - } + findPath(M, blocks_functions_callgraph, function_dependency_blocks, + target_instructions, source_function_id, target_function_id, path); - logPrint("Reconstructing block path:"); - for (const auto bp : block_path) { - // Comparing values can be slow, but blocks should not be really huge. - if (*bp.first == target_block) { - path = bp.second; - } - } - path.push_back(target_block); - - for (const auto block : path) { - Instruction *i = cast(block.front()->getValue()); - logPrint("- BLOCK FROM: " + i->getFunction()->getGlobalIdentifier()); - for (const auto node : block) { - node->getValue()->dump(); - } - logPrint(""); - } - - /* - logPrintUnderline("Dependency resolver: START"); - // Investigate each function in @path. - for (auto &fcn_in_path : path) { - logPrint("FCN: " + fcn_in_path->getGlobalIdentifier()); - for (auto &BB : *fcn_in_path) { - // Go over instructions for each function in @path. - for (auto &I : BB) { - if (false == isa(I)) { - continue; - } - // Ok, @I is call instruction. - // If @I calls something in @path, compute dependencies and include - // new stuff to @path. - CallInst *call_inst = cast(&I); - Function *called_fcn = call_inst->getCalledFunction(); - logPrintFlat("- @I calling: " + called_fcn->getGlobalIdentifier()); - - // We care if @I is calling function that is in the @path. - if (false == functionInPath(called_fcn, path)) { - // Do not investigate @I if it is calling function OUTSIDE PATH. - logPrint(" (not in path)"); - continue; - } - - // We care if @I is calling non protected function. - if (functionIsProtected(called_fcn)) { - logPrint(" (protected)"); - continue; - } - - logPrint(" (IN path)"); - - // Find @I in the @apex_dg. - logPrint(" - finding @I in @apex_dg"); - APEXDependencyNode *I_apex = nullptr; - for (APEXDependencyFunction apex_fcn : apex_dg.functions) { - std::string apex_fcn_name = apex_fcn.value->getName(); - if (apex_fcn_name == fcn_in_path->getGlobalIdentifier()) { - for (APEXDependencyNode apex_node : apex_fcn.nodes) { - if (isa(apex_node.value)) { - CallInst *node_inst = cast(apex_node.value); - Function *node_called_fcn = node_inst->getCalledFunction(); - if (node_called_fcn == called_fcn) { - I_apex = &apex_node; - logPrintFlat(" - found @I: "); - I_apex->value->dump(); - goto I_FOUND; // Get out of the loops. - } - } - } - } - } - if (nullptr == I_apex) { - logPrint("ERROR: Could not find @I in @apex_dg!"); - exit(-1); - } - - I_FOUND: - // Now, check if @I has some data dependencies that are fcn calls. - std::vector data_dependencies; - std::vector rev_data_dependencies; - LLVMNode *I_apex_node = I_apex->node; - apexDgFindDataDependencies(apex_dg, *I_apex_node, data_dependencies, - rev_data_dependencies); - - logPrint(" - @I data dependencies:\n"); - updatePathAddDependencies(data_dependencies, path); - logPrint(""); - logPrint(" - @I reverse data dependencies:\n"); - updatePathAddDependencies(rev_data_dependencies, path); - logPrint(""); - } - } - // TODO: one iteration of dep resolving is done - - logPrint("\n---\n"); - } - printPath(path, source_function_id, target_function_id); - */ + logPrintUnderline("Printing path from @" + source_function_id + " to @" + + target_function_id + "."); + printPath(path); // TODO: ERROR: inlinable function call in a function with debug info must // have a !dbg location @@ -423,172 +242,279 @@ bool APEXPass::functionInPath(Function *F, // Callgraph utilities // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -/// Creates callgraph from module @M, starting from the function with global id -/// specified in the @root. -/// -/// Callgraph is saved in the vector @callgraph as the pairs of the following -/// format: -/// . -/// -/// Dies if: -/// - unable to find function with @root global ID. -/// - unable to collect target callees when building callgraph. -int APEXPass::createCallGraphOrDie( - const Module &M, const std::string &root, - std::vector>> &callgraph) { +void APEXPass::findPath( + Module &M, + std::map, std::vector> + &blocks_functions_callgraph, + std::map>> + &function_dependency_blocks, + const std::vector &target_instructions, + const std::string &source_function_id, + const std::string &target_function_id, + std::vector> &path) { + // @block_path is used to store path from @source_function to each node. + std::map *, std::vector>> + block_path; + { + std::vector *> queue; + std::vector *> visited; - Function *root_fcn = M.getFunction(root); - if (nullptr == root_fcn) { - logPrint("ERROR : Root function should not be nullptr!"); - exit(FATAL_ERROR); - } + // Initially push blocks from source function into the @queue. + for (auto &block : + function_dependency_blocks[M.getFunction(source_function_id)]) { + queue.push_back(&block); + } - std::vector queue; - queue.push_back(root_fcn); + while (false == queue.empty()) { + const auto current_ptr = queue.back(); + queue.pop_back(); + visited.push_back(current_ptr); - while (false == queue.empty()) { - Function *node = queue.back(); - queue.pop_back(); + // Find if there are calls to other functions that come from the current + // block. + for (const auto &called_function : + blocks_functions_callgraph[*current_ptr]) { + // Get blocks for those functions that are called. + for (auto &neighbour_block : + function_dependency_blocks[called_function]) { + // What we do here is that we copy the path from the current + // block, append current node to it and assign this path to + // each neighbour of current. + block_path[&neighbour_block] = block_path[current_ptr]; + block_path[&neighbour_block].push_back(*current_ptr); - // Find call functions that are called by the @node. - std::vector node_callees; - if (functionGetCallees(node, node_callees) < 0) { - logPrint("ERROR: Failed to collect target callees!"); - exit(FATAL_ERROR); + bool block_already_visited = false; + for (const auto &visited_block_ptr : visited) { + if (visited_block_ptr == &neighbour_block) { + block_already_visited = true; + break; + } + } + // Add non-visited blocks from called function into the queue. + if (false == block_already_visited) { + queue.push_back(&neighbour_block); + } + } + } } + } - // Make caller->callees pair and save it. - std::pair> caller_callees; - caller_callees.first = node; - caller_callees.second = node_callees; - callgraph.push_back(caller_callees); - - // Put callees to queue, they will be examined next. - for (Function *callee : node_callees) { - queue.push_back(callee); + // Finding target block. That is, block where target instruction are. + // We use heuristic and take last instruction from the @target_instructions. + // The reason is that inside @target_instructions may be more instructions + // than present in target block (especially at the beginning), so + // TODO: This is segfaulting when @target_block is pointer. + // TODO: Leaving it as a value for now. + // TODO: auto &block + std::vector target_block; + for (auto block : + function_dependency_blocks[M.getFunction(target_function_id)]) { + for (auto node_ptr : block) { + if (target_instructions.back() == node_ptr->getValue()) { + // Check the target line, just in case. + if (std::to_string( + target_instructions.back()->getDebugLoc().getLine()) == + ARG_LINE) { + target_block = block; + } + } } } + if (target_block.empty()) { + logPrint("ERROR: Could not find target block! Make sure target " + "instructions are not dead code (e.g. in the uncalled function"); + exit(FATAL_ERROR); + } + logPrint("Reconstructing block path:"); + for (const auto bp : block_path) { + // Comparing values can be slow, but blocks should not be really huge. + if (*bp.first == target_block) { + path = bp.second; + } + } + path.push_back(target_block); logPrint("- done"); - return 0; } -/// Prints callgraph that is stored in the @callgraph. -void APEXPass::printCallGraph( - const std::vector>> - &callgraph) { - for (auto &caller_callees : callgraph) { - std::string caller = caller_callees.first->getGlobalIdentifier(); - std::string callees = " -> "; - if (caller_callees.second.empty()) { - callees += "[External/Nothing]"; - } - for (auto &callee : caller_callees.second) { - callees += callee->getGlobalIdentifier(); - callees += ", "; +void APEXPass::printPath(const std::vector> &path) { + for (const auto block : path) { + Instruction *i = cast(block.front()->getValue()); + logPrint("- BLOCK FROM: " + i->getFunction()->getGlobalIdentifier()); + for (const auto node : block) { + node->getValue()->dump(); } - logPrint(caller + callees); + logPrint(""); } } +/// Creates callgraph from module @M, starting from the function with global id +/// specified in the @root. +/// +/// Callgraph is saved in the vector @callgraph as the pairs of the following +/// format: +/// . +/// +/// Dies if: +/// - unable to find function with @root global ID. +/// - unable to collect target callees when building callgraph. +// int APEXPass::createCallGraphOrDie( +// const Module &M, const std::string &root, +// std::vector>> &callgraph) { +// +// Function *root_fcn = M.getFunction(root); +// if (nullptr == root_fcn) { +// logPrint("ERROR : Root function should not be nullptr!"); +// exit(FATAL_ERROR); +// } +// +// std::vector queue; +// queue.push_back(root_fcn); +// +// while (false == queue.empty()) { +// Function *node = queue.back(); +// queue.pop_back(); +// +// // Find call functions that are called by the @node. +// std::vector node_callees; +// if (functionGetCallees(node, node_callees) < 0) { +// logPrint("ERROR: Failed to collect target callees!"); +// exit(FATAL_ERROR); +// } +// +// // Make caller->callees pair and save it. +// std::pair> caller_callees; +// caller_callees.first = node; +// caller_callees.second = node_callees; +// callgraph.push_back(caller_callees); +// +// // Put callees to queue, they will be examined next. +// for (Function *callee : node_callees) { +// queue.push_back(callee); +// } +// } +// +// logPrint("- done"); +// return 0; +//} + +/// Prints callgraph that is stored in the @callgraph. +// void APEXPass::printCallGraph( +// const std::vector>> +// &callgraph) { +// for (auto &caller_callees : callgraph) { +// std::string caller = caller_callees.first->getGlobalIdentifier(); +// std::string callees = " -> "; +// if (caller_callees.second.empty()) { +// callees += "[External/Nothing]"; +// } +// for (auto &callee : caller_callees.second) { +// callees += callee->getGlobalIdentifier(); +// callees += ", "; +// } +// logPrint(caller + callees); +// } +//} + /// Uses BFS to find path in the @callgraph from @source to @target /// (both are global Function IDs). /// /// Result is stored in the @final_path. /// /// Dies if unable to find source node. -int APEXPass::findPathOrDie( - const std::vector>> - &callgraph, - const std::string &source, const std::string &target, - std::vector &final_path) { - - logPrint("Searching for the source node:"); - const std::pair> *source_node = nullptr; - { - for (auto &node_neighbours : callgraph) { - Function *node = node_neighbours.first; - if (source == node->getGlobalIdentifier()) { - source_node = &node_neighbours; - break; - } - } - if (nullptr == source_node) { - logPrint("ERROR: Could not find source node!"); - exit(FATAL_ERROR); - } else { - logPrint("- done"); - } - } - - logPrint("Running BFS to find path from @" + source + " to @" + target + ":"); - std::map> function_path; - { - std::vector>> queue; - std::vector visited; - - queue.push_back(*source_node); - - while (false == queue.empty()) { - std::pair> current = queue.back(); - queue.pop_back(); - Function *node = current.first; - std::vector neighbours = current.second; - - // Store current node as visited. - visited.push_back(node); - - for (Function *neighbour : current.second) { - // Store @main->@neighbour path. - function_path[neighbour].push_back(current.first); - - // Check if we already visited @neighbour. - bool neighbour_already_visited = false; - for (Function *visited_fcn : visited) { - if (visited_fcn == neighbour) { - neighbour_already_visited = true; - break; - } - } - // Add @neighbour to the queue along with its neighbours if it was - // not visited already. - if (false == neighbour_already_visited) { - for (auto &node_neighbours : callgraph) { - if (node_neighbours.first == neighbour) { - queue.push_back(node_neighbours); - } - } - } - } - } - logPrint("- done"); - } - - logPrint("Reconstructing and storing @" + source + " -> @" + target + - " path to the @final_path:"); - { - for (auto &node_path : function_path) { - // Need to pull @target first. - if (node_path.first->getGlobalIdentifier() == target) { - // We push path that was stored. First node should be @source. - for (Function *node : node_path.second) { - final_path.push_back(node); - } - // Finally, push @target node itself. - final_path.push_back(node_path.first); - logPrint("- done"); - } - } - } -} +// int APEXPass::findPathOrDie( +// const std::vector>> +// &callgraph, +// const std::string &source, const std::string &target, +// std::vector &final_path) { +// +// logPrint("Searching for the source node:"); +// const std::pair> *source_node = nullptr; +// { +// for (auto &node_neighbours : callgraph) { +// Function *node = node_neighbours.first; +// if (source == node->getGlobalIdentifier()) { +// source_node = &node_neighbours; +// break; +// } +// } +// if (nullptr == source_node) { +// logPrint("ERROR: Could not find source node!"); +// exit(FATAL_ERROR); +// } else { +// logPrint("- done"); +// } +// } +// +// logPrint("Running BFS to find path from @" + source + " to @" + target + +// ":"); std::map> function_path; +// { +// std::vector>> queue; +// std::vector visited; +// +// queue.push_back(*source_node); +// +// while (false == queue.empty()) { +// std::pair> current = queue.back(); +// queue.pop_back(); +// Function *node = current.first; +// std::vector neighbours = current.second; +// +// // Store current node as visited. +// visited.push_back(node); +// +// for (Function *neighbour : current.second) { +// // Store @main->@neighbour path. +// function_path[neighbour].push_back(current.first); +// +// // Check if we already visited @neighbour. +// bool neighbour_already_visited = false; +// for (Function *visited_fcn : visited) { +// if (visited_fcn == neighbour) { +// neighbour_already_visited = true; +// break; +// } +// } +// // Add @neighbour to the queue along with its neighbours if it was +// // not visited already. +// if (false == neighbour_already_visited) { +// for (auto &node_neighbours : callgraph) { +// if (node_neighbours.first == neighbour) { +// queue.push_back(node_neighbours); +// } +// } +// } +// } +// } +// logPrint("- done"); +// } +// +// logPrint("Reconstructing and storing @" + source + " -> @" + target + +// " path to the @final_path:"); +// { +// for (auto &node_path : function_path) { +// // Need to pull @target first. +// if (node_path.first->getGlobalIdentifier() == target) { +// // We push path that was stored. First node should be @source. +// for (Function *node : node_path.second) { +// final_path.push_back(node); +// } +// // Finally, push @target node itself. +// final_path.push_back(node_path.first); +// logPrint("- done"); +// } +// } +// } +//} /// Prints path from @source to @target (both are function global IDs). -void APEXPass::printPath(const std::vector &path, - const std::string &source, const std::string &target) { - for (auto &node : path) { - logPrint(node->getGlobalIdentifier()); - } -} +// void APEXPass::printPath(const std::vector &path, +// const std::string &source, const std::string &target) +// { +// for (auto &node : path) { +// logPrint(node->getGlobalIdentifier()); +// } +//} // dg utilities // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -929,8 +855,8 @@ void APEXPass::apexDgGetNodeOrDie(const APEXDependencyGraph &apex_dg, } logPrintFlat( "ERROR: Could not find the following instruction in the @apex_dg." - "Maybe it is in function that @dg did not computed dependencies" - "(because it is not used, etc.)"); + "Maybe it is in a function that @dg did not computed dependencies for" + "(e.g. it is not used, etc.)"); I->dump(); exit(FATAL_ERROR); } @@ -1107,9 +1033,11 @@ void APEXPass::apexDgConstructBlocksFunctionsCallgraph( for (auto &function_blocks : function_dependency_blocks) { for (auto &block : function_blocks.second) { for (LLVMNode *node : block) { + if (isa(node->getValue())) { const CallInst *call_inst = cast(node->getValue()); const Function *called_fcn = call_inst->getCalledFunction(); + // Store call edge from block to function. auto &functions = blocks_functions_callgraph[block]; functions.push_back(called_fcn); diff --git a/apex/apex.h b/apex/apex.h index 7df49d4..88da373 100644 --- a/apex/apex.h +++ b/apex/apex.h @@ -106,20 +106,16 @@ class APEXPass : public ModulePass { bool functionIsProtected(const Function *F); bool functionInPath(Function *F, const std::vector &path); - // Callgraph utilities. - int createCallGraphOrDie( - const Module &M, const std::string &root, - std::vector>> &callgraph); - void printCallGraph( - const std::vector>> - &callgraph); - int findPathOrDie( - const std::vector>> - &callgraph, - const std::string &source, const std::string &target, - std::vector &final_path); - void printPath(const std::vector &path, const std::string &source, - const std::string &target); + void findPath(Module &M, + std::map, std::vector> + &blocks_functions_callgraph, + std::map>> + &function_dependency_blocks, + const std::vector &target_instructions, + const std::string &source_function_id, + const std::string &target_function_id, + std::vector> &path); + void printPath(const std::vector> &path); // dg utilities. void dgInit(Module &M, LLVMDependenceGraph &dg);