diff --git a/CMakeLists.txt b/CMakeLists.txt index e0b8e04..b313b98 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,6 +55,12 @@ if(NOT EXISTS ${PROJECT_SOURCE_DIR}/src/tree-sitter/tree-sitter-php) ) endif() +if(NOT EXISTS ${PROJECT_SOURCE_DIR}/src/tree-sitter/tree-sitter-solidity) + execute_process( + COMMAND git clone https://github.com/JoranHonig/tree-sitter-solidity.git ${PROJECT_SOURCE_DIR}/src/tree-sitter/tree-sitter-solidity + ) +endif() + if(NOT EXISTS ${PROJECT_SOURCE_DIR}/src/tree-sitter/tree-sitter-cpp) execute_process( COMMAND git clone https://github.com/tree-sitter/tree-sitter-cpp.git ${PROJECT_SOURCE_DIR}/src/tree-sitter/tree-sitter-cpp diff --git a/scripts/mine_patterns.sh b/scripts/mine_patterns.sh index b877bc1..ad8db72 100755 --- a/scripts/mine_patterns.sh +++ b/scripts/mine_patterns.sh @@ -10,7 +10,7 @@ function print_usage() { else echo "[-n number_of_processes_to_use_for_mining] (default: num_cpus_on_system)" fi - echo "[-l source_language_number] (default: 1 (C), supported: 1 (C), 2 (Verilog), 3 (PHP), 4 (C++)" + echo "[-l source_language_number] (default: 1 (C), supported: 1 (C), 2 (Verilog), 3 (PHP), 4 (C++), 5 (Solidity)" echo "[-g github_repo_id] (default: 0) A unique identifier for GitHub repository, if any" exit } @@ -43,9 +43,9 @@ then print_usage $0 fi -if (( ${LANGUAGE} < 1 || ${LANGUAGE} > 4 )); +if (( ${LANGUAGE} < 1 || ${LANGUAGE} > 5 )); then - echo "ERROR: Only 1 (C), 2 (Verilog), 3 (PHP), and 4 (C++) are supported languages; received ${LANGUAGE}" + echo "ERROR: Only 1 (C), 2 (Verilog), 3 (PHP), 4 (C++), 5 (Solidity) are supported languages; received ${LANGUAGE}" print_usage $0 fi @@ -69,6 +69,9 @@ then elif [ "${LANGUAGE}" = "4" ]; then find "${TRAIN_DIR}" -iname "*.cpp" -o -iname "*.cc" -o -iname "*.cxx" -o -iname "*.h" -o -iname "*.hpp" -o -iname "*.hxx" -type f > ${FILE_LIST} +elif [ "${LANGUAGE}" = "5" ]; +then + find "${TRAIN_DIR}" -iname "*.sol" -type f > ${FILE_LIST} fi SCRIPTS_DIR=`dirname $0` diff --git a/scripts/scan_for_anomalies.sh b/scripts/scan_for_anomalies.sh index 8e8ef31..c57947a 100755 --- a/scripts/scan_for_anomalies.sh +++ b/scripts/scan_for_anomalies.sh @@ -14,7 +14,7 @@ function print_usage() { fi echo " [-o output_log_dir] (default: /tmp)" echo " [-a anomaly_threshold] (default: 3.0)" - echo " [-l source_language_number] (default: 1 (C), supported: 1 (C), 2 (Verilog), 3 (PHP), 4 (C++)" + echo " [-l source_language_number] (default: 1 (C), supported: 1 (C), 2 (Verilog), 3 (PHP), 4 (C++), 5 (Solidity)" exit } @@ -58,9 +58,9 @@ then print_usage $0 fi -if (( ${LANGUAGE} < 1 || ${LANGUAGE} > 4 )); +if (( ${LANGUAGE} < 1 || ${LANGUAGE} > 5 )); then - echo "ERROR: Only 1 (C), 2 (Verilog), 3 (PHP), and 4 (C++) are supported languages; received ${LANGUAGE}" + echo "ERROR: Only 1 (C), 2 (Verilog), 3 (PHP), 4 (C++), 5 (Solidity) are supported languages; received ${LANGUAGE}" print_usage $0 fi @@ -77,6 +77,9 @@ then elif [ "${LANGUAGE}" = "4" ]; then find "${SCAN_DIR}" -iname "*.cpp" -o -iname "*.cc" -o -iname "*.cxx" -o -iname "*.h" -o -iname "*.hpp" -o -iname "*.hxx" -type f > ${SCAN_FILE_LIST} +elif [ "${LANGUAGE}" = "5" ]; +then + find "${SCAN_DIR}" -iname "*.sol" -type f > ${SCAN_FILE_LIST} fi SCRIPTS_DIR=`dirname $0` diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 285d313..d52c807 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -27,6 +27,7 @@ set(COMMON_LINK_LIBRARIES tree-sitter tree-sitter-c tree-sitter-php + tree-sitter-solidity tree-sitter-cpp tree-sitter-verilog pthread diff --git a/src/cf_dump_code_blocks.cpp b/src/cf_dump_code_blocks.cpp index eb115ac..3adef9f 100644 --- a/src/cf_dump_code_blocks.cpp +++ b/src/cf_dump_code_blocks.cpp @@ -93,7 +93,7 @@ int handle_command_args(int argc, char* argv[], CFDumpArgs& command_args) { << std::endl << " [-l source_language_number] (default: " << LANGUAGE_C << ")" - << ", supported: 1 (C), 2 (Verilog), 3 (PHP), 4 (C++)" + << ", supported: 1 (C), 2 (Verilog), 3 (PHP), 4 (C++), 5 (SOLIDITY)" << std::endl; }; @@ -137,6 +137,9 @@ int main(int argc, char* argv[]) { case LANGUAGE_PHP: DumpCodeBlocksFromSourceFile(command_args); break; + case LANGUAGE_SOLIDITY: + DumpCodeBlocksFromSourceFile(command_args); + break; case LANGUAGE_CPP: DumpCodeBlocksFromSourceFile(command_args); break; diff --git a/src/cf_file_scanner.cpp b/src/cf_file_scanner.cpp index 3e1750c..2ed8ad6 100644 --- a/src/cf_file_scanner.cpp +++ b/src/cf_file_scanner.cpp @@ -55,7 +55,7 @@ static int handle_command_args(int argc, char* argv[], FileScannerArgs& args) { << " [-a anomaly_threshold] (default: 3.0)" << std::endl << " [-l source_language_number] (default: 1 (C), " - << "supported: 1 (C), 2 (Verilog), 3 (PHP), 4 (C++))" + << "supported: 1 (C), 2 (Verilog), 3 (PHP), 4 (C++), 5 (SOLIDITY) )" << std::endl << " [-v log_level ] (default: 0, " << "{ERROR, 0}, {INFO, 1}, {DEBUG, 2})" @@ -162,6 +162,10 @@ int main(int argc, char* argv[]) { status = train_and_scan_util.ScanFile(eval_file, log_file); break; + case LANGUAGE_SOLIDITY: + status = train_and_scan_util.ScanFile(eval_file, + log_file); + break; case LANGUAGE_CPP: status = train_and_scan_util.ScanFile(eval_file, log_file); diff --git a/src/common_util.cpp b/src/common_util.cpp index 139ca0c..99ea355 100644 --- a/src/common_util.cpp +++ b/src/common_util.cpp @@ -127,6 +127,8 @@ ManagedTSTree GetTSTree(const std::string&, bool); template ManagedTSTree GetTSTree(const std::string&, bool); template +ManagedTSTree GetTSTree(const std::string&, bool); +template ManagedTSTree GetTSTree(const std::string&, bool); template ManagedTSTree GetTSTree(const std::string&, std::string&); @@ -135,6 +137,8 @@ ManagedTSTree GetTSTree(const std::string&, std::string&); template ManagedTSTree GetTSTree(const std::string&, std::string&); template +ManagedTSTree GetTSTree(const std::string&, std::string&); +template ManagedTSTree GetTSTree(const std::string&, std::string&); template void CollectCodeBlocksOfInterest(const ManagedTSTree&, @@ -146,5 +150,8 @@ template void CollectCodeBlocksOfInterest(const ManagedTSTree &, code_blocks_t&); template +void CollectCodeBlocksOfInterest(const ManagedTSTree &, + code_blocks_t&); +template void CollectCodeBlocksOfInterest(const ManagedTSTree &, code_blocks_t&); diff --git a/src/parser.h b/src/parser.h index fed8712..7b4c687 100644 --- a/src/parser.h +++ b/src/parser.h @@ -32,13 +32,15 @@ extern "C" const TSLanguage *tree_sitter_c(); extern "C" const TSLanguage *tree_sitter_verilog(); extern "C" const TSLanguage *tree_sitter_php(); +extern "C" const TSLanguage *tree_sitter_solidity(); extern "C" const TSLanguage *tree_sitter_cpp(); enum Language { LANGUAGE_C = 1, LANGUAGE_VERILOG = 2, LANGUAGE_PHP = 3, - LANGUAGE_CPP = 4 + LANGUAGE_CPP = 4, + LANGUAGE_SOLIDITY = 5 }; #define LANGUAGE_MIN LANGUAGE_C @@ -68,6 +70,10 @@ template <> inline const TSLanguage* GetTSLanguage () { return tree_sitter_php(); } +template <> inline const TSLanguage* GetTSLanguage () { + return tree_sitter_solidity(); +} + template class ParserBase { public: @@ -126,6 +132,9 @@ template <> inline bool IsIfStatement(const TSNode& node) { template <> inline bool IsIfStatement(const TSNode& node) { return IsTSNodeofType(node, "if_statement"); } +template <> inline bool IsIfStatement(const TSNode& node) { + return IsTSNodeofType(node, "if_statement"); +} template <> inline bool IsIfStatement(const TSNode& node) { return IsTSNodeofType(node, "if_statement"); } @@ -214,6 +223,13 @@ inline TSNode GetIfConditionNode(const TSNode& if_statement) { kIfCondition.c_str(), kIfCondition.length()); } +template <> +inline TSNode GetIfConditionNode(const TSNode& if_statement) { + const std::string& kIfCondition = "condition"; + return ts_node_child_by_field_name(if_statement, + kIfCondition.c_str(), kIfCondition.length()); +} + std::string OriginalSourceExpression(const TSNode&, const std::string&); template diff --git a/src/train_and_scan_util.cpp b/src/train_and_scan_util.cpp index 07d7dc1..58f11ef 100644 --- a/src/train_and_scan_util.cpp +++ b/src/train_and_scan_util.cpp @@ -30,6 +30,8 @@ template int TrainAndScanUtil::ScanFile( const std::string& test_file, std::ostream& log_file) const; template int TrainAndScanUtil::ScanFile( const std::string& test_file, std::ostream& log_file) const; +template int TrainAndScanUtil::ScanFile( + const std::string& test_file, std::ostream& log_file) const; template int TrainAndScanUtil::ScanFile( const std::string& test_file, std::ostream& log_file) const; template int TrainAndScanUtil::ScanExpression( @@ -38,6 +40,8 @@ template int TrainAndScanUtil::ScanExpression( const std::string& expression, std::ostream& log_file) const; template int TrainAndScanUtil::ScanExpression( const std::string& expression, std::ostream& log_file) const; +template int TrainAndScanUtil::ScanExpression( + const std::string& expression, std::ostream& log_file) const; template int TrainAndScanUtil::ScanExpression( const std::string& expression, std::ostream& log_file) const; diff --git a/src/tree-sitter/CMakeLists.txt b/src/tree-sitter/CMakeLists.txt index 0d40417..4cc08a8 100644 --- a/src/tree-sitter/CMakeLists.txt +++ b/src/tree-sitter/CMakeLists.txt @@ -36,6 +36,15 @@ target_include_directories(tree-sitter-php tree-sitter-php/src ) +add_library(tree-sitter-solidity STATIC + tree-sitter-solidity/src/parser.c +) + +target_include_directories(tree-sitter-solidity + PRIVATE + tree-sitter-solidity/src +) + add_library(tree-sitter-cpp STATIC tree-sitter-cpp/src/parser.c tree-sitter-cpp/src/scanner.cc diff --git a/src/tree_abstraction.h b/src/tree_abstraction.h index d6d57c8..3a95474 100644 --- a/src/tree_abstraction.h +++ b/src/tree_abstraction.h @@ -147,6 +147,12 @@ inline std::string NodeToString( return NodeToString(conditional_expression); } +template <> +inline std::string NodeToString( + const TSNode& conditional_expression) { + return NodeToString(conditional_expression); +} + // --------------------------------------------------------------------------- inline std::string AbstractTerminalString(const TSNode& node) { @@ -405,6 +411,12 @@ inline std::string NodeToString( const TSNode& conditional_expression) { return NodeToString(conditional_expression); } + +template <> +inline std::string NodeToString( + const TSNode& conditional_expression) { + return NodeToString(conditional_expression); +} // ----------------------------------------------------------------------- // Close to full-detailed level with using Tree-sitter print. Only // difference is in printing operators for binary and unary ops. @@ -456,6 +468,12 @@ inline std::string NodeToString( return NodeToString(conditional_expression); } +template <> +inline std::string NodeToString( + const TSNode& conditional_expression) { + return NodeToString(conditional_expression); +} + template <> inline std::string NodeToString( const TSNode& conditional_expression) { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 07b7f84..9db5a23 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -43,6 +43,7 @@ foreach(file ${files}) tree-sitter tree-sitter-c tree-sitter-php + tree-sitter-solidity tree-sitter-cpp tree-sitter-verilog pthread)