Skip to content

Commit

Permalink
Add RE2 to thirdparty (#1934)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
  • Loading branch information
yingfeng authored Sep 27, 2024
1 parent 0077b4a commit 34eef36
Show file tree
Hide file tree
Showing 47 changed files with 25,381 additions and 6 deletions.
15 changes: 11 additions & 4 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ target_sources(infinity_core
${network_cppm}
)

add_dependencies(infinity_core thrift thriftnb parquet_static snappy)
add_dependencies(infinity_core thrift thriftnb parquet_static snappy re2)
target_include_directories(infinity_core PUBLIC ${Python3_INCLUDE_DIRS})
target_include_directories(infinity_core PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
target_include_directories(infinity_core PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/parser")
Expand Down Expand Up @@ -279,6 +279,7 @@ target_include_directories(infinity_core PUBLIC "${CMAKE_SOURCE_DIR}/third_party
target_include_directories(infinity_core PUBLIC "${CMAKE_SOURCE_DIR}/third_party/curlpp/include")
target_include_directories(infinity_core PUBLIC "${CMAKE_SOURCE_DIR}/third_party/curl/include")
target_include_directories(infinity_core PUBLIC "${CMAKE_SOURCE_DIR}/third_party/darts/")
target_include_directories(infinity_core PUBLIC "${CMAKE_SOURCE_DIR}/third_party/re2")

if (NOT SUPPORT_FMA EQUAL 0)
message(FATAL_ERROR "This project requires the processor support fused multiply-add (FMA) instructions.")
Expand Down Expand Up @@ -347,6 +348,7 @@ target_link_libraries(infinity
libcurl_static
ssl.a
crypto.a
re2.a
)
target_link_directories(infinity PUBLIC "${CMAKE_BINARY_DIR}/lib")
target_link_directories(infinity PUBLIC "${CMAKE_BINARY_DIR}/third_party/oatpp/src/")
Expand All @@ -357,6 +359,7 @@ target_link_directories(infinity PUBLIC "${CMAKE_BINARY_DIR}/third_party/pugixml
target_link_directories(infinity PUBLIC "${CMAKE_BINARY_DIR}/third_party/curlpp/")
target_link_directories(infinity PUBLIC "${CMAKE_BINARY_DIR}/third_party/curl/")
target_link_directories(infinity PUBLIC "${CMAKE_BINARY_DIR}/third_party/")
target_link_directories(infinity PUBLIC "${CMAKE_BINARY_DIR}/third_party/re2/")
target_link_directories(infinity PUBLIC "/usr/local/openssl30/lib64")

target_include_directories(infinity PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
Expand Down Expand Up @@ -413,6 +416,7 @@ if (SKBUILD)
libcurl_static
ssl.a
crypto.a
re2.a
)

# WARN: python modules shall not link to static libstdc++!!!
Expand All @@ -427,6 +431,7 @@ if (SKBUILD)
target_link_directories(embedded_infinity_ext PUBLIC "${CMAKE_BINARY_DIR}/third_party/pugixml/")
target_link_directories(embedded_infinity_ext PUBLIC "${CMAKE_BINARY_DIR}/third_party/curlpp/")
target_link_directories(embedded_infinity_ext PUBLIC "${CMAKE_BINARY_DIR}/third_party/curl/")
target_link_directories(embedded_infinity_ext PUBLIC "${CMAKE_BINARY_DIR}/third_party/re2/")
target_link_directories(embedded_infinity_ext PUBLIC "${CMAKE_BINARY_DIR}/third_party/")
target_link_directories(embedded_infinity_ext PUBLIC "/usr/local/openssl30/lib64")
nanobind_disable_stack_protector(embedded_infinity_ext)
Expand Down Expand Up @@ -532,7 +537,7 @@ add_executable(unit_test
)

set_target_properties(unit_test PROPERTIES OUTPUT_NAME test_main)
add_dependencies(unit_test oatpp miniocpp pugixml-static curlpp_static inih libcurl_static)
add_dependencies(unit_test oatpp miniocpp pugixml-static curlpp_static inih libcurl_static re2)

target_link_libraries(unit_test
gtest
Expand All @@ -559,6 +564,7 @@ target_link_libraries(unit_test
thriftnb.a
event.a
miniocpp.a
re2.a
pugixml-static
curlpp_static
inih.a
Expand All @@ -567,8 +573,6 @@ target_link_libraries(unit_test
crypto.a
)

add_dependencies(unit_test oatpp)

target_link_directories(unit_test PUBLIC "${CMAKE_BINARY_DIR}/lib")
target_link_directories(unit_test PUBLIC "${CMAKE_BINARY_DIR}/third_party/arrow/")
target_link_directories(unit_test PUBLIC "${CMAKE_BINARY_DIR}/third_party/snappy/")
Expand All @@ -577,6 +581,7 @@ target_link_directories(unit_test PUBLIC "${CMAKE_BINARY_DIR}/third_party/minio-
target_link_directories(unit_test PUBLIC "${CMAKE_BINARY_DIR}/third_party/pugixml/")
target_link_directories(unit_test PUBLIC "${CMAKE_BINARY_DIR}/third_party/curlpp/")
target_link_directories(unit_test PUBLIC "${CMAKE_BINARY_DIR}/third_party/curl/")
target_link_directories(unit_test PUBLIC "${CMAKE_BINARY_DIR}/third_party/re2/")
target_link_directories(unit_test PUBLIC "${CMAKE_BINARY_DIR}/third_party/")
target_link_directories(unit_test PUBLIC "/usr/local/openssl30/lib64")

Expand Down Expand Up @@ -604,6 +609,8 @@ target_include_directories(unit_test PUBLIC "${CMAKE_SOURCE_DIR}/third_party/ope
target_include_directories(unit_test PUBLIC "${CMAKE_SOURCE_DIR}/third_party/curlpp/include")
target_include_directories(unit_test PUBLIC "${CMAKE_SOURCE_DIR}/third_party/curl/include")
target_include_directories(unit_test PUBLIC "${CMAKE_SOURCE_DIR}/third_party/darts")
target_include_directories(unit_test PUBLIC "${CMAKE_SOURCE_DIR}/third_party/re2")


# target_compile_options(unit_test PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-mavx2 -mfma -mf16c -mpopcnt>)
if (SUPPORT_AVX2 EQUAL 0 OR SUPPORT_AVX512 EQUAL 0)
Expand Down
5 changes: 3 additions & 2 deletions third_party/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,12 @@ add_library(inih STATIC
inih/ini.c
)

add_subdirectory(curl)

add_subdirectory(re2)
target_compile_options(
inih
PRIVATE
-O3 -Wno-sign-compare
-fPIC
)

add_subdirectory(curl)
70 changes: 70 additions & 0 deletions third_party/re2/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Copyright 2015 The RE2 Authors. All Rights Reserved. Use of this source code
# is governed by a BSD-style license that can be found in the LICENSE file.

cmake_minimum_required(VERSION 3.5...3.29)

# Opt in to the NEW behaviour of each listed policy, but only when the running
# CMake actually knows the policy.
foreach(re2_policy CMP0048 CMP0063)
  if(POLICY ${re2_policy})
    cmake_policy(SET ${re2_policy} NEW)
  endif()
endforeach()

# RE2 is built from C++ sources only.
project(RE2 LANGUAGES CXX)

include(CTest)

# CMake seems to have no way to enable/disable testing per subproject, so we
# provide an option similar to BUILD_TESTING, but just for RE2.
option(RE2_BUILD_TESTING "enable testing for RE2" OFF)

# NOTE(review): the UNIX "-pthread" compile/link options were already disabled
# (commented out) in this file, so none are added here. Confirm that consumers
# of re2 provide their own threading flags.

# Static RE2 library built from the vendored sources in this directory.
add_library(re2 STATIC
  re2/bitmap256.cc
  re2/compile.cc
  re2/bitstate.cc
  re2/dfa.cc
  re2/filtered_re2.cc
  re2/mimics_pcre.cc
  re2/nfa.cc
  re2/onepass.cc
  re2/parse.cc
  re2/perl_groups.cc
  re2/prefilter.cc
  re2/prefilter_tree.cc
  re2/prog.cc
  re2/re2.cc
  re2/regexp.cc
  re2/set.cc
  re2/simplify.cc
  re2/stringpiece.cc
  re2/tostring.cc
  re2/unicode_casefold.cc
  re2/unicode_groups.cc
  util/rune.cc
  util/strutil.cc
)

# Target-scoped replacement for the directory-wide CMAKE_CXX_VISIBILITY_PRESET
# variable: symbols are hidden by default. Position-independent code is
# requested because this archive is also linked into a Python extension module
# by the parent project (presumably a shared object -- TODO confirm); the flag
# is harmless for the static executables.
set_target_properties(re2 PROPERTIES
  CXX_VISIBILITY_PRESET hidden
  POSITION_INDEPENDENT_CODE ON
)

# Definitions are attached to the target (PRIVATE) instead of the directory, so
# they apply to RE2's own translation units only.
target_compile_definitions(re2 PRIVATE RE2_ON_VALGRIND)

if(WIN32)
  target_compile_definitions(re2
    PRIVATE
      UNICODE
      _UNICODE
      STRICT
      NOMINMAX
      _CRT_SECURE_NO_WARNINGS
      _SCL_SECURE_NO_WARNINGS
  )
endif()

# Consumers that link the re2 target get this directory on their include path
# (build tree only), so "re2/..." and "util/..." headers resolve.
target_include_directories(
  re2
  PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>)

44 changes: 44 additions & 0 deletions third_party/re2/re2/bitmap256.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright 2023 The RE2 Authors. All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "re2/bitmap256.h"

#include <stdint.h>

#include "util/logging.h"
#include "util/util.h"

namespace re2 {

int Bitmap256::FindNextSetBit(int c) const {
DCHECK_GE(c, 0);
DCHECK_LE(c, 255);

// Check the word that contains the bit. Mask out any lower bits.
int i = c / 64;
uint64_t word = words_[i] & (~uint64_t{0} << (c % 64));
if (word != 0)
return (i * 64) + FindLSBSet(word);

// Check any following words.
i++;
switch (i) {
case 1:
if (words_[1] != 0)
return (1 * 64) + FindLSBSet(words_[1]);
FALLTHROUGH_INTENDED;
case 2:
if (words_[2] != 0)
return (2 * 64) + FindLSBSet(words_[2]);
FALLTHROUGH_INTENDED;
case 3:
if (words_[3] != 0)
return (3 * 64) + FindLSBSet(words_[3]);
FALLTHROUGH_INTENDED;
default:
return -1;
}
}

} // namespace re2
82 changes: 82 additions & 0 deletions third_party/re2/re2/bitmap256.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// Copyright 2016 The RE2 Authors. All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#ifndef RE2_BITMAP256_H_
#define RE2_BITMAP256_H_

#ifdef _MSC_VER
#include <intrin.h>
#endif
#include <stdint.h>
#include <string.h>

#include "util/logging.h"

namespace re2 {

// A set of 256 bits (indices 0..255) stored as four 64-bit words.
class Bitmap256 {
 public:
  // Constructs a bitmap with every bit cleared.
  Bitmap256() { Clear(); }

  // Resets every bit to zero.
  void Clear() {
    for (uint64_t& word : words_)
      word = 0;
  }

  // Reports whether the bit with index c is set.
  bool Test(int c) const {
    DCHECK_GE(c, 0);
    DCHECK_LE(c, 255);

    const uint64_t bit = uint64_t{1} << (c % 64);
    return (words_[c / 64] & bit) != 0;
  }

  // Turns on the bit with index c.
  void Set(int c) {
    DCHECK_GE(c, 0);
    DCHECK_LE(c, 255);

    const uint64_t bit = uint64_t{1} << (c % 64);
    words_[c / 64] |= bit;
  }

  // Returns the index of the first set bit at position >= c,
  // or -1 when there is none.
  int FindNextSetBit(int c) const;

 private:
  // Returns the index of the lowest set bit of n.
  // n is expected to be non-zero (asserted via DCHECK_NE).
  static int FindLSBSet(uint64_t n) {
    DCHECK_NE(n, 0);
#if defined(__GNUC__)
    return __builtin_ctzll(n);
#elif defined(_MSC_VER) && defined(_M_X64)
    unsigned long index;
    _BitScanForward64(&index, n);
    return static_cast<int>(index);
#elif defined(_MSC_VER) && defined(_M_IX86)
    // Only a 32-bit scan is used here: try the low half first, then the
    // high half.
    unsigned long index;
    const uint32_t low_half = static_cast<uint32_t>(n);
    if (low_half != 0) {
      _BitScanForward(&index, low_half);
      return static_cast<int>(index);
    }
    _BitScanForward(&index, static_cast<uint32_t>(n >> 32));
    return static_cast<int>(index) + 32;
#else
    // Portable fallback: halve the search window on each step. Whenever a
    // left shift still leaves a non-zero value, the lowest set bit sits in
    // the lower part of the window, so keep the shifted value and lower the
    // candidate index by the shift amount.
    int index = 63;
    for (int shift = 32; shift > 0; shift /= 2) {
      const uint64_t shifted = n << shift;
      if (shifted != 0) {
        n = shifted;
        index -= shift;
      }
    }
    return index;
#endif
  }

  uint64_t words_[4];
};

} // namespace re2

#endif // RE2_BITMAP256_H_
Loading

0 comments on commit 34eef36

Please sign in to comment.