Skip to content

Commit

Permalink
Introduce new and more flexible API grapheme_line_segmenter, replacing scan API
Browse files Browse the repository at this point in the history

Signed-off-by: Christian Parpart <[email protected]>
  • Loading branch information
christianparpart committed Apr 5, 2024
1 parent 1b88442 commit 09cc6a2
Show file tree
Hide file tree
Showing 11 changed files with 1,569 additions and 869 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ option(LIBUNICODE_BENCHMARK "libunicode: Enables building of benchmark for libun
option(LIBUNICODE_TOOLS "libunicode: Builds CLI tools [default: ${MASTER_PROJECT}]" ${MASTER_PROJECT})
option(LIBUNICODE_BUILD_STATIC "libunicode: provide static library instead of dynamic [default: ${LIBUNICODE_BUILD_STATIC_DEFAULT}]" ${LIBUNICODE_BUILD_STATIC_DEFAULT})
option(LIBUNICODE_USE_INTRINSICS "libunicode: Use SIMD extenstion during text read [default: ON]" ON)
option(LIBUNICODE_USE_STD_SIMD "libunicode: Use std::simd as SIMD extenstion during text read (takes precedence over own intrinsics) [default: ON]" ${LIBUNICODE_USE_INTRINSICS})
option(LIBUNICODE_USE_STD_SIMD "libunicode: Use std::simd as SIMD extenstion during text read (takes precedence over own intrinsics) [default: ON]" ON)
option(LIBUNICODE_TABLEGEN_FASTBUILD "libunicode: Use fast table generation (takes more memory in final tables) [default: OFF]" OFF)

set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Enable testing of the benchmark library." FORCE)
Expand Down
2 changes: 1 addition & 1 deletion cmake/presets/common.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"version": 6,
"configurePresets": [
{ "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug", "LIBUNICODE_TABLEGEN_FASTBUILD": "ON" } },
{ "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug", "LIBUNICODE_TABLEGEN_FASTBUILD": "ON", "LIBUNICODE_TRACE": "ON" } },
{ "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
{ "name": "arch-native", "hidden": true, "cacheVariables": { "CMAKE_CXX_FLAGS": "-march=native" } },
{ "name": "clang", "hidden": true, "cacheVariables": { "CMAKE_CXX_COMPILER": "clang++" } },
Expand Down
18 changes: 10 additions & 8 deletions src/libunicode/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
include(GNUInstallDirs)

option(LIBUNICODE_TRACE "Enable trace logging" OFF)

function(ExtractZipArchive ZIP_FILE OUTPUT_DIR)
if(CMAKE_VERSION VERSION_LESS 3.18)
# Use the older method for versions prior to CMake 3.18
Expand Down Expand Up @@ -102,7 +104,6 @@ add_library(unicode ${LIBUNICODE_LIB_MODE}
codepoint_properties.cpp
emoji_segmenter.cpp
grapheme_segmenter.cpp
scan.cpp
script_segmenter.cpp
utf8.cpp
width.cpp
Expand All @@ -114,22 +115,22 @@ add_library(unicode ${LIBUNICODE_LIB_MODE}
)

if(LIBUNICODE_USE_STD_SIMD)
target_compile_definitions(unicode PRIVATE LIBUNICODE_USE_STD_SIMD)
target_compile_definitions(unicode PUBLIC LIBUNICODE_USE_STD_SIMD)
endif()
if(LIBUNICODE_USE_INTRINSICS)
target_compile_definitions(unicode PRIVATE USE_INTRINSICS)
target_compile_definitions(unicode PUBLIC LIBUNICODE_USE_INTRINSICS)
endif()

set(public_headers
capi.h
codepoint_properties.h
convert.h
emoji_segmenter.h
grapheme_line_segmenter.h
grapheme_segmenter.h
intrinsics.h
multistage_table_view.h
run_segmenter.h
scan.h
script_segmenter.h
support.h
utf8.h
Expand All @@ -150,6 +151,10 @@ set_target_properties(unicode PROPERTIES
SOVERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}"
)

if(LIBUNICODE_TRACE)
target_compile_definitions(unicode PUBLIC LIBUNICODE_TRACE)
endif()

add_library(unicode::unicode ALIAS unicode)
add_library(unicode::core ALIAS unicode)
target_include_directories(unicode PUBLIC $<BUILD_INTERFACE:${${PROJECT_NAME}_SOURCE_DIR}/src>
Expand All @@ -161,7 +166,6 @@ add_executable(unicode_tablegen tablegen.cpp)
set_target_properties(unicode_tablegen PROPERTIES CMAKE_BUILD_TYPE Release)
target_link_libraries(unicode_tablegen PRIVATE unicode::loader)


# {{{ installation
set(LIBUNICODE_CMAKE_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/libunicode" CACHE PATH "Installation directory for cmake files, a relative path that will be joined with ${CMAKE_INSTALL_PREFIX} or an absolute path.")
set(LIBUNICODE_INSTALL_CMAKE_FILES ${MASTER_PROJECT} CACHE BOOL "Decides whether or not to install CMake config and -version files.")
Expand Down Expand Up @@ -220,9 +224,9 @@ if(LIBUNICODE_TESTING)
capi_test.cpp
convert_test.cpp
emoji_segmenter_test.cpp
grapheme_line_segmenter_test.cpp
grapheme_segmenter_test.cpp
run_segmenter_test.cpp
scan_test.cpp
script_segmenter_test.cpp
test_main.cpp
unicode_test.cpp
Expand All @@ -247,8 +251,6 @@ if(LIBUNICODE_TESTING)
endif()
# }}}



# {{{ unicode_test
if(LIBUNICODE_BENCHMARK)
if(NOT benchmark_FOUND)
Expand Down
8 changes: 5 additions & 3 deletions src/libunicode/benchmark.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include <libunicode/convert.h>
#include <libunicode/scan.h>
#include <libunicode/grapheme_line_segmenter.h>
#include <libunicode/utf8.h>

#include <string_view>
Expand All @@ -14,7 +14,7 @@ static void benchmarkWithLength(benchmark::State& benchmarkState)
auto TestText = std::string(L, 'a') + "\u00A9";
for (auto _: benchmarkState)
{
benchmark::DoNotOptimize(unicode::detail::scan_for_text_ascii(TestText, L + 10));
benchmark::DoNotOptimize(unicode::detail::process_only_ascii(std::string_view(TestText).substr(0, L + 10)));
}
}

Expand All @@ -24,7 +24,9 @@ static void benchmarkWithOffset(benchmark::State& benchmarkState)
auto TestText = std::string(L, 'a') + "\U0001F600" + std::string(1000, 'a');
for (auto _: benchmarkState)
{
benchmark::DoNotOptimize(unicode::detail::scan_for_text_ascii(TestText, L + 10));
auto state = unicode::detail::unicode_process_state {};
auto eventHandler = unicode::detail::EventHandler{};
benchmark::DoNotOptimize(unicode::detail::process_only_complex_unicode(eventHandler, state, TestText, L + 10));
}
}

Expand Down
Loading

0 comments on commit 09cc6a2

Please sign in to comment.