From 1a17b8575cf1c43b91fefb27f0f06f0046d244a1 Mon Sep 17 00:00:00 2001
From: wraymo <37269683+wraymo@users.noreply.github.com>
Date: Tue, 9 Jan 2024 16:44:18 -0500
Subject: [PATCH] Add clp-s for compressing and searching semi-structured logs.
 (#217)

---
 .gitmodules                                   |    6 +
 components/core/.clang-format                 |    4 +-
 components/core/.gitignore                    |    1 +
 components/core/CMakeLists.txt                |   17 +
 components/core/README.md                     |  119 +-
 .../cmake/Modules/ExternalAntlr4Cpp.cmake     |  180 +++
 components/core/cmake/Modules/FindANTLR.cmake |  139 ++
 components/core/src/clp_s/ArchiveReader.cpp   |   82 ++
 components/core/src/clp_s/ArchiveReader.hpp   |   71 +
 components/core/src/clp_s/ArchiveWriter.cpp   |  124 ++
 components/core/src/clp_s/ArchiveWriter.hpp   |   94 ++
 components/core/src/clp_s/CMakeLists.txt      |  137 ++
 components/core/src/clp_s/ColumnReader.cpp    |  177 +++
 components/core/src/clp_s/ColumnReader.hpp    |  265 ++++
 components/core/src/clp_s/ColumnWriter.cpp    |  142 ++
 components/core/src/clp_s/ColumnWriter.hpp    |  232 ++++
 .../core/src/clp_s/CommandLineArguments.cpp   |  298 +++++
 .../core/src/clp_s/CommandLineArguments.hpp   |   74 ++
 components/core/src/clp_s/Compressor.hpp      |   51 +
 components/core/src/clp_s/Decompressor.hpp    |   64 +
 components/core/src/clp_s/Defs.hpp            |   44 +
 components/core/src/clp_s/DictionaryEntry.cpp |  257 ++++
 components/core/src/clp_s/DictionaryEntry.hpp |  290 ++++
 .../core/src/clp_s/DictionaryReader.hpp       |  210 +++
 .../core/src/clp_s/DictionaryWriter.cpp       |   67 +
 .../core/src/clp_s/DictionaryWriter.hpp       |  158 +++
 components/core/src/clp_s/ErrorCode.hpp       |   31 +
 components/core/src/clp_s/FileReader.cpp      |  150 +++
 components/core/src/clp_s/FileReader.hpp      |  166 +++
 components/core/src/clp_s/FileWriter.cpp      |  165 +++
 components/core/src/clp_s/FileWriter.hpp      |  122 ++
 components/core/src/clp_s/JsonConstructor.cpp |   72 +
 components/core/src/clp_s/JsonConstructor.hpp |   59 +
 .../core/src/clp_s/JsonFileIterator.cpp       |  129 ++
 .../core/src/clp_s/JsonFileIterator.hpp       |   75 ++
 components/core/src/clp_s/JsonParser.cpp      |  298 +++++
 components/core/src/clp_s/JsonParser.hpp      |  101 ++
 components/core/src/clp_s/JsonSerializer.hpp  |   83 ++
 components/core/src/clp_s/ParsedMessage.hpp   |   54 +
 components/core/src/clp_s/ReaderUtils.cpp     |  231 ++++
 components/core/src/clp_s/ReaderUtils.hpp     |  118 ++
 components/core/src/clp_s/SchemaMap.cpp       |   37 +
 components/core/src/clp_s/SchemaMap.hpp       |   48 +
 components/core/src/clp_s/SchemaReader.cpp    |  288 ++++
 components/core/src/clp_s/SchemaReader.hpp    |  153 +++
 components/core/src/clp_s/SchemaTree.cpp      |   25 +
 components/core/src/clp_s/SchemaTree.hpp      |   99 ++
 components/core/src/clp_s/SchemaWriter.cpp    |   56 +
 components/core/src/clp_s/SchemaWriter.hpp    |   61 +
 .../src/clp_s/TimestampDictionaryReader.cpp   |   91 ++
 .../src/clp_s/TimestampDictionaryReader.hpp   |   99 ++
 .../src/clp_s/TimestampDictionaryWriter.cpp   |  146 ++
 .../src/clp_s/TimestampDictionaryWriter.hpp   |   95 ++
 components/core/src/clp_s/TimestampEntry.cpp  |  345 +++++
 components/core/src/clp_s/TimestampEntry.hpp  |  101 ++
 .../core/src/clp_s/TimestampPattern.cpp       | 1008 ++++++++++++++
 .../core/src/clp_s/TimestampPattern.hpp       |  166 +++
 .../core/src/clp_s/TraceableException.hpp     |   49 +
 components/core/src/clp_s/Utils.cpp           |  431 ++++++
 components/core/src/clp_s/Utils.hpp           |  273 ++++
 components/core/src/clp_s/VariableDecoder.cpp |  118 ++
 components/core/src/clp_s/VariableDecoder.hpp |   61 +
 components/core/src/clp_s/VariableEncoder.cpp |  184 +++
 components/core/src/clp_s/VariableEncoder.hpp |   71 +
 components/core/src/clp_s/ZstdCompressor.cpp  |  120 ++
 components/core/src/clp_s/ZstdCompressor.hpp  |   98 ++
 .../core/src/clp_s/ZstdDecompressor.cpp       |  238 ++++
 .../core/src/clp_s/ZstdDecompressor.hpp       |  146 ++
 components/core/src/clp_s/clp-s.cpp           |  125 ++
 components/core/src/clp_s/search/AndExpr.cpp  |   57 +
 components/core/src/clp_s/search/AndExpr.hpp  |   58 +
 .../core/src/clp_s/search/BooleanLiteral.cpp  |   44 +
 .../core/src/clp_s/search/BooleanLiteral.hpp  |   58 +
 .../src/clp_s/search/ColumnDescriptor.cpp     |   90 ++
 .../src/clp_s/search/ColumnDescriptor.hpp     |  214 +++
 .../core/src/clp_s/search/ConstantProp.cpp    |   43 +
 .../core/src/clp_s/search/ConstantProp.hpp    |   23 +
 .../core/src/clp_s/search/ConvertToExists.cpp |  116 ++
 .../core/src/clp_s/search/ConvertToExists.hpp |   29 +
 .../core/src/clp_s/search/DateLiteral.cpp     |   92 ++
 .../core/src/clp_s/search/DateLiteral.hpp     |   65 +
 .../core/src/clp_s/search/EmptyExpr.cpp       |   27 +
 .../core/src/clp_s/search/EmptyExpr.hpp       |   37 +
 .../clp_s/search/EvaluateTimestampIndex.cpp   |  103 ++
 .../clp_s/search/EvaluateTimestampIndex.hpp   |   31 +
 .../core/src/clp_s/search/Expression.cpp      |   35 +
 .../core/src/clp_s/search/Expression.hpp      |  118 ++
 .../core/src/clp_s/search/FilterExpr.cpp      |  106 ++
 .../core/src/clp_s/search/FilterExpr.hpp      |  100 ++
 .../core/src/clp_s/search/FilterOperation.hpp |   20 +
 components/core/src/clp_s/search/Integral.cpp |   96 ++
 components/core/src/clp_s/search/Integral.hpp |   84 ++
 components/core/src/clp_s/search/Literal.hpp  |  115 ++
 .../core/src/clp_s/search/NarrowTypes.cpp     |   76 ++
 .../core/src/clp_s/search/NarrowTypes.hpp     |   22 +
 .../core/src/clp_s/search/NullLiteral.cpp     |   32 +
 .../core/src/clp_s/search/NullLiteral.hpp     |   54 +
 components/core/src/clp_s/search/OrExpr.cpp   |   55 +
 components/core/src/clp_s/search/OrExpr.hpp   |   53 +
 .../core/src/clp_s/search/OrOfAndForm.cpp     |  179 +++
 .../core/src/clp_s/search/OrOfAndForm.hpp     |   66 +
 components/core/src/clp_s/search/Output.cpp   | 1182 +++++++++++++++++
 components/core/src/clp_s/search/Output.hpp   |  338 +++++
 .../core/src/clp_s/search/SchemaMatch.cpp     |  452 +++++++
 .../core/src/clp_s/search/SchemaMatch.hpp     |  172 +++
 .../core/src/clp_s/search/SearchUtils.cpp     |   87 ++
 .../core/src/clp_s/search/SearchUtils.hpp     |   48 +
 .../core/src/clp_s/search/StringLiteral.cpp   |   95 ++
 .../core/src/clp_s/search/StringLiteral.hpp   |   78 ++
 .../core/src/clp_s/search/Transformation.hpp  |   21 +
 components/core/src/clp_s/search/Value.hpp    |   33 +
 .../clp_search/EncodedVariableInterpreter.cpp |   75 ++
 .../clp_search/EncodedVariableInterpreter.hpp |   84 ++
 .../core/src/clp_s/search/clp_search/Grep.cpp |  639 +++++++++
 .../core/src/clp_s/search/clp_search/Grep.hpp |   54 +
 .../src/clp_s/search/clp_search/Query.cpp     |  150 +++
 .../src/clp_s/search/clp_search/Query.hpp     |  192 +++
 .../core/src/clp_s/search/kql/CMakeLists.txt  |   28 +
 components/core/src/clp_s/search/kql/Kql.g4   |  107 ++
 components/core/src/clp_s/search/kql/kql.cpp  |  248 ++++
 components/core/src/clp_s/search/kql/kql.hpp  |   17 +
 components/core/submodules/abseil-cpp         |    1 +
 components/core/submodules/simdjson           |    1 +
 .../scripts/deps-download/abseil-cpp.json     |   10 +
 .../tools/scripts/deps-download/antlr4.json   |   14 +
 .../scripts/deps-download/download-all.sh     |    3 +
 .../tools/scripts/deps-download/simdjson.json |   11 +
 .../centos7.4/install-prebuilt-packages.sh    |    1 +
 .../lib_install/macos-12/install-all.sh       |    1 +
 .../ubuntu-focal/install-prebuilt-packages.sh |    1 +
 .../ubuntu-jammy/install-prebuilt-packages.sh |    1 +
 docs/core/clp-structured.md                   |  125 ++
 docs/core/clp-unstructured.md                 |  157 +++
 133 files changed, 16503 insertions(+), 110 deletions(-)
 create mode 100644 components/core/cmake/Modules/ExternalAntlr4Cpp.cmake
 create mode 100644 components/core/cmake/Modules/FindANTLR.cmake
 create mode 100644 components/core/src/clp_s/ArchiveReader.cpp
 create mode 100644 components/core/src/clp_s/ArchiveReader.hpp
 create mode 100644 components/core/src/clp_s/ArchiveWriter.cpp
 create mode 100644 components/core/src/clp_s/ArchiveWriter.hpp
 create mode 100644 components/core/src/clp_s/CMakeLists.txt
 create mode 100644 components/core/src/clp_s/ColumnReader.cpp
 create mode 100644 components/core/src/clp_s/ColumnReader.hpp
 create mode 100644 components/core/src/clp_s/ColumnWriter.cpp
 create mode 100644 components/core/src/clp_s/ColumnWriter.hpp
 create mode 100644 components/core/src/clp_s/CommandLineArguments.cpp
 create mode 100644 components/core/src/clp_s/CommandLineArguments.hpp
 create mode 100644 components/core/src/clp_s/Compressor.hpp
 create mode 100644 components/core/src/clp_s/Decompressor.hpp
 create mode 100644 components/core/src/clp_s/Defs.hpp
 create mode 100644 components/core/src/clp_s/DictionaryEntry.cpp
 create mode 100644 components/core/src/clp_s/DictionaryEntry.hpp
 create mode 100644 components/core/src/clp_s/DictionaryReader.hpp
 create mode 100644 components/core/src/clp_s/DictionaryWriter.cpp
 create mode 100644 components/core/src/clp_s/DictionaryWriter.hpp
 create mode 100644 components/core/src/clp_s/ErrorCode.hpp
 create mode 100644 components/core/src/clp_s/FileReader.cpp
 create mode 100644 components/core/src/clp_s/FileReader.hpp
 create mode 100644 components/core/src/clp_s/FileWriter.cpp
 create mode 100644 components/core/src/clp_s/FileWriter.hpp
 create mode 100644 components/core/src/clp_s/JsonConstructor.cpp
 create mode 100644 components/core/src/clp_s/JsonConstructor.hpp
 create mode 100644 components/core/src/clp_s/JsonFileIterator.cpp
 create mode 100644 components/core/src/clp_s/JsonFileIterator.hpp
 create mode 100644 components/core/src/clp_s/JsonParser.cpp
 create mode 100644 components/core/src/clp_s/JsonParser.hpp
 create mode 100644 components/core/src/clp_s/JsonSerializer.hpp
 create mode 100644 components/core/src/clp_s/ParsedMessage.hpp
 create mode 100644 components/core/src/clp_s/ReaderUtils.cpp
 create mode 100644 components/core/src/clp_s/ReaderUtils.hpp
 create mode 100644 components/core/src/clp_s/SchemaMap.cpp
 create mode 100644 components/core/src/clp_s/SchemaMap.hpp
 create mode 100644 components/core/src/clp_s/SchemaReader.cpp
 create mode 100644 components/core/src/clp_s/SchemaReader.hpp
 create mode 100644 components/core/src/clp_s/SchemaTree.cpp
 create mode 100644 components/core/src/clp_s/SchemaTree.hpp
 create mode 100644 components/core/src/clp_s/SchemaWriter.cpp
 create mode 100644 components/core/src/clp_s/SchemaWriter.hpp
 create mode 100644 components/core/src/clp_s/TimestampDictionaryReader.cpp
 create mode 100644 components/core/src/clp_s/TimestampDictionaryReader.hpp
 create mode 100644 components/core/src/clp_s/TimestampDictionaryWriter.cpp
 create mode 100644 components/core/src/clp_s/TimestampDictionaryWriter.hpp
 create mode 100644 components/core/src/clp_s/TimestampEntry.cpp
 create mode 100644 components/core/src/clp_s/TimestampEntry.hpp
 create mode 100644 components/core/src/clp_s/TimestampPattern.cpp
 create mode 100644 components/core/src/clp_s/TimestampPattern.hpp
 create mode 100644 components/core/src/clp_s/TraceableException.hpp
 create mode 100644 components/core/src/clp_s/Utils.cpp
 create mode 100644 components/core/src/clp_s/Utils.hpp
 create mode 100644 components/core/src/clp_s/VariableDecoder.cpp
 create mode 100644 components/core/src/clp_s/VariableDecoder.hpp
 create mode 100644 components/core/src/clp_s/VariableEncoder.cpp
 create mode 100644 components/core/src/clp_s/VariableEncoder.hpp
 create mode 100644 components/core/src/clp_s/ZstdCompressor.cpp
 create mode 100644 components/core/src/clp_s/ZstdCompressor.hpp
 create mode 100644 components/core/src/clp_s/ZstdDecompressor.cpp
 create mode 100644 components/core/src/clp_s/ZstdDecompressor.hpp
 create mode 100644 components/core/src/clp_s/clp-s.cpp
 create mode 100644 components/core/src/clp_s/search/AndExpr.cpp
 create mode 100644 components/core/src/clp_s/search/AndExpr.hpp
 create mode 100644 components/core/src/clp_s/search/BooleanLiteral.cpp
 create mode 100644 components/core/src/clp_s/search/BooleanLiteral.hpp
 create mode 100644 components/core/src/clp_s/search/ColumnDescriptor.cpp
 create mode 100644 components/core/src/clp_s/search/ColumnDescriptor.hpp
 create mode 100644 components/core/src/clp_s/search/ConstantProp.cpp
 create mode 100644 components/core/src/clp_s/search/ConstantProp.hpp
 create mode 100644 components/core/src/clp_s/search/ConvertToExists.cpp
 create mode 100644 components/core/src/clp_s/search/ConvertToExists.hpp
 create mode 100644 components/core/src/clp_s/search/DateLiteral.cpp
 create mode 100644 components/core/src/clp_s/search/DateLiteral.hpp
 create mode 100644 components/core/src/clp_s/search/EmptyExpr.cpp
 create mode 100644 components/core/src/clp_s/search/EmptyExpr.hpp
 create mode 100644 components/core/src/clp_s/search/EvaluateTimestampIndex.cpp
 create mode 100644 components/core/src/clp_s/search/EvaluateTimestampIndex.hpp
 create mode 100644 components/core/src/clp_s/search/Expression.cpp
 create mode 100644 components/core/src/clp_s/search/Expression.hpp
 create mode 100644 components/core/src/clp_s/search/FilterExpr.cpp
 create mode 100644 components/core/src/clp_s/search/FilterExpr.hpp
 create mode 100644 components/core/src/clp_s/search/FilterOperation.hpp
 create mode 100644 components/core/src/clp_s/search/Integral.cpp
 create mode 100644 components/core/src/clp_s/search/Integral.hpp
 create mode 100644 components/core/src/clp_s/search/Literal.hpp
 create mode 100644 components/core/src/clp_s/search/NarrowTypes.cpp
 create mode 100644 components/core/src/clp_s/search/NarrowTypes.hpp
 create mode 100644 components/core/src/clp_s/search/NullLiteral.cpp
 create mode 100644 components/core/src/clp_s/search/NullLiteral.hpp
 create mode 100644 components/core/src/clp_s/search/OrExpr.cpp
 create mode 100644 components/core/src/clp_s/search/OrExpr.hpp
 create mode 100644 components/core/src/clp_s/search/OrOfAndForm.cpp
 create mode 100644 components/core/src/clp_s/search/OrOfAndForm.hpp
 create mode 100644 components/core/src/clp_s/search/Output.cpp
 create mode 100644 components/core/src/clp_s/search/Output.hpp
 create mode 100644 components/core/src/clp_s/search/SchemaMatch.cpp
 create mode 100644 components/core/src/clp_s/search/SchemaMatch.hpp
 create mode 100644 components/core/src/clp_s/search/SearchUtils.cpp
 create mode 100644 components/core/src/clp_s/search/SearchUtils.hpp
 create mode 100644 components/core/src/clp_s/search/StringLiteral.cpp
 create mode 100644 components/core/src/clp_s/search/StringLiteral.hpp
 create mode 100644 components/core/src/clp_s/search/Transformation.hpp
 create mode 100644 components/core/src/clp_s/search/Value.hpp
 create mode 100644 components/core/src/clp_s/search/clp_search/EncodedVariableInterpreter.cpp
 create mode 100644 components/core/src/clp_s/search/clp_search/EncodedVariableInterpreter.hpp
 create mode 100644 components/core/src/clp_s/search/clp_search/Grep.cpp
 create mode 100644 components/core/src/clp_s/search/clp_search/Grep.hpp
 create mode 100644 components/core/src/clp_s/search/clp_search/Query.cpp
 create mode 100644 components/core/src/clp_s/search/clp_search/Query.hpp
 create mode 100644 components/core/src/clp_s/search/kql/CMakeLists.txt
 create mode 100644 components/core/src/clp_s/search/kql/Kql.g4
 create mode 100644 components/core/src/clp_s/search/kql/kql.cpp
 create mode 100644 components/core/src/clp_s/search/kql/kql.hpp
 create mode 160000 components/core/submodules/abseil-cpp
 create mode 160000 components/core/submodules/simdjson
 create mode 100644 components/core/tools/scripts/deps-download/abseil-cpp.json
 create mode 100644 components/core/tools/scripts/deps-download/antlr4.json
 create mode 100644 components/core/tools/scripts/deps-download/simdjson.json
 create mode 100644 docs/core/clp-structured.md
 create mode 100644 docs/core/clp-unstructured.md

diff --git a/.gitmodules b/.gitmodules
index 4b3b13551..614f0871e 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -17,3 +17,9 @@
 [submodule "components/core/submodules/boost-outcome"]
 	path = components/core/submodules/boost-outcome
 	url = https://github.com/boostorg/outcome.git
+[submodule "components/core/submodules/simdjson"]
+	path = components/core/submodules/simdjson
+	url = https://github.com/simdjson/simdjson.git
+[submodule "components/core/submodules/abseil-cpp"]
+	path = components/core/submodules/abseil-cpp
+	url = https://github.com/abseil/abseil-cpp.git
diff --git a/components/core/.clang-format b/components/core/.clang-format
index fed2096cb..fbaf8f62e 100644
--- a/components/core/.clang-format
+++ b/components/core/.clang-format
@@ -72,8 +72,8 @@ IncludeCategories:
   # NOTE: A header is grouped by first matching regex
   # Library headers. Update when adding new libraries.
   # NOTE: clang-format retains leading white-space on a line in violation of the YAML spec.
-  - Regex: "^<(archive|boost|catch2|date|fmt|json|log_surgeon|mariadb|spdlog|sqlite3|string_utils\
-|yaml-cpp|zstd)"
+  - Regex: "^<(absl|antlr4|archive|boost|catch2|date|fmt|json|log_surgeon|mariadb|simdjson|spdlog\
+|sqlite3|string_utils|yaml-cpp|zstd)"
     Priority: 3
   # C system headers
   - Regex: "^<.+\\.h>"
diff --git a/components/core/.gitignore b/components/core/.gitignore
index a47a8cbd7..18670ec68 100644
--- a/components/core/.gitignore
+++ b/components/core/.gitignore
@@ -1,2 +1,3 @@
 build/**
 submodules/sqlite3/*
+third-party/**
diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt
index 35ebc84ac..9007f9328 100644
--- a/components/core/CMakeLists.txt
+++ b/components/core/CMakeLists.txt
@@ -75,6 +75,15 @@ elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
     endif ()
 endif ()
 
+# Find and setup ANTLR Library
+# We build and link to the static library
+find_package(ANTLR REQUIRED)
+if (ANTLR_FOUND)
+    message(STATUS "Found ANTLR ${ANTLR_VERSION}")
+else()
+    message(FATAL_ERROR "Could not find libraries for ANTLR ${ANTLR4_TAG}")
+endif()
+
 # Find and setup Boost Library
 if(CLP_USE_STATIC_LIBS)
     set(Boost_USE_STATIC_LIBS ON)
@@ -142,6 +151,13 @@ else()
     message(FATAL_ERROR "Could not find msgpack-cxx")
 endif()
 
+# Add abseil-cpp
+set(ABSL_PROPAGATE_CXX_STD ON)
+add_subdirectory(submodules/abseil-cpp EXCLUDE_FROM_ALL)
+
+# Add simdjson
+add_subdirectory(submodules/simdjson EXCLUDE_FROM_ALL)
+
 # Add yaml-cpp
 add_subdirectory(submodules/yaml-cpp EXCLUDE_FROM_ALL)
 
@@ -167,6 +183,7 @@ add_subdirectory(src/clp/clg)
 add_subdirectory(src/clp/clo)
 add_subdirectory(src/clp/clp)
 add_subdirectory(src/clp/make_dictionaries_readable)
+add_subdirectory(src/clp_s)
 
 set(SOURCE_FILES_unitTest
         src/clp/BufferedFileReader.cpp
diff --git a/components/core/README.md b/components/core/README.md
index cd2721d0e..b8d283e4f 100644
--- a/components/core/README.md
+++ b/components/core/README.md
@@ -12,10 +12,7 @@ CLP core is the low-level component that performs compression, decompression, an
     * [Docker Environment](#docker-environment)
   * [Build](#build)
 * [Running](#running)
-  * [`clp`](#clp)
-  * [`clg`](#clg)
-  * [`make-dictionaries-readable`](#make-dictionaries-readable)
-* [Parallel Compression](#parallel-compression)
+
 
 ## Requirements
 
@@ -36,10 +33,13 @@ tools/scripts/deps-download/download-all.sh
 ```
 
 This will download:
+* [abseil-cpp](https://github.com/abseil/abseil-cpp) (20230802.1)
+* [ANTLR](https://www.antlr.org) (v4.13.1)
 * [Catch2](https://github.com/catchorg/Catch2.git) (v2.13.7)
 * [date](https://github.com/HowardHinnant/date.git) (v3.0.1)
 * [json](https://github.com/nlohmann/json.git) (v3.10.4)
 * [log-surgeon](https://github.com/y-scope/log-surgeon) (895f464)
+* [simdjson](https://github.com/simdjson/simdjson) (v3.6.3)
 * [SQLite3](https://www.sqlite.org/download.html) (v3.36.0)
 * [yaml-cpp](https://github.com/jbeder/yaml-cpp.git) (v0.7.0)
 
@@ -98,108 +98,11 @@ the relevant paths on your machine.
 
 ## Running
 
-* CLP contains two core executables: `clp` and `clg`
-    * `clp` is used for compressing and extracting logs
-    * `clg` is used for performing wildcard searches on the compressed logs
-
-### `clp`
-
-To compress some logs without a schema file:
-```shell
-./clp c archives-dir /home/my/logs
-```
-* `archives-dir` is where compressed logs should be output
-  * `clp` will create a number of files and directories within, so it's best if this directory is empty
-  * You can use the same directory repeatedly and `clp` will add to the compressed logs within.
-* `/home/my/logs` is any log file or directory containing log files
-* In this mode, `clp` will use heuristics to determine what are the variables in
-  each uncompressed message.
-  * The heuristics roughly correspond to the example schema file in
-    `config/schemas.txt`.
-
-To compress with a user-defined schema file:
-```shell
-./clp c --schema-path path-to-schema-file archives-dir /home/my/logs 
-```
-* `path-to-schema-file` is the location of a schema file. For more details on 
-  schema files, see README-Schema.md.
+* CLP contains three core executables: `clp`, `clg`, and `clp-s`.
+  * `clp` is used for compressing and extracting unstructured (plain text) logs.
+  * `clg` is used for performing wildcard searches on the compressed unstructured logs.
+  * `clp-s` is used for compressing and searching semi-structured logs (e.g., JSON) with support for
+    handling highly dynamic schemas.
 
-To decompress those logs:
-```shell
-./clp x archive-dir decompressed
-```
-* `archives-dir` is where the compressed logs were previously stored
-* `decompressed` is a directory where they will be decompressed to
-
-You can also decompress a specific file:
-```shell
-./clp x archive-dir decompressed /my/file/path.log
-```
-* `/my/file/path.log` is the uncompressed file's path (the one that was passed to `clp` for compression) 
-
-More usage instructions can be found by running:
-```shell
-./clp --help
-```
-
-### `clg`
-
-To search the compressed logs:
-```shell
-./clg archives-dir " a *wildcard* search phrase "
-```
-* `archives-dir` is where the compressed logs were previously stored
-* For archives compressed without a schema file:
-  * The search phrase can contain the `*` wildcard which matches 0 or more
-    characters, or the `?` wildcard which matches any single character.
-* For archives compressed using a schema file:
-  * `*` may only represent non-delimiter characters.
-
-Similar to `clp`, `clg` can search a single file:
-```shell
-./clg archives-dir " a *wildcard* search phrase " /my/file/path.log
-```
-* `/my/file/path.log` is the uncompressed file's path (the one that was passed to `clp` for compression)
-
-More usage instructions can be found by running:
-```shell
-./clg --help
-```
-
-### `make-dictionaries-readable`
-
-If you'd like to convert the dictionaries of an individual archive into a human-readable form, you 
-can use `make-dictionaries-readable`.
-
-```shell
-./make-dictionaries-readable archive-path <output dir>
-```
-* `archive-path` is a path to a specific archive (inside `archives-dir`)
-
-See the `make-dictionaries-readable` [README](src/clp/make_dictionaries_readable/README.md) for 
-details on the output format. 
-
-## Parallel Compression
-
-By default, `clp` uses an embedded SQLite database, so each directory containing archives can only
-be accessed by a single `clp` instance.
-
-To enable parallel compression to the same archives directory, `clp`/`clg` can be configured to
-use a MySQL-type database (MariaDB) as follows: 
-
-* Install and configure MariaDB using the instructions for your platform
-* Create a user that has privileges to create databases, create tables, insert records, and delete
-  records.
-* Copy and change `config/metadata-db.yml`, setting the type to `mysql` and uncommenting the MySQL 
-  parameters.
-* Install the MariaDB and PyYAML Python packages `pip3 install mariadb PyYAML`
-  * This is necessary to run the database initialization script. If you prefer, you can run the 
-    SQL statements in `tools/scripts/db/init-db.py` directly.
-* Run `tools/scripts/db/init-db.py` with the updated config file. This will initialize the 
-  database CLP requires.
-* Run `clp` or `clg` as before, with the addition of the `--db-config-file` option pointing at 
-  the updated config file.
-* To compress in parallel, simply run another instance of `clp` concurrently.
-
-Note that currently, decompression (`clp x`) and search (`clg`) can only be run with a single 
-instance. We are in the process of open-sourcing parallelized versions of these as well.
+See [Using CLP for unstructured logs](../../docs/core/clp-unstructured.md) and
+[Using CLP for semi-structured logs](../../docs/core/clp-structured.md) for usage instructions.
diff --git a/components/core/cmake/Modules/ExternalAntlr4Cpp.cmake b/components/core/cmake/Modules/ExternalAntlr4Cpp.cmake
new file mode 100644
index 000000000..9c12ee3f3
--- /dev/null
+++ b/components/core/cmake/Modules/ExternalAntlr4Cpp.cmake
@@ -0,0 +1,180 @@
+# NOTE: ExternalAntlr4Cpp.cmake taken from
+# https://github.com/antlr/antlr4/blob/4.13.1/runtime/Cpp/cmake/ExternalAntlr4Cpp.cmake
+
+cmake_minimum_required(VERSION 3.7)
+
+if(POLICY CMP0114)
+    cmake_policy(SET CMP0114 NEW)
+endif()
+
+include(ExternalProject)
+
+set(ANTLR4_ROOT ${CMAKE_CURRENT_BINARY_DIR}/antlr4_runtime/src/antlr4_runtime)
+set(ANTLR4_INCLUDE_DIRS ${ANTLR4_ROOT}/runtime/Cpp/runtime/src)
+set(ANTLR4_GIT_REPOSITORY https://github.com/antlr/antlr4.git)
+if(NOT DEFINED ANTLR4_TAG)
+    # Set to a branch name to keep the library updated, at the cost of rebuilding after 'clean'.
+    # Set to a commit hash to keep the build stable and avoid rebuilding after 'clean'.
+    set(ANTLR4_TAG master)
+endif()
+
+# Ensure that the include dir already exists at configure time (to avoid cmake erroring
+# on non-existent include dirs)
+file(MAKE_DIRECTORY "${ANTLR4_INCLUDE_DIRS}")
+
+if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*")
+    set(ANTLR4_OUTPUT_DIR ${ANTLR4_ROOT}/runtime/Cpp/dist/$(Configuration))
+elseif(${CMAKE_GENERATOR} MATCHES "Xcode.*")
+    set(ANTLR4_OUTPUT_DIR ${ANTLR4_ROOT}/runtime/Cpp/dist/$(CONFIGURATION))
+else()
+    set(ANTLR4_OUTPUT_DIR ${ANTLR4_ROOT}/runtime/Cpp/dist)
+endif()
+
+if(MSVC)
+    set(ANTLR4_STATIC_LIBRARIES
+            ${ANTLR4_OUTPUT_DIR}/antlr4-runtime-static.lib)
+    set(ANTLR4_SHARED_LIBRARIES
+            ${ANTLR4_OUTPUT_DIR}/antlr4-runtime.lib)
+    set(ANTLR4_RUNTIME_LIBRARIES
+            ${ANTLR4_OUTPUT_DIR}/antlr4-runtime.dll)
+else()
+    set(ANTLR4_STATIC_LIBRARIES
+            ${ANTLR4_OUTPUT_DIR}/libantlr4-runtime.a)
+    if(MINGW)
+        set(ANTLR4_SHARED_LIBRARIES
+                ${ANTLR4_OUTPUT_DIR}/libantlr4-runtime.dll.a)
+        set(ANTLR4_RUNTIME_LIBRARIES
+                ${ANTLR4_OUTPUT_DIR}/libantlr4-runtime.dll)
+    elseif(CYGWIN)
+        set(ANTLR4_SHARED_LIBRARIES
+                ${ANTLR4_OUTPUT_DIR}/libantlr4-runtime.dll.a)
+        set(ANTLR4_RUNTIME_LIBRARIES
+                ${ANTLR4_OUTPUT_DIR}/cygantlr4-runtime-${ANTLR4_TAG}.dll)
+    elseif(APPLE)
+        set(ANTLR4_RUNTIME_LIBRARIES
+                ${ANTLR4_OUTPUT_DIR}/libantlr4-runtime.dylib)
+    else()
+        set(ANTLR4_RUNTIME_LIBRARIES
+                ${ANTLR4_OUTPUT_DIR}/libantlr4-runtime.so)
+    endif()
+endif()
+
+if(${CMAKE_GENERATOR} MATCHES ".* Makefiles")
+    # This avoids
+    # 'warning: jobserver unavailable: using -j1. Add '+' to parent make rule.'
+    set(ANTLR4_BUILD_COMMAND $(MAKE))
+elseif(${CMAKE_GENERATOR} MATCHES "Visual Studio.*")
+    set(ANTLR4_BUILD_COMMAND
+            ${CMAKE_COMMAND}
+            --build .
+            --config $(Configuration)
+            --target)
+elseif(${CMAKE_GENERATOR} MATCHES "Xcode.*")
+    set(ANTLR4_BUILD_COMMAND
+            ${CMAKE_COMMAND}
+            --build .
+            --config $(CONFIGURATION)
+            --target)
+else()
+    set(ANTLR4_BUILD_COMMAND
+            ${CMAKE_COMMAND}
+            --build .
+            --target)
+endif()
+
+if(NOT DEFINED ANTLR4_WITH_STATIC_CRT)
+    set(ANTLR4_WITH_STATIC_CRT ON)
+endif()
+
+if(ANTLR4_ZIP_REPOSITORY)
+    ExternalProject_Add(
+            antlr4_runtime
+            PREFIX antlr4_runtime
+            URL ${ANTLR4_ZIP_REPOSITORY}
+            DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR}
+            BUILD_COMMAND ""
+            BUILD_IN_SOURCE 1
+            SOURCE_DIR ${ANTLR4_ROOT}
+            SOURCE_SUBDIR runtime/Cpp
+            CMAKE_CACHE_ARGS
+            -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
+            -DWITH_STATIC_CRT:BOOL=${ANTLR4_WITH_STATIC_CRT}
+            -DDISABLE_WARNINGS:BOOL=ON
+            # -DCMAKE_CXX_STANDARD:STRING=17 # if desired, compile the runtime with a different C++ standard
+            # -DCMAKE_CXX_STANDARD:STRING=${CMAKE_CXX_STANDARD} # alternatively, compile the runtime with the same C++ standard as the outer project
+            INSTALL_COMMAND ""
+            EXCLUDE_FROM_ALL 1)
+else()
+    ExternalProject_Add(
+            antlr4_runtime
+            PREFIX antlr4_runtime
+            GIT_REPOSITORY ${ANTLR4_GIT_REPOSITORY}
+            GIT_TAG ${ANTLR4_TAG}
+            DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR}
+            BUILD_COMMAND ""
+            BUILD_IN_SOURCE 1
+            SOURCE_DIR ${ANTLR4_ROOT}
+            SOURCE_SUBDIR runtime/Cpp
+            CMAKE_CACHE_ARGS
+            -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
+            -DWITH_STATIC_CRT:BOOL=${ANTLR4_WITH_STATIC_CRT}
+            -DDISABLE_WARNINGS:BOOL=ON
+            # -DCMAKE_CXX_STANDARD:STRING=17 # if desired, compile the runtime with a different C++ standard
+            # -DCMAKE_CXX_STANDARD:STRING=${CMAKE_CXX_STANDARD} # alternatively, compile the runtime with the same C++ standard as the outer project
+            INSTALL_COMMAND ""
+            EXCLUDE_FROM_ALL 1)
+endif()
+
+# Separate build steps for the static and shared libraries, as people rarely want both
+set(ANTLR4_BUILD_DIR ${ANTLR4_ROOT})
+if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14.0")
+    # CMake >= 3.14 builds in the SOURCE_SUBDIR set above when BUILD_IN_SOURCE is true
+    set(ANTLR4_BUILD_DIR ${ANTLR4_ROOT}/runtime/Cpp)
+endif()
+
+ExternalProject_Add_Step(
+        antlr4_runtime
+        build_static
+        COMMAND ${ANTLR4_BUILD_COMMAND} antlr4_static
+        # Depend on target instead of step (a custom command)
+        # to avoid running dependent steps concurrently
+        DEPENDS antlr4_runtime
+        BYPRODUCTS ${ANTLR4_STATIC_LIBRARIES}
+        EXCLUDE_FROM_MAIN 1
+        WORKING_DIRECTORY ${ANTLR4_BUILD_DIR})
+ExternalProject_Add_StepTargets(antlr4_runtime build_static)
+
+add_library(antlr4_static STATIC IMPORTED)
+add_dependencies(antlr4_static antlr4_runtime-build_static)
+set_target_properties(antlr4_static PROPERTIES
+        IMPORTED_LOCATION ${ANTLR4_STATIC_LIBRARIES})
+target_include_directories(antlr4_static
+        INTERFACE
+        ${ANTLR4_INCLUDE_DIRS}
+)
+
+ExternalProject_Add_Step(
+        antlr4_runtime
+        build_shared
+        COMMAND ${ANTLR4_BUILD_COMMAND} antlr4_shared
+        # Depend on target instead of step (a custom command)
+        # to avoid running dependent steps concurrently
+        DEPENDS antlr4_runtime
+        BYPRODUCTS ${ANTLR4_SHARED_LIBRARIES} ${ANTLR4_RUNTIME_LIBRARIES}
+        EXCLUDE_FROM_MAIN 1
+        WORKING_DIRECTORY ${ANTLR4_BUILD_DIR})
+ExternalProject_Add_StepTargets(antlr4_runtime build_shared)
+
+add_library(antlr4_shared SHARED IMPORTED)
+add_dependencies(antlr4_shared antlr4_runtime-build_shared)
+set_target_properties(antlr4_shared PROPERTIES
+        IMPORTED_LOCATION ${ANTLR4_RUNTIME_LIBRARIES})
+target_include_directories(antlr4_shared
+        INTERFACE
+        ${ANTLR4_INCLUDE_DIRS}
+)
+
+if(ANTLR4_SHARED_LIBRARIES)
+    set_target_properties(antlr4_shared PROPERTIES
+            IMPORTED_IMPLIB ${ANTLR4_SHARED_LIBRARIES})
+endif()
diff --git a/components/core/cmake/Modules/FindANTLR.cmake b/components/core/cmake/Modules/FindANTLR.cmake
new file mode 100644
index 000000000..d191ba071
--- /dev/null
+++ b/components/core/cmake/Modules/FindANTLR.cmake
@@ -0,0 +1,139 @@
+# NOTE: FindANTLR.cmake taken from
+# https://github.com/antlr/antlr4/blob/4.13.1/runtime/Cpp/cmake/FindANTLR.cmake
+
+# TODO: Clean up ANTLR cmake files
+# On macOS, Homebrew's Java is not the system default, so point JAVA_HOME at its install location.
+# Only do so if that path exists (it doesn't on Apple Silicon), else JAVA_HOME would be bogus.
+if (APPLE AND EXISTS "/usr/local/opt/openjdk@11")
+    set(ENV{JAVA_HOME} "/usr/local/opt/openjdk@11/")
+endif ()
+
+set(ANTLR4_TAG 4.13.1)
+add_definitions(-DANTLR4CPP_STATIC)
+set(ANTLR_EXECUTABLE ${PROJECT_SOURCE_DIR}/third-party/antlr/antlr-${ANTLR4_TAG}-complete.jar)
+include(ExternalAntlr4Cpp)
+
+find_package(Java 11 REQUIRED COMPONENTS Runtime)
+
+if(NOT ANTLR_EXECUTABLE)  # Upstream fallback; not taken here since ANTLR_EXECUTABLE is set above
+    find_program(ANTLR_EXECUTABLE
+            NAMES antlr.jar antlr4.jar antlr-4.jar antlr-${ANTLR4_TAG}-complete.jar)
+endif()
+
+if(ANTLR_EXECUTABLE AND Java_JAVA_EXECUTABLE)
+    execute_process(
+            COMMAND ${Java_JAVA_EXECUTABLE} -jar ${ANTLR_EXECUTABLE}
+            OUTPUT_VARIABLE ANTLR_COMMAND_OUTPUT
+            ERROR_VARIABLE ANTLR_COMMAND_ERROR
+            RESULT_VARIABLE ANTLR_COMMAND_RESULT
+            OUTPUT_STRIP_TRAILING_WHITESPACE)
+    # Parse "Version x.y.z" from the output; quoted so an empty value can't drop an argument
+    if(ANTLR_COMMAND_RESULT EQUAL 0)
+        string(REGEX MATCH "Version [0-9]+(\\.[0-9]+)*" ANTLR_VERSION "${ANTLR_COMMAND_OUTPUT}")
+        string(REPLACE "Version " "" ANTLR_VERSION "${ANTLR_VERSION}")
+    else()
+        message(
+                SEND_ERROR
+                "Command '${Java_JAVA_EXECUTABLE} -jar ${ANTLR_EXECUTABLE}' "
+                "failed with the output '${ANTLR_COMMAND_ERROR}'")
+    endif()
+
+    macro(ANTLR_TARGET Name InputFile)  # Adds a command that runs ANTLR on InputFile (C++ output)
+        set(ANTLR_OPTIONS LEXER PARSER LISTENER VISITOR)
+        set(ANTLR_ONE_VALUE_ARGS PACKAGE OUTPUT_DIRECTORY DEPENDS_ANTLR)
+        set(ANTLR_MULTI_VALUE_ARGS COMPILE_FLAGS DEPENDS)
+        cmake_parse_arguments(ANTLR_TARGET
+                "${ANTLR_OPTIONS}"
+                "${ANTLR_ONE_VALUE_ARGS}"
+                "${ANTLR_MULTI_VALUE_ARGS}"
+                ${ARGN})
+        # Each parsed argument is available below as ANTLR_TARGET_<ARG>
+        set(ANTLR_${Name}_INPUT ${InputFile})
+        # Grammar name: InputFile without its directory and extension
+        get_filename_component(ANTLR_INPUT ${InputFile} NAME_WE)
+
+        if(ANTLR_TARGET_OUTPUT_DIRECTORY)
+            set(ANTLR_${Name}_OUTPUT_DIR ${ANTLR_TARGET_OUTPUT_DIRECTORY})
+        else()
+            set(ANTLR_${Name}_OUTPUT_DIR
+                    ${CMAKE_CURRENT_BINARY_DIR}/antlr4cpp_generated_src/${ANTLR_INPUT})
+        endif()
+
+        unset(ANTLR_${Name}_CXX_OUTPUTS)
+        # With exactly one of LEXER/PARSER, the outputs are named after the grammar alone
+        if((ANTLR_TARGET_LEXER AND NOT ANTLR_TARGET_PARSER) OR
+        (ANTLR_TARGET_PARSER AND NOT ANTLR_TARGET_LEXER))
+            list(APPEND ANTLR_${Name}_CXX_OUTPUTS
+                    ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}.h
+                    ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}.cpp)
+            set(ANTLR_${Name}_OUTPUTS
+                    ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}.interp
+                    ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}.tokens)
+        else()
+            list(APPEND ANTLR_${Name}_CXX_OUTPUTS
+                    ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Lexer.h
+                    ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Lexer.cpp
+                    ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Parser.h
+                    ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Parser.cpp)
+            list(APPEND ANTLR_${Name}_OUTPUTS
+                    ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Lexer.interp
+                    ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Lexer.tokens)
+        endif()
+
+        if(ANTLR_TARGET_LISTENER)
+            list(APPEND ANTLR_${Name}_CXX_OUTPUTS
+                    ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}BaseListener.h
+                    ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}BaseListener.cpp
+                    ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Listener.h
+                    ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Listener.cpp)
+            list(APPEND ANTLR_TARGET_COMPILE_FLAGS -listener)
+        endif()
+
+        if(ANTLR_TARGET_VISITOR)
+            list(APPEND ANTLR_${Name}_CXX_OUTPUTS
+                    ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}BaseVisitor.h
+                    ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}BaseVisitor.cpp
+                    ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Visitor.h
+                    ${ANTLR_${Name}_OUTPUT_DIR}/${ANTLR_INPUT}Visitor.cpp)
+            list(APPEND ANTLR_TARGET_COMPILE_FLAGS -visitor)
+        endif()
+
+        if(ANTLR_TARGET_PACKAGE)
+            list(APPEND ANTLR_TARGET_COMPILE_FLAGS -package ${ANTLR_TARGET_PACKAGE})
+        endif()
+
+        list(APPEND ANTLR_${Name}_OUTPUTS ${ANTLR_${Name}_CXX_OUTPUTS})
+        # DEPENDS_ANTLR: also depend on a previously declared target's grammar and outputs
+        if(ANTLR_TARGET_DEPENDS_ANTLR)
+            if(ANTLR_${ANTLR_TARGET_DEPENDS_ANTLR}_INPUT)
+                list(APPEND ANTLR_TARGET_DEPENDS
+                        ${ANTLR_${ANTLR_TARGET_DEPENDS_ANTLR}_INPUT})
+                list(APPEND ANTLR_TARGET_DEPENDS
+                        ${ANTLR_${ANTLR_TARGET_DEPENDS_ANTLR}_OUTPUTS})
+            else()
+                message(SEND_ERROR
+                        "ANTLR target '${ANTLR_TARGET_DEPENDS_ANTLR}' not found")
+            endif()
+        endif()
+        # NOTE(review): -no-listener is always passed; a LISTENER target adds -listener after it
+        add_custom_command(
+                OUTPUT ${ANTLR_${Name}_OUTPUTS}
+                COMMAND ${Java_JAVA_EXECUTABLE} -jar ${ANTLR_EXECUTABLE}
+                ${InputFile}
+                -o ${ANTLR_${Name}_OUTPUT_DIR}
+                -no-listener
+                -Dlanguage=Cpp
+                ${ANTLR_TARGET_COMPILE_FLAGS}
+                DEPENDS ${InputFile}
+                ${ANTLR_TARGET_DEPENDS}
+                WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+                COMMENT "Building ${Name} with ANTLR ${ANTLR_VERSION}")
+    endmacro(ANTLR_TARGET)
+
+endif(ANTLR_EXECUTABLE AND Java_JAVA_EXECUTABLE)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(  # Sets ANTLR_FOUND based on the required variables below
+        ANTLR
+        REQUIRED_VARS ANTLR_EXECUTABLE Java_JAVA_EXECUTABLE
+        VERSION_VAR ANTLR_VERSION)
diff --git a/components/core/src/clp_s/ArchiveReader.cpp b/components/core/src/clp_s/ArchiveReader.cpp
new file mode 100644
index 000000000..c716969a4
--- /dev/null
+++ b/components/core/src/clp_s/ArchiveReader.cpp
@@ -0,0 +1,82 @@
+#include "ArchiveReader.hpp"
+
+#include "ReaderUtils.hpp"
+
+namespace clp_s {
+void ArchiveReader::open(ArchiveReaderOption& option) {
+    // Open dictionary readers
+    m_archive_path = option.archive_path;
+
+    m_var_dict = ReaderUtils::get_variable_dictionary_reader(m_archive_path);
+    m_log_dict = ReaderUtils::get_log_type_dictionary_reader(m_archive_path);
+    m_array_dict = ReaderUtils::get_array_dictionary_reader(m_archive_path);
+
+    m_var_dict->read_new_entries();
+    m_log_dict->read_new_entries();
+    m_array_dict->read_new_entries();
+
+    std::string encoded_messages_dir = m_archive_path + "/encoded_messages";
+    if (false == boost::filesystem::exists(encoded_messages_dir)) {
+        throw OperationFailed(ErrorCodeErrno, __FILENAME__, __LINE__);
+    }
+
+    std::set<int32_t> schema_ids;
+    boost::filesystem::directory_iterator iter(encoded_messages_dir);
+    boost::filesystem::directory_iterator end;
+    // Get all schema ids. isdigit() is undefined for negative chars, hence the unsigned char.
+    auto is_digit = [](unsigned char c) { return 0 != ::isdigit(c); };
+    for (; iter != end; ++iter) {
+        if (boost::filesystem::is_regular_file(iter->path())) {
+            std::string path = iter->path().rbegin()->string();
+            if (false == path.empty() && std::all_of(path.begin(), path.end(), is_digit)) {
+                schema_ids.insert(std::stoi(path));
+            }
+        }
+    }
+
+    if (schema_ids.empty()) {
+        throw OperationFailed(ErrorCodeFileNotFound, __FILENAME__, __LINE__);
+    }
+
+    // Open schema readers and load encoded messages
+    for (int32_t schema_id : schema_ids) {
+        auto& schema = m_id_to_schema[schema_id];
+        // Held by a unique_ptr until fully loaded so that a throwing open()/load() can't leak it
+        auto schema_reader = std::make_unique<SchemaReader>(m_schema_tree, schema_id);
+        schema_reader->open(encoded_messages_dir + "/" + std::to_string(schema_id));
+        ReaderUtils::append_reader_columns(
+                schema_reader.get(),
+                schema,
+                m_schema_tree,
+                m_var_dict,
+                m_log_dict,
+                m_array_dict,
+                m_timestamp_dict
+        );
+        schema_reader->load();
+        auto& registered_reader = m_schema_id_to_reader[schema_id];  // may allocate: do it first
+        registered_reader = schema_reader.release();  // the map owns it from here (see close())
+    }
+}
+
+void ArchiveReader::store(FileWriter& writer) {
+    std::string decoded_message;  // one buffer, refilled for every message
+    for (auto& [schema_id, schema_reader] : m_schema_id_to_reader) {
+        while (schema_reader->get_next_message(decoded_message)) {
+            writer.write(decoded_message.c_str(), decoded_message.length());
+        }
+    }
+}
+
+void ArchiveReader::close() {
+    m_var_dict->close();
+    m_log_dict->close();
+    m_array_dict->close();  // opened in open() like the other two, so it must be closed as well
+    for (auto& i : m_schema_id_to_reader) {
+        i.second->close();
+        delete i.second;
+    }
+
+    m_schema_id_to_reader.clear();
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/ArchiveReader.hpp b/components/core/src/clp_s/ArchiveReader.hpp
new file mode 100644
index 000000000..7df8a01ef
--- /dev/null
+++ b/components/core/src/clp_s/ArchiveReader.hpp
@@ -0,0 +1,71 @@
+#ifndef CLP_S_ARCHIVEREADER_HPP
+#define CLP_S_ARCHIVEREADER_HPP
+
+#include <map>
+#include <set>
+#include <utility>
+
+#include <boost/filesystem.hpp>
+
+#include "DictionaryReader.hpp"
+#include "SchemaReader.hpp"
+#include "TimestampDictionaryReader.hpp"
+
+namespace clp_s {
+struct ArchiveReaderOption {
+    std::string archive_path;  // archive directory that ArchiveReader::open() reads from
+    std::map<int32_t, std::set<int32_t>> id_to_schema;  // NOTE(review): unused by open() - confirm
+};
+
+class ArchiveReader {  // Reads an archive directory and decodes its messages, schema by schema
+public:
+    class OperationFailed : public TraceableException {
+    public:
+        // Constructors
+        OperationFailed(ErrorCode error_code, char const* const filename, int line_number)
+                : TraceableException(error_code, filename, line_number) {}
+    };
+
+    // Constructor: only stores its arguments; nothing is read until open()
+    ArchiveReader(
+            std::shared_ptr<SchemaTree> schema_tree,
+            std::map<int32_t, std::set<int32_t>> id_to_schema,
+            std::shared_ptr<TimestampDictionaryReader> timestamp_dict
+    )
+            : m_schema_tree(std::move(schema_tree)),
+              m_id_to_schema(std::move(id_to_schema)),
+              m_timestamp_dict(std::move(timestamp_dict)) {}
+
+    /**
+     * Opens an archive for reading: its dictionaries and every schema file in encoded_messages/.
+     * @param option Provides archive_path. Throws OperationFailed if no schema files are found.
+     */
+    void open(ArchiveReaderOption& option);
+
+    /**
+     * Writes decoded messages to a file, one schema after another in ascending schema ID order.
+     * @param writer
+     */
+    void store(FileWriter& writer);
+
+    /**
+     * Closes the archive, closing and freeing every schema reader created by open().
+     */
+    void close();
+
+private:
+    std::string m_archive_path;
+
+    std::shared_ptr<VariableDictionaryReader> m_var_dict;
+    std::shared_ptr<LogTypeDictionaryReader> m_log_dict;
+    std::shared_ptr<LogTypeDictionaryReader> m_array_dict;  // log type dictionary for arrays
+
+    std::shared_ptr<SchemaTree> m_schema_tree;
+    std::map<int32_t, std::set<int32_t>> m_id_to_schema;  // schema ID -> the schema's ID set
+    std::map<int32_t, SchemaReader*> m_schema_id_to_reader;  // owning pointers; freed in close()
+
+    std::shared_ptr<TimestampDictionaryReader> m_timestamp_dict;
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_ARCHIVEREADER_HPP
diff --git a/components/core/src/clp_s/ArchiveWriter.cpp b/components/core/src/clp_s/ArchiveWriter.cpp
new file mode 100644
index 000000000..52eee8a0d
--- /dev/null
+++ b/components/core/src/clp_s/ArchiveWriter.cpp
@@ -0,0 +1,124 @@
+#include "ArchiveWriter.hpp"
+
+#include "SchemaTree.hpp"
+
+namespace clp_s {
+void ArchiveWriter::open(ArchiveWriterOption const& option) {
+    m_compression_level = option.compression_level;
+    m_id = option.id;
+    boost::filesystem::path const archive_dir
+            = boost::filesystem::path(option.archives_dir) / boost::uuids::to_string(m_id);
+
+    // Never write into an existing path; the error_code overload keeps this check from throwing
+    boost::system::error_code exists_error;
+    if (boost::filesystem::exists(archive_dir, exists_error)) {
+        SPDLOG_ERROR("Archive path already exists: {}", archive_dir.c_str());
+        throw OperationFailed(ErrorCodeUnsupported, __FILENAME__, __LINE__);
+    }
+
+    m_archive_path = archive_dir.string();
+    if (false == boost::filesystem::create_directory(m_archive_path)) {
+        throw OperationFailed(ErrorCodeErrno, __FILENAME__, __LINE__);
+    }
+
+    m_encoded_messages_dir = m_archive_path + "/encoded_messages";
+    if (false == boost::filesystem::create_directory(m_encoded_messages_dir)) {
+        throw OperationFailed(ErrorCodeErrno, __FILENAME__, __LINE__);
+    }
+
+    m_var_dict = std::make_shared<VariableDictionaryWriter>();
+    std::string var_path = m_archive_path + "/var.dict";
+    m_var_dict->open(var_path, m_compression_level, UINT64_MAX);
+
+    m_log_dict = std::make_shared<LogTypeDictionaryWriter>();
+    std::string log_path = m_archive_path + "/log.dict";
+    m_log_dict->open(log_path, m_compression_level, UINT64_MAX);
+
+    m_array_dict = std::make_shared<LogTypeDictionaryWriter>();
+    std::string array_path = m_archive_path + "/array.dict";
+    m_array_dict->open(array_path, m_compression_level, UINT64_MAX);
+
+    std::string timestamp_path = m_archive_path + "/timestamp.dict";
+    m_timestamp_dict->open_local(timestamp_path, m_compression_level);
+}
+
+void ArchiveWriter::close() {
+    m_var_dict->close();
+    m_log_dict->close();
+    m_array_dict->close();
+    m_timestamp_dict->close_local();
+
+    // store() and close() each schema writer, then free it (the map holds owning pointers)
+    for (auto& [schema_id, schema_writer] : m_schema_id_to_writer) {
+        schema_writer->store();
+        schema_writer->close();
+        delete schema_writer;
+    }
+    m_schema_id_to_writer.clear();
+    m_encoded_message_size = 0UL;
+}
+
+void ArchiveWriter::append_message(
+        int32_t schema_id,
+        std::set<int32_t>& schema,
+        ParsedMessage& message
+) {
+    auto it = m_schema_id_to_writer.find(schema_id);
+    if (m_schema_id_to_writer.end() == it) {
+        // First message of this schema. The new writer is held by a unique_ptr until it is set
+        // up and registered, so it isn't leaked if open() or the column setup throws.
+        auto new_writer = std::make_unique<SchemaWriter>();
+        new_writer->open(
+                m_encoded_messages_dir + "/" + std::to_string(schema_id),
+                m_compression_level
+        );
+        initialize_schema_writer(new_writer.get(), schema);
+        it = m_schema_id_to_writer.emplace(schema_id, new_writer.get()).first;
+        new_writer.release();  // the map owns the writer from here on (freed in close())
+    }
+
+    m_encoded_message_size += it->second->append_message(message);
+}
+
+size_t ArchiveWriter::get_data_size() {
+    size_t const dictionaries_size = m_log_dict->get_data_size() + m_var_dict->get_data_size();
+    return dictionaries_size + m_array_dict->get_data_size() + m_encoded_message_size;
+}
+
+void ArchiveWriter::initialize_schema_writer(SchemaWriter* writer, std::set<int32_t>& schema) {
+    // Appends one column writer per schema node, visiting the nodes in the set's order
+    for (int32_t node_id : schema) {
+        auto schema_node = m_schema_tree->get_node(node_id);
+        std::string name = schema_node->get_key_name();
+        switch (schema_node->get_type()) {
+            case NodeType::OBJECT:
+            case NodeType::NULLVALUE:
+                break;  // no column is written for these node types
+            case NodeType::BOOLEAN:
+                writer->append_column(new BooleanColumnWriter(name));
+                break;
+            case NodeType::INTEGER:
+                writer->append_column(new Int64ColumnWriter(name));
+                break;
+            case NodeType::FLOAT:
+                writer->append_column(new FloatColumnWriter(name));
+                break;
+            case NodeType::VARSTRING:
+                writer->append_column(new VariableStringColumnWriter(name, m_var_dict));
+                break;
+            case NodeType::CLPSTRING:
+                writer->append_column(new ClpStringColumnWriter(name, m_var_dict, m_log_dict));
+                break;
+            case NodeType::ARRAY:
+                writer->append_column(new ClpStringColumnWriter(name, m_var_dict, m_array_dict));
+                break;
+            case NodeType::DATESTRING:
+                writer->append_column(new DateStringColumnWriter(name, m_timestamp_dict));
+                break;
+            case NodeType::FLOATDATESTRING:
+                writer->append_column(new FloatDateStringColumnWriter(name, m_timestamp_dict));
+                break;
+        }
+    }
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/ArchiveWriter.hpp b/components/core/src/clp_s/ArchiveWriter.hpp
new file mode 100644
index 000000000..9c3b7db37
--- /dev/null
+++ b/components/core/src/clp_s/ArchiveWriter.hpp
@@ -0,0 +1,94 @@
+#ifndef CLP_S_ARCHIVEWRITER_HPP
+#define CLP_S_ARCHIVEWRITER_HPP
+
+#include <set>
+#include <utility>
+
+#include <boost/filesystem.hpp>
+#include <boost/uuid/uuid.hpp>
+#include <boost/uuid/uuid_io.hpp>
+
+#include "DictionaryWriter.hpp"
+#include "SchemaTree.hpp"
+#include "SchemaWriter.hpp"
+#include "TimestampDictionaryWriter.hpp"
+
+namespace clp_s {
+struct ArchiveWriterOption {
+    boost::uuids::uuid id;  // archive ID; its string form names the archive's directory
+    std::string archives_dir;  // directory in which the archive's directory is created
+    int compression_level;  // handed to the dictionary and schema writers when they are opened
+};
+
+class ArchiveWriter {  // Writes one archive: dictionaries plus one encoded file per schema
+public:
+    class OperationFailed : public TraceableException {
+    public:
+        // Constructors
+        OperationFailed(ErrorCode error_code, char const* const filename, int line_number)
+                : TraceableException(error_code, filename, line_number) {}
+    };
+
+    // Delete default constructor
+    ArchiveWriter() = delete;
+
+    // Constructor. Initializers are listed in member declaration order (avoids -Wreorder).
+    explicit ArchiveWriter(
+            std::shared_ptr<SchemaTree> schema_tree,
+            std::shared_ptr<TimestampDictionaryWriter> timestamp_dict
+    )
+            : m_encoded_message_size(0UL),
+              m_timestamp_dict(std::move(timestamp_dict)),
+              m_schema_tree(std::move(schema_tree)) {}
+
+    /**
+     * Opens the archive writer: creates <archives_dir>/<id> and the dictionary writers in it
+     * @param option Archive ID, parent directory and compression level
+     */
+    void open(ArchiveWriterOption const& option);
+
+    /**
+     * Closes the archive writer, storing and freeing every schema writer
+     */
+    void close();
+
+    /**
+     * Appends a message to the archive writer, creating the schema's writer on first use
+     * @param schema_id
+     * @param schema IDs of the schema tree nodes that make up the schema
+     * @param message
+     */
+    void append_message(int32_t schema_id, std::set<int32_t>& schema, ParsedMessage& message);
+
+    /**
+     * @return Size of the uncompressed data written to the archive
+     */
+    size_t get_data_size();
+
+private:
+    /**
+     * Initializes the schema writer by appending one column writer per storable schema node
+     * @param writer
+     * @param schema
+     */
+    void initialize_schema_writer(SchemaWriter* writer, std::set<int32_t>& schema);
+
+    size_t m_encoded_message_size;  // sum of the sizes returned by SchemaWriter::append_message
+
+    boost::uuids::uuid m_id{};
+
+    std::string m_archive_path;
+    std::string m_encoded_messages_dir;
+
+    std::shared_ptr<VariableDictionaryWriter> m_var_dict;
+    std::shared_ptr<LogTypeDictionaryWriter> m_log_dict;
+    std::shared_ptr<LogTypeDictionaryWriter> m_array_dict;  // log type dictionary for arrays
+    std::shared_ptr<TimestampDictionaryWriter> m_timestamp_dict;
+    int m_compression_level{};
+
+    std::shared_ptr<SchemaTree> m_schema_tree;
+    std::map<int32_t, SchemaWriter*> m_schema_id_to_writer;  // owning pointers; freed in close()
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_ARCHIVEWRITER_HPP
diff --git a/components/core/src/clp_s/CMakeLists.txt b/components/core/src/clp_s/CMakeLists.txt
new file mode 100644
index 000000000..325af0334
--- /dev/null
+++ b/components/core/src/clp_s/CMakeLists.txt
@@ -0,0 +1,137 @@
+add_subdirectory(search/kql)  # NOTE(review): presumably defines the kql target linked below
+# Core clp-s sources; headers are listed next to their implementation files
+set(
+        CLP_S_SOURCES
+        "${PROJECT_SOURCE_DIR}/submodules/date/include/date/date.h"
+        ArchiveReader.cpp
+        ArchiveReader.hpp
+        ArchiveWriter.cpp
+        ArchiveWriter.hpp
+        ColumnReader.cpp
+        ColumnReader.hpp
+        ColumnWriter.cpp
+        ColumnWriter.hpp
+        CommandLineArguments.cpp
+        CommandLineArguments.hpp
+        Compressor.hpp
+        Decompressor.hpp
+        Defs.hpp
+        DictionaryEntry.cpp
+        DictionaryEntry.hpp
+        DictionaryReader.hpp
+        DictionaryWriter.cpp
+        DictionaryWriter.hpp
+        ErrorCode.hpp
+        FileReader.cpp
+        FileReader.hpp
+        FileWriter.cpp
+        FileWriter.hpp
+        JsonConstructor.cpp
+        JsonConstructor.hpp
+        JsonFileIterator.cpp
+        JsonFileIterator.hpp
+        JsonParser.cpp
+        JsonParser.hpp
+        JsonSerializer.hpp
+        ParsedMessage.hpp
+        ReaderUtils.cpp
+        ReaderUtils.hpp
+        SchemaMap.cpp
+        SchemaMap.hpp
+        SchemaReader.cpp
+        SchemaReader.hpp
+        SchemaTree.cpp
+        SchemaTree.hpp
+        SchemaWriter.cpp
+        SchemaWriter.hpp
+        TimestampDictionaryReader.cpp
+        TimestampDictionaryReader.hpp
+        TimestampDictionaryWriter.cpp
+        TimestampDictionaryWriter.hpp
+        TimestampEntry.cpp
+        TimestampEntry.hpp
+        TimestampPattern.cpp
+        TimestampPattern.hpp
+        TraceableException.hpp
+        Utils.cpp
+        Utils.hpp
+        VariableDecoder.cpp
+        VariableDecoder.hpp
+        VariableEncoder.cpp
+        VariableEncoder.hpp
+        ZstdCompressor.cpp
+        ZstdCompressor.hpp
+        ZstdDecompressor.cpp
+        ZstdDecompressor.hpp
+)
+# Sources under search/ (the kql subdirectory is added separately above)
+set(
+        CLP_S_SEARCH_SOURCES
+        search/AndExpr.cpp
+        search/AndExpr.hpp
+        search/BooleanLiteral.cpp
+        search/BooleanLiteral.hpp
+        search/clp_search/EncodedVariableInterpreter.cpp
+        search/clp_search/EncodedVariableInterpreter.hpp
+        search/clp_search/Grep.cpp
+        search/clp_search/Grep.hpp
+        search/clp_search/Query.cpp
+        search/clp_search/Query.hpp
+        search/ColumnDescriptor.cpp
+        search/ColumnDescriptor.hpp
+        search/ConstantProp.cpp
+        search/ConstantProp.hpp
+        search/ConvertToExists.cpp
+        search/ConvertToExists.hpp
+        search/DateLiteral.cpp
+        search/DateLiteral.hpp
+        search/EmptyExpr.cpp
+        search/EmptyExpr.hpp
+        search/EvaluateTimestampIndex.cpp
+        search/EvaluateTimestampIndex.hpp
+        search/Expression.cpp
+        search/Expression.hpp
+        search/FilterExpr.cpp
+        search/FilterExpr.hpp
+        search/FilterOperation.hpp
+        search/Integral.cpp
+        search/Integral.hpp
+        search/Literal.hpp
+        search/NarrowTypes.cpp
+        search/NarrowTypes.hpp
+        search/NullLiteral.cpp
+        search/NullLiteral.hpp
+        search/OrExpr.cpp
+        search/OrExpr.hpp
+        search/OrOfAndForm.cpp
+        search/OrOfAndForm.hpp
+        search/Output.cpp
+        search/Output.hpp
+        search/SchemaMatch.cpp
+        search/SchemaMatch.hpp
+        search/SearchUtils.cpp
+        search/SearchUtils.hpp
+        search/StringLiteral.cpp
+        search/StringLiteral.hpp
+        search/Transformation.hpp
+        search/Value.hpp
+)
+# The clp-s executable is built from clp-s.cpp plus both source lists
+add_executable(clp-s clp-s.cpp ${CLP_S_SOURCES} ${CLP_S_SEARCH_SOURCES})
+target_compile_features(clp-s PRIVATE cxx_std_17)
+target_link_libraries(
+        clp-s
+        PRIVATE
+        absl::flat_hash_map
+        Boost::filesystem Boost::iostreams Boost::program_options
+        kql
+        simdjson
+        spdlog::spdlog
+        ZStd::ZStd
+)
+target_include_directories(clp-s PRIVATE "${PROJECT_SOURCE_DIR}/submodules")
+set_target_properties(
+        clp-s
+        PROPERTIES
+        RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}"  # binary goes to the top of the build tree
+)
diff --git a/components/core/src/clp_s/ColumnReader.cpp b/components/core/src/clp_s/ColumnReader.cpp
new file mode 100644
index 000000000..c45104422
--- /dev/null
+++ b/components/core/src/clp_s/ColumnReader.cpp
@@ -0,0 +1,177 @@
+#include "ColumnReader.hpp"
+
+#include "ColumnWriter.hpp"
+#include "Utils.hpp"
+#include "VariableDecoder.hpp"
+
+namespace clp_s {
+namespace {
+/**
+ * Reads num_values fixed-width values from the decompressor into values. The bytes are copied
+ * verbatim, so the stored layout must match the in-memory layout of ValueType.
+ * @param decompressor
+ * @param values Destination array with room for num_values elements
+ * @param num_values
+ * @throws BaseColumnReader::OperationFailed if the read fails; previously the error code was
+ * ignored, which left a column silently unread or partially read
+ */
+template <typename ValueType>
+void read_values(ZstdDecompressor& decompressor, ValueType* values, uint64_t num_values) {
+    auto error_code = decompressor.try_read_exact_length(
+            reinterpret_cast<char*>(values),
+            num_values * sizeof(ValueType)
+    );
+    if (ErrorCodeSuccess != error_code) {
+        throw BaseColumnReader::OperationFailed(error_code, __FILENAME__, __LINE__);
+    }
+}
+}  // namespace
+
+void Int64ColumnReader::load(ZstdDecompressor& decompressor, uint64_t num_messages) {
+    m_values = std::make_unique<int64_t[]>(num_messages);
+    read_values(decompressor, m_values.get(), num_messages);
+}
+
+std::variant<int64_t, double, std::string, uint8_t> Int64ColumnReader::extract_value(
+        uint64_t cur_message
+) {
+    return m_values[cur_message];
+}
+
+void FloatColumnReader::load(ZstdDecompressor& decompressor, uint64_t num_messages) {
+    m_values = std::make_unique<double[]>(num_messages);
+    read_values(decompressor, m_values.get(), num_messages);
+}
+
+std::variant<int64_t, double, std::string, uint8_t> FloatColumnReader::extract_value(
+        uint64_t cur_message
+) {
+    return m_values[cur_message];
+}
+
+void BooleanColumnReader::load(ZstdDecompressor& decompressor, uint64_t num_messages) {
+    m_values = std::make_unique<uint8_t[]>(num_messages);
+    read_values(decompressor, m_values.get(), num_messages);
+}
+
+std::variant<int64_t, double, std::string, uint8_t> BooleanColumnReader::extract_value(
+        uint64_t cur_message
+) {
+    // Returned as uint8_t: the variant has no bool alternative
+    return m_values[cur_message];
+}
+
+void ClpStringColumnReader::load(ZstdDecompressor& decompressor, uint64_t num_messages) {
+    // One packed int64_t per message, holding its log type ID and encoded-variable offset
+    m_logtypes = std::make_unique<int64_t[]>(num_messages);
+    read_values(decompressor, m_logtypes.get(), num_messages);
+
+    // The encoded variables of all messages follow, prefixed by their total count
+    size_t encoded_vars_length;
+    auto error_code = decompressor.try_read_numeric_value(encoded_vars_length);
+    if (ErrorCodeSuccess != error_code) {
+        throw OperationFailed(error_code, __FILENAME__, __LINE__);
+    }
+
+    m_encoded_vars = std::make_unique<int64_t[]>(encoded_vars_length);
+    read_values(decompressor, m_encoded_vars.get(), encoded_vars_length);
+}
+
+std::variant<int64_t, double, std::string, uint8_t> ClpStringColumnReader::extract_value(
+        uint64_t cur_message
+) {
+    std::string message;
+
+    auto value = m_logtypes[cur_message];
+    int64_t logtype_id = ClpStringColumnWriter::get_encoded_log_dict_id(value);
+    auto& entry = m_log_dict->get_entry(logtype_id);
+
+    // Log type entries are decoded lazily, on first use
+    if (false == entry.initialized()) {
+        entry.decode_log_type();
+    }
+
+    // This message's variables start at the offset packed into the same value
+    int64_t encoded_vars_offset = ClpStringColumnWriter::get_encoded_offset(value);
+    Span<int64_t> encoded_vars(&m_encoded_vars[encoded_vars_offset], entry.get_num_vars());
+
+    VariableDecoder::decode_variables_into_message(entry, *m_var_dict, encoded_vars, message);
+
+    return message;
+}
+
+int64_t ClpStringColumnReader::get_encoded_id(uint64_t cur_message) {
+    // The log type dictionary ID is packed into the same value as the variable offset
+    auto value = m_logtypes[cur_message];
+    return ClpStringColumnWriter::get_encoded_log_dict_id(value);
+}
+
+Span<int64_t> ClpStringColumnReader::get_encoded_vars(uint64_t cur_message) {
+    auto value = m_logtypes[cur_message];
+    int64_t logtype_id = ClpStringColumnWriter::get_encoded_log_dict_id(value);
+    auto& entry = m_log_dict->get_entry(logtype_id);
+
+    // It should be initialized before because we are searching on this field
+    if (false == entry.initialized()) {
+        entry.decode_log_type();
+    }
+
+    int64_t encoded_vars_offset = ClpStringColumnWriter::get_encoded_offset(value);
+
+    return {&m_encoded_vars[encoded_vars_offset], entry.get_num_vars()};
+}
+
+void VariableStringColumnReader::load(ZstdDecompressor& decompressor, uint64_t num_messages) {
+    m_variables = std::make_unique<int64_t[]>(num_messages);
+    read_values(decompressor, m_variables.get(), num_messages);
+}
+
+std::variant<int64_t, double, std::string, uint8_t> VariableStringColumnReader::extract_value(
+        uint64_t cur_message
+) {
+    // The column stores variable dictionary IDs; look the ID up to get the string
+    return m_var_dict->get_value(m_variables[cur_message]);
+}
+
+int64_t VariableStringColumnReader::get_variable_id(uint64_t cur_message) {
+    return m_variables[cur_message];
+}
+
+void DateStringColumnReader::load(ZstdDecompressor& decompressor, uint64_t num_messages) {
+    m_timestamps = std::make_unique<int64_t[]>(num_messages);
+    m_timestamp_encodings = std::make_unique<int64_t[]>(num_messages);
+
+    // Stored as two consecutive arrays: all timestamps, then all encodings
+    read_values(decompressor, m_timestamps.get(), num_messages);
+    read_values(decompressor, m_timestamp_encodings.get(), num_messages);
+}
+
+std::variant<int64_t, double, std::string, uint8_t> DateStringColumnReader::extract_value(
+        uint64_t cur_message
+) {
+    return m_timestamp_dict->get_string_encoding(
+            m_timestamps[cur_message],
+            m_timestamp_encodings[cur_message]
+    );
+}
+
+epochtime_t DateStringColumnReader::get_encoded_time(uint64_t cur_message) {
+    return m_timestamps[cur_message];
+}
+
+void FloatDateStringColumnReader::load(ZstdDecompressor& decompressor, uint64_t num_messages) {
+    m_timestamps = std::make_unique<double[]>(num_messages);
+    read_values(decompressor, m_timestamps.get(), num_messages);
+}
+
+std::variant<int64_t, double, std::string, uint8_t> FloatDateStringColumnReader::extract_value(
+        uint64_t cur_message
+) {
+    // std::to_string(double) formats with six digits after the decimal point
+    return std::to_string(m_timestamps[cur_message]);
+}
+
+double FloatDateStringColumnReader::get_encoded_time(uint64_t cur_message) {
+    return m_timestamps[cur_message];
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/ColumnReader.hpp b/components/core/src/clp_s/ColumnReader.hpp
new file mode 100644
index 000000000..0b3d86a65
--- /dev/null
+++ b/components/core/src/clp_s/ColumnReader.hpp
@@ -0,0 +1,265 @@
+#ifndef CLP_S_COLUMNREADER_HPP
+#define CLP_S_COLUMNREADER_HPP
+
+#include <string>
+#include <variant>
+
+#include "DictionaryReader.hpp"
+#include "TimestampDictionaryReader.hpp"
+#include "Utils.hpp"
+#include "ZstdDecompressor.hpp"
+
+namespace clp_s {
+class BaseColumnReader {
+public:
+    class OperationFailed : public TraceableException {
+    public:
+        // Constructors
+        OperationFailed(ErrorCode error_code, char const* const filename, int line_number)
+                : TraceableException(error_code, filename, line_number) {}
+    };
+
+    // Constructor: takes the column's name by value (moved into place) and its numeric id
+    BaseColumnReader(std::string name, int32_t id) : m_name(std::move(name)), m_id(id) {}
+
+    // Destructor (virtual so that derived readers can be destroyed through a base pointer)
+    virtual ~BaseColumnReader() = default;
+
+    /**
+     * Reads the column's values through the given decompressor
+     * @param decompressor Stream from which the column's data is read
+     * @param num_messages Number of messages (i.e., values) to load
+     */
+    virtual void load(ZstdDecompressor& decompressor, uint64_t num_messages) = 0;
+
+    std::string const& get_name() const { return m_name; }  // by reference: no copy per call
+
+    int32_t get_id() const { return m_id; }
+
+    virtual std::string get_type() { return "base"; }  // type label; overridden by subclasses
+
+    /**
+     * Extracts a value of the column
+     * @param cur_message Index of the message whose value should be extracted
+     * @return The value, held in the variant alternative chosen by the concrete reader
+     */
+    virtual std::variant<int64_t, double, std::string, uint8_t> extract_value(uint64_t cur_message)
+            = 0;
+
+private:
+    std::string m_name;
+    int32_t m_id;
+};
+
+class Int64ColumnReader : public BaseColumnReader {
+public:
+    // Constructor: forwards the column name and id to the base reader
+    explicit Int64ColumnReader(std::string name, int32_t id)
+            : BaseColumnReader{std::move(name), id} {}
+
+    // Destructor
+    ~Int64ColumnReader() override = default;
+
+    // Methods inherited from BaseColumnReader
+    void load(ZstdDecompressor& decompressor, uint64_t num_messages) override;
+
+    std::string get_type() override { return std::string{"int"}; }
+
+    std::variant<int64_t, double, std::string, uint8_t>
+    extract_value(uint64_t cur_message) override;
+
+private:
+    std::unique_ptr<int64_t[]> m_values;  // heap buffer for the column's int64 values
+};
+
+class FloatColumnReader : public BaseColumnReader {
+public:
+    // Constructor: forwards the column name and id to the base reader
+    explicit FloatColumnReader(std::string name, int32_t id)
+            : BaseColumnReader{std::move(name), id} {}
+
+    // Destructor
+    ~FloatColumnReader() override = default;
+
+    // Methods inherited from BaseColumnReader
+    void load(ZstdDecompressor& decompressor, uint64_t num_messages) override;
+
+    std::string get_type() override { return std::string{"float"}; }
+
+    std::variant<int64_t, double, std::string, uint8_t>
+    extract_value(uint64_t cur_message) override;
+
+private:
+    std::unique_ptr<double[]> m_values;  // heap buffer for the column's double values
+};
+
+class BooleanColumnReader : public BaseColumnReader {
+public:
+    // Constructor: forwards the column name and id to the base reader
+    explicit BooleanColumnReader(std::string name, int32_t id)
+            : BaseColumnReader{std::move(name), id} {}
+
+    // Destructor
+    ~BooleanColumnReader() override = default;
+
+    // Methods inherited from BaseColumnReader
+    void load(ZstdDecompressor& decompressor, uint64_t num_messages) override;
+
+    std::string get_type() override { return std::string{"bool"}; }
+
+    std::variant<int64_t, double, std::string, uint8_t>
+    extract_value(uint64_t cur_message) override;
+
+private:
+    std::unique_ptr<uint8_t[]> m_values;  // heap buffer holding one byte per boolean value
+};
+
+class ClpStringColumnReader : public BaseColumnReader {
+public:
+    // Constructor: shares ownership of both dictionaries with the caller
+    ClpStringColumnReader(
+            std::string const& name,
+            int32_t id,
+            std::shared_ptr<VariableDictionaryReader> var_dict,
+            std::shared_ptr<LogTypeDictionaryReader> log_dict,
+            bool is_array = false
+    )
+            : BaseColumnReader{name, id},
+              m_var_dict{std::move(var_dict)},
+              m_log_dict{std::move(log_dict)},
+              m_is_array{is_array} {}
+
+    // Destructor
+    ~ClpStringColumnReader() override = default;
+
+    // Methods inherited from BaseColumnReader
+    void load(ZstdDecompressor& decompressor, uint64_t num_messages) override;
+
+    std::string get_type() override { return std::string{m_is_array ? "array" : "string"}; }
+
+    std::variant<int64_t, double, std::string, uint8_t>
+    extract_value(uint64_t cur_message) override;
+
+    /**
+     * Gets the encoded logtype id of a message
+     * @param cur_message Index of the message
+     * @return The encoded logtype id
+     */
+    int64_t get_encoded_id(uint64_t cur_message);
+
+    /**
+     * Gets the encoded variables
+     * @param cur_message Index of the message
+     * @return Encoded variables in a span
+     */
+    Span<int64_t> get_encoded_vars(uint64_t cur_message);
+
+private:
+    std::shared_ptr<VariableDictionaryReader> m_var_dict;
+    std::shared_ptr<LogTypeDictionaryReader> m_log_dict;
+
+    std::unique_ptr<int64_t[]> m_logtypes;
+    std::unique_ptr<int64_t[]> m_encoded_vars;
+
+    // Selects the label reported by get_type(): "array" when set, "string" otherwise
+    bool m_is_array;
+};
+
+class VariableStringColumnReader : public BaseColumnReader {
+public:
+    // Constructor: shares ownership of the variable dictionary with the caller
+    VariableStringColumnReader(
+            std::string const& name,
+            int32_t id,
+            std::shared_ptr<VariableDictionaryReader> var_dict
+    )
+            : BaseColumnReader{name, id},
+              m_var_dict{std::move(var_dict)} {}
+
+    // Destructor
+    ~VariableStringColumnReader() override = default;
+
+    // Methods inherited from BaseColumnReader
+    void load(ZstdDecompressor& decompressor, uint64_t num_messages) override;
+
+    std::string get_type() override { return std::string{"string"}; }
+
+    std::variant<int64_t, double, std::string, uint8_t>
+    extract_value(uint64_t cur_message) override;
+
+    /**
+     * Gets the variable dictionary id stored for a message
+     * @param cur_message Index of the message
+     * @return The variable dictionary id (not the string it maps to)
+     */
+    int64_t get_variable_id(uint64_t cur_message);
+
+private:
+    std::shared_ptr<VariableDictionaryReader> m_var_dict;
+
+    std::unique_ptr<int64_t[]> m_variables;
+};
+
+class DateStringColumnReader : public BaseColumnReader {
+public:
+    // Constructor: shares ownership of the timestamp dictionary with the caller
+    DateStringColumnReader(
+            std::string const& name,
+            int32_t id,
+            std::shared_ptr<TimestampDictionaryReader> timestamp_dict
+    )
+            : BaseColumnReader{name, id},
+              m_timestamp_dict{std::move(timestamp_dict)} {}
+
+    // Destructor
+    ~DateStringColumnReader() override = default;
+
+    // Methods inherited from BaseColumnReader
+    void load(ZstdDecompressor& decompressor, uint64_t num_messages) override;
+
+    std::string get_type() override { return std::string{"string"}; }
+
+    std::variant<int64_t, double, std::string, uint8_t>
+    extract_value(uint64_t cur_message) override;
+
+    /**
+     * @param cur_message Index of the message
+     * @return The message's stored timestamp in epoch time (no dictionary lookup)
+     */
+    epochtime_t get_encoded_time(uint64_t cur_message);
+
+private:
+    std::shared_ptr<TimestampDictionaryReader> m_timestamp_dict;
+
+    std::unique_ptr<int64_t[]> m_timestamps;
+    std::unique_ptr<int64_t[]> m_timestamp_encodings;
+};
+
+class FloatDateStringColumnReader : public BaseColumnReader {
+public:
+    // Constructor: only the name and id are needed; no dictionary is involved
+    FloatDateStringColumnReader(std::string const& name, int32_t id) : BaseColumnReader{name, id} {}
+
+    // Destructor
+    ~FloatDateStringColumnReader() override = default;
+
+    // Methods inherited from BaseColumnReader
+    void load(ZstdDecompressor& decompressor, uint64_t num_messages) override;
+
+    std::string get_type() override { return std::string{"string"}; }
+
+    std::variant<int64_t, double, std::string, uint8_t>
+    extract_value(uint64_t cur_message) override;
+
+    /**
+     * @param cur_message Index of the message
+     * @return The message's stored timestamp as a floating-point epoch time
+     */
+    double get_encoded_time(uint64_t cur_message);
+
+private:
+    std::unique_ptr<double[]> m_timestamps;
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_COLUMNREADER_HPP
diff --git a/components/core/src/clp_s/ColumnWriter.cpp b/components/core/src/clp_s/ColumnWriter.cpp
new file mode 100644
index 000000000..6abe9c302
--- /dev/null
+++ b/components/core/src/clp_s/ColumnWriter.cpp
@@ -0,0 +1,142 @@
+#include "ColumnWriter.hpp"
+
+namespace clp_s {
+void Int64ColumnWriter::add_value(
+        std::variant<int64_t, double, std::string, bool>& value,
+        size_t& size
+) {
+    size = sizeof(decltype(m_values)::value_type);  // one int64 is appended per value
+    m_values.emplace_back(std::get<int64_t>(value));  // throws if value is not an int64
+}
+
+void Int64ColumnWriter::store(ZstdCompressor& compressor) {
+    // Emit the whole value buffer as raw bytes; no element count is written by this method.
+    auto const* raw_bytes = reinterpret_cast<char const*>(m_values.data());
+    auto const num_bytes = m_values.size() * sizeof(int64_t);
+    compressor.write(raw_bytes, num_bytes);
+}
+
+void FloatColumnWriter::add_value(
+        std::variant<int64_t, double, std::string, bool>& value,
+        size_t& size
+) {
+    size = sizeof(decltype(m_values)::value_type);  // one double is appended per value
+    m_values.emplace_back(std::get<double>(value));  // throws if value is not a double
+}
+
+void FloatColumnWriter::store(ZstdCompressor& compressor) {
+    // Emit the whole value buffer as raw bytes; no element count is written by this method.
+    auto const* raw_bytes = reinterpret_cast<char const*>(m_values.data());
+    auto const num_bytes = m_values.size() * sizeof(double);
+    compressor.write(raw_bytes, num_bytes);
+}
+
+void BooleanColumnWriter::add_value(
+        std::variant<int64_t, double, std::string, bool>& value,
+        size_t& size
+) {
+    size = sizeof(decltype(m_values)::value_type);  // booleans occupy one byte each
+    m_values.emplace_back(static_cast<uint8_t>(std::get<bool>(value)));  // true -> 1, false -> 0
+}
+
+void BooleanColumnWriter::store(ZstdCompressor& compressor) {
+    // Emit the whole value buffer as raw bytes; no element count is written by this method.
+    auto const* raw_bytes = reinterpret_cast<char const*>(m_values.data());
+    auto const num_bytes = m_values.size() * sizeof(uint8_t);
+    compressor.write(raw_bytes, num_bytes);
+}
+
+void ClpStringColumnWriter::add_value(
+        std::variant<int64_t, double, std::string, bool>& value,
+        size_t& size
+) {
+    size = sizeof(int64_t);  // the single entry appended to m_logtypes
+    auto const& string_var = std::get<std::string>(value);  // reference: no copy of the message
+    uint64_t id{0};
+    uint64_t const offset = m_encoded_vars.size();  // where this message's variables begin
+    VariableEncoder::encode_and_add_to_dictionary(
+            string_var,
+            m_logtype_entry,
+            *m_var_dict,
+            m_encoded_vars
+    );
+    m_log_dict->add_entry(m_logtype_entry, id);
+    // Pack the logtype id and the variables' starting offset into a single int64.
+    m_logtypes.push_back(encode_log_dict_id(id, offset));
+    size += sizeof(int64_t) * (m_encoded_vars.size() - offset);  // plus each variable added
+}
+
+void ClpStringColumnWriter::store(ZstdCompressor& compressor) {
+    // Written in order: the encoded logtype ids, then the number of encoded variables,
+    // then the encoded variables themselves.
+    auto const* logtype_bytes = reinterpret_cast<char const*>(m_logtypes.data());
+    compressor.write(logtype_bytes, m_logtypes.size() * sizeof(int64_t));
+
+    compressor.write_numeric_value(m_encoded_vars.size());
+
+    auto const* var_bytes = reinterpret_cast<char const*>(m_encoded_vars.data());
+    compressor.write(var_bytes, m_encoded_vars.size() * sizeof(int64_t));
+}
+
+void VariableStringColumnWriter::add_value(
+        std::variant<int64_t, double, std::string, bool>& value,
+        size_t& size
+) {
+    size = sizeof(int64_t);  // only the dictionary id is stored in the column
+    auto const& string_var = std::get<std::string>(value);  // reference: avoids a string copy
+    uint64_t id{0};
+    m_var_dict->add_entry(string_var, id);
+    m_variables.push_back(id);
+}
+
+void VariableStringColumnWriter::store(ZstdCompressor& compressor) {
+    // Emit the collected dictionary ids as one raw int64 array.
+    auto const* raw_bytes = reinterpret_cast<char const*>(m_variables.data());
+    auto const num_bytes = m_variables.size() * sizeof(int64_t);
+    compressor.write(raw_bytes, num_bytes);
+}
+
+void DateStringColumnWriter::add_value(
+        std::variant<int64_t, double, std::string, bool>& value,
+        size_t& size
+) {
+    size = 2 * sizeof(int64_t);  // one timestamp plus one encoding id per value
+    auto const& string_timestamp = std::get<std::string>(value);  // reference: no string copy
+
+    uint64_t encoding_id{0};
+    epochtime_t timestamp = m_timestamp_dict->ingest_entry(m_name, string_timestamp, encoding_id);
+
+    m_timestamps.push_back(timestamp);
+    m_timestamp_encodings.push_back(encoding_id);
+}
+
+void DateStringColumnWriter::store(ZstdCompressor& compressor) {
+    // Written as two back-to-back int64 arrays: every timestamp first, then every encoding
+    // id. DateStringColumnReader::load reads the two arrays back in this same order.
+    auto const* timestamp_bytes = reinterpret_cast<char const*>(m_timestamps.data());
+    auto const timestamp_len = m_timestamps.size() * sizeof(int64_t);
+    compressor.write(timestamp_bytes, timestamp_len);
+
+    auto const* encoding_bytes = reinterpret_cast<char const*>(m_timestamp_encodings.data());
+    compressor.write(encoding_bytes, m_timestamp_encodings.size() * sizeof(int64_t));
+}
+
+void FloatDateStringColumnWriter::add_value(
+        std::variant<int64_t, double, std::string, bool>& value,
+        size_t& size
+) {
+    size = sizeof(decltype(m_timestamps)::value_type);  // one double is appended per value
+    double const timestamp = std::get<double>(value);
+
+    // The timestamp dictionary is handed the value before it is appended to the column.
+    m_timestamp_dict->ingest_entry(m_name, timestamp);
+    m_timestamps.emplace_back(timestamp);
+}
+
+void FloatDateStringColumnWriter::store(ZstdCompressor& compressor) {
+    // Emit the collected timestamps as one raw array of doubles.
+    auto const* raw_bytes = reinterpret_cast<char const*>(m_timestamps.data());
+    auto const num_bytes = m_timestamps.size() * sizeof(double);
+    compressor.write(raw_bytes, num_bytes);
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/ColumnWriter.hpp b/components/core/src/clp_s/ColumnWriter.hpp
new file mode 100644
index 000000000..447f0adc3
--- /dev/null
+++ b/components/core/src/clp_s/ColumnWriter.hpp
@@ -0,0 +1,232 @@
+#ifndef CLP_S_COLUMNWRITER_HPP
+#define CLP_S_COLUMNWRITER_HPP
+
+#include <utility>
+#include <variant>
+
+#include <simdjson.h>
+
+#include "DictionaryWriter.hpp"
+#include "FileWriter.hpp"
+#include "TimestampDictionaryWriter.hpp"
+#include "VariableEncoder.hpp"
+#include "ZstdCompressor.hpp"
+
+using namespace simdjson;
+
+namespace clp_s {
+class BaseColumnWriter {
+public:
+    // Constructor: takes the column name by value and moves it into place
+    explicit BaseColumnWriter(std::string name) : m_name(std::move(name)) {}
+
+    // Destructor (virtual so that derived writers can be destroyed through a base pointer)
+    virtual ~BaseColumnWriter() = default;
+
+    /**
+     * Adds a value to the column
+     * @param value Must hold the alternative that the concrete writer reads with std::get
+     * @param size Output: set by the writer to the number of bytes it accounts for this value
+     */
+    virtual void add_value(std::variant<int64_t, double, std::string, bool>& value, size_t& size)
+            = 0;
+
+    /**
+     * Stores the column to a compressed file
+     * @param compressor Compressor that receives the column's buffered values
+     */
+    virtual void store(ZstdCompressor& compressor) = 0;
+
+    /**
+     * @return Name of the column (a reference that stays valid while this writer is alive)
+     */
+    std::string const& get_name() const { return m_name; }
+
+protected:
+    std::string m_name;
+};
+
+class Int64ColumnWriter : public BaseColumnWriter {
+public:
+    // Constructor: the column name is moved through to the base writer
+    explicit Int64ColumnWriter(std::string name) : BaseColumnWriter{std::move(name)} {}
+
+    // Destructor
+    ~Int64ColumnWriter() override = default;
+
+    // Methods inherited from BaseColumnWriter
+    void add_value(std::variant<int64_t, double, std::string, bool>& value, size_t& size) override;
+
+    void store(ZstdCompressor& compressor) override;
+
+private:
+    std::vector<int64_t> m_values;  // values in insertion order; written out by store()
+};
+
+class FloatColumnWriter : public BaseColumnWriter {
+public:
+    // Constructor: the column name is moved through to the base writer
+    explicit FloatColumnWriter(std::string name) : BaseColumnWriter{std::move(name)} {}
+
+    // Destructor
+    ~FloatColumnWriter() override = default;
+
+    // Methods inherited from BaseColumnWriter
+    void add_value(std::variant<int64_t, double, std::string, bool>& value, size_t& size) override;
+
+    void store(ZstdCompressor& compressor) override;
+
+private:
+    std::vector<double> m_values;  // values in insertion order; written out by store()
+};
+
+class BooleanColumnWriter : public BaseColumnWriter {
+public:
+    // Constructor: the column name is moved through to the base writer
+    explicit BooleanColumnWriter(std::string name) : BaseColumnWriter{std::move(name)} {}
+
+    // Destructor
+    ~BooleanColumnWriter() override = default;
+
+    // Methods inherited from BaseColumnWriter
+    void add_value(std::variant<int64_t, double, std::string, bool>& value, size_t& size) override;
+
+    void store(ZstdCompressor& compressor) override;
+
+private:
+    std::vector<uint8_t> m_values;  // one byte per value: 1 for true, 0 for false
+};
+
+class ClpStringColumnWriter : public BaseColumnWriter {
+public:
+    // Constructor: shares ownership of both dictionaries with the caller
+    ClpStringColumnWriter(
+            std::string const& name,
+            std::shared_ptr<VariableDictionaryWriter> var_dict,
+            std::shared_ptr<LogTypeDictionaryWriter> log_dict
+    )
+            : BaseColumnWriter(name),
+              m_var_dict(std::move(var_dict)),
+              m_log_dict(std::move(log_dict)) {}
+
+    // Destructor
+    ~ClpStringColumnWriter() override = default;
+
+    // Methods inherited from BaseColumnWriter
+    void add_value(std::variant<int64_t, double, std::string, bool>& value, size_t& size) override;
+
+    void store(ZstdCompressor& compressor) override;
+
+    /**
+     * @param encoded_id Value produced by encode_log_dict_id
+     * @return The log dict id held in the low cOffsetBitPosition bits
+     */
+    static int64_t get_encoded_log_dict_id(uint64_t encoded_id) {
+        return static_cast<int64_t>(encoded_id & static_cast<uint64_t>(cLogDictIdMask));
+    }
+
+    /**
+     * @param encoded_id Value produced by encode_log_dict_id
+     * @return The offset held in the bits above the id (unsigned shift: never sign-extended)
+     */
+    static int64_t get_encoded_offset(uint64_t encoded_id) {
+        return static_cast<int64_t>(encoded_id >> cOffsetBitPosition);
+    }
+
+private:
+    /**
+     * Packs a log dict id and an offset into one value, using unsigned arithmetic throughout
+     * @param id Log dict id; must fit in cOffsetBitPosition bits (not checked here)
+     * @param offset Offset stored in the bits from cOffsetBitPosition upwards
+     * @return The encoded log dict id
+     */
+    static int64_t encode_log_dict_id(uint64_t id, uint64_t offset) {
+        return static_cast<int64_t>(id | (offset << cOffsetBitPosition));
+    }
+
+    static constexpr int cOffsetBitPosition = 24;  // ids use bits [0, 24); offsets the rest
+    static constexpr int64_t cLogDictIdMask = ~(-1ULL << cOffsetBitPosition);
+    static constexpr int64_t cOffsetMask = ~cLogDictIdMask;
+
+    std::shared_ptr<VariableDictionaryWriter> m_var_dict;
+    std::shared_ptr<LogTypeDictionaryWriter> m_log_dict;
+    LogTypeDictionaryEntry m_logtype_entry;
+
+    std::vector<int64_t> m_logtypes;
+    std::vector<int64_t> m_encoded_vars;
+};
+
+class VariableStringColumnWriter : public BaseColumnWriter {
+public:
+    // Constructor: shares ownership of the variable dictionary with the caller
+    VariableStringColumnWriter(
+            std::string const& name,
+            std::shared_ptr<VariableDictionaryWriter> var_dict
+    )
+            : BaseColumnWriter{name},
+              m_var_dict{std::move(var_dict)} {}
+
+    // Destructor
+    ~VariableStringColumnWriter() override = default;
+
+    // Methods inherited from BaseColumnWriter
+    void add_value(std::variant<int64_t, double, std::string, bool>& value, size_t& size) override;
+
+    void store(ZstdCompressor& compressor) override;
+
+private:
+    std::shared_ptr<VariableDictionaryWriter> m_var_dict;
+    std::vector<int64_t> m_variables;  // dictionary ids, one per added value
+};
+
+class DateStringColumnWriter : public BaseColumnWriter {
+public:
+    // Constructor: shares ownership of the timestamp dictionary with the caller
+    DateStringColumnWriter(
+            std::string const& name,
+            std::shared_ptr<TimestampDictionaryWriter> timestamp_dict
+    )
+            : BaseColumnWriter{name},
+              m_timestamp_dict{std::move(timestamp_dict)} {}
+
+    // Destructor
+    ~DateStringColumnWriter() override = default;
+
+    // Methods inherited from BaseColumnWriter
+    void add_value(std::variant<int64_t, double, std::string, bool>& value, size_t& size) override;
+
+    void store(ZstdCompressor& compressor) override;
+
+private:
+    std::shared_ptr<TimestampDictionaryWriter> m_timestamp_dict;
+
+    std::vector<int64_t> m_timestamps;  // value returned by ingest_entry for each entry
+    std::vector<int64_t> m_timestamp_encodings;  // encoding id reported for each entry
+};
+
+class FloatDateStringColumnWriter : public BaseColumnWriter {
+public:
+    // Constructor: shares ownership of the timestamp dictionary with the caller
+    FloatDateStringColumnWriter(
+            std::string const& name,
+            std::shared_ptr<TimestampDictionaryWriter> timestamp_dict
+    )
+            : BaseColumnWriter{name},
+              m_timestamp_dict{std::move(timestamp_dict)} {}
+
+    // Destructor
+    ~FloatDateStringColumnWriter() override = default;
+
+    // Methods inherited from BaseColumnWriter
+    void add_value(std::variant<int64_t, double, std::string, bool>& value, size_t& size) override;
+
+    void store(ZstdCompressor& compressor) override;
+
+private:
+    std::shared_ptr<TimestampDictionaryWriter> m_timestamp_dict;
+
+    std::vector<double> m_timestamps;  // timestamps exactly as supplied, one per added value
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_COLUMNWRITER_HPP
diff --git a/components/core/src/clp_s/CommandLineArguments.cpp b/components/core/src/clp_s/CommandLineArguments.cpp
new file mode 100644
index 000000000..cecf51f91
--- /dev/null
+++ b/components/core/src/clp_s/CommandLineArguments.cpp
@@ -0,0 +1,298 @@
+#include "CommandLineArguments.hpp"
+
+#include <iostream>
+
+#include <boost/program_options.hpp>
+#include <spdlog/spdlog.h>
+
+namespace po = boost::program_options;
+
+namespace clp_s {
+// Two-pass parse: the first pass extracts COMMAND (tolerating unregistered options); the
+// second pass re-parses the leftover tokens against the selected command's own options.
+CommandLineArguments::ParsingResult
+CommandLineArguments::parse_arguments(int argc, char const** argv) {
+    if (1 == argc) {
+        print_basic_usage();
+        return ParsingResult::Failure;
+    }
+
+    po::options_description general_options("General options");
+    general_options.add_options()("help,h", "Print help");
+
+    char command_input{'\0'};
+    po::options_description general_positional_options("General positional options");
+    // clang-format off
+    general_positional_options.add_options()(
+            "command", po::value<char>(&command_input)
+    )(
+            "command-args", po::value<std::vector<std::string>>()
+    );
+    // clang-format on
+
+    po::positional_options_description general_positional_options_description;
+    general_positional_options_description.add("command", 1);
+    general_positional_options_description.add("command-args", -1);
+
+    po::options_description all_descriptions;
+    all_descriptions.add(general_options);
+    all_descriptions.add(general_positional_options);
+
+    try {
+        po::variables_map parsed_command_line_options;
+        po::parsed_options parsed = po::command_line_parser(argc, argv)
+                                            .options(all_descriptions)
+                                            .positional(general_positional_options_description)
+                                            .allow_unregistered()
+                                            .run();
+        po::store(parsed, parsed_command_line_options);
+        po::notify(parsed_command_line_options);
+
+        if (parsed_command_line_options.count("command") == 0) {
+            if (parsed_command_line_options.count("help") != 0) {
+                if (argc > 2) {
+                    SPDLOG_WARN("Ignoring all options besides --help.");
+                }
+
+                print_basic_usage();
+                std::cerr << "COMMAND is one of:" << std::endl;
+                std::cerr << "  c - compress" << std::endl;
+                std::cerr << "  x - decompress" << std::endl;
+                std::cerr << "  s - search" << std::endl;
+                std::cerr << std::endl;
+                auto const& prog = get_program_name();
+                std::cerr << "Try " << prog << " c --help OR " << prog << " x --help OR "
+                          << prog << " s --help for command-specific details."
+                          << std::endl;
+
+                po::options_description visible_options;
+                visible_options.add(general_options);
+                std::cerr << visible_options << '\n';
+                return ParsingResult::InfoCommand;
+            }
+
+            throw std::invalid_argument("Command unspecified");
+        }
+
+        switch (command_input) {
+            case static_cast<char>(Command::Compress):
+            case static_cast<char>(Command::Extract):
+            case static_cast<char>(Command::Search):
+                m_command = static_cast<Command>(command_input);
+                break;
+            default:
+                throw std::invalid_argument(std::string("Unknown action '") + command_input + "'");
+        }
+
+        if (Command::Compress == m_command) {
+            po::options_description compression_positional_options;
+            // clang-format off
+            compression_positional_options.add_options()(
+                    "archives-dir",
+                    po::value<std::string>(&m_archives_dir)->value_name("DIR"),
+                    "output directory"
+            )(
+                    "input-paths",
+                    po::value<std::vector<std::string>>(&m_file_paths)->value_name("PATHS"),
+                    "input paths"
+            );
+            // clang-format on
+
+            po::options_description compression_options("Compression options");
+            // clang-format off
+            compression_options.add_options()(
+                    "compression-level",
+                    po::value<int>(&m_compression_level)->value_name("LEVEL")->default_value(3),
+                    "1 (fast/low compression) to 9 (slow/high compression)."
+            )(
+                    "target-encoded-size",
+                    po::value<size_t>(&m_target_encoded_size)->value_name("TARGET_ENCODED_SIZE")->
+                        default_value(8UL * 1024 * 1024 * 1024),  // 8 GiB
+                    "Target size (B) for the dictionaries and encoded messages before a new "
+                    "archive is created."
+            )(
+                    "timestamp-key",
+                    po::value<std::string>(&m_timestamp_key)->value_name("TIMESTAMP_COLUMN_KEY")->
+                        default_value(""),
+                    "Path (e.g. x.y) for the field containing the log event's timestamp."
+            );
+            // clang-format on
+
+            po::positional_options_description positional_options;
+            positional_options.add("archives-dir", 1);
+            positional_options.add("input-paths", -1);
+
+            po::options_description all_compression_options;
+            all_compression_options.add(compression_options);
+            all_compression_options.add(compression_positional_options);
+
+            std::vector<std::string> unrecognized_options
+                    = po::collect_unrecognized(parsed.options, po::include_positional);
+            // Drop the first leftover token; this assumes it is the COMMAND positional.
+            unrecognized_options.erase(unrecognized_options.begin());
+            po::store(
+                    po::command_line_parser(unrecognized_options)
+                            .options(all_compression_options)
+                            .positional(positional_options)
+                            .run(),
+                    parsed_command_line_options
+            );
+            po::notify(parsed_command_line_options);
+
+            if (parsed_command_line_options.count("help")) {
+                print_compression_usage();
+
+                std::cerr << "Examples:" << std::endl;
+                std::cerr << "  # Compress file1.json and dir1 into archives-dir" << std::endl;
+                std::cerr << "  " << m_program_name << " c archives-dir file1.json dir1"
+                          << std::endl;
+
+                po::options_description visible_options;
+                visible_options.add(general_options);
+                visible_options.add(compression_options);
+                std::cerr << visible_options << '\n';
+                return ParsingResult::InfoCommand;
+            }
+
+            if (m_file_paths.empty()) {
+                throw std::invalid_argument("No input paths specified.");
+            }
+
+            if (m_archives_dir.empty()) {
+                throw std::invalid_argument("No archives directory specified.");
+            }
+        } else if (Command::Extract == m_command) {
+            po::options_description extraction_options;
+            // clang-format off
+            extraction_options.add_options()(
+                    "archives-dir",
+                    po::value<std::string>(&m_archives_dir),
+                    "The directory containing the archives"
+            )(
+                    "output-dir",
+                    po::value<std::string>(&m_output_dir),
+                    "The output directory for the decompressed file"
+            );
+            // clang-format on
+
+            po::positional_options_description positional_options;
+            positional_options.add("archives-dir", 1);
+            positional_options.add("output-dir", 1);
+
+            std::vector<std::string> unrecognized_options
+                    = po::collect_unrecognized(parsed.options, po::include_positional);
+            // Drop the first leftover token; this assumes it is the COMMAND positional.
+            unrecognized_options.erase(unrecognized_options.begin());
+            po::store(
+                    po::command_line_parser(unrecognized_options)
+                            .options(extraction_options)
+                            .positional(positional_options)
+                            .run(),
+                    parsed_command_line_options
+            );
+            po::notify(parsed_command_line_options);
+
+            if (parsed_command_line_options.count("help")) {
+                print_decompression_usage();
+
+                std::cerr << "Examples:" << std::endl;
+                std::cerr << "  # Decompress all files from archives-dir into output-dir"
+                          << std::endl;
+                std::cerr << "  " << m_program_name << " x archives-dir output-dir" << std::endl;
+                std::cerr << std::endl;
+
+                po::options_description visible_options;
+                visible_options.add(general_options);
+                std::cerr << visible_options << std::endl;
+                return ParsingResult::InfoCommand;
+            }
+
+            if (m_archives_dir.empty()) {
+                throw std::invalid_argument("No archives directory specified");
+            }
+
+            if (m_output_dir.empty()) {
+                throw std::invalid_argument("No output directory specified");
+            }
+        } else if (Command::Search == m_command) {
+            po::options_description search_options;
+            // clang-format off
+            search_options.add_options()(
+                    "archives-dir",
+                    po::value<std::string>(&m_archives_dir),
+                    "The directory containing the archives"
+            )(
+                    "query,q",
+                    po::value<std::string>(&m_query),
+                    "Query to perform"
+            );
+            // clang-format on
+
+            po::positional_options_description positional_options;
+            positional_options.add("archives-dir", 1);
+            positional_options.add("query", 1);
+
+            std::vector<std::string> unrecognized_options
+                    = po::collect_unrecognized(parsed.options, po::include_positional);
+            // Drop the first leftover token; this assumes it is the COMMAND positional.
+            unrecognized_options.erase(unrecognized_options.begin());
+            po::store(
+                    po::command_line_parser(unrecognized_options)
+                            .options(search_options)
+                            .positional(positional_options)
+                            .run(),
+                    parsed_command_line_options
+            );
+            po::notify(parsed_command_line_options);
+
+            if (parsed_command_line_options.count("help")) {
+                print_search_usage();
+
+                std::cerr << "Examples:" << std::endl;
+                std::cerr << "  # Search archives-dir for logs matching a KQL query" << std::endl;
+                std::cerr << "  " << m_program_name << " s archives-dir kql-query" << std::endl;
+                std::cerr << std::endl;
+
+                po::options_description visible_options;
+                visible_options.add(general_options);
+                std::cerr << visible_options << '\n';
+                return ParsingResult::InfoCommand;
+            }
+            if (m_archives_dir.empty()) {
+                throw std::invalid_argument("No archives directory specified");
+            }
+
+            if (m_query.empty()) {
+                throw std::invalid_argument("No query specified");
+            }
+        }
+
+    } catch (std::exception const& e) {
+        SPDLOG_ERROR("{}", e.what());
+        print_basic_usage();
+        std::cerr << "Try " << get_program_name() << " --help for detailed usage instructions"
+                  << std::endl;
+        return ParsingResult::Failure;
+    }
+
+    return ParsingResult::Success;
+}
+
+void CommandLineArguments::print_basic_usage() const {
+    auto const& program = get_program_name();  // accessor returns m_program_name
+    std::cerr << "Usage: " << program << " [OPTIONS] COMMAND [COMMAND ARGUMENTS]" << std::endl;
+}
+
+void CommandLineArguments::print_compression_usage() const {
+    auto const& program = get_program_name();  // accessor returns m_program_name
+    std::cerr << "Usage: " << program << " c [OPTIONS] ARCHIVES_DIR [FILE/DIR ...]" << std::endl;
+}
+
+void CommandLineArguments::print_decompression_usage() const {
+    std::cerr << "Usage: " + m_program_name + " x [OPTIONS] ARCHIVES_DIR OUTPUT_DIR" << std::endl;
+}
+
+void CommandLineArguments::print_search_usage() const {
+    std::cerr << "Usage: " + m_program_name + " s [OPTIONS] ARCHIVES_DIR KQL_QUERY" << std::endl;
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/CommandLineArguments.hpp b/components/core/src/clp_s/CommandLineArguments.hpp
new file mode 100644
index 000000000..16e8dde74
--- /dev/null
+++ b/components/core/src/clp_s/CommandLineArguments.hpp
@@ -0,0 +1,74 @@
+#ifndef CLP_S_COMMANDLINEARGUMENTS_HPP
+#define CLP_S_COMMANDLINEARGUMENTS_HPP
+
+#include <string>
+#include <vector>
+
+namespace clp_s {
+class CommandLineArguments {
+public:
+    // Types
+    enum class ParsingResult {
+        Success = 0,
+        InfoCommand,
+        Failure
+    };
+
+    enum class Command : char {
+        Compress = 'c',
+        Extract = 'x',
+        Search = 's'
+    };
+
+    // Constructors
+    explicit CommandLineArguments(std::string const& program_name) : m_program_name(program_name) {}
+
+    // Methods
+    ParsingResult parse_arguments(int argc, char const* argv[]);
+
+    std::string const& get_program_name() const { return m_program_name; }
+
+    Command get_command() const { return m_command; }
+
+    std::vector<std::string> const& get_file_paths() const { return m_file_paths; }
+
+    std::string const& get_archives_dir() const { return m_archives_dir; }
+
+    std::string const& get_output_dir() const { return m_output_dir; }
+
+    std::string const& get_timestamp_key() const { return m_timestamp_key; }
+
+    int get_compression_level() const { return m_compression_level; }
+
+    size_t get_target_encoded_size() const { return m_target_encoded_size; }
+
+    std::string const& get_query() const { return m_query; }
+
+private:
+    // Methods
+    void print_basic_usage() const;
+
+    void print_compression_usage() const;
+
+    void print_decompression_usage() const;
+
+    void print_search_usage() const;
+
+    // Variables (scalars are value-initialized so the getters are well-defined before parsing)
+    std::string m_program_name;
+    Command m_command{};
+
+    // Compression and decompression variables
+    std::vector<std::string> m_file_paths;
+    std::string m_archives_dir;
+    std::string m_output_dir;
+    std::string m_timestamp_key;
+    int m_compression_level{};
+    size_t m_target_encoded_size{};
+
+    // Search variables
+    std::string m_query;
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_COMMANDLINEARGUMENTS_HPP
diff --git a/components/core/src/clp_s/Compressor.hpp b/components/core/src/clp_s/Compressor.hpp
new file mode 100644
index 000000000..ba4edae0c
--- /dev/null
+++ b/components/core/src/clp_s/Compressor.hpp
@@ -0,0 +1,51 @@
+// Code from CLP
+
+#ifndef CLP_S_COMPRESSOR_HPP
+#define CLP_S_COMPRESSOR_HPP
+
+#include <cstdint>
+#include <string>
+
+#include <zstd.h>
+
+#include "TraceableException.hpp"
+
+namespace clp_s {
+class Compressor {
+public:
+    // Types
+    class OperationFailed : public TraceableException {
+    public:
+        // Constructors
+        OperationFailed(ErrorCode error_code, char const* const filename, int line_number)
+                : TraceableException(error_code, filename, line_number) {}
+    };
+
+    enum class CompressorType : uint8_t {
+        ZSTD = 0x10,
+        Passthrough = 0xFF,
+    };
+
+    // Constructor: records the given compressor type in m_type
+    explicit Compressor(CompressorType type) : m_type(type) {}
+
+    // Destructor: virtual, so a derived compressor can be destroyed through a Compressor pointer
+    virtual ~Compressor() = default;
+
+    // Copying is disabled; the deleted copy operations also suppress the implicit move operations
+    Compressor(Compressor const&) = delete;
+
+    Compressor& operator=(Compressor const&) = delete;
+
+    // Methods
+    /**
+     * Closes the compression stream (pure virtual: each concrete compressor implements it)
+     */
+    virtual void close() = 0;
+
+protected:
+    CompressorType m_type;
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_COMPRESSOR_HPP
diff --git a/components/core/src/clp_s/Decompressor.hpp b/components/core/src/clp_s/Decompressor.hpp
new file mode 100644
index 000000000..4aebec945
--- /dev/null
+++ b/components/core/src/clp_s/Decompressor.hpp
@@ -0,0 +1,64 @@
+// Code from CLP
+
+#ifndef CLP_S_DECOMPRESSOR_HPP
+#define CLP_S_DECOMPRESSOR_HPP
+
+#include <string>
+
+#include "FileReader.hpp"
+#include "TraceableException.hpp"
+
+namespace clp_s {
+class Decompressor {
+public:
+    // Types
+    class OperationFailed : public TraceableException {
+    public:
+        // Constructors
+        OperationFailed(ErrorCode error_code, char const* const filename, int line_number)
+                : TraceableException(error_code, filename, line_number) {}
+    };
+
+    enum class CompressorType : uint8_t {
+        ZSTD = 0x10,
+        Passthrough = 0xFF,
+    };
+
+    // Constructor: records the given compressor type in m_type
+    explicit Decompressor(CompressorType type) : m_type(type) {}
+
+    // Destructor: virtual, so a derived decompressor is destroyed correctly through a base pointer
+    virtual ~Decompressor() = default;
+
+    // Copying is disabled; the deleted copy operations also suppress the implicit move operations
+    Decompressor(Decompressor const&) = delete;
+
+    Decompressor& operator=(Decompressor const&) = delete;
+
+    // Methods
+    /**
+     * Initializes streaming decompressor to decompress from the specified compressed data buffer
+     * @param compressed_data_buffer
+     * @param compressed_data_buffer_size
+     */
+    virtual void open(char const* compressed_data_buffer, size_t compressed_data_buffer_size) = 0;
+
+    /**
+     * Initializes the decompressor to decompress from an open file
+     * @param file_reader
+     * @param file_read_buffer_capacity The maximum amount of data to read from a file at a time
+     */
+    virtual void open(FileReader& file_reader, size_t file_read_buffer_capacity) = 0;
+
+    /**
+     * Closes decompression stream
+     */
+    virtual void close() = 0;
+
+protected:
+    // Variables
+    CompressorType m_type;
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_DECOMPRESSOR_HPP
diff --git a/components/core/src/clp_s/Defs.hpp b/components/core/src/clp_s/Defs.hpp
new file mode 100644
index 000000000..090d8c0ed
--- /dev/null
+++ b/components/core/src/clp_s/Defs.hpp
@@ -0,0 +1,44 @@
+// Code from CLP
+
+#ifndef CLP_S_DEFS_HPP
+#define CLP_S_DEFS_HPP
+
+// C++ libraries
+#include <atomic>
+#include <cstdint>
+#include <limits>
+
+namespace clp_s {
+// Types
+typedef int64_t epochtime_t;
+static epochtime_t const cEpochTimeMin = INT64_MIN;
+static epochtime_t const cEpochTimeMax = INT64_MAX;
+static double const cDoubleEpochTimeMin = std::numeric_limits<double>::lowest();
+static double const cDoubleEpochTimeMax = std::numeric_limits<double>::max();
+#define SECONDS_TO_EPOCHTIME(x) ((x) * 1000)  // parenthesized so any expression argument is safe
+#define MICROSECONDS_TO_EPOCHTIME(x) 0
+
+typedef uint64_t variable_dictionary_id_t;
+static variable_dictionary_id_t const cVariableDictionaryIdMax = UINT64_MAX;
+typedef int64_t logtype_dictionary_id_t;
+static logtype_dictionary_id_t const cLogtypeDictionaryIdMax = INT64_MAX;
+
+typedef uint16_t archive_format_version_t;
+// This flag is used to maintain two separate streams of archive format versions:
+// - Development versions (which can change frequently as necessary) which should have the flag
+// - Production versions (which should be changed with care and as infrequently as possible)
+// which should not have the flag
+constexpr archive_format_version_t cArchiveFormatDevelopmentVersionFlag = 0x8000;
+
+typedef uint64_t file_id_t;
+typedef uint64_t segment_id_t;
+typedef int64_t encoded_variable_t;
+}  // namespace clp_s
+
+// Macros
+// Relative version of __FILE__
+#define __FILENAME__ ((__FILE__) + SOURCE_PATH_SIZE)
+// Rounds up VALUE to be a multiple of MULTIPLE (note: MULTIPLE is evaluated more than once)
+#define ROUND_UP_TO_MULTIPLE(VALUE, MULTIPLE) (((VALUE) + (MULTIPLE) - 1) / (MULTIPLE) * (MULTIPLE))
+
+#endif  // CLP_S_DEFS_HPP
diff --git a/components/core/src/clp_s/DictionaryEntry.cpp b/components/core/src/clp_s/DictionaryEntry.cpp
new file mode 100644
index 000000000..379753d7e
--- /dev/null
+++ b/components/core/src/clp_s/DictionaryEntry.cpp
@@ -0,0 +1,257 @@
+// Code from CLP
+
+#include "DictionaryEntry.hpp"
+
+#include "Utils.hpp"
+
+using std::string;
+
+namespace clp_s {
+size_t LogTypeDictionaryEntry::get_var_info(size_t var_ix, VarDelim& var_delim) const {
+    // An out-of-range index is reported via SIZE_MAX and leaves var_delim untouched
+    if (m_var_positions.size() <= var_ix) {
+        return SIZE_MAX;
+    }
+
+    auto const delim_pos = m_var_positions[var_ix];
+    var_delim = static_cast<VarDelim>(m_value[delim_pos]);
+    return delim_pos;
+}
+
+LogTypeDictionaryEntry::VarDelim LogTypeDictionaryEntry::get_var_delim(size_t var_ix) const {
+    // VarDelim::Length is returned as the "no such variable" value for out-of-range indices
+    if (m_var_positions.size() <= var_ix) {
+        return VarDelim::Length;
+    }
+
+    return static_cast<VarDelim>(m_value[m_var_positions[var_ix]]);
+}
+
+size_t LogTypeDictionaryEntry::get_var_length_in_logtype(size_t var_ix) const {
+    auto var_delim = get_var_delim(var_ix);
+    switch (var_delim) {
+        case VarDelim::NonDouble:
+            return 1;
+        case VarDelim::Double:
+            return 2;
+        case VarDelim::Length:
+        default:
+            throw OperationFailed(ErrorCodeBadParam, __FILENAME__, __LINE__);
+    }
+}
+
+size_t LogTypeDictionaryEntry::get_data_size() const {
+    // NOTE: sizeof(vector[0]) is executed at compile time so there's no risk of an exception at
+    // runtime
+    return sizeof(m_id) + m_value.length() + m_var_positions.size() * sizeof(m_var_positions[0]);
+}
+
+void LogTypeDictionaryEntry::add_constant(
+        string const& value_containing_constant,
+        size_t begin_pos,
+        size_t length
+) {
+    m_value.append(value_containing_constant, begin_pos, length);
+}
+
+void LogTypeDictionaryEntry::add_non_double_var() {
+    m_var_positions.push_back(m_value.length());
+    add_non_double_var(m_value);
+}
+
+void LogTypeDictionaryEntry::add_double_var() {
+    m_var_positions.push_back(m_value.length());
+    add_double_var(m_value);
+}
+
+bool LogTypeDictionaryEntry::parse_next_var(
+        string const& msg,
+        size_t& var_begin_pos,
+        size_t& var_end_pos,
+        string& var
+) {
+    auto last_var_end_pos = var_end_pos;
+    if (StringUtils::get_bounds_of_next_var(msg, var_begin_pos, var_end_pos)) {
+        // Append to log type: from end of last variable to start of current variable
+        add_constant(msg, last_var_end_pos, var_begin_pos - last_var_end_pos);
+
+        var.assign(msg, var_begin_pos, var_end_pos - var_begin_pos);
+        return true;
+    }
+    if (last_var_end_pos < msg.length()) {
+        // Append to log type: from end of last variable to end
+        add_constant(msg, last_var_end_pos, msg.length() - last_var_end_pos);
+    }
+
+    return false;
+}
+
+void LogTypeDictionaryEntry::clear() {
+    m_value.clear();
+    m_var_positions.clear();
+}
+
+void LogTypeDictionaryEntry::write_to_file(ZstdCompressor& compressor) const {
+    string escaped_value;
+    get_value_with_unfounded_variables_escaped(escaped_value);
+    compressor.write_numeric_value<uint64_t>(escaped_value.length());  // read back as uint64_t
+    compressor.write_string(escaped_value);
+}
+
+ErrorCode
+LogTypeDictionaryEntry::try_read_from_file(ZstdDecompressor& decompressor, uint64_t id, bool lazy) {
+    clear();
+
+    m_id = id;
+    ErrorCode error_code;
+    uint64_t escaped_value_length;
+    error_code = decompressor.try_read_numeric_value(escaped_value_length);
+    if (ErrorCodeSuccess != error_code) {
+        return error_code;
+    }
+
+    string escaped_value;
+    error_code = decompressor.try_read_string(escaped_value_length, escaped_value);
+    if (ErrorCodeSuccess != error_code) {
+        return error_code;
+    }
+
+    if (lazy) {
+        m_value = std::move(escaped_value);
+    } else {
+        decode_log_type(escaped_value);
+    }
+
+    return error_code;
+}
+
+void LogTypeDictionaryEntry::read_from_file(
+        ZstdDecompressor& decompressor,
+        uint64_t id,
+        bool lazy
+) {
+    auto error_code = try_read_from_file(decompressor, id, lazy);
+    if (ErrorCodeSuccess != error_code) {
+        throw OperationFailed(error_code, __FILENAME__, __LINE__);
+    }
+}
+
+void LogTypeDictionaryEntry::decode_log_type(string& escaped_value) {
+    bool is_escaped = false;
+    string constant;
+    for (char c : escaped_value) {
+        if (is_escaped) {
+            constant += c;
+            is_escaped = false;
+        } else if (cEscapeChar == c) {
+            is_escaped = true;
+        } else {
+            if ((char)LogTypeDictionaryEntry::VarDelim::NonDouble == c) {
+                add_constant(constant, 0, constant.length());
+                constant.clear();
+
+                add_non_double_var();
+            } else if ((char)LogTypeDictionaryEntry::VarDelim::Double == c) {
+                add_constant(constant, 0, constant.length());
+                constant.clear();
+
+                add_double_var();
+            } else {
+                constant += c;
+            }
+        }
+    }
+
+    if (false == constant.empty()) {
+        add_constant(constant, 0, constant.length());
+    }
+
+    m_init = true;
+}
+
+void LogTypeDictionaryEntry::decode_log_type() {
+    string escaped_value = std::move(m_value);
+    m_value.clear();
+    decode_log_type(escaped_value);
+}
+
+void LogTypeDictionaryEntry::get_value_with_unfounded_variables_escaped(
+        string& escaped_logtype_value
+) const {
+    // Start from an empty output with room for at least the unescaped value
+    escaped_logtype_value.clear();
+    escaped_logtype_value.reserve(m_value.length());
+
+    // Walk the logtype one variable at a time: escape the constant text that precedes the
+    // variable's delimiter, then copy the delimiter itself unescaped
+    size_t segment_begin = 0;
+    for (auto const delim_pos : m_var_positions) {
+        escape_variable_delimiters(m_value, segment_begin, delim_pos, escaped_logtype_value);
+        escaped_logtype_value += m_value[delim_pos];
+
+        // The next constant segment starts right after this delimiter
+        segment_begin = delim_pos + 1;
+    }
+
+    // Escape whatever constant text follows the last variable
+    escape_variable_delimiters(m_value, segment_begin, m_value.length(), escaped_logtype_value);
+}
+
+void LogTypeDictionaryEntry::escape_variable_delimiters(
+        string const& value,
+        size_t begin_ix,
+        size_t end_ix,
+        string& escaped_value
+) {
+    constexpr auto cNonDoubleDelim = static_cast<char>(VarDelim::NonDouble);
+    constexpr auto cDoubleDelim = static_cast<char>(VarDelim::Double);
+
+    // Append value[begin_ix, end_ix) to escaped_value, prefixing every variable delimiter and
+    // every escape character with the escape character
+    for (auto ix = begin_ix; ix < end_ix; ++ix) {
+        char const c = value[ix];
+        bool const needs_escape = cNonDoubleDelim == c || cDoubleDelim == c || cEscapeChar == c;
+        if (needs_escape) {
+            escaped_value += cEscapeChar;
+        }
+        escaped_value += c;
+    }
+}
+
+size_t VariableDictionaryEntry::get_data_size() const {
+    return sizeof(m_id) + m_value.length();
+}
+
+void VariableDictionaryEntry::write_to_file(ZstdCompressor& compressor) const {
+    compressor.write_numeric_value<uint64_t>(m_value.length());
+    compressor.write_string(m_value);
+}
+
+// Reads one variable dictionary entry from the decompressor into this object, tagging it with
+// the given id. Returns the first non-success code from the decompressor, else success.
+ErrorCode VariableDictionaryEntry::try_read_from_file(ZstdDecompressor& decompressor, uint64_t id) {
+    m_id = id;
+
+    // The entry is stored as a 64-bit length followed by that many bytes of value
+    uint64_t value_length;
+    ErrorCode const length_status = decompressor.try_read_numeric_value(value_length);
+    if (ErrorCodeSuccess != length_status) {
+        // The length could not be read, so m_value is left as it was
+        return length_status;
+    }
+
+    // Whatever try_read_string reports is the result of the whole read
+    return decompressor.try_read_string(value_length, m_value);
+}
+
+void VariableDictionaryEntry::read_from_file(
+        ZstdDecompressor& decompressor,
+        uint64_t id,
+        bool lazy
+) {
+    auto error_code = try_read_from_file(decompressor, id);
+    if (ErrorCodeSuccess != error_code) {
+        throw OperationFailed(error_code, __FILENAME__, __LINE__);
+    }
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/DictionaryEntry.hpp b/components/core/src/clp_s/DictionaryEntry.hpp
new file mode 100644
index 000000000..e2b15b72c
--- /dev/null
+++ b/components/core/src/clp_s/DictionaryEntry.hpp
@@ -0,0 +1,290 @@
+// Code from CLP
+
+#ifndef CLP_S_DICTIONARYENTRY_HPP
+#define CLP_S_DICTIONARYENTRY_HPP
+
+#include <string>
+#include <utility>
+
+#include "TraceableException.hpp"
+#include "ZstdCompressor.hpp"
+#include "ZstdDecompressor.hpp"
+
+namespace clp_s {
+/**
+ * Template class representing a dictionary entry
+ * @tparam DictionaryIdType
+ */
+template <typename DictionaryIdType>
+class DictionaryEntry {
+public:
+    // Constructors
+    DictionaryEntry() = default;
+
+    DictionaryEntry(std::string value, DictionaryIdType id) : m_id(id), m_value(std::move(value)) {}
+
+    // Methods
+    DictionaryIdType get_id() const { return m_id; }
+
+    std::string const& get_value() const { return m_value; }
+
+protected:
+    // Variables (the constructor's initializer list follows this declaration order)
+    DictionaryIdType m_id{};
+    std::string m_value;
+};
+
+/**
+ * Class representing a logtype dictionary entry
+ */
+class LogTypeDictionaryEntry : public DictionaryEntry<uint64_t> {
+public:
+    // Types
+    class OperationFailed : public TraceableException {
+    public:
+        // Constructors
+        OperationFailed(ErrorCode error_code, char const* const filename, int line_number)
+                : TraceableException(error_code, filename, line_number) {}
+    };
+
+    // Constants
+    enum class VarDelim {
+        // NOTE: These values are used within logtypes to denote variables, so care must be taken
+        // when changing them
+        NonDouble = 17,
+        Double = 18,
+        Length = 2,
+    };
+
+    static constexpr char cEscapeChar = '\\';
+
+    // Constructors
+    LogTypeDictionaryEntry() : m_init(false) {}
+
+    // Use default copy constructor
+    LogTypeDictionaryEntry(LogTypeDictionaryEntry const&) = default;
+
+    // Use default assignment operators
+    LogTypeDictionaryEntry& operator=(LogTypeDictionaryEntry const&) = default;
+
+    // Methods
+    /**
+     * Adds a non-double variable delimiter to the given logtype
+     * @param logtype
+     */
+    static void add_non_double_var(std::string& logtype) { logtype += (char)VarDelim::NonDouble; }
+
+    /**
+     * Adds a double variable delimiter to the given logtype
+     * @param logtype
+     */
+    static void add_double_var(std::string& logtype) { logtype += (char)VarDelim::Double; }
+
+    /**
+     * @return The number of variables in the logtype
+     */
+    size_t get_num_vars() const { return m_var_positions.size(); }
+
+    /**
+     * Gets all info about a variable in the logtype
+     * @param var_ix The index of the variable to get the info for
+     * @param var_delim
+     * @return The variable's position in the logtype, or SIZE_MAX if var_ix is out of bounds
+     */
+    size_t get_var_info(size_t var_ix, VarDelim& var_delim) const;
+
+    /**
+     * Gets the variable delimiter at the given index
+     * @param var_ix The index of the variable delimiter to get
+     * @return The variable delimiter, or LogTypeDictionaryEntry::VarDelim::Length if var_ix is out
+     * of bounds
+     */
+    VarDelim get_var_delim(size_t var_ix) const;
+
+    /**
+     * Gets the length of the specified variable's representation in the logtype
+     * @param var_ix The index of the variable
+     * @return The length
+     */
+    size_t get_var_length_in_logtype(size_t var_ix) const;
+
+    /**
+     * Gets the size (in-memory) of the data contained in this entry
+     * @return Size of the data contained in this entry
+     */
+    size_t get_data_size() const;
+
+    /**
+     * Adds a constant to the logtype
+     * @param value_containing_constant
+     * @param begin_pos Start of the constant in value_containing_constant
+     * @param length
+     */
+    void
+    add_constant(std::string const& value_containing_constant, size_t begin_pos, size_t length);
+
+    /**
+     * Adds a non-double variable delimiter
+     */
+    void add_non_double_var();
+
+    /**
+     * Adds a double variable delimiter
+     */
+    void add_double_var();
+
+    /**
+     * Parses next variable from a message, constructing the constant part of the message's logtype
+     * as well
+     * @param msg
+     * @param var_begin_pos Beginning position of last variable. Changes to beginning position of
+     * current variable.
+     * @param var_end_pos End position of last variable (exclusive). Changes to end position of
+     * current variable.
+     * @param var
+     * @return true if another variable was found, false otherwise
+     */
+    bool parse_next_var(
+            std::string const& msg,
+            size_t& var_begin_pos,
+            size_t& var_end_pos,
+            std::string& var
+    );
+
+    /**
+     * Reserves space for a constant of the given length
+     * @param length
+     */
+    void reserve_constant_length(size_t length) { m_value.reserve(length); }
+
+    void set_id(uint64_t id) { m_id = id; }
+
+    /**
+     * Clears the entry
+     */
+    void clear();
+
+    /**
+     * Writes an entry to a compressed file
+     * @param compressor
+     */
+    void write_to_file(ZstdCompressor& compressor) const;
+
+    /**
+     * Tries to read an entry from the given decompressor, assigning it the given id
+     * @param decompressor
+     * @param lazy If true, the escaped value is stored as read, without being decoded
+     * @return Same as ZstdDecompressor::try_read_numeric_value or ZstdDecompressor::try_read_string
+     */
+    ErrorCode try_read_from_file(ZstdDecompressor& decompressor, uint64_t id, bool lazy);
+
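+    /**
+     * Reads an entry with the given id from the decompressor; throws OperationFailed on failure
+     * @param decompressor
+     * @param lazy If true, store the escaped value without decoding it (see decode_log_type())
+     */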
+    void read_from_file(ZstdDecompressor& decompressor, uint64_t id, bool lazy);
+
+    /**
+     * Decodes the log type
+     * @param escaped_value
+     */
+    void decode_log_type(std::string& escaped_value);
+
+    /**
+     * Decodes the log type
+     */
+    void decode_log_type();
+
+    /**
+     * Checks if the entry has been initialized
+     * @return true if the entry has been initialized, false otherwise
+     */
+    bool initialized() const { return m_init; }
+
+private:
+    // Methods
+    /**
+     * Escapes any variable delimiters that don't correspond to the positions of variables in the
+     * logtype entry's value
+     * @param escaped_logtype_value
+     */
+    void get_value_with_unfounded_variables_escaped(std::string& escaped_logtype_value) const;
+
+    /**
+     * Escapes any variable delimiters in the identified portion of the given value
+     * @param value
+     * @param begin_ix
+     * @param end_ix
+     * @param escaped_value
+     */
+    static void escape_variable_delimiters(
+            std::string const& value,
+            size_t begin_ix,
+            size_t end_ix,
+            std::string& escaped_value
+    );
+
+    // Variables
+    std::vector<size_t> m_var_positions;
+    bool m_init;
+};
+
+class VariableDictionaryEntry : public DictionaryEntry<uint64_t> {
+public:
+    // Types
+    class OperationFailed : public TraceableException {
+    public:
+        // Constructors
+        OperationFailed(ErrorCode error_code, char const* const filename, int line_number)
+                : TraceableException(error_code, filename, line_number) {}
+    };
+
+    // Constructors
+    VariableDictionaryEntry() = default;
+
+    VariableDictionaryEntry(std::string value, uint64_t id)
+            : DictionaryEntry<uint64_t>(std::move(value), id) {}
+
+    // Use default copy constructor
+    VariableDictionaryEntry(VariableDictionaryEntry const&) = default;
+
+    // Assignment operators
+    // Use default
+    VariableDictionaryEntry& operator=(VariableDictionaryEntry const&) = default;
+
+    // Methods
+    /**
+     * Gets the size (in-memory) of the data contained in this entry
+     * @return Size of the data contained in this entry
+     */
+    size_t get_data_size() const;
+
+    /**
+     * Clears the entry
+     */
+    void clear() { m_value.clear(); }
+
+    /**
+     * Writes an entry to a compressed file
+     * @param compressor
+     */
+    void write_to_file(ZstdCompressor& compressor) const;
+
+    /**
+     * Tries to read an entry from the given decompressor, assigning it the given id
+     * @param decompressor
+     * @param id
+     * @return Same as ZstdDecompressor::try_read_numeric_value or ZstdDecompressor::try_read_string
+     */
+    ErrorCode try_read_from_file(ZstdDecompressor& decompressor, uint64_t id);
+
+    /**
+     * Reads an entry from the given decompressor. lazy is unused; throws OperationFailed on error
+     * @param decompressor
+     */
+    void read_from_file(ZstdDecompressor& decompressor, uint64_t id, bool lazy);
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_DICTIONARYENTRY_HPP
diff --git a/components/core/src/clp_s/DictionaryReader.hpp b/components/core/src/clp_s/DictionaryReader.hpp
new file mode 100644
index 000000000..175214d88
--- /dev/null
+++ b/components/core/src/clp_s/DictionaryReader.hpp
@@ -0,0 +1,210 @@
+// Code from CLP
+
+#ifndef CLP_S_DICTIONARYREADER_HPP
+#define CLP_S_DICTIONARYREADER_HPP
+
+#include <unordered_set>
+
+#include <boost/algorithm/string/case_conv.hpp>
+
+#include "DictionaryEntry.hpp"
+#include "Utils.hpp"
+
+namespace clp_s {
+template <typename DictionaryIdType, typename EntryType>
+class DictionaryReader {
+public:
+    // Types
+    class OperationFailed : public TraceableException {
+    public:
+        // Constructors
+        OperationFailed(ErrorCode error_code, char const* const filename, int line_number)
+                : TraceableException(error_code, filename, line_number) {}
+    };
+
+    // Constructors
+    DictionaryReader() : m_is_open(false) {}
+
+    // Methods
+    /**
+     * Opens dictionary for reading
+     * @param dictionary_path
+     */
+    void open(std::string const& dictionary_path);
+
+    /**
+     * Closes the dictionary
+     */
+    void close();
+
+    /**
+     * Reads any new entries from disk
+     */
+    void read_new_entries(bool lazy = false);
+
+    /**
+     * @return All dictionary entries
+     */
+    std::vector<EntryType> const& get_entries() const { return m_entries; }
+
+    /**
+     * @param id
+     * @return The entry with the given ID
+     */
+    EntryType& get_entry(DictionaryIdType id);
+
+    /**
+     * @param id
+     * @return Value of the entry with the specified ID
+     */
+    std::string const& get_value(DictionaryIdType id) const;
+
+    /**
+     * Gets the entry exactly matching the given search string
+     * @param search_string
+     * @param ignore_case
+     * @return nullptr if an exact match is not found, the entry otherwise
+     */
+    EntryType const*
+    get_entry_matching_value(std::string const& search_string, bool ignore_case) const;
+
+    /**
+     * Gets the entries that match a given wildcard string
+     * @param wildcard_string
+     * @param ignore_case
+     * @param entries Set in which to store found entries
+     */
+    void get_entries_matching_wildcard_string(
+            std::string const& wildcard_string,
+            bool ignore_case,
+            std::unordered_set<EntryType const*>& entries
+    ) const;
+
+protected:
+    bool m_is_open;
+    FileReader m_dictionary_file_reader;
+    ZstdDecompressor m_dictionary_decompressor;
+    std::vector<EntryType> m_entries;
+};
+
+class VariableDictionaryReader : public DictionaryReader<uint64_t, VariableDictionaryEntry> {};
+
+class LogTypeDictionaryReader : public DictionaryReader<uint64_t, LogTypeDictionaryEntry> {};
+
+template <typename DictionaryIdType, typename EntryType>
+void DictionaryReader<DictionaryIdType, EntryType>::open(std::string const& dictionary_path) {
+    if (m_is_open) {
+        throw OperationFailed(ErrorCodeNotReady, __FILENAME__, __LINE__);
+    }
+
+    constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024;  // 64 KB
+
+    m_dictionary_file_reader.open(dictionary_path);
+    // Skip header
+    m_dictionary_file_reader.seek_from_begin(sizeof(uint64_t));
+    // Open decompressor
+    m_dictionary_decompressor.open(m_dictionary_file_reader, cDecompressorFileReadBufferCapacity);
+
+    m_is_open = true;
+}
+
+template <typename DictionaryIdType, typename EntryType>
+void DictionaryReader<DictionaryIdType, EntryType>::close() {
+    if (false == m_is_open) {
+        throw OperationFailed(ErrorCodeNotReady, __FILENAME__, __LINE__);
+    }
+
+    m_dictionary_decompressor.close();
+    m_dictionary_file_reader.close();
+
+    m_is_open = false;
+}
+
+template <typename DictionaryIdType, typename EntryType>
+void DictionaryReader<DictionaryIdType, EntryType>::read_new_entries(bool lazy) {
+    if (false == m_is_open) {
+        throw OperationFailed(ErrorCodeNotInit, __FILENAME__, __LINE__);
+    }
+
+    auto dictionary_file_reader_pos = m_dictionary_file_reader.get_pos();
+    m_dictionary_file_reader.seek_from_begin(0);
+    uint64_t num_dictionary_entries;
+    m_dictionary_file_reader.read_numeric_value(num_dictionary_entries, false);
+    m_dictionary_file_reader.seek_from_begin(dictionary_file_reader_pos);
+
+    // Validate dictionary header
+    if (num_dictionary_entries < m_entries.size()) {
+        throw OperationFailed(ErrorCodeCorrupt, __FILENAME__, __LINE__);
+    }
+
+    // Read new dictionary entries
+    if (num_dictionary_entries > m_entries.size()) {
+        auto prev_num_dictionary_entries = m_entries.size();
+        m_entries.resize(num_dictionary_entries);
+
+        for (size_t i = prev_num_dictionary_entries; i < num_dictionary_entries; ++i) {
+            auto& entry = m_entries[i];
+            entry.read_from_file(m_dictionary_decompressor, i, lazy);
+        }
+    }
+}
+
+template <typename DictionaryIdType, typename EntryType>
+EntryType& DictionaryReader<DictionaryIdType, EntryType>::get_entry(DictionaryIdType id) {
+    if (false == m_is_open) {
+        throw OperationFailed(ErrorCodeNotInit, __FILENAME__, __LINE__);
+    }
+    if (id >= m_entries.size()) {
+        throw OperationFailed(ErrorCodeBadParam, __FILENAME__, __LINE__);
+    }
+
+    return m_entries[id];
+}
+
+template <typename DictionaryIdType, typename EntryType>
+std::string const& DictionaryReader<DictionaryIdType, EntryType>::get_value(DictionaryIdType id
+) const {
+    if (id >= m_entries.size()) {
+        throw OperationFailed(ErrorCodeCorrupt, __FILENAME__, __LINE__);
+    }
+    return m_entries[id].get_value();
+}
+
+template <typename DictionaryIdType, typename EntryType>
+EntryType const* DictionaryReader<DictionaryIdType, EntryType>::get_entry_matching_value(
+        std::string const& search_string,
+        bool ignore_case
+) const {
+    if (ignore_case) {
+        // Case-insensitive: compare upper-cased copies of both strings
+        auto const needle_upper = boost::algorithm::to_upper_copy(search_string);
+        for (auto const& candidate : m_entries) {
+            if (needle_upper == boost::algorithm::to_upper_copy(candidate.get_value())) {
+                return &candidate;
+            }
+        }
+        return nullptr;
+    }
+    for (auto const& candidate : m_entries) {
+        if (search_string == candidate.get_value()) {
+            return &candidate;
+        }
+    }
+    return nullptr;
+}
+
+template <typename DictionaryIdType, typename EntryType>
+void DictionaryReader<DictionaryIdType, EntryType>::get_entries_matching_wildcard_string(
+        std::string const& wildcard_string,
+        bool ignore_case,
+        std::unordered_set<EntryType const*>& entries
+) const {
+    for (auto const& entry : m_entries) {
+        if (StringUtils::wildcard_match_unsafe(entry.get_value(), wildcard_string, !ignore_case)) {
+            entries.insert(&entry);
+        }
+    }
+}
+}  // namespace clp_s
+
+#endif  // CLP_S_DICTIONARYREADER_HPP
diff --git a/components/core/src/clp_s/DictionaryWriter.cpp b/components/core/src/clp_s/DictionaryWriter.cpp
new file mode 100644
index 000000000..31a4ec430
--- /dev/null
+++ b/components/core/src/clp_s/DictionaryWriter.cpp
@@ -0,0 +1,67 @@
+// Code from CLP
+
+#include "DictionaryWriter.hpp"
+
+namespace clp_s {
+bool VariableDictionaryWriter::add_entry(std::string const& value, uint64_t& id) {
+    // Known value: report its existing ID and leave the dictionary untouched
+    auto const existing = m_value_to_id.find(value);
+    if (m_value_to_id.end() != existing) {
+        id = existing->second;
+        return false;
+    }
+
+    // Unknown value: make sure the ID space isn't exhausted before allocating a new ID
+    if (m_next_id > m_max_id) {
+        SPDLOG_ERROR("VariableDictionaryWriter ran out of IDs.");
+        throw OperationFailed(ErrorCodeOutOfBounds, __FILENAME__, __LINE__);
+    }
+
+    // Allocate the next free ID
+    id = m_next_id;
+    ++m_next_id;
+
+    // Build the entry first, then remember which ID the value maps to
+    VariableDictionaryEntry entry(value, id);
+    m_value_to_id[value] = id;
+
+    // TODO: This doesn't account for the segment index that's constantly updated
+    m_data_size += entry.get_data_size();
+
+    // Hand the entry to the dictionary's compressor for writing
+    entry.write_to_file(m_dictionary_compressor);
+
+    // Tell the caller that a new entry was created
+    return true;
+}
+
+bool LogTypeDictionaryWriter::add_entry(
+        LogTypeDictionaryEntry& logtype_entry,
+        uint64_t& logtype_id
+) {
+    std::string const& value = logtype_entry.get_value();
+    auto const ix = m_value_to_id.find(value);
+    if (m_value_to_id.end() != ix) {
+        // Entry exists so get its ID
+        logtype_id = ix->second;
+        return false;
+    }
+
+    // Entry doesn't exist so create it, honouring the ID limit given to open() in the same way
+    // as VariableDictionaryWriter::add_entry
+    if (m_next_id > m_max_id) {
+        SPDLOG_ERROR("LogTypeDictionaryWriter ran out of IDs.");
+        throw OperationFailed(ErrorCodeOutOfBounds, __FILENAME__, __LINE__);
+    }
+    // Assign ID
+    logtype_id = m_next_id;
+    ++m_next_id;
+    logtype_entry.set_id(logtype_id);
+    // Insert new entry into dictionary
+    m_value_to_id[value] = logtype_id;
+
+    // TODO: This doesn't account for the segment index that's constantly updated
+    m_data_size += logtype_entry.get_data_size();
+    logtype_entry.write_to_file(m_dictionary_compressor);
+    return true;
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/DictionaryWriter.hpp b/components/core/src/clp_s/DictionaryWriter.hpp
new file mode 100644
index 000000000..3fb9ec4d1
--- /dev/null
+++ b/components/core/src/clp_s/DictionaryWriter.hpp
@@ -0,0 +1,158 @@
+// Code from CLP
+
+#ifndef CLP_S_DICTIONARYWRITER_HPP
+#define CLP_S_DICTIONARYWRITER_HPP
+
+#include "DictionaryEntry.hpp"
+
+namespace clp_s {
+template <typename DictionaryIdType, typename EntryType>
+class DictionaryWriter {
+public:
+    // Types
+    class OperationFailed : public TraceableException {
+    public:
+        // Constructors
+        OperationFailed(ErrorCode error_code, char const* const filename, int line_number)
+                : TraceableException(error_code, filename, line_number) {}
+    };
+
+    // Constructors (a new writer starts out closed; call open() before using it)
+    DictionaryWriter() : m_is_open(false) {}
+
+    ~DictionaryWriter() = default;
+
+    // Methods
+    /**
+     * Opens the dictionary for writing; throws OperationFailed (ErrorCodeNotReady) if already open
+     * @param dictionary_path Path of the dictionary file to create
+     * @param compression_level Level passed to the dictionary's compressor
+     * @param max_id Upper bound on the IDs the dictionary may assign
+     */
+    void open(std::string const& dictionary_path, int compression_level, DictionaryIdType max_id);
+
+    /**
+     * Writes the header, flushes and closes the dictionary; throws OperationFailed if it isn't open
+     */
+    void close();
+
+    /**
+     * Rewrites the header (the number of entries) and flushes unwritten content to disk
+     */
+
+    void write_header_and_flush_to_disk();  // throws OperationFailed if the dictionary isn't open
+
+    /**
+     * @return The size (in-memory) of the data contained in the dictionary
+     */
+    size_t get_data_size() const { return m_data_size; }
+
+protected:
+    // Types
+    typedef std::unordered_map<std::string, DictionaryIdType> value_to_id_t;
+
+    // Variables
+    bool m_is_open;
+
+    // Variables related to on-disk storage (the compressor is opened on top of the file writer)
+    FileWriter m_dictionary_file_writer;
+    ZstdCompressor m_dictionary_compressor;
+
+    value_to_id_t m_value_to_id;  // Maps each value to the ID assigned to it
+    uint64_t m_next_id{};  // ID that the next new entry will receive
+    uint64_t m_max_id{};  // Set from the max_id passed to open()
+
+    // Size (in-memory) of the data contained in the dictionary
+    size_t m_data_size{};
+};
+
+class VariableDictionaryWriter : public DictionaryWriter<uint64_t, VariableDictionaryEntry> {
+public:
+    class OperationFailed : public TraceableException {
+    public:
+        // Constructors
+        OperationFailed(ErrorCode error_code, char const* const filename, int line_number)
+                : TraceableException(error_code, filename, line_number) {}
+    };
+
+    /**
+     * Adds the given variable to the dictionary if it doesn't exist; returns true if it was added
+     * @param value Value of the variable to look up or add
+     * @param id Receives the ID of the variable, whether it is new or already existed
+     */
+    bool add_entry(std::string const& value, uint64_t& id);  // throws if the IDs are exhausted
+};
+
+class LogTypeDictionaryWriter : public DictionaryWriter<uint64_t, LogTypeDictionaryEntry> {
+public:
+    class OperationFailed : public TraceableException {
+    public:
+        // Constructors
+        OperationFailed(ErrorCode error_code, char const* const filename, int line_number)
+                : TraceableException(error_code, filename, line_number) {}
+    };
+
+    /**
+     * Adds the given entry to the dictionary if it doesn't exist; returns true if it was added
+     * @param logtype_entry Entry to look up by value; a newly added entry gets its ID set
+     * @param logtype_id Receives the ID of the logtype matching the given entry
+     */
+    bool add_entry(LogTypeDictionaryEntry& logtype_entry, uint64_t& logtype_id);
+};
+
+template <typename DictionaryIdType, typename EntryType>
+void DictionaryWriter<DictionaryIdType, EntryType>::open(
+        std::string const& dictionary_path,
+        int compression_level,
+        DictionaryIdType max_id
+) {
+    if (true == m_is_open) {
+        throw OperationFailed(ErrorCodeNotReady, __FILENAME__, __LINE__);
+    }
+
+    // Write a placeholder header (entry count 0); it's rewritten when the dictionary is flushed
+    m_dictionary_file_writer.open(dictionary_path, FileWriter::OpenMode::CreateForWriting);
+    m_dictionary_file_writer.write_numeric_value<uint64_t>(0);
+    // Entries are written through the compressor, which is opened on top of the file writer
+    m_dictionary_compressor.open(m_dictionary_file_writer, compression_level);
+
+    // Reset the bookkeeping for the freshly-opened dictionary
+    m_next_id = 0;
+    m_max_id = max_id;
+    m_data_size = 0;
+    m_is_open = true;
+}
+
+template <typename DictionaryIdType, typename EntryType>
+void DictionaryWriter<DictionaryIdType, EntryType>::close() {
+    if (!m_is_open) {
+        throw OperationFailed(ErrorCodeNotInit, __FILENAME__, __LINE__);
+    }
+
+    // Finalize the on-disk contents before tearing down the compressor and then the file
+    write_header_and_flush_to_disk();
+    m_dictionary_compressor.close();
+    m_dictionary_file_writer.close();
+    // Drop the in-memory value-to-ID map and mark the dictionary as closed
+    m_value_to_id.clear();
+    m_is_open = false;
+}
+
+template <typename DictionaryIdType, typename EntryType>
+void DictionaryWriter<DictionaryIdType, EntryType>::write_header_and_flush_to_disk() {
+    if (!m_is_open) {
+        throw OperationFailed(ErrorCodeNotInit, __FILENAME__, __LINE__);
+    }
+
+    // Overwrite the header at offset 0 with the current entry count, then return to where we were
+    auto const resume_pos = m_dictionary_file_writer.get_pos();
+    m_dictionary_file_writer.seek_from_begin(0);
+    m_dictionary_file_writer.write_numeric_value<uint64_t>(m_value_to_id.size());
+    m_dictionary_file_writer.seek_from_begin(resume_pos);
+
+    m_dictionary_compressor.flush();
+    m_dictionary_file_writer.flush();
+}
+}  // namespace clp_s
+
+#endif  // CLP_S_DICTIONARYWRITER_HPP
diff --git a/components/core/src/clp_s/ErrorCode.hpp b/components/core/src/clp_s/ErrorCode.hpp
new file mode 100644
index 000000000..be2c78e73
--- /dev/null
+++ b/components/core/src/clp_s/ErrorCode.hpp
@@ -0,0 +1,31 @@
+// Code from CLP
+
+#ifndef CLP_S_ERRORCODE_HPP
+#define CLP_S_ERRORCODE_HPP
+
+namespace clp_s {
+enum ErrorCode {
+    ErrorCodeSuccess = 0,
+    ErrorCodeBadParam,
+    ErrorCodeBadParamDbUri,
+    ErrorCodeCorrupt,
+    ErrorCodeErrno,
+    ErrorCodeEndOfFile,
+    ErrorCodeFileExists,
+    ErrorCodeFileNotFound,
+    ErrorCodeNoMem,
+    ErrorCodeNotInit,
+    ErrorCodeNotReady,
+    ErrorCodeOutOfBounds,
+    ErrorCodeTooLong,
+    ErrorCodeTruncated,
+    ErrorCodeUnsupported,
+    ErrorCodeNoAccess,
+    ErrorCodeFailure,
+    ErrorCodeFailureMetadataCorrupted,
+    ErrorCodeMetadataCorrupted,
+    ErrorCodeFailureDbBulkWrite
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_ERRORCODE_HPP
diff --git a/components/core/src/clp_s/FileReader.cpp b/components/core/src/clp_s/FileReader.cpp
new file mode 100644
index 000000000..91bafed0a
--- /dev/null
+++ b/components/core/src/clp_s/FileReader.cpp
@@ -0,0 +1,150 @@
+// Code from CLP
+
+#include "FileReader.hpp"
+
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <cassert>
+#include <cerrno>
+
+using std::string;
+
+namespace clp_s {
+FileReader::~FileReader() {
+    close();  // closes the file if one is still open
+    free(m_getdelim_buf);  // buffer handed to getdelim(); it is still nullptr if never used
+}
+
+ErrorCode FileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) {
+    if (nullptr == m_file) {
+        return ErrorCodeNotInit;
+    }
+    if (nullptr == buf) {
+        return ErrorCodeBadParam;
+    }
+
+    num_bytes_read = fread(buf, sizeof(char), num_bytes_to_read, m_file);
+    if (num_bytes_read >= num_bytes_to_read) {
+        return ErrorCodeSuccess;
+    }
+    // Short read: report stream errors, and EOF only if nothing at all was read
+    if (ferror(m_file)) {
+        return ErrorCodeErrno;
+    }
+    if (feof(m_file) && 0 == num_bytes_read) {
+        return ErrorCodeEndOfFile;
+    }
+    return ErrorCodeSuccess;  // a partial read still counts as success
+}
+
+ErrorCode FileReader::try_seek_from_begin(size_t pos) {
+    if (nullptr == m_file) {
+        return ErrorCodeNotInit;
+    }
+
+    // fseeko returns 0 on success and non-zero on failure
+    if (0 != fseeko(m_file, pos, SEEK_SET)) {
+        return ErrorCodeErrno;
+    }
+
+    return ErrorCodeSuccess;
+}
+
+ErrorCode FileReader::try_get_pos(size_t& pos) {
+    if (nullptr == m_file) {
+        return ErrorCodeNotInit;
+    }
+    // Test ftello's -1 failure sentinel in its own signed type, before converting to size_t
+    off_t const offset = ftello(m_file);
+    if (-1 == offset) {
+        return ErrorCodeErrno;
+    }
+    pos = static_cast<size_t>(offset);  // only written on success
+    return ErrorCodeSuccess;
+}
+
+ErrorCode FileReader::try_open(string const& path) {
+    // Release any file left open by an earlier open() before replacing the handle
+    close();
+
+    m_file = fopen(path.c_str(), "rb");
+    if (nullptr != m_file) {
+        return ErrorCodeSuccess;
+    }
+    // fopen failed: report a missing file separately from every other errno value
+    if (ENOENT == errno) {
+        return ErrorCodeFileNotFound;
+    }
+    return ErrorCodeErrno;
+}
+
+void FileReader::open(string const& path) {
+    auto const open_result = try_open(path);
+    if (ErrorCodeSuccess != open_result) {
+        throw OperationFailed(open_result, __FILENAME__, __LINE__);
+    }
+}
+
+void FileReader::close() {
+    if (nullptr == m_file) {
+        return;
+    }
+    // fclose errors are ignored: seemingly it can only fail if interrupted by a signal
+    fclose(m_file);
+    m_file = nullptr;
+}
+
+ErrorCode
+FileReader::try_read_to_delimiter(char delim, bool keep_delimiter, bool append, string& str) {
+    assert(nullptr != m_file);
+
+    if (false == append) {
+        str.clear();
+    }
+    ssize_t num_bytes_read = getdelim(&m_getdelim_buf, &m_getdelim_buf_len, delim, m_file);
+    if (num_bytes_read < 1) {
+        // Nothing was read, so the buffer must not be indexed or appended below. A clean EOF is
+        // the only non-error cause; anything else (e.g. allocation failure) is reported as errno.
+        if (0 == ferror(m_file) && 0 != feof(m_file)) {
+            return ErrorCodeEndOfFile;
+        }
+        return ErrorCodeErrno;
+    }
+    if (false == keep_delimiter && delim == m_getdelim_buf[num_bytes_read - 1]) {
+        --num_bytes_read;
+    }
+    str.append(m_getdelim_buf, num_bytes_read);
+    return ErrorCodeSuccess;
+}
+
+ErrorCode FileReader::try_read_exact_length(char* buf, size_t num_bytes) {
+    size_t bytes_obtained;
+    auto const read_result = try_read(buf, num_bytes, bytes_obtained);
+    if (ErrorCodeSuccess != read_result) {
+        return read_result;
+    }
+    // try_read reports success for partial reads, so a short count here means truncated input
+    if (bytes_obtained < num_bytes) {
+        return ErrorCodeTruncated;
+    }
+    return ErrorCodeSuccess;
+}
+
+size_t FileReader::get_pos() {
+    size_t current_pos;
+    auto const status = try_get_pos(current_pos);
+    if (ErrorCodeSuccess != status) {
+        throw OperationFailed(status, __FILENAME__, __LINE__);
+    }
+    // current_pos was filled in by try_get_pos on the success path
+    return current_pos;
+}
+
+void FileReader::seek_from_begin(size_t pos) {
+    auto const seek_result = try_seek_from_begin(pos);
+    if (ErrorCodeSuccess != seek_result) {
+        throw OperationFailed(seek_result, __FILENAME__, __LINE__);
+    }
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/FileReader.hpp b/components/core/src/clp_s/FileReader.hpp
new file mode 100644
index 000000000..59e88eaec
--- /dev/null
+++ b/components/core/src/clp_s/FileReader.hpp
@@ -0,0 +1,166 @@
+// Code from CLP
+
+#ifndef CLP_S_FILEREADER_HPP
+#define CLP_S_FILEREADER_HPP
+
+#include <cstdio>
+#include <string>
+
+#include "ErrorCode.hpp"
+#include "TraceableException.hpp"
+
+namespace clp_s {
+class FileReader {
+public:
+    // Types
+    class OperationFailed : public TraceableException {
+    public:
+        // Constructors
+        OperationFailed(ErrorCode error_code, char const* const filename, int line_number)
+                : TraceableException(error_code, filename, line_number) {}
+    };
+
+    // Constructor (the reader starts out with no file open)
+    FileReader() : m_file(nullptr), m_getdelim_buf_len(0), m_getdelim_buf(nullptr) {}
+
+    // Destructor: closes the file, frees the getdelim buffer. NOTE(review): copying isn't deleted
+    ~FileReader();
+
+    // Methods implementing the ReaderInterface
+    /**
+     * Tries to get the current position of the read head in the file
+     * @param pos Position of the read head in the file
+     * @return ErrorCodeNotInit if the file is not open
+     * @return ErrorCodeErrno on error
+     * @return ErrorCodeSuccess on success
+     */
+    ErrorCode try_get_pos(size_t& pos);
+
+    /**
+     * Tries to seek from the beginning of the file to the given position
+     * @param pos The position to seek to
+     * @return ErrorCodeNotInit if the file is not open
+     * @return ErrorCodeErrno on error
+     * @return ErrorCodeSuccess on success
+     */
+    ErrorCode try_seek_from_begin(size_t pos);
+
+    /**
+     * Tries to read up to a given number of bytes from the file
+     * @param buf The buffer to read into
+     * @param num_bytes_to_read The number of bytes to try and read
+     * @param num_bytes_read The actual number of bytes read
+     * @return ErrorCodeNotInit if the file is not open
+     * @return ErrorCodeBadParam if buf is invalid
+     * @return ErrorCodeErrno on error
+     * @return ErrorCodeEndOfFile if EOF was reached before any bytes could be read
+     * @return ErrorCodeSuccess on success, including reads shorter than requested
+     */
+    ErrorCode try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read);
+
+    /**
+     * Tries to read a string from the file until it reaches the specified delimiter
+     * @param delim The delimiter to stop at
+     * @param keep_delimiter Whether to include the delimiter in the output string or not
+     * @param append Whether to append to the given string or replace its contents
+     * @param str The string read
+     * @return ErrorCodeSuccess on success
+     * @return ErrorCodeEndOfFile on EOF
+     * @return ErrorCodeErrno otherwise
+     */
+    ErrorCode try_read_to_delimiter(char delim, bool keep_delimiter, bool append, std::string& str);
+
+    /**
+     * Tries to read a number of bytes
+     * @param buf The buffer to read into
+     * @param num_bytes Number of bytes to read
+     * @return Same as the underlying medium's try_read method
+     * @return ErrorCodeTruncated if 0 < # bytes read < num_bytes
+     */
+    ErrorCode try_read_exact_length(char* buf, size_t num_bytes);
+
+    /**
+     * Tries to read a numeric value (its bytes are read directly into the value's storage)
+     * @tparam ValueType The type of the value to read
+     * @param value The value read
+     * @return Same as the underlying medium's try_read_exact_length method
+     */
+    template <typename ValueType>
+    ErrorCode try_read_numeric_value(ValueType& value) {
+        ErrorCode error_code
+                = try_read_exact_length(reinterpret_cast<char*>(&value), sizeof(value));
+        if (ErrorCodeSuccess != error_code) {
+            return error_code;
+        }
+        return ErrorCodeSuccess;
+    }
+
+    /**
+     * Reads a numeric value
+     * @tparam ValueType The type of the value to read
+     * @param value The value read
+     * @param eof_possible Whether EOF is possible or not
+     * @return true on success
+     * @return false on EOF if eof_possible is true
+     * @throw FileReader::OperationFailed on failure
+     */
+    template <typename ValueType>
+    bool read_numeric_value(ValueType& value, bool eof_possible) {
+        ErrorCode error_code = try_read_numeric_value(value);
+        if (ErrorCodeEndOfFile == error_code && eof_possible) {
+            return false;
+        }
+        if (ErrorCodeSuccess != error_code) {
+            throw OperationFailed(error_code, __FILENAME__, __LINE__);
+        }
+        return true;
+    }
+
+    // Methods
+    /**
+     * Checks if the file is open
+     * @return true if the file is open, false otherwise
+     */
+    bool is_open() const { return m_file != nullptr; }
+
+    /**
+     * Tries to open a file for reading, first closing any file that is already open
+     * @param path
+     * @return ErrorCodeSuccess on success
+     * @return ErrorCodeFileNotFound if the file was not found
+     * @return ErrorCodeErrno otherwise
+     */
+    ErrorCode try_open(std::string const& path);
+
+    /**
+     * Opens a file
+     * @param path
+     * @throw FileReader::OperationFailed on failure
+     */
+    void open(std::string const& path);
+
+    /**
+     * Closes the file if it's open
+     */
+    void close();
+
+    /**
+     * Gets the current position of the read head; throws FileReader::OperationFailed on failure
+     * @return Position of the read head
+     */
+    size_t get_pos();
+
+    /**
+     * Seeks from the beginning to the given position; throws FileReader::OperationFailed on failure
+     * @param pos
+     */
+    void seek_from_begin(size_t pos);
+
+private:
+    FILE* m_file;  // nullptr while no file is open
+    size_t m_getdelim_buf_len;  // Size of m_getdelim_buf, maintained by getdelim
+    char* m_getdelim_buf;  // Buffer handed to getdelim; freed in the destructor
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_FILEREADER_HPP
diff --git a/components/core/src/clp_s/FileWriter.cpp b/components/core/src/clp_s/FileWriter.cpp
new file mode 100644
index 000000000..49540881b
--- /dev/null
+++ b/components/core/src/clp_s/FileWriter.cpp
@@ -0,0 +1,165 @@
+// Code from CLP
+
+#include "FileWriter.hpp"
+
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <cerrno>
+
+#include <spdlog/spdlog.h>
+
+using std::string;
+
+namespace clp_s {
+FileWriter::~FileWriter() {
+    if (nullptr != m_file) {  // still open: this only reports it, the file isn't closed here
+        SPDLOG_ERROR("FileWriter not closed before being destroyed - may cause data loss");
+    }
+}
+
+void FileWriter::write(char const* data, size_t data_length) {
+    // Work out the outcome first so that every failure is thrown from a single place
+    auto status = ErrorCodeSuccess;
+    if (nullptr == m_file) {
+        status = ErrorCodeNotInit;
+    } else if (nullptr == data) {
+        status = ErrorCodeBadParam;
+    } else if (fwrite(data, sizeof(char), data_length, m_file) < data_length) {
+        // fwrite wrote fewer bytes than requested
+        status = ErrorCodeErrno;
+    }
+
+    if (ErrorCodeSuccess != status) {
+        throw OperationFailed(status, __FILENAME__, __LINE__);
+    }
+}
+
+void FileWriter::flush() {
+    // Flushing is compiled out unless FLUSH_TO_DISK_ENABLED is defined to a non-zero value
+#if FLUSH_TO_DISK_ENABLED
+    // Flush userspace buffers to page cache
+    if (0 != fflush(m_file)) {
+        SPDLOG_ERROR("fflush failed, errno={}", errno);
+        throw OperationFailed(ErrorCodeErrno, __FILENAME__, __LINE__);
+    }
+
+    // Flush page cache pages to disk
+    if (0 != fsync(m_fd)) {
+        SPDLOG_ERROR("fsync failed, errno={}", errno);
+        throw OperationFailed(ErrorCodeErrno, __FILENAME__, __LINE__);
+    }
+#endif
+}
+
+size_t FileWriter::get_pos() {
+    size_t current_pos;
+    auto const status = try_get_pos(current_pos);
+    if (ErrorCodeSuccess != status) {
+        throw OperationFailed(status, __FILENAME__, __LINE__);
+    }
+    // current_pos was filled in by try_get_pos on the success path
+    return current_pos;
+}
+
+ErrorCode FileWriter::try_get_pos(size_t& pos) const {
+    if (nullptr == m_file) {
+        return ErrorCodeNotInit;
+    }
+    // Test ftello's -1 failure sentinel in its own signed type, before converting to size_t
+    off_t const offset = ftello(m_file);
+    if (-1 == offset) {
+        return ErrorCodeErrno;
+    }
+    pos = static_cast<size_t>(offset);  // only written on success
+    return ErrorCodeSuccess;
+}
+
+void FileWriter::seek_from_begin(size_t pos) {
+    ErrorCode const seek_result = try_seek_from_begin(pos);
+    if (ErrorCodeSuccess != seek_result) {
+        throw OperationFailed(seek_result, __FILENAME__, __LINE__);
+    }
+}
+
+ErrorCode FileWriter::try_seek_from_begin(size_t pos) {
+    if (nullptr == m_file) {
+        return ErrorCodeNotInit;
+    }
+
+    // fseeko returns 0 on success and non-zero on failure
+    if (0 != fseeko(m_file, pos, SEEK_SET)) {
+        return ErrorCodeErrno;
+    }
+
+    return ErrorCodeSuccess;
+}
+
+ErrorCode FileWriter::try_seek_from_current(off_t offset) {
+    if (nullptr == m_file) {
+        return ErrorCodeNotInit;
+    }
+
+    // SEEK_CUR makes the offset relative to the current position
+    if (0 != fseeko(m_file, offset, SEEK_CUR)) {
+        return ErrorCodeErrno;
+    }
+
+    return ErrorCodeSuccess;
+}
+
+void FileWriter::open(string const& path, OpenMode open_mode) {
+    if (nullptr != m_file) {
+        throw OperationFailed(ErrorCodeNotInit, __FILENAME__, __LINE__);
+    }
+
+    switch (open_mode) {
+        case OpenMode::CreateForWriting:
+            m_file = fopen(path.c_str(), "wb");
+            break;
+        case OpenMode::CreateIfNonexistentForAppending:
+            m_file = fopen(path.c_str(), "ab");
+            break;
+        case OpenMode::CreateIfNonexistentForSeekableWriting: {
+            struct stat stat_buf = {};
+            if (0 == stat(path.c_str(), &stat_buf)) {
+                // File exists, so open it for seekable writing
+                m_file = fopen(path.c_str(), "r+b");
+            } else {
+                if (ENOENT != errno) {
+                    throw OperationFailed(ErrorCodeErrno, __FILENAME__, __LINE__);
+                }
+                // File doesn't exist, so create and open it for seekable writing
+                // NOTE: "w+" is only safe here because it would truncate an existing file
+                m_file = fopen(path.c_str(), "w+b");
+            }
+            // fopen may have failed (reported after the switch), so only seek on a valid handle
+            if (nullptr != m_file && 0 != fseek(m_file, 0, SEEK_END)) {
+                fclose(m_file);
+                m_file = nullptr;
+                throw OperationFailed(ErrorCodeErrno, __FILENAME__, __LINE__);
+            }
+            break;
+        }
+    }
+    if (nullptr == m_file) {
+        throw OperationFailed(ErrorCodeErrno, __FILENAME__, __LINE__);
+    }
+    m_fd = fileno(m_file);
+    if (-1 == m_fd) {
+        fclose(m_file);
+        m_file = nullptr;  // don't keep a pointer to the stream that was just closed
+        throw OperationFailed(ErrorCodeErrno, __FILENAME__, __LINE__);
+    }
+}
+
+void FileWriter::close() {
+    // fclose invalidates the stream even on failure, so forget the handle before reporting
+    FILE* const file = m_file;
+    m_file = nullptr;
+    m_fd = -1;
+    if (nullptr != file && 0 != fclose(file)) {
+        throw OperationFailed(ErrorCodeErrno, __FILENAME__, __LINE__);
+    }
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/FileWriter.hpp b/components/core/src/clp_s/FileWriter.hpp
new file mode 100644
index 000000000..edcd3a2c4
--- /dev/null
+++ b/components/core/src/clp_s/FileWriter.hpp
@@ -0,0 +1,122 @@
+// Code from CLP
+
+#ifndef CLP_S_FILEWRITER_HPP
+#define CLP_S_FILEWRITER_HPP
+
+#include <cstdio>
+#include <string>
+
+#include "ErrorCode.hpp"
+#include "TraceableException.hpp"
+
+namespace clp_s {
+class FileWriter {
+public:
+    // Types
+    enum class OpenMode {
+        CreateForWriting,
+        CreateIfNonexistentForAppending,
+        CreateIfNonexistentForSeekableWriting,
+    };
+
+    class OperationFailed : public TraceableException {
+    public:
+        // Constructors
+        OperationFailed(ErrorCode error_code, char const* const filename, int line_number)
+                : TraceableException(error_code, filename, line_number) {}
+    };
+
+    // Constructors (the writer starts out with no file open)
+    FileWriter() : m_file(nullptr), m_fd(-1) {}
+
+    // Destructor: only logs an error if the file is still open; it doesn't close it
+    ~FileWriter();
+
+    // Methods implementing the WriterInterface
+    /**
+     * Writes a buffer to the file
+     * @param data
+     * @param data_length Length of the buffer
+     * @throw FileWriter::OperationFailed on failure
+     */
+    void write(char const* data, size_t data_length);
+
+    /**
+     * Writes the raw in-memory bytes of a numeric value to the file
+     * @param val
+     * @tparam ValueType
+     */
+    template <typename ValueType>
+    void write_numeric_value(ValueType val) {
+        write(reinterpret_cast<char*>(&val), sizeof(val));
+    }
+
+    /**
+     * Flushes the file (a no-op unless FLUSH_TO_DISK_ENABLED is set at compile time)
+     * @throw FileWriter::OperationFailed on failure
+     */
+    void flush();
+
+    /**
+     * Gets the current position of the write head in the file
+     * @return Position of the write head in the file
+     * @throw FileWriter::OperationFailed on failure
+     */
+    size_t get_pos();
+
+    /**
+     * Tries to get the current position of the write head in the file
+     * @param pos Position of the write head in the file
+     * @return ErrorCodeNotInit if the file is not open
+     * @return ErrorCodeErrno on error
+     * @return ErrorCodeSuccess on success
+     */
+    ErrorCode try_get_pos(size_t& pos) const;
+
+    /**
+     * Seeks from the beginning of the file to the given position
+     * @param pos The position to seek to
+     * @throw FileWriter::OperationFailed on failure
+     */
+    void seek_from_begin(size_t pos);
+
+    /**
+     * Tries to seek from the beginning of the file to the given position
+     * @param pos
+     * @return ErrorCodeNotInit if the file is not open
+     * @return ErrorCodeErrno on error
+     * @return ErrorCodeSuccess on success
+     */
+    ErrorCode try_seek_from_begin(size_t pos);
+
+    /**
+     * Tries to offset from the current position by the given amount
+     * @param offset Signed offset relative to the current position
+     * @return ErrorCodeNotInit if the file is not open
+     * @return ErrorCodeErrno on error
+     * @return ErrorCodeSuccess on success
+     */
+    ErrorCode try_seek_from_current(off_t offset);
+
+    // Methods
+    /**
+     * Opens a file for writing
+     * @param path
+     * @param open_mode The mode to open the file with
+     * @throw FileWriter::OperationFailed on failure or if a file is already open
+     */
+    void open(std::string const& path, OpenMode open_mode);
+
+    /**
+     * Closes the file if it's open
+     * @throw FileWriter::OperationFailed on failure
+     */
+    void close();
+
+private:
+    FILE* m_file;  // nullptr while no file is open
+    int m_fd;  // File descriptor of m_file (used by flush()); -1 while no file is open
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_FILEWRITER_HPP
diff --git a/components/core/src/clp_s/JsonConstructor.cpp b/components/core/src/clp_s/JsonConstructor.cpp
new file mode 100644
index 000000000..433ed76c6
--- /dev/null
+++ b/components/core/src/clp_s/JsonConstructor.cpp
@@ -0,0 +1,72 @@
+#include "JsonConstructor.hpp"
+
+#include <boost/filesystem.hpp>
+
+#include "ReaderUtils.hpp"
+#include "SchemaTree.hpp"
+
+namespace clp_s {
+JsonConstructor::JsonConstructor(JsonConstructorOption const& option)
+        : m_archives_dir(option.archives_dir),
+          m_output_dir(option.output_dir),
+          m_current_archive_index(0),
+          m_max_archive_index(0) {
+    // An existing output directory is acceptable; only a genuine failure to create it is fatal
+    boost::system::error_code error_code;
+    boost::filesystem::create_directory(m_output_dir, error_code);
+    if (error_code) {
+        SPDLOG_ERROR("Can not create directory '{}'", m_output_dir);
+        exit(1);
+    }
+
+    if (false == boost::filesystem::is_directory(m_archives_dir)) {
+        SPDLOG_ERROR("'{}' is not a directory", m_archives_dir);
+        exit(1);
+    }
+
+    // Every subdirectory of the archives directory is treated as an archive
+    for (boost::filesystem::directory_iterator iter(m_archives_dir), end; iter != end; ++iter) {
+        if (boost::filesystem::is_directory(iter->path())) {
+            m_archive_paths.push_back(iter->path().string());
+        }
+    }
+
+    if (m_archive_paths.empty()) {
+        SPDLOG_ERROR("No archives in '{}'", m_archives_dir);
+        exit(1);
+    }
+    m_max_archive_index = m_archive_paths.size() - 1;
+}
+
+void JsonConstructor::construct() {
+    // Load the schema tree, schemas and timestamp dictionary via ReaderUtils
+    m_schema_tree = ReaderUtils::read_schema_tree(m_archives_dir);
+    auto id_to_schema = ReaderUtils::read_schemas(m_archives_dir);
+    auto timestamp_dict = ReaderUtils::read_timestamp_dictionary(m_archives_dir);
+
+    // NOTE(review): id_to_schema is local and passed dereferenced; confirm that ArchiveReader
+    // doesn't keep a reference to it beyond this call
+    m_archive_reader
+            = std::make_unique<ArchiveReader>(m_schema_tree, *id_to_schema, timestamp_dict);
+}
+
+void JsonConstructor::store() {
+    // Every archive is decompressed into the same output file
+    FileWriter writer;
+    writer.open(m_output_dir + "/original", FileWriter::OpenMode::CreateForWriting);
+
+    for (; m_current_archive_index <= m_max_archive_index; ++m_current_archive_index) {
+        ArchiveReaderOption option;
+        option.archive_path = m_archive_paths[m_current_archive_index];
+        m_archive_reader->open(option);
+        m_archive_reader->store(writer);
+        m_archive_reader->close();
+    }
+
+    writer.close();
+}
+
+void JsonConstructor::close() {
+    // Intentionally empty: store() already closes the archive reader after each archive
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/JsonConstructor.hpp b/components/core/src/clp_s/JsonConstructor.hpp
new file mode 100644
index 000000000..12a30d9fd
--- /dev/null
+++ b/components/core/src/clp_s/JsonConstructor.hpp
@@ -0,0 +1,59 @@
+#ifndef CLP_S_JSONCONSTRUCTOR_HPP
+#define CLP_S_JSONCONSTRUCTOR_HPP
+
+#include <set>
+#include <string>
+
+#include "ArchiveReader.hpp"
+#include "ColumnReader.hpp"
+#include "DictionaryReader.hpp"
+#include "SchemaReader.hpp"
+#include "SchemaTree.hpp"
+
+namespace clp_s {
+struct JsonConstructorOption {
+    std::string archives_dir;  // Directory whose subdirectories are the archives to decompress
+    std::string output_dir;  // Directory that is created to hold the decompressed output
+};
+
+class JsonConstructor {
+public:
+    class OperationFailed : public TraceableException {
+    public:
+        // Constructors
+        OperationFailed(ErrorCode error_code, char const* const filename, int line_number)
+                : TraceableException(error_code, filename, line_number) {}
+    };
+
+    // Constructors (creates the output directory and collects the archive paths; exits on failure)
+    explicit JsonConstructor(JsonConstructorOption const& option);
+
+    /**
+     * Reads the schema tree, schemas and timestamp dictionary, then creates the archive reader
+     */
+    void construct();
+
+    /**
+     * Decompresses every archive in turn into the file "original" inside the output directory
+     */
+    void store();
+
+    /**
+     * Closes the JsonConstructor (currently does nothing)
+     */
+    void close();
+
+private:
+    std::string m_archives_dir;
+    std::string m_output_dir;
+
+    std::unique_ptr<ArchiveReader> m_archive_reader;  // Created by construct()
+    std::vector<std::string> m_archive_paths;  // One path per subdirectory of m_archives_dir
+    size_t m_current_archive_index;  // Index of the next archive that store() will process
+    size_t m_max_archive_index;  // Index of the last element of m_archive_paths
+
+    std::shared_ptr<SchemaTree> m_schema_tree;
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_JSONCONSTRUCTOR_HPP
diff --git a/components/core/src/clp_s/JsonFileIterator.cpp b/components/core/src/clp_s/JsonFileIterator.cpp
new file mode 100644
index 000000000..0b6499502
--- /dev/null
+++ b/components/core/src/clp_s/JsonFileIterator.cpp
@@ -0,0 +1,129 @@
+#include "JsonFileIterator.hpp"
+
+#include <cstring>
+
+namespace clp_s {
+JsonFileIterator::JsonFileIterator(std::string const& file_name, size_t buf_size) {
+    // Define every scalar member up front so none is indeterminate if the open below fails
+    m_buf = nullptr;
+    m_buf_size = 0;
+    m_buf_occupied = 0;
+    m_bytes_read = 0;
+    m_first_read = true;
+    m_eof = true;  // nothing to read until the file has been opened
+    try {
+        m_reader.open(file_name);
+    } catch (FileReader::OperationFailed const&) {
+        return;  // callers detect the failure through is_open()
+    }
+    m_eof = false;
+    m_buf_size = buf_size;
+    m_buf = new char[buf_size + simdjson::SIMDJSON_PADDING];
+    read_new_json(/*truncated_bytes=*/0);
+}
+
+JsonFileIterator::~JsonFileIterator() {
+    delete[] m_buf;  // m_buf is nullptr when the file failed to open, which delete[] accepts
+    if (m_reader.is_open()) {  // only a reader that is actually open needs closing
+        m_reader.close();
+    }
+}
+
+void JsonFileIterator::read_new_json(size_t truncated_bytes) {
+    if (truncated_bytes == m_buf_size) {
+        // The entire buffer is one incomplete document: double the buffer and keep its contents
+        size_t new_buf_size = m_buf_size * 2;
+        char* new_buf = new char[new_buf_size + simdjson::SIMDJSON_PADDING];
+        memcpy(new_buf, m_buf, m_buf_size);
+        delete[] m_buf;
+        m_buf = new_buf;
+        m_buf_size = new_buf_size;
+    } else if (truncated_bytes > 0) {
+        // Keep only the incomplete tail by moving it to the start of the buffer
+        memmove(m_buf, m_buf + (m_buf_occupied - truncated_bytes), truncated_bytes);
+        m_buf_occupied = truncated_bytes;
+    } else {
+        m_buf_occupied = 0;  // nothing carried over; the buffer is refilled from offset 0
+    }
+
+    size_t size_read = 0;
+    auto error = m_reader.try_read(m_buf + m_buf_occupied, m_buf_size - m_buf_occupied, size_read);
+    m_buf_occupied += size_read;
+    m_bytes_read += size_read;
+
+    if (error != ErrorCodeSuccess) {
+        m_eof = true;  // any non-success result from try_read is treated as end of file
+    }
+
+    m_parser.iterate_many(
+                    m_buf,
+                    /* length of data */ m_buf_occupied,
+                    /* batch size of data to parse*/ m_buf_occupied
+    )
+            .get(m_stream);
+
+    m_doc_it = m_stream.begin();
+    // The iterator only implements operator!=, so "false == (a != b)" stands in for "a == b".
+    // If the buffer yielded no document and eof has not been reached, read again, carrying
+    // over the bytes simdjson reports as truncated (the buffer doubles when that is all of it).
+    if (false == (m_doc_it != m_stream.end()) && false == m_eof) {
+        read_new_json(m_stream.truncated_bytes());
+    }
+}
+
+bool JsonFileIterator::get_json(simdjson::ondemand::document_stream::iterator& it) {
+    if (false == m_first_read) {
+        ++m_doc_it;
+    } else {
+        m_first_read = false;  // the first call returns the document m_doc_it already points at
+    }
+
+    size_t patch_truncated_bytes = 0;
+    if (m_doc_it != m_stream.end()) {
+        if (m_doc_it.error() == simdjson::error_code::SUCCESS) {
+            it = m_doc_it;
+            return true;
+        } else if (m_doc_it.error() == simdjson::error_code::UTF8_ERROR) {
+            patch_truncated_bytes
+                    = reverse_search_newline_truncated_bytes(m_doc_it.current_index());
+        }
+    } else if (m_eof) {
+        return false;  // buffer exhausted and the file has nothing more to offer
+    }
+
+    // There is a bug in simdjson where, when invalid UTF-8 is encountered at the end of the
+    // stream, truncated bytes isn't set correctly. Work around this limitation by manually
+    // searching for the start of the erroring document and setting truncated bytes
+    // appropriately.
+    if (patch_truncated_bytes == 0) {
+        read_new_json(m_stream.truncated_bytes());
+    } else {
+        read_new_json(patch_truncated_bytes);
+    }
+
+    if (m_doc_it != m_stream.end()) {
+        if (m_doc_it.error() == simdjson::error_code::SUCCESS) {
+            it = m_doc_it;
+            return true;
+        }
+    }
+
+    return false;  // still no valid document after refilling the buffer
+}
+
+size_t JsonFileIterator::reverse_search_newline_truncated_bytes(size_t start) {
+    if (m_buf_occupied == 0) {
+        return 0;
+    }
+    // Clamp to the last occupied byte; anything at or past m_buf_occupied is not file data
+    if (start >= m_buf_occupied) {
+        start = m_buf_occupied - 1;
+    }
+    // Walk back to the newline that precedes the erroring document
+    while (start > 0 && m_buf[start] != '\n') {
+        --start;
+    }
+    // Without a newline the document starts at offset 0, so the whole buffer belongs to it
+    return ('\n' == m_buf[start]) ? m_buf_occupied - start - 1 : m_buf_occupied;
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/JsonFileIterator.hpp b/components/core/src/clp_s/JsonFileIterator.hpp
new file mode 100644
index 000000000..2677875b4
--- /dev/null
+++ b/components/core/src/clp_s/JsonFileIterator.hpp
@@ -0,0 +1,75 @@
+#ifndef CLP_S_JSONFILEITERATOR_HPP
+#define CLP_S_JSONFILEITERATOR_HPP
+
+#include <simdjson.h>
+
+#include "FileReader.hpp"
+
+namespace clp_s {
+class JsonFileIterator {
+public:
+    /**
+     * An iterator over a file containing json objects. JSON is parsed
+     * using simdjson's iterate_many. This allows simdjson to efficiently find
+     * delimiters between JSON objects, and if enabled parse JSON ahead of time
+     * in another thread while the JSON is being iterated over.
+     *
+     * The buffer grows automatically if there are JSON objects larger than the buffer size.
+     * The buffer is padded to be SIMDJSON_PADDING bytes larger than the specified size.
+     *
+     * @param file_name the file containing JSON
+     * @param buf_size the initial buffer size
+     */
+    explicit JsonFileIterator(
+            std::string const& file_name,
+            size_t buf_size = 1024 * 1024 /*1MB default*/
+    );
+    ~JsonFileIterator();
+
+    /**
+     * Reads the next JSON document and returns it in the it argument
+     * @param it an iterator to the JSON object that gets returned; only assigned on success
+     * @return true if the iterator is valid, false otherwise
+     */
+    bool get_json(simdjson::ondemand::document_stream::iterator& it);
+
+    /**
+     * Checks if the file is open
+     * @return true if the file opened successfully
+     */
+    bool is_open() { return m_reader.is_open(); }
+
+    /**
+     * @return number of truncated bytes after json documents, or 0 if the stream is empty
+     */
+    size_t truncated_bytes() {
+        if (m_stream.size_in_bytes() != 0) {
+            return m_stream.truncated_bytes();
+        }
+        return 0;
+    }
+
+private:
+    /**
+     * Reads new JSON into the buffer and initializes iterators into the data.
+     * If the buffer is not large enough to contain the JSON its size is doubled.
+     * @param truncated_bytes length of incomplete JSON at end of buffer in bytes
+     */
+    void read_new_json(size_t truncated_bytes);
+    // Searches backwards from start for '\n'; get_json uses the result after a UTF-8 error
+    size_t reverse_search_newline_truncated_bytes(size_t start);
+
+    size_t m_bytes_read;  // total number of bytes read from the file so far
+    size_t m_buf_size;  // capacity of m_buf, not counting the SIMDJSON_PADDING bytes
+    size_t m_buf_occupied;  // number of bytes of m_buf that currently hold file data
+    char* m_buf;
+    FileReader m_reader;
+    simdjson::ondemand::parser m_parser;
+    simdjson::ondemand::document_stream m_stream;
+    bool m_eof;  // set once a read from the file returns a non-success code
+    bool m_first_read;  // true until get_json has been called for the first time
+    simdjson::ondemand::document_stream::iterator m_doc_it;
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_JSONFILEITERATOR_HPP
diff --git a/components/core/src/clp_s/JsonParser.cpp b/components/core/src/clp_s/JsonParser.cpp
new file mode 100644
index 000000000..d13bbc729
--- /dev/null
+++ b/components/core/src/clp_s/JsonParser.cpp
@@ -0,0 +1,298 @@
+#include "JsonParser.hpp"
+
+#include <iostream>
+#include <stack>
+
+#include "JsonFileIterator.hpp"
+
+namespace clp_s {
+JsonParser::JsonParser(JsonParserOption const& option)
+        : m_archives_dir(option.archives_dir),
+          m_num_messages(0),
+          m_compression_level(option.compression_level),
+          m_target_encoded_size(option.target_encoded_size),
+          m_timestamp_column(option.timestamp_column) {
+    if (false == boost::filesystem::create_directory(m_archives_dir)) {
+        SPDLOG_ERROR("The output directory '{}' already exists", m_archives_dir);
+        exit(1);  // the process terminates whenever the directory could not be newly created
+    }
+
+    if (false == FileUtils::validate_path(option.file_paths)) {
+        exit(1);
+    }
+
+    // Expand every requested path into the list of files to parse
+    for (auto& file_path : option.file_paths) {
+        FileUtils::find_all_files(file_path, m_file_paths);
+    }
+
+    m_schema_tree = std::make_shared<SchemaTree>();
+    m_schema_tree_path = m_archives_dir + "/schema_tree";
+
+    m_schema_map = std::make_shared<SchemaMap>(m_archives_dir, m_compression_level);
+
+    m_timestamp_dictionary = std::make_shared<TimestampDictionaryWriter>();
+    m_timestamp_dictionary->open(m_archives_dir + "/timestamp.dict", option.compression_level);
+    // Open the first archive under a freshly generated id
+    ArchiveWriterOption archive_writer_option;
+    archive_writer_option.archives_dir = m_archives_dir;
+    archive_writer_option.id = m_generator();
+    archive_writer_option.compression_level = option.compression_level;
+
+    m_archive_writer = std::make_unique<ArchiveWriter>(m_schema_tree, m_timestamp_dictionary);
+    m_archive_writer->open(archive_writer_option);
+}
+
+void JsonParser::parse_line(ondemand::value line, int32_t parent_node_id, std::string const& key) {
+    int32_t node_id;
+    std::stack<ondemand::object> object_stack;
+    std::stack<int32_t> node_id_stack;
+    std::stack<ondemand::object_iterator> object_it_stack;
+
+    ondemand::field cur_field;
+
+    std::string cur_key = key;
+    node_id_stack.push(parent_node_id);
+
+    bool can_match_timestamp = !m_timestamp_column.empty();  // a timestamp column is configured
+    bool may_match_timestamp = can_match_timestamp;  // key path so far is still a prefix match
+    int longest_matching_timestamp_prefix = 0;
+    bool matches_timestamp = false;  // current key completes the timestamp column path
+
+    do {  // iterative depth-first walk; the three stacks stand in for recursion
+        if (false == object_stack.empty()) {
+            cur_field = *object_it_stack.top();
+            cur_key = std::string(std::string_view(cur_field.unescaped_key(true)));
+            line = cur_field.value();
+            if (may_match_timestamp) {
+                if (object_stack.size() <= m_timestamp_column.size()
+                    && cur_key == m_timestamp_column[object_stack.size() - 1])
+                {
+                    if (object_stack.size() == m_timestamp_column.size()) {
+                        // FIXME: technically need to handle the case where this
+                        // isn't a string or number column by resetting matches_timestamp
+                        // to false
+                        matches_timestamp = true;
+                    }
+                } else {
+                    longest_matching_timestamp_prefix = object_stack.size() - 1;
+                    may_match_timestamp = false;
+                }
+            }
+        }
+
+        switch (line.type()) {
+            case ondemand::json_type::object: {
+                node_id = m_schema_tree->add_node(node_id_stack.top(), NodeType::OBJECT, cur_key);
+                object_stack.push(std::move(line.get_object()));
+                auto objref = object_stack.top();
+                auto it = ondemand::object_iterator(objref.begin());
+                if (it == objref.end()) {
+                    // Empty object: record it in the schema as a leaf and do not descend
+                    m_current_schema.insert(node_id);
+                    object_stack.pop();
+                    break;
+                } else {
+                    object_it_stack.push(it);
+                    node_id_stack.push(node_id);
+                    continue;
+                }
+            }
+            case ondemand::json_type::array: {
+                // Arrays are not traversed; the whole array is stored as its JSON text
+                std::string value = std::string(std::string_view(simdjson::to_json_string(line)));
+                node_id = m_schema_tree->add_node(node_id_stack.top(), NodeType::ARRAY, cur_key);
+                m_current_parsed_message.add_value(node_id, value);
+                m_current_schema.insert(node_id);
+                break;
+            }
+            case ondemand::json_type::number: {
+                NodeType type;
+                ondemand::number number_value = line.get_number();
+                if (false == number_value.is_double()) {
+                    // FIXME: should have separate integer and unsigned
+                    // integer types to handle values greater than max int64
+                    type = NodeType::INTEGER;
+                } else {
+                    type = NodeType::FLOAT;
+                }
+                node_id = m_schema_tree->add_node(node_id_stack.top(), type, cur_key);
+
+                if (type == NodeType::INTEGER) {
+                    int64_t i64_value;
+                    if (number_value.is_uint64()) {
+                        i64_value = static_cast<int64_t>(number_value.get_uint64());
+                    } else {
+                        i64_value = line.get_int64();
+                    }
+
+                    m_current_parsed_message.add_value(node_id, i64_value);
+                    if (matches_timestamp) {
+                        m_timestamp_dictionary->ingest_entry(cur_key, i64_value);
+                        matches_timestamp = may_match_timestamp = can_match_timestamp = false;
+                    }
+                } else {
+                    double double_value = line.get_double();
+                    m_current_parsed_message.add_value(node_id, double_value);
+                    if (matches_timestamp) {
+                        m_timestamp_dictionary->ingest_entry(cur_key, double_value);
+                        matches_timestamp = may_match_timestamp = can_match_timestamp = false;
+                    }
+                }
+                m_current_schema.insert(node_id);
+                break;
+            }
+            case ondemand::json_type::string: {
+                // TODO (Rui): Take a look
+                std::string value = std::string(
+                        line.raw_json_token().substr(1, line.raw_json_token().size() - 2)
+                );
+                if (matches_timestamp) {
+                    double ret_double;
+                    if (StringUtils::convert_string_to_double(value, ret_double)) {
+                        node_id = m_schema_tree->add_node(
+                                node_id_stack.top(),
+                                NodeType::FLOATDATESTRING,
+                                cur_key
+                        );
+                        m_current_parsed_message.add_value(node_id, ret_double);
+                    } else {
+                        node_id = m_schema_tree->add_node(
+                                node_id_stack.top(),
+                                NodeType::DATESTRING,
+                                cur_key
+                        );
+                        m_current_parsed_message.add_value(node_id, value);
+                    }
+                    matches_timestamp = may_match_timestamp = can_match_timestamp = false;
+                } else if (value.find(' ') != std::string::npos) {
+                    node_id = m_schema_tree
+                                      ->add_node(node_id_stack.top(), NodeType::CLPSTRING, cur_key);
+                    m_current_parsed_message.add_value(node_id, value);
+                } else {
+                    node_id = m_schema_tree
+                                      ->add_node(node_id_stack.top(), NodeType::VARSTRING, cur_key);
+                    m_current_parsed_message.add_value(node_id, value);
+                }
+
+                m_current_schema.insert(node_id);
+                break;
+            }
+            case ondemand::json_type::boolean: {
+                bool value = line.get_bool();
+                node_id = m_schema_tree->add_node(node_id_stack.top(), NodeType::BOOLEAN, cur_key);
+
+                m_current_parsed_message.add_value(node_id, value);
+                m_current_schema.insert(node_id);
+                break;
+            }
+            case ondemand::json_type::null: {
+                node_id = m_schema_tree
+                                  ->add_node(node_id_stack.top(), NodeType::NULLVALUE, cur_key);
+                m_current_schema.insert(node_id);
+                break;
+            }
+        }
+
+        if (object_stack.empty()) {
+            break;
+        }
+
+        // Advance to the next field, unwinding every object whose fields are exhausted
+        bool hit_end;
+        do {
+            hit_end = false;
+            ++object_it_stack.top();
+            if (object_it_stack.top() == object_stack.top().end()) {
+                object_it_stack.pop();
+                object_stack.pop();
+                node_id_stack.pop();
+                hit_end = true;
+            }
+            if (can_match_timestamp
+                && (object_it_stack.size() - 1) <= longest_matching_timestamp_prefix)
+            {
+                may_match_timestamp = true;
+            }
+        } while (!object_it_stack.empty() && hit_end);
+    }
+
+    while (!object_stack.empty());
+}
+
+void JsonParser::parse() {
+    for (auto const& file_path : m_file_paths) {
+        JsonFileIterator json_file_iterator(file_path);
+        if (false == json_file_iterator.is_open()) {
+            // Report the failure instead of silently abandoning this and all remaining files
+            SPDLOG_ERROR("Failed to open input file {}", file_path);
+            return;
+        }
+
+        simdjson::ondemand::document_stream::iterator json_it;
+        m_num_messages = 0;  // the message counter restarts for every input file
+        while (json_file_iterator.get_json(json_it)) {
+            m_current_schema.clear();
+
+            parse_line((*json_it).value(), -1, "root");
+            m_num_messages++;
+
+            int32_t current_schema_id = m_schema_map->add_schema(m_current_schema);
+            m_current_parsed_message.set_id(current_schema_id);
+
+            // Start a new archive before appending once the current one has reached its target
+            if (m_archive_writer->get_data_size() >= m_target_encoded_size) {
+                split_archive();
+            }
+            m_archive_writer
+                    ->append_message(current_schema_id, m_current_schema, m_current_parsed_message);
+            m_current_parsed_message.clear();
+        }
+
+        if (json_file_iterator.truncated_bytes() > 0) {
+            SPDLOG_ERROR(
+                    "Truncated JSON  ({} bytes) at end of file {}",
+                    json_file_iterator.truncated_bytes(),
+                    file_path.c_str()
+            );
+        }
+    }
+}
+
+void JsonParser::store() {
+    FileWriter schema_tree_writer;
+    ZstdCompressor schema_tree_compressor;
+
+    schema_tree_writer.open(m_schema_tree_path, FileWriter::OpenMode::CreateForWriting);
+    schema_tree_compressor.open(schema_tree_writer, m_compression_level);
+    // Layout: node count, then (parent id, key length, key bytes, node type) for every node
+    auto const& nodes = m_schema_tree->get_nodes();  // bind by reference to avoid a copy
+    schema_tree_compressor.write_numeric_value(nodes.size());
+    for (auto const& node : nodes) {
+        schema_tree_compressor.write_numeric_value(node->get_parent_id());
+
+        std::string const& key = node->get_key_name();
+        schema_tree_compressor.write_numeric_value(key.size());
+        schema_tree_compressor.write_string(key);
+        schema_tree_compressor.write_numeric_value(node->get_type());
+    }
+
+    schema_tree_compressor.close();
+    schema_tree_writer.close();
+
+    m_schema_map->store();
+
+    m_timestamp_dictionary->close();
+}
+
+void JsonParser::split_archive() {
+    // Finish the current archive, then reopen the writer on a new one with a fresh id
+    m_archive_writer->close();
+    ArchiveWriterOption next_archive_option;
+    next_archive_option.archives_dir = m_archives_dir;
+    next_archive_option.id = m_generator();
+    next_archive_option.compression_level = m_compression_level;
+
+    m_archive_writer->open(next_archive_option);
+}
+
+void JsonParser::close() {
+    m_archive_writer->close();  // closes the archive writer that is currently open
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/JsonParser.hpp b/components/core/src/clp_s/JsonParser.hpp
new file mode 100644
index 000000000..96250129e
--- /dev/null
+++ b/components/core/src/clp_s/JsonParser.hpp
@@ -0,0 +1,101 @@
+#ifndef CLP_S_JSONPARSER_HPP
+#define CLP_S_JSONPARSER_HPP
+
+#include <map>
+#include <set>
+#include <string>
+#include <variant>
+#include <vector>
+
+#include <boost/uuid/random_generator.hpp>
+#include <simdjson.h>
+
+#include "ArchiveWriter.hpp"
+#include "DictionaryWriter.hpp"
+#include "FileReader.hpp"
+#include "FileWriter.hpp"
+#include "ParsedMessage.hpp"
+#include "SchemaMap.hpp"
+#include "SchemaTree.hpp"
+#include "SchemaWriter.hpp"
+#include "TimestampDictionaryWriter.hpp"
+#include "Utils.hpp"
+#include "ZstdCompressor.hpp"
+
+using namespace simdjson;
+
+namespace clp_s {
+struct JsonParserOption {
+    std::vector<std::string> file_paths;  // input paths; expanded into individual files
+    std::vector<std::string> timestamp_column;  // key path of the timestamp, one key per level
+    std::string archives_dir;  // output directory; the parser exits if it cannot create it
+    size_t target_encoded_size;  // archive data size at which a new archive is started
+    int compression_level;  // passed on to the archive, schema and dictionary writers
+};
+
+class JsonParser {
+public:
+    class OperationFailed : public TraceableException {
+    public:
+        // Forwards the error code and the throw site to TraceableException
+        OperationFailed(ErrorCode error_code, char const* const filename, int line_number)
+                : TraceableException(error_code, filename, line_number) {}
+    };
+
+    // Constructor
+    explicit JsonParser(JsonParserOption const& option);
+
+    // Destructor
+    ~JsonParser() = default;
+
+    /**
+     * Parses the JSON log messages and stores the parsed data in the archive.
+     */
+    void parse();
+
+    /**
+     * Writes the schema tree and schema map to disk and closes the timestamp dictionary.
+     */
+    void store();
+
+    /**
+     * Closes the current archive writer.
+     */
+    void close();
+
+private:
+    /**
+     * Parses a JSON value iteratively, adding its nodes to the schema tree and current message
+     * @param line the JSON value to parse
+     * @param parent_node_id the parent node id
+     * @param key the key of the node
+     */
+    void parse_line(ondemand::value line, int32_t parent_node_id, std::string const& key);
+
+    /**
+     * Closes the current archive and opens a new one (the size check is done by the caller)
+     */
+    void split_archive();
+
+    int m_num_messages;
+    int m_compression_level;
+    std::vector<std::string> m_file_paths;
+    std::string m_archives_dir;
+    std::string m_schema_tree_path;
+
+    std::set<int32_t> m_current_schema;  // node ids of the message currently being parsed
+    std::shared_ptr<SchemaMap> m_schema_map;
+
+    std::shared_ptr<SchemaTree> m_schema_tree;
+    ParsedMessage m_current_parsed_message;
+    std::shared_ptr<TimestampDictionaryWriter> m_timestamp_dictionary;
+
+    std::vector<std::string> m_timestamp_column;
+
+    boost::uuids::random_generator m_generator;  // produces the id of every new archive
+    std::unique_ptr<ArchiveWriter> m_archive_writer;
+    size_t m_target_encoded_size;
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_JSONPARSER_HPP
diff --git a/components/core/src/clp_s/JsonSerializer.hpp b/components/core/src/clp_s/JsonSerializer.hpp
new file mode 100644
index 000000000..9c45e6b27
--- /dev/null
+++ b/components/core/src/clp_s/JsonSerializer.hpp
@@ -0,0 +1,125 @@
+#ifndef CLP_S_JSONSERIALIZER_HPP
+#define CLP_S_JSONSERIALIZER_HPP
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+/**
+ * Builds a JSON document string piece by piece. Every appended value or closed object leaves a
+ * trailing ',' in the buffer; end_object() and end_document() are the calls that remove it.
+ * Keys and values are inserted verbatim: no escaping is applied by this class.
+ */
+class JsonSerializer {
+public:
+    // Operations that can be recorded with add_op() and replayed through get_next_op()
+    enum Op : uint8_t {
+        BeginObject,
+        EndObject,
+        AddIntField,
+        AddFloatField,
+        AddBoolField,
+        AddStringField,
+        AddArrayField,
+        AddNullField,
+    };
+
+    static int64_t const cReservedLength = 4096;
+
+    /**
+     * @param reserved_length capacity, in bytes, to reserve for the serialized string up front;
+     * values that are not positive reserve nothing
+     */
+    explicit JsonSerializer(int64_t reserved_length = cReservedLength)
+            : m_op_list_index(0),
+              m_special_keys_index(0) {
+        if (reserved_length > 0) {
+            m_json_string.reserve(static_cast<size_t>(reserved_length));
+        }
+    }
+
+    std::string& get_serialized_string() { return m_json_string; }
+
+    /**
+     * Clears the serialized string and rewinds the op and special-key cursors. The recorded ops
+     * and keys themselves are kept.
+     */
+    void reset() {
+        m_json_string.clear();
+        m_op_list_index = 0;
+        m_special_keys_index = 0;
+    }
+
+    void add_op(Op op) { m_op_list.push_back(op); }
+
+    std::vector<Op>& get_op_list() { return m_op_list; }
+
+    /**
+     * Fetches the next recorded op and advances the cursor
+     * @param op receives the op; left untouched when there is none
+     * @return true if an op was returned, false once the list is exhausted
+     */
+    bool get_next_op(Op& op) {
+        if (m_op_list_index < m_op_list.size()) {
+            op = m_op_list[m_op_list_index++];
+            return true;
+        }
+        return false;
+    }
+
+    void add_special_key(std::string const& key) { m_special_keys.push_back(key); }
+
+    // Opens a nested object keyed by the next special key
+    void begin_object() {
+        append_key();
+        m_json_string += "{";
+    }
+
+    void begin_document() { m_json_string += "{"; }
+
+    /**
+     * Closes the document. A trailing ',' left by the last field is dropped first; a document
+     * without fields has none, so its opening '{' must not be overwritten.
+     */
+    void end_document() {
+        if (false == m_json_string.empty() && ',' == m_json_string.back()) {
+            m_json_string.pop_back();
+        }
+        m_json_string += '}';
+    }
+
+    /**
+     * Closes a nested object. Inspects the op two positions before the cursor (the op that
+     * precedes the EndObject just fetched, assuming ops are replayed through get_next_op()) to
+     * decide whether a trailing ',' has to be removed.
+     * NOTE(review): undefined if fewer than two ops have been fetched so far.
+     */
+    void end_object() {
+        if (m_op_list[m_op_list_index - 2] != BeginObject) {
+            m_json_string.pop_back();
+        }
+        m_json_string += "},";
+    }
+
+    // Appends the next special key and advances the special-key cursor
+    void append_key() { append_key(m_special_keys[m_special_keys_index++]); }
+
+    void append_key(std::string const& key) { m_json_string += "\"" + key + "\":"; }
+
+    void append_value(std::string const& value) { m_json_string += value + ","; }
+
+    void append_value_with_quotes(std::string const& value) {
+        m_json_string += "\"" + value + "\",";
+    }
+
+private:
+    std::string m_json_string;
+    std::vector<Op> m_op_list;
+    std::vector<std::string> m_special_keys;
+
+    size_t m_op_list_index;  // position of the next op get_next_op() will return
+    size_t m_special_keys_index;  // position of the next key append_key() will use
+};
+
+#endif  // CLP_S_JSONSERIALIZER_HPP
diff --git a/components/core/src/clp_s/ParsedMessage.hpp b/components/core/src/clp_s/ParsedMessage.hpp
new file mode 100644
index 000000000..769440778
--- /dev/null
+++ b/components/core/src/clp_s/ParsedMessage.hpp
@@ -0,0 +1,54 @@
+#ifndef CLP_S_PARSEDMESSAGE_HPP
+#define CLP_S_PARSEDMESSAGE_HPP
+
+#include <map>
+#include <string>
+#include <utility>
+#include <variant>
+
+namespace clp_s {
+class ParsedMessage {
+public:
+    // Constructor: starts with no values and a schema id of -1
+    ParsedMessage() : m_schema_id(-1) {}
+
+    // Destructor
+    ~ParsedMessage() = default;
+
+    void set_id(int32_t schema_id) { m_schema_id = schema_id; }
+
+    /**
+     * Stores a value for a schema node, replacing any value already stored for that node
+     * @param node_id id of the schema node the value belongs to
+     * @param value the value; note a char const* argument would select the bool overload
+     */
+    inline void add_value(int32_t node_id, int64_t value) { m_message[node_id] = value; }
+
+    inline void add_value(int32_t node_id, double value) { m_message[node_id] = value; }
+
+    inline void add_value(int32_t node_id, std::string const& value) { m_message[node_id] = value; }
+
+    inline void add_value(int32_t node_id, bool value) { m_message[node_id] = value; }
+
+    /**
+     * Clears the stored values and resets the schema id to -1
+     */
+    void clear() {
+        m_schema_id = -1;
+        m_message.clear();
+    }
+
+    /**
+     * @return The content of the message, keyed by node id (mutable reference)
+     */
+    std::map<int32_t, std::variant<int64_t, double, std::string, bool>>& get_content() {
+        return m_message;
+    }
+
+private:
+    int32_t m_schema_id;
+    std::map<int32_t, std::variant<int64_t, double, std::string, bool>> m_message;
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_PARSEDMESSAGE_HPP
diff --git a/components/core/src/clp_s/ReaderUtils.cpp b/components/core/src/clp_s/ReaderUtils.cpp
new file mode 100644
index 000000000..2b0d94d27
--- /dev/null
+++ b/components/core/src/clp_s/ReaderUtils.cpp
@@ -0,0 +1,231 @@
+#include "ReaderUtils.hpp"
+
+namespace clp_s {
+std::shared_ptr<SchemaTree> ReaderUtils::read_schema_tree(std::string const& archives_dir) {
+    FileReader schema_tree_reader;
+    ZstdDecompressor schema_tree_decompressor;
+
+    std::shared_ptr<SchemaTree> tree = std::make_shared<SchemaTree>();
+
+    schema_tree_reader.open(archives_dir + "/schema_tree");
+    schema_tree_decompressor.open(schema_tree_reader, cDecompressorFileReadBufferCapacity);
+
+    size_t num_nodes;  // the node count is the first value in the file
+    auto error_code = schema_tree_decompressor.try_read_numeric_value(num_nodes);
+    if (ErrorCodeSuccess != error_code) {
+        throw OperationFailed(error_code, __FILENAME__, __LINE__);
+    }
+
+    // Each node is stored as: parent id, key length, key bytes, node type
+    for (size_t i = 0; i < num_nodes; i++) {
+        int32_t parent_id;
+        size_t key_length;
+        std::string key;
+        uint8_t node_type;
+
+        error_code = schema_tree_decompressor.try_read_numeric_value(parent_id);
+        if (ErrorCodeSuccess != error_code) {
+            throw OperationFailed(error_code, __FILENAME__, __LINE__);
+        }
+
+        error_code = schema_tree_decompressor.try_read_numeric_value(key_length);
+        if (ErrorCodeSuccess != error_code) {
+            throw OperationFailed(error_code, __FILENAME__, __LINE__);
+        }
+
+        error_code = schema_tree_decompressor.try_read_string(key_length, key);
+        if (ErrorCodeSuccess != error_code) {
+            throw OperationFailed(error_code, __FILENAME__, __LINE__);
+        }
+
+        error_code = schema_tree_decompressor.try_read_numeric_value(node_type);
+        if (ErrorCodeSuccess != error_code) {
+            throw OperationFailed(error_code, __FILENAME__, __LINE__);
+        }
+        // NOTE(review): node_type is cast without checking that it names a valid NodeType
+        tree->add_node(parent_id, (NodeType)node_type, key);
+    }
+
+    schema_tree_decompressor.close();
+    schema_tree_reader.close();
+
+    return tree;
+}
+
+std::shared_ptr<VariableDictionaryReader> ReaderUtils::get_variable_dictionary_reader(
+        std::string const& archive_path
+) {
+    auto var_dict_reader = std::make_shared<VariableDictionaryReader>();
+    var_dict_reader->open(archive_path + "/var.dict");  // opened on the archive's var.dict
+    return var_dict_reader;
+}
+
+std::shared_ptr<LogTypeDictionaryReader> ReaderUtils::get_log_type_dictionary_reader(
+        std::string const& archive_path
+) {
+    auto log_dict_reader = std::make_shared<LogTypeDictionaryReader>();
+    log_dict_reader->open(archive_path + "/log.dict");  // opened on the archive's log.dict
+    return log_dict_reader;
+}
+
+std::shared_ptr<LogTypeDictionaryReader> ReaderUtils::get_array_dictionary_reader(
+        std::string const& archive_path
+) {
+    auto array_dict_reader = std::make_shared<LogTypeDictionaryReader>();
+    array_dict_reader->open(archive_path + "/array.dict");  // opened on the archive's array.dict
+    return array_dict_reader;
+}
+
+std::shared_ptr<ReaderUtils::SchemaMap> ReaderUtils::read_schemas(std::string const& archives_dir) {
+    auto schemas_pointer = std::make_shared<SchemaMap>();
+    SchemaMap& schemas = *schemas_pointer;
+    FileReader schema_id_reader;
+    ZstdDecompressor schema_id_decompressor;
+
+    schema_id_reader.open(archives_dir + "/schema_ids");
+    schema_id_decompressor.open(schema_id_reader, cDecompressorFileReadBufferCapacity);
+
+    size_t schema_size;  // number of schemas stored in the file
+    auto error_code = schema_id_decompressor.try_read_numeric_value(schema_size);
+    if (ErrorCodeSuccess != error_code) {
+        throw OperationFailed(error_code, __FILENAME__, __LINE__);
+    }
+
+    // Each schema is stored as: schema id, node count, then that many node ids
+    for (size_t i = 0; i < schema_size; i++) {
+        int32_t schema_id;
+        error_code = schema_id_decompressor.try_read_numeric_value(schema_id);
+        if (ErrorCodeSuccess != error_code) {
+            throw OperationFailed(error_code, __FILENAME__, __LINE__);
+        }
+
+        size_t schema_node_size;
+        error_code = schema_id_decompressor.try_read_numeric_value(schema_node_size);
+        if (ErrorCodeSuccess != error_code) {
+            throw OperationFailed(error_code, __FILENAME__, __LINE__);
+        }
+
+        std::set<int32_t>& schema = schemas[schema_id];  // creates the entry if it is new
+        for (size_t j = 0; j < schema_node_size; j++) {
+            int32_t node_id;
+            error_code = schema_id_decompressor.try_read_numeric_value(node_id);
+            if (ErrorCodeSuccess != error_code) {
+                throw OperationFailed(error_code, __FILENAME__, __LINE__);
+            }
+
+            schema.insert(node_id);
+        }
+    }
+
+    schema_id_decompressor.close();
+    schema_id_reader.close();
+
+    return schemas_pointer;
+}
+
+std::shared_ptr<TimestampDictionaryReader> ReaderUtils::read_timestamp_dictionary(
+        std::string const& archives_dir
+) {
+    // Load the entries, then close the file; the populated reader is what gets returned
+    auto timestamp_dict = std::make_shared<TimestampDictionaryReader>();
+    timestamp_dict->open(archives_dir + "/timestamp.dict");
+    timestamp_dict->read_new_entries();
+    timestamp_dict->close();
+    return timestamp_dict;
+}
+
+std::shared_ptr<TimestampDictionaryReader> ReaderUtils::read_local_timestamp_dictionary(
+        std::string const& archive_path
+) {
+    // Load the local entries, then close the file; the populated reader is what gets returned
+    auto local_timestamp_dict = std::make_shared<TimestampDictionaryReader>();
+    local_timestamp_dict->open(archive_path + "/timestamp.dict");
+    local_timestamp_dict->read_local_entries();
+    local_timestamp_dict->close();
+    return local_timestamp_dict;
+}
+
+std::vector<std::string> ReaderUtils::get_archives(std::string const& archives_dir) {
+    std::vector<std::string> archive_paths;
+
+    if (false == boost::filesystem::is_directory(archives_dir)) {
+        throw OperationFailed(ErrorCodeBadParam, __FILENAME__, __LINE__);
+    }
+
+    // Every direct sub-directory of archives_dir is reported as one archive
+    boost::filesystem::directory_iterator const end_it;
+    for (boost::filesystem::directory_iterator it(archives_dir); it != end_it; ++it) {
+        if (boost::filesystem::is_directory(it->path())) {
+            archive_paths.push_back(it->path().string());
+        }
+    }
+
+    return archive_paths;
+}
+
+std::vector<int32_t> ReaderUtils::get_schemas(std::string const& archive_path) {
+    // <cctype> classifiers are undefined for negative char values, so go through unsigned char
+    auto const is_digit = [](unsigned char c) { return 0 != ::isdigit(c); };
+    std::vector<int32_t> schemas;
+    std::string const encoded_messages_dir = archive_path + "/encoded_messages";
+    boost::filesystem::directory_iterator iter(encoded_messages_dir);
+    boost::filesystem::directory_iterator end;
+
+    for (; iter != end; ++iter) {  // regular files with an all-digit name are schema ids
+        if (boost::filesystem::is_regular_file(iter->path())) {
+            std::string schema = iter->path().rbegin()->string();
+            if (false == schema.empty() && std::all_of(schema.begin(), schema.end(), is_digit)) {
+                schemas.push_back(std::stoi(schema));
+            }
+        }
+    }
+    return schemas;
+}
+
+void ReaderUtils::append_reader_columns(
+        SchemaReader* reader,
+        std::set<int32_t>& columns,
+        std::shared_ptr<SchemaTree> const& schema_tree,
+        std::shared_ptr<VariableDictionaryReader> const& var_dict,
+        std::shared_ptr<LogTypeDictionaryReader> const& log_dict,
+        std::shared_ptr<LogTypeDictionaryReader> const& array_dict,
+        std::shared_ptr<TimestampDictionaryReader> const& timestamp_dict
+) {
+    for (int32_t column_id : columns) {  // one column reader per schema node, chosen by type
+        auto schema_node = schema_tree->get_node(column_id);
+        std::string name = schema_node->get_key_name();
+        switch (schema_node->get_type()) {
+            case NodeType::INTEGER:
+                reader->append_column(new Int64ColumnReader(name, column_id));
+                break;
+            case NodeType::FLOAT:
+                reader->append_column(new FloatColumnReader(name, column_id));
+                break;
+            case NodeType::CLPSTRING:
+                reader->append_column(
+                        new ClpStringColumnReader(name, column_id, var_dict, log_dict)
+                );
+                break;
+            case NodeType::VARSTRING:
+                reader->append_column(new VariableStringColumnReader(name, column_id, var_dict));
+                break;
+            case NodeType::BOOLEAN:
+                reader->append_column(new BooleanColumnReader(name, column_id));
+                break;
+            case NodeType::ARRAY:
+                reader->append_column(
+                        new ClpStringColumnReader(name, column_id, var_dict, array_dict, true)
+                );
+                break;
+            case NodeType::DATESTRING:
+                reader->append_column(new DateStringColumnReader(name, column_id, timestamp_dict));
+                break;
+            case NodeType::FLOATDATESTRING:
+                reader->append_column(new FloatDateStringColumnReader(name, column_id));
+                break;
+            case NodeType::OBJECT:
+            case NodeType::NULLVALUE:
+                reader->append_column(column_id);
+                break;
+        }
+    }
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/ReaderUtils.hpp b/components/core/src/clp_s/ReaderUtils.hpp
new file mode 100644
index 000000000..074dc056e
--- /dev/null
+++ b/components/core/src/clp_s/ReaderUtils.hpp
@@ -0,0 +1,118 @@
+#ifndef CLP_S_READERUTILS_HPP
+#define CLP_S_READERUTILS_HPP
+
+#include "DictionaryReader.hpp"
+#include "SchemaReader.hpp"
+#include "SchemaTree.hpp"
+#include "TimestampDictionaryReader.hpp"
+#include "TraceableException.hpp"
+
+namespace clp_s {
+class ReaderUtils {
+public:
+    class OperationFailed : public TraceableException {
+    public:
+        // Constructors
+        OperationFailed(ErrorCode error_code, char const* const filename, int line_number)
+                : TraceableException(error_code, filename, line_number) {}
+    };
+
+    typedef std::map<int32_t, std::set<int32_t>> SchemaMap;
+    static constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024;  // 64 KB
+
+    /**
+     * Reads the schema tree from the given archive directory
+     * @param archives_dir
+     * @return The schema tree
+     */
+    static std::shared_ptr<SchemaTree> read_schema_tree(std::string const& archives_dir);
+
+    /**
+     * Opens and gets the variable dictionary reader for the given archive path
+     * @param archive_path
+     * @return the variable dictionary reader
+     */
+    static std::shared_ptr<VariableDictionaryReader> get_variable_dictionary_reader(
+            std::string const& archive_path
+    );
+
+    /**
+     * Opens and gets the log type dictionary reader for the given archive path
+     * @param archive_path
+     * @return the log type dictionary reader
+     */
+    static std::shared_ptr<LogTypeDictionaryReader> get_log_type_dictionary_reader(
+            std::string const& archive_path
+    );
+
+    /**
+     * Opens and gets the array dictionary reader for the given archive path
+     * @param archive_path
+     * @return the array dictionary reader (it has the same type as the log type dictionary reader)
+     */
+    static std::shared_ptr<LogTypeDictionaryReader> get_array_dictionary_reader(
+            std::string const& archive_path
+    );
+
+    /**
+     * Reads the schema map from the given archive directory
+     * @param archives_dir
+     * @return the schema map
+     */
+    static std::shared_ptr<SchemaMap> read_schemas(std::string const& archives_dir);
+
+    /**
+     * Opens and gets the timestamp dictionary reader for the given archive directory
+     * @param archives_dir
+     * @return the timestamp dictionary reader
+     */
+    static std::shared_ptr<TimestampDictionaryReader> read_timestamp_dictionary(
+            std::string const& archives_dir
+    );
+
+    /**
+     * Opens and gets the local timestamp dictionary reader for the given archive path
+     * @param archive_path
+     * @return the timestamp dictionary reader
+     */
+    static std::shared_ptr<TimestampDictionaryReader> read_local_timestamp_dictionary(
+            std::string const& archive_path
+    );
+
+    /**
+     * Gets the list of archives in the given archive directory
+     * @param archives_dir
+     * @return the list of archives
+     */
+    static std::vector<std::string> get_archives(std::string const& archives_dir);
+
+    /**
+     * Gets the list of schemas in the given archive
+     * @param archive_path
+     * @return the list of schemas
+     */
+    static std::vector<int32_t> get_schemas(std::string const& archive_path);
+
+    /**
+     * Appends one column per id in `columns` to the given schema reader
+     * @param reader The schema reader that receives the columns
+     * @param columns Ids of the schema tree nodes to append
+     * @param schema_tree Tree used to look up the type and key name of each id
+     * @param var_dict Dictionary passed to the string and array column readers
+     * @param log_dict Dictionary passed to the CLP string column readers
+     * @param array_dict Dictionary passed to the array column readers
+     * @param timestamp_dict Dictionary passed to the date string column readers
+     */
+    static void append_reader_columns(
+            SchemaReader* reader,
+            std::set<int32_t>& columns,
+            std::shared_ptr<SchemaTree> const& schema_tree,
+            std::shared_ptr<VariableDictionaryReader> const& var_dict,
+            std::shared_ptr<LogTypeDictionaryReader> const& log_dict,
+            std::shared_ptr<LogTypeDictionaryReader> const& array_dict,
+            std::shared_ptr<TimestampDictionaryReader> const& timestamp_dict
+    );
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_READERUTILS_HPP
diff --git a/components/core/src/clp_s/SchemaMap.cpp b/components/core/src/clp_s/SchemaMap.cpp
new file mode 100644
index 000000000..65af81feb
--- /dev/null
+++ b/components/core/src/clp_s/SchemaMap.cpp
@@ -0,0 +1,37 @@
+#include "SchemaMap.hpp"
+
+#include "FileWriter.hpp"
+#include "ZstdCompressor.hpp"
+
+namespace clp_s {
+int32_t SchemaMap::add_schema(std::set<int32_t>& schema) {
+    // Insert the schema under the next unused id; an existing entry is left untouched
+    auto const [entry, inserted] = m_schema_map.try_emplace(schema, m_current_schema_id);
+    if (inserted) {
+        // The id is only consumed when a new mapping was actually created
+        ++m_current_schema_id;
+    }
+    return entry->second;
+}
+
+void SchemaMap::store() {
+    // TODO: rename schema_ids -> schema_map, and use int32_t for schema size
+    FileWriter file_writer;
+    file_writer.open(m_archives_dir + "/schema_ids", FileWriter::OpenMode::CreateForWriting);
+    ZstdCompressor compressor;
+    compressor.open(file_writer, m_compression_level);
+
+    // Layout: <number of schemas>, then per schema: <id> <number of nodes> <node id>...
+    compressor.write_numeric_value(m_schema_map.size());
+    for (auto const& [node_ids, schema_id] : m_schema_map) {
+        compressor.write_numeric_value(schema_id);
+        compressor.write_numeric_value(node_ids.size());
+        for (int32_t node_id : node_ids) {
+            compressor.write_numeric_value(node_id);
+        }
+    }
+
+    compressor.close();
+    file_writer.close();
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/SchemaMap.hpp b/components/core/src/clp_s/SchemaMap.hpp
new file mode 100644
index 000000000..f1cb9a087
--- /dev/null
+++ b/components/core/src/clp_s/SchemaMap.hpp
@@ -0,0 +1,48 @@
+#ifndef CLP_S_SCHEMAMAP_HPP
+#define CLP_S_SCHEMAMAP_HPP
+
+#include <map>
+#include <set>
+#include <string>
+
+namespace clp_s {
+class SchemaMap {
+public:
+    using schema_map_t = std::map<std::set<int32_t>, int32_t>;
+
+    // Constructor: starts with an empty map; the first schema added receives id 0
+    explicit SchemaMap(std::string const& archives_dir, int compression_level)
+            : m_archives_dir{archives_dir},
+              m_compression_level{compression_level},
+              m_current_schema_id{0} {}
+
+    /**
+     * Looks up the id of a schema, registering the schema under
+     * the next unused id if it is not in the map yet.
+     * @param schema The set of ids that makes up the schema
+     * @return the id of the schema
+     */
+    int32_t add_schema(std::set<int32_t>& schema);
+
+    /**
+     * Writes every schema and its id to archives_dir/schema_ids
+     */
+    void store();
+
+    /**
+     * Const iteration over the (schema, id) entries of the schema map
+     * @return const iterator to the first entry / to one past the last entry
+     */
+    schema_map_t::const_iterator schema_map_begin() const { return m_schema_map.begin(); }
+
+    schema_map_t::const_iterator schema_map_end() const { return m_schema_map.end(); }
+
+private:
+    std::string m_archives_dir;
+    int m_compression_level;
+    int32_t m_current_schema_id;
+    schema_map_t m_schema_map;
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_SCHEMAMAP_HPP
diff --git a/components/core/src/clp_s/SchemaReader.cpp b/components/core/src/clp_s/SchemaReader.cpp
new file mode 100644
index 000000000..b55f6feae
--- /dev/null
+++ b/components/core/src/clp_s/SchemaReader.cpp
@@ -0,0 +1,288 @@
+#include "SchemaReader.hpp"
+
+namespace clp_s {
+void SchemaReader::open(std::string path) {
+    m_local_schema_tree = std::make_shared<SchemaTree>();  // fresh local tree for this schema
+    m_path = std::move(path);  // only remembered here; the file itself is opened by load()
+}
+
+void SchemaReader::close() {
+    for (auto* column : std::exchange(m_columns, {})) {  // m_columns is left empty
+        delete column;
+    }
+    m_reordered_columns.clear();  // held the same, now dangling, pointers
+    m_column_map.clear();
+    m_global_id_to_local_id.clear();
+}
+
+void SchemaReader::append_column(BaseColumnReader* column_reader) {
+    m_columns.push_back(column_reader);  // load() reads the columns in the order they were appended
+    m_column_map[column_reader->get_id()] = column_reader;  // lookup by global column id
+    generate_local_tree(column_reader->get_id());
+}
+
+void SchemaReader::append_column(int32_t id) {
+    generate_local_tree(id);  // no column reader is registered; the node only joins the local tree
+}
+
+void SchemaReader::load() {
+    constexpr size_t cDecompressorFileReadBufferCapacity = 64 * 1024;  // 64 KB
+    // Reads the message count and every appended column from m_path, then builds the JSON template
+    m_file_reader.open(m_path);
+    m_decompressor.open(m_file_reader, cDecompressorFileReadBufferCapacity);
+    // The message count is read directly from the file reader, not through the decompressor
+    m_file_reader.seek_from_begin(0);
+    m_file_reader.read_numeric_value(m_num_messages, false);
+    // Columns are loaded from the decompressor in the order they were appended
+    for (auto& reader : m_columns) {
+        reader->load(m_decompressor, m_num_messages);
+    }
+
+    m_decompressor.close();
+    m_file_reader.close();
+    // Template generation starts at node 0 of the local schema tree
+    generate_json_template(0);
+}
+
+bool SchemaReader::get_next_message(std::string& message) {
+    if (m_num_messages <= m_cur_message) {
+        return false;
+    }
+
+    m_json_serializer->reset();
+    m_json_serializer->begin_document();
+    // Every op that emits a column value consumes the next entry of m_reordered_columns
+    size_t next_column = 0;
+    JsonSerializer::Op next_op;
+    while (m_json_serializer->get_next_op(next_op)) {
+        switch (next_op) {
+            case JsonSerializer::Op::BeginObject: {
+                m_json_serializer->begin_object();
+                break;
+            }
+            case JsonSerializer::Op::EndObject: {
+                m_json_serializer->end_object();
+                break;
+            }
+            case JsonSerializer::Op::AddIntField: {
+                auto* reader = m_reordered_columns[next_column++];
+                m_json_serializer->append_key(reader->get_name());
+                m_json_serializer->append_value(
+                        std::to_string(std::get<int64_t>(reader->extract_value(m_cur_message)))
+                );
+                break;
+            }
+            case JsonSerializer::Op::AddFloatField: {
+                auto* reader = m_reordered_columns[next_column++];
+                m_json_serializer->append_key(reader->get_name());
+                m_json_serializer->append_value(
+                        std::to_string(std::get<double>(reader->extract_value(m_cur_message)))
+                );
+                break;
+            }
+            case JsonSerializer::Op::AddBoolField: {
+                auto* reader = m_reordered_columns[next_column++];
+                m_json_serializer->append_key(reader->get_name());
+                m_json_serializer->append_value(
+                        0 != std::get<uint8_t>(reader->extract_value(m_cur_message)) ? "true"
+                                                                                     : "false"
+                );
+                break;
+            }
+            case JsonSerializer::Op::AddStringField: {
+                auto* reader = m_reordered_columns[next_column++];
+                m_json_serializer->append_key(reader->get_name());
+                m_json_serializer->append_value_with_quotes(
+                        std::get<std::string>(reader->extract_value(m_cur_message))
+                );
+                break;
+            }
+            case JsonSerializer::Op::AddArrayField: {
+                auto* reader = m_reordered_columns[next_column++];
+                m_json_serializer->append_key(reader->get_name());
+                m_json_serializer->append_value(
+                        std::get<std::string>(reader->extract_value(m_cur_message))
+                );
+                break;
+            }
+            case JsonSerializer::Op::AddNullField: {
+                m_json_serializer->append_key();
+                m_json_serializer->append_value("null");
+                break;
+            }
+        }
+    }
+
+    m_json_serializer->end_document();
+
+    message = m_json_serializer->get_serialized_string();
+
+    if ('\n' != message.back()) {
+        message.push_back('\n');
+    }
+
+    ++m_cur_message;
+    return true;
+}
+
+bool SchemaReader::get_next_message(std::string& message, FilterClass* filter) {
+    while (m_cur_message < m_num_messages) {
+        if (false == filter->filter(m_cur_message, m_extracted_values)) {
+            ++m_cur_message;
+            continue;
+        }
+
+        m_json_serializer->reset();
+        m_json_serializer->begin_document();
+        // Values are taken from m_extracted_values, keyed by the id of the next reordered column
+        size_t next_column = 0;
+        JsonSerializer::Op next_op;
+        while (m_json_serializer->get_next_op(next_op)) {
+            switch (next_op) {
+                case JsonSerializer::Op::BeginObject: {
+                    m_json_serializer->begin_object();
+                    break;
+                }
+                case JsonSerializer::Op::EndObject: {
+                    m_json_serializer->end_object();
+                    break;
+                }
+                case JsonSerializer::Op::AddIntField: {
+                    auto* reader = m_reordered_columns[next_column++];
+                    m_json_serializer->append_key(reader->get_name());
+                    m_json_serializer->append_value(
+                            std::to_string(std::get<int64_t>(m_extracted_values[reader->get_id()]))
+                    );
+                    break;
+                }
+                case JsonSerializer::Op::AddFloatField: {
+                    auto* reader = m_reordered_columns[next_column++];
+                    m_json_serializer->append_key(reader->get_name());
+                    m_json_serializer->append_value(
+                            std::to_string(std::get<double>(m_extracted_values[reader->get_id()]))
+                    );
+                    break;
+                }
+                case JsonSerializer::Op::AddBoolField: {
+                    auto* reader = m_reordered_columns[next_column++];
+                    m_json_serializer->append_key(reader->get_name());
+                    m_json_serializer->append_value(
+                            0 != std::get<uint8_t>(m_extracted_values[reader->get_id()]) ? "true"
+                                                                                         : "false"
+                    );
+                    break;
+                }
+                case JsonSerializer::Op::AddStringField: {
+                    auto* reader = m_reordered_columns[next_column++];
+                    m_json_serializer->append_key(reader->get_name());
+                    m_json_serializer->append_value_with_quotes(
+                            std::get<std::string>(m_extracted_values[reader->get_id()])
+                    );
+                    break;
+                }
+                case JsonSerializer::Op::AddArrayField: {
+                    auto* reader = m_reordered_columns[next_column++];
+                    m_json_serializer->append_key(reader->get_name());
+                    m_json_serializer->append_value(
+                            std::get<std::string>(m_extracted_values[reader->get_id()])
+                    );
+                    break;
+                }
+                case JsonSerializer::Op::AddNullField: {
+                    m_json_serializer->append_key();
+                    m_json_serializer->append_value("null");
+                    break;
+                }
+            }
+        }
+
+        m_json_serializer->end_document();
+
+        message = m_json_serializer->get_serialized_string();
+
+        if ('\n' != message.back()) {
+            message.push_back('\n');
+        }
+
+        ++m_cur_message;
+        return true;
+    }
+
+    return false;
+}
+
+void SchemaReader::initialize_filter(FilterClass* filter) {
+    filter->init(this, m_schema_id, m_column_map);  // m_column_map is keyed by global column id
+}
+
+void SchemaReader::generate_local_tree(int32_t global_id) {
+    auto const node = m_global_schema_tree->get_node(global_id);
+    int32_t const parent_global_id = node->get_parent_id();
+    bool const has_parent = -1 != parent_global_id;
+
+    // A parent must exist in the local tree before any of its children can be attached to it
+    if (has_parent && 0 == m_global_id_to_local_id.count(parent_global_id)) {
+        generate_local_tree(parent_global_id);
+    }
+
+    int32_t const local_parent = has_parent ? m_global_id_to_local_id[parent_global_id] : -1;
+    int32_t const local_id
+            = m_local_schema_tree->add_node(local_parent, node->get_type(), node->get_key_name());
+    // Record the mapping in both directions
+    m_global_id_to_local_id[global_id] = local_id;
+    m_local_id_to_global_id[local_id] = global_id;
+}
+
+void SchemaReader::generate_json_template(int32_t id) {
+    auto node = m_local_schema_tree->get_node(id);
+    // Bound by reference: the getter returns a reference and the local tree is not modified here
+    auto const& children_ids = node->get_children_ids();
+    for (int32_t child_id : children_ids) {
+        int32_t child_global_id = m_local_id_to_global_id[child_id];
+        auto child_node = m_local_schema_tree->get_node(child_id);
+        std::string const& key = child_node->get_key_name();
+        switch (child_node->get_type()) {
+            case NodeType::OBJECT: {
+                m_json_serializer->add_op(JsonSerializer::Op::BeginObject);
+                m_json_serializer->add_special_key(key);  // no column backs an object node
+                generate_json_template(child_id);  // the children go between Begin/EndObject
+                m_json_serializer->add_op(JsonSerializer::Op::EndObject);
+                break;
+            }
+            case NodeType::ARRAY: {
+                m_json_serializer->add_op(JsonSerializer::Op::AddArrayField);
+                m_reordered_columns.push_back(m_column_map[child_global_id]);
+                break;
+            }
+            case NodeType::INTEGER: {
+                m_json_serializer->add_op(JsonSerializer::Op::AddIntField);
+                m_reordered_columns.push_back(m_column_map[child_global_id]);
+                break;
+            }
+            case NodeType::FLOAT: {
+                m_json_serializer->add_op(JsonSerializer::Op::AddFloatField);
+                m_reordered_columns.push_back(m_column_map[child_global_id]);
+                break;
+            }
+            case NodeType::BOOLEAN: {
+                m_json_serializer->add_op(JsonSerializer::Op::AddBoolField);
+                m_reordered_columns.push_back(m_column_map[child_global_id]);
+                break;
+            }
+            case NodeType::CLPSTRING:
+            case NodeType::VARSTRING:
+            case NodeType::DATESTRING:
+            case NodeType::FLOATDATESTRING: {
+                m_json_serializer->add_op(JsonSerializer::Op::AddStringField);  // all quoted
+                m_reordered_columns.push_back(m_column_map[child_global_id]);
+                break;
+            }
+            case NodeType::NULLVALUE: {
+                m_json_serializer->add_op(JsonSerializer::Op::AddNullField);
+                m_json_serializer->add_special_key(key);  // no column backs a null node
+                break;
+            }
+        }
+    }
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/SchemaReader.hpp b/components/core/src/clp_s/SchemaReader.hpp
new file mode 100644
index 000000000..87ac549ec
--- /dev/null
+++ b/components/core/src/clp_s/SchemaReader.hpp
@@ -0,0 +1,153 @@
+#ifndef CLP_S_SCHEMAREADER_HPP
+#define CLP_S_SCHEMAREADER_HPP
+
+#include <string>
+#include <type_traits>
+#include <unordered_map>
+#include <utility>
+
+#include <json/single_include/nlohmann/json.hpp>
+
+#include "ColumnReader.hpp"
+#include "FileReader.hpp"
+#include "JsonSerializer.hpp"
+#include "SchemaTree.hpp"
+#include "ZstdDecompressor.hpp"
+
+namespace clp_s {
+class SchemaReader;
+
+class FilterClass {
+public:
+    virtual ~FilterClass() = default;  // lets a filter be destroyed through a FilterClass pointer
+
+    /**
+     * Initializes the filter for one schema
+     * @param reader,schema_id,columns The schema's reader, its id, and its readers by column id
+     */
+    virtual void init(
+            SchemaReader* reader,
+            int32_t schema_id,
+            std::unordered_map<int32_t, BaseColumnReader*>& columns
+    ) = 0;
+
+    /**
+     * Filters the message
+     * @param cur_message Index of the message being considered
+     * @param extracted_values Values, by column id, that the reader serializes if accepted
+     * @return true if the message is accepted
+     */
+    virtual bool filter(
+            uint64_t cur_message,
+            std::map<int32_t, std::variant<int64_t, double, std::string, uint8_t>>& extracted_values
+    ) = 0;
+};
+
+class SchemaReader {
+public:
+    // Constructor (initializers are listed in member declaration order)
+    explicit SchemaReader(std::shared_ptr<SchemaTree> schema_tree, int32_t schema_id)
+            : m_schema_id(schema_id),
+              m_num_messages(0),
+              m_cur_message(0),
+              m_global_schema_tree(std::move(schema_tree)),
+              m_json_serializer(std::make_shared<JsonSerializer>()) {}
+
+    // Destructor (does not free the appended column readers; close() does)
+    ~SchemaReader() = default;
+
+    /**
+     * Records the path of the schema file and creates an empty local schema tree
+     * @param path
+     */
+    void open(std::string path);
+
+    /**
+     * Deletes the appended column readers and clears the column and id maps
+     */
+    void close();
+
+    /**
+     * Appends a column to the schema reader and adds its node to the local schema tree
+     * @param column_reader Reader for the column; deleted by close()
+     */
+    void append_column(BaseColumnReader* column_reader);
+
+    /**
+     * Adds a node that has no column reader to the local schema tree
+     * @param id Id of the node in the global schema tree
+     */
+    void append_column(int32_t id);
+
+    /**
+     * Loads the message count and the encoded columns, then builds the JSON template
+     */
+    void load();
+
+    /**
+     * Gets next message
+     * @param message Receives the serialized message, terminated by a newline
+     * @return true if there is a next message
+     */
+    bool get_next_message(std::string& message);
+
+    /**
+     * Gets the next message that is accepted by a filter
+     * @param message Receives the serialized message, terminated by a newline
+     * @param filter
+     * @return true if an accepted message was found
+     */
+    bool get_next_message(std::string& message, FilterClass* filter);
+
+    /**
+     * Initializes the filter with this reader, its schema id and its column readers
+     * @param filter
+     */
+    void initialize_filter(FilterClass* filter);
+
+private:
+    /**
+     * Adds a global schema tree node, and any missing ancestors, to the local schema tree
+     * @param global_id
+     */
+    void generate_local_tree(int32_t global_id);
+
+    /**
+     * Generates the JSON template: the serializer ops (and the matching
+     * column order) needed to emit the children of a local schema tree
+     * node, recursing into object nodes
+     * @param id Id of the node in the local schema tree
+     */
+    void generate_json_template(int32_t id);
+
+    /**
+     * Gets a json pointer string (NOTE(review): not defined in SchemaReader.cpp)
+     * @param s
+     * @return
+     */
+    static std::string get_json_pointer_string(std::string const& s);
+
+    int32_t m_schema_id;
+    std::string m_path;
+    uint64_t m_num_messages;
+    uint64_t m_cur_message;
+
+    FileReader m_file_reader;
+    ZstdDecompressor m_decompressor;
+
+    std::unordered_map<int32_t, BaseColumnReader*> m_column_map;
+    std::vector<BaseColumnReader*> m_columns;
+    std::vector<BaseColumnReader*> m_reordered_columns;
+
+    std::shared_ptr<SchemaTree> m_global_schema_tree;
+    std::shared_ptr<SchemaTree> m_local_schema_tree;
+    std::unordered_map<int32_t, int32_t> m_global_id_to_local_id;
+    std::unordered_map<int32_t, int32_t> m_local_id_to_global_id;
+
+    std::shared_ptr<JsonSerializer> m_json_serializer;
+
+    std::map<int32_t, std::variant<int64_t, double, std::string, uint8_t>> m_extracted_values;
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_SCHEMAREADER_HPP
diff --git a/components/core/src/clp_s/SchemaTree.cpp b/components/core/src/clp_s/SchemaTree.cpp
new file mode 100644
index 000000000..36527f335
--- /dev/null
+++ b/components/core/src/clp_s/SchemaTree.cpp
@@ -0,0 +1,25 @@
+#include "SchemaTree.hpp"
+
+namespace clp_s {
+int32_t SchemaTree::add_node(int32_t parent_node_id, NodeType type, std::string const& key) {
+    std::tuple<int32_t, std::string, NodeType> const lookup_key{parent_node_id, key, type};
+    if (auto const existing = m_node_map.find(lookup_key); m_node_map.end() != existing) {
+        // Known (parent, key, type) triple: bump its count and reuse its id
+        int32_t const existing_id = existing->second;
+        m_nodes[existing_id]->increase_count();
+        return existing_id;
+    }
+
+    // First occurrence: the id of the new node is its index in m_nodes
+    auto new_node = std::make_shared<SchemaNode>(parent_node_id, m_nodes.size(), key, type);
+    new_node->increase_count();
+    m_nodes.push_back(new_node);
+    int32_t const new_id = new_node->get_id();
+    if (parent_node_id >= 0) {
+        m_nodes[parent_node_id]->add_child(new_id);
+    }
+    m_node_map[lookup_key] = new_id;
+
+    return new_id;
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/SchemaTree.hpp b/components/core/src/clp_s/SchemaTree.hpp
new file mode 100644
index 000000000..178f80f7c
--- /dev/null
+++ b/components/core/src/clp_s/SchemaTree.hpp
@@ -0,0 +1,99 @@
+#ifndef CLP_S_SCHEMATREE_HPP
+#define CLP_S_SCHEMATREE_HPP
+
+#include <functional>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include <absl/container/flat_hash_map.h>
+
+namespace clp_s {
+enum class NodeType : uint8_t {
+    INTEGER,  // read with Int64ColumnReader
+    FLOAT,  // read with FloatColumnReader
+    CLPSTRING,  // string column read with the variable and log type dictionaries
+    VARSTRING,  // string column read with the variable dictionary only
+    BOOLEAN,  // read with BooleanColumnReader
+    OBJECT,  // has no column reader of its own
+    ARRAY,  // read like a CLPSTRING, but with the array dictionary
+    NULLVALUE,  // has no column reader; SchemaReader serializes it as null
+    DATESTRING,  // string column read with the timestamp dictionary
+    FLOATDATESTRING  // read with FloatDateStringColumnReader; serialized as a string
+};
+
+class SchemaNode {
+public:
+    // Constructors (initializers are listed in member declaration order)
+    SchemaNode() : m_id(-1), m_parent_id(-1), m_type(NodeType::INTEGER), m_count(0) {}
+
+    SchemaNode(int32_t parent_id, int32_t id, std::string key_name, NodeType type)
+            : m_id(id),
+              m_parent_id(parent_id),
+              m_key_name(std::move(key_name)),
+              m_type(type),
+              m_count(0) {}
+
+    /**
+     * Getters
+     */
+    int32_t get_id() const { return m_id; }
+
+    int32_t get_parent_id() const { return m_parent_id; }
+
+    std::vector<int32_t>& get_children_ids() { return m_children_ids; }
+
+    NodeType get_type() const { return m_type; }
+
+    std::string const& get_key_name() const { return m_key_name; }
+
+    int32_t get_count() const { return m_count; }
+
+    /**
+     * Increases the count of this node by 1
+     */
+    void increase_count() { m_count++; }
+
+    /**
+     * Adds a child node to this node
+     * @param child_id Id of the child; appended without checking for duplicates
+     */
+    void add_child(int32_t child_id) { m_children_ids.push_back(child_id); }
+
+private:
+    int32_t m_id;
+    int32_t m_parent_id;
+    std::vector<int32_t> m_children_ids;
+    std::string m_key_name;
+    NodeType m_type;
+    int32_t m_count;
+};
+
+class SchemaTree {
+public:
+    SchemaTree() = default;
+    // Returns the id of the (parent, key, type) node, creating the node if it does not exist yet
+    int32_t add_node(int32_t parent_node_id, NodeType type, std::string const& key);
+    // The sign is tested first so that a negative id is never compared as an unsigned value
+    bool has_node(int32_t id) { return id >= 0 && static_cast<size_t>(id) < m_nodes.size(); }
+    // Returns the node with the given id; throws std::invalid_argument if there is no such node
+    std::shared_ptr<SchemaNode> get_node(int32_t id) {
+        if (id < 0 || static_cast<size_t>(id) >= m_nodes.size()) {
+            throw std::invalid_argument("invalid access of id " + std::to_string(id));
+        }
+
+        return m_nodes[id];
+    }
+    // NOTE: requires a non-empty tree; the access to m_nodes[0] is not checked
+    int32_t get_root_node_id() { return m_nodes[0]->get_id(); }
+    // Returns a copy of the node list; the shared_ptrs still refer to the tree's own nodes
+    std::vector<std::shared_ptr<SchemaNode>> get_nodes() { return m_nodes; }
+
+private:
+    std::vector<std::shared_ptr<SchemaNode>> m_nodes;
+    absl::flat_hash_map<std::tuple<int32_t, std::string, NodeType>, int32_t> m_node_map;
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_SCHEMATREE_HPP
diff --git a/components/core/src/clp_s/SchemaWriter.cpp b/components/core/src/clp_s/SchemaWriter.cpp
new file mode 100644
index 000000000..56357874f
--- /dev/null
+++ b/components/core/src/clp_s/SchemaWriter.cpp
@@ -0,0 +1,56 @@
+#include "SchemaWriter.hpp"
+
+#include <utility>
+
+namespace clp_s {
+void SchemaWriter::open(std::string path, int compression_level) {
+    m_compression_level = compression_level;
+    m_path = std::move(path);  // nothing is written until store() opens this path
+}
+
+void SchemaWriter::close() {
+    // Shut down the compressor before the file writer that it writes into
+    m_compressor.close();
+    m_file_writer.close();
+    // Free the column writers; clearing the vector stops the destructor from freeing them again
+    for (auto* column_writer : m_columns) {
+        delete column_writer;
+    }
+    m_columns.clear();
+}
+
+void SchemaWriter::append_column(BaseColumnWriter* column_writer) {
+    m_columns.push_back(column_writer);  // takes ownership; freed in close() or the destructor
+}
+
+size_t SchemaWriter::append_message(ParsedMessage& message) {
+    size_t value_size = 0;
+    size_t total_size = 0;
+    size_t column_index = 0;
+    // The i-th value of the message is handed to the i-th column writer
+    for (auto& entry : message.get_content()) {
+        m_columns[column_index++]->add_value(entry.second, value_size);
+        total_size += value_size;
+    }
+
+    ++m_num_messages;
+    return total_size;
+}
+
+void SchemaWriter::store() {
+    m_file_writer.open(m_path, FileWriter::OpenMode::CreateForWriting);
+    // The message count is written to the file directly, before the compressor is opened
+    m_file_writer.write_numeric_value(m_num_messages);
+    m_compressor.open(m_file_writer, m_compression_level);
+    // Column data goes through the compressor, in the order the columns were appended
+    for (auto* column_writer : m_columns) {
+        column_writer->store(m_compressor);
+    }
+}
+
+SchemaWriter::~SchemaWriter() {
+    for (auto* column_writer : m_columns) {  // empty if close() already ran
+        delete column_writer;
+    }
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/SchemaWriter.hpp b/components/core/src/clp_s/SchemaWriter.hpp
new file mode 100644
index 000000000..edf3320d8
--- /dev/null
+++ b/components/core/src/clp_s/SchemaWriter.hpp
@@ -0,0 +1,61 @@
+#ifndef CLP_S_SCHEMAWRITER_HPP
+#define CLP_S_SCHEMAWRITER_HPP
+
+#include <vector>
+
+#include "ColumnWriter.hpp"
+#include "FileWriter.hpp"
+#include "ParsedMessage.hpp"
+#include "ZstdCompressor.hpp"
+
+namespace clp_s {
+class SchemaWriter {
+public:
+    // Constructor (all members use their default member initializers)
+    SchemaWriter() = default;
+
+    // Destructor: frees any column writers that close() has not already freed
+    ~SchemaWriter();
+
+    /**
+     * Remembers where and how the schema will be written; no file is opened yet.
+     * @param path Path of the file that store() creates
+     * @param compression_level Level passed to the compressor in store()
+     */
+    void open(std::string path, int compression_level);
+
+    /**
+     * Adds a column writer at the end of the column list.
+     * @param column_writer Deleted by close() or by the destructor
+     */
+    void append_column(BaseColumnWriter* column_writer);
+
+    /**
+     * Feeds the values of a message to the column writers, in column order.
+     * @param message
+     * @return The sum of the sizes reported by the column writers.
+     */
+    size_t append_message(ParsedMessage& message);
+
+    /**
+     * Writes the message count and then every column to the file.
+     */
+    void store();
+
+    /**
+     * Closes the compressor and the file, and frees the column writers.
+     */
+    void close();
+
+private:
+    FileWriter m_file_writer;
+    ZstdCompressor m_compressor;
+    std::string m_path;
+    int m_compression_level{};
+    uint64_t m_num_messages{0};
+
+    std::vector<BaseColumnWriter*> m_columns;
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_SCHEMAWRITER_HPP
diff --git a/components/core/src/clp_s/TimestampDictionaryReader.cpp b/components/core/src/clp_s/TimestampDictionaryReader.cpp
new file mode 100644
index 000000000..09dfe65fd
--- /dev/null
+++ b/components/core/src/clp_s/TimestampDictionaryReader.cpp
@@ -0,0 +1,91 @@
+#include "TimestampDictionaryReader.hpp"
+
+#include "Utils.hpp"
+
+namespace clp_s {
+void TimestampDictionaryReader::open(std::string const& dictionary_path) {
+    if (true == m_is_open) {
+        throw OperationFailed(ErrorCodeNotReady, __FILENAME__, __LINE__);
+    }
+
+    // Read-buffer capacity that is handed to the decompressor
+    constexpr size_t cReadBufferCapacity = 16 * 1024;  // 16 KB
+    m_dictionary_file_reader.open(dictionary_path);
+    m_dictionary_decompressor.open(m_dictionary_file_reader, cReadBufferCapacity);
+
+    m_is_open = true;
+}
+
+void TimestampDictionaryReader::close() {
+    if (false == m_is_open) {
+        throw OperationFailed(ErrorCodeNotInit, __FILENAME__, __LINE__);
+    }
+    m_dictionary_decompressor.close();
+    m_dictionary_file_reader.close();
+    m_is_open = false;  // allows open() to be called again and makes a second close() throw
+}
+
+void TimestampDictionaryReader::read_local_entries() {
+    read_new_entries(/*local=*/true);  // returns after the range index; no patterns are read
+}
+
+void TimestampDictionaryReader::read_new_entries(bool local) {
+    if (false == m_is_open) {
+        throw OperationFailed(ErrorCodeNotInit, __FILENAME__, __LINE__);
+    }
+
+    ErrorCode error;
+    // Range index: a count followed by one (column, entry) record per indexed column
+    uint64_t range_index_size;
+    error = m_dictionary_decompressor.try_read_numeric_value<uint64_t>(range_index_size);
+    if (ErrorCodeSuccess != error) {
+        throw OperationFailed(error, __FILENAME__, __LINE__);
+    }
+    // NOTE(review): the result of try_read_from_file below is not checked -- confirm intended
+    for (uint64_t i = 0; i < range_index_size; ++i) {
+        std::string col;
+        TimestampEntry entry;
+        entry.try_read_from_file(m_dictionary_decompressor, col);
+        TimestampEntry& e = m_column_to_range[col] = entry;
+        std::vector<std::string> tokens;
+        StringUtils::tokenize_column_descriptor(col, tokens);
+        m_tokenized_column_to_range.emplace_back(std::move(tokens), &e);
+    }
+
+    // Local timestamp dictionaries only contain range indices, and
+    // not patterns. Exit early here.
+    if (local) {
+        return;
+    }
+
+    uint64_t num_patterns;
+    error = m_dictionary_decompressor.try_read_numeric_value<uint64_t>(num_patterns);
+    if (ErrorCodeSuccess != error) {
+        throw OperationFailed(error, __FILENAME__, __LINE__);
+    }
+    for (uint64_t i = 0; i < num_patterns; ++i) {
+        uint64_t id, pattern_len;
+        std::string pattern;
+        error = m_dictionary_decompressor.try_read_numeric_value<uint64_t>(id);
+        if (ErrorCodeSuccess != error) {
+            throw OperationFailed(error, __FILENAME__, __LINE__);
+        }
+        error = m_dictionary_decompressor.try_read_numeric_value<uint64_t>(pattern_len);
+        if (ErrorCodeSuccess != error) {
+            throw OperationFailed(error, __FILENAME__, __LINE__);
+        }
+        error = m_dictionary_decompressor.try_read_string(pattern_len, pattern);
+        if (ErrorCodeSuccess != error) {
+            throw OperationFailed(error, __FILENAME__, __LINE__);
+        }
+        m_patterns[id] = TimestampPattern(0, pattern);  // a repeated id replaces the older pattern
+    }
+}
+
+std::string TimestampDictionaryReader::get_string_encoding(epochtime_t epoch, uint64_t format_id) {
+    std::string formatted_timestamp;
+    auto& pattern = m_patterns[format_id];  // operator[] inserts a default pattern for unknown ids
+    pattern.insert_formatted_timestamp(epoch, formatted_timestamp);
+    return formatted_timestamp;
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/TimestampDictionaryReader.hpp b/components/core/src/clp_s/TimestampDictionaryReader.hpp
new file mode 100644
index 000000000..d6f38743c
--- /dev/null
+++ b/components/core/src/clp_s/TimestampDictionaryReader.hpp
@@ -0,0 +1,99 @@
+#ifndef CLP_S_TIMESTAMPDICTIONARYREADER_HPP
+#define CLP_S_TIMESTAMPDICTIONARYREADER_HPP
+
+#include <map>
+
+#include "FileReader.hpp"
+#include "search/FilterOperation.hpp"
+#include "TimestampEntry.hpp"
+#include "TimestampPattern.hpp"
+#include "ZstdDecompressor.hpp"
+
+namespace clp_s {
+class TimestampDictionaryReader {
+public:
+    // Types
+    class OperationFailed : public TraceableException {
+    public:
+        // Constructors
+        OperationFailed(ErrorCode error_code, char const* const filename, int line_number)
+                : TraceableException(error_code, filename, line_number) {}
+    };
+
+    // Constructors
+    TimestampDictionaryReader() : m_is_open(false) {}
+
+    // Methods
+    /**
+     * Opens dictionary for reading
+     * @param dictionary_path
+     */
+    void open(std::string const& dictionary_path);
+
+    /**
+     * Closes the dictionary
+     */
+    void close();
+
+    /**
+     * Reads the column ranges and, unless local is true, the timestamp patterns from disk
+     */
+    void read_new_entries(bool local = false);
+
+    /**
+     * Reads new entries from a *local* timestamp dictionary
+     *
+     * Local timestamp dictionaries contain only range indices,
+     * and have no timestamp pattern mappings
+     */
+    void read_local_entries();
+
+    /**
+     * Formats an epoch time as a string using the timestamp pattern with the given format ID
+     * @param epoch the epoch time to format
+     * @param format_id ID of the timestamp pattern used for formatting
+     */
+    std::string get_string_encoding(epochtime_t epoch, uint64_t format_id);
+
+    typedef std::map<uint64_t, TimestampPattern>::iterator id_to_pattern_iterator_t;
+    typedef std::vector<std::pair<std::vector<std::string>, TimestampEntry*>>::iterator
+            tokenized_column_to_range_it_t;
+
+    /**
+     * Gets iterators for the timestamp patterns
+     * @return begin and end iterators for the timestamp patterns
+     */
+    id_to_pattern_iterator_t pattern_begin() { return m_patterns.begin(); }
+
+    id_to_pattern_iterator_t pattern_end() { return m_patterns.end(); }
+
+    /**
+     * Gets iterators for the column to range mappings
+     * @return begin and end iterators for the column to range mappings
+     */
+    tokenized_column_to_range_it_t tokenized_column_to_range_begin() {
+        return m_tokenized_column_to_range.begin();
+    }
+
+    tokenized_column_to_range_it_t tokenized_column_to_range_end() {
+        return m_tokenized_column_to_range.end();
+    }
+
+private:
+    typedef std::map<uint64_t, TimestampPattern> id_to_pattern_t;
+    typedef std::map<std::string, TimestampEntry> column_to_range_t;
+    typedef std::vector<std::pair<std::vector<std::string>, TimestampEntry*>>
+            tokenized_column_to_range_t;
+
+    // Variables
+    bool m_is_open;
+    FileReader m_dictionary_file_reader;
+    ZstdDecompressor m_dictionary_decompressor;
+
+    id_to_pattern_t m_patterns;  // pattern ID -> timestamp pattern
+    column_to_range_t m_column_to_range;  // column descriptor -> range; owns the entries
+    tokenized_column_to_range_t m_tokenized_column_to_range;  // points into m_column_to_range
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_TIMESTAMPDICTIONARYREADER_HPP
diff --git a/components/core/src/clp_s/TimestampDictionaryWriter.cpp b/components/core/src/clp_s/TimestampDictionaryWriter.cpp
new file mode 100644
index 000000000..32365049f
--- /dev/null
+++ b/components/core/src/clp_s/TimestampDictionaryWriter.cpp
@@ -0,0 +1,146 @@
+#include "TimestampDictionaryWriter.hpp"
+
+#include "Utils.hpp"
+
+namespace clp_s {
+void TimestampDictionaryWriter::write_timestamp_entries(
+        std::map<std::string, TimestampEntry> const& ranges,
+        ZstdCompressor& compressor
+) {
+    // Layout: the number of entries, followed by every (column, range) entry in key order.
+    compressor.write_numeric_value<uint64_t>(ranges.size());
+    for (auto const& [column, entry] : ranges) {
+        entry.write_to_file(compressor, column);
+    }
+}
+
+void TimestampDictionaryWriter::write_and_flush_to_disk() {
+    // Section 1: the merged per-column timestamp ranges.
+    write_timestamp_entries(m_global_column_to_range, m_dictionary_compressor);
+
+    // Section 2: the pattern count, then an (ID, format length, format) record per pattern.
+    m_dictionary_compressor.write_numeric_value<uint64_t>(m_pattern_to_id.size());
+    for (auto& [pattern, pattern_id] : m_pattern_to_id) {
+        m_dictionary_compressor.write_numeric_value<uint64_t>(pattern_id);
+        std::string const& format = pattern->get_format();
+        m_dictionary_compressor.write_numeric_value<uint64_t>(format.length());
+        m_dictionary_compressor.write_string(format);
+    }
+
+    m_dictionary_compressor.flush();
+    m_dictionary_file_writer.flush();
+}
+
+void TimestampDictionaryWriter::write_local_and_flush_to_disk() {
+    // Local dictionaries hold only the column ranges; no pattern section follows.
+    write_timestamp_entries(m_local_column_to_range, m_dictionary_compressor_local);
+    m_dictionary_compressor_local.flush();
+    m_dictionary_file_writer_local.flush();
+}
+
+void TimestampDictionaryWriter::open(std::string const& dictionary_path, int compression_level) {
+    if (true == m_is_open) {
+        throw OperationFailed(ErrorCodeNotReady, __FILENAME__, __LINE__);
+    }
+
+    // The compressor is opened on top of the file writer, so the file is opened first.
+    m_dictionary_file_writer.open(dictionary_path, FileWriter::OpenMode::CreateForWriting);
+    m_dictionary_compressor.open(m_dictionary_file_writer, compression_level);
+    m_is_open = true;
+    m_next_id = 0;
+}
+
+void TimestampDictionaryWriter::open_local(
+        std::string const& dictionary_path,
+        int compression_level
+) {
+    if (true == m_is_open_local) {
+        throw OperationFailed(ErrorCodeNotReady, __FILENAME__, __LINE__);
+    }
+
+    // Unlike open(), this leaves m_next_id untouched.
+    m_dictionary_file_writer_local.open(dictionary_path, FileWriter::OpenMode::CreateForWriting);
+    m_dictionary_compressor_local.open(m_dictionary_file_writer_local, compression_level);
+    m_is_open_local = true;
+}
+
+void TimestampDictionaryWriter::close() {
+    if (false == m_is_open) {
+        throw OperationFailed(ErrorCodeNotInit, __FILENAME__, __LINE__);
+    }
+
+    // The global dictionary is closed before the last sub-archive is written, so fold the
+    // still-pending local ranges into the global ranges before writing them out.
+    merge_local_range();
+    write_and_flush_to_disk();
+
+    m_dictionary_compressor.close();
+    m_dictionary_file_writer.close();
+    m_is_open = false;
+}
+
+void TimestampDictionaryWriter::close_local() {
+    if (false == m_is_open_local) {
+        throw OperationFailed(ErrorCodeNotInit, __FILENAME__, __LINE__);
+    }
+
+    // Persist this sub-archive's ranges and close its file.
+    write_local_and_flush_to_disk();
+    m_dictionary_compressor_local.close();
+    m_dictionary_file_writer_local.close();
+    m_is_open_local = false;
+
+    // Fold the ranges into the global ranges, then start the next sub-archive empty.
+    merge_local_range();
+    m_local_column_to_range.clear();
+}
+
+uint64_t TimestampDictionaryWriter::get_pattern_id(TimestampPattern const* pattern) {
+    // A single lookup decides whether the pattern already owns an ID.
+    auto const known = m_pattern_to_id.find(pattern);
+    if (m_pattern_to_id.end() != known) {
+        return known->second;
+    }
+    uint64_t const new_id = m_next_id++;
+    m_pattern_to_id.emplace(pattern, new_id);
+    return new_id;
+}
+
+// Parses timestamp, records it in key's local range, and sets id; throws if no pattern matches.
+epochtime_t TimestampDictionaryWriter::ingest_entry(
+        std::string const& key,
+        std::string const& timestamp,
+        uint64_t& id
+) {
+    epochtime_t ret{};
+    size_t timestamp_begin_pos = 0, timestamp_end_pos = 0;
+    TimestampPattern const* pattern = TimestampPattern::search_known_ts_patterns(
+            timestamp,
+            ret,
+            timestamp_begin_pos,
+            timestamp_end_pos
+    );
+    // Validate before touching the range: an unparsed timestamp must not widen the bounds.
+    if (nullptr == pattern) {
+        throw OperationFailed(ErrorCodeFailure, __FILENAME__, __LINE__);
+    }
+
+    m_local_column_to_range[key].ingest_timestamp(ret);
+    id = get_pattern_id(pattern);
+    return ret;
+}
+
+void TimestampDictionaryWriter::ingest_entry(std::string const& key, double timestamp) {
+    m_local_column_to_range[key].ingest_timestamp(timestamp);  // creates the range on first use
+}
+
+void TimestampDictionaryWriter::ingest_entry(std::string const& key, int64_t timestamp) {
+    m_local_column_to_range[key].ingest_timestamp(timestamp);  // creates the range on first use
+}
+
+void TimestampDictionaryWriter::merge_local_range() {
+    for (auto const& [column, local_range] : m_local_column_to_range) {
+        m_global_column_to_range[column].merge_range(local_range);
+    }
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/TimestampDictionaryWriter.hpp b/components/core/src/clp_s/TimestampDictionaryWriter.hpp
new file mode 100644
index 000000000..c810e675e
--- /dev/null
+++ b/components/core/src/clp_s/TimestampDictionaryWriter.hpp
@@ -0,0 +1,95 @@
+#ifndef CLP_S_TIMESTAMPDICTIONARYWRITER_HPP
+#define CLP_S_TIMESTAMPDICTIONARYWRITER_HPP
+
+#include <map>
+#include <string>
+#include <unordered_map>
+
+#include "FileWriter.hpp"
+#include "TimestampEntry.hpp"
+#include "TimestampPattern.hpp"
+#include "ZstdCompressor.hpp"
+
+namespace clp_s {
+class TimestampDictionaryWriter {
+public:
+    // Types
+    class OperationFailed : public TraceableException {
+    public:
+        // Constructors
+        OperationFailed(ErrorCode error_code, char const* const filename, int line_number)
+                : TraceableException(error_code, filename, line_number) {}
+    };
+
+    // Constructors
+    TimestampDictionaryWriter() : m_is_open(false), m_is_open_local(false) {}
+
+    /**
+     * Opens the global timestamp dictionary for writing
+     * @param dictionary_path
+     * @param compression_level
+     */
+    void open(std::string const& dictionary_path, int compression_level);
+
+    /**
+     * Opens a local timestamp dictionary for writing
+     * @param dictionary_path
+     * @param compression_level
+     */
+    void open_local(std::string const& dictionary_path, int compression_level);
+
+    /**
+     * Merges pending local ranges, writes the global timestamp dictionary, and closes it
+     */
+    void close();
+
+    /**
+     * Writes and closes the local dictionary, merges its ranges globally, and clears them
+     */
+    void close_local();
+
+    /**
+     * Writes the global column ranges and the pattern table, then flushes them to disk
+     */
+    void write_and_flush_to_disk();
+
+    /**
+     * Writes the local column ranges, then flushes them to disk
+     */
+    void write_local_and_flush_to_disk();
+
+    uint64_t get_pattern_id(TimestampPattern const* pattern);
+
+    epochtime_t ingest_entry(std::string const& key, std::string const& timestamp, uint64_t& id);
+
+    void ingest_entry(std::string const& key, double timestamp);
+
+    void ingest_entry(std::string const& key, int64_t timestamp);
+
+private:
+    void merge_local_range();
+    static void write_timestamp_entries(
+            std::map<std::string, TimestampEntry> const& ranges,
+            ZstdCompressor& compressor
+    );
+
+    typedef std::unordered_map<TimestampPattern const*, uint64_t> pattern_to_id_t;
+
+    // Variables
+    bool m_is_open;
+    bool m_is_open_local;
+
+    // Variables related to on-disk storage
+    FileWriter m_dictionary_file_writer;
+    ZstdCompressor m_dictionary_compressor;
+    FileWriter m_dictionary_file_writer_local;
+    ZstdCompressor m_dictionary_compressor_local;
+
+    pattern_to_id_t m_pattern_to_id;  // IDs assigned by get_pattern_id
+    uint64_t m_next_id{};  // next pattern ID to assign; reset by open()
+    std::map<std::string, TimestampEntry> m_global_column_to_range;
+    std::map<std::string, TimestampEntry> m_local_column_to_range;  // cleared by close_local()
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_TIMESTAMPDICTIONARYWRITER_HPP
diff --git a/components/core/src/clp_s/TimestampEntry.cpp b/components/core/src/clp_s/TimestampEntry.cpp
new file mode 100644
index 000000000..cbc6515d6
--- /dev/null
+++ b/components/core/src/clp_s/TimestampEntry.cpp
@@ -0,0 +1,345 @@
+#include "TimestampEntry.hpp"
+
+#include <cmath>
+
+namespace clp_s {
+void TimestampEntry::ingest_timestamp(epochtime_t timestamp) {
+    if (DoubleEpoch != m_encoding) {
+        // An entry with no encoding yet becomes an Epoch entry.
+        if (UnkownTimestampEncoding == m_encoding) {
+            m_encoding = Epoch;
+        }
+        if (m_epoch_start > timestamp) {
+            m_epoch_start = timestamp;
+        }
+        if (m_epoch_end < timestamp) {
+            m_epoch_end = timestamp;
+        }
+        return;
+    }
+
+    // The entry already holds double bounds, so widen those instead.
+    if (std::ceil(m_epoch_start_double) > timestamp) {
+        m_epoch_start_double = timestamp;
+    }
+    if (std::floor(m_epoch_end_double) < timestamp) {
+        m_epoch_end_double = timestamp;
+    }
+}
+
+void TimestampEntry::ingest_timestamp(double timestamp) {
+    if (Epoch == m_encoding) {
+        // Carry the integer bounds over so the range still covers earlier timestamps.
+        m_epoch_start_double = m_epoch_start;
+        m_epoch_end_double = m_epoch_end;
+    }
+    if (UnkownTimestampEncoding == m_encoding || Epoch == m_encoding) {
+        m_encoding = DoubleEpoch;
+    }
+
+    if (m_epoch_start_double > timestamp) {
+        m_epoch_start_double = timestamp;
+    }
+    if (m_epoch_end_double < timestamp) {
+        m_epoch_end_double = timestamp;
+    }
+}
+
+void TimestampEntry::merge_range(TimestampEntry const& entry) {
+    if (DoubleEpoch == entry.m_encoding) {
+        ingest_timestamp(entry.m_epoch_start_double);
+        ingest_timestamp(entry.m_epoch_end_double);
+    } else if (Epoch == entry.m_encoding) {
+        ingest_timestamp(entry.m_epoch_start);
+        ingest_timestamp(entry.m_epoch_end);
+    }  // an entry with any other encoding contributes nothing
+}
+
+void TimestampEntry::write_to_file(ZstdCompressor& compressor, std::string const& column) const {
+    // Record layout: column length, column, encoding, then the bounds for that encoding.
+    compressor.write_numeric_value<uint64_t>(column.length());
+    compressor.write_string(column);
+    compressor.write_numeric_value<TimestampEncoding>(m_encoding);
+
+    if (Epoch == m_encoding) {
+        compressor.write_numeric_value<epochtime_t>(m_epoch_start);
+        compressor.write_numeric_value<epochtime_t>(m_epoch_end);
+    } else if (DoubleEpoch == m_encoding) {
+        compressor.write_numeric_value<double>(m_epoch_start_double);
+        compressor.write_numeric_value<double>(m_epoch_end_double);
+    }
+}
+
+ErrorCode TimestampEntry::try_read_from_file(ZstdDecompressor& decompressor, std::string& column) {
+    // Reads the record produced by write_to_file; returns at the first failed read.
+    ErrorCode error_code;
+    uint64_t column_len;
+    error_code = decompressor.try_read_numeric_value<uint64_t>(column_len);
+    if (ErrorCodeSuccess != error_code) {
+        return error_code;
+    }
+    error_code = decompressor.try_read_string(column_len, column);
+    if (ErrorCodeSuccess != error_code) {
+        return error_code;
+    }
+
+    // The encoding is read straight into the member and selects which pair of bounds follows.
+    error_code = decompressor.try_read_numeric_value<TimestampEncoding>(m_encoding);
+    if (ErrorCodeSuccess != error_code) {
+        return error_code;
+    }
+
+    if (m_encoding == Epoch) {
+        error_code = decompressor.try_read_numeric_value<epochtime_t>(m_epoch_start);
+        if (ErrorCodeSuccess != error_code) {
+            return error_code;
+        }
+        error_code = decompressor.try_read_numeric_value<epochtime_t>(m_epoch_end);
+        if (ErrorCodeSuccess != error_code) {
+            return error_code;
+        }
+    } else if (m_encoding == DoubleEpoch) {
+        error_code = decompressor.try_read_numeric_value<double>(m_epoch_start_double);
+        if (ErrorCodeSuccess != error_code) {
+            return error_code;
+        }
+        error_code = decompressor.try_read_numeric_value<double>(m_epoch_end_double);
+        if (ErrorCodeSuccess != error_code) {
+            return error_code;
+        }
+    }
+
+    return error_code;
+}
+
+void TimestampEntry::read_from_file(ZstdDecompressor& decompressor, std::string& column) {
+    ErrorCode const read_result = try_read_from_file(decompressor, column);
+    if (ErrorCodeSuccess != read_result) {  // surface any read failure as an exception
+        throw OperationFailed(read_result, __FILENAME__, __LINE__);
+    }
+}
+
+EvaluatedValue TimestampEntry::evaluate_filter(FilterOperation op, double timestamp) {
+    if (op == FilterOperation::EXISTS || op == FilterOperation::NEXISTS) {
+        return EvaluatedValue::Unknown;  // existence is not decided from the range
+    }
+
+    if (m_encoding == DoubleEpoch) {  // bounds are stored as doubles
+        switch (op) {
+            case FilterOperation::EQ:  // value == timestamp
+                if (timestamp >= m_epoch_start_double && timestamp <= m_epoch_end_double) {
+                    return EvaluatedValue::Unknown;
+                } else {
+                    return EvaluatedValue::False;
+                }
+            case FilterOperation::NEQ:  // value != timestamp
+                if (timestamp >= m_epoch_start_double && timestamp <= m_epoch_end_double) {
+                    return EvaluatedValue::Unknown;
+                } else {
+                    return EvaluatedValue::True;
+                }
+            case FilterOperation::LT:  // value < timestamp
+                if (timestamp > m_epoch_end_double) {
+                    return EvaluatedValue::True;
+                } else if (timestamp <= m_epoch_start_double) {
+                    return EvaluatedValue::False;
+                } else {
+                    return EvaluatedValue::Unknown;
+                }
+            case FilterOperation::LTE:  // value <= timestamp
+                if (timestamp >= m_epoch_end_double) {
+                    return EvaluatedValue::True;
+                } else if (timestamp < m_epoch_start_double) {
+                    return EvaluatedValue::False;
+                } else {
+                    return EvaluatedValue::Unknown;
+                }
+            case FilterOperation::GT:  // value > timestamp
+                if (timestamp < m_epoch_start_double) {
+                    return EvaluatedValue::True;
+                } else if (timestamp >= m_epoch_end_double) {
+                    return EvaluatedValue::False;
+                } else {
+                    return EvaluatedValue::Unknown;
+                }
+            case FilterOperation::GTE:  // value >= timestamp
+                if (timestamp <= m_epoch_start_double) {
+                    return EvaluatedValue::True;
+                } else if (timestamp > m_epoch_end_double) {
+                    return EvaluatedValue::False;
+                } else {
+                    return EvaluatedValue::Unknown;
+                }
+            default:
+                return EvaluatedValue::Unknown;
+        }
+    } else if (m_encoding == Epoch) {  // bounds are stored as epochtime_t
+        double epoch_start_tmp = m_epoch_start, epoch_end_tmp = m_epoch_end;
+        switch (op) {
+            case FilterOperation::EQ:  // value == timestamp
+                if (timestamp >= epoch_start_tmp && timestamp <= epoch_end_tmp) {
+                    return EvaluatedValue::Unknown;
+                } else {
+                    return EvaluatedValue::False;
+                }
+            case FilterOperation::NEQ:  // value != timestamp
+                if (timestamp >= epoch_start_tmp && timestamp <= epoch_end_tmp) {
+                    return EvaluatedValue::Unknown;
+                } else {
+                    return EvaluatedValue::True;
+                }
+            case FilterOperation::LT:  // value < timestamp
+                if (timestamp > epoch_end_tmp) {
+                    return EvaluatedValue::True;
+                } else if (timestamp <= epoch_start_tmp) {
+                    return EvaluatedValue::False;
+                } else {
+                    return EvaluatedValue::Unknown;
+                }
+            case FilterOperation::LTE:  // value <= timestamp
+                if (timestamp >= epoch_end_tmp) {
+                    return EvaluatedValue::True;
+                } else if (timestamp < epoch_start_tmp) {
+                    return EvaluatedValue::False;
+                } else {
+                    return EvaluatedValue::Unknown;
+                }
+            case FilterOperation::GT:  // value > timestamp
+                if (timestamp < epoch_start_tmp) {
+                    return EvaluatedValue::True;
+                } else if (timestamp >= epoch_end_tmp) {
+                    return EvaluatedValue::False;
+                } else {
+                    return EvaluatedValue::Unknown;
+                }
+            case FilterOperation::GTE:  // value >= timestamp
+                if (timestamp <= epoch_start_tmp) {
+                    return EvaluatedValue::True;
+                } else if (timestamp > epoch_end_tmp) {
+                    return EvaluatedValue::False;
+                } else {
+                    return EvaluatedValue::Unknown;
+                }
+            default:
+                return EvaluatedValue::Unknown;
+        }
+    } else {  // any other encoding: the range decides nothing
+        return EvaluatedValue::Unknown;
+    }
+}
+
+EvaluatedValue TimestampEntry::evaluate_filter(FilterOperation op, epochtime_t timestamp) {
+    if (op == FilterOperation::EXISTS || op == FilterOperation::NEXISTS) {
+        return EvaluatedValue::Unknown;  // existence is not decided from the range
+    }
+
+    if (m_encoding == DoubleEpoch) {
+        /**
+         * TODO: borrows logic from the double_as_int function. _ltgte bounds are ceil'ed for
+         * "timestamp <" and ">=" tests; _gtlte bounds are floor'ed for ">" and "<=" tests.
+         */
+        epochtime_t epoch_start_tmp_ltgte = std::ceil(m_epoch_start_double);
+        epochtime_t epoch_start_tmp_gtlte = std::floor(m_epoch_start_double);
+        epochtime_t epoch_end_tmp_ltgte = std::ceil(m_epoch_end_double);
+        epochtime_t epoch_end_tmp_gtlte = std::floor(m_epoch_end_double);
+        switch (op) {
+            case FilterOperation::EQ:  // value == timestamp
+                if (timestamp >= epoch_start_tmp_ltgte && timestamp <= epoch_end_tmp_gtlte) {
+                    return EvaluatedValue::Unknown;
+                } else {
+                    return EvaluatedValue::False;
+                }
+            case FilterOperation::NEQ:  // value != timestamp
+                if (timestamp >= epoch_start_tmp_ltgte && timestamp <= epoch_end_tmp_gtlte) {
+                    return EvaluatedValue::Unknown;
+                } else {
+                    return EvaluatedValue::True;
+                }
+            case FilterOperation::LT:  // value < timestamp
+                if (timestamp > epoch_end_tmp_gtlte) {
+                    return EvaluatedValue::True;
+                } else if (timestamp <= epoch_start_tmp_gtlte) {
+                    return EvaluatedValue::False;
+                } else {
+                    return EvaluatedValue::Unknown;
+                }
+            case FilterOperation::LTE:  // value <= timestamp
+                if (timestamp >= epoch_end_tmp_ltgte) {
+                    return EvaluatedValue::True;
+                } else if (timestamp < epoch_start_tmp_ltgte) {
+                    return EvaluatedValue::False;
+                } else {
+                    return EvaluatedValue::Unknown;
+                }
+            case FilterOperation::GT:  // value > timestamp
+                if (timestamp < epoch_start_tmp_ltgte) {
+                    return EvaluatedValue::True;
+                } else if (timestamp >= epoch_end_tmp_ltgte) {
+                    return EvaluatedValue::False;
+                } else {
+                    return EvaluatedValue::Unknown;
+                }
+            case FilterOperation::GTE:  // value >= timestamp
+                if (timestamp <= epoch_start_tmp_gtlte) {
+                    return EvaluatedValue::True;
+                } else if (timestamp > epoch_end_tmp_gtlte) {
+                    return EvaluatedValue::False;
+                } else {
+                    return EvaluatedValue::Unknown;
+                }
+            default:
+                return EvaluatedValue::Unknown;
+        }
+    } else if (m_encoding == Epoch) {  // bounds are stored as epochtime_t
+        switch (op) {
+            case FilterOperation::EQ:  // value == timestamp
+                if (timestamp >= m_epoch_start && timestamp <= m_epoch_end) {
+                    return EvaluatedValue::Unknown;
+                } else {
+                    return EvaluatedValue::False;
+                }
+            case FilterOperation::NEQ:  // value != timestamp
+                if (timestamp >= m_epoch_start && timestamp <= m_epoch_end) {
+                    return EvaluatedValue::Unknown;
+                } else {
+                    return EvaluatedValue::True;
+                }
+            case FilterOperation::LT:  // value < timestamp
+                if (timestamp > m_epoch_end) {
+                    return EvaluatedValue::True;
+                } else if (timestamp <= m_epoch_start) {
+                    return EvaluatedValue::False;
+                } else {
+                    return EvaluatedValue::Unknown;
+                }
+            case FilterOperation::LTE:  // value <= timestamp
+                if (timestamp >= m_epoch_end) {
+                    return EvaluatedValue::True;
+                } else if (timestamp < m_epoch_start) {
+                    return EvaluatedValue::False;
+                } else {
+                    return EvaluatedValue::Unknown;
+                }
+            case FilterOperation::GT:  // value > timestamp
+                if (timestamp < m_epoch_start) {
+                    return EvaluatedValue::True;
+                } else if (timestamp >= m_epoch_end) {
+                    return EvaluatedValue::False;
+                } else {
+                    return EvaluatedValue::Unknown;
+                }
+            case FilterOperation::GTE:  // value >= timestamp
+                if (timestamp <= m_epoch_start) {
+                    return EvaluatedValue::True;
+                } else if (timestamp > m_epoch_end) {
+                    return EvaluatedValue::False;
+                } else {
+                    return EvaluatedValue::Unknown;
+                }
+            default:
+                return EvaluatedValue::Unknown;
+        }
+    } else {  // any other encoding: the range decides nothing
+        return EvaluatedValue::Unknown;
+    }
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/TimestampEntry.hpp b/components/core/src/clp_s/TimestampEntry.hpp
new file mode 100644
index 000000000..1493173ba
--- /dev/null
+++ b/components/core/src/clp_s/TimestampEntry.hpp
@@ -0,0 +1,101 @@
+#ifndef CLP_S_TIMESTAMPENTRY_HPP
+#define CLP_S_TIMESTAMPENTRY_HPP
+
+#include <string>
+#include <variant>
+
+#include "Defs.hpp"
+#include "ErrorCode.hpp"
+#include "search/FilterOperation.hpp"
+#include "Utils.hpp"
+#include "ZstdCompressor.hpp"
+#include "ZstdDecompressor.hpp"
+
+using clp_s::search::FilterOperation;
+
+namespace clp_s {
+class TimestampEntry {
+public:
+    // Types
+    class OperationFailed : public TraceableException {
+    public:
+        // Constructors
+        OperationFailed(ErrorCode error_code, char const* const filename, int line_number)
+                : TraceableException(error_code, filename, line_number) {}
+
+        // Methods
+        char const* what() const noexcept override { return "TimestampEntry operation failed"; }
+    };
+
+    // Constants
+    enum TimestampEncoding : uint64_t {
+        UnkownTimestampEncoding,  // encoding of a default-constructed entry
+        Epoch,  // bounds are kept as epochtime_t
+        DoubleEpoch  // bounds are kept as double
+    };
+
+    // Constructors (start/end begin at the Max/Min constants; ingested timestamps replace them)
+    TimestampEntry()
+            : m_encoding(UnkownTimestampEncoding),
+              m_epoch_start_double(cDoubleEpochTimeMax),
+              m_epoch_end_double(cDoubleEpochTimeMin),
+              m_epoch_start(cEpochTimeMax),
+              m_epoch_end(cEpochTimeMin) {}
+
+    /**
+     * Ingest a timestamp potentially adjusting the start and end bounds for this
+     * TimestampEntry.
+     *
+     * @param timestamp the timestamp to be ingested, as an epochtime_t or a double; ingesting a
+     * double switches the entry to the DoubleEpoch encoding
+     */
+    void ingest_timestamp(epochtime_t timestamp);
+    void ingest_timestamp(double timestamp);
+
+    /**
+     * Merge a timestamp range potentially adjusting the start and end bounds for this
+     * TimestampEntry.
+     * @param entry the entry whose start and end bounds are ingested into this one; entries
+     * whose encoding is neither Epoch nor DoubleEpoch are ignored
+     */
+    void merge_range(TimestampEntry const& entry);
+
+    /**
+     * Write the timestamp entry to a file
+     * @param compressor
+     * @param column
+     */
+    void write_to_file(ZstdCompressor& compressor, std::string const& column) const;
+
+    /**
+     * Try to read the timestamp entry from a file
+     * @param decompressor
+     * @param column
+     * @return ErrorCode
+     */
+    ErrorCode try_read_from_file(ZstdDecompressor& decompressor, std::string& column);
+
+    /**
+     * Read the timestamp entry from a file
+     * @param decompressor
+     * @param column
+     */
+    void read_from_file(ZstdDecompressor& decompressor, std::string& column);
+
+    /**
+     * Evaluate a filter against the range of this TimestampEntry
+     * @param op
+     * @param timestamp
+     * @return True or False when the range alone decides the filter, Unknown otherwise
+     */
+    EvaluatedValue evaluate_filter(FilterOperation op, double timestamp);
+    EvaluatedValue evaluate_filter(FilterOperation op, epochtime_t timestamp);
+
+private:
+    TimestampEncoding m_encoding;
+    double m_epoch_start_double, m_epoch_end_double;
+    epochtime_t m_epoch_start, m_epoch_end;
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_TIMESTAMPENTRY_HPP
diff --git a/components/core/src/clp_s/TimestampPattern.cpp b/components/core/src/clp_s/TimestampPattern.cpp
new file mode 100644
index 000000000..9457d5cda
--- /dev/null
+++ b/components/core/src/clp_s/TimestampPattern.cpp
@@ -0,0 +1,1008 @@
+// Code from CLP
+
+#include "TimestampPattern.hpp"
+
+#include <chrono>
+#include <cstring>
+#include <vector>
+
+#include <date/include/date/date.h>
+#include <spdlog/spdlog.h>
+
+using std::string;
+using std::to_string;
+using std::vector;
+
+namespace clp_s {
+// Static members start out empty; TimestampPattern::init() fills them in
+std::unique_ptr<TimestampPattern[]> TimestampPattern::m_known_ts_patterns = nullptr;
+size_t TimestampPattern::m_known_ts_patterns_len = 0;
+
+// File-scope name tables shared by timestamp parsing and formatting
+static constexpr int cNumDaysInWeek = 7;  // Size of cAbbrevDaysOfWeek (used by %a)
+static char const* cAbbrevDaysOfWeek[cNumDaysInWeek]
+        = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"};
+static constexpr int cNumMonths = 12;  // Size of both month-name tables (used by %b and %B)
+static char const* cAbbrevMonthNames[cNumMonths]
+        = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
+static char const* cMonthNames[cNumMonths]
+        = {"January",
+           "February",
+           "March",
+           "April",
+           "May",
+           "June",
+           "July",
+           "August",
+           "September",
+           "October",
+           "November",
+           "December"};
+
+// File-scope functions
+/**
+ * Converts a value to a padded string with the given length and appends it to the given string
+ * @param value Value to append
+ * @param padding_character Character used to left-pad the value
+ * @param length Width of the padded field
+ * @param str String to append to
+ */
+static void append_padded_value(int value, char padding_character, size_t length, string& str);
+/**
+ * Converts a value to a padded string with the given length and appends it to the given string.
+ * Omits trailing 0.
+ * @param value Value to append
+ * @param padding_character Character used to left-pad the value
+ * @param max_length Width of the field before the trailing 0s are removed
+ * @param str String to append to
+ */
+static void
+append_padded_value_notz(int value, char padding_character, size_t max_length, string& str);
+
+/**
+ * Converts a padded decimal integer string (from a larger string) to an integer
+ * @param str String containing the numeric string
+ * @param begin_ix Start position of the numeric string
+ * @param end_ix End position of the numeric string (exclusive)
+ * @param padding_character Character that may precede the digits
+ * @param value String as a number (only set when conversion succeeds)
+ * @return true if conversion succeeds, false otherwise
+ */
+static bool convert_string_to_number(
+        string const& str,
+        size_t begin_ix,
+        size_t end_ix,
+        char padding_character,
+        int& value
+);
+
+/**
+ * Converts a padded decimal integer string with no trailing zeros (from a larger string) to an
+ * integer
+ * @param str String containing the numeric string
+ * @param max_digits Width of the full field; omitted trailing zeros are restored up to this width
+ * @param begin_ix Start position of the numeric string
+ * @param end_ix Potential end position of the numeric string (exclusive); on success, set to
+ * the position just past the digits that were consumed
+ * @param value String as a number
+ * @return true if conversion succeeds, false otherwise
+ */
+static bool convert_string_to_number_notz(
+        string const& str,
+        size_t max_digits,
+        size_t begin_ix,
+        size_t& end_ix,
+        int& value
+);
+
+static void append_padded_value(int value, char padding_character, size_t length, string& str) {
+    string const value_str = to_string(value);  // A value wider than `length` gets no padding
+    size_t const num_padding = length > value_str.length() ? length - value_str.length() : 0;
+    str.append(num_padding, padding_character).append(value_str);
+}
+
+static void
+append_padded_value_notz(int value, char padding_character, size_t max_length, string& str) {
+    string value_str = to_string(value);
+    if ("0" == value_str) {
+        // Zero is emitted as a single unpadded digit
+        str += value_str;
+        return;
+    }
+
+    // Pad up to the full field width *before* dropping trailing zeros so the remaining digits
+    // keep their place value (e.g. 70 in a 3-wide field becomes "07"). A value wider than the
+    // field gets no padding; this also avoids underflowing the unsigned padding count.
+    if (max_length > value_str.length()) {
+        str.append(max_length - value_str.length(), padding_character);
+    }
+
+    // Drop trailing zeros. A non-zero value always contains a non-'0' character, so the search
+    // cannot fail and at least one character survives.
+    value_str.erase(value_str.find_last_not_of('0') + 1);
+    str += value_str;
+}
+
+static bool convert_string_to_number(
+        string const& str,
+        size_t begin_ix,
+        size_t end_ix,
+        char padding_character,
+        int& value
+) {
+    // Skip over the leading padding
+    size_t pos = begin_ix;
+    for (; pos < end_ix; ++pos) {
+        if (str[pos] != padding_character) {
+            break;
+        }
+    }
+
+    // Accumulate the remaining digits; `value` is only written on success
+    int result = 0;
+    while (pos < end_ix) {
+        char const digit = str[pos++];
+        if (digit < '0' || digit > '9') {
+            return false;
+        }
+        result = result * 10 + (digit - '0');
+    }
+
+    value = result;
+    return true;
+}
+
+static bool convert_string_to_number_notz(
+        string const& str,
+        size_t max_digits,
+        size_t begin_ix,
+        size_t& end_ix,
+        int& value
+) {
+    // Parses a run of digits written without trailing zeros (e.g. "47" for 470 in a 3-digit
+    // field). NOTE: value is reset up front, so it is modified even when the conversion fails.
+    value = 0;
+    size_t num_digits = 0;
+
+    // Consume digits up to end_ix or the first non-digit, tracking whether the most recent
+    // digit was a zero. The caller may pass an end_ix that lies beyond the end of str; the
+    // scan then stops at the '\0' that str[str.length()] yields, so nothing past it is ever
+    // read.
+    bool ends_with_zero = false;
+    for (size_t ix = begin_ix; ix < end_ix; ++ix) {
+        char const c = str[ix];
+        if (c < '0' || c > '9') {
+            break;
+        }
+        ends_with_zero = ('0' == c);
+        value = value * 10 + (c - '0');
+        ++num_digits;
+    }
+
+    // The field is written without trailing zeros, so a multi-digit run that ends in zero is
+    // malformed. A lone "0" is accepted, and so is an empty run of digits (which yields 0).
+    if (ends_with_zero && num_digits > 1) {
+        return false;
+    }
+
+    // Tell the caller where the number actually ended
+    end_ix = begin_ix + num_digits;
+
+    // Restore the omitted trailing zeros, e.g. "47" in a 3-digit field means 470. Counting up
+    // from num_digits in size_t avoids a signed/unsigned comparison and cannot underflow if
+    // more than max_digits digits were consumed.
+    for (size_t i = num_digits; i < max_digits; ++i) {
+        value *= 10;
+    }
+
+    return true;
+}
+
+/*
+ * To initialize m_known_ts_patterns, we first create a vector of patterns then copy it to a
+ * dynamic array. This eases maintenance of the list and the cost doesn't matter since it is
+ * only done once when the program starts.
+ */
+void TimestampPattern::init() {
+    // Build the list in a vector for easy maintenance; order matters (first match wins)
+    vector<TimestampPattern> patterns;
+    // E.g. 2022-04-06T03:33:23.476Z (%T omits trailing zeros: also ...23.47Z, ...23.4Z, ...23.Z)
+    patterns.emplace_back(0, "%Y-%m-%dT%H:%M:%S.%TZ");
+    // E.g. 2022-04-06T03:33:23Z
+    patterns.emplace_back(0, "%Y-%m-%dT%H:%M:%SZ");
+    // E.g. 2015-01-31T15:50:45.392
+    patterns.emplace_back(0, "%Y-%m-%dT%H:%M:%S.%3");
+    // E.g. 2015-01-31T15:50:45,392
+    patterns.emplace_back(0, "%Y-%m-%dT%H:%M:%S,%3");
+    // E.g. [2015-01-31T15:50:45
+    patterns.emplace_back(0, "[%Y-%m-%dT%H:%M:%S");
+    // E.g. [20170106-16:56:41]
+    patterns.emplace_back(0, "[%Y%m%d-%H:%M:%S]");
+    // E.g. 2015-01-31 15:50:45,392
+    patterns.emplace_back(0, "%Y-%m-%d %H:%M:%S,%3");
+    // E.g. 2015-01-31 15:50:45.392
+    patterns.emplace_back(0, "%Y-%m-%d %H:%M:%S.%3");
+    // E.g. [2015-01-31 15:50:45,085]
+    patterns.emplace_back(0, "[%Y-%m-%d %H:%M:%S,%3]");
+    // E.g. 2015-01-31 15:50:45
+    patterns.emplace_back(0, "%Y-%m-%d %H:%M:%S");
+    // E.g. Start-Date: 2015-01-31  15:50:45
+    patterns.emplace_back(1, "%Y-%m-%d  %H:%M:%S");
+    // E.g. 2015/01/31 15:50:45
+    patterns.emplace_back(0, "%Y/%m/%d %H:%M:%S");
+    // E.g. 15/01/31 15:50:45
+    patterns.emplace_back(0, "%y/%m/%d %H:%M:%S");
+    // E.g. 150131  9:50:45
+    patterns.emplace_back(0, "%y%m%d %k:%M:%S");
+    // E.g. 01 Jan 2016 15:50:17,085
+    patterns.emplace_back(0, "%d %b %Y %H:%M:%S,%3");
+    // E.g. Jan 01, 2016 3:50:17 PM
+    patterns.emplace_back(0, "%b %d, %Y %l:%M:%S %p");
+    // E.g. January 31, 2015 15:50
+    patterns.emplace_back(0, "%B %d, %Y %H:%M");
+    // E.g. E [31/Jan/2015:15:50:45
+    patterns.emplace_back(1, "[%d/%b/%Y:%H:%M:%S");
+    // E.g. localhost - - [01/Jan/2016:15:50:17
+    // E.g. 192.168.4.5 - - [01/Jan/2016:15:50:17
+    patterns.emplace_back(3, "[%d/%b/%Y:%H:%M:%S");
+    // E.g. 192.168.4.5 - - [01/01/2016:15:50:17
+    patterns.emplace_back(3, "[%d/%m/%Y:%H:%M:%S");
+    // E.g. INFO [main] 2015-01-31 15:50:45,085
+    patterns.emplace_back(2, "%Y-%m-%d %H:%M:%S,%3");
+    // E.g. Started POST "/api/v3/internal/allowed" for 127.0.0.1 at 2017-06-18 00:20:44
+    patterns.emplace_back(6, "%Y-%m-%d %H:%M:%S");
+    // E.g. update-alternatives 2015-01-31 15:50:45
+    patterns.emplace_back(1, "%Y-%m-%d %H:%M:%S");
+    // E.g. ERROR: apport (pid 4557) Sun Jan  1 15:50:45 2015
+    patterns.emplace_back(4, "%a %b %e %H:%M:%S %Y");
+    // E.g. <<<2016-11-10 03:02:29:936
+    patterns.emplace_back(0, "<<<%Y-%m-%d %H:%M:%S:%3");
+
+    // TODO The patterns below are imprecise (no year, so 1970 is assumed) and will prevent
+    // searching by timestamp; for now, it's no worse than not parsing one. E.g. Jan 21 11:56:42
+    patterns.emplace_back(0, "%b %d %H:%M:%S");
+    // E.g. 01-21 11:56:42.392
+    patterns.emplace_back(0, "%m-%d %H:%M:%S.%3");
+
+    // Initialize m_known_ts_patterns with vector's contents
+    m_known_ts_patterns_len = patterns.size();
+    m_known_ts_patterns = std::make_unique<TimestampPattern[]>(m_known_ts_patterns_len);
+    for (size_t i = 0; i < patterns.size(); ++i) {
+        m_known_ts_patterns[i] = patterns[i];
+    }
+}
+
+TimestampPattern const* TimestampPattern::search_known_ts_patterns(
+        string const& line,
+        epochtime_t& timestamp,
+        size_t& timestamp_begin_pos,
+        size_t& timestamp_end_pos
+) {
+    // Patterns are tried in registration order; the first one that parses the line wins
+    auto const* const patterns_end = m_known_ts_patterns.get() + m_known_ts_patterns_len;
+    for (auto const* pattern = m_known_ts_patterns.get(); pattern != patterns_end; ++pattern) {
+        if (pattern->parse_timestamp(line, timestamp, timestamp_begin_pos, timestamp_end_pos)) {
+            return pattern;
+        }
+    }
+
+    // No known pattern matched, so report "no position" for both bounds
+    timestamp_begin_pos = timestamp_end_pos = string::npos;
+    return nullptr;
+}
+
+string const& TimestampPattern::get_format() const {
+    return m_format;  // The pattern's format string (literal characters and %-directives)
+}
+
+uint8_t TimestampPattern::get_num_spaces_before_ts() const {
+    return m_num_spaces_before_ts;  // Number of spaces skipped in a line before the timestamp
+}
+
+bool TimestampPattern::is_empty() const {
+    return m_format.empty();  // A pattern with no format string (e.g. after clear()) is empty
+}
+
+void TimestampPattern::clear() {
+    m_num_spaces_before_ts = 0;
+    m_format.clear();  // Leaves the pattern in the state that is_empty() reports as empty
+}
+
+bool TimestampPattern::parse_timestamp(
+        string const& line,
+        epochtime_t& timestamp,  // Out: milliseconds since the UNIX epoch (only set on success)
+        size_t& timestamp_begin_pos,  // Out: index in line where the timestamp begins
+        size_t& timestamp_end_pos  // Out: index in line just past the end of the timestamp
+) const {
+    size_t line_ix = 0;
+    size_t const line_length = line.length();
+
+    // Find beginning of timestamp: it starts right after the m_num_spaces_before_ts-th space
+    int num_spaces_found;
+    for (num_spaces_found = 0; num_spaces_found < m_num_spaces_before_ts && line_ix < line_length;
+         ++line_ix)
+    {
+        if (' ' == line[line_ix]) {
+            ++num_spaces_found;
+        }
+    }
+    if (num_spaces_found < m_num_spaces_before_ts) {
+        return false;
+    }
+    size_t ts_begin_ix = line_ix;
+
+    int date = 1;  // Fields absent from the format keep these defaults (1970-01-01 00:00:00.000)
+    int month = 1;
+    int year = 1970;
+    int hour = 0;
+    bool uses_12_hour_clock = false;
+    int minute = 0;
+    int second = 0;
+    int millisecond = 0;
+    bool is_pm = false;
+
+    size_t const format_length = m_format.length();
+    size_t format_ix = 0;
+    bool is_specifier = false;  // True when the previous format character was a '%'
+    for (; format_ix < format_length && line_ix < line_length; ++format_ix) {
+        if (false == is_specifier) {
+            if ('%' == m_format[format_ix]) {
+                is_specifier = true;
+            } else {
+                if (m_format[format_ix] != line[line_ix]) {
+                    // Doesn't match
+                    return false;
+                }
+                ++line_ix;
+            }
+        } else {
+            // Parse fields
+            switch (m_format[format_ix]) {
+                case '%':
+                    if ('%' != line[line_ix]) {
+                        return false;
+                    }
+                    ++line_ix;
+                    break;
+
+                case 'y': {  // Zero-padded year in century
+                    constexpr int cFieldLength = 2;
+                    if (line_ix + cFieldLength > line_length) {
+                        // Too short
+                        return false;
+                    }
+
+                    int value;
+                    if (false
+                                == convert_string_to_number(
+                                        line,
+                                        line_ix,
+                                        line_ix + cFieldLength,
+                                        '0',
+                                        value
+                                )
+                        || value < 0 || value > 99)
+                    {
+                        return false;
+                    }
+                    year = value;
+                    // Year >= 69 treated as 1900s, year below 69 treated as 2000s
+                    if (year >= 69) {
+                        year += 1900;
+                    } else {
+                        year += 2000;
+                    }
+                    line_ix += cFieldLength;
+
+                    break;
+                }
+
+                case 'Y': {  // Zero-padded year with century
+                    constexpr int cFieldLength = 4;
+                    if (line_ix + cFieldLength > line_length) {
+                        // Too short
+                        return false;
+                    }
+
+                    int value;
+                    if (false
+                                == convert_string_to_number(
+                                        line,
+                                        line_ix,
+                                        line_ix + cFieldLength,
+                                        '0',
+                                        value
+                                )
+                        || value < 0 || value > 9999)
+                    {
+                        return false;
+                    }
+                    year = value;
+                    line_ix += cFieldLength;
+
+                    break;
+                }
+
+                case 'B': {  // Month name
+                    bool match_found = false;
+                    for (int month_ix = 0; !match_found && month_ix < cNumMonths; ++month_ix) {
+                        size_t const length = strlen(cMonthNames[month_ix]);
+                        if (0 == line.compare(line_ix, length, cMonthNames[month_ix])) {
+                            month = month_ix + 1;
+                            match_found = true;
+                            line_ix += length;
+                        }
+                    }
+                    if (false == match_found) {
+                        return false;
+                    }
+
+                    break;
+                }
+
+                case 'b': {  // Abbreviated month name
+                    bool match_found = false;
+                    for (int month_ix = 0; !match_found && month_ix < cNumMonths; ++month_ix) {
+                        size_t const length = strlen(cAbbrevMonthNames[month_ix]);
+                        if (0 == line.compare(line_ix, length, cAbbrevMonthNames[month_ix])) {
+                            month = month_ix + 1;
+                            match_found = true;
+                            line_ix += length;
+                        }
+                    }
+                    if (false == match_found) {
+                        return false;
+                    }
+
+                    break;
+                }
+
+                case 'm': {  // Zero-padded month
+                    constexpr int cFieldLength = 2;
+                    if (line_ix + cFieldLength > line_length) {
+                        // Too short
+                        return false;
+                    }
+
+                    int value;
+                    if (false
+                                == convert_string_to_number(
+                                        line,
+                                        line_ix,
+                                        line_ix + cFieldLength,
+                                        '0',
+                                        value
+                                )
+                        || value < 1 || value > 12)
+                    {
+                        return false;
+                    }
+                    month = value;
+                    line_ix += cFieldLength;
+
+                    break;
+                }
+
+                case 'd': {  // Zero-padded day in month
+                    constexpr int cFieldLength = 2;
+                    if (line_ix + cFieldLength > line_length) {
+                        // Too short
+                        return false;
+                    }
+
+                    int value;
+                    if (false
+                                == convert_string_to_number(
+                                        line,
+                                        line_ix,
+                                        line_ix + cFieldLength,
+                                        '0',
+                                        value
+                                )
+                        || value < 1 || value > 31)
+                    {
+                        return false;
+                    }
+                    date = value;
+                    line_ix += cFieldLength;
+
+                    break;
+                }
+
+                case 'e': {  // Space-padded day in month
+                    constexpr int cFieldLength = 2;
+                    if (line_ix + cFieldLength > line_length) {
+                        // Too short
+                        return false;
+                    }
+
+                    int value;
+                    if (false
+                                == convert_string_to_number(
+                                        line,
+                                        line_ix,
+                                        line_ix + cFieldLength,
+                                        ' ',
+                                        value
+                                )
+                        || value < 1 || value > 31)
+                    {
+                        return false;
+                    }
+                    date = value;
+                    line_ix += cFieldLength;
+
+                    break;
+                }
+
+                case 'a': {  // Abbreviated day of week
+                    bool match_found = false;
+                    for (int day_ix = 0; !match_found && day_ix < cNumDaysInWeek; ++day_ix) {
+                        size_t const abbrev_length = strlen(cAbbrevDaysOfWeek[day_ix]);
+                        if (0 == line.compare(line_ix, abbrev_length, cAbbrevDaysOfWeek[day_ix])) {
+                            match_found = true;
+                            line_ix += abbrev_length;
+                        }
+                    }
+                    if (false == match_found) {
+                        return false;
+                    }
+                    // Weekday is not useful in determining absolute timestamp, so we don't do
+                    // anything with it
+
+                    break;
+                }
+
+                case 'p': {  // Part of day
+                    if (0 == line.compare(line_ix, 2, "AM")) {
+                        is_pm = false;
+                    } else if (0 == line.compare(line_ix, 2, "PM")) {
+                        is_pm = true;
+                    } else {
+                        return false;
+                    }
+                    line_ix += 2;
+
+                    break;
+                }
+
+                case 'H': {  // Zero-padded hour on 24-hour clock
+                    constexpr int cFieldLength = 2;
+                    if (line_ix + cFieldLength > line_length) {
+                        // Too short
+                        return false;
+                    }
+
+                    int value;
+                    if (false
+                                == convert_string_to_number(
+                                        line,
+                                        line_ix,
+                                        line_ix + cFieldLength,
+                                        '0',
+                                        value
+                                )
+                        || value < 0 || value > 23)
+                    {
+                        return false;
+                    }
+                    hour = value;
+                    line_ix += cFieldLength;
+
+                    break;
+                }
+
+                case 'k': {  // Space-padded hour on 24-hour clock
+                    constexpr int cFieldLength = 2;
+                    if (line_ix + cFieldLength > line_length) {
+                        // Too short
+                        return false;
+                    }
+
+                    int value;
+                    if (false
+                                == convert_string_to_number(
+                                        line,
+                                        line_ix,
+                                        line_ix + cFieldLength,
+                                        ' ',
+                                        value
+                                )
+                        || value < 0 || value > 23)
+                    {
+                        return false;
+                    }
+                    hour = value;
+                    line_ix += cFieldLength;
+
+                    break;
+                }
+
+                case 'I': {  // Zero-padded hour on 12-hour clock
+                    constexpr int cFieldLength = 2;
+                    if (line_ix + cFieldLength > line_length) {
+                        // Too short
+                        return false;
+                    }
+
+                    int value;
+                    if (false
+                                == convert_string_to_number(
+                                        line,
+                                        line_ix,
+                                        line_ix + cFieldLength,
+                                        '0',
+                                        value
+                                )
+                        || value < 1 || value > 12)
+                    {
+                        return false;
+                    }
+                    hour = value;
+                    uses_12_hour_clock = true;
+                    line_ix += cFieldLength;
+
+                    break;
+                }
+
+                case 'l': {  // Space-padded hour on 12-hour clock
+                    constexpr int cFieldLength = 2;
+                    if (line_ix + cFieldLength > line_length) {
+                        // Too short
+                        return false;
+                    }
+
+                    int value;
+                    if (false
+                                == convert_string_to_number(
+                                        line,
+                                        line_ix,
+                                        line_ix + cFieldLength,
+                                        ' ',
+                                        value
+                                )
+                        || value < 1 || value > 12)
+                    {
+                        return false;
+                    }
+                    hour = value;
+                    uses_12_hour_clock = true;
+                    line_ix += cFieldLength;
+
+                    break;
+                }
+
+                case 'M': {  // Zero-padded minute
+                    constexpr int cFieldLength = 2;
+                    if (line_ix + cFieldLength > line_length) {
+                        // Too short
+                        return false;
+                    }
+
+                    int value;
+                    if (false
+                                == convert_string_to_number(
+                                        line,
+                                        line_ix,
+                                        line_ix + cFieldLength,
+                                        '0',
+                                        value
+                                )
+                        || value < 0 || value > 59)
+                    {
+                        return false;
+                    }
+                    minute = value;
+                    line_ix += cFieldLength;
+
+                    break;
+                }
+
+                case 'S': {  // Zero-padded second (up to 60 to allow for leap seconds)
+                    constexpr int cFieldLength = 2;
+                    if (line_ix + cFieldLength > line_length) {
+                        // Too short
+                        return false;
+                    }
+
+                    int value;
+                    if (false
+                                == convert_string_to_number(
+                                        line,
+                                        line_ix,
+                                        line_ix + cFieldLength,
+                                        '0',
+                                        value
+                                )
+                        || value < 0 || value > 60)
+                    {
+                        return false;
+                    }
+                    second = value;
+                    line_ix += cFieldLength;
+
+                    break;
+                }
+
+                case '3': {  // Zero-padded millisecond
+                    constexpr int cFieldLength = 3;
+                    if (line_ix + cFieldLength > line_length) {
+                        // Too short
+                        return false;
+                    }
+
+                    int value;
+                    if (false
+                                == convert_string_to_number(
+                                        line,
+                                        line_ix,
+                                        line_ix + cFieldLength,
+                                        '0',
+                                        value
+                                )
+                        || value < 0 || value > 999)
+                    {
+                        return false;
+                    }
+                    millisecond = value;
+                    line_ix += cFieldLength;
+
+                    break;
+                }
+
+                case 'T': {  // Zero-padded millisecond no trailing zero (up to 3 digits wide)
+                    constexpr int cMaxFieldLength = 3;
+
+                    int value;
+                    size_t new_line_ix = line_ix + cMaxFieldLength;
+                    if (!convert_string_to_number_notz(
+                                line,
+                                cMaxFieldLength,
+                                line_ix,
+                                new_line_ix,
+                                value
+                        )
+                        || value < 0 || value > 999)
+                    {
+                        return false;
+                    }
+                    millisecond = value;
+                    line_ix = new_line_ix;
+
+                    break;
+                }
+
+                default:
+                    return false;
+            }
+            is_specifier = false;
+        }
+    }
+    if (format_ix < format_length) {
+        // Complete format string not present in line
+        return false;
+    }
+
+    // Process parsed fields
+    if (uses_12_hour_clock) {
+        if (12 == hour) {
+            // 12s require special handling
+            if (false == is_pm) {
+                // hour == 12AM which is 0 on 24-hour clock
+                hour = 0;
+            }
+        } else {
+            if (is_pm) {
+                // All PMs except 12 should be +12, e.g. 1PM becomes (1 + 12)PM
+                hour += 12;
+            }
+        }
+    }
+
+    // Create complete date
+    auto year_month_date = date::year(year) / month / date;
+    if (false == year_month_date.ok()) {
+        return false;
+    }
+    // Convert complete timestamp into a time point with millisecond resolution
+    auto timestamp_point = date::sys_days(year_month_date) + std::chrono::hours(hour)
+                           + std::chrono::minutes(minute) + std::chrono::seconds(second)
+                           + std::chrono::milliseconds(millisecond);
+    // Get time point since epoch
+    auto unix_epoch_point = date::sys_days(date::year(1970) / 1 / 1);
+    // Get timestamp since epoch
+    auto duration_since_epoch = timestamp_point - unix_epoch_point;
+    // Convert to raw milliseconds
+    timestamp = duration_since_epoch.count();
+
+    timestamp_begin_pos = ts_begin_ix;
+    timestamp_end_pos = line_ix;
+
+    return true;
+}
+
+// Formats the timestamp according to m_format and inserts it into msg right after the first
+// m_num_spaces_before_ts spaces. Throws OperationFailed if msg contains fewer spaces than that
+// or if m_format contains an unsupported directive.
+void TimestampPattern::insert_formatted_timestamp(epochtime_t timestamp, string& msg) const {
+    size_t msg_length = msg.length();
+
+    string new_msg;
+    // We add 50 as an estimate of the timestamp's length
+    new_msg.reserve(msg_length + 50);
+
+    // Find where timestamp should go
+    size_t ts_begin_ix = 0;
+    int num_spaces_found;
+    for (num_spaces_found = 0;
+         num_spaces_found < m_num_spaces_before_ts && ts_begin_ix < msg_length;
+         ++ts_begin_ix)
+    {
+        if (' ' == msg[ts_begin_ix]) {
+            ++num_spaces_found;
+        }
+    }
+    if (num_spaces_found < m_num_spaces_before_ts) {
+        SPDLOG_ERROR(
+                "{} has {} spaces, but pattern has {}",
+                msg.c_str(),
+                num_spaces_found,
+                m_num_spaces_before_ts
+        );
+        throw OperationFailed(ErrorCodeFailure, __FILENAME__, __LINE__);
+    }
+
+    // Copy text before timestamp
+    new_msg.assign(msg, 0, ts_begin_ix);
+
+    // Separate parts of timestamp
+    auto timestamp_point
+            = date::sys_days(date::year(1970) / 1 / 1) + std::chrono::milliseconds(timestamp);
+    auto timestamp_date = date::floor<date::days>(timestamp_point);
+    int day_of_week_ix
+            = (date::year_month_weekday(timestamp_date).weekday_indexed().weekday() - date::Sunday)
+                      .count();
+    auto year_month_date = date::year_month_day(timestamp_date);
+    unsigned date = (unsigned)year_month_date.day();
+    unsigned month = (unsigned)year_month_date.month();
+    int year = (int)year_month_date.year();
+
+    auto time_of_day_duration = timestamp_point - timestamp_date;
+    auto time_of_day = date::make_time(time_of_day_duration);
+    int hour = time_of_day.hours().count();
+    int minute = time_of_day.minutes().count();
+    int second = time_of_day.seconds().count();
+    int millisecond = time_of_day.subseconds().count();
+
+    size_t const format_length = m_format.length();
+    bool is_specifier = false;
+    for (size_t format_ix = 0; format_ix < format_length; ++format_ix) {
+        if (false == is_specifier) {
+            if ('%' == m_format[format_ix]) {
+                is_specifier = true;
+            } else {
+                new_msg += m_format[format_ix];
+            }
+        } else {
+            // Parse fields
+            switch (m_format[format_ix]) {
+                case '%':
+                    new_msg += m_format[format_ix];
+                    break;
+
+                case 'y': {  // Zero-padded year in century
+                    int value = year;
+                    if (year >= 2000) {
+                        // year must be in range [2000,2068]
+                        value -= 2000;
+                    } else {
+                        // year must be in range [1969,1999]
+                        value -= 1900;
+                    }
+                    append_padded_value(value, '0', 2, new_msg);
+                    break;
+                }
+
+                case 'Y':  // Zero-padded year with century
+                    append_padded_value(year, '0', 4, new_msg);
+                    break;
+
+                case 'B':  // Month name
+                    new_msg += cMonthNames[month - 1];
+                    break;
+
+                case 'b':  // Abbreviated month name
+                    new_msg += cAbbrevMonthNames[month - 1];
+                    break;
+
+                case 'm':  // Zero-padded month
+                    append_padded_value(month, '0', 2, new_msg);
+                    break;
+
+                case 'd':  // Zero-padded day in month
+                    append_padded_value(date, '0', 2, new_msg);
+                    break;
+
+                case 'e':  // Space-padded day in month
+                    append_padded_value(date, ' ', 2, new_msg);
+                    break;
+
+                case 'a':  // Abbreviated day of week
+                    new_msg += cAbbrevDaysOfWeek[day_of_week_ix];
+                    break;
+
+                case 'p':  // Part of day
+                    new_msg += (hour > 11) ? "PM" : "AM";
+                    break;
+
+                case 'H':  // Zero-padded hour on 24-hour clock
+                    append_padded_value(hour, '0', 2, new_msg);
+                    break;
+
+                case 'k':  // Space-padded hour on 24-hour clock
+                    append_padded_value(hour, ' ', 2, new_msg);
+                    break;
+
+                case 'I': {  // Zero-padded hour on 12-hour clock
+                    // 0 is 12AM and 13-23 map to 1-11PM; 1-12 are already correct
+                    int value = hour;
+                    if (0 == value) {
+                        value = 12;
+                    } else if (value > 12) {
+                        value -= 12;
+                    }
+                    append_padded_value(value, '0', 2, new_msg);
+                    break;
+                }
+
+                case 'l': {  // Space-padded hour on 12-hour clock
+                    // 0 is 12AM and 13-23 map to 1-11PM; 1-12 are already correct
+                    int value = hour;
+                    if (0 == value) {
+                        value = 12;
+                    } else if (value > 12) {
+                        value -= 12;
+                    }
+                    append_padded_value(value, ' ', 2, new_msg);
+                    break;
+                }
+
+                case 'M':  // Zero-padded minute
+                    append_padded_value(minute, '0', 2, new_msg);
+                    break;
+
+                case 'S':  // Zero-padded second
+                    append_padded_value(second, '0', 2, new_msg);
+                    break;
+
+                case '3':  // Zero-padded millisecond
+                    append_padded_value(millisecond, '0', 3, new_msg);
+                    break;
+
+                case 'T':  // Zero-padded millisecond no trailing 0
+                    append_padded_value_notz(millisecond, '0', 3, new_msg);
+                    break;
+
+                default: {
+                    throw OperationFailed(ErrorCodeUnsupported, __FILENAME__, __LINE__);
+                }
+            }
+            is_specifier = false;
+        }
+    }
+
+    // Copy text after timestamp
+    new_msg.append(msg, ts_begin_ix, string::npos);
+
+    msg = new_msg;
+}
+
+bool operator==(TimestampPattern const& lhs, TimestampPattern const& rhs) {
+    bool const same_offset = lhs.m_num_spaces_before_ts == rhs.m_num_spaces_before_ts;
+    return same_offset && lhs.m_format == rhs.m_format;  // Equal only if both fields match
+}
+
+bool operator!=(TimestampPattern const& lhs, TimestampPattern const& rhs) {
+    return false == (lhs == rhs);  // Defined as the exact negation of operator==
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/TimestampPattern.hpp b/components/core/src/clp_s/TimestampPattern.hpp
new file mode 100644
index 000000000..f500df868
--- /dev/null
+++ b/components/core/src/clp_s/TimestampPattern.hpp
@@ -0,0 +1,166 @@
+// Code from CLP
+
+#ifndef CLP_S_TIMESTAMPPATTERN_HPP
+#define CLP_S_TIMESTAMPPATTERN_HPP
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <utility>
+
+#include "Defs.hpp"
+#include "FileWriter.hpp"
+#include "TraceableException.hpp"
+
+namespace clp_s {
+/**
+ * Class representing a timestamp pattern with methods for both parsing and formatting timestamps
+ * using the pattern. A format string contains directives specifying how a string should be parsed
+ * into a timestamp or how a timestamp should be formatted into a string. E.g., "[%H:%M:%S]" can
+ * parse from or format to "[23:45:19]"
+ *
+ * The supported directives are the same as strptime except that we require an exact number of
+ * spaces/padding digits so that we can reproduce the timestamp exactly. There are also additions
+ * beyond what strptime provides.
+ *
+ * The following directives are supported:
+ * - %  Literal %
+ * - y  2-digit 0-padded year in century. [69,99] refers to years [1969,1999]. [00,68] refers to
+ * years [2000,2068].
+ * - Y  4-digit 0-padded year including century (0000-9999)
+ * - B  Full month name (e.g., "January")
+ * - b  Abbreviated month name (e.g., "Jan")
+ * - m  2-digit 0-padded month (01-12)
+ * - d  2-digit 0-padded day in month (01-31)
+ * - e  2-character space-padded day in month ( 1-31)
+ * - a  Abbreviated day of week (e.g., "Mon")
+ * - p  Part of day (AM/PM)
+ * - H  2-digit 0-padded hour on 24-hour clock (00-23)
+ * - k  2-character space-padded hour on 24-hour clock ( 0-23)
+ * - I  2-digit 0-padded hour on 12-hour clock (01-12)
+ * - l  2-character space-padded hour on 12-hour clock ( 1-12)
+ * - M  2-digit 0-padded minute (00-59)
+ * - S  2-digit 0-padded second (00-60) (60 to account for leap seconds)
+ * - 3  0-padded millisecond (000-999)
+ * - T  0-padded millisecond without trailing 0s ((000)-999), e.g., (000), 9(00), 99(0), 099
+ */
+class TimestampPattern {
+public:
+    // Types
+    class OperationFailed : public TraceableException {
+    public:
+        // Constructors
+        OperationFailed(ErrorCode error_code, char const* const filename, int line_number)
+                : TraceableException(error_code, filename, line_number) {}
+
+        // Methods (what() hides the base class's file/line/error-code message with a fixed string)
+        char const* what() const noexcept override { return "TimestampPattern operation failed"; }
+    };
+
+    // Constructors
+    TimestampPattern() : m_num_spaces_before_ts(0) {}
+
+    TimestampPattern(uint8_t num_spaces_before_ts, std::string format)
+            : m_num_spaces_before_ts(num_spaces_before_ts),
+              m_format(std::move(format)) {}
+
+    // Methods
+    /**
+     * Static initializer for class. This must be called before using the class.
+     */
+    static void init();
+
+    /**
+     * Searches for a known timestamp pattern which can parse the timestamp from the given line, and
+     * if found, parses the timestamp
+     * @param line
+     * @param timestamp Parsed timestamp
+     * @param timestamp_begin_pos
+     * @param timestamp_end_pos
+     * @return pointer to the timestamp pattern if found, nullptr otherwise
+     */
+    static TimestampPattern const* search_known_ts_patterns(
+            std::string const& line,
+            epochtime_t& timestamp,
+            size_t& timestamp_begin_pos,
+            size_t& timestamp_end_pos
+    );
+
+    /**
+     * Gets the timestamp pattern's format string
+     * @return See description
+     */
+    std::string const& get_format() const;
+
+    /**
+     * Gets the number of spaces before the timestamp in a typical message
+     * @return See description
+     */
+    uint8_t get_num_spaces_before_ts() const;
+
+    /**
+     * Gets if the timestamp pattern is empty
+     * @return true if empty, false otherwise
+     */
+    bool is_empty() const;
+
+    /**
+     * Clears the pattern
+     */
+    void clear();
+
+    /**
+     * Tries to parse the timestamp from the given line
+     * @param line
+     * @param timestamp Parsed timestamp
+     * @param timestamp_begin_pos
+     * @param timestamp_end_pos
+     * @return true if parsed successfully, false otherwise
+     */
+    bool parse_timestamp(
+            std::string const& line,
+            epochtime_t& timestamp,
+            size_t& timestamp_begin_pos,
+            size_t& timestamp_end_pos
+    ) const;
+
+    /**
+     * Inserts the timestamp into the given message using this pattern
+     * @param timestamp
+     * @param msg The message, which is modified in place
+     * @throw TimestampPattern::OperationFailed if the pattern contains unsupported format
+     * specifiers or the message cannot fit the timestamp pattern
+     */
+
+    void insert_formatted_timestamp(epochtime_t timestamp, std::string& msg) const;
+
+    /**
+     * Compares two timestamp patterns for equality
+     * @param lhs
+     * @param rhs
+     * @return true if both the number of spaces before the timestamp and the format match
+     */
+    friend bool operator==(TimestampPattern const& lhs, TimestampPattern const& rhs);
+
+    /**
+     * Compares two timestamp patterns for inequality
+     * @param lhs
+     * @param rhs
+     * @return true if not equal (i.e., operator== returns false), false otherwise
+     */
+    friend bool operator!=(TimestampPattern const& lhs, TimestampPattern const& rhs);
+
+private:
+    // Variables
+    static std::unique_ptr<TimestampPattern[]> m_known_ts_patterns;
+    static size_t m_known_ts_patterns_len;
+
+    // The number of spaces before the timestamp in a message
+    // E.g. in "localhost - - [01/Jan/2016:15:50:17", there are 3 spaces before the timestamp
+    //                   ^ ^ ^
+    uint8_t m_num_spaces_before_ts;
+    std::string m_format;
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_TIMESTAMPPATTERN_HPP
diff --git a/components/core/src/clp_s/TraceableException.hpp b/components/core/src/clp_s/TraceableException.hpp
new file mode 100644
index 000000000..e64ffb617
--- /dev/null
+++ b/components/core/src/clp_s/TraceableException.hpp
@@ -0,0 +1,49 @@
+// Code from CLP
+
+#ifndef CLP_S_TRACEABLEEXCEPTION_HPP
+#define CLP_S_TRACEABLEEXCEPTION_HPP
+
+#include <exception>
+#include <string>
+
+#include "ErrorCode.hpp"
+
+#define __FILENAME__ ((__FILE__) + SOURCE_PATH_SIZE)
+
+namespace clp_s {
+class TraceableException : public std::exception {
+public:
+    // Constructors: the what() message ("<filename>:<line>  Error code: <code>\n") is built once
+    TraceableException(ErrorCode error_code, char const* const filename, int const line_number)
+            : m_error_code(error_code),
+              m_filename(filename),
+              m_line_number(line_number) {
+        m_message.append(m_filename).append(":").append(std::to_string(m_line_number));
+        m_message.append("  Error code: ").append(std::to_string(m_error_code)).append("\n");
+    }
+
+    // Copying is memberwise (the filename pointer itself is copied, not the characters)
+    TraceableException(TraceableException const&) = default;
+    TraceableException& operator=(TraceableException const&) = default;
+
+    // Accessors for the values captured at construction
+    ErrorCode get_error_code() const { return m_error_code; }
+
+    char const* get_filename() const { return m_filename; }
+
+    int get_line_number() const { return m_line_number; }
+
+    char const* what() const noexcept override { return m_message.c_str(); }
+
+protected:
+    std::string m_message;
+
+private:
+    // Where and why the exception was raised
+    ErrorCode m_error_code;
+    char const* m_filename;
+    int m_line_number;
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_TRACEABLEEXCEPTION_HPP
diff --git a/components/core/src/clp_s/Utils.cpp b/components/core/src/clp_s/Utils.cpp
new file mode 100644
index 000000000..cf59f3edb
--- /dev/null
+++ b/components/core/src/clp_s/Utils.cpp
@@ -0,0 +1,431 @@
+#include "Utils.hpp"
+
+#include <spdlog/spdlog.h>
+
+using std::string;
+using std::string_view;
+
+namespace clp_s {
+bool FileUtils::find_all_files(std::string const& path, std::vector<std::string>& file_paths) {
+    namespace fs = boost::filesystem;
+    // Filesystem errors are logged and reported through the return value
+    try {
+        // Anything that isn't a directory is treated as a single file to collect
+        if (false == fs::is_directory(path)) {
+            file_paths.push_back(path);
+            return true;
+        }
+
+        // An empty directory contributes nothing
+        if (fs::is_empty(path)) {
+            return true;
+        }
+
+        // Walk the tree with the symlink_option::recurse option set
+        fs::recursive_directory_iterator walker(path, fs::symlink_option::recurse);
+        fs::recursive_directory_iterator walk_end;
+        for (; walker != walk_end; ++walker) {
+            auto const& entry_path = walker->path();
+            if (false == fs::is_directory(entry_path)) {
+                file_paths.push_back(entry_path.string());
+                continue;
+            }
+            if (fs::is_empty(entry_path)) {
+                // Don't descend into a directory that has no entries
+                walker.no_push();
+            }
+        }
+    } catch (fs::filesystem_error& exception) {
+        SPDLOG_ERROR(
+                "Failed to find files/directories at '{}' - {}.",
+                path.c_str(),
+                exception.what()
+        );
+        return false;
+    }
+
+    return true;
+}
+
+bool FileUtils::validate_path(std::vector<std::string> const& paths) {
+    bool found_missing_path = false;
+    for (auto const& candidate : paths) {  // no early exit: report every missing path
+        if (boost::filesystem::exists(candidate)) {
+            continue;
+        }
+        SPDLOG_ERROR("'{}' does not exist.", candidate.c_str());
+        found_missing_path = true;
+    }
+    return false == found_missing_path;
+}
+
+bool StringUtils::get_bounds_of_next_var(string const& msg, size_t& begin_pos, size_t& end_pos) {
+    // Resumes scanning at end_pos (the end of the previous variable) and, on success, leaves
+    // [begin_pos, end_pos) spanning the next variable
+    size_t const length = msg.length();
+    if (length <= end_pos) {
+        // Nothing left to scan after the previous variable
+        return false;
+    }
+
+    for (;;) {
+        // Skip the delimiters that follow the previous token
+        begin_pos = end_pos;
+        while (begin_pos < length && is_delim(msg[begin_pos])) {
+            ++begin_pos;
+        }
+        if (length == begin_pos) {
+            // Only delimiters remained, so there are no more tokens
+            return false;
+        }
+
+        // Scan to the end of the token, recording which character classes it contains
+        bool has_digit = false;
+        bool has_letter = false;
+        end_pos = begin_pos;
+        while (end_pos < length) {
+            char const c = msg[end_pos];
+            if (is_decimal_digit(c)) {
+                has_digit = true;
+            } else if (is_alphabet(c)) {
+                has_letter = true;
+            } else if (is_delim(c)) {
+                break;
+            }
+            ++end_pos;
+        }
+
+        // The token is a variable if:
+        // - it contains a decimal digit, or
+        // - it's directly preceded by an equals sign and contains an alphabet, or
+        // - it could be a multi-digit hex value
+        bool const follows_equals_sign = begin_pos > 0 && '=' == msg[begin_pos - 1];
+        if (has_digit || (follows_equals_sign && has_letter)
+            || could_be_multi_digit_hex_value(msg, begin_pos, end_pos))
+        {
+            return true;
+        }
+        // Otherwise keep scanning from the end of this token
+    }
+}
+
+size_t StringUtils::find_first_of(
+        string const& haystack,
+        char const* needles,
+        size_t search_start_pos,
+        size_t& needle_ix
+) {
+    size_t const num_needles = strlen(needles);
+    size_t const haystack_length = haystack.length();
+    for (size_t pos = search_start_pos; pos < haystack_length; ++pos) {
+        // needle_ix is the inner loop counter, so on a match it identifies the needle found
+        for (needle_ix = 0; needle_ix < num_needles; ++needle_ix) {
+            if (needles[needle_ix] == haystack[pos]) {
+                return pos;
+            }
+        }
+    }
+    return string::npos;
+}
+
+string StringUtils::replace_characters(
+        char const* characters_to_escape,
+        char const* replacement_characters,
+        string const& value,
+        bool escape
+) {
+    string result;
+    size_t copy_begin = 0;
+    size_t needle_ix;
+    size_t match_pos = find_first_of(value, characters_to_escape, copy_begin, needle_ix);
+    while (string::npos != match_pos) {
+        // Copy the untouched text preceding the match
+        result.append(value, copy_begin, match_pos - copy_begin);
+        if (escape) {
+            result += "\\";
+        }
+        // The replacement sits at the same index as the character it replaces
+        result += replacement_characters[needle_ix];
+        copy_begin = match_pos + 1;
+        match_pos = find_first_of(value, characters_to_escape, copy_begin, needle_ix);
+    }
+
+    // No matches remain, so copy the rest of the input verbatim
+    result.append(value, copy_begin, string::npos);
+    return result;
+}
+
+void StringUtils::to_lower(string& str) {
+    for (auto& ch : str) {  // cast first: std::tolower requires an unsigned char value
+        ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
+    }
+}
+
+bool StringUtils::is_wildcard(char c) {
+    switch (c) {  // the only wildcard characters are '?' and '*'
+        case '?':
+        case '*':
+            return true;
+        default:
+            return false;
+    }
+}
+
+string StringUtils::clean_up_wildcard_search_string(string_view str) {
+    string cleaned;
+    size_t const length = str.length();
+    bool pending_escape = false;
+    size_t ix = 0;
+    while (ix < length) {
+        char const c = str[ix];
+        if (pending_escape) {
+            // c follows an escape character; keep the escape only where it changes c's meaning,
+            // i.e., when c is a wildcard or is itself an escape character
+            pending_escape = false;
+            if ('\\' == c || is_wildcard(c)) {
+                cleaned += '\\';
+            }
+            cleaned += c;
+            ++ix;
+        } else if ('\\' == c) {
+            // Defer emitting the escape until the next character is known; if there is no next
+            // character, the dangling escape is dropped
+            pending_escape = true;
+            ++ix;
+        } else if ('*' == c) {
+            // Collapse a run of '*' into a single '*'
+            cleaned += c;
+            do {
+                ++ix;
+            } while (ix < length && '*' == str[ix]);
+        } else {
+            cleaned += c;
+            ++ix;
+        }
+    }
+
+    return cleaned;
+}
+
+bool StringUtils::advance_tame_to_next_match(
+        char const*& tame_current,
+        char const*& tame_bookmark,
+        char const* tame_end,
+        char const*& wild_current,
+        char const*& wild_bookmark
+) {
+    // On success, tame_current and tame_bookmark both point at the first position, at or after
+    // the incoming tame_current, that can match the wild character. wild_current only moves to
+    // step over an escape character, and wild_bookmark is neither read nor modified.
+    auto wild_char = *wild_current;
+    if ('?' == wild_char) {
+        // '?' can match any character, so tame_current stays where it is
+        tame_bookmark = tame_current;
+        return true;
+    }
+
+    // No need to check for '*' since the caller ensures wild doesn't contain consecutive '*'
+
+    if ('\\' == wild_char) {
+        // Match the escaped character literally. Reading it is safe without a bounds check
+        // since the caller ensures there are no dangling escape characters.
+        ++wild_current;
+        wild_char = *wild_current;
+    }
+
+    // Advance tame_current until it matches wild_char
+    while (tame_end != tame_current && wild_char != *tame_current) {
+        ++tame_current;
+    }
+    if (tame_end == tame_current) {
+        // Wild group is longer than last group in tame, so can't match
+        // e.g. "*abc" doesn't match "zab"
+        return false;
+    }
+
+    // Bookmark the match so the caller can resume from just past it after a later mismatch
+    tame_bookmark = tame_current;
+    return true;
+}
+
+bool StringUtils::wildcard_match_unsafe(
+        string_view tame,
+        string_view wild,
+        bool case_sensitive_match
+) {
+    if (case_sensitive_match) {
+        return wildcard_match_unsafe_case_sensitive(tame, wild);
+    }
+    // Case-insensitive: fold both strings to lowercase and reuse the case-sensitive matcher.
+    // We convert to lowercase (rather than uppercase) anticipating that callers use lowercase
+    // more frequently, so little will need to change.
+    string folded_tame(tame);
+    string folded_wild(wild);
+    to_lower(folded_tame);
+    to_lower(folded_wild);
+    return wildcard_match_unsafe_case_sensitive(folded_tame, folded_wild);
+}
+
+/**
+ * The algorithm basically works as follows:
+ * Given a wild string "*abc*def*ghi*", it can be broken into groups of characters delimited by
+ * one or more '*' characters. The goal of the algorithm is then to determine whether the tame
+ * string contains each of those groups in the same order.
+ *
+ * Thus, the algorithm:
+ * 1. searches for the start of one of these groups in wild,
+ * 2. searches for a group in tame starting with the same character, and then
+ * 3. checks if the two match. If not, the search repeats with the next group in tame.
+ *
+ * A '\' in wild makes only the character immediately after it literal.
+ */
+bool StringUtils::wildcard_match_unsafe_case_sensitive(string_view tame, string_view wild) {
+    auto const tame_length = tame.length();
+    auto const wild_length = wild.length();
+    char const* tame_current = tame.data();
+    char const* wild_current = wild.data();
+    char const* tame_bookmark = nullptr;
+    char const* wild_bookmark = nullptr;
+    char const* tame_end = tame_current + tame_length;
+    char const* wild_end = wild_current + wild_length;
+
+    // Handle wild or tame being empty
+    if (0 == wild_length) {
+        return 0 == tame_length;
+    } else {
+        if (0 == tame_length) {
+            return "*" == wild;
+        }
+    }
+
+    char w;
+    char t;
+    while (true) {
+        bool is_escaped = false;  // reset per wild character so a past '\' can't disable a later '?'
+        w = *wild_current;
+        if ('*' == w) {
+            ++wild_current;
+            if (wild_end == wild_current) {
+                // Trailing '*' means everything remaining in tame will match
+                return true;
+            }
+
+            // Set wild and tame bookmarks
+            wild_bookmark = wild_current;
+            if (!advance_tame_to_next_match(
+                        tame_current,
+                        tame_bookmark,
+                        tame_end,
+                        wild_current,
+                        wild_bookmark
+                ))
+            {
+                return false;
+            }
+        } else {
+            // Handle escaped characters
+            if ('\\' == w) {
+                is_escaped = true;
+                ++wild_current;
+                // This is safe without a bounds check since the caller
+                // ensures there are no dangling escape characters
+                w = *wild_current;
+            }
+
+            // Handle a mismatch ('?' matches any character unless it was escaped)
+            t = *tame_current;
+            if (false == ((false == is_escaped && '?' == w) || t == w)) {
+                if (nullptr == wild_bookmark) {
+                    // No bookmark to return to
+                    return false;
+                }
+
+                wild_current = wild_bookmark;
+                tame_current = tame_bookmark + 1;
+                if (!advance_tame_to_next_match(
+                            tame_current,
+                            tame_bookmark,
+                            tame_end,
+                            wild_current,
+                            wild_bookmark
+                    ))
+                {
+                    return false;
+                }
+            }
+        }
+
+        ++tame_current;
+        ++wild_current;
+
+        // Handle reaching the end of tame or wild
+        if (tame_end == tame_current) {
+            return (wild_end == wild_current
+                    || ('*' == *wild_current && (wild_current + 1) == wild_end));
+        } else {
+            if (wild_end == wild_current) {
+                if (nullptr == wild_bookmark) {
+                    // No bookmark to return to
+                    return false;
+                } else {
+                    wild_current = wild_bookmark;
+                    tame_current = tame_bookmark + 1;
+                    if (!advance_tame_to_next_match(
+                                tame_current,
+                                tame_bookmark,
+                                tame_end,
+                                wild_current,
+                                wild_bookmark
+                        ))
+                    {
+                        return false;
+                    }
+                }
+            }
+        }
+    }
+}
+
+bool StringUtils::convert_string_to_int64(std::string_view raw, int64_t& converted) {
+    // std::from_chars takes `char const*` bounds. string_view's iterator type is
+    // implementation-defined, so derive the range from data()/size() to stay portable.
+    char const* const raw_begin = raw.data();
+    char const* const raw_end = raw_begin + raw.size();
+    auto const result = std::from_chars(raw_begin, raw_end, converted);
+    // Succeed only if parsing consumed the entire input without error
+    return raw_end == result.ptr && std::errc() == result.ec;
+}
+
+bool StringUtils::convert_string_to_double(std::string const& raw, double& converted) {
+    // An empty string has nothing to convert
+    if (raw.empty()) {
+        return false;
+    }
+    char const* const raw_begin = raw.c_str();
+    char* parse_end = nullptr;
+    errno = 0;  // cleared so that ERANGE can only come from the strtod call below
+    double const parsed_value = strtod(raw_begin, &parse_end);
+    // Reject out-of-range values and inputs that strtod only partially consumed
+    auto const num_chars_parsed = static_cast<size_t>(parse_end - raw_begin);
+    if (ERANGE == errno || num_chars_parsed < raw.length()) {
+        return false;
+    }
+    converted = parsed_value;
+    return true;
+}
+
+void StringUtils::tokenize_column_descriptor(
+        std::string const& descriptor,
+        std::vector<std::string>& tokens
+) {
+    // TODO: handle escaped . correctly
+    size_t start = 0;  // size_t (not unsigned) so `end + 1` is never narrowed
+    auto end = descriptor.find('.');
+    while (end != std::string::npos) {
+        tokens.push_back(descriptor.substr(start, end - start));
+        start = end + 1;
+        end = descriptor.find('.', start);
+    }
+    tokens.push_back(descriptor.substr(start));  // the text after the last '.', possibly empty
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/Utils.hpp b/components/core/src/clp_s/Utils.hpp
new file mode 100644
index 000000000..1cc7a4a4d
--- /dev/null
+++ b/components/core/src/clp_s/Utils.hpp
@@ -0,0 +1,273 @@
+#ifndef CLP_S_UTILS_HPP
+#define CLP_S_UTILS_HPP
+
+#include <charconv>
+#include <string>
+
+#include <boost/filesystem.hpp>
+
+namespace clp_s {
+class FileUtils {
+public:
+    /**
+     * Recursively finds all files under a directory; a non-directory path is added as-is
+     * @param path File or directory to search
+     * @param file_paths Vector to which the paths found are appended
+     * @return true if successful, false if a filesystem error occurred (the error is logged)
+     */
+    static bool find_all_files(std::string const& path, std::vector<std::string>& file_paths);
+
+    /**
+     * Checks that every given path exists, logging an error for each one that doesn't
+     * @param paths
+     * @return true if all paths exist, false otherwise
+     */
+    static bool validate_path(std::vector<std::string> const& paths);
+};
+
+class StringUtils {
+public:
+    /**
+     * Checks if the given character is an alphabet ('a'-'z' or 'A'-'Z')
+     * @param c
+     * @return true if c is an alphabet, false otherwise
+     */
+    static inline bool is_alphabet(char c) {
+        return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
+    }
+
+    /**
+     * Checks if character is a decimal (base-10) digit
+     * @param c
+     * @return true if c is a decimal digit, false otherwise
+     */
+    static inline bool is_decimal_digit(char c) { return '0' <= c && c <= '9'; }
+
+    /**
+     * Checks if character is a delimiter (a character that separates tokens)
+     * @param c
+     * @return false if c is alphanumeric or one of + - . / \ _ (assuming ASCII), true otherwise
+     */
+    static inline bool is_delim(char c) {
+        return !(
+                '+' == c || ('-' <= c && c <= '9') || ('A' <= c && c <= 'Z') || '\\' == c
+                || '_' == c || ('a' <= c && c <= 'z')
+        );
+    }
+
+    /**
+     * Checks if the substring [begin_pos, end_pos) of the string could be a hexadecimal value
+     * @param str
+     * @param begin_pos
+     * @param end_pos
+     * @return true if the range has at least 2 characters and all are hex digits, false otherwise
+     */
+    static inline bool
+    could_be_multi_digit_hex_value(std::string const& str, size_t begin_pos, size_t end_pos) {
+        if (end_pos - begin_pos < 2) {
+            return false;
+        }
+
+        for (size_t i = begin_pos; i < end_pos; ++i) {
+            auto c = str[i];
+            if (false
+                == (('a' <= c && c <= 'f') || ('A' <= c && c <= 'F') || ('0' <= c && c <= '9')))
+            {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /**
+     * Returns bounds of next variable in given string
+     * A variable is a token (word between two delimiters) that contains a decimal digit, contains
+     * an alphabet and is directly preceded by an equals sign, or could be a multi-digit hex value
+     * @param msg
+     * @param begin_pos Begin position of last variable, changes to begin position of next variable
+     * @param end_pos End position of last variable, changes to end position of next variable
+     * @return true if a variable was found, false otherwise
+     */
+    static bool get_bounds_of_next_var(std::string const& msg, size_t& begin_pos, size_t& end_pos);
+
+    /**
+     * Searches haystack starting at the given position for one of the given needles
+     * @param haystack
+     * @param needles Null-terminated string in which each character is a needle
+     * @param search_start_pos
+     * @param needle_ix The index of the needle found
+     * @return The position of the match or string::npos if none
+     */
+    static size_t find_first_of(
+            std::string const& haystack,
+            char const* needles,
+            size_t search_start_pos,
+            size_t& needle_ix
+    );
+
+    /**
+     * Replaces the given characters in the given value with the given replacements
+     * @param characters_to_escape
+     * @param replacement_characters Replacements, indexed in parallel with characters_to_escape
+     * @param value
+     * @param escape Whether to precede the replacement with a '\' (e.g., so that a
+     * line-feed character is output as "\n")
+     * @return The string with replacements
+     */
+    static std::string replace_characters(
+            char const* characters_to_escape,
+            char const* replacement_characters,
+            std::string const& value,
+            bool escape
+    );
+
+    /**
+     * Converts a string to lowercase
+     * @param str The string, which is converted in place
+     */
+    static void to_lower(std::string& str);
+
+    /**
+     * Cleans wildcard search string
+     * <ul>
+     *   <li>Removes consecutive '*'</li>
+     *   <li>Removes escaping from non-wildcard characters</li>
+     *   <li>Removes dangling escape character from the end of the string</li>
+     * </ul>
+     * @param str Wildcard search string to clean
+     * @return Cleaned wildcard search string
+     */
+    static std::string clean_up_wildcard_search_string(std::string_view str);
+
+    /**
+     * Checks if character is a wildcard ('?' or '*')
+     * @param c
+     * @return true if c is a wildcard, false otherwise
+     */
+    static bool is_wildcard(char c);
+
+    /**
+     * Same as ``wildcard_match_unsafe_case_sensitive`` except this method
+     * allows the caller to specify whether the match should be case sensitive.
+     *
+     * @param tame The literal string
+     * @param wild The wildcard string
+     * @param case_sensitive_match Whether to consider case when matching
+     * @return Whether the two strings match
+     */
+    static bool wildcard_match_unsafe(
+            std::string_view tame,
+            std::string_view wild,
+            bool case_sensitive_match = true
+    );
+
+    /**
+     * Checks if a string matches a wildcard string. Two wildcards are currently
+     * supported: '*' to match 0 or more characters, and '?' to match any single
+     * character. Each can be escaped using a preceding '\'. Other characters which
+     * are escaped are treated as normal characters.
+     * <br/>
+     * This method is optimized for performance by omitting some checks on the
+     * wildcard string that are unnecessary if the caller cleans up the wildcard
+     * string as follows:
+     * <ul>
+     *   <li>The wildcard string should not contain consecutive '*'.</li>
+     *   <li>The wildcard string should not contain an escape character without a
+     *   character following it.</li>
+     * </ul>
+     *
+     * @param tame The literal string
+     * @param wild The wildcard string
+     * @return Whether the two strings match
+     */
+    static bool wildcard_match_unsafe_case_sensitive(std::string_view tame, std::string_view wild);
+
+    /**
+     * Converts the given string to a 64-bit integer if possible
+     * @param raw
+     * @param converted
+     * @return true if the entire string was converted successfully, false otherwise
+     */
+    static bool convert_string_to_int64(std::string_view raw, int64_t& converted);
+
+    /**
+     * Converts the given string to a double if possible
+     * @param raw
+     * @param converted Set only if the conversion was successful
+     * @return true if the conversion was successful, false otherwise
+     */
+    static bool convert_string_to_double(std::string const& raw, double& converted);
+
+    /**
+     * Splits a column descriptor on '.' and appends the resulting tokens to the given vector
+     * (escaped '.' characters are not yet handled)
+     * @param descriptor
+     * @param tokens Vector to which the tokens are appended
+     */
+    static void
+    tokenize_column_descriptor(std::string const& descriptor, std::vector<std::string>& tokens);
+
+private:
+    /**
+     * Helper for ``wildcard_match_unsafe_case_sensitive`` to advance the
+     * pointer in tame to the next character which matches wild. This method
+     * should be inlined for performance.
+     * @param tame_current
+     * @param tame_bookmark
+     * @param tame_end
+     * @param wild_current
+     * @param wild_bookmark
+     * @return true on success, false if wild cannot match tame
+     */
+    static inline bool advance_tame_to_next_match(
+            char const*& tame_current,
+            char const*& tame_bookmark,
+            char const* tame_end,
+            char const*& wild_current,
+            char const*& wild_bookmark
+    );
+};
+
+enum EvaluatedValue {  // three-valued (true/false/unknown) result
+    True,
+    False,
+    Unknown
+};
+
+template <class T2, class T1>
+inline T2 bit_cast(T1 t1) {
+    static_assert(sizeof(T2) == sizeof(T1), "Must match size");
+    static_assert(std::is_standard_layout_v<T1>, "Need to be standard layout");
+    static_assert(std::is_standard_layout_v<T2>, "Need to be standard layout");
+    // Copy the object representation byte-for-byte instead of casting pointers
+    T2 reinterpreted;
+    std::memcpy(std::addressof(reinterpreted), std::addressof(t1), sizeof(T2));
+    return reinterpreted;
+}
+
+/**
+ * A non-owning view of `size` contiguous elements starting at `begin`. A default-constructed
+ * Span is empty (null pointer, zero size). Accessors are const and don't bounds-check.
+ * @tparam T Element type
+ */
+template <typename T>
+class Span {
+public:
+    Span() = default;
+    Span(T* begin, size_t size) : m_begin(begin), m_size(size) {}
+
+    T* begin() const { return m_begin; }
+    T* end() const { return m_begin + m_size; }
+
+    size_t size() const { return m_size; }
+
+    T& operator[](size_t i) const { return m_begin[i]; }
+
+private:
+    T* m_begin{nullptr};
+    size_t m_size{};
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_UTILS_HPP
diff --git a/components/core/src/clp_s/VariableDecoder.cpp b/components/core/src/clp_s/VariableDecoder.cpp
new file mode 100644
index 000000000..ff91a87bb
--- /dev/null
+++ b/components/core/src/clp_s/VariableDecoder.cpp
@@ -0,0 +1,118 @@
+// Code from CLP
+
+#include "VariableDecoder.hpp"
+
+namespace clp_s {
+bool VariableDecoder::decode_variables_into_message(
+        LogTypeDictionaryEntry const& logtype_dict_entry,
+        VariableDictionaryReader const& var_dict,
+        Span<int64_t> encoded_vars,
+        std::string& decompressed_msg
+) {
+    size_t const expected_num_vars = logtype_dict_entry.get_num_vars();
+
+    // Reject the input unless there is exactly one encoded variable per variable in the logtype
+    auto const& logtype = logtype_dict_entry.get_value();
+    if (encoded_vars.size() != expected_num_vars) {
+        SPDLOG_ERROR(
+                "VariableDecoder: Logtype '{}' contains {} variables, but {} were given for "
+                "decoding.",
+                logtype.c_str(),
+                expected_num_vars,
+                encoded_vars.size()
+        );
+        return false;
+    }
+
+    // Walk the variables in order, interleaving the logtype's constant text with decoded values
+    LogTypeDictionaryEntry::VarDelim delim;
+    std::string decoded_double;
+    size_t next_constant_pos = 0;
+    for (size_t var_ix = 0; var_ix < expected_num_vars; ++var_ix) {
+        size_t const delim_pos = logtype_dict_entry.get_var_info(var_ix, delim);
+        int64_t const encoded_var = encoded_vars[var_ix];
+
+        // Copy the constant text lying between the previous variable and this one
+        decompressed_msg.append(logtype, next_constant_pos, delim_pos - next_constant_pos);
+
+        if (LogTypeDictionaryEntry::VarDelim::NonDouble != delim) {
+            // Anything that isn't NonDouble is handled as LogTypeDictionaryEntry::VarDelim::Double
+            convert_encoded_double_to_string(encoded_var, decoded_double);
+            decompressed_msg += decoded_double;
+        } else if (is_var_dict_id(encoded_var)) {
+            // The value is a variable dictionary id, so look up the string it stands for
+            auto var_dict_id = decode_var_dict_id(encoded_var);
+            decompressed_msg += var_dict.get_value(var_dict_id);
+        } else {
+            // The value is the integer itself
+            decompressed_msg += std::to_string(encoded_var);
+        }
+        // Continue just past the variable delimiter
+        next_constant_pos = delim_pos + 1;
+    }
+    if (logtype.length() > next_constant_pos) {  // Append any text after the last variable
+        decompressed_msg.append(logtype, next_constant_pos, std::string::npos);
+    }
+
+    return true;
+}
+
+void VariableDecoder::convert_encoded_double_to_string(int64_t encoded_var, std::string& value) {
+    uint64_t encoded_double;  // Bit pattern of encoded_var; its fields are peeled off below
+    static_assert(
+            sizeof(encoded_double) == sizeof(encoded_var),
+            "sizeof(encoded_double) != sizeof(encoded_var)"
+    );
+    // NOTE: We use memcpy rather than reinterpret_cast to avoid violating strict aliasing; a smart
+    // compiler should optimize it to a register move
+    std::memcpy(&encoded_double, &encoded_var, sizeof(encoded_var));
+
+    // Decode according to the format described in
+    // VariableEncoder::convert_string_to_representable_double_var
+    uint64_t digits = encoded_double & 0x003F'FFFF'FFFF'FFFF;
+    encoded_double >>= 55;
+    uint8_t decimal_pos = (encoded_double & 0x0F) + 1;
+    encoded_double >>= 4;
+    uint8_t num_digits = (encoded_double & 0x0F) + 1;
+    encoded_double >>= 4;
+    bool is_negative = encoded_double > 0;
+
+    size_t value_length = num_digits + 1 + is_negative;  // Digits + decimal point + optional sign
+    value.resize(value_length);
+    size_t num_chars_to_process = value_length;
+
+    // Add sign
+    if (is_negative) {
+        value[0] = '-';
+        --num_chars_to_process;
+    }
+
+    // Write digits from the right until the decimal's slot is reached or no non-zero digits remain
+    size_t pos = value_length - 1;
+    for (; pos > (value_length - 1 - decimal_pos) && digits > 0; --pos) {
+        value[pos] = (char)('0' + (digits % 10));
+        digits /= 10;
+        --num_chars_to_process;
+    }
+
+    if (digits > 0) {
+        // Skip decimal since it's added at the end
+        --pos;
+        --num_chars_to_process;
+
+        while (digits > 0) {
+            value[pos--] = (char)('0' + (digits % 10));
+            digits /= 10;
+            --num_chars_to_process;
+        }
+    }
+
+    // Add remaining zeros (this may also zero the decimal's slot, which is set last)
+    for (; num_chars_to_process > 0; --num_chars_to_process) {
+        value[pos--] = '0';
+    }
+
+    // Add decimal
+    value[value_length - 1 - decimal_pos] = '.';
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/VariableDecoder.hpp b/components/core/src/clp_s/VariableDecoder.hpp
new file mode 100644
index 000000000..f99a08dad
--- /dev/null
+++ b/components/core/src/clp_s/VariableDecoder.hpp
@@ -0,0 +1,61 @@
+// Code from CLP
+
+#ifndef CLP_S_VARIABLEDECODER_HPP
+#define CLP_S_VARIABLEDECODER_HPP
+
+#include "DictionaryEntry.hpp"
+#include "DictionaryReader.hpp"
+#include "Utils.hpp"
+
+namespace clp_s {
+class VariableDecoder {
+public:
+    /**
+     * Decodes the given encoded variables and appends the reconstructed message to
+     * decompressed_msg
+     * @param logtype_dict_entry
+     * @param var_dict
+     * @param encoded_vars
+     * @param decompressed_msg
+     * @return false if the number of encoded variables doesn't match the logtype, true otherwise
+     */
+    static bool decode_variables_into_message(
+            LogTypeDictionaryEntry const& logtype_dict_entry,
+            VariableDictionaryReader const& var_dict,
+            Span<int64_t> encoded_vars,
+            std::string& decompressed_msg
+    );
+
+private:
+    /**
+     * Converts an encoded double into its string form
+     * @param encoded_var
+     * @param value Overwritten with the decoded string
+     */
+    static void convert_encoded_double_to_string(int64_t encoded_var, std::string& value);
+
+    /**
+     * @param encoded_var
+     * @return Whether encoded_var lies in [cVarDictIdRangeBegin, cVarDictIdRangeEnd)
+     */
+    static bool is_var_dict_id(int64_t encoded_var) {
+        return encoded_var >= cVarDictIdRangeBegin && encoded_var < cVarDictIdRangeEnd;
+    }
+
+    /**
+     * Decodes the given variable dictionary id by removing the cVarDictIdRangeBegin offset
+     * @param encoded_var
+     * @return the decoded id
+     */
+    static uint64_t decode_var_dict_id(int64_t encoded_var) {
+        return static_cast<uint64_t>(encoded_var - cVarDictIdRangeBegin);
+    }
+
+    // Encoded variables in [cVarDictIdRangeBegin, cVarDictIdRangeEnd) are variable dictionary
+    // ids
+    static constexpr int64_t cVarDictIdRangeBegin = 1LL << 62;
+    static constexpr int64_t cVarDictIdRangeEnd = (1ULL << 63) - 1;
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_VARIABLEDECODER_HPP
diff --git a/components/core/src/clp_s/VariableEncoder.cpp b/components/core/src/clp_s/VariableEncoder.cpp
new file mode 100644
index 000000000..169b3da3a
--- /dev/null
+++ b/components/core/src/clp_s/VariableEncoder.cpp
@@ -0,0 +1,184 @@
+// Code from CLP
+
+#include "VariableEncoder.hpp"
+
+namespace clp_s {
+void VariableEncoder::encode_and_add_to_dictionary(
+        std::string const& message,
+        LogTypeDictionaryEntry& logtype_dict_entry,
+        VariableDictionaryWriter& var_dict,
+        std::vector<int64_t>& encoded_vars
+) {
+    // Start from an empty logtype and, to avoid reallocating it as we append to it, reserve
+    // enough space to hold the entire message
+    logtype_dict_entry.clear();
+    logtype_dict_entry.reserve_constant_length(message.length());
+
+    // Parse the variables one by one; each is encoded and recorded while the logtype is built
+    std::string token;
+    size_t token_begin = 0;
+    size_t token_end = 0;
+    while (logtype_dict_entry.parse_next_var(message, token_begin, token_end, token)) {
+        int64_t encoded;
+        bool const is_integer = convert_string_to_representable_integer_var(token, encoded);
+        if (false == is_integer && convert_string_to_representable_double_var(token, encoded)) {
+            logtype_dict_entry.add_double_var();
+        } else {
+            if (false == is_integer) {
+                // Neither an integer nor a double, so store the string in the variable
+                // dictionary and encode its id instead
+                uint64_t dict_id;
+                var_dict.add_entry(token, dict_id);
+                encoded = encode_var_dict_id(dict_id);
+            }
+            logtype_dict_entry.add_non_double_var();
+        }
+        encoded_vars.push_back(encoded);
+    }
+}
+
+bool VariableEncoder::convert_string_to_int64(std::string const& raw, int64_t& converted) {
+    // Succeeds only if all of raw is consumed as a base-10 integer that fits in the result;
+    // converted is left untouched on failure
+    if (raw.empty()) {
+        return false;
+    }
+    char const* c_str = raw.c_str();
+    char* endptr;
+    errno = 0;  // Reset errno so we can detect if strtoll sets it
+    int64_t raw_as_int = strtoll(c_str, &endptr, 10);
+    // ERANGE covers both overflow (clamped to LLONG_MAX) and underflow (clamped to LLONG_MIN);
+    // in either case the clamped result doesn't equal the value in the string
+    if (static_cast<size_t>(endptr - c_str) != raw.length() || ERANGE == errno) {
+        return false;
+    }
+    converted = raw_as_int;
+    return true;
+}
+
+bool VariableEncoder::convert_string_to_representable_integer_var(
+        std::string const& value,
+        int64_t& encoded_var
+) {
+    if (value.empty()) {
+        // Nothing to convert
+        return false;
+    }
+    // Only accept integers written with no positive sign, no zero-padding, and no zero directly
+    // after a negative sign
+    size_t const num_chars = value.length();
+    char const first_char = value[0];
+    if ('-' != first_char) {
+        // Must start with a digit
+        if (first_char < '0' || first_char > '9') {
+            return false;
+        }
+        // A leading zero is only allowed when it's the entire value
+        if ('0' == first_char && num_chars > 1) {
+            return false;
+        }
+    } else {
+        // The sign must be followed by a non-zero digit
+        if (num_chars < 2 || value[1] < '1' || value[1] > '9') {
+            return false;
+        }
+    }
+    int64_t parsed;
+    if (false == convert_string_to_int64(value, parsed)) {
+        return false;
+    }
+    if (parsed >= cVarDictIdRangeBegin) {
+        // Values in the variable dictionary id range can't be stored directly
+        return false;
+    }
+    encoded_var = parsed;
+    return true;
+}
+
+bool VariableEncoder::convert_string_to_representable_double_var(
+        std::string const& value,
+        int64_t& encoded_var
+) {  // Packs a decimal string (optional minus sign, digits, one decimal point) into encoded_var
+    if (value.empty()) {
+        // Can't convert an empty string
+        return false;
+    }
+
+    size_t pos = 0;
+    constexpr size_t cMaxDigitsInRepresentableDoubleVar = 16;
+    // +1 for decimal point
+    size_t max_length = cMaxDigitsInRepresentableDoubleVar + 1;
+
+    // Check for a negative sign
+    bool is_negative = false;
+    if ('-' == value[pos]) {
+        is_negative = true;
+        ++pos;
+        // Include sign in max length
+        ++max_length;
+    }
+
+    // Check if value is short enough to be represented in the encoded format
+    if (value.length() > max_length) {
+        return false;
+    }
+
+    size_t num_digits = 0;
+    size_t decimal_point_pos = std::string::npos;
+    uint64_t digits = 0;
+    for (; pos < value.length(); ++pos) {
+        auto c = value[pos];
+        if ('0' <= c && c <= '9') {
+            digits *= 10;
+            digits += (c - '0');
+            ++num_digits;
+        } else if (std::string::npos == decimal_point_pos && '.' == c) {
+            decimal_point_pos = value.length() - 1 - pos;
+        } else {
+            // Invalid character (including a second decimal point)
+            return false;
+        }
+    }
+    if (std::string::npos == decimal_point_pos || 0 == decimal_point_pos || 0 == num_digits) {
+        // No decimal point found, decimal point is after all digits, or no digits found
+        return false;
+    }
+
+    // Encode into 64 bits with the following format (from MSB to LSB):
+    // -  1 bit : is negative
+    // -  4 bits: # of decimal digits minus 1
+    //     - This format can represent doubles with between 1 and 16 decimal digits, so we use 4
+    //     bits and map the range [1, 16] to [0x0, 0xF]
+    // -  4 bits: position of the decimal from the right minus 1
+    //     - To see why the position is taken from the right, consider (1) "-123456789012345.6", (2)
+    //     "-.1234567890123456", and (3) ".1234567890123456"
+    //         - For (1), the decimal point is at index 16 from the left and index 1 from the right.
+    //         - For (2), the decimal point is at index 1 from the left and index 16 from the right.
+    //         - For (3), the decimal point is at index 0 from the left and index 16 from the right.
+    //         - So if we take the decimal position from the left, it can range from 0 to 16 because
+    //         of the negative sign, which is 17 values. Whereas from the right, the negative sign
+    //         is inconsequential and the position only ranges from 1 to 16.
+    //     - Thus, we use 4 bits and map the range [1, 16] to [0x0, 0xF].
+    // -  1 bit : unused
+    // - 54 bits: The digits of the double without the decimal, as an integer
+    uint64_t encoded_double = 0;
+    if (is_negative) {
+        encoded_double = 1;
+    }
+    encoded_double <<= 4;
+    encoded_double |= (num_digits - 1) & 0x0F;
+    encoded_double <<= 4;
+    encoded_double |= (decimal_point_pos - 1) & 0x0F;
+    encoded_double <<= 55;
+    encoded_double |= digits & 0x003F'FFFF'FFFF'FFFF;
+    static_assert(
+            sizeof(encoded_var) == sizeof(encoded_double),
+            "sizeof(encoded_var) != sizeof(encoded_double)"
+    );
+    // NOTE: We use memcpy rather than reinterpret_cast to avoid violating strict aliasing; a smart
+    // compiler should optimize it to a register move
+    std::memcpy(&encoded_var, &encoded_double, sizeof(encoded_double));
+
+    return true;
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/VariableEncoder.hpp b/components/core/src/clp_s/VariableEncoder.hpp
new file mode 100644
index 000000000..d604b7c0d
--- /dev/null
+++ b/components/core/src/clp_s/VariableEncoder.hpp
@@ -0,0 +1,71 @@
+// Code from CLP
+
+#ifndef CLP_S_VARIABLEENCODER_HPP
+#define CLP_S_VARIABLEENCODER_HPP
+
+#include <string>
+
+#include <simdjson.h>
+
+#include "DictionaryEntry.hpp"
+#include "DictionaryWriter.hpp"
+
+using namespace simdjson;
+
+namespace clp_s {
+class VariableEncoder {
+public:
+    /**
+     * Encodes the variables in the given message, building its logtype in logtype_dict_entry
+     * @param message
+     * @param logtype_dict_entry Cleared before use
+     * @param var_dict Receives variables that can't be encoded as an integer or a double
+     * @param encoded_vars Receives one encoded value per variable, appended in parse order
+     */
+    static void encode_and_add_to_dictionary(
+            std::string const& message,
+            LogTypeDictionaryEntry& logtype_dict_entry,
+            VariableDictionaryWriter& var_dict,
+            std::vector<int64_t>& encoded_vars
+    );
+
+    /**
+     * Converts the given base-10 string to an int64_t
+     * @param raw
+     * @param converted Only written on success
+     * @return true if the conversion was successful, false otherwise
+     */
+    static bool convert_string_to_int64(std::string const& raw, int64_t& converted);
+
+    /**
+     * Converts an integer string (no '+' sign or zero-padding; below the dictionary id range)
+     * @param value
+     * @param encoded_var
+     * @return true if the conversion was successful, false otherwise
+     */
+    static bool
+    convert_string_to_representable_integer_var(std::string const& value, int64_t& encoded_var);
+
+    /**
+     * Converts a decimal string (optional '-', at most 16 digits, one '.' that isn't last)
+     * @param value
+     * @param encoded_var
+     * @return true if the conversion was successful, false otherwise
+     */
+    static bool
+    convert_string_to_representable_double_var(std::string const& value, int64_t& encoded_var);
+
+    /**
+     * @return The given dictionary id offset into the variable dictionary id range
+     */
+    static int64_t encode_var_dict_id(uint64_t id) {
+        return static_cast<int64_t>(id) + cVarDictIdRangeBegin;
+    }
+
+private:
+    static constexpr int64_t cVarDictIdRangeBegin = 1LL << 62;
+    static constexpr int64_t cVarDictIdRangeEnd = (1ULL << 63) - 1;
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_VARIABLEENCODER_HPP
diff --git a/components/core/src/clp_s/ZstdCompressor.cpp b/components/core/src/clp_s/ZstdCompressor.cpp
new file mode 100644
index 000000000..8bfba6167
--- /dev/null
+++ b/components/core/src/clp_s/ZstdCompressor.cpp
@@ -0,0 +1,120 @@
+// Code from CLP
+
+#include "ZstdCompressor.hpp"
+
+namespace clp_s {
+ZstdCompressor::ZstdCompressor()
+        : Compressor(CompressorType::ZSTD),
+          m_compressed_stream_file_writer(nullptr),
+          m_compression_stream(ZSTD_createCStream()),
+          m_compression_stream_contains_data(false) {
+    if (nullptr == m_compression_stream) {  // The stream couldn't be created
+        SPDLOG_ERROR("ZstdCompressor: ZSTD_createCStream() error");
+        throw OperationFailed(ErrorCodeFailure, __FILENAME__, __LINE__);
+    }
+}
+
+ZstdCompressor::~ZstdCompressor() {
+    ZSTD_freeCStream(m_compression_stream);  // Release the stream created by the constructor
+}
+
+void ZstdCompressor::open(FileWriter& file_writer, int const compression_level) {
+    if (nullptr != m_compressed_stream_file_writer) {  // Already open
+        throw OperationFailed(ErrorCodeNotReady, __FILENAME__, __LINE__);
+    }
+
+    // Allocate an output block of the size zstd recommends for streaming compression
+    auto const block_capacity = ZSTD_CStreamOutSize();
+    m_compressed_stream_block_buffer = std::make_unique<char[]>(block_capacity);
+    m_compressed_stream_block.size = block_capacity;
+    m_compressed_stream_block.dst = m_compressed_stream_block_buffer.get();
+
+    // Initialize the compression stream with the requested level
+    auto const init_status = ZSTD_initCStream(m_compression_stream, compression_level);
+    if (ZSTD_isError(init_status)) {
+        SPDLOG_ERROR(
+                "ZstdCompressor: ZSTD_initCStream() error: {}",
+                ZSTD_getErrorName(init_status)
+        );
+        throw OperationFailed(ErrorCodeFailure, __FILENAME__, __LINE__);
+    }
+
+    // The compressor only counts as open once initialization has succeeded
+    m_uncompressed_stream_pos = 0;
+    m_compressed_stream_file_writer = &file_writer;
+}
+
+void ZstdCompressor::close() {
+    if (nullptr == m_compressed_stream_file_writer) {  // Not open
+        throw OperationFailed(ErrorCodeNotInit, __FILENAME__, __LINE__);
+    }
+
+    flush();  // End the current frame (if any data was written) before detaching from the writer
+    m_compressed_stream_file_writer = nullptr;
+}
+
+void ZstdCompressor::write(char const* data, size_t data_length) {
+    if (nullptr == m_compressed_stream_file_writer) {  // Not open
+        throw OperationFailed(ErrorCodeNotInit, __FILENAME__, __LINE__);
+    }
+
+    // An empty write is a no-op, even when data is null
+    if (0 == data_length) {
+        return;
+    }
+    if (nullptr == data) {
+        throw OperationFailed(ErrorCodeBadParam, __FILENAME__, __LINE__);
+    }
+
+    // Feed the input to zstd until it's all consumed, draining the output block to the file
+    // writer after every call
+    ZSTD_inBuffer input{data, data_length, 0};
+    while (input.pos < input.size) {
+        // Reuse the output block from its start
+        m_compressed_stream_block.pos = 0;
+        auto const status
+                = ZSTD_compressStream(m_compression_stream, &m_compressed_stream_block, &input);
+        if (ZSTD_isError(status)) {
+            SPDLOG_ERROR(
+                    "ZstdCompressor: ZSTD_compressStream() error: {}",
+                    ZSTD_getErrorName(status)
+            );
+            throw OperationFailed(ErrorCodeFailure, __FILENAME__, __LINE__);
+        }
+        if (0 != m_compressed_stream_block.pos) {
+            // Only write when the call actually produced output
+            m_compressed_stream_file_writer->write(
+                    static_cast<char const*>(m_compressed_stream_block.dst),
+                    m_compressed_stream_block.pos
+            );
+        }
+    }
+
+    m_uncompressed_stream_pos += data_length;
+    m_compression_stream_contains_data = true;
+}
+
+void ZstdCompressor::flush() {
+    if (false == m_compression_stream_contains_data) {  // No data to flush
+        return;
+    }
+
+    m_compressed_stream_block.pos = 0;  // Reuse the output block from its start
+    auto const remaining = ZSTD_endStream(m_compression_stream, &m_compressed_stream_block);
+    if (0 != remaining) {
+        // A non-zero result is treated as an error, relying on the output block being large
+        // enough for the stream to be flushed completely in a single call
+        SPDLOG_ERROR(
+                "ZstdCompressor: ZSTD_endStream() error: {}",
+                ZSTD_getErrorName(remaining)
+        );
+        throw OperationFailed(ErrorCodeFailure, __FILENAME__, __LINE__);
+    }
+    m_compressed_stream_file_writer->write(
+            static_cast<char const*>(m_compressed_stream_block.dst),
+            m_compressed_stream_block.pos
+    );
+
+    m_compression_stream_contains_data = false;
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/ZstdCompressor.hpp b/components/core/src/clp_s/ZstdCompressor.hpp
new file mode 100644
index 000000000..4104571c7
--- /dev/null
+++ b/components/core/src/clp_s/ZstdCompressor.hpp
@@ -0,0 +1,98 @@
+// Code from CLP
+
+#ifndef CLP_S_ZSTDCOMPRESSOR_HPP
+#define CLP_S_ZSTDCOMPRESSOR_HPP
+
+#include <memory>
+#include <string>
+
+#include <spdlog/spdlog.h>
+#include <zstd.h>
+#include <zstd_errors.h>
+
+#include "Compressor.hpp"
+#include "FileWriter.hpp"
+#include "TraceableException.hpp"
+
+namespace clp_s {
+constexpr int cDefaultCompressionLevel = 3;  // Default level for ZstdCompressor::open()
+
+class ZstdCompressor : public Compressor {
+public:
+    // Types
+    class OperationFailed : public TraceableException {
+    public:
+        OperationFailed(ErrorCode error_code, char const* const filename, int line_number)
+                : TraceableException(error_code, filename, line_number) {}
+    };
+
+    // Constructor
+    ZstdCompressor();
+
+    // Destructor
+    ~ZstdCompressor() override;
+
+    // Copying is explicitly disabled (which also suppresses the implicit move operations)
+    ZstdCompressor(ZstdCompressor const&) = delete;
+
+    ZstdCompressor& operator=(ZstdCompressor const&) = delete;
+
+    // Methods implementing the WriterInterface
+    /**
+     * Compresses the given data, writing any compressed output to the open file writer
+     * @param data
+     * @param data_length
+     * @throw OperationFailed if not open, if data is null (and data_length isn't 0), or on failure
+     */
+    void write(char const* data, size_t data_length);
+
+    /**
+     * Writes the in-memory bytes of the given numeric value to the compressor
+     * @tparam ValueType
+     * @param val
+     */
+    template <typename ValueType>
+    void write_numeric_value(ValueType val) {
+        write(reinterpret_cast<char const*>(&val), sizeof(ValueType));
+    }
+
+    /**
+     * Writes the characters of the given string (without a null terminator) to the compressor
+     * @param str
+     */
+    void write_string(std::string const& str) { write(str.data(), str.size()); }
+
+    /**
+     * Writes any internally buffered data to file and ends the current frame
+     */
+    void flush();
+
+    // Methods implementing the Compressor interface
+    /**
+     * Flushes the compressor and detaches it from the file writer
+     */
+    void close() override;
+
+    /**
+     * Initializes the streaming compressor to write to the given file writer
+     * @param file_writer
+     * @param compression_level
+     */
+    void open(FileWriter& file_writer, int compression_level = cDefaultCompressionLevel);
+
+private:
+    // The open file writer, or nullptr when the compressor isn't open
+    FileWriter* m_compressed_stream_file_writer{};
+
+    // Compressed stream variables
+    ZSTD_CStream* m_compression_stream;
+    bool m_compression_stream_contains_data;
+
+    ZSTD_outBuffer m_compressed_stream_block{};
+    std::unique_ptr<char[]> m_compressed_stream_block_buffer;
+
+    size_t m_uncompressed_stream_pos{};
+};
+}  // namespace clp_s
+
+#endif  // CLP_S_ZSTDCOMPRESSOR_HPP
diff --git a/components/core/src/clp_s/ZstdDecompressor.cpp b/components/core/src/clp_s/ZstdDecompressor.cpp
new file mode 100644
index 000000000..ee1632732
--- /dev/null
+++ b/components/core/src/clp_s/ZstdDecompressor.cpp
@@ -0,0 +1,238 @@
+// Code from CLP
+
+#include "ZstdDecompressor.hpp"
+
+#include <algorithm>
+
+#include <boost/filesystem.hpp>
+#include <spdlog/spdlog.h>
+
+namespace clp_s {
+ZstdDecompressor::ZstdDecompressor()
+        : Decompressor(CompressorType::ZSTD),
+          m_input_type(InputType::NotInitialized),
+          m_decompression_stream(nullptr),
+          m_file_reader(nullptr),
+          m_file_reader_initial_pos(0),
+          m_file_read_buffer_length(0),
+          m_file_read_buffer_capacity(0),
+          m_decompressed_stream_pos(0),
+          m_unused_decompressed_stream_block_size(0) {
+    m_decompression_stream = ZSTD_createDStream();
+    if (nullptr == m_decompression_stream) {  // The stream couldn't be created
+        SPDLOG_ERROR("ZstdDecompressor: ZSTD_createDStream() error");
+        throw OperationFailed(ErrorCodeFailure, __FILENAME__, __LINE__);
+    }
+
+    // Create block to hold unused decompressed data, sized by ZSTD_DStreamOutSize()
+    m_unused_decompressed_stream_block_size = ZSTD_DStreamOutSize();
+    m_unused_decompressed_stream_block_buffer
+            = std::make_unique<char[]>(m_unused_decompressed_stream_block_size);
+}
+
+ZstdDecompressor::~ZstdDecompressor() {
+    ZSTD_freeDStream(m_decompression_stream);  // Release the stream created by the constructor
+}
+
+ErrorCode
+ZstdDecompressor::try_read(char const* buf, size_t num_bytes_to_read, size_t& num_bytes_read) {
+    if (InputType::NotInitialized == m_input_type) {
+        throw OperationFailed(ErrorCodeNotInit, __FILENAME__, __LINE__);
+    }
+    if (nullptr == buf) {
+        throw OperationFailed(ErrorCodeBadParam, __FILENAME__, __LINE__);
+    }
+
+    num_bytes_read = 0;
+    // NOTE: buf is written to despite being declared as pointing to const data
+    ZSTD_outBuffer decompressed_stream_block = {(void*)buf, num_bytes_to_read, 0};
+    while (decompressed_stream_block.pos < num_bytes_to_read) {
+        // Check if there's data that can be decompressed
+        if (m_compressed_stream_block.pos == m_compressed_stream_block.size) {
+            switch (m_input_type) {
+                case InputType::CompressedDataBuf:
+                    // Fall through
+                case InputType::MemoryMappedCompressedFile:
+                    // The input is exhausted, so report whatever was decompressed (if anything)
+                    // and advance the stream position by it, like the full-read path below
+                    num_bytes_read = decompressed_stream_block.pos;
+                    m_decompressed_stream_pos += num_bytes_read;
+                    return (0 == num_bytes_read) ? ErrorCodeEndOfFile : ErrorCodeSuccess;
+                    // (Unreachable: both outcomes above return)
+                case InputType::File: {
+                    auto error_code = m_file_reader->try_read(
+                            reinterpret_cast<char*>(m_file_read_buffer.get()),
+                            m_file_read_buffer_capacity,
+                            m_file_read_buffer_length
+                    );
+                    if (ErrorCodeEndOfFile == error_code) {
+                        // Same handling as exhausted in-memory input above
+                        num_bytes_read = decompressed_stream_block.pos;
+                        m_decompressed_stream_pos += num_bytes_read;
+                        return (0 == num_bytes_read) ? ErrorCodeEndOfFile : ErrorCodeSuccess;
+                    }
+                    // Any other failure is passed on to the caller as-is
+                    if (ErrorCodeSuccess != error_code) {
+                        return error_code;
+                    }
+
+                    // Restart consumption from the beginning of the refilled buffer
+                    m_compressed_stream_block.pos = 0;
+                    m_compressed_stream_block.size = m_file_read_buffer_length;
+                    break;
+                }
+                default:
+                    // Unexpected input type
+                    throw OperationFailed(ErrorCodeUnsupported, __FILENAME__, __LINE__);
+            }
+        }
+
+        // Decompress
+        size_t error = ZSTD_decompressStream(
+                m_decompression_stream,
+                &decompressed_stream_block,
+                &m_compressed_stream_block
+        );
+        if (ZSTD_isError(error)) {
+            SPDLOG_ERROR(
+                    "ZstdDecompressor: ZSTD_decompressStream() error: {}",
+                    ZSTD_getErrorName(error)
+            );
+            return ErrorCodeFailure;
+        }
+    }
+
+    // Update decompression stream position
+    m_decompressed_stream_pos += decompressed_stream_block.pos;
+
+    num_bytes_read = decompressed_stream_block.pos;
+    return ErrorCodeSuccess;
+}
+
+ErrorCode ZstdDecompressor::try_read_string(size_t str_length, std::string& str) {
+    str.resize(str_length);  // Size the string so it can be filled in place
+
+    return try_read_exact_length(&str[0], str_length);
+}
+
+ErrorCode ZstdDecompressor::try_read_exact_length(char* buf, size_t num_bytes) {
+    size_t num_bytes_obtained;
+    auto const status = try_read(buf, num_bytes, num_bytes_obtained);
+    if (ErrorCodeSuccess != status) {
+        // Propagate the failure (including end-of-file) unchanged
+        return status;
+    }
+
+    // A successful but short read is reported as truncation
+    bool const is_truncated = num_bytes_obtained < num_bytes;
+    return is_truncated ? ErrorCodeTruncated : ErrorCodeSuccess;
+}
+
+void ZstdDecompressor::open(char const* compressed_data_buf, size_t compressed_data_buf_size) {
+    if (InputType::NotInitialized != m_input_type) {  // Already open
+        throw OperationFailed(ErrorCodeNotReady, __FILENAME__, __LINE__);
+    }
+
+    // Decompress directly from the given buffer (it isn't copied), starting at its beginning
+    m_compressed_stream_block = {compressed_data_buf, compressed_data_buf_size, 0};
+    m_input_type = InputType::CompressedDataBuf;
+    reset_stream();
+}
+
+void ZstdDecompressor::open(FileReader& file_reader, size_t file_read_buffer_capacity) {
+    if (InputType::NotInitialized != m_input_type) {  // Already open
+        throw OperationFailed(ErrorCodeNotReady, __FILENAME__, __LINE__);
+    }
+    m_input_type = InputType::File;
+
+    // Remember where the compressed stream starts so that reset_stream() can seek back to it
+    m_file_reader = &file_reader;
+    m_file_reader_initial_pos = file_reader.get_pos();
+
+    // Allocate an (initially empty) buffer for compressed data read from the file
+    m_file_read_buffer_capacity = file_read_buffer_capacity;
+    m_file_read_buffer = std::make_unique<char[]>(m_file_read_buffer_capacity);
+    m_file_read_buffer_length = 0;
+    m_compressed_stream_block = {m_file_read_buffer.get(), m_file_read_buffer_length, 0};
+    reset_stream();
+}
+
+void ZstdDecompressor::close() {
+    // An in-memory buffer or a decompressor that isn't open has nothing to clean up
+    bool const nothing_to_clean_up = InputType::CompressedDataBuf == m_input_type
+                                     || InputType::NotInitialized == m_input_type;
+
+    if (InputType::MemoryMappedCompressedFile == m_input_type) {
+        // Unmap the file if this decompressor currently has one mapped
+        if (m_memory_mapped_compressed_file.is_open()) {
+            m_memory_mapped_compressed_file.close();
+        }
+    } else if (InputType::File == m_input_type) {
+        // Free the read buffer and detach from the file reader, which this decompressor
+        // doesn't close
+        m_file_read_buffer.reset();
+        m_file_read_buffer_capacity = 0;
+        m_file_read_buffer_length = 0;
+        m_file_reader = nullptr;
+    } else if (false == nothing_to_clean_up) {
+        // Unexpected input type
+        throw OperationFailed(ErrorCodeUnsupported, __FILENAME__, __LINE__);
+    }
+    m_input_type = InputType::NotInitialized;
+}
+
+ErrorCode ZstdDecompressor::open(std::string const& compressed_file_path) {
+    if (InputType::NotInitialized != m_input_type) {
+        throw OperationFailed(ErrorCodeNotReady, __FILENAME__, __LINE__);
+    }
+
+    // Determine the file's size, which is used as the length of the read-only memory mapping
+    // created below
+    boost::system::error_code boost_error_code;
+    size_t compressed_file_size
+            = boost::filesystem::file_size(compressed_file_path, boost_error_code);
+    if (boost_error_code) {
+        SPDLOG_ERROR(
+                "ZstdDecompressor: Unable to obtain file size for '{}' - {}.",
+                compressed_file_path.c_str(),
+                boost_error_code.message().c_str()
+        );
+        return ErrorCodeFailure;
+    }
+
+    boost::iostreams::mapped_file_params memory_map_params;
+    memory_map_params.path = compressed_file_path;
+    memory_map_params.flags = boost::iostreams::mapped_file::readonly;
+    memory_map_params.length = compressed_file_size;
+    // Try to map it to the same memory location as the previous memory mapped file
+    memory_map_params.hint = m_memory_mapped_compressed_file.data();
+    m_memory_mapped_compressed_file.open(memory_map_params);
+    if (false == m_memory_mapped_compressed_file.is_open()) {
+        SPDLOG_ERROR(
+                "ZstdDecompressor: Unable to memory map the compressed file with path: {}",
+                compressed_file_path.c_str()
+        );
+        return ErrorCodeFailure;
+    }
+
+    // Only mark the decompressor as open once the mapping exists, so that a failure above leaves
+    // it uninitialized and open() can be retried
+    m_input_type = InputType::MemoryMappedCompressedFile;
+    m_compressed_stream_block = {m_memory_mapped_compressed_file.data(), compressed_file_size, 0};
+    reset_stream();
+    return ErrorCodeSuccess;
+}
+
+void ZstdDecompressor::reset_stream() {
+    if (InputType::File == m_input_type) {
+        // Rewind the file to where the compressed stream starts and discard buffered input
+        m_file_reader->seek_from_begin(m_file_reader_initial_pos);
+        m_file_read_buffer_length = 0;
+        m_compressed_stream_block.size = m_file_read_buffer_length;
+    }
+    // Restart decompression from the beginning of the stream
+    m_compressed_stream_block.pos = 0;
+    m_decompressed_stream_pos = 0;
+    ZSTD_initDStream(m_decompression_stream);
+}
+}  // namespace clp_s
diff --git a/components/core/src/clp_s/ZstdDecompressor.hpp b/components/core/src/clp_s/ZstdDecompressor.hpp
new file mode 100644
index 000000000..6382d54d3
--- /dev/null
+++ b/components/core/src/clp_s/ZstdDecompressor.hpp
@@ -0,0 +1,146 @@
+// Code from CLP
+
+#ifndef CLP_S_ZSTDDECOMPRESSOR_HPP
+#define CLP_S_ZSTDDECOMPRESSOR_HPP
+
+#include <memory>
+#include <string>
+
+#include <boost/iostreams/device/mapped_file.hpp>
+#include <zstd.h>
+
+#include "Decompressor.hpp"
+#include "TraceableException.hpp"
+
+namespace clp_s {
+class ZstdDecompressor : public Decompressor {
+public:
+    // Types
+    class OperationFailed : public TraceableException {
+    public:
+        // Constructors
+        OperationFailed(ErrorCode error_code, char const* const filename, int line_number)
+                : TraceableException(error_code, filename, line_number) {}
+    };
+
+    // Constructor
+    /**
+     * @throw Decompressor::OperationFailed if zstd decompressor stream cannot be initialized
+     */
+    ZstdDecompressor();
+
+    // Destructor
+    ~ZstdDecompressor();
+
+    // Explicitly disable copying; no move operations are declared, so moving is disabled as well
+    ZstdDecompressor(ZstdDecompressor const&) = delete;
+
+    ZstdDecompressor& operator=(ZstdDecompressor const&) = delete;
+
+    // Methods implementing the Decompressor interface
+    void open(char const* compressed_data_buf, size_t compressed_data_buf_size) override;
+
+    void open(FileReader& file_reader, size_t file_read_buffer_capacity) override;
+
+    void close() override;
+
+    // Methods
+    /**
+     * Initialize streaming decompressor to decompress from a compressed file specified by the given
+     * path
+     * @param compressed_file_path Path of the compressed file, which is memory mapped read-only
+     * at its full size
+     * @return ErrorCodeFailure if the file's size cannot be obtained or it cannot be memory mapped
+     * @return ErrorCodeSuccess on success
+     */
+    ErrorCode open(std::string const& compressed_file_path);
+
+    // Methods implementing the ReaderInterface
+    /**
+     * Tries to read up to a given number of bytes from the decompressor
+     * @param buf Destination buffer (NOTE(review): typed char const*, unlike the other readers)
+     * @param num_bytes_to_read The number of bytes to try and read
+     * @param num_bytes_read The actual number of bytes read
+     * @return Same as FileReader::try_read if the decompressor is attached to a file
+     * @return ErrorCodeNotInit if the decompressor is not open
+     * @return ErrorCodeBadParam if buf is invalid
+     * @return ErrorCodeEndOfFile on EOF
+     * @return ErrorCodeFailure on decompression failure
+     * @return ErrorCodeSuccess on success
+     */
+    ErrorCode try_read(char const* buf, size_t num_bytes_to_read, size_t& num_bytes_read);
+
+    /**
+     * Tries to read a number of bytes
+     * @param buf
+     * @param num_bytes Number of bytes to read
+     * @return Same as the underlying medium's try_read method
+     * @return ErrorCodeTruncated if 0 < # bytes read < num_bytes
+     */
+    ErrorCode try_read_exact_length(char* buf, size_t num_bytes);
+
+    /**
+     * Tries to read a numeric value
+     * @tparam ValueType
+     * @param value Receives sizeof(ValueType) bytes read directly into its object representation
+     * @return Same as the underlying medium's try_read_exact_length method
+     */
+    template <typename ValueType>
+    ErrorCode try_read_numeric_value(ValueType& value);
+
+    /**
+     * Tries to read a string
+     * @param str_length length of the string to read
+     * @param str
+     * @return Same as the underlying medium's try_read_exact_length method
+     */
+    ErrorCode try_read_string(size_t str_length, std::string& str);
+
+private:
+    // Enum class
+    enum class InputType {
+        NotInitialized,  // Placeholder state: it exists only so that using the decompressor
+                         // before its input type has been set can be reported as an error
+        CompressedDataBuf,
+        MemoryMappedCompressedFile,
+        File
+    };
+
+    // Methods
+    /**
+     * Reset streaming decompression state so it will start decompressing from the beginning of the
+     * stream afterwards
+     */
+    void reset_stream();
+
+    // Variables
+    InputType m_input_type;
+
+    // Compressed stream variables
+    ZSTD_DStream* m_decompression_stream;
+
+    boost::iostreams::mapped_file_source m_memory_mapped_compressed_file;
+    FileReader* m_file_reader;
+    size_t m_file_reader_initial_pos;
+    std::unique_ptr<char[]> m_file_read_buffer;
+    size_t m_file_read_buffer_length;
+    size_t m_file_read_buffer_capacity;
+
+    ZSTD_inBuffer m_compressed_stream_block{};
+
+    size_t m_decompressed_stream_pos;
+    size_t m_unused_decompressed_stream_block_size;
+    std::unique_ptr<char[]> m_unused_decompressed_stream_block_buffer;
+};
+
+// Reads sizeof(ValueType) raw bytes into value; the status from try_read_exact_length is
+// returned as-is (including ErrorCodeSuccess).
+template <typename ValueType>
+ErrorCode ZstdDecompressor::try_read_numeric_value(ValueType& value) {
+    auto* const value_bytes = reinterpret_cast<char*>(&value);
+    size_t const num_bytes = sizeof(value);
+    return try_read_exact_length(value_bytes, num_bytes);
+}
+}  // namespace clp_s
+
+#endif  // CLP_S_ZSTDDECOMPRESSOR_HPP
diff --git a/components/core/src/clp_s/clp-s.cpp b/components/core/src/clp_s/clp-s.cpp
new file mode 100644
index 000000000..98de6a4b9
--- /dev/null
+++ b/components/core/src/clp_s/clp-s.cpp
@@ -0,0 +1,125 @@
+#include <spdlog/sinks/stdout_sinks.h>
+
+#include "CommandLineArguments.hpp"
+#include "JsonConstructor.hpp"
+#include "JsonParser.hpp"
+#include "ReaderUtils.hpp"
+#include "search/ConvertToExists.hpp"
+#include "search/EmptyExpr.hpp"
+#include "search/EvaluateTimestampIndex.hpp"
+#include "search/kql/kql.hpp"
+#include "search/NarrowTypes.hpp"
+#include "search/OrOfAndForm.hpp"
+#include "search/Output.hpp"
+#include "search/SchemaMatch.hpp"
+#include "TimestampPattern.hpp"
+#include "Utils.hpp"
+
+using namespace clp_s::search;
+using clp_s::CommandLineArguments;
+
+int main(int argc, char const* argv[]) {
+    try {
+        auto stderr_logger = spdlog::stderr_logger_st("stderr");
+        spdlog::set_default_logger(stderr_logger);
+        spdlog::set_pattern("%Y-%m-%dT%H:%M:%S.%e%z [%l] %v");
+    } catch (std::exception& e) {
+        // NOTE: We can't log an exception if the logger couldn't be constructed
+        return -1;
+    }
+    // Parse the command line; only ParsingResult::Success continues past the switch below
+    CommandLineArguments command_line_arguments("clp-s");
+    auto parsing_result = command_line_arguments.parse_arguments(argc, argv);
+    switch (parsing_result) {
+        case CommandLineArguments::ParsingResult::Failure:
+            return -1;
+        case CommandLineArguments::ParsingResult::InfoCommand:
+            return 0;
+        case CommandLineArguments::ParsingResult::Success:
+            // Continue processing
+            break;
+    }
+
+    if (CommandLineArguments::Command::Compress == command_line_arguments.get_command()) {
+        clp_s::TimestampPattern::init();
+
+        clp_s::JsonParserOption option;
+        option.file_paths = command_line_arguments.get_file_paths();
+        option.archives_dir = command_line_arguments.get_archives_dir();
+        option.target_encoded_size = command_line_arguments.get_target_encoded_size();
+        option.compression_level = command_line_arguments.get_compression_level();
+        auto const& timestamp_key = command_line_arguments.get_timestamp_key();
+        if (false == timestamp_key.empty()) {
+            clp_s::StringUtils::tokenize_column_descriptor(timestamp_key, option.timestamp_column);
+        }
+
+        clp_s::JsonParser parser(option);
+        parser.parse();
+        parser.store();
+        parser.close();
+    } else if (CommandLineArguments::Command::Extract == command_line_arguments.get_command()) {
+        clp_s::JsonConstructorOption option;
+        option.archives_dir = command_line_arguments.get_archives_dir();
+        option.output_dir = command_line_arguments.get_output_dir();
+
+        clp_s::JsonConstructor constructor(option);
+        constructor.construct();
+        constructor.store();
+        constructor.close();
+    } else {
+        auto const& archives_dir = command_line_arguments.get_archives_dir();
+        auto const& query = command_line_arguments.get_query();
+        clp_s::TimestampPattern::init();
+        // Any command other than Compress/Extract is a search: parse the KQL query first
+        auto query_stream = std::istringstream(query);
+        auto expr = kql::parse_kql_expression(query_stream);
+        // NOTE(review): no null check on expr; confirm parse_kql_expression never returns nullptr
+        if (std::dynamic_pointer_cast<EmptyExpr>(expr)) {
+            SPDLOG_ERROR("Query '{}' is logically false", query);
+            return 1;
+        }
+        // Run the rewrite passes in order, bailing out as soon as one yields an EmptyExpr
+        OrOfAndForm standardize_pass;
+        if (expr = standardize_pass.run(expr); std::dynamic_pointer_cast<EmptyExpr>(expr)) {
+            SPDLOG_ERROR("Query '{}' is logically false", query);
+            return 1;
+        }
+
+        NarrowTypes narrow_pass;
+        if (expr = narrow_pass.run(expr); std::dynamic_pointer_cast<EmptyExpr>(expr)) {
+            SPDLOG_ERROR("Query '{}' is logically false", query);
+            return 1;
+        }
+
+        ConvertToExists convert_pass;
+        if (expr = convert_pass.run(expr); std::dynamic_pointer_cast<EmptyExpr>(expr)) {
+            SPDLOG_ERROR("Query '{}' is logically false", query);
+            return 1;
+        }
+
+        // skip decompressing the archive if we won't match based on
+        // the timestamp index
+        auto timestamp_dict = clp_s::ReaderUtils::read_timestamp_dictionary(archives_dir);
+        EvaluateTimestampIndex timestamp_index(timestamp_dict);
+        if (clp_s::EvaluatedValue::False == timestamp_index.run(expr)) {
+            SPDLOG_ERROR("No matching timestamp ranges for query '{}'", query);
+            return 1;
+        }
+
+        auto schema_tree = clp_s::ReaderUtils::read_schema_tree(archives_dir);
+        auto schemas = clp_s::ReaderUtils::read_schemas(archives_dir);
+
+        // Narrow against schemas
+        SchemaMatch match_pass(schema_tree, schemas);
+        if (expr = match_pass.run(expr); std::dynamic_pointer_cast<EmptyExpr>(expr)) {
+            SPDLOG_ERROR("No matching schemas for query '{}'", query);
+            return 1;
+        }
+
+        // output result
+        Output output(schema_tree, schemas, match_pass, expr, archives_dir, timestamp_dict);
+        output.filter();
+    }
+
+    return 0;
+}
diff --git a/components/core/src/clp_s/search/AndExpr.cpp b/components/core/src/clp_s/search/AndExpr.cpp
new file mode 100644
index 000000000..87a57509e
--- /dev/null
+++ b/components/core/src/clp_s/search/AndExpr.cpp
@@ -0,0 +1,57 @@
+#include "AndExpr.hpp"
+
+#include <iostream>
+
+namespace clp_s::search {
+AndExpr::AndExpr(bool inverted, Expression* parent) : Expression(inverted, parent) {}
+// Copy constructor: everything is delegated to Expression's copy constructor
+AndExpr::AndExpr(AndExpr const& expr) : Expression(expr) {}
+
+// Prints "AndExpr(<op>, <op>, ...)" to the print stream, prefixed with "!" when inverted. A
+// root expression (no parent) ends the line; a nested one only flushes the stream.
+void AndExpr::print() {
+    auto& out = get_print_stream();
+    out << (is_inverted() ? "!AndExpr(" : "AndExpr(");
+
+    bool is_first_operand = true;
+    for (auto it = op_begin(); it != op_end(); ++it) {
+        if (false == is_first_operand) {
+            out << ", ";
+        }
+        is_first_operand = false;
+        (*it)->print();
+    }
+    out << ")";
+
+    if (nullptr != get_parent()) {
+        out << std::flush;
+        return;
+    }
+    out << std::endl;
+}
+
+// Copy-constructs this node, then runs copy_replace on each operand slot of the new node.
+std::shared_ptr<Expression> AndExpr::copy() const {
+    std::shared_ptr<Expression> duplicate(new AndExpr(*this));
+    for (auto op_it = duplicate->op_begin(); op_it != duplicate->op_end(); ++op_it) {
+        std::static_pointer_cast<Expression>(*op_it)->copy_replace(duplicate.get(), op_it);
+    }
+    return duplicate;
+}
+
+std::shared_ptr<Expression> AndExpr::create(bool inverted, Expression* parent) {
+    return std::shared_ptr<Expression>(new AndExpr(inverted, parent));
+}
+
+std::shared_ptr<Expression> AndExpr::create(
+        std::shared_ptr<Expression>& op1,
+        std::shared_ptr<Expression>& op2,
+        bool inverted,
+        Expression* parent
+) {
+    auto and_expr = create(inverted, parent);
+    op1->copy_append(and_expr.get());
+    op2->copy_append(and_expr.get());
+    return and_expr;
+}
+}  // namespace clp_s::search
diff --git a/components/core/src/clp_s/search/AndExpr.hpp b/components/core/src/clp_s/search/AndExpr.hpp
new file mode 100644
index 000000000..3ba614ff5
--- /dev/null
+++ b/components/core/src/clp_s/search/AndExpr.hpp
@@ -0,0 +1,58 @@
+#ifndef CLP_S_SEARCH_ANDEXPR_HPP
+#define CLP_S_SEARCH_ANDEXPR_HPP
+
+#include "Expression.hpp"
+
+namespace clp_s::search {
+/**
+ * Class representing a logical And operation across all
+ * children in its OpList. Can have arbitrarily many children.
+ */
+class AndExpr : public Expression {
+public:
+    void print() override;
+
+    /**
+     * And expressions only have other expressions as children by construction
+     */
+    bool has_only_expression_operands() override { return true; }
+
+    /**
+     * Deep copy
+     * @return A deep copy of this expression
+     */
+    std::shared_ptr<Expression> copy() const override;
+
+    /**
+     * Create an empty And expression which can optionally be inverted and attached to a parent.
+     * Children can be added via mutators inherited from Expression.
+     * @param inverted expression is inverted when true
+     * @param parent parent this expression is attached to
+     * @return Newly created And expression
+     */
+    static std::shared_ptr<Expression> create(bool inverted = false, Expression* parent = nullptr);
+
+    /**
+     * Create an And expression with two children
+     * @param op1 the first child operand
+     * @param op2 the second child operand
+     * @param inverted expression is inverted when true
+     * @param parent parent this expression is attached to
+     * @return Newly created And expression
+     */
+    static std::shared_ptr<Expression> create(
+            std::shared_ptr<Expression>& op1,
+            std::shared_ptr<Expression>& op2,
+            bool inverted = false,
+            Expression* parent = nullptr
+    );
+
+private:
+    // Constructor
+    explicit AndExpr(bool inverted = false, Expression* parent = nullptr);
+
+    AndExpr(AndExpr const&);
+};
+}  // namespace clp_s::search
+
+#endif  // CLP_S_SEARCH_ANDEXPR_HPP
diff --git a/components/core/src/clp_s/search/BooleanLiteral.cpp b/components/core/src/clp_s/search/BooleanLiteral.cpp
new file mode 100644
index 000000000..127e085d3
--- /dev/null
+++ b/components/core/src/clp_s/search/BooleanLiteral.cpp
@@ -0,0 +1,44 @@
+#include "BooleanLiteral.hpp"
+
+namespace clp_s::search {
+// Wraps the given bool in a new BooleanLiteral.
+std::shared_ptr<Literal> BooleanLiteral::create_from_bool(bool v) {
+    return std::shared_ptr<Literal>(new BooleanLiteral(v));
+}
+
+// Accepts only the exact strings "true" and "false"; any other input yields nullptr.
+std::shared_ptr<Literal> BooleanLiteral::create_from_string(std::string const& v) {
+    bool const is_true = "true" == v;
+    if (is_true || "false" == v) {
+        return create_from_bool(is_true);
+    }
+    return nullptr;
+}
+
+/**
+ * Writes this literal to the print stream as the bare word "true" or "false"; nothing else
+ * (no separator or newline) is written.
+ */
+void BooleanLiteral::print() {
+    char const* const text = m_v ? "true" : "false";
+    get_print_stream() << text;
+}
+
+// Yields "true"/"false" for EQ and NEQ filters only; otherwise ret is left untouched.
+bool BooleanLiteral::as_var_string(std::string& ret, FilterOperation op) {
+    if (FilterOperation::EQ != op && FilterOperation::NEQ != op) {
+        return false;
+    }
+    ret = m_v ? "true" : "false";
+    return true;
+}
+
+// Yields the wrapped bool for EQ and NEQ filters only; otherwise ret is left untouched.
+bool BooleanLiteral::as_bool(bool& ret, FilterOperation op) {
+    if (FilterOperation::EQ != op && FilterOperation::NEQ != op) {
+        return false;
+    }
+    ret = m_v;
+    return true;
+}
+}  // namespace clp_s::search
diff --git a/components/core/src/clp_s/search/BooleanLiteral.hpp b/components/core/src/clp_s/search/BooleanLiteral.hpp
new file mode 100644
index 000000000..af409b81d
--- /dev/null
+++ b/components/core/src/clp_s/search/BooleanLiteral.hpp
@@ -0,0 +1,58 @@
+#ifndef CLP_S_SEARCH_BOOLEANLITERAL_HPP
+#define CLP_S_SEARCH_BOOLEANLITERAL_HPP
+
+#include <memory>
+#include <string>
+#include <variant>
+
+#include "Literal.hpp"
+
+namespace clp_s::search {
+/**
+ * Class representing a Boolean literal in the search AST
+ */
+class BooleanLiteral : public Literal {
+public:
+    // Deleted copy
+    BooleanLiteral(BooleanLiteral const&) = delete;
+    BooleanLiteral& operator=(BooleanLiteral const&) = delete;
+
+    /**
+     * Create a bool literal
+     * @param v the value of the boolean
+     * @return A Boolean literal
+     */
+    static std::shared_ptr<Literal> create_from_bool(bool v);
+
+    /**
+     * Attempt to create a bool literal from a string
+     * @param v the string we are attempting to convert to bool
+     * @return A Boolean literal, or nullptr if the string does not represent a bool
+     */
+    static std::shared_ptr<Literal> create_from_string(std::string const& v);
+
+    // Methods inherited from Value
+    void print() override;
+
+    // Methods inherited from Literal
+    bool matches_type(LiteralType type) override { return type & LiteralType::BooleanT; }
+
+    bool matches_any(LiteralTypeBitmask mask) override { return mask & LiteralType::BooleanT; }
+
+    bool matches_exactly(LiteralTypeBitmask mask) override { return mask == LiteralType::BooleanT; }
+
+    bool as_var_string(std::string& ret, FilterOperation op) override;
+
+    bool as_bool(bool& ret, FilterOperation op) override;
+
+private:
+    bool m_v{false};  // wrapped value; initialized so the defaulted constructor is well-defined
+
+    // Constructors (private: instances are created through the create_* factories above)
+    BooleanLiteral() = default;
+
+    explicit BooleanLiteral(bool v) : m_v(v) {}
+};
+}  // namespace clp_s::search
+
+#endif  // CLP_S_SEARCH_BOOLEANLITERAL_HPP
diff --git a/components/core/src/clp_s/search/ColumnDescriptor.cpp b/components/core/src/clp_s/search/ColumnDescriptor.cpp
new file mode 100644
index 000000000..7c82310ad
--- /dev/null
+++ b/components/core/src/clp_s/search/ColumnDescriptor.cpp
@@ -0,0 +1,90 @@
+#include "ColumnDescriptor.hpp"
+
+#include <memory>
+
+namespace clp_s::search {
+/**
+ * Converts raw descriptor strings into DescriptorTokens, one per input string, in order.
+ */
+DescriptorList tokenize_descriptor(std::vector<std::string> const& descriptors) {
+    DescriptorList tokens(descriptors.begin(), descriptors.end());
+    return tokens;
+}
+
+// Recomputes m_pure_wildcard and m_unresolved_descriptors from the current descriptor list.
+void ColumnDescriptor::check_and_set_unresolved_descriptor_flag() {
+    m_pure_wildcard = 1 == m_descriptors.size() && m_descriptors.front().wildcard();
+    // Stop at the first token that is a wildcard or contains one
+    auto it = m_descriptors.cbegin();
+    while (it != m_descriptors.cend() && false == (it->wildcard() || it->regex())) {
+        ++it;
+    }
+    m_unresolved_descriptors = it != m_descriptors.cend();
+}
+
+// Every constructor starts with m_flags = cAllTypes and derives the wildcard flags from its tokens
+ColumnDescriptor::ColumnDescriptor(std::string const& descriptor) : m_flags(cAllTypes) {
+    m_descriptors.emplace_back(descriptor);
+    check_and_set_unresolved_descriptor_flag();
+}
+
+ColumnDescriptor::ColumnDescriptor(std::vector<std::string> const& descriptors)
+        : m_descriptors(tokenize_descriptor(descriptors)),
+          m_flags(cAllTypes) {
+    check_and_set_unresolved_descriptor_flag();
+}
+
+ColumnDescriptor::ColumnDescriptor(DescriptorList const& descriptors)
+        : m_descriptors(descriptors),
+          m_flags(cAllTypes) {
+    check_and_set_unresolved_descriptor_flag();
+}
+
+std::shared_ptr<ColumnDescriptor> ColumnDescriptor::create(std::string const& descriptor) {
+    return std::shared_ptr<ColumnDescriptor>(new ColumnDescriptor(descriptor));
+}
+// Overload taking one string per descriptor token (each is wrapped in a DescriptorToken)
+std::shared_ptr<ColumnDescriptor> ColumnDescriptor::create(
+        std::vector<std::string> const& descriptors
+) {
+    return std::shared_ptr<ColumnDescriptor>(new ColumnDescriptor(descriptors));
+}
+// Overload taking an already-tokenized descriptor list, which is copied into the new column
+std::shared_ptr<ColumnDescriptor> ColumnDescriptor::create(DescriptorList const& descriptors) {
+    return std::shared_ptr<ColumnDescriptor>(new ColumnDescriptor(descriptors));
+}
+
+std::shared_ptr<ColumnDescriptor> ColumnDescriptor::copy() {
+    return std::make_shared<ColumnDescriptor>(*this);  // member-wise copy (implicit copy ctor)
+}
+
+// Prints as ColumnDescriptor<type,type,...>("token", "token", ...) with no trailing newline.
+void ColumnDescriptor::print() {
+    auto& out = get_print_stream();
+    out << "ColumnDescriptor<";
+    for (uint32_t bit = LiteralType::TypesBegin; bit < LiteralType::TypesEnd; bit <<= 1) {
+        if (!(m_flags & bit)) {
+            continue;
+        }
+        out << Literal::type_to_string(static_cast<LiteralType>(bit));
+        // m_flags still has a bit above this one, so separate it from what follows
+        if ((bit << 1) <= m_flags) {
+            out << ",";
+        }
+    }
+    out << ">(";
+    bool is_first_token = true;
+    for (auto const& token : m_descriptors) {
+        if (false == is_first_token) {
+            out << ", ";
+        }
+        is_first_token = false;
+        out << "\"" << token.get_token() << "\"";
+    }
+    out << ")";
+}
+
+void ColumnDescriptor::add_unresolved_tokens(DescriptorList::iterator it) {
+    m_unresolved_tokens.assign(it, descriptor_end());  // copies [it, end) of m_descriptors
+}
+}  // namespace clp_s::search
diff --git a/components/core/src/clp_s/search/ColumnDescriptor.hpp b/components/core/src/clp_s/search/ColumnDescriptor.hpp
new file mode 100644
index 000000000..b0260eb67
--- /dev/null
+++ b/components/core/src/clp_s/search/ColumnDescriptor.hpp
@@ -0,0 +1,214 @@
+#ifndef CLP_S_SEARCH_COLUMNDESCRIPTOR_HPP
+#define CLP_S_SEARCH_COLUMNDESCRIPTOR_HPP
+
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "Literal.hpp"
+
+namespace clp_s::search {
+/**
+ * Class representing a token used to describe one level of hierarchy in a column.
+ */
+class DescriptorToken {
+public:
+    // Constructors
+    /**
+     * Create an empty token: both flags are false and the token string is empty
+     */
+    DescriptorToken() = default;
+
+    /**
+     * Initialize the token from a string and set flags based on whether the token contains
+     * wildcards:
+     * - wildcard() is true only when the token is exactly "*"
+     * - regex() is true when '*' appears anywhere in the token (so it is also true for "*")
+     * The initializers are listed in member declaration order, which is the order they actually
+     * run in.
+     * @param token the string to initialize the token from
+     */
+    explicit DescriptorToken(std::string const& token)
+            : m_wildcard("*" == token),
+              m_regex(std::string::npos != token.find('*')),
+              m_token(token) {}
+
+    /**
+     * Whether the descriptor is a wildcard
+     * @return true if the descriptor is a single wildcard
+     */
+    bool wildcard() const { return m_wildcard; }
+
+    /**
+     * Whether the descriptor contains a wildcard somewhere
+     * TODO: tokens mixing wildcards with other characters aren't currently supported; this flag
+     * is consulted when deciding whether a column's descriptors are unresolved
+     * @return true if the descriptor contains a wildcard
+     */
+    bool regex() const { return m_regex; }
+
+    /**
+     * Get a reference to the underlying token string
+     * @return a reference to the underlying string
+     */
+    std::string const& get_token() const { return m_token; }
+
+private:
+    // Flags derived from the token string by the constructor. They are declared before m_token,
+    // so they are also initialized before it.
+    bool m_wildcard{};
+    bool m_regex{};
+    std::string m_token;
+};
+
+using DescriptorList = std::vector<DescriptorToken>;
+// Builds a DescriptorList holding one DescriptorToken per input string
+DescriptorList tokenize_descriptor(std::vector<std::string> const& descriptors);
+
+/**
+ * Class representing a Column in the Search AST. The Column is specified
+ * by a list of DescriptorTokens which may be wildcards.
+ *
+ * Currently only pure wildcard DescriptorTokens are supported -- some descriptor
+ * in the list of descriptors can be a wildcard, but individual descriptors can not mix
+ * wildcards with other characters.
+ */
+class ColumnDescriptor : public Literal {
+public:
+    /**
+     * Create a ColumnDescriptor from a single token or from a list of tokens
+     * @param descriptor(s) the token or list of tokens making up the descriptor
+     * @return A ColumnDescriptor
+     */
+    static std::shared_ptr<ColumnDescriptor> create(std::string const& descriptor);
+    static std::shared_ptr<ColumnDescriptor> create(std::vector<std::string> const& descriptors);
+    static std::shared_ptr<ColumnDescriptor> create(DescriptorList const& descriptors);
+
+    /**
+     * Deep copy of this ColumnDescriptor
+     * @return A deep copy of this Column descriptor
+     */
+    std::shared_ptr<ColumnDescriptor> copy();
+
+    /**
+     * Get iterators to this Column's list of descriptors
+     * @return Iterators to the beginning and end of the list of descriptors
+     */
+    DescriptorList::iterator descriptor_begin() { return m_descriptors.begin(); }
+
+    DescriptorList::iterator descriptor_end() { return m_descriptors.end(); }
+
+    /**
+     * @return A reference to the underlying list of descriptors.
+     * Useful when the descriptors need to be mutated e.g. when being resolved.
+     */
+    DescriptorList& get_descriptor_list() { return m_descriptors; }
+
+    /**
+     * Set the unresolved tokens for this column descriptor to a suffix of the descriptor list.
+     * Used for array searches.
+     * FIXME: this is incredibly confusing to use
+     * @param it the iterator to start from when setting unresolved tokens to the suffix
+     */
+    void add_unresolved_tokens(DescriptorList::iterator it);
+
+    /**
+     * Set types this column can match
+     * @param flags that can be matched by this column
+     */
+    void set_matching_types(LiteralTypeBitmask flags) { m_flags = flags; }
+
+    /**
+     * Set type this column can match
+     * @param type that can be matched by this column
+     */
+    void set_matching_type(LiteralType type) { m_flags = type; }
+
+    /**
+     * Remove types from set of types this column can match
+     * @param flags to be removed
+     */
+    void remove_matching_types(LiteralTypeBitmask flags) { m_flags &= ~flags; }
+
+    /**
+     * Remove type from set of types this column can match
+     * @param type to be removed
+     */
+    void remove_matching_type(LiteralType type) { m_flags &= ~type; }
+
+    /**
+     * @return the CLJ column Id this Column represents, or -1 if it was never set.
+     */
+    int32_t get_column_id() const { return m_id; }
+
+    /**
+     * Set the CLJ column Id this column represents
+     * @param id the CLJ column Id to set this column to
+     */
+    void set_column_id(int32_t id) { m_id = id; }
+
+    /**
+     * Get the list of unresolved tokens used for array search
+     * @return the list of unresolved tokens
+     * FIXME: should be reference?
+     */
+    DescriptorList get_unresolved_tokens() const { return m_unresolved_tokens; }
+
+    /**
+     * Whether the Column has any unresolved tokens for array search
+     * @return true if there are unresolved tokens for array search
+     */
+    bool has_unresolved_tokens() const { return !m_unresolved_tokens.empty(); }
+
+    // Safe only if this column has been explicitly set to
+    // only have a single type
+    LiteralType get_literal_type() const { return static_cast<LiteralType>(m_flags); }
+
+    /**
+     * Whether the list of Descriptor's contains any wildcards
+     * @return true if the descriptor contains any wildcards that need to be resolved
+     */
+    bool is_unresolved_descriptor() const { return m_unresolved_descriptors; }
+
+    /**
+     * Whether this Column is a single wildcard
+     * @return true if this descriptor is just a single wildcard
+     */
+    bool is_pure_wildcard() const { return m_pure_wildcard; }
+
+    // Methods inherited from Value
+    void print() override;
+
+    // Methods inherited from Literal
+    // ColumnDescriptor can implicitly match several different types at the same time.
+    bool matches_type(LiteralType type) override { return m_flags & type; }
+
+    bool matches_any(LiteralTypeBitmask mask) override { return m_flags & mask; }
+
+    bool matches_exactly(LiteralTypeBitmask mask) override { return m_flags == mask; }
+
+private:
+    DescriptorList m_descriptors;  // list of descriptors describing the column
+    DescriptorList m_unresolved_tokens;  // unresolved tokens used for array search
+    LiteralTypeBitmask m_flags;  // set of types this column can match
+    int32_t m_id{-1};  // unambiguous CLJ column id this column represents; -1 while unset.
+    bool m_unresolved_descriptors;  // true if contains wildcards
+    bool m_pure_wildcard;  // true if column is single wildcard
+
+    // Constructors
+    explicit ColumnDescriptor(std::string const&);
+
+    explicit ColumnDescriptor(std::vector<std::string> const&);
+
+    explicit ColumnDescriptor(DescriptorList const&);
+
+    /**
+     * Scan the list of descriptors to check if they contain wildcards and
+     * set the appropriate flags.
+     */
+    void check_and_set_unresolved_descriptor_flag();
+};
+}  // namespace clp_s::search
+
+#endif  // CLP_S_SEARCH_COLUMNDESCRIPTOR_HPP
diff --git a/components/core/src/clp_s/search/ConstantProp.cpp b/components/core/src/clp_s/search/ConstantProp.cpp
new file mode 100644
index 000000000..0f19288bd
--- /dev/null
+++ b/components/core/src/clp_s/search/ConstantProp.cpp
@@ -0,0 +1,43 @@
+#include "ConstantProp.hpp"
+
+#include <vector>
+
+#include "AndExpr.hpp"
+#include "EmptyExpr.hpp"
+#include "OrExpr.hpp"
+
+namespace clp_s::search {
+std::shared_ptr<Expression> ConstantProp::run(std::shared_ptr<Expression>& expr) {
+    return propagate_empty(expr);  // the caller's expr pointer itself is never reassigned here
+}
+
+std::shared_ptr<Expression> ConstantProp::propagate_empty(std::shared_ptr<Expression> cur) {
+    if (std::dynamic_pointer_cast<OrExpr>(cur)) {
+        std::vector<OpList::iterator> deleted;
+        for (auto it = cur->op_begin(); it != cur->op_end(); it++) {
+            auto new_child = propagate_empty(std::static_pointer_cast<Expression>(*it));
+            if (std::dynamic_pointer_cast<EmptyExpr>(new_child)) {
+                deleted.push_back(it);
+            }
+        }
+        // Every operand reduced to empty, so the Or as a whole is replaced by an EmptyExpr
+        if (deleted.size() == cur->get_op_list().size()) {
+            return EmptyExpr::create(cur->get_parent());
+        }
+        // NOTE(review): assumes OpList::erase keeps the other saved iterators valid — confirm
+        for (auto const& it : deleted) {
+            cur->get_op_list().erase(it);
+        }
+    } else if (std::dynamic_pointer_cast<AndExpr>(cur)) {
+        for (auto it = cur->op_begin(); it != cur->op_end(); it++) {
+            auto new_child = propagate_empty(std::static_pointer_cast<Expression>(*it));
+            if (std::dynamic_pointer_cast<EmptyExpr>(new_child)) {
+                new_child->set_parent(cur->get_parent());
+                return new_child;
+            }
+        }
+    }
+
+    return cur;
+}
+}  // namespace clp_s::search
diff --git a/components/core/src/clp_s/search/ConstantProp.hpp b/components/core/src/clp_s/search/ConstantProp.hpp
new file mode 100644
index 000000000..b17032001
--- /dev/null
+++ b/components/core/src/clp_s/search/ConstantProp.hpp
@@ -0,0 +1,23 @@
+#ifndef CLP_S_SEARCH_CONSTANTPROP_HPP
+#define CLP_S_SEARCH_CONSTANTPROP_HPP
+
+#include "Transformation.hpp"
+
+namespace clp_s::search {
+// Constant propagate empty expressions keeping all remaining data IN PLACE
+class ConstantProp : public Transformation {
+public:
+    // Methods inherited from Transformation
+    std::shared_ptr<Expression> run(std::shared_ptr<Expression>& expr) override;
+
+private:
+    /**
+     * Propagate empty expressions upwards through Or and And nodes, modifying cur in place
+     * @param cur root of the (sub)tree to simplify
+     * @return cur, or an EmptyExpr in its place if cur as a whole reduces to empty
+     */
+    static std::shared_ptr<Expression> propagate_empty(std::shared_ptr<Expression> cur);
+};
+}  // namespace clp_s::search
+
+#endif  // CLP_S_SEARCH_CONSTANTPROP_HPP
diff --git a/components/core/src/clp_s/search/ConvertToExists.cpp b/components/core/src/clp_s/search/ConvertToExists.cpp
new file mode 100644
index 000000000..c926a0552
--- /dev/null
+++ b/components/core/src/clp_s/search/ConvertToExists.cpp
@@ -0,0 +1,116 @@
+#include "ConvertToExists.hpp"
+
+#include "ColumnDescriptor.hpp"
+#include "ConstantProp.hpp"
+#include "EmptyExpr.hpp"
+#include "FilterExpr.hpp"
+#include "Literal.hpp"
+#include "OrExpr.hpp"
+#include "OrOfAndForm.hpp"
+
+namespace clp_s::search {
+std::shared_ptr<Expression> ConvertToExists::run(std::shared_ptr<Expression>& expr) {
+    expr = convert(expr);
+
+    // convert() may have introduced new OrExprs, so restore the standard form first
+    if (m_needs_standard_form) {
+        expr = OrOfAndForm{}.run(expr);
+    }
+
+    // convert() may have introduced EmptyExprs which still need to be propagated
+    if (m_needs_constant_prop) {
+        expr = ConstantProp{}.run(expr);
+    }
+
+    return expr;
+}
+
+std::shared_ptr<Expression> ConvertToExists::convert(std::shared_ptr<Expression> cur) {
+    if (cur->has_only_expression_operands()) {
+        for (auto it = cur->op_begin(); it != cur->op_end(); it++) {
+            auto child = std::static_pointer_cast<Expression>(*it);
+            auto new_child = convert(child);
+            if (new_child != child) {
+                new_child->copy_replace(cur.get(), it);
+            }
+        }
+    } else if (auto filter = std::dynamic_pointer_cast<FilterExpr>(cur)) {
+        // TODO: will have to change if we start supporting multi column expressions
+        auto column = filter->get_column();
+        auto op = filter->get_operation();
+
+        if (op == FilterOperation::EXISTS || op == FilterOperation::NEXISTS) {
+            if (false == filter->is_inverted()) {
+                return cur;
+            }
+
+            // Fold the inversion into the operation; FilterExpr::create copies the column
+            FilterOperation new_op = (op == FilterOperation::EXISTS) ? FilterOperation::NEXISTS
+                                                                     : FilterOperation::EXISTS;
+            return FilterExpr::create(column, new_op);
+        }
+
+        auto literal = filter->get_operand();
+        if (nullptr == literal) {
+            // get_operand() returns nullptr when the filter has no literal; nothing to convert
+            return cur;
+        }
+
+        // An inverted NEQ is equivalent to EQ (and vice versa)
+        bool const exists
+                = op == (filter->is_inverted() ? FilterOperation::NEQ : FilterOperation::EQ);
+
+        if (literal->as_any(op)) {
+            if (exists) {
+                return FilterExpr::create(column, FilterOperation::EXISTS);
+            } else {
+                return FilterExpr::create(column, FilterOperation::NEXISTS);
+            }
+        } else if (literal->as_null(op)) {
+            auto new_col = column->copy();
+            auto new_col_null = column->copy();
+            if (exists) {
+                m_needs_standard_form = true;
+                new_col->remove_matching_types(
+                        cAllTypes
+                        & ~(LiteralType::ArrayT | LiteralType::ClpStringT | LiteralType::VarStringT)
+                );
+                new_col_null->remove_matching_types(cAllTypes & ~LiteralType::NullT);
+                std::shared_ptr<Expression> non_null_filter;
+                if (new_col->matches_any(cAllTypes)) {
+                    non_null_filter = FilterExpr::create(new_col, FilterOperation::EQ);
+                    non_null_filter->add_operand(literal);
+                } else {
+                    non_null_filter = EmptyExpr::create();
+                    m_needs_constant_prop = true;
+                }
+
+                std::shared_ptr<Expression> null_filter;
+                if (new_col_null->matches_any(cAllTypes)) {
+                    null_filter = FilterExpr::create(new_col_null, FilterOperation::EXISTS);
+                } else {
+                    null_filter = EmptyExpr::create();
+                    m_needs_constant_prop = true;
+                }
+
+                return OrExpr::create(null_filter, non_null_filter);
+            } else {
+                if (new_col->matches_type(LiteralType::NullT)) {
+                    // != null supersedes all other types
+                    new_col->set_matching_types(cAllTypes & ~LiteralType::NullT);
+                    return FilterExpr::create(new_col, FilterOperation::EXISTS);
+                } else {
+                    new_col->remove_matching_type(LiteralType::NullT);
+                    if (new_col->matches_any(cAllTypes)) {
+                        return FilterExpr::create(new_col, FilterOperation::EXISTS);
+                    } else {
+                        m_needs_constant_prop = true;
+                        return EmptyExpr::create();
+                    }
+                }
+            }
+        }
+    }
+    return cur;
+}
+}  // namespace clp_s::search
diff --git a/components/core/src/clp_s/search/ConvertToExists.hpp b/components/core/src/clp_s/search/ConvertToExists.hpp
new file mode 100644
index 000000000..6e417806a
--- /dev/null
+++ b/components/core/src/clp_s/search/ConvertToExists.hpp
@@ -0,0 +1,29 @@
+#ifndef CLP_S_SEARCH_CONVERTTOEXISTS_HPP
+#define CLP_S_SEARCH_CONVERTTOEXISTS_HPP
+
+#include "Transformation.hpp"
+
+namespace clp_s::search {
+// Must run after NarrowTypes pass
+class ConvertToExists : public Transformation {
+public:
+    // Constructors
+    ConvertToExists() : m_needs_constant_prop(false), m_needs_standard_form(false) {}
+
+    // Methods inherited from Transformation
+    std::shared_ptr<Expression> run(std::shared_ptr<Expression>& expr) override;
+
+private:
+    bool m_needs_constant_prop;
+    bool m_needs_standard_form;
+
+    /**
+     * Convert filters to exists form (see as_any()/as_null() on the filter's literal)
+     * @param cur the expression to convert
+     * @return cur when no conversion applies, otherwise the expression that replaces it
+     */
+    std::shared_ptr<Expression> convert(std::shared_ptr<Expression> cur);
+};
+}  // namespace clp_s::search
+
+#endif  // CLP_S_SEARCH_CONVERTTOEXISTS_HPP
diff --git a/components/core/src/clp_s/search/DateLiteral.cpp b/components/core/src/clp_s/search/DateLiteral.cpp
new file mode 100644
index 000000000..6296baa64
--- /dev/null
+++ b/components/core/src/clp_s/search/DateLiteral.cpp
@@ -0,0 +1,92 @@
+#include "DateLiteral.hpp"
+
+#include <sstream>
+
+#include "../TimestampPattern.hpp"
+#include "SearchUtils.hpp"
+
+namespace clp_s::search {
+DateLiteral::DateLiteral(double v, std::string s) : Integral(v), m_epoch_str(std::move(s)) {}
+
+DateLiteral::DateLiteral(epochtime_t v, std::string s) : Integral(v), m_epoch_str(std::move(s)) {}
+
+std::shared_ptr<Literal> DateLiteral::create_from_float(double v) {
+    // The default stream formatting of v is kept as the literal's string form
+    std::ostringstream s;
+    s << v;
+    return std::shared_ptr<Literal>(static_cast<Literal*>(new DateLiteral(v, s.str())));
+}
+
+std::shared_ptr<Literal> DateLiteral::create_from_int(epochtime_t v) {
+    // The default stream formatting of v is kept as the literal's string form
+    std::ostringstream s;
+    s << v;
+    return std::shared_ptr<Literal>(static_cast<Literal*>(new DateLiteral(v, s.str())));
+}
+
+std::shared_ptr<Literal> DateLiteral::create_from_string(std::string const& v) {
+    std::istringstream ss(v);
+    epochtime_t tmp_int_epoch;
+    double tmp_double_epoch;
+
+    ss >> std::noskipws >> tmp_int_epoch;
+    if (false == ss.fail() && ss.eof()) {
+        return std::shared_ptr<Literal>(static_cast<Literal*>(new DateLiteral(tmp_int_epoch, v)));
+    }
+
+    ss = std::istringstream(v);
+    ss >> std::noskipws >> tmp_double_epoch;
+    if (false == ss.fail() && ss.eof()) {
+        return std::shared_ptr<Literal>(static_cast<Literal*>(new DateLiteral(tmp_double_epoch, v))
+        );
+    }
+
+    // The begin/end positions are out-parameters only; their values are not used afterwards
+    size_t timestamp_begin_pos = 0, timestamp_end_pos = 0;
+    auto pattern = TimestampPattern::search_known_ts_patterns(
+            v,
+            tmp_int_epoch,
+            timestamp_begin_pos,
+            timestamp_end_pos
+    );
+    if (pattern == nullptr) {
+        return std::shared_ptr<Literal>(nullptr);
+    }
+
+    return std::shared_ptr<Literal>(static_cast<Literal*>(new DateLiteral(tmp_int_epoch, v)));
+}
+
+void DateLiteral::print() {
+    get_print_stream() << m_epoch_str;
+}
+
+bool DateLiteral::as_clp_string(std::string& ret, FilterOperation op) {
+    // Range comparisons are never answered with the string form of the date
+    bool const is_range_op = FilterOperation::LT == op || FilterOperation::GT == op
+                             || FilterOperation::LTE == op || FilterOperation::GTE == op;
+    // Only a string containing a space is returned as a CLP string
+    bool const has_space = std::string::npos != m_epoch_str.find(' ');
+
+    if (is_range_op || false == has_space) {
+        return false;
+    }
+
+    ret = m_epoch_str;
+    return true;
+}
+
+bool DateLiteral::as_var_string(std::string& ret, FilterOperation op) {
+    // Range comparisons are never answered with the string form of the date
+    bool const is_range_op = FilterOperation::LT == op || FilterOperation::GT == op
+                             || FilterOperation::LTE == op || FilterOperation::GTE == op;
+    // Only a string without any space is returned as a variable string
+    bool const has_space = std::string::npos != m_epoch_str.find(' ');
+
+    if (is_range_op || has_space) {
+        return false;
+    }
+
+    ret = m_epoch_str;
+    return true;
+}
+}  // namespace clp_s::search
diff --git a/components/core/src/clp_s/search/DateLiteral.hpp b/components/core/src/clp_s/search/DateLiteral.hpp
new file mode 100644
index 000000000..09df3fb03
--- /dev/null
+++ b/components/core/src/clp_s/search/DateLiteral.hpp
@@ -0,0 +1,65 @@
+#ifndef CLP_S_SEARCH_DATELITERAL_HPP
+#define CLP_S_SEARCH_DATELITERAL_HPP
+
+#include <memory>
+
+#include "../Defs.hpp"
+#include "Integral.hpp"
+
+namespace clp_s::search {
+constexpr LiteralTypeBitmask cDateLiteralTypes = EpochDateT | FloatDateT;
+
+/**
+ * Class for Date literal in the search AST. Represents time
+ * in epoch time.
+ */
+class DateLiteral : public Integral {
+public:
+    // Deleted copy
+    DateLiteral(DateLiteral const&) = delete;
+    DateLiteral& operator=(DateLiteral const&) = delete;
+
+    /**
+     * Create a Date literal from a numeric timestamp
+     * @param v the timestamp, either as a double or as an integer epoch time
+     * @return A Date literal
+     */
+    static std::shared_ptr<Literal> create_from_float(double v);
+    static std::shared_ptr<Literal> create_from_int(epochtime_t v);
+
+    /**
+     * Attempt to create a Date literal from string. Tries to parse the string as an integer,
+     * then as a double, and finally using the known patterns of TimestampPattern.
+     * @return A Date Literal or nullptr if the string can not be parsed as date.
+     */
+    static std::shared_ptr<Literal> create_from_string(std::string const& v);
+
+    // Methods inherited from Value
+    void print() override;
+
+    // Methods inherited from Literal
+    bool matches_type(LiteralType type) override { return type & cDateLiteralTypes; }
+
+    bool matches_any(LiteralTypeBitmask mask) override { return mask & cDateLiteralTypes; }
+
+    bool matches_exactly(LiteralTypeBitmask mask) override { return mask == cDateLiteralTypes; }
+
+    bool as_epoch_date() override { return true; }
+
+    bool as_float_date() override { return true; }
+
+    bool as_clp_string(std::string& ret, FilterOperation op) override;
+
+    bool as_var_string(std::string& ret, FilterOperation op) override;
+
+private:
+    std::string m_epoch_str;
+
+    // Constructors
+    explicit DateLiteral(double v, std::string s);
+
+    explicit DateLiteral(epochtime_t v, std::string s);
+};
+}  // namespace clp_s::search
+
+#endif  // CLP_S_SEARCH_DATELITERAL_HPP
diff --git a/components/core/src/clp_s/search/EmptyExpr.cpp b/components/core/src/clp_s/search/EmptyExpr.cpp
new file mode 100644
index 000000000..201ef739a
--- /dev/null
+++ b/components/core/src/clp_s/search/EmptyExpr.cpp
@@ -0,0 +1,27 @@
+#include "EmptyExpr.hpp"
+
+namespace clp_s::search {
+EmptyExpr::EmptyExpr(Expression* parent) : Expression(false, parent) {}
+
+EmptyExpr::EmptyExpr(EmptyExpr const& expr) : Expression(expr) {}
+
+std::shared_ptr<Expression> EmptyExpr::create(Expression* parent) {
+    return std::shared_ptr<Expression>(static_cast<Expression*>(new EmptyExpr(parent)));
+}
+
+void EmptyExpr::print() {
+    // A top-level expression ends the line; a nested one only flushes the stream
+    auto& out = get_print_stream();
+    out << "EmptyExpr()";
+    if (nullptr != get_parent()) {
+        out << std::flush;
+    } else {
+        out << std::endl;
+    }
+}
+
+std::shared_ptr<Expression> EmptyExpr::copy() const {
+    // Copy on EmptyExpr can use default shallow copy
+    return std::shared_ptr<Expression>(static_cast<Expression*>(new EmptyExpr(*this)));
+}
+}  // namespace clp_s::search
diff --git a/components/core/src/clp_s/search/EmptyExpr.hpp b/components/core/src/clp_s/search/EmptyExpr.hpp
new file mode 100644
index 000000000..3a002eafd
--- /dev/null
+++ b/components/core/src/clp_s/search/EmptyExpr.hpp
@@ -0,0 +1,37 @@
+#ifndef CLP_S_SEARCH_EMPTYEXPR_HPP
+#define CLP_S_SEARCH_EMPTYEXPR_HPP
+
+#include "Expression.hpp"
+
+namespace clp_s::search {
+/**
+ * Class representing the empty set/false. Useful
+ * for constant propagation and eliminating expressions.
+ */
+class EmptyExpr : public Expression {
+public:
+    /**
+     * Create an Empty expression which can optionally be attached to a parent
+     * @param parent parent this expression is attached to
+     * @return newly created Empty expression
+     */
+    static std::shared_ptr<Expression> create(Expression* parent = nullptr);
+
+    // Methods inherited from Value
+    void print() override;
+
+    // Methods inherited from Expression
+    // EmptyExpr never has any operands, so we arbitrarily say that all operands are Expression
+    bool has_only_expression_operands() override { return true; }
+
+    std::shared_ptr<Expression> copy() const override;
+
+private:
+    // Constructor
+    explicit EmptyExpr(Expression* parent = nullptr);
+
+    EmptyExpr(EmptyExpr const&);
+};
+}  // namespace clp_s::search
+
+#endif  // CLP_S_SEARCH_EMPTYEXPR_HPP
diff --git a/components/core/src/clp_s/search/EvaluateTimestampIndex.cpp b/components/core/src/clp_s/search/EvaluateTimestampIndex.cpp
new file mode 100644
index 000000000..7e3339f10
--- /dev/null
+++ b/components/core/src/clp_s/search/EvaluateTimestampIndex.cpp
@@ -0,0 +1,103 @@
+#include "EvaluateTimestampIndex.hpp"
+
+#include "AndExpr.hpp"
+#include "FilterExpr.hpp"
+#include "Integral.hpp"
+#include "OrExpr.hpp"
+
+namespace clp_s::search {
+constexpr LiteralTypeBitmask cDateTypes = cIntegralTypes | EpochDateT | FloatDateT;
+
+EvaluatedValue EvaluateTimestampIndex::run(std::shared_ptr<Expression> const& expr) {
+    if (std::dynamic_pointer_cast<OrExpr>(expr)) {
+        bool any_unknown = false;
+        for (auto it = expr->op_begin(); it != expr->op_end(); it++) {
+            auto sub_expr = std::static_pointer_cast<Expression>(*it);
+            EvaluatedValue ret = run(sub_expr);
+            if (ret == EvaluatedValue::True) {
+                return expr->is_inverted() ? EvaluatedValue::False : EvaluatedValue::True;
+            } else if (ret == EvaluatedValue::Unknown) {
+                any_unknown = true;
+            }
+        }
+
+        if (any_unknown) {
+            return EvaluatedValue::Unknown;
+        }
+        // must have been all false
+        return expr->is_inverted() ? EvaluatedValue::True : EvaluatedValue::False;
+    } else if (std::dynamic_pointer_cast<AndExpr>(expr)) {
+        bool any_unknown = false;
+        for (auto it = expr->op_begin(); it != expr->op_end(); it++) {
+            auto sub_expr = std::static_pointer_cast<Expression>(*it);
+            EvaluatedValue ret = run(sub_expr);
+            if (ret == EvaluatedValue::False) {
+                return expr->is_inverted() ? EvaluatedValue::True : EvaluatedValue::False;
+            } else if (ret == EvaluatedValue::Unknown) {
+                any_unknown = true;
+            }
+        }
+
+        if (any_unknown) {
+            return EvaluatedValue::Unknown;
+        }
+        // must have been all true
+        return expr->is_inverted() ? EvaluatedValue::False : EvaluatedValue::True;
+    } else if (auto filter = std::dynamic_pointer_cast<FilterExpr>(expr)) {
+        auto column = filter->get_column();
+        // After type narrowing all DateType literals are Integral or a derived class of it;
+        // a filter without a literal (get_operand() == nullptr) can't be checked against a range
+        auto const operand = std::dynamic_pointer_cast<Integral>(filter->get_operand());
+        if (false == column->matches_any(cDateTypes) || nullptr == operand) {
+            return EvaluatedValue::Unknown;
+        }
+
+        for (auto range_it = m_timestamp_dict->tokenized_column_to_range_begin();
+             range_it != m_timestamp_dict->tokenized_column_to_range_end();
+             range_it++)
+        {
+            std::vector<std::string>& tokens = range_it->first;
+            auto const& descriptors = column->get_descriptor_list();
+            // TODO: handle wildcard matching; the initial check on timestamp index happens
+            // before schema matching, so only exact token-by-token matches are considered here
+            if (tokens.size() != descriptors.size()) {
+                continue;
+            }
+
+            bool matched = true;
+            for (size_t i = 0; i < descriptors.size(); ++i) {
+                if (tokens[i] != descriptors[i].get_token()) {
+                    matched = false;
+                    break;
+                }
+            }
+            if (false == matched) {
+                continue;
+            }
+
+            EvaluatedValue ret;
+            Integral64 literal = operand->get();
+            if (std::holds_alternative<int64_t>(literal)) {
+                ret = range_it->second->evaluate_filter(
+                        filter->get_operation(),
+                        std::get<int64_t>(literal)
+                );
+            } else {
+                ret = range_it->second->evaluate_filter(
+                        filter->get_operation(),
+                        std::get<double>(literal)
+                );
+            }
+
+            if (ret == EvaluatedValue::True) {
+                return filter->is_inverted() ? EvaluatedValue::False : EvaluatedValue::True;
+            } else if (ret == EvaluatedValue::False) {
+                return filter->is_inverted() ? EvaluatedValue::True : EvaluatedValue::False;
+            }
+            return EvaluatedValue::Unknown;
+        }
+    }
+    // No indexed range matched the filter's column, or expr is not an Or/And/Filter expression
+    return EvaluatedValue::Unknown;
+}
+}  // namespace clp_s::search
diff --git a/components/core/src/clp_s/search/EvaluateTimestampIndex.hpp b/components/core/src/clp_s/search/EvaluateTimestampIndex.hpp
new file mode 100644
index 000000000..9799ec68d
--- /dev/null
+++ b/components/core/src/clp_s/search/EvaluateTimestampIndex.hpp
@@ -0,0 +1,31 @@
+#ifndef CLP_S_SEARCH_EVALUATETIMESTAMPINDEX_HPP
+#define CLP_S_SEARCH_EVALUATETIMESTAMPINDEX_HPP
+
+#include "../TimestampDictionaryReader.hpp"
+#include "../Utils.hpp"
+#include "Expression.hpp"
+
+namespace clp_s::search {
+class EvaluateTimestampIndex {
+public:
+    // Constructors
+    EvaluateTimestampIndex(std::shared_ptr<TimestampDictionaryReader> const& timestamp_dict)
+            : m_timestamp_dict(timestamp_dict) {}
+
+    /**
+     * Takes an expression and attempts to prove its output (true/false/unknown) based on
+     * a timestamp index. Currently doesn't do any constant propagation.
+     *
+     * Should only be run after type narrowing.
+     *
+     * @param expr the expression to evaluate against the timestamp index
+     * @return The evaluated value of the expression given the index (True, False, Unknown)
+     */
+    EvaluatedValue run(std::shared_ptr<Expression> const& expr);
+
+private:
+    std::shared_ptr<TimestampDictionaryReader> m_timestamp_dict;
+};
+}  // namespace clp_s::search
+
+#endif  // CLP_S_SEARCH_EVALUATETIMESTAMPINDEX_HPP
diff --git a/components/core/src/clp_s/search/Expression.cpp b/components/core/src/clp_s/search/Expression.cpp
new file mode 100644
index 000000000..45c5b1168
--- /dev/null
+++ b/components/core/src/clp_s/search/Expression.cpp
@@ -0,0 +1,35 @@
+#include "Expression.hpp"
+
+namespace clp_s::search {
+// Creates an expression without operands
+Expression::Expression(bool inverted, Expression* parent)
+        : m_inverted(inverted),
+          m_parent(parent) {}
+
+// Shallow copy: the operand pointers are shared with expr and the copy has no parent
+Expression::Expression(Expression const& expr)
+        : m_inverted(expr.m_inverted),
+          m_parent(nullptr),
+          m_operands(expr.m_operands) {}
+
+void Expression::add_operand(std::shared_ptr<Expression> const& operand) {
+    m_operands.push_back(std::static_pointer_cast<Value>(operand));
+    operand->set_parent(this);
+}
+
+void Expression::add_operand(std::shared_ptr<Literal> const& operand) {
+    m_operands.push_back(std::static_pointer_cast<Value>(operand));
+}
+
+void Expression::copy_append(Expression* parent) const {
+    auto new_expr = this->copy();
+    new_expr->set_parent(parent);
+    parent->add_operand(new_expr);
+}
+
+void Expression::copy_replace(Expression* parent, OpList::iterator it) const {
+    auto new_expr = this->copy();
+    new_expr->set_parent(parent);
+    *it = std::static_pointer_cast<Value>(new_expr);
+}
+}  // namespace clp_s::search
diff --git a/components/core/src/clp_s/search/Expression.hpp b/components/core/src/clp_s/search/Expression.hpp
new file mode 100644
index 000000000..3b67bc16e
--- /dev/null
+++ b/components/core/src/clp_s/search/Expression.hpp
@@ -0,0 +1,118 @@
+#ifndef CLP_S_SEARCH_EXPRESSION_HPP
+#define CLP_S_SEARCH_EXPRESSION_HPP
+
+#include <list>
+#include <memory>
+
+#include "Literal.hpp"
+#include "Value.hpp"
+
+namespace clp_s::search {
+typedef std::list<std::shared_ptr<Value>> OpList;
+
+/**
+ * Top level class for all logical expressions which represent filters
+ * on columns.
+ *
+ * Key subclasses are AndExpr, OrExpr, and FilterExpr
+ */
+class Expression : public Value {
+public:
+    /**
+     * True if this expression is inverted
+     * @return Whether the expression is inverted
+     */
+    bool is_inverted() const { return m_inverted; }
+
+    /**
+     * Flip whether the expression is inverted
+     */
+    void invert() { m_inverted = !m_inverted; }
+
+    /**
+     * @return The number of operands that this expression has
+     */
+    unsigned get_num_operands() override { return m_operands.size(); }
+
+    /**
+     * Get iterators to this Expression's OpList
+     * @return Iterators to the beginning/end of the OpList
+     */
+    OpList::iterator op_begin() { return m_operands.begin(); }
+
+    OpList::iterator op_end() { return m_operands.end(); }
+
+    /**
+     * @return A reference to the underlying OpList. Useful in cases where certain children
+     * need to be deleted, or multiple children need to be spliced in.
+     */
+    OpList& get_op_list() { return m_operands; }
+
+    /**
+     * Add an operand to the end of the OpList. When the operand is an
+     * Expression its parent is set to this Expression.
+     * @param operand the operand to append to the OpList
+     */
+    void add_operand(std::shared_ptr<Expression> const& operand);
+
+    void add_operand(std::shared_ptr<Literal> const& operand);
+
+    /**
+     * @return The parent for this Expression. Can be nullptr if this is the top level.
+     */
+    Expression* get_parent() { return m_parent; }
+
+    /**
+     * Set the parent for this Expression
+     * @param parent this Expression's new parent
+     */
+    void set_parent(Expression* parent) { m_parent = parent; }
+
+    /**
+     * Deep copy
+     * @return A deep copy of this expression
+     */
+    virtual std::shared_ptr<Expression> copy() const = 0;
+
+    /**
+     * Deep copy this expression and append it into *parent*'s OpList.
+     * Also sets the parent for copy to parent.
+     * @param parent the parent to copy into
+     */
+    void copy_append(Expression* parent) const;
+
+    /**
+     * Deep copy this expression and replace a specific operand in the
+     * *parent*'s OpList.
+     * @param parent the parent to copy into
+     * @param it an iterator into the parent's OpList representing the operand that will get
+     * replaced
+     */
+    void copy_replace(Expression* parent, OpList::iterator it) const;
+
+    /**
+     * Whether this Expression's operands are all Expression
+     * @return true if this Expression's operands are all Expression
+     */
+    virtual bool has_only_expression_operands() = 0;
+
+    // Methods inherited from Value
+    void print() override = 0;
+
+protected:
+    /**
+     * All expressions can be inverted, have a parent (nullptr for top level),
+     * and have 0 or more operands
+     */
+    bool m_inverted;
+    Expression* m_parent;
+    std::list<std::shared_ptr<Value>> m_operands;
+
+    // Copy Semantic is create shallow copy with parent pointing to null
+    Expression(Expression const&);
+
+    explicit Expression(bool inverted, Expression* parent = nullptr);
+};
+}  // namespace clp_s::search
+
+#endif  // CLP_S_SEARCH_EXPRESSION_HPP
diff --git a/components/core/src/clp_s/search/FilterExpr.cpp b/components/core/src/clp_s/search/FilterExpr.cpp
new file mode 100644
index 000000000..55c62733c
--- /dev/null
+++ b/components/core/src/clp_s/search/FilterExpr.cpp
@@ -0,0 +1,106 @@
+#include "FilterExpr.hpp"
+
+namespace clp_s::search {
+FilterExpr::FilterExpr(
+        std::shared_ptr<ColumnDescriptor> const& column,
+        FilterOperation op,
+        bool inverted,
+        Expression* parent
+)
+        : Expression(inverted, parent),
+          m_op(op) {
+    add_operand(std::static_pointer_cast<Literal>(column));
+}
+
+FilterExpr::FilterExpr(FilterExpr const& expr) : Expression(expr), m_op(expr.m_op) {}
+
+// Maps each FilterOperation to its printed name; any other value yields "UNKNOWN"
+std::string FilterExpr::op_type_str(FilterOperation op) {
+    switch (op) {
+        case FilterOperation::EXISTS:
+            return "EXISTS";
+        case FilterOperation::NEXISTS:
+            return "NEXISTS";
+        case FilterOperation::EQ:
+            return "EQ";
+        case FilterOperation::NEQ:
+            return "NEQ";
+        case FilterOperation::LT:
+            return "LT";
+        case FilterOperation::GT:
+            return "GT";
+        case FilterOperation::LTE:
+            return "LTE";
+        case FilterOperation::GTE:
+            return "GTE";
+    }
+    return "UNKNOWN";
+}
+
+void FilterExpr::print() {
+    auto& os = get_print_stream();
+    if (is_inverted()) {
+        os << "!";
+    }
+
+    os << "FilterExpr(";
+    os << op_type_str(m_op);
+    for (auto it = op_begin(); it != op_end(); it++) {
+        os << ", ";
+        (*it)->print();
+    }
+    os << ")";
+
+    if (get_parent() == nullptr) {
+        os << std::endl;
+    } else {
+        os << std::flush;
+    }
+}
+
+std::shared_ptr<Expression> FilterExpr::create(
+        std::shared_ptr<ColumnDescriptor>& column,
+        FilterOperation op,
+        bool inverted,
+        Expression* parent
+) {
+    return std::shared_ptr<Expression>(
+            static_cast<Expression*>(new FilterExpr(column->copy(), op, inverted, parent))
+    );
+}
+
+std::shared_ptr<Expression> FilterExpr::create(
+        std::shared_ptr<ColumnDescriptor>& column,
+        FilterOperation op,
+        std::shared_ptr<Literal>& operand,
+        bool inverted,
+        Expression* parent
+) {
+    std::shared_ptr<Expression> expr(
+            static_cast<Expression*>(new FilterExpr(column->copy(), op, inverted, parent))
+    );
+    expr->add_operand(operand);
+    return expr;
+}
+
+std::shared_ptr<Expression> FilterExpr::copy() const {
+    // Only deep copy column descriptors
+    auto new_filter = std::shared_ptr<Expression>(static_cast<Expression*>(new FilterExpr(*this)));
+    for (auto it = new_filter->op_begin(); it != new_filter->op_end(); it++) {
+        if (auto descriptor = std::dynamic_pointer_cast<ColumnDescriptor>(*it)) {
+            *it = descriptor->copy();
+        }
+    }
+    return new_filter;
+}
+
+std::shared_ptr<Literal> FilterExpr::get_operand() {
+    // The column is the first operand; the literal, when present, is the second
+    auto literal_it = op_begin();
+    ++literal_it;
+    if (op_end() != literal_it) {
+        return std::static_pointer_cast<Literal>(*literal_it);
+    }
+    return nullptr;
+}
+}  // namespace clp_s::search
diff --git a/components/core/src/clp_s/search/FilterExpr.hpp b/components/core/src/clp_s/search/FilterExpr.hpp
new file mode 100644
index 000000000..706a375c6
--- /dev/null
+++ b/components/core/src/clp_s/search/FilterExpr.hpp
@@ -0,0 +1,100 @@
+#ifndef CLP_S_SEARCH_FILTEREXPR_HPP
+#define CLP_S_SEARCH_FILTEREXPR_HPP
+
+#include <string>
+
+#include "ColumnDescriptor.hpp"
+#include "Expression.hpp"
+#include "FilterOperation.hpp"
+#include "Literal.hpp"
+
+namespace clp_s::search {
+/**
+ * Class for simple filter conditions in the AST. Consists of a column,
+ * a filtering operation, and usually a literal.
+ *
+ * Conventionally the OpList contains a ColumnDescriptor followed by some Literal. I.e. a
+ * FilterExpr always has a ColumnDescriptor, but may not have a Literal.
+ */
+class FilterExpr : public Expression {
+public:
+    /**
+     * @return FilterOperation this Filter performs
+     */
+    FilterOperation get_operation() { return m_op; }
+
+    /**
+     * @return The Column this Filter acts on
+     */
+    std::shared_ptr<ColumnDescriptor> get_column() {
+        return std::static_pointer_cast<ColumnDescriptor>(*op_begin());
+    }
+
+    /**
+     * @return This Filter's Literal or nullptr if there is no Literal
+     */
+    std::shared_ptr<Literal> get_operand();
+
+    /**
+     * Create a Filter expression with a Column and FilterOperation but no Literal
+     * Literal can be added later using mutators provided by the Expression parent class
+     * @param column the Column this Filter acts on; the new Filter stores a copy of it
+     * @param op the Operation this Filter uses to Filter the Column
+     * @param inverted expression is inverted when true
+     * @param parent parent this expression is attached to
+     * @return Newly created Filter expression
+     */
+    static std::shared_ptr<Expression> create(
+            std::shared_ptr<ColumnDescriptor>& column,
+            FilterOperation op,
+            bool inverted = false,
+            Expression* parent = nullptr
+    );
+
+    /**
+     * Create a Filter expression with a Column, FilterOperation and Literal (operand)
+     * @param column the Column this Filter acts on; the new Filter stores a copy of it
+     * @param op the Operation this Filter uses to Filter the Column
+     * @param inverted expression is inverted when true
+     * @param parent parent this expression is attached to
+     * @return Newly created Filter expression
+     */
+    static std::shared_ptr<Expression> create(
+            std::shared_ptr<ColumnDescriptor>& column,
+            FilterOperation op,
+            std::shared_ptr<Literal>& operand,
+            bool inverted = false,
+            Expression* parent = nullptr
+    );
+
+    /**
+     * Helper function to turn FilterOperation into string for printing
+     * @param op the operation we want to convert to string
+     * @return a string representing the operation
+     */
+    static std::string op_type_str(FilterOperation op);
+
+    // Methods inherited from Value
+    void print() override;
+
+    // Methods inherited from Expression
+    bool has_only_expression_operands() override { return false; }
+
+    std::shared_ptr<Expression> copy() const override;
+
+private:
+    FilterOperation m_op;
+
+    // Constructor
+    FilterExpr(
+            std::shared_ptr<ColumnDescriptor> const& column,
+            FilterOperation op,
+            bool inverted = false,
+            Expression* parent = nullptr
+    );
+
+    FilterExpr(FilterExpr const&);
+};
+}  // namespace clp_s::search
+
+#endif  // CLP_S_SEARCH_FILTEREXPR_HPP
diff --git a/components/core/src/clp_s/search/FilterOperation.hpp b/components/core/src/clp_s/search/FilterOperation.hpp
new file mode 100644
index 000000000..e484e7098
--- /dev/null
+++ b/components/core/src/clp_s/search/FilterOperation.hpp
@@ -0,0 +1,20 @@
+#ifndef CLP_S_SEARCH_FILTEROPERATION_HPP
+#define CLP_S_SEARCH_FILTEROPERATION_HPP
+
+namespace clp_s::search {
+/**
+ * Enum describing all supported filtering operations in the search AST
+ */
+enum FilterOperation {
+    EXISTS,
+    NEXISTS,
+    EQ,
+    NEQ,
+    LT,
+    GT,
+    LTE,
+    GTE
+};
+}  // namespace clp_s::search
+
+#endif  // CLP_S_SEARCH_FILTEROPERATION_HPP
diff --git a/components/core/src/clp_s/search/Integral.cpp b/components/core/src/clp_s/search/Integral.cpp
new file mode 100644
index 000000000..459ac9dc0
--- /dev/null
+++ b/components/core/src/clp_s/search/Integral.cpp
@@ -0,0 +1,96 @@
+#include "Integral.hpp"
+
+#include <sstream>
+
+#include "SearchUtils.hpp"
+
+namespace clp_s::search {
+Integral::Integral(double v) : m_v{v} {}  // variant holds the double alternative
+
+Integral::Integral(int64_t v) : m_v{v} {}  // variant holds the int64_t alternative
+
+std::shared_ptr<Literal> Integral::create_from_float(double v) {
+    return std::shared_ptr<Literal>{new Integral{v}};  // ctor is protected: no make_shared
+}
+
+std::shared_ptr<Literal> Integral::create_from_int(int64_t v) {
+    return std::shared_ptr<Literal>{new Integral{v}};  // ctor is protected: no make_shared
+}
+
+std::shared_ptr<Literal> Integral::create_from_string(std::string const& v) {
+    // Try int64_t first so that a string like "42" is stored as an integer
+    std::istringstream int_stream(v);
+    int64_t int_value;
+    int_stream >> std::noskipws >> int_value;
+    if (int_stream.eof() && false == int_stream.fail()) {
+        auto* literal = new Integral(int_value);
+        literal->m_vstr = v;
+        return std::shared_ptr<Literal>(literal);
+    }
+    // Otherwise retry as a double; again the whole string has to be consumed
+    std::istringstream float_stream(v);
+    double float_value;
+    float_stream >> std::noskipws >> float_value;
+    if (float_stream.eof() && false == float_stream.fail()) {
+        auto* literal = new Integral(float_value);
+        literal->m_vstr = v;
+        return std::shared_ptr<Literal>(literal);
+    }
+    return std::shared_ptr<Literal>(nullptr);
+}
+
+void Integral::print() {
+    auto& out = get_print_stream();
+    if (m_vstr.empty()) {
+        // No stored text: stream whichever alternative (int64_t or double)
+        // the variant currently holds
+        std::visit([&out](auto const& value) { out << value; }, m_v);
+    } else {
+        out << m_vstr;
+    }
+}
+
+Integral64 Integral::get() {
+    return m_v;  // returned by value: a copy of the int64_t/double variant
+}
+
+bool Integral::as_var_string(std::string& ret, FilterOperation op) {
+    // The four ordering comparisons are refused; `ret` is left untouched for them
+    switch (op) {
+        case FilterOperation::LT:
+        case FilterOperation::GT:
+        case FilterOperation::LTE:
+        case FilterOperation::GTE:
+            return false;
+        default:
+            break;
+    }
+    if (m_vstr.empty()) {
+        // No text yet: format the held alternative once and cache it in m_vstr
+        std::ostringstream formatted;
+        std::visit([&formatted](auto const& value) { formatted << value; }, m_v);
+        m_vstr = formatted.str();
+    }
+    ret = m_vstr;
+    return true;
+}
+
+bool Integral::as_float(double& ret, FilterOperation op) {
+    // `op` is not consulted: this cast always succeeds.
+    // A held int64_t goes through the usual integer-to-double conversion.
+    auto const* stored_double = std::get_if<double>(&m_v);
+    ret = (nullptr != stored_double) ? *stored_double
+                                     : static_cast<double>(std::get<int64_t>(m_v));
+    return true;
+}
+
+bool Integral::as_int(int64_t& ret, FilterOperation op) {
+    if (auto const* stored_int = std::get_if<int64_t>(&m_v)) {
+        ret = *stored_int;  // integer alternative: returned as is
+        return true;
+    }
+    // A held double only converts when double_as_int accepts it for this operation
+    double stored_double = std::get<double>(m_v);
+    return double_as_int(stored_double, op, ret);
+}
+}  // namespace clp_s::search
diff --git a/components/core/src/clp_s/search/Integral.hpp b/components/core/src/clp_s/search/Integral.hpp
new file mode 100644
index 000000000..eb619deed
--- /dev/null
+++ b/components/core/src/clp_s/search/Integral.hpp
@@ -0,0 +1,84 @@
+#ifndef CLP_S_SEARCH_INTEGRAL_HPP
+#define CLP_S_SEARCH_INTEGRAL_HPP
+
+#include <memory>
+#include <string>
+#include <variant>
+
+#include "Literal.hpp"
+
+namespace clp_s::search {
+typedef std::variant<int64_t, double> Integral64;
+
+// FIXME: figure out why String types are part of this bitmask
+constexpr LiteralTypeBitmask cIntegralLiteralTypes = cIntegralTypes | VarStringT;
+
+/**
+ * Class for Integral values (float/int) in the search AST
+ */
+class Integral : public Literal {
+public:
+    // Deleted copy
+    Integral(Integral const&) = delete;
+
+    Integral& operator=(Integral const&) = delete;
+
+    /**
+     * Create an Integral literal from a double value
+     * @param v the value
+     * @return an Integral literal
+     */
+    static std::shared_ptr<Literal> create_from_float(double v);
+
+    /**
+     * Create an Integral literal from an integer value
+     * @param v the value
+     * @return an Integral literal
+     */
+    static std::shared_ptr<Literal> create_from_int(int64_t v);
+
+    /**
+     * Try to create an integral literal from a string
+     * @param v the string we are attempting to convert to Integral
+     * @return an Integral literal, or nullptr if the string does not represent an integral
+     */
+    static std::shared_ptr<Literal> create_from_string(std::string const& v);
+
+    /**
+     * Return the underlying integral value
+     * @return a copy of the underlying variant (holds either an int64_t or a double)
+     */
+    Integral64 get();
+
+    // Methods inherited from Value
+    void print() override;
+
+    // Methods inherited from Literal (the type checks test against cIntegralLiteralTypes)
+    bool matches_type(LiteralType type) override { return type & cIntegralLiteralTypes; }
+
+    bool matches_any(LiteralTypeBitmask mask) override { return mask & cIntegralLiteralTypes; }
+
+    bool matches_exactly(LiteralTypeBitmask mask) override { return mask == cIntegralLiteralTypes; }
+
+    bool as_epoch_date() override { return true; }  // always accepted
+
+    bool as_float_date() override { return true; }  // always accepted
+
+    bool as_var_string(std::string& ret, FilterOperation op) override;
+
+    bool as_float(double& ret, FilterOperation op) override;
+
+    bool as_int(int64_t& ret, FilterOperation op) override;
+
+protected:
+    Integral64 m_v;
+    std::string m_vstr;  // original string if created from one; else cached by as_var_string
+
+    // Constructors (protected: instances come from the static create_from_* functions)
+    explicit Integral(double v);
+
+    explicit Integral(int64_t v);
+};
+}  // namespace clp_s::search
+
+#endif  // CLP_S_SEARCH_INTEGRAL_HPP
diff --git a/components/core/src/clp_s/search/Literal.hpp b/components/core/src/clp_s/search/Literal.hpp
new file mode 100644
index 000000000..5e06e2a49
--- /dev/null
+++ b/components/core/src/clp_s/search/Literal.hpp
@@ -0,0 +1,115 @@
+#ifndef CLP_S_SEARCH_LITERAL_HPP
+#define CLP_S_SEARCH_LITERAL_HPP
+
+#include <string>
+
+#include "FilterOperation.hpp"
+#include "Value.hpp"
+
+namespace clp_s::search {
+/**
+ * An enum representing all of the Literal types that can show up in the AST.
+ */
+enum LiteralType : uint32_t {
+    TypesBegin = 1,  // same value as IntegerT, the lowest type bit
+    IntegerT = 1,
+    FloatT = 1 << 1,
+    ClpStringT = 1 << 2,
+    VarStringT = 1 << 3,
+    BooleanT = 1 << 4,
+    ArrayT = 1 << 5,
+    NullT = 1 << 6,
+    EpochDateT = 1 << 7,
+    FloatDateT = 1 << 8,
+    TypesEnd = 1 << 9,  // one bit past the last type; cAllTypes is TypesEnd - 1
+    UnknownT = ((uint32_t)1) << 31  // top bit, not covered by cAllTypes
+};
+
+typedef uint32_t LiteralTypeBitmask;
+
+constexpr LiteralTypeBitmask cIntegralTypes = LiteralType::IntegerT | LiteralType::FloatT;
+constexpr LiteralTypeBitmask cAllTypes = TypesEnd - 1;
+
+/**
+ * Parent class for all Literals in the AST.
+ */
+class Literal : public Value {
+public:
+    /**
+     * Literals are considered to have 1 operand.
+     * @return 1
+     */
+    unsigned get_num_operands() override { return 1; }
+
+    /**
+     * Strict checks for type matching against a given literal type.
+     * @return true if the check succeeds
+     */
+    virtual bool matches_type(LiteralType type) = 0;
+
+    virtual bool matches_any(LiteralTypeBitmask mask) = 0;
+
+    virtual bool matches_exactly(LiteralTypeBitmask mask) = 0;
+
+    /**
+     * Convert LiteralType enum values to strings. Only used for printing.
+     * @param type the enum value being turned into a string
+     * @return A string representing the enum value
+     */
+    static std::string type_to_string(LiteralType type) {
+        switch (type) {
+            case LiteralType::IntegerT:
+                return "int";
+            case LiteralType::FloatT:
+                return "float";
+            case LiteralType::ClpStringT:
+                return "clpstring";
+            case LiteralType::VarStringT:
+                return "varstring";
+            case LiteralType::BooleanT:
+                return "bool";
+            case LiteralType::ArrayT:
+                return "array";
+            case LiteralType::NullT:
+                return "null";
+            case LiteralType::EpochDateT:
+                return "epochdate";
+            case LiteralType::FloatDateT:
+                return "floatdate";
+            default:
+                return "errtype";  // TypesEnd, UnknownT, or a value combining several bits
+        }
+    }
+
+    /**
+     * Functions to check type conversion and cast when possible under a given filter operation.
+     * By default all casts fail until overridden by the derived literal types.
+     * @param ret the casted value
+     * @param op the FilterOperation operating on the Literal
+     * @return true if cast is successful
+     */
+    virtual bool as_clp_string(std::string& ret, FilterOperation op) { return false; }
+
+    virtual bool as_var_string(std::string& ret, FilterOperation op) { return false; }
+
+    virtual bool as_float(double& ret, FilterOperation op) { return false; }
+
+    virtual bool as_int(int64_t& ret, FilterOperation op) { return false; }
+
+    virtual bool as_bool(bool& ret, FilterOperation op) { return false; }
+
+    virtual bool as_null(FilterOperation op) { return false; }
+
+    inline bool as_array(std::string& ret, FilterOperation op) {
+        return as_var_string(ret, op) || as_clp_string(ret, op);  // var string is tried first
+    }
+
+    virtual bool as_epoch_date() { return false; }
+
+    virtual bool as_float_date() { return false; }
+
+    virtual bool as_any(FilterOperation op) { return false; }
+};
+}  // namespace clp_s::search
+
+#endif  // CLP_S_SEARCH_LITERAL_HPP
diff --git a/components/core/src/clp_s/search/NarrowTypes.cpp b/components/core/src/clp_s/search/NarrowTypes.cpp
new file mode 100644
index 000000000..82b8e7e5b
--- /dev/null
+++ b/components/core/src/clp_s/search/NarrowTypes.cpp
@@ -0,0 +1,76 @@
+#include "NarrowTypes.hpp"
+
+#include "ConstantProp.hpp"
+#include "EmptyExpr.hpp"
+#include "FilterExpr.hpp"
+#include "Literal.hpp"
+
+namespace clp_s::search {
+std::shared_ptr<Expression> NarrowTypes::run(std::shared_ptr<Expression>& expr) {
+    // Narrow first; the caller's handle is overwritten with the narrowed tree
+    expr = narrow(expr);
+    // Then run constant propagation over that tree and return its result
+    return ConstantProp{}.run(expr);
+}
+
+std::shared_ptr<Expression> NarrowTypes::narrow(std::shared_ptr<Expression> cur) {
+    if (cur->has_only_expression_operands()) {  // recurse into the child expressions
+        for (auto it = cur->op_begin(); it != cur->op_end(); it++) {
+            auto child = std::static_pointer_cast<Expression>(*it);
+            auto new_child = narrow(child);
+            if (new_child != child) {  // narrow() returned a replacement node
+                new_child->copy_replace(cur.get(), it);
+            }
+        }
+    } else if (auto filter = std::dynamic_pointer_cast<FilterExpr>(cur)) {
+        // TODO: will have to change if we start supporting multi column expressions
+        auto column = filter->get_column();
+        auto op = filter->get_operation();
+        // Existence checks are returned unchanged, before any operand is looked at
+        if (op == FilterOperation::EXISTS || op == FilterOperation::NEXISTS) {
+            return cur;
+        }
+        // The tmp* variables below only receive cast results; their values are unused
+        auto literal = filter->get_operand();
+        std::string tmpstring;
+        int64_t tmpint;
+        double tmpdouble;
+        bool tmpbool;
+        // Remove from the column every type the operand cannot be cast to under `op`
+        if (false == literal->as_any(op)) {
+            if (false == literal->as_clp_string(tmpstring, op)) {
+                column->remove_matching_type(LiteralType::ClpStringT);
+            }
+            if (false == literal->as_var_string(tmpstring, op)) {
+                column->remove_matching_type(LiteralType::VarStringT);
+            }
+            if (false == literal->as_int(tmpint, op)) {
+                column->remove_matching_type(LiteralType::IntegerT);
+            }
+            if (false == literal->as_float(tmpdouble, op)) {
+                column->remove_matching_type(LiteralType::FloatT);
+            }
+            if (false == literal->as_bool(tmpbool, op)) {
+                column->remove_matching_type(LiteralType::BooleanT);
+            }
+            if (false == literal->as_array(tmpstring, op)) {
+                column->remove_matching_type(LiteralType::ArrayT);
+            }
+            if (false == literal->as_null(op)) {
+                column->remove_matching_type(LiteralType::NullT);
+            }
+            if (false == literal->as_epoch_date()) {
+                column->remove_matching_type(LiteralType::EpochDateT);
+            }
+            if (false == literal->as_float_date()) {
+                column->remove_matching_type(LiteralType::FloatDateT);
+            }
+        }
+        // With no candidate type left, the filter is replaced by an EmptyExpr
+        if (false == column->matches_any(cAllTypes)) {
+            return EmptyExpr::create();
+        }
+    }
+    return cur;
+}
+}  // namespace clp_s::search
diff --git a/components/core/src/clp_s/search/NarrowTypes.hpp b/components/core/src/clp_s/search/NarrowTypes.hpp
new file mode 100644
index 000000000..8504d6e02
--- /dev/null
+++ b/components/core/src/clp_s/search/NarrowTypes.hpp
@@ -0,0 +1,22 @@
+#ifndef CLP_S_SEARCH_NARROWTYPES_HPP
+#define CLP_S_SEARCH_NARROWTYPES_HPP
+
+#include "Transformation.hpp"
+
+namespace clp_s::search {
+class NarrowTypes : public Transformation {
+public:
+    // Methods inherited from Transformation
+    std::shared_ptr<Expression> run(std::shared_ptr<Expression>& expr) override;
+
+private:
+    /**
+     * Remove from each filter's column the types its literal operand cannot be cast to
+     * @param cur the expression to narrow; its operands may be replaced in place
+     * @return `cur`, or an EmptyExpr if `cur` is a filter whose column has no type left
+     */
+    static std::shared_ptr<Expression> narrow(std::shared_ptr<Expression> cur);
+};
+}  // namespace clp_s::search
+
+#endif  // CLP_S_SEARCH_NARROWTYPES_HPP
diff --git a/components/core/src/clp_s/search/NullLiteral.cpp b/components/core/src/clp_s/search/NullLiteral.cpp
new file mode 100644
index 000000000..9a7b51ffc
--- /dev/null
+++ b/components/core/src/clp_s/search/NullLiteral.cpp
@@ -0,0 +1,32 @@
+#include "NullLiteral.hpp"
+
+namespace clp_s::search {
+std::shared_ptr<Literal> NullLiteral::create() {
+    return std::shared_ptr<Literal>{new NullLiteral{}};  // ctor is private: no make_shared
+}
+
+std::shared_ptr<Literal> NullLiteral::create_from_string(std::string const& v) {
+    // Only the exact string "null" is accepted; the comparison is case-sensitive
+    if (v != "null") {
+        return {nullptr};
+    }
+    return std::shared_ptr<Literal>(new NullLiteral());
+}
+
+void NullLiteral::print() {
+    get_print_stream() << "null";  // no newline and no flush are written here
+}
+
+bool NullLiteral::as_var_string(std::string& ret, FilterOperation op) {
+    bool const is_equality_op = FilterOperation::EQ == op || FilterOperation::NEQ == op;
+    if (false == is_equality_op) {
+        return false;  // `ret` is left untouched for every other operation
+    }
+    ret = "null";
+    return true;
+}
+
+bool NullLiteral::as_null(FilterOperation op) {
+    return op == FilterOperation::EQ || op == FilterOperation::NEQ;  // EQ/NEQ only
+}
+}  // namespace clp_s::search
diff --git a/components/core/src/clp_s/search/NullLiteral.hpp b/components/core/src/clp_s/search/NullLiteral.hpp
new file mode 100644
index 000000000..072529e48
--- /dev/null
+++ b/components/core/src/clp_s/search/NullLiteral.hpp
@@ -0,0 +1,54 @@
+#ifndef CLP_S_SEARCH_NULLLITERAL_HPP
+#define CLP_S_SEARCH_NULLLITERAL_HPP
+
+#include <memory>
+#include <string>
+#include <variant>
+
+#include "Literal.hpp"
+
+namespace clp_s::search {
+/**
+ * Class for Null literals in the search AST
+ */
+class NullLiteral : public Literal {
+public:
+    // Deleted copy
+    NullLiteral(NullLiteral const&) = delete;
+
+    NullLiteral& operator=(NullLiteral const&) = delete;
+
+    /**
+     * Explicitly create a null literal
+     * @return A newly created null literal
+     */
+    static std::shared_ptr<Literal> create();
+
+    /**
+     * Try to create a null literal from a string
+     * @param v the string we are attempting to convert to Null
+     * @return A null literal, or nullptr if the string is not exactly "null"
+     */
+    static std::shared_ptr<Literal> create_from_string(std::string const& v);
+
+    // Methods inherited from Value
+    void print() override;
+
+    // Methods inherited from Literal (the type checks test against LiteralType::NullT)
+    bool matches_type(LiteralType type) override { return type & LiteralType::NullT; }
+
+    bool matches_any(LiteralTypeBitmask mask) override { return mask & LiteralType::NullT; }
+
+    bool matches_exactly(LiteralTypeBitmask mask) override { return mask == LiteralType::NullT; }
+
+    bool as_var_string(std::string& ret, FilterOperation op) override;
+
+    bool as_null(FilterOperation op) override;
+
+private:
+    // Constructor (private: instances come from create() / create_from_string())
+    NullLiteral() = default;
+};
+}  // namespace clp_s::search
+
+#endif  // CLP_S_SEARCH_NULLLITERAL_HPP
diff --git a/components/core/src/clp_s/search/OrExpr.cpp b/components/core/src/clp_s/search/OrExpr.cpp
new file mode 100644
index 000000000..e327710c8
--- /dev/null
+++ b/components/core/src/clp_s/search/OrExpr.cpp
@@ -0,0 +1,55 @@
+#include "OrExpr.hpp"
+
+namespace clp_s::search {
+OrExpr::OrExpr(bool inverted, Expression* parent) : Expression(inverted, parent) {}
+
+OrExpr::OrExpr(OrExpr const& expr) : Expression(expr) {}
+
+void OrExpr::print() {
+    auto& out = get_print_stream();
+    out << (is_inverted() ? "!OrExpr(" : "OrExpr(");
+
+    // Emit ", " between operands by writing it ahead of all but the first
+    bool is_first_operand = true;
+    for (auto it = op_begin(); it != op_end(); ++it) {
+        if (false == is_first_operand) {
+            out << ", ";
+        }
+        is_first_operand = false;
+        (*it)->print();
+    }
+    out << ")";
+
+    // A root expression (no parent) ends the line; nested ones only flush
+    if (nullptr != get_parent()) {
+        out << std::flush;
+    } else {
+        out << std::endl;
+    }
+}
+
+std::shared_ptr<Expression> OrExpr::copy() const {
+    std::shared_ptr<Expression> clone(new OrExpr(*this));
+    // Every operand slot of the clone is handed to that operand's copy_replace()
+    for (auto slot = clone->op_begin(); slot != clone->op_end(); ++slot) {
+        std::static_pointer_cast<Expression>(*slot)->copy_replace(clone.get(), slot);
+    }
+    return clone;
+}
+
+std::shared_ptr<Expression> OrExpr::create(bool inverted, Expression* parent) {
+    return std::shared_ptr<Expression>{new OrExpr{inverted, parent}};  // ctor is private
+}
+
+std::shared_ptr<Expression> OrExpr::create(
+        std::shared_ptr<Expression>& op1,
+        std::shared_ptr<Expression>& op2,
+        bool inverted,
+        Expression* parent
+) {
+    auto or_expr = create(inverted, parent);  // start from an Or node with no operands
+    op1->copy_append(or_expr.get());
+    op2->copy_append(or_expr.get());
+    return or_expr;
+}
+}  // namespace clp_s::search
diff --git a/components/core/src/clp_s/search/OrExpr.hpp b/components/core/src/clp_s/search/OrExpr.hpp
new file mode 100644
index 000000000..8e95cf24f
--- /dev/null
+++ b/components/core/src/clp_s/search/OrExpr.hpp
@@ -0,0 +1,53 @@
+#ifndef CLP_S_SEARCH_OREXPR_HPP
+#define CLP_S_SEARCH_OREXPR_HPP
+
+#include "Expression.hpp"
+
+namespace clp_s::search {
+/**
+ * Class representing a logical Or operation across all
+ * children in its OpList. Can have arbitrarily many children.
+ */
+class OrExpr : public Expression {
+public:
+    /**
+     * Create an empty Or expression which can optionally be inverted and attached to a parent
+     * Children can be added via mutators inherited from Expression.
+     * @param inverted expression is inverted when true
+     * @param parent parent this expression is attached to
+     * @return A newly created Or expression
+     */
+    static std::shared_ptr<Expression> create(bool inverted = false, Expression* parent = nullptr);
+
+    /**
+     * Create an Or expression with two children
+     * @param op1 the first child operand (added via its copy_append)
+     * @param op2 the second child operand (added via its copy_append)
+     * @param inverted expression is inverted when true
+     * @param parent parent this expression is attached to
+     * @return A newly created Or expression
+     */
+    static std::shared_ptr<Expression> create(
+            std::shared_ptr<Expression>& op1,
+            std::shared_ptr<Expression>& op2,
+            bool inverted = false,
+            Expression* parent = nullptr
+    );
+
+    // Methods inherited from Value
+    void print() override;
+
+    // Methods inherited from Expression
+    bool has_only_expression_operands() override { return true; }  // constant: always true
+
+    std::shared_ptr<Expression> copy() const override;
+
+private:
+    // Constructor (private: instances are handed out by the static create() overloads)
+    explicit OrExpr(bool inverted = false, Expression* parent = nullptr);
+
+    OrExpr(OrExpr const&);  // used by copy()
+};
+}  // namespace clp_s::search
+
+#endif  // CLP_S_SEARCH_OREXPR_HPP
diff --git a/components/core/src/clp_s/search/OrOfAndForm.cpp b/components/core/src/clp_s/search/OrOfAndForm.cpp
new file mode 100644
index 000000000..7a9ae906f
--- /dev/null
+++ b/components/core/src/clp_s/search/OrOfAndForm.cpp
@@ -0,0 +1,179 @@
+#include "OrOfAndForm.hpp"
+
+#include <vector>
+
+#include "SearchUtils.hpp"
+
+namespace clp_s::search {
+std::shared_ptr<Expression> OrOfAndForm::run(std::shared_ptr<Expression>& expr) {
+    auto parent = expr->get_parent();  // reattached to whichever node replaces the root
+    while (expr->get_num_operands() == 1 && expr->has_only_expression_operands()) {
+        bool invert = expr->is_inverted();  // carried down to the only child below
+        expr = std::static_pointer_cast<Expression>(*expr->op_begin());
+        expr->set_parent(parent);
+        if (invert) {
+            expr->invert();
+        }
+    }
+    // A root that is still inverted is rewritten with De Morgan (And/Or only)
+    if (expr->is_inverted()) {
+        de_morgan(expr);
+    }
+
+    // only need to further simplify and/or expressions
+    if (false == expr->has_only_expression_operands()) {
+        return expr;
+    }
+
+    return simplify(expr);
+}
+
+void OrOfAndForm::de_morgan(std::shared_ptr<Expression>& expr) {
+    std::shared_ptr<Expression> new_expr;
+    // Swap And <-> Or; the new node gets the opposite inversion flag of `expr`
+    if (std::dynamic_pointer_cast<AndExpr>(expr)) {
+        new_expr = OrExpr::create(!expr->is_inverted(), expr->get_parent());
+    } else if (std::dynamic_pointer_cast<OrExpr>(expr)) {
+        new_expr = AndExpr::create(!expr->is_inverted(), expr->get_parent());
+    } else {
+        // DeMorgan's doesn't apply; no modification required
+        return;
+    }
+    // Move (not copy) every operand to the new node, then reparent and invert each
+    new_expr->get_op_list().splice(new_expr->op_end(), expr->get_op_list());
+    for (auto it = new_expr->op_begin(); it != new_expr->op_end(); it++) {
+        auto sub_expr = std::static_pointer_cast<Expression>(*it);
+        sub_expr->set_parent(new_expr.get());
+        sub_expr->invert();
+    }
+    // Rebind the caller's handle; the old node no longer has any operands
+    expr = new_expr;
+}
+
+std::shared_ptr<Expression> OrOfAndForm::simplify(std::shared_ptr<Expression> const& expr) {
+    for (auto it = expr->op_begin(); it != expr->op_end(); it++) {
+        auto sub_expr = std::static_pointer_cast<Expression>(*it);
+        if (sub_expr->is_inverted()) {
+            // DeMorgan's already makes checks that input is Or or And so don't
+            // need to double check here
+            de_morgan(sub_expr);
+            *it = sub_expr;  // de_morgan may have rebound sub_expr to a new node
+        }
+        // Replace single-operand And/Or wrappers by their only child
+        while (sub_expr->get_num_operands() == 1 && sub_expr->has_only_expression_operands()) {
+            bool invert = sub_expr->is_inverted();  // carried down to the child below
+            sub_expr = std::static_pointer_cast<Expression>(*sub_expr->op_begin());
+            sub_expr->set_parent(expr.get());
+            *it = sub_expr;
+            if (invert) {
+                sub_expr->invert();
+            }
+        }
+
+        // Only need to simplify Or/And subexpr
+        if (sub_expr->has_only_expression_operands()) {
+            *it = simplify(sub_expr);
+        }
+    }
+    // Operands are done; now apply the rule that matches this node's own kind
+    if (std::dynamic_pointer_cast<OrExpr>(expr)) {
+        return simplify_or(expr);
+    } else if (std::dynamic_pointer_cast<AndExpr>(expr)) {
+        return simplify_and(expr);
+    } else {
+        // currently and/or are the only form of expressions we need to simplify
+        return expr;
+    }
+}
+
+std::shared_ptr<Expression> OrOfAndForm::simplify_or(std::shared_ptr<Expression> const& expr) {
+    std::vector<OpList::iterator> deleted;
+    // NOTE(review): splice_into presumably moves sub_expr's operands into expr - confirm
+    for (auto it = expr->op_begin(); it != expr->op_end(); it++) {
+        if (std::dynamic_pointer_cast<OrExpr>(*it)) {
+            auto sub_expr = std::static_pointer_cast<Expression>(*it);
+            deleted.push_back(it);
+            splice_into(expr, sub_expr, expr->op_begin());
+        }
+    }
+    // Nested OrExpr nodes are erased only now, after the loop over the list has ended
+    for (auto const& it : deleted) {
+        expr->get_op_list().erase(it);
+    }
+
+    return expr;
+}
+
+std::shared_ptr<Expression> OrOfAndForm::simplify_and(std::shared_ptr<Expression> const& expr) {
+    std::vector<OpList::iterator> deleted;
+    std::vector<OpList::iterator> deleted_or_expr;
+    std::vector<std::shared_ptr<Expression>> or_expressions;
+    // Pass 1: splice nested AndExprs into `expr` and note where the OrExpr operands are
+    for (auto it = expr->op_begin(); it != expr->op_end(); it++) {
+        if (std::dynamic_pointer_cast<AndExpr>(*it)) {
+            auto sub_expr = std::static_pointer_cast<Expression>(*it);
+            deleted.push_back(it);
+            splice_into(expr, sub_expr, expr->op_begin());
+        } else if (std::dynamic_pointer_cast<OrExpr>(*it)) {
+            deleted_or_expr.push_back(it);
+        }
+    }
+    // Nested AndExpr nodes are erased only now, after the loop over the list has ended
+    for (auto const& it : deleted) {
+        expr->get_op_list().erase(it);
+    }
+    // Without OrExpr operands there is nothing to distribute
+    if (deleted_or_expr.empty()) {
+        return expr;
+    }
+    // Detach the OrExpr operands; `expr` keeps only its other operands
+    for (auto const& it : deleted_or_expr) {
+        or_expressions.push_back(std::static_pointer_cast<Expression>(*it));
+        expr->get_op_list().erase(it);
+    }
+    // The result is a new, non-inverted OrExpr under the same parent as `expr`
+    auto new_or_expr = OrExpr::create(false, expr->get_parent());
+    ExpressionList prefix;
+    insert_all_combinations(
+            new_or_expr,
+            expr,
+            or_expressions.begin(),
+            or_expressions.end(),
+            prefix
+    );
+
+    return new_or_expr;
+}
+
+void OrOfAndForm::insert_all_combinations(
+        std::shared_ptr<Expression> const& new_or_expr,
+        std::shared_ptr<Expression> const& base_and_expr,
+        ExpressionVector::iterator cur,
+        ExpressionVector::iterator end,
+        ExpressionList& prefix
+) {
+    // Base case: `prefix` holds one chosen operand for every OrExpr in the range
+    if (cur == end) {
+        auto new_and_expr = base_and_expr->copy();
+        for (auto const& it : prefix) {
+            // these OrExpr are guaranteed to contain only FilterExpr/AndExpr
+            if (std::dynamic_pointer_cast<AndExpr>(it)) {
+                splice_into(new_and_expr, it->copy(), new_and_expr->op_end());
+            } else {
+                it->copy_append(new_and_expr.get());
+            }
+        }
+        new_or_expr->add_operand(new_and_expr);
+        return;
+    }
+
+    // Recursive case: try each operand of the current OrExpr in turn. `cur` is
+    // advanced once here, so every recursive call continues with the next OrExpr.
+    auto current_or = *cur;
+    cur++;
+    for (auto it = current_or->op_begin(); it != current_or->op_end(); it++) {
+        prefix.push_back(std::static_pointer_cast<Expression>(*it));
+        insert_all_combinations(new_or_expr, base_and_expr, cur, end, prefix);
+        prefix.pop_back();
+    }
+}
+}  // namespace clp_s::search
diff --git a/components/core/src/clp_s/search/OrOfAndForm.hpp b/components/core/src/clp_s/search/OrOfAndForm.hpp
new file mode 100644
index 000000000..7a400eb3f
--- /dev/null
+++ b/components/core/src/clp_s/search/OrOfAndForm.hpp
@@ -0,0 +1,66 @@
+#ifndef CLP_S_SEARCH_OROFANDFORM_HPP
+#define CLP_S_SEARCH_OROFANDFORM_HPP
+
+#include <vector>
+
+#include "AndExpr.hpp"
+#include "OrExpr.hpp"
+#include "Transformation.hpp"
+
+namespace clp_s::search {
+typedef std::vector<std::shared_ptr<Expression>> ExpressionVector;
+typedef std::list<std::shared_ptr<Expression>> ExpressionList;
+
+// TODO: handle degenerate forms like empty or/and expressions
+class OrOfAndForm : public Transformation {
+public:
+    // Methods inherited from Transformation
+    std::shared_ptr<Expression> run(std::shared_ptr<Expression>& expr) override;
+
+private:
+    /**
+     * Use De Morgan's laws to convert the expression to a canonical form
+     * @param expr the And/Or expression to rewrite; rebound to the new node on return
+     */
+    static void de_morgan(std::shared_ptr<Expression>& expr);
+
+    /**
+     * Simplify an expression
+     * @param expr an expression whose operands are all expressions themselves
+     * @return The simplified expression
+     */
+    static std::shared_ptr<Expression> simplify(std::shared_ptr<Expression> const& expr);
+
+    /**
+     * Simplify an Or expression
+     * @param expr the Or expression; nested Or operands are merged into it
+     * @return The simplified expression
+     */
+    static std::shared_ptr<Expression> simplify_or(std::shared_ptr<Expression> const& expr);
+
+    /**
+     * Simplify an And expression
+     * @param expr the And expression; nested And operands are merged into it
+     * @return The simplified expression
+     */
+    static std::shared_ptr<Expression> simplify_and(std::shared_ptr<Expression> const& expr);
+
+    /**
+     * Insert all combinations of And expressions into an Or expression
+     * @param new_or_expr the Or expression that receives one And expression per combination
+     * @param base_and_expr the And expression copied as the start of every combination
+     * @param cur the Or expression whose operands are chosen from at this step
+     * @param end the end of the range of Or expressions
+     * @param prefix the operands chosen so far, one per Or expression before cur
+     */
+    static void insert_all_combinations(
+            std::shared_ptr<Expression> const& new_or_expr,
+            std::shared_ptr<Expression> const& base_and_expr,
+            ExpressionVector::iterator cur,
+            ExpressionVector::iterator end,
+            ExpressionList& prefix
+    );
+};
+}  // namespace clp_s::search
+
+#endif  // CLP_S_SEARCH_OROFANDFORM_HPP
diff --git a/components/core/src/clp_s/search/Output.cpp b/components/core/src/clp_s/search/Output.cpp
new file mode 100644
index 000000000..37c89eccd
--- /dev/null
+++ b/components/core/src/clp_s/search/Output.cpp
@@ -0,0 +1,1182 @@
+#include "Output.hpp"
+
+#include <regex>
+#include <stack>
+
+#include <json/single_include/nlohmann/json.hpp>
+
+#include "../FileWriter.hpp"
+#include "../ReaderUtils.hpp"
+#include "../Utils.hpp"
+#include "AndExpr.hpp"
+#include "clp_search/EncodedVariableInterpreter.hpp"
+#include "clp_search/Grep.hpp"
+#include "EvaluateTimestampIndex.hpp"
+#include "FilterExpr.hpp"
+#include "OrExpr.hpp"
+#include "SearchUtils.hpp"
+
+using json = nlohmann::json;
+
+#define eval(op, a, b) (((op) == FilterOperation::EQ) ? ((a) == (b)) : ((a) != (b)))  // NOTE: any op other than EQ compares with !=
+
+namespace clp_s::search {
+void Output::filter() {  // Search every archive under m_archives_dir and write matches to stdout
+    auto top_level_expr = m_expr;  // m_expr is replaced per schema below; keep the original
+
+    for (auto const& archive : ReaderUtils::get_archives(m_archives_dir)) {
+        std::vector<int32_t> matched_schemas;
+        bool has_array = false;
+        bool has_array_search = false;
+        for (int32_t schema_id : ReaderUtils::get_schemas(archive)) {
+            if (m_match.schema_matched(schema_id)) {
+                matched_schemas.push_back(schema_id);
+                if (m_match.has_array(schema_id)) {
+                    has_array = true;
+                }
+                if (m_match.has_array_search(schema_id)) {
+                    has_array_search = true;
+                }
+            }
+        }
+
+        // Skip this archive entirely if m_match.schema_matched() rejected
+        // every one of its schemas
+        if (matched_schemas.empty()) {
+            continue;
+        }
+
+        // Skip decompressing sub-archive if it won't match based on the timestamp
+        // range index
+        EvaluateTimestampIndex timestamp_index(ReaderUtils::read_local_timestamp_dictionary(archive)
+        );
+        if (timestamp_index.run(top_level_expr) == EvaluatedValue::False) {
+            continue;
+        }
+
+        m_var_dict = ReaderUtils::get_variable_dictionary_reader(archive);
+        m_log_dict = ReaderUtils::get_log_type_dictionary_reader(archive);
+        // Read both dictionaries before populate_string_queries() searches them below
+        m_var_dict->read_new_entries();
+        m_log_dict->read_new_entries();
+
+        if (has_array) {  // array dictionary is only opened when a matched schema has an array
+            m_array_dict = ReaderUtils::get_array_dictionary_reader(archive);
+            if (has_array_search) {
+                m_array_dict->read_new_entries();
+            } else {
+                m_array_dict->read_new_entries(true);  // NOTE(review): flag meaning not visible here
+            }
+        }
+
+        m_string_query_map.clear();  // string-query caches are rebuilt for each archive
+        m_string_var_match_map.clear();
+        populate_string_queries(top_level_expr);
+
+        std::string message;
+        for (int32_t schema_id : matched_schemas) {
+            m_expr_clp_query.clear();  // reset the per-schema search state
+            m_expr_var_match_map.clear();
+            m_expr = m_match.get_query_for_schema(schema_id)->copy();
+            m_wildcard_to_searched_columns.clear();
+            m_wildcard_to_searched_clpstrings.clear();
+            m_wildcard_to_searched_varstrings.clear();
+            m_wildcard_to_searched_datestrings.clear();
+            m_wildcard_to_searched_floatdatestrings.clear();
+            m_schema = schema_id;
+
+            populate_searched_wildcard_columns(m_expr);
+
+            m_expression_value = constant_propagate(m_expr, schema_id);
+
+            if (m_expression_value == EvaluatedValue::False) {  // query is False for this schema
+                continue;
+            }
+
+            add_wildcard_columns_to_searched_columns();
+
+            SchemaReader reader(m_schema_tree, schema_id);
+            reader.open(archive + "/encoded_messages/" + std::to_string(schema_id));
+            ReaderUtils::append_reader_columns(
+                    &reader,
+                    (*m_schemas)[schema_id],
+                    m_schema_tree,
+                    m_var_dict,
+                    m_log_dict,
+                    m_array_dict,
+                    m_timestamp_dict
+            );
+            reader.load();
+
+            reader.initialize_filter(this);  // `this` is handed to the reader as its filter
+            while (reader.get_next_message(message, this)) {
+                write(STDOUT_FILENO, message.c_str(), message.length());  // NOTE(review): return value ignored
+            }
+            reader.close();
+        }
+
+        m_var_dict->close();
+        m_log_dict->close();
+
+        if (has_array) {
+            m_array_dict->close();
+        }
+    }
+}
+
+void Output::init(
+        SchemaReader* reader,
+        int32_t schema_id,
+        std::unordered_map<int32_t, BaseColumnReader*>& columns
+) {
+    // Bind this filter to the reader/schema and start from empty column partitions.
+    m_reader = reader;
+    m_schema = schema_id;
+    m_searched_columns.clear();
+    m_other_columns.clear();
+
+    for (auto& [column_id, column_reader] : columns) {
+        // Columns the query never touches are only extracted once a message has matched.
+        if (false == m_match.schema_searches_against_column(schema_id, column_id)) {
+            m_other_columns.push_back(column_reader);
+            continue;
+        }
+        // String-like columns go into typed lookup tables and are extracted with the other
+        // columns; every remaining searched column is extracted up front in filter().
+        auto* clp_reader = dynamic_cast<ClpStringColumnReader*>(column_reader);
+        auto* var_reader = dynamic_cast<VariableStringColumnReader*>(column_reader);
+        if (nullptr != clp_reader && clp_reader->get_type() == "string") {
+            m_clp_string_readers[column_id] = clp_reader;
+        } else if (nullptr != var_reader && var_reader->get_type() == "string") {
+            m_var_string_readers[column_id] = var_reader;
+        } else if (auto* date_reader = dynamic_cast<DateStringColumnReader*>(column_reader)) {
+            m_datestring_readers[column_id] = date_reader;
+        } else if (auto* float_date_reader
+                   = dynamic_cast<FloatDateStringColumnReader*>(column_reader))
+        {
+            m_floatdatestring_readers[column_id] = float_date_reader;
+        } else {
+            m_searched_columns.push_back(column_reader);
+            continue;
+        }
+        m_other_columns.push_back(column_reader);
+    }
+}
+
+bool Output::filter(
+        uint64_t cur_message,
+        std::map<int32_t, std::variant<int64_t, double, std::string, uint8_t>>& extracted_values
+) {
+    // Start a fresh message: no string column has been decoded during evaluation yet.
+    m_cached_string_columns.clear();
+    m_cur_message = cur_message;
+
+    // Only the columns the predicate reads are extracted before evaluating it.
+    for (auto* searched : m_searched_columns) {
+        extracted_values[searched->get_id()] = searched->extract_value(cur_message);
+    }
+    bool const is_match = evaluate(m_expr.get(), m_schema, extracted_values);
+    if (is_match) {
+        // Extract the rest, skipping string columns the evaluation already cached.
+        for (auto* remaining : m_other_columns) {
+            if (0 == m_cached_string_columns.count(remaining->get_id())) {
+                extracted_values[remaining->get_id()] = remaining->extract_value(cur_message);
+            }
+        }
+    }
+    return is_match;
+}
+
+enum CurExpr {  // Kind of expression node being visited by Output::evaluate
+    AND,
+    OR,
+    FILTER
+};
+
+bool Output::evaluate(  // Iteratively evaluates the And/Or/Filter tree rooted at expr for one message
+        Expression* expr,
+        int32_t schema,
+        std::map<int32_t, std::variant<int64_t, double, std::string, uint8_t>>& extracted_values
+) {
+    if (m_expression_value == EvaluatedValue::True) {  // whole expression already known to be true
+        return true;
+    }
+
+    std::stack<CurExpr, std::vector<CurExpr>> parent_type;  // kinds of the enclosing And/Or nodes
+    std::stack<OpList::iterator, std::vector<OpList::iterator>> parent_it;  // their next operand
+
+    Expression* cur = expr;
+    CurExpr cur_type = CurExpr::FILTER;  // kept unless one of the casts below succeeds
+    bool ret = false;
+
+    if (dynamic_cast<AndExpr*>(cur)) {
+        cur_type = CurExpr::AND;
+        parent_type.push(CurExpr::AND);
+        parent_it.push(cur->op_begin());
+        ret = true;  // an And stays true until one operand evaluates to false
+    } else if (dynamic_cast<OrExpr*>(cur)) {
+        cur_type = CurExpr::OR;
+        parent_type.push(CurExpr::OR);
+        parent_it.push(cur->op_begin());
+        ret = false;  // an Or stays false until one operand evaluates to true
+    }
+
+    do {
+        switch (cur_type) {
+            case CurExpr::AND:
+                if (false == ret || parent_it.top() == cur->op_end()) {  // short-circuit or done
+                    parent_type.pop();
+                    parent_it.pop();
+                    break;
+                } else {
+                    cur = static_cast<Expression*>((parent_it.top()++)->get());  // next operand
+                    if (dynamic_cast<FilterExpr*>(cur)) {
+                        cur_type = CurExpr::FILTER;
+                    } else {
+                        // must be an OR-expr because AST would have been simplified
+                        // to eliminate nested AND
+                        cur_type = CurExpr::OR;
+                        parent_type.push(CurExpr::OR);
+                        parent_it.push(cur->op_begin());
+                        ret = false;
+                    }
+                    continue;  // evaluate the operand before finishing this node
+                }
+            case CurExpr::FILTER:
+                if (static_cast<FilterExpr*>(cur)->get_column()->is_pure_wildcard()) {
+                    ret = evaluate_wildcard_filter(
+                            static_cast<FilterExpr*>(cur),
+                            schema,
+                            extracted_values
+                    );
+                } else {
+                    ret = evaluate_filter(static_cast<FilterExpr*>(cur), schema, extracted_values);
+                }
+                break;
+            case CurExpr::OR:
+                if (ret || parent_it.top() == cur->op_end()) {  // short-circuit or done
+                    parent_type.pop();
+                    parent_it.pop();
+                    break;
+                } else {
+                    cur = static_cast<Expression*>((parent_it.top()++)->get());  // next operand
+                    if (dynamic_cast<FilterExpr*>(cur)) {
+                        cur_type = CurExpr::FILTER;
+                    } else {
+                        // must be an AND-expr because AST would have been simplified
+                        // to eliminate nested OR
+                        cur_type = CurExpr::AND;
+                        parent_type.push(CurExpr::AND);
+                        parent_it.push(cur->op_begin());
+                        ret = true;
+                    }
+                    continue;  // evaluate the operand before finishing this node
+                }
+        }
+
+        ret = cur->is_inverted() ? !ret : ret;  // node finished: apply its inversion flag
+        if (false == parent_type.empty()) {
+            cur_type = parent_type.top();  // resume the enclosing And/Or
+        }
+        cur = cur->get_parent();
+    } while (cur != nullptr);  // ends once a node without a parent has been finished
+
+    return ret;
+}
+
+bool Output::evaluate_wildcard_filter(  // True if any column mapped to this wildcard filter matches
+        FilterExpr* expr,
+        int32_t schema,  // not used in this function
+        std::map<int32_t, std::variant<int64_t, double, std::string, uint8_t>>& extracted_values
+) {
+    auto literal = expr->get_operand();
+    auto* column = expr->get_column().get();
+    Query* q = m_expr_clp_query[expr];  // NOTE(review): operator[] -- confirm expr is always present
+    std::unordered_set<int64_t>* matching_vars = m_expr_var_match_map[expr];
+    auto op = expr->get_operation();
+    for (int32_t column_id : m_wildcard_to_searched_clpstrings[column]) {
+        if (evaluate_clp_string_filter(op, q, column_id, literal, extracted_values)) {
+            return true;
+        }
+    }
+
+    for (int32_t column_id : m_wildcard_to_searched_varstrings[column]) {
+        if (evaluate_var_string_filter(op, m_var_string_readers[column_id], matching_vars, literal))
+        {
+            return true;
+        }
+    }
+
+    for (int32_t column_id : m_wildcard_to_searched_datestrings[column]) {
+        if (evaluate_epoch_date_filter(op, m_datestring_readers[column_id], literal)) {
+            return true;
+        }
+    }
+
+    for (int32_t column_id : m_wildcard_to_searched_floatdatestrings[column]) {
+        if (evaluate_float_date_filter(op, m_floatdatestring_readers[column_id], literal)) {
+            return true;
+        }
+    }
+
+    m_maybe_number = expr->get_column()->matches_type(LiteralType::FloatT);  // read by the array scan
+    for (int32_t column_id : m_wildcard_to_searched_columns[column]) {
+        bool ret = false;
+        switch (node_to_literal_type(m_schema_tree->get_node(column_id)->get_type())) {  // no default: other types leave ret false
+            case LiteralType::IntegerT:
+                ret = evaluate_int_filter(
+                        op,
+                        std::get<int64_t>(extracted_values[column_id]),
+                        literal
+                );
+                break;
+            case LiteralType::FloatT:
+                ret = evaluate_float_filter(
+                        op,
+                        std::get<double>(extracted_values[column_id]),
+                        literal
+                );
+                break;
+            case LiteralType::BooleanT:
+                ret = evaluate_bool_filter(
+                        op,
+                        std::get<uint8_t>(extracted_values[column_id]),
+                        literal
+                );
+                break;
+            case LiteralType::ArrayT:
+                ret = evaluate_wildcard_array_filter(
+                        op,
+                        std::get<std::string>(extracted_values[column_id]),
+                        literal
+                );
+                break;
+        }
+
+        if (ret) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+bool Output::evaluate_filter(  // Evaluates a filter on a single column, dispatching on its literal type
+        FilterExpr* expr,
+        int32_t schema,  // not used in this function
+        std::map<int32_t, std::variant<int64_t, double, std::string, uint8_t>>& extracted_values
+) {
+    auto column = expr->get_column().get();
+    int32_t column_id = column->get_column_id();
+    auto literal = expr->get_operand();
+    Query* q = nullptr;
+    ClpStringColumnReader* clp_reader = nullptr;  // NOTE(review): assigned below but never read
+    VariableStringColumnReader* var_reader = nullptr;
+    std::unordered_set<int64_t>* matching_vars = nullptr;
+    switch (column->get_literal_type()) {
+        case LiteralType::IntegerT:
+            return evaluate_int_filter(
+                    expr->get_operation(),
+                    std::get<int64_t>(extracted_values[column_id]),
+                    literal
+            );
+        case LiteralType::FloatT:
+            return evaluate_float_filter(
+                    expr->get_operation(),
+                    std::get<double>(extracted_values[column_id]),
+                    literal
+            );
+        case LiteralType::ClpStringT:
+            q = m_expr_clp_query[expr];  // NOTE(review): operator[] here, .at() in the VarStringT case
+            clp_reader = m_clp_string_readers[column_id];
+            return evaluate_clp_string_filter(
+                    expr->get_operation(),
+                    q,
+                    column_id,
+                    literal,
+                    extracted_values
+            );
+        case LiteralType::VarStringT:
+            var_reader = m_var_string_readers[column_id];
+            matching_vars = m_expr_var_match_map.at(expr);
+            return evaluate_var_string_filter(
+                    expr->get_operation(),
+                    var_reader,
+                    matching_vars,
+                    literal
+            );
+        case LiteralType::BooleanT:
+            return evaluate_bool_filter(
+                    expr->get_operation(),
+                    std::get<uint8_t>(extracted_values[column_id]),
+                    literal
+            );
+        case LiteralType::ArrayT:
+            return evaluate_array_filter(
+                    expr->get_operation(),
+                    column->get_unresolved_tokens(),
+                    std::get<std::string>(extracted_values[column_id]),
+                    literal
+            );
+        case LiteralType::EpochDateT:
+            return evaluate_epoch_date_filter(
+                    expr->get_operation(),
+                    m_datestring_readers[column_id],
+                    literal
+            );
+        case LiteralType::FloatDateT:
+            return evaluate_float_date_filter(
+                    expr->get_operation(),
+                    m_floatdatestring_readers[column_id],
+                    literal
+            );
+            // case LiteralType::NullT:
+            //  null checks are always turned into existence operators --
+            //  no need to evaluate here
+        default:
+            return false;
+    }
+}
+
+bool Output::evaluate_int_filter(
+        FilterOperation op,
+        int64_t value,
+        std::shared_ptr<Literal> const& operand
+) {
+    // Existence operators are accepted without looking at the operand.
+    bool const is_existence_op = FilterOperation::EXISTS == op || FilterOperation::NEXISTS == op;
+    if (is_existence_op) {
+        return true;
+    }
+
+    int64_t rhs;
+    if (false == operand->as_int(rhs, op)) {  // operand is not usable as an integer here
+        return false;
+    }
+
+    if (FilterOperation::EQ == op) {
+        return value == rhs;
+    } else if (FilterOperation::NEQ == op) {
+        return value != rhs;
+    } else if (FilterOperation::LT == op) {
+        return value < rhs;
+    } else if (FilterOperation::GT == op) {
+        return value > rhs;
+    } else if (FilterOperation::LTE == op) {
+        return value <= rhs;
+    } else if (FilterOperation::GTE == op) {
+        return value >= rhs;
+    }
+    return false;
+}
+
+bool Output::evaluate_float_filter(
+        FilterOperation op,
+        double value,
+        std::shared_ptr<Literal> const& operand
+) {
+    // Existence operators are accepted without looking at the operand.
+    bool const is_existence_op = FilterOperation::EXISTS == op || FilterOperation::NEXISTS == op;
+    if (is_existence_op) {
+        return true;
+    }
+
+    double rhs;
+    if (false == operand->as_float(rhs, op)) {  // operand is not usable as a float here
+        return false;
+    }
+
+    if (FilterOperation::EQ == op) {
+        return value == rhs;
+    } else if (FilterOperation::NEQ == op) {
+        return value != rhs;
+    } else if (FilterOperation::LT == op) {
+        return value < rhs;
+    } else if (FilterOperation::GT == op) {
+        return value > rhs;
+    } else if (FilterOperation::LTE == op) {
+        return value <= rhs;
+    } else if (FilterOperation::GTE == op) {
+        return value >= rhs;
+    }
+    return false;
+}
+
+bool Output::evaluate_clp_string_filter(  // Matches the current message's CLP string column against q
+        FilterOperation op,
+        Query* q,
+        int32_t column_id,
+        std::shared_ptr<Literal> const& operand,  // not used in this function
+        std::map<int32_t, std::variant<int64_t, double, std::string, uint8_t>>& extracted_values
+) {
+    if (FilterOperation::EXISTS == op || FilterOperation::NEXISTS == op) {
+        return true;
+    }
+
+    if (op != FilterOperation::EQ && op != FilterOperation::NEQ) {  // only EQ/NEQ are handled
+        return false;
+    }
+
+    auto* reader = m_clp_string_readers[column_id];
+    int64_t id = reader->get_encoded_id(m_cur_message);  // checked by matches_logtype() below
+    bool matched = false;
+
+    if (q->search_string_matches_all()) {  // everything matches: true for EQ, false for NEQ
+        return op == FilterOperation::EQ;
+    }
+
+    auto vars = reader->get_encoded_vars(m_cur_message);
+    for (auto const& subquery : q->get_sub_queries()) {
+        if (subquery.matches_logtype(id) && subquery.matches_vars(vars)) {
+            matched = true;
+
+            if (subquery.wildcard_match_required()) {  // decode and match the full text
+                std::string decompressed_message
+                        = std::get<std::string>(reader->extract_value(m_cur_message));
+                matched = StringUtils::wildcard_match_unsafe(
+                        decompressed_message,
+                        q->get_search_string(),
+                        !q->get_ignore_case()
+                );
+                matched = (op == FilterOperation::EQ) == matched;  // flip the result for NEQ
+                if (matched) {
+                    extracted_values[column_id] = std::move(decompressed_message);  // reuse decoded text
+                    m_cached_string_columns.insert(column_id);  // filter() skips columns in this set
+                }
+                return matched;
+            }
+
+            break;
+        }
+    }
+
+    return (op == FilterOperation::EQ) == matched;  // flip the result for NEQ
+}
+
+bool Output::evaluate_var_string_filter(
+        FilterOperation op,
+        VariableStringColumnReader* reader,
+        std::unordered_set<int64_t>* matching_vars,
+        std::shared_ptr<Literal> const& operand
+) const {
+    // Existence operators are accepted without consulting the column.
+    if (FilterOperation::EXISTS == op || FilterOperation::NEXISTS == op) {
+        return true;
+    }
+
+    // Look up this message's variable id in the precomputed set of matching ids.
+    bool const is_member = 0 != matching_vars->count(reader->get_variable_id(m_cur_message));
+
+    if (FilterOperation::EQ == op) {
+        return is_member;
+    } else if (FilterOperation::NEQ == op) {
+        return false == is_member;
+    }
+    return false;
+}
+
+bool Output::evaluate_array_filter(  // Entry point: parses the column's JSON text, then recurses
+        FilterOperation op,
+        DescriptorList const& unresolved_tokens,
+        std::string const& value,
+        std::shared_ptr<Literal> const& operand
+) const {
+    auto object = json::parse(value);  // NOTE(review): nlohmann's parse throws on malformed input
+    return evaluate_array_filter(object, op, unresolved_tokens, 0, operand, true);  // start at token 0
+}
+
+bool Output::evaluate_array_filter(  // Recursive walk over parsed JSON, consuming unresolved_tokens
+        json& object,
+        FilterOperation op,
+        DescriptorList const& unresolved_tokens,
+        size_t cur_idx,  // index of the next token in unresolved_tokens to be matched
+        std::shared_ptr<Literal> const& operand,
+        bool array_or_object  // callers in this file pass true for arrays, false for objects
+) const {
+    bool match = false;
+    if (cur_idx > unresolved_tokens.size()) {
+        return false;
+    }
+
+    for (auto i = object.begin(); i != object.end(); ++i) {
+        auto& value = i.value();
+        if (value.is_array()) {
+            match |= evaluate_array_filter(value, op, unresolved_tokens, cur_idx, operand, true);
+        } else if (value.is_object()) {
+            if (false == array_or_object && cur_idx < unresolved_tokens.size()
+                && i.key() == unresolved_tokens[cur_idx].get_token())
+            {  // key matches the current token: descend and advance to the next token
+                match |= evaluate_array_filter(
+                        value,
+                        op,
+                        unresolved_tokens,
+                        cur_idx + 1,
+                        operand,
+                        false
+                );
+            } else if (array_or_object) {  // object nested in an array: descend, same token
+                match |= evaluate_array_filter(
+                        value,
+                        op,
+                        unresolved_tokens,
+                        cur_idx,
+                        operand,
+                        false
+                );
+            }
+        } else if (((array_or_object && cur_idx == unresolved_tokens.size())
+                    || (!array_or_object && cur_idx == unresolved_tokens.size() - 1
+                        && i.key() == unresolved_tokens[cur_idx].get_token())))
+        {  // scalar reached with all tokens consumed, or under a key equal to the last token
+            std::string tmp_string;
+            int64_t tmp_int;
+            double tmp_float;
+            bool tmp_bool;
+            if (FilterOperation::EXISTS == op || FilterOperation::NEXISTS == op
+                || (value.is_number_integer() && operand->as_int(tmp_int, op)
+                    && eval(op, value.get<int64_t>(), tmp_int))  // eval(): non-EQ ops act as NEQ
+                || (value.is_number_float() && operand->as_float(tmp_float, op)
+                    && eval(op, value.get<double>(), tmp_float))
+                || (value.is_boolean() && operand->as_bool(tmp_bool, op)
+                    && eval(op, value.get<bool>(), tmp_bool)))
+            {
+                match = true;
+            } else if (value.is_string() && (operand->as_var_string(tmp_string, op) || operand->as_clp_string(tmp_string, op)))
+            {
+                std::string s = value.get<std::string>();
+                match = wildcard_match(s, tmp_string) ? op == FilterOperation::EQ
+                                                      : op == FilterOperation::NEQ;
+            }
+        }
+
+        if (match) {  // stop at the first matching element
+            return true;
+        }
+    }
+
+    return match;
+}
+
+bool Output::evaluate_wildcard_array_filter(  // Parses `value` as a JSON array and scans its items
+        FilterOperation op,
+        std::string& value,  // non-const: its capacity may be grown below
+        std::shared_ptr<Literal> const& operand
+) {
+    if (value.capacity() < (value.size() + simdjson::SIMDJSON_PADDING)) {  // leave padding room
+        value.reserve(value.size() + simdjson::SIMDJSON_PADDING);
+    }
+    auto obj = m_array_parser.iterate(value);
+    ondemand::array array = obj.get_array();
+
+    // pre-evaluate whether the operand can be matched as a string so that this is not
+    // recomputed for every item (m_maybe_number is set by evaluate_wildcard_filter)
+    m_maybe_string = operand->as_var_string(m_array_search_string, op)
+                     || operand->as_clp_string(m_array_search_string, op);
+
+    return evaluate_wildcard_array_filter(array, op, operand);
+}
+
+bool Output::evaluate_wildcard_array_filter(
+        ondemand::array& array,
+        FilterOperation op,
+        std::shared_ptr<Literal> const& operand
+) const {
+    // Depth-first scan of a JSON array: true as soon as one nested scalar satisfies op/operand.
+    // Reads m_maybe_string, m_maybe_number and m_array_search_string, which callers set up first.
+    bool match = false;
+    for (auto item : array) {
+        switch (item.type()) {
+            case ondemand::json_type::object: {
+                ondemand::object nested_object = item.get_object();
+                match |= evaluate_wildcard_array_filter(nested_object, op, operand);
+            } break;
+            case ondemand::json_type::array: {
+                ondemand::array nested_array = item.get_array();
+                match |= evaluate_wildcard_array_filter(nested_array, op, operand);
+            } break;
+            case ondemand::json_type::string: {
+                if (false == m_maybe_string) {
+                    break;
+                }
+                if (wildcard_match(item.get_string().value(), m_array_search_string)) {
+                    match |= op == FilterOperation::EQ;
+                }
+            } break;
+            case ondemand::json_type::number: {
+                if (false == m_maybe_number) {
+                    break;
+                }
+                // A failed operand conversion means "no match"; the converted value is only
+                // compared after as_float()/as_int() reports success.
+                ondemand::number number = item.get_number();
+                if (number.is_double()) {
+                    double tmp_double{};
+                    match |= operand->as_float(tmp_double, op)
+                             && eval(op, number.get_double(), tmp_double);
+                } else if (number.is_uint64()) {
+                    // Compare as unsigned so a negative operand cannot alias a large value.
+                    int64_t tmp_int{};
+                    bool const usable = operand->as_int(tmp_int, op);
+                    bool const equal = tmp_int >= 0
+                                       && static_cast<uint64_t>(tmp_int) == number.get_uint64();
+                    match |= usable && ((FilterOperation::EQ == op) == equal);
+                } else {
+                    int64_t tmp_int{};
+                    match |= operand->as_int(tmp_int, op)
+                             && eval(op, number.get_int64(), tmp_int);
+                }
+            } break;
+            case ondemand::json_type::boolean: {
+                bool tmp;
+                match |= operand->as_bool(tmp, op) && eval(op, item.get_bool(), tmp);
+            } break;
+            case ondemand::json_type::null:
+                if (operand->as_null(op)) {
+                    match |= op == FilterOperation::EQ;
+                }
+                break;
+        }
+
+        if (match) {
+            return true;
+        }
+    }
+    return false;
+}
+
+bool Output::evaluate_wildcard_array_filter(
+        ondemand::object& object,
+        FilterOperation op,
+        std::shared_ptr<Literal> const& operand
+) const {
+    // Depth-first scan of a JSON object's field values: true as soon as one satisfies op/operand.
+    // Reads m_maybe_string, m_maybe_number and m_array_search_string, which callers set up first.
+    bool match = false;
+    for (auto field : object) {
+        ondemand::value item = field.value();
+        switch (item.type()) {
+            case ondemand::json_type::object: {
+                ondemand::object nested_object = item.get_object();
+                match |= evaluate_wildcard_array_filter(nested_object, op, operand);
+            } break;
+            case ondemand::json_type::array: {
+                ondemand::array nested_array = item.get_array();
+                match |= evaluate_wildcard_array_filter(nested_array, op, operand);
+            } break;
+            case ondemand::json_type::string: {
+                if (false == m_maybe_string) {
+                    break;
+                }
+                if (wildcard_match(item.get_string().value(), m_array_search_string)) {
+                    match |= op == FilterOperation::EQ;
+                }
+            } break;
+            case ondemand::json_type::number: {
+                if (false == m_maybe_number) {
+                    break;
+                }
+                // A failed operand conversion means "no match"; the converted value is only
+                // compared after as_float()/as_int() reports success.
+                ondemand::number number = item.get_number();
+                if (number.is_double()) {
+                    double tmp_double{};
+                    match |= operand->as_float(tmp_double, op)
+                             && eval(op, number.get_double(), tmp_double);
+                } else if (number.is_uint64()) {
+                    // Compare as unsigned so a negative operand cannot alias a large value.
+                    int64_t tmp_int{};
+                    bool const usable = operand->as_int(tmp_int, op);
+                    bool const equal = tmp_int >= 0
+                                       && static_cast<uint64_t>(tmp_int) == number.get_uint64();
+                    match |= usable && ((FilterOperation::EQ == op) == equal);
+                } else {
+                    int64_t tmp_int{};
+                    match |= operand->as_int(tmp_int, op)
+                             && eval(op, number.get_int64(), tmp_int);
+                }
+            } break;
+            case ondemand::json_type::boolean: {
+                bool tmp;
+                match |= operand->as_bool(tmp, op) && eval(op, item.get_bool(), tmp);
+            } break;
+            case ondemand::json_type::null:
+                if (operand->as_null(op)) {
+                    match |= op == FilterOperation::EQ;
+                }
+                break;
+        }
+
+        if (match) {
+            return true;
+        }
+    }
+    return false;
+}
+
+bool Output::evaluate_bool_filter(
+        FilterOperation op,
+        bool value,
+        std::shared_ptr<Literal> const& operand
+) {
+    // Existence operators are accepted without looking at the operand.
+    bool const is_existence_op = FilterOperation::EXISTS == op || FilterOperation::NEXISTS == op;
+    if (is_existence_op) {
+        return true;
+    }
+
+    bool rhs;
+    if (false == operand->as_bool(rhs, op)) {  // operand is not usable as a boolean here
+        return false;
+    }
+
+    if (FilterOperation::EQ == op) {
+        return value == rhs;
+    } else if (FilterOperation::NEQ == op) {
+        return value != rhs;
+    }
+    return false;
+}
+
+void Output::populate_string_queries(std::shared_ptr<Expression> const& expr) {  // caches dictionary lookups per query string
+    if (expr->has_only_expression_operands()) {  // recurse into each operand expression
+        for (auto const& op : expr->get_op_list()) {
+            populate_string_queries(std::static_pointer_cast<Expression>(op));
+        }
+        return;
+    }
+
+    auto filter = std::dynamic_pointer_cast<FilterExpr>(expr);
+    if (filter != nullptr
+        && !(filter->get_operation() == FilterOperation::EXISTS
+             || filter->get_operation() == FilterOperation::NEXISTS))
+    {  // existence filters are skipped: nothing is looked up for them here
+        if (filter->get_column()->matches_type(LiteralType::ClpStringT)) {
+            std::string query_string;
+            filter->get_operand()->as_clp_string(query_string, filter->get_operation());
+
+            if (m_string_query_map.count(query_string)) {
+                return;  // NOTE(review): also skips the VarStringT lookup below for this filter
+            }
+
+            // search on log type dictionary
+            Query& q = m_string_query_map[query_string];
+            if (query_string.find("*") != std::string::npos
+                || filter->get_column()->matches_type(LiteralType::VarStringT))
+            {
+                // if it matches VarStringT then it contains no space, so we
+                // don't add more wildcards. Likewise if it already contains some wildcards
+                // we do not add more
+                Grep::process_raw_query(m_log_dict, m_var_dict, query_string, false, q, false);
+            } else {
+                Grep::process_raw_query(m_log_dict, m_var_dict, query_string, false, q);
+            }
+        }
+        SubQuery sub_query;  // filled by the wildcard dictionary search below
+        if (filter->get_column()->matches_type(LiteralType::VarStringT)) {
+            std::string query_string;
+            filter->get_operand()->as_var_string(query_string, filter->get_operation());
+            if (m_string_var_match_map.count(query_string)) {  // already resolved for this archive
+                return;
+            }
+
+            std::unordered_set<int64_t>& matching_vars = m_string_var_match_map[query_string];
+            if (query_string.find('*') == std::string::npos) {  // no '*': look up one entry by value
+                auto entry = m_var_dict->get_entry_matching_value(query_string, false);
+
+                if (entry != nullptr) {
+                    matching_vars.insert(entry->get_id());
+                }
+            } else if (EncodedVariableInterpreter::
+                               wildcard_search_dictionary_and_get_encoded_matches(
+                                       query_string,
+                                       *m_var_dict,
+                                       false,
+                                       sub_query
+                               ))
+            {  // collect the ids of every dictionary entry reported in sub_query
+                for (auto& var : sub_query.get_vars()) {
+                    if (var.is_precise_var()) {
+                        auto entry = var.get_var_dict_entry();
+                        if (entry != nullptr) {
+                            matching_vars.insert(entry->get_id());
+                        }
+                    } else {
+                        for (auto entry : var.get_possible_var_dict_entries()) {
+                            matching_vars.insert(entry->get_id());
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+void Output::populate_searched_wildcard_columns(std::shared_ptr<Expression> const& expr) {
+    if (expr->has_only_expression_operands()) {
+        for (auto const& operand : expr->get_op_list()) {
+            populate_searched_wildcard_columns(std::static_pointer_cast<Expression>(operand));
+        }
+        return;
+    }
+    auto filter = std::dynamic_pointer_cast<FilterExpr>(expr);
+    if (nullptr == filter || false == filter->get_column()->is_pure_wildcard()) {
+        return;
+    }
+    auto column = filter->get_column().get();
+    for (int32_t node_id : (*m_schemas)[m_schema]) {
+        auto node_type = m_schema_tree->get_node(node_id)->get_type();
+        if (column->matches_type(node_to_literal_type(node_type))) {
+            auto* bucket = &m_wildcard_to_searched_columns;  // arrays and basic types
+            if (node_type == NodeType::CLPSTRING) {
+                bucket = &m_wildcard_to_searched_clpstrings;
+            } else if (node_type == NodeType::VARSTRING) {
+                bucket = &m_wildcard_to_searched_varstrings;
+            } else if (node_type == NodeType::DATESTRING) {
+                bucket = &m_wildcard_to_searched_datestrings;
+            } else if (node_type == NodeType::FLOATDATESTRING) {
+                bucket = &m_wildcard_to_searched_floatdatestrings;
+            }
+            (*bucket)[column].push_back(node_id);
+        }
+    }
+}
+
+void Output::add_wildcard_columns_to_searched_columns() {
+    for (auto const& [wildcard, nodes] : m_wildcard_to_searched_clpstrings) {
+        for (int32_t column_id : nodes) {
+            m_match.add_searched_column_to_schema(m_schema, column_id);
+        }
+    }
+
+    for (auto const& [wildcard, nodes] : m_wildcard_to_searched_varstrings) {
+        for (int32_t column_id : nodes) {
+            m_match.add_searched_column_to_schema(m_schema, column_id);
+        }
+    }
+
+    for (auto const& [wildcard, nodes] : m_wildcard_to_searched_datestrings) {
+        for (int32_t column_id : nodes) {
+            m_match.add_searched_column_to_schema(m_schema, column_id);
+        }
+    }
+
+    for (auto const& [wildcard, nodes] : m_wildcard_to_searched_floatdatestrings) {
+        for (int32_t column_id : nodes) {
+            m_match.add_searched_column_to_schema(m_schema, column_id);
+        }
+    }
+
+    for (auto const& [wildcard, nodes] : m_wildcard_to_searched_columns) {
+        for (int32_t column_id : nodes) {
+            m_match.add_searched_column_to_schema(m_schema, column_id);
+        }
+    }
+}
+
+EvaluatedValue
+Output::constant_propagate(std::shared_ptr<Expression> const& expr, int32_t schema_id) {
+    if (std::dynamic_pointer_cast<OrExpr>(expr)) {
+        bool any_unknown = false;
+        std::vector<OpList::iterator> to_delete;
+        for (auto it = expr->op_begin(); it != expr->op_end(); it++) {
+            auto sub_expr = std::static_pointer_cast<Expression>(*it);
+            EvaluatedValue ret = constant_propagate(sub_expr, schema_id);
+            if (ret == EvaluatedValue::True) {
+                return expr->is_inverted() ? EvaluatedValue::False : EvaluatedValue::True;
+            } else if (ret == EvaluatedValue::False) {
+                // no need to add this sub expression to used expression set
+                // but mark it for deletion
+                to_delete.push_back(it);
+            } else /*if (ret == EvaluatedValue::Unknown)*/ {
+                any_unknown = true;
+            }
+        }
+
+        if (any_unknown) {
+            // some unknowns -- delete guaranteed false entries, and
+            // propagate unknown
+            for (OpList::iterator& it : to_delete) {
+                expr->get_op_list().erase(it);
+            }
+            return EvaluatedValue::Unknown;
+        } else {
+            // no unknowns, and didn't early exit, so before inversion the evaluated
+            // value must be False
+            return expr->is_inverted() ? EvaluatedValue::True : EvaluatedValue::False;
+        }
+    } else if (std::dynamic_pointer_cast<AndExpr>(expr)) {
+        // must start false: only an Unknown operand may make the conjunction Unknown
+        bool any_unknown = false;
+        std::vector<OpList::iterator> to_delete;
+        for (auto it = expr->op_begin(); it != expr->op_end(); it++) {
+            auto subExpr = std::static_pointer_cast<Expression>(*it);
+
+            EvaluatedValue ret = constant_propagate(subExpr, schema_id);
+
+            if (ret == EvaluatedValue::False) {
+                return expr->is_inverted() ? EvaluatedValue::True : EvaluatedValue::False;
+            } else if (ret == EvaluatedValue::True) {
+                // no need to add this sub expression to used expression set
+                // but mark it for deletion
+                to_delete.push_back(it);
+            } else /*if (ret == EvaluatedValue::Unknown)*/ {
+                any_unknown = true;
+            }
+        }
+
+        if (any_unknown) {
+            // some unknowns -- delete guaranteed true entries, and
+            // propagate unknown
+            for (OpList::iterator& it : to_delete) {
+                expr->get_op_list().erase(it);
+            }
+            return EvaluatedValue::Unknown;
+        } else {
+            // no unknowns, and didn't early exit, so before inversion the evaluated
+            // value must be True
+            return expr->is_inverted() ? EvaluatedValue::False : EvaluatedValue::True;
+        }
+    } else if (auto filter = std::dynamic_pointer_cast<FilterExpr>(expr)) {
+        if ((filter->get_operation() == FilterOperation::EXISTS
+             || filter->get_operation() == FilterOperation::NEXISTS)
+            && (!filter->get_column()->has_unresolved_tokens()
+                || filter->get_column()->is_pure_wildcard()
+                || !filter->get_column()->matches_exactly(LiteralType::ArrayT)))
+        {
+            // semantics of previous passes means that EXISTS and NEXISTS are
+            // trivially matching
+            // FIXME: have an edgecase to handle with NEXISTS on pure wildcard columns
+            return EvaluatedValue::True;
+        } else if (filter->get_column()->is_pure_wildcard() && filter->get_column()->matches_any(LiteralType::ClpStringT | LiteralType::VarStringT))
+        {
+            auto wildcard = filter->get_column().get();
+            bool has_var_string = false;
+            bool matches_var_string = false;
+            bool has_clp_string = false;
+            bool matches_clp_string = false;
+            bool has_other = !m_wildcard_to_searched_columns[wildcard].empty()
+                             || !m_wildcard_to_searched_datestrings[wildcard].empty()
+                             || !m_wildcard_to_searched_floatdatestrings[wildcard].empty();
+            std::string filter_string;
+            bool valid
+                    = filter->get_operand()->as_var_string(filter_string, filter->get_operation())
+                      || filter->get_operand()->as_clp_string(
+                              filter_string,
+                              filter->get_operation()
+                      );
+            if (false == valid) {
+                // FIXME: throw
+                return EvaluatedValue::False;
+            }
+            if (filter->get_column()->matches_type(LiteralType::ClpStringT)) {
+                m_expr_clp_query[expr.get()] = &m_string_query_map.at(filter_string);
+                has_clp_string = !m_wildcard_to_searched_clpstrings[wildcard].empty();
+                matches_clp_string
+                        = !m_expr_clp_query.at(expr.get())->get_sub_queries().empty()
+                          || m_expr_clp_query.at(expr.get())->search_string_matches_all();
+            }
+            if (filter->get_column()->matches_type(LiteralType::VarStringT)) {
+                m_expr_var_match_map[expr.get()] = &m_string_var_match_map.at(filter_string);
+                has_var_string = !m_wildcard_to_searched_varstrings[wildcard].empty();
+                matches_var_string = !m_expr_var_match_map.at(expr.get())->empty();
+            }
+
+            if (filter->get_operation() == FilterOperation::EQ) {
+                if (false == matches_clp_string) {
+                    m_wildcard_to_searched_clpstrings[wildcard].clear();
+                }
+                if (false == matches_var_string) {
+                    m_wildcard_to_searched_varstrings[wildcard].clear();
+                }
+
+                if (has_other) {
+                    return EvaluatedValue::Unknown;
+                }
+
+                if (has_clp_string || has_var_string) {
+                    if ((!has_clp_string || (has_clp_string && !matches_clp_string))
+                        && (!has_var_string || (has_var_string && !matches_var_string)))
+                    {
+                        return filter->is_inverted() ? EvaluatedValue::True : EvaluatedValue::False;
+                    }
+                }
+            } else if (filter->get_operation() == FilterOperation::NEQ) {
+                if (has_clp_string && !matches_clp_string || has_var_string && !matches_var_string)
+                {
+                    return filter->is_inverted() ? EvaluatedValue::False : EvaluatedValue::True;
+                } else if (false == has_clp_string && false == has_var_string && !has_other) {
+                    return EvaluatedValue::False;
+                }
+            } else {
+                // FIXME: throw
+                return EvaluatedValue::False;
+            }
+            return EvaluatedValue::Unknown;
+        } else if (filter->get_column()->matches_type(LiteralType::ClpStringT)) {
+            std::string filter_string;
+            filter->get_operand()->as_clp_string(filter_string, filter->get_operation());
+
+            // set up string query for this filter
+            m_expr_clp_query[expr.get()] = &m_string_query_map.at(filter_string);
+
+            // use string queries to potentially propagate known result
+            if (m_expr_clp_query.at(expr.get())->get_sub_queries().empty()
+                && !m_expr_clp_query.at(expr.get())->search_string_matches_all())
+            {
+                // If filter can not match then return its guaranteed value based on
+                // whether the filter is inverted and whether the operation was == or !=
+                if (filter->get_operation() == FilterOperation::EQ) {
+                    return filter->is_inverted() ? EvaluatedValue::True : EvaluatedValue::False;
+                } else if (filter->get_operation() == FilterOperation::NEQ) {
+                    return filter->is_inverted() ? EvaluatedValue::False : EvaluatedValue::True;
+                }
+                // FIXME: throw
+                return EvaluatedValue::False;
+            } else {
+                return EvaluatedValue::Unknown;
+            }
+        } else if (filter->get_column()->matches_type(LiteralType::VarStringT)) {
+            std::string filter_string;
+            filter->get_operand()->as_var_string(filter_string, filter->get_operation());
+
+            // set up string query for this filter
+            m_expr_var_match_map[expr.get()] = &m_string_var_match_map.at(filter_string);
+
+            // use string queries to potentially propagate known result
+            if (m_expr_var_match_map.at(expr.get())->empty()) {
+                // If filter can not match then return its guaranteed value based on
+                // whether the filter is inverted and whether the operation was == or !=
+                if (filter->get_operation() == FilterOperation::EQ) {
+                    return filter->is_inverted() ? EvaluatedValue::True : EvaluatedValue::False;
+                } else if (filter->get_operation() == FilterOperation::NEQ) {
+                    return filter->is_inverted() ? EvaluatedValue::False : EvaluatedValue::True;
+                }
+                // FIXME: throw
+                return EvaluatedValue::False;
+            } else {
+                return EvaluatedValue::Unknown;
+            }
+        } else {
+            return EvaluatedValue::Unknown;
+        }
+    }
+
+    return EvaluatedValue::Unknown;
+}
+
+bool Output::evaluate_epoch_date_filter(
+        FilterOperation op,
+        DateStringColumnReader* reader,
+        std::shared_ptr<Literal>& operand
+) {  // compares the current message's encoded time via the integer filter
+    return evaluate_int_filter(op, reader->get_encoded_time(m_cur_message), operand);
+}
+
+bool Output::evaluate_float_date_filter(
+        FilterOperation op,
+        FloatDateStringColumnReader* reader,
+        std::shared_ptr<Literal>& operand
+) {  // compares the current message's encoded time via the float filter
+    return evaluate_float_filter(op, reader->get_encoded_time(m_cur_message), operand);
+}
+}  // namespace clp_s::search
diff --git a/components/core/src/clp_s/search/Output.hpp b/components/core/src/clp_s/search/Output.hpp
new file mode 100644
index 000000000..36afac9c9
--- /dev/null
+++ b/components/core/src/clp_s/search/Output.hpp
@@ -0,0 +1,338 @@
+#ifndef CLP_S_SEARCH_OUTPUT_HPP
+#define CLP_S_SEARCH_OUTPUT_HPP
+
+#include <map>
+#include <set>
+#include <string>
+#include <unordered_set>
+#include <utility>
+
+#include <simdjson.h>
+
+#include "../SchemaReader.hpp"
+#include "../Utils.hpp"
+#include "clp_search/Query.hpp"
+#include "Expression.hpp"
+#include "Integral.hpp"
+#include "SchemaMatch.hpp"
+#include "StringLiteral.hpp"
+
+using namespace simdjson;
+using nlohmann::json;
+using namespace clp_s::search::clp_search;
+
+namespace clp_s::search {
+class Output : public FilterClass {
+public:
+    Output(std::shared_ptr<SchemaTree> tree,
+           std::shared_ptr<ReaderUtils::SchemaMap> schemas,
+           SchemaMatch& match,
+           std::shared_ptr<Expression> expr,
+           std::string archives_dir,
+           std::shared_ptr<TimestampDictionaryReader> timestamp_dict)
+            : m_match(match),
+              m_expr(std::move(expr)),
+              m_archives_dir(std::move(archives_dir)),
+              m_schema_tree(std::move(tree)),
+              m_timestamp_dict(std::move(timestamp_dict)),
+              m_schemas(std::move(schemas)) {}
+
+    /**
+     * Filters messages from all archives
+     */
+    void filter();
+
+private:
+    SchemaMatch& m_match;
+    std::shared_ptr<Expression> m_expr;
+    std::string m_archives_dir;
+
+    // variables for the current schema being filtered
+    std::vector<BaseColumnReader*> m_searched_columns;
+    std::vector<BaseColumnReader*> m_other_columns;
+    std::set<int32_t> m_cached_string_columns;
+
+    int32_t m_schema{-1};
+    SchemaReader* m_reader{nullptr};
+
+    std::shared_ptr<SchemaTree> m_schema_tree;
+    std::shared_ptr<VariableDictionaryReader> m_var_dict;
+    std::shared_ptr<LogTypeDictionaryReader> m_log_dict;
+    std::shared_ptr<LogTypeDictionaryReader> m_array_dict;
+    std::shared_ptr<TimestampDictionaryReader> m_timestamp_dict;
+
+    std::shared_ptr<ReaderUtils::SchemaMap> m_schemas;
+
+    std::map<std::string, Query> m_string_query_map;
+    std::map<std::string, std::unordered_set<int64_t>> m_string_var_match_map;
+    std::unordered_map<Expression*, Query*> m_expr_clp_query;
+    std::unordered_map<Expression*, std::unordered_set<int64_t>*> m_expr_var_match_map;
+    std::unordered_map<int32_t, ClpStringColumnReader*> m_clp_string_readers;
+    std::unordered_map<int32_t, VariableStringColumnReader*> m_var_string_readers;
+    std::unordered_map<int32_t, DateStringColumnReader*> m_datestring_readers;
+    std::unordered_map<int32_t, FloatDateStringColumnReader*> m_floatdatestring_readers;
+    uint64_t m_cur_message{0};
+    EvaluatedValue m_expression_value{EvaluatedValue::Unknown};
+
+    std::map<ColumnDescriptor*, std::vector<int32_t>> m_wildcard_to_searched_clpstrings;
+    std::map<ColumnDescriptor*, std::vector<int32_t>> m_wildcard_to_searched_varstrings;
+    std::map<ColumnDescriptor*, std::vector<int32_t>> m_wildcard_to_searched_datestrings;
+    std::map<ColumnDescriptor*, std::vector<int32_t>> m_wildcard_to_searched_floatdatestrings;
+    std::map<ColumnDescriptor*, std::vector<int32_t>> m_wildcard_to_searched_columns;
+
+    simdjson::ondemand::parser m_array_parser;
+    std::string m_array_search_string;
+    bool m_maybe_string{false}, m_maybe_number{false};
+
+    /**
+     * Initializes the variables. init is called once for each schema, after which filter is
+     * called once for every message in the schema
+     * @param reader
+     * @param schema_id
+     * @param columns
+     */
+    void init(
+            SchemaReader* reader,
+            int32_t schema_id,
+            std::unordered_map<int32_t, BaseColumnReader*>& columns
+    ) override;
+
+    /**
+     * Evaluates an expression
+     * @param expr
+     * @param schema
+     * @param extracted_values
+     * @return true if the expression evaluates to true, false otherwise
+     */
+    bool evaluate(
+            Expression* expr,
+            int32_t schema,
+            std::map<int32_t, std::variant<int64_t, double, std::string, uint8_t>>& extracted_values
+    );
+
+    /**
+     * Evaluates a filter expression
+     * @param expr
+     * @param schema
+     * @param extracted_values
+     * @return true if the expression evaluates to true, false otherwise
+     */
+    bool evaluate_filter(
+            FilterExpr* expr,
+            int32_t schema,
+            std::map<int32_t, std::variant<int64_t, double, std::string, uint8_t>>& extracted_values
+    );
+
+    /**
+     * Evaluates a wildcard filter expression
+     * @param expr
+     * @param schema
+     * @param extracted_values
+     * @return true if the expression evaluates to true, false otherwise
+     */
+    bool evaluate_wildcard_filter(
+            FilterExpr* expr,
+            int32_t schema,
+            std::map<int32_t, std::variant<int64_t, double, std::string, uint8_t>>& extracted_values
+    );
+
+    /**
+     * Evaluates an int filter expression
+     * @param op
+     * @param value
+     * @param operand
+     * @return true if the expression evaluates to true, false otherwise
+     */
+    static bool
+    evaluate_int_filter(FilterOperation op, int64_t value, std::shared_ptr<Literal> const& operand);
+
+    /**
+     * Evaluates a float filter expression
+     * @param op
+     * @param value
+     * @param operand
+     * @return true if the expression evaluates to true, false otherwise
+     */
+    static bool evaluate_float_filter(
+            FilterOperation op,
+            double value,
+            std::shared_ptr<Literal> const& operand
+    );
+
+    /**
+     * Evaluates a clp string filter expression
+     * @param op
+     * @param q
+     * @param column_id
+     * @param operand
+     * @param extracted_values
+     * @return true if the expression evaluates to true, false otherwise
+     */
+    bool evaluate_clp_string_filter(
+            FilterOperation op,
+            Query* q,
+            int32_t column_id,
+            std::shared_ptr<Literal> const& operand,
+            std::map<int32_t, std::variant<int64_t, double, std::string, uint8_t>>& extracted_values
+    );
+
+    /**
+     * Evaluates a var string filter expression
+     * @param op
+     * @param reader
+     * @param matching_vars
+     * @param operand
+     * @return true if the expression evaluates to true, false otherwise
+     */
+    bool evaluate_var_string_filter(
+            FilterOperation op,
+            VariableStringColumnReader* reader,
+            std::unordered_set<int64_t>* matching_vars,
+            std::shared_ptr<Literal> const& operand
+    ) const;
+
+    /**
+     * Evaluates an epoch date string filter expression
+     * @param op
+     * @param reader
+     * @param operand
+     * @return true if the expression evaluates to true, false otherwise
+     */
+    bool evaluate_epoch_date_filter(
+            FilterOperation op,
+            DateStringColumnReader* reader,
+            std::shared_ptr<Literal>& operand
+    );
+
+    /**
+     * Evaluates a float date string filter expression
+     * @param op
+     * @param reader
+     * @param operand
+     * @return true if the expression evaluates to true, false otherwise
+     */
+    bool evaluate_float_date_filter(
+            FilterOperation op,
+            FloatDateStringColumnReader* reader,
+            std::shared_ptr<Literal>& operand
+    );
+
+    /**
+     * Evaluates an array filter expression
+     * @param op
+     * @param unresolved_tokens
+     * @param value
+     * @param operand
+     * @return true if the expression evaluates to true, false otherwise
+     */
+    bool evaluate_array_filter(
+            FilterOperation op,
+            DescriptorList const& unresolved_tokens,
+            std::string const& value,
+            std::shared_ptr<Literal> const& operand
+    ) const;
+
+    /**
+     * The implementation of evaluate_array_filter, which takes a json object rather than the
+     * string value
+     * @param object
+     * @param op
+     * @param unresolved_tokens
+     * @param cur_idx
+     * @param operand
+     * @param array_or_object if true, we are traversing an array
+     * @return true if the expression evaluates to true, false otherwise
+     */
+    bool evaluate_array_filter(
+            json& object,
+            FilterOperation op,
+            DescriptorList const& unresolved_tokens,
+            size_t cur_idx,
+            std::shared_ptr<Literal> const& operand,
+            bool array_or_object
+    ) const;
+
+    /**
+     * Evaluates a wildcard array filter expression
+     * @param op
+     * @param value
+     * @param operand
+     * @return true if the expression evaluates to true, false otherwise
+     */
+    bool evaluate_wildcard_array_filter(
+            FilterOperation op,
+            std::string& value,
+            std::shared_ptr<Literal> const& operand
+    );
+
+    /**
+     * The implementation of evaluate_wildcard_array_filter
+     * @param array
+     * @param op
+     * @param operand
+     * @return true if the expression evaluates to true, false otherwise
+     */
+    bool evaluate_wildcard_array_filter(
+            ondemand::array& array,
+            FilterOperation op,
+            std::shared_ptr<Literal> const& operand
+    ) const;
+
+    /**
+     * The implementation of evaluate_wildcard_array_filter
+     * @param object
+     * @param op
+     * @param operand
+     * @return true if the expression evaluates to true, false otherwise
+     */
+    bool evaluate_wildcard_array_filter(
+            ondemand::object& object,
+            FilterOperation op,
+            std::shared_ptr<Literal> const& operand
+    ) const;
+
+    /**
+     * Evaluates a bool filter expression
+     * @param op
+     * @param value
+     * @param operand
+     * @return true if the expression evaluates to true, false otherwise
+     */
+    static bool
+    evaluate_bool_filter(FilterOperation op, bool value, std::shared_ptr<Literal> const& operand);
+
+    /**
+     * Populates the string queries
+     * @param expr
+     */
+    void populate_string_queries(std::shared_ptr<Expression> const& expr);
+
+    /**
+     * Constant propagates an expression
+     * @param expr
+     * @param schema_id
+     * @return EvaluatedValue::True if the expression evaluates to true, EvaluatedValue::False
+     * if the expression evaluates to false, EvaluatedValue::Unknown otherwise
+     */
+    EvaluatedValue constant_propagate(std::shared_ptr<Expression> const& expr, int32_t schema_id);
+
+    /**
+     * Populates searched wildcard columns
+     * @param expr
+     */
+    void populate_searched_wildcard_columns(std::shared_ptr<Expression> const& expr);
+
+    /**
+     * Adds wildcard columns to searched columns
+     */
+    void add_wildcard_columns_to_searched_columns();
+
+    // Methods inherited from FilterClass
+    bool filter(
+            uint64_t cur_message,
+            std::map<int32_t, std::variant<int64_t, double, std::string, uint8_t>>& extracted_values
+    ) override;
+};
+}  // namespace clp_s::search
+
+#endif  // CLP_S_SEARCH_OUTPUT_HPP
diff --git a/components/core/src/clp_s/search/SchemaMatch.cpp b/components/core/src/clp_s/search/SchemaMatch.cpp
new file mode 100644
index 000000000..83ef44b6a
--- /dev/null
+++ b/components/core/src/clp_s/search/SchemaMatch.cpp
@@ -0,0 +1,452 @@
+#include "SchemaMatch.hpp"
+
+#include <algorithm>
+#include <utility>
+
+#include "AndExpr.hpp"
+#include "ConstantProp.hpp"
+#include "EmptyExpr.hpp"
+#include "OrExpr.hpp"
+#include "OrOfAndForm.hpp"
+#include "SearchUtils.hpp"
+
+namespace clp_s::search {
+// TODO: write proper iterators on the AST to make this code less awful.
+// In particular schema intersection needs AST iterators and a proper refactor
+SchemaMatch::SchemaMatch(
+        std::shared_ptr<SchemaTree> tree,
+        std::shared_ptr<ReaderUtils::SchemaMap> schemas
+)
+        : m_tree(std::move(tree)),  // by-value sink parameters are moved into the members
+          m_schemas(std::move(schemas)) {}
+
+std::shared_ptr<Expression> SchemaMatch::run(std::shared_ptr<Expression>& expr) {
+    ConstantProp empty_folder;
+    expr = populate_column_mapping(expr);
+    expr = empty_folder.run(expr);
+    bool const nothing_can_match = nullptr != std::dynamic_pointer_cast<EmptyExpr>(expr);
+    if (nothing_can_match) {
+        return expr;
+    }
+
+    // Resolving ambiguous (wildcard-containing) descriptors rewrites filters into ORs of
+    // concrete columns, so the expression has to be put back into standard form
+    bool const descriptors_were_resolved = false == m_unresolved_descriptor_to_descriptor.empty();
+    if (descriptors_were_resolved) {
+        // Mappings recorded against the pre-rewrite expression are discarded and rebuilt
+        m_column_to_descriptor.clear();
+        m_unresolved_descriptor_to_descriptor.clear();
+        OrOfAndForm or_of_and;
+        expr = or_of_and.run(expr);
+        expr = populate_column_mapping(expr);
+    }
+
+    populate_schema_mapping();
+
+    // Keep only the schemas each clause can match, then fold away clauses that match none
+    expr = intersect_schemas(expr);
+    expr = empty_folder.run(expr);
+    if (nullptr == std::dynamic_pointer_cast<EmptyExpr>(expr)) {
+        // Something can still match: build the per-schema queries
+        split_expression_by_schema(expr, m_schema_to_query, m_matched_schema_ids);
+    }
+
+    return expr;
+}
+
+std::shared_ptr<Expression> SchemaMatch::populate_column_mapping(std::shared_ptr<Expression> cur) {
+    for (auto it = cur->op_begin(); it != cur->op_end(); it++) {
+        if (auto child = std::dynamic_pointer_cast<Expression>(*it)) {
+            auto new_child = populate_column_mapping(child);  // recurse into sub-expressions
+            if (new_child != child) {
+                new_child->copy_replace(cur.get(), it);
+            }
+        } else if (auto column = dynamic_cast<ColumnDescriptor*>((*it).get())) {
+            if (false == populate_column_mapping(column)) {
+                // no matching columns -- replace this expression with empty;
+                return EmptyExpr::create();
+            } else if (column->is_unresolved_descriptor() && false == column->is_pure_wildcard()) {
+                auto possibilities = OrExpr::create();  // one alternative per matched node
+
+                // TODO: will have to decide how we want to handle multi-column expressions
+                // with unresolved descriptors
+                for (int32_t node_id : m_unresolved_descriptor_to_descriptor[column]) {
+                    auto node = m_tree->get_node(node_id);
+                    auto literal_type = node_to_literal_type(node->get_type());
+                    DescriptorList descriptors;
+                    while (node->get_id() != m_tree->get_root_node_id()) {  // walk up to the root
+                        // may have to explicitly mark non-regex
+                        descriptors.emplace_back(node->get_key_name());
+                        node = m_tree->get_node(node->get_parent_id());
+                    }
+                    std::reverse(descriptors.begin(), descriptors.end());  // now root-first
+                    auto resolved_column = ColumnDescriptor::create(descriptors);
+                    resolved_column->set_matching_type(literal_type);
+                    *it = resolved_column;  // swap the concrete column into cur before copying it
+                    cur->copy_append(possibilities.get());
+                }
+                return possibilities;
+            }
+        }
+    }
+    return cur;
+}
+
+bool SchemaMatch::populate_column_mapping(ColumnDescriptor* column) {
+    if (column->is_pure_wildcard()) {
+        // A pure wildcard matches as soon as any node in the tree has a compatible type.
+        // Pure wildcard columns don't use m_column_to_descriptor, so nothing is recorded for
+        // them and the scan can stop at the first compatible node.
+        for (auto& tree_node : m_tree->get_nodes()) {
+            auto const node_type = node_to_literal_type(tree_node->get_type());
+            if (column->matches_type(node_type)) {
+                return true;
+            }
+        }
+        return false;
+    }
+    // Otherwise match the descriptor's tokens against every subtree under the root. All
+    // subtrees are visited (no short-circuit) since matching also records the mappings.
+    bool any_matched = false;
+    auto const root_node = m_tree->get_node(m_tree->get_root_node_id());
+    for (int32_t top_level_id : root_node->get_children_ids()) {
+        bool const subtree_matched
+                = populate_column_mapping(column, column->descriptor_begin(), top_level_id);
+        any_matched = any_matched || subtree_matched;
+    }
+    return any_matched;
+}
+
+bool SchemaMatch::populate_column_mapping(
+        ColumnDescriptor* column,
+        DescriptorList::iterator it,
+        int32_t node_id,
+        bool wildcard_special_flag
+) {
+    if (it == column->descriptor_end()) {
+        return false;  // all descriptor tokens are consumed: nothing left to match
+    }
+
+    bool matched = false;
+    bool accepted = false, wildcard_accepted = false;
+    auto cur_node = m_tree->get_node(node_id);
+    DescriptorToken const& token = *it;
+    auto next = it;
+    next++;  // next: the token following the current one
+
+    // accept current token: a wildcard token accepts any node, otherwise the key must be equal
+    if (token.wildcard()) {
+        accepted = true;
+        wildcard_accepted = true;
+    } else if (cur_node->get_key_name() == token.get_token()) {
+        accepted = true;
+    }
+
+    if (accepted) {
+        // For array search, users need to specify the full path
+        if (cur_node->get_type() == NodeType::ARRAY && !column->is_unresolved_descriptor()) {
+            matched = true;
+            column->add_unresolved_tokens(next);  // remaining tokens are kept on the column
+            m_column_to_descriptor[node_id].insert(column);
+        } else if ((next == column->descriptor_end()
+                    && column->matches_type(node_to_literal_type(cur_node->get_type()))))
+        {
+            // potentially match current node if accepted its token
+            matched = true;
+            if (false == column->is_unresolved_descriptor()) {
+                m_column_to_descriptor[node_id].insert(column);
+            } else {
+                m_unresolved_descriptor_to_descriptor[column].insert(node_id);
+            }
+        }
+    } else {
+        return matched;  // token rejected; matched is still false here
+    }
+
+    // handle wildcard match 0 case
+    bool wildcard_special_continue = (wildcard_special_flag || !wildcard_accepted)
+                                     && next != column->descriptor_end() && next->wildcard();
+    if (wildcard_special_continue) {
+        // have to allow matching current node again to honour
+        // 0 or more matches. Set the wildcard special flag to avoid matching
+        // the following case erroneously
+        // tok.*.tok
+        matched |= populate_column_mapping(column, next, node_id, true);
+    } else if (false == wildcard_special_flag && wildcard_accepted) {
+        matched |= populate_column_mapping(column, next, node_id);
+    }
+
+    // match against children
+    for (int32_t child_node_id : cur_node->get_children_ids()) {
+        if (wildcard_accepted && !wildcard_special_continue) {
+            matched |= populate_column_mapping(column, next, child_node_id);  // move past wildcard
+            matched |= populate_column_mapping(column, it, child_node_id);  // reuse same wildcard
+        } else if (false == wildcard_accepted) {
+            matched |= populate_column_mapping(column, next, child_node_id);
+        }
+    }
+
+    return matched;
+}
+
+void SchemaMatch::populate_schema_mapping() {
+    // TODO: consider refactoring this now that schemas are std::set s
+    for (auto& [schema_id, column_ids] : *m_schemas) {
+        for (int32_t column_id : column_ids) {
+            if (NodeType::ARRAY == m_tree->get_node(column_id)->get_type()) {
+                m_array_schema_ids.insert(schema_id);  // schema contains an array column
+            }
+            auto const mapped = m_column_to_descriptor.find(column_id);
+            if (m_column_to_descriptor.end() == mapped) {
+                continue;  // no descriptor in the query resolved to this column
+            }
+            for (auto* descriptor : mapped->second) {
+                if (false == descriptor->is_pure_wildcard()) {
+                    m_descriptor_to_schema[descriptor][schema_id] = column_id;
+                }
+            }
+        }
+    }
+}
+
+std::shared_ptr<Expression> SchemaMatch::intersect_schemas(std::shared_ptr<Expression> cur) {
+    if (std::dynamic_pointer_cast<AndExpr>(cur) || std::dynamic_pointer_cast<FilterExpr>(cur)) {
+        std::set<int32_t> common_schema;  // schemas that satisfy every column of this clause
+        std::set<ColumnDescriptor*> columns;  // columns collected from this clause
+        intersect_and_sub_expr(cur, common_schema, columns, true);
+
+        if (common_schema.empty()) {
+            return EmptyExpr::create(cur->get_parent());  // no schema can satisfy this clause
+        }
+
+        for (int32_t schema_id : common_schema) {
+            m_expression_to_schemas[cur.get()].insert(schema_id);
+        }
+
+        for (auto column : columns) {
+            if (column->is_pure_wildcard()) {
+                continue;
+            }
+
+            LiteralTypeBitmask types = 0;  // union of the column's types over the common schemas
+            for (int32_t schema : common_schema) {
+                if (m_descriptor_to_schema[column].count(schema)) {
+                    types |= node_to_literal_type(
+                            m_tree->get_node(m_descriptor_to_schema[column][schema])->get_type()
+                    );
+                }
+            }
+            column->set_matching_types(types);
+        }
+
+        for (int32_t schema : common_schema) {
+            m_matched_schema_ids.insert(schema);
+
+            for (auto column : columns) {
+                if (false == column->is_pure_wildcard()) {
+                    m_schema_to_searched_columns[schema].insert(
+                            get_column_id_for_descriptor(column, schema)
+                    );
+                }
+            }
+        }
+    } else if (cur->has_only_expression_operands()) {
+        for (auto it = cur->op_begin(); it != cur->op_end(); it++) {
+            auto sub_expr = std::static_pointer_cast<Expression>(*it);
+            auto new_expr = intersect_schemas(sub_expr);  // handle each operand independently
+
+            if (new_expr != sub_expr) {
+                *it = new_expr;  // operand was replaced (an EmptyExpr when nothing matched)
+            }
+        }
+    }
+    return cur;
+}
+
+bool SchemaMatch::intersect_and_sub_expr(
+        std::shared_ptr<Expression> const& cur,
+        std::set<int32_t>& common_schema,
+        std::set<ColumnDescriptor*>& columns,
+        bool first
+) {
+    // Note: EmptyExpr are already constant propagated out of the ands, so don't
+    // need to check for them here
+    for (auto it = cur->op_begin(); it != cur->op_end(); it++) {
+        if (auto sub_expr = std::dynamic_pointer_cast<Expression>(*it)) {
+            first &= intersect_and_sub_expr(sub_expr, common_schema, columns, first);
+            if (false == first && common_schema.empty()) {
+                break;  // the intersection is already empty: no later operand can revive it
+            }
+        } else if (auto column = std::dynamic_pointer_cast<ColumnDescriptor>(*it)) {
+            FilterOperation op = std::static_pointer_cast<FilterExpr>(cur)->get_operation();
+            if ((op != FilterOperation::EXISTS && op != FilterOperation::NEXISTS)
+                || column->has_unresolved_tokens())
+            {
+                columns.insert(column.get());
+            }
+
+            if (column->is_pure_wildcard()) {
+                // TODO: consider handling `*:null` NEXISTS edgecase here instead of during
+                // output
+                if (first) {  // a leading pure wildcard seeds the set with every schema
+                    for (auto const& schema_it : *m_schemas) {
+                        common_schema.insert(schema_it.first);  // by-ref: no per-schema copy
+                    }
+                }
+                return false;
+            } else if (first && op != FilterOperation::NEXISTS) {
+                for (auto const& schema_it : m_descriptor_to_schema[column.get()]) {
+                    common_schema.insert(schema_it.first);  // seed: schemas having the column
+                }
+                return false;
+            } else if (first /*&& op == FilterOperation::NEXISTS */) {
+                for (auto& schema : *m_schemas) {
+                    if (0 == m_descriptor_to_schema[column.get()].count(schema.first)) {
+                        common_schema.insert(schema.first);  // seed: schemas lacking the column
+                    }
+                }
+                return false;
+            } else if (op == FilterOperation::NEXISTS) {
+                std::set<int32_t> intersection;
+                auto const& cur_schemas = m_descriptor_to_schema[column.get()];
+                for (int32_t schema : common_schema) {
+                    if (0 == cur_schemas.count(schema)) {  // keep schemas lacking the column
+                        intersection.insert(schema);
+                    }
+                }
+                common_schema = intersection;
+            } else {
+                std::set<int32_t> intersection;
+                auto const& cur_schemas = m_descriptor_to_schema[column.get()];
+                for (int32_t schema : common_schema) {
+                    if (cur_schemas.count(schema)) {  // keep schemas that have the column
+                        intersection.insert(schema);
+                    }
+                }
+                common_schema = intersection;
+            }
+        }
+    }
+    return first;
+}
+
+void SchemaMatch::split_expression_by_schema(
+        std::shared_ptr<Expression> const& expr,
+        std::map<int32_t, std::shared_ptr<Expression>>& queries,
+        std::unordered_set<int32_t> const& relevant_schemas
+) {
+    if (auto filter = std::dynamic_pointer_cast<FilterExpr>(expr)) {
+        for (int32_t schema_id : relevant_schemas) {
+            auto new_filter = filter->copy();  // every schema gets its own copy of the filter
+            auto descriptor = std::static_pointer_cast<FilterExpr>(new_filter)->get_column().get();
+            auto old_descriptor = filter->get_column().get();  // key used by the lookup maps
+
+            if (false == descriptor->is_pure_wildcard()) {
+                descriptor->set_column_id(get_column_id_for_descriptor(old_descriptor, schema_id));
+                auto literal_type = get_literal_type_for_column(old_descriptor, schema_id);
+                if (literal_type == LiteralType::ArrayT) {
+                    m_array_search_schema_ids.insert(schema_id);
+                }
+                descriptor->set_matching_type(literal_type);
+            } else if ((descriptor->is_pure_wildcard()
+                        && descriptor->matches_type(LiteralType::ArrayT)
+                        && 0 == m_array_search_schema_ids.count(schema_id)))
+            {
+                for (auto column_id : (*m_schemas)[schema_id]) {
+                    if (m_tree->get_node(column_id)->get_type() == NodeType::ARRAY) {
+                        m_array_search_schema_ids.insert(schema_id);
+                        break;  // one array column is enough to flag the schema
+                    }
+                }
+            }
+            queries[schema_id] = new_filter;
+        }
+    } else if (std::dynamic_pointer_cast<AndExpr>(expr)) {
+        std::map<int32_t, std::shared_ptr<Expression>> sub_expressions;
+        for (auto const& op : expr->get_op_list()) {
+            auto sub_expr = std::static_pointer_cast<Expression>(op);
+            split_expression_by_schema(sub_expr, sub_expressions, relevant_schemas);
+
+            for (auto const& it : sub_expressions) {
+                if (queries.count(it.first)) {
+                    it.second->copy_append(queries[it.first].get());
+                } else {
+                    auto parent_expr = AndExpr::create(expr->is_inverted());
+                    it.second->copy_append(parent_expr.get());
+                    queries[it.first] = parent_expr;
+                }
+            }
+
+            sub_expressions.clear();  // reuse the scratch map for the next operand
+        }
+    } else if (std::dynamic_pointer_cast<OrExpr>(expr)) {
+        std::map<int32_t, std::shared_ptr<Expression>> sub_expressions;
+        for (auto const& op : expr->get_op_list()) {
+            auto sub_expr = std::static_pointer_cast<Expression>(op);
+            split_expression_by_schema(
+                    sub_expr,
+                    sub_expressions,
+                    m_expression_to_schemas.at(sub_expr.get())  // filled in by intersect_schemas
+            );
+
+            for (auto const& it : sub_expressions) {
+                if (queries.count(it.first)) {
+                    auto& cur_subexpr = queries[it.first];
+                    if (std::dynamic_pointer_cast<OrExpr>(cur_subexpr)) {
+                        it.second->copy_append(cur_subexpr.get());
+                    } else {
+                        auto parent_expr = OrExpr::create();  // wrap both alternatives in an OR
+                        cur_subexpr->copy_append(parent_expr.get());
+                        it.second->copy_append(parent_expr.get());
+                        queries[it.first] = parent_expr;
+                    }
+                } else {
+                    queries[it.first] = it.second;
+                }
+            }
+
+            sub_expressions.clear();  // reuse the scratch map for the next operand
+        }
+
+        if (expr->is_inverted()) {
+            for (auto const& it : queries) {
+                it.second->invert();  // NOTE(review): inverts every entry in queries — confirm
+            }
+        }
+    }
+}
+
+int32_t SchemaMatch::get_column_id_for_descriptor(ColumnDescriptor* column, int32_t schema) {
+    return m_descriptor_to_schema[column][schema];  // NOTE: operator[] inserts 0 if unmapped
+}
+
+bool SchemaMatch::schema_matched(int32_t schema_id) {
+    return 0 != m_matched_schema_ids.count(schema_id);  // membership test
+}
+
+bool SchemaMatch::schema_searches_against_column(int32_t schema, int32_t column_id) {
+    return 0 != m_schema_to_searched_columns[schema].count(column_id);
+}
+
+void SchemaMatch::add_searched_column_to_schema(int32_t schema, int32_t column) {
+    m_schema_to_searched_columns[schema].emplace(column);  // no-op if already present
+}
+
+bool SchemaMatch::has_array(int32_t schema_id) {
+    return 0 != m_array_schema_ids.count(schema_id);  // membership test
+}
+
+bool SchemaMatch::has_array_search(int32_t schema_id) {
+    return 0 != m_array_search_schema_ids.count(schema_id);  // membership test
+}
+
+LiteralType SchemaMatch::get_literal_type_for_column(ColumnDescriptor* column, int32_t schema) {
+    // Find the schema tree node backing this descriptor in the schema, then map its node type
+    int32_t const column_id = get_column_id_for_descriptor(column, schema);
+    return node_to_literal_type(m_tree->get_node(column_id)->get_type());
+}
+
+std::shared_ptr<Expression> SchemaMatch::get_query_for_schema(int32_t schema) {
+    return m_schema_to_query.at(schema);  // map::at throws std::out_of_range if no query exists
+}
+}  // namespace clp_s::search
diff --git a/components/core/src/clp_s/search/SchemaMatch.hpp b/components/core/src/clp_s/search/SchemaMatch.hpp
new file mode 100644
index 000000000..abee8628b
--- /dev/null
+++ b/components/core/src/clp_s/search/SchemaMatch.hpp
@@ -0,0 +1,172 @@
+#ifndef CLP_S_SEARCH_SCHEMAMATCH_HPP
+#define CLP_S_SEARCH_SCHEMAMATCH_HPP
+
+#include <map>
+#include <set>
+#include <unordered_map>
+#include <unordered_set>
+
+#include "../ReaderUtils.hpp"
+#include "ColumnDescriptor.hpp"
+#include "Expression.hpp"
+#include "FilterExpr.hpp"
+#include "Literal.hpp"
+#include "Transformation.hpp"
+
+namespace clp_s::search {
+class SchemaMatch : public Transformation {
+public:
+    // Constructor
+    SchemaMatch(std::shared_ptr<SchemaTree> tree, std::shared_ptr<ReaderUtils::SchemaMap> schemas);
+
+    /**
+     * Resolves the expression's columns against the schema tree and builds per-schema queries
+     * @param expr The expression to transform; the reference is also updated in place
+     * @return The transformed expression
+     */
+    std::shared_ptr<Expression> run(std::shared_ptr<Expression>& expr) override;
+
+    /**
+     * @param schema
+     * @return The query for a given schema (throws std::out_of_range if the schema has none)
+     */
+    std::shared_ptr<Expression> get_query_for_schema(int32_t schema);
+
+    /**
+     * Checks if a schema has been matched
+     * @param schema_id
+     * @return true if the schema has been matched, false otherwise
+     */
+    bool schema_matched(int32_t schema_id);
+
+    /**
+     * Checks if a column of a schema is searched against
+     * @param schema
+     * @param column_id
+     * @return true if the column is searched against in the schema, false otherwise
+     */
+    bool schema_searches_against_column(int32_t schema, int32_t column_id);
+
+    /**
+     * Adds a searched column to the schema. Only used for pure wildcard
+     * @param schema
+     * @param column
+     */
+    void add_searched_column_to_schema(int32_t schema, int32_t column);
+
+    /**
+     * Checks if the schema has an array field
+     * @param schema_id
+     * @return true if the schema has an array field, false otherwise
+     */
+    bool has_array(int32_t schema_id);
+
+    /**
+     * Checks if the schema has an array field to be searched against
+     * @param schema_id
+     * @return true if the schema has an array field that is searched, false otherwise
+     */
+    bool has_array_search(int32_t schema_id);
+
+private:
+    std::unordered_map<uint32_t, std::set<ColumnDescriptor*>> m_column_to_descriptor;
+    // TODO: The value in the map can be a set of k:v pairs with a hash & comparison
+    // that only considers the key since each column descriptor only has one matching
+    // column id per schema
+    std::unordered_map<ColumnDescriptor*, std::map<int32_t, int32_t>> m_descriptor_to_schema;
+    std::map<ColumnDescriptor*, std::set<int32_t>> m_unresolved_descriptor_to_descriptor;
+    std::unordered_map<Expression*, std::unordered_set<int32_t>> m_expression_to_schemas;
+    std::unordered_set<int32_t> m_matched_schema_ids;
+    std::unordered_set<int32_t> m_array_schema_ids;
+    std::unordered_set<int32_t> m_array_search_schema_ids;
+    std::map<int32_t, std::shared_ptr<Expression>> m_schema_to_query;
+
+    std::unordered_map<int32_t, std::set<int32_t>> m_schema_to_searched_columns;
+    std::shared_ptr<SchemaTree> m_tree;
+    std::shared_ptr<ReaderUtils::SchemaMap> m_schemas;
+
+    /**
+     * Matches a column's descriptor tokens, starting at `it`, against the subtree at node_id
+     * @param column
+     * @param it The descriptor token to match against the node
+     * @param node_id
+     * @param wildcard_special_flag Set when re-matching a node for a zero-length wildcard match
+     * @return true if matching is successful, false otherwise
+     */
+    bool populate_column_mapping(
+            ColumnDescriptor* column,
+            DescriptorList::iterator it,
+            int32_t node_id,
+            bool wildcard_special_flag = false
+    );
+
+    /**
+     * Populates the column mapping for a given column
+     * @param column
+     * @return true if the column matched at least one schema tree node, false otherwise
+     */
+    bool populate_column_mapping(ColumnDescriptor* column);
+
+    /**
+     * Populates the column mapping for every column in a given expression
+     * @param cur
+     * @return The transformed expression
+     */
+    std::shared_ptr<Expression> populate_column_mapping(std::shared_ptr<Expression> cur);
+
+    /**
+     * Populates the descriptor-to-schema mapping and records which schemas contain arrays
+     */
+    void populate_schema_mapping();
+
+    /**
+     * Finds common schemas and relevant columns across filters and stores the mapping
+     * @param cur
+     * @return The transformed expression
+     */
+    std::shared_ptr<Expression> intersect_schemas(std::shared_ptr<Expression> cur);
+
+    /**
+     * Finds common schemas and relevant columns across filters
+     * @param cur
+     * @param common_schema Returns the schemas that satisfy every column visited so far
+     * @param columns Returns the columns collected from the visited filters
+     * @param first Whether common_schema still has to be seeded by the first column
+     * @return true if common_schema has not been seeded by any column yet, false otherwise
+     */
+    bool intersect_and_sub_expr(
+            std::shared_ptr<Expression> const& cur,
+            std::set<int32_t>& common_schema,
+            std::set<ColumnDescriptor*>& columns,
+            bool first
+    );
+
+    /**
+     * Splits an expression into sub-expressions based on the schemas it searches against
+     * @param expr
+     * @param queries a map from schema id to expression
+     * @param relevant_schemas The schemas for which filter copies are created
+     */
+    void split_expression_by_schema(
+            std::shared_ptr<Expression> const& expr,
+            std::map<int32_t, std::shared_ptr<Expression>>& queries,
+            std::unordered_set<int32_t> const& relevant_schemas
+    );
+
+    /**
+     * @param column
+     * @param schema
+     * @return The column id for a given column descriptor
+     */
+    int32_t get_column_id_for_descriptor(ColumnDescriptor* column, int32_t schema);
+
+    /**
+     * @param column
+     * @param schema
+     * @return The literal type for a given column descriptor
+     */
+    LiteralType get_literal_type_for_column(ColumnDescriptor* column, int32_t schema);
+};
+}  // namespace clp_s::search
+
+#endif  // CLP_S_SEARCH_SCHEMAMATCH_HPP
diff --git a/components/core/src/clp_s/search/SearchUtils.cpp b/components/core/src/clp_s/search/SearchUtils.cpp
new file mode 100644
index 000000000..c255c1f38
--- /dev/null
+++ b/components/core/src/clp_s/search/SearchUtils.cpp
@@ -0,0 +1,87 @@
+#include "SearchUtils.hpp"
+
+#include <cmath>
+
+namespace clp_s::search {
+void splice_into(
+        std::shared_ptr<Expression> const& parent,
+        std::shared_ptr<Expression> const& child,
+        OpList::iterator location
+) {
+    // Point each of child's operands at parent, then splice child's operand list into parent's
+    for (auto op_it = child->op_begin(); child->op_end() != op_it; ++op_it) {
+        std::static_pointer_cast<Expression>(*op_it)->set_parent(parent.get());
+    }
+    parent->get_op_list().splice(location, child->get_op_list());
+}
+
+// Maps a schema tree node type to its search literal type. TODO: handle Object types correctly
+LiteralType node_to_literal_type(NodeType type) {
+    switch (type) {
+        case NodeType::INTEGER:
+            return LiteralType::IntegerT;
+        case NodeType::FLOAT:
+            return LiteralType::FloatT;
+        case NodeType::CLPSTRING:
+            return LiteralType::ClpStringT;
+        case NodeType::VARSTRING:
+            return LiteralType::VarStringT;
+        case NodeType::BOOLEAN:
+            return LiteralType::BooleanT;
+        case NodeType::ARRAY:
+            return LiteralType::ArrayT;
+        case NodeType::NULLVALUE:
+            return LiteralType::NullT;
+        case NodeType::DATESTRING:
+            return LiteralType::EpochDateT;
+        case NodeType::FLOATDATESTRING:
+            return LiteralType::FloatDateT;
+        default:  // every other node type is reported as unknown
+            return LiteralType::UnknownT;
+    }
+}
+
+bool double_as_int(double in, FilterOperation op, int64_t& out) {
+    // Every branch is exclusive: with switch fall-through the default truncation used to
+    // overwrite the ceil/floor rounding chosen for the inequality operators.
+    if (FilterOperation::EQ == op) {
+        out = static_cast<int64_t>(in);
+        return in == static_cast<double>(out);  // only integral doubles can equal an integer
+    }
+    if (FilterOperation::LT == op || FilterOperation::GTE == op) {
+        out = static_cast<int64_t>(std::ceil(in));  // x < 2.5 <=> x < 3, x >= 2.5 <=> x >= 3
+    } else if (FilterOperation::GT == op || FilterOperation::LTE == op) {
+        out = static_cast<int64_t>(std::floor(in));  // x > 2.5 <=> x > 2, x <= 2.5 <=> x <= 2
+    } else {
+        out = static_cast<int64_t>(in);  // any other operation: truncate toward zero
+    }
+    return true;
+}
+
+bool wildcard_match(std::string_view s, std::string_view p) {
+    // Greedy matcher with backtracking to the latest '*': '?' = one char, '*' = any run of chars
+    constexpr size_t cNoStar = static_cast<size_t>(-1);
+    size_t i = 0, j = 0;
+    size_t star = cNoStar, last = 0;  // index of the latest '*' in p / where it started in s
+
+    while (i < s.length()) {
+        if (j < p.length() && p[j] == '*') {
+            // Test '*' before literals, otherwise a '*' in s would consume the wildcard as text
+            star = j++;
+            last = i;
+        } else if (j < p.length() && (s[i] == p[j] || p[j] == '?')) {
+            ++i;
+            ++j;
+        } else if (star != cNoStar) {
+            i = ++last;  // let the latest '*' absorb one more character, then retry after it
+            j = star + 1;
+        } else {
+            return false;
+        }
+    }
+    while (j < p.length() && p[j] == '*') {  // trailing '*' may match the empty remainder
+        ++j;
+    }
+    return j == p.length();
+}
+}  // namespace clp_s::search
diff --git a/components/core/src/clp_s/search/SearchUtils.hpp b/components/core/src/clp_s/search/SearchUtils.hpp
new file mode 100644
index 000000000..1ba8719e3
--- /dev/null
+++ b/components/core/src/clp_s/search/SearchUtils.hpp
@@ -0,0 +1,48 @@
+#ifndef CLP_S_SEARCH_SEARCHUTILS_HPP
+#define CLP_S_SEARCH_SEARCHUTILS_HPP
+
+#include "../SchemaTree.hpp"
+#include "Expression.hpp"
+#include "Literal.hpp"
+
+namespace clp_s::search {
+
+/**
+ * Splice a child expression into a parent expression at a given location
+ * @param parent
+ * @param child
+ * @param location
+ */
+void splice_into(
+        std::shared_ptr<Expression> const& parent,
+        std::shared_ptr<Expression> const& child,
+        OpList::iterator location
+);
+
+/**
+ * Converts a node type to a literal type
+ * @param type
+ * @return A literal type
+ */
+LiteralType node_to_literal_type(NodeType type);
+
+/**
+ * Casts a double to an int64_t, rounding up or down depending on the filter operation
+ * @param in
+ * @param op
+ * @param out
+ * @return false if under FilterOperation::EQ the cast double is not equal to int64_t out, true
+ * otherwise
+ */
+bool double_as_int(double in, FilterOperation op, int64_t& out);
+
+/**
+ * Performs a wildcard match of a string against a pattern
+ * @param s the string to match
+ * @param p the pattern to match against
+ * @return true if s matches p, false otherwise
+ */
+bool wildcard_match(std::string_view s, std::string_view p);
+}  // namespace clp_s::search
+
+#endif  // CLP_S_SEARCH_SEARCHUTILS_HPP
diff --git a/components/core/src/clp_s/search/StringLiteral.cpp b/components/core/src/clp_s/search/StringLiteral.cpp
new file mode 100644
index 000000000..63600ee7f
--- /dev/null
+++ b/components/core/src/clp_s/search/StringLiteral.cpp
@@ -0,0 +1,95 @@
+#include "StringLiteral.hpp"
+
+#include <sstream>
+
+#include "SearchUtils.hpp"
+
+namespace clp_s::search {
+std::shared_ptr<Literal> StringLiteral::create(std::string const& v) {  // ctor is private
+    return std::shared_ptr<Literal>(static_cast<Literal*>(new StringLiteral(v)));
+}
+
+void StringLiteral::print() {  // writes m_v wrapped in double quotes to the print stream
+    get_print_stream() << "\"" << m_v << "\"";
+}
+
+std::string& StringLiteral::get() {
+    return m_v;  // NOTE: edits made through this reference do not update m_string_type
+}
+
+bool StringLiteral::as_clp_string(std::string& ret, FilterOperation op) {
+    // Range comparisons are rejected for strings
+    bool const is_range_op = FilterOperation::LT == op || FilterOperation::GT == op
+                             || FilterOperation::LTE == op || FilterOperation::GTE == op;
+
+    // The literal must also have been classified as a possible CLP-style string
+    bool const convertible = false == is_range_op && matches_type(LiteralType::ClpStringT);
+    if (false == convertible) {
+        return false;
+    }
+
+    ret = m_v;  // ret is only written on success
+    return true;
+}
+
+bool StringLiteral::as_var_string(std::string& ret, FilterOperation op) {
+    // Range comparisons are rejected for strings
+    bool const is_range_op = FilterOperation::LT == op || FilterOperation::GT == op
+                             || FilterOperation::LTE == op || FilterOperation::GTE == op;
+
+    // The literal must also have been classified as a possible variable-style string
+    bool const convertible = false == is_range_op && matches_type(LiteralType::VarStringT);
+    if (false == convertible) {
+        return false;
+    }
+
+    ret = m_v;  // ret is only written on success
+    return true;
+}
+
+bool StringLiteral::as_float(double& ret, FilterOperation op) {
+    std::istringstream number_stream{m_v};
+    number_stream >> std::noskipws >> ret;  // noskipws: leading whitespace is not skipped
+    return number_stream.eof() && false == number_stream.fail();  // whole string was consumed
+}
+
+bool StringLiteral::as_int(int64_t& ret, FilterOperation op) {
+    // First attempt: the entire string is an integer
+    std::istringstream int_stream{m_v};
+    int_stream >> std::noskipws >> ret;
+    if (int_stream.eof() && false == int_stream.fail()) {
+        return true;
+    }
+
+    // Second attempt: the entire string is a float, converted according to op
+    double as_double;
+    std::istringstream float_stream{m_v};
+    float_stream >> std::noskipws >> as_double;
+    bool const is_float = float_stream.eof() && false == float_stream.fail();
+    return is_float && double_as_int(as_double, op, ret);
+}
+
+bool StringLiteral::as_bool(bool& ret, FilterOperation op) {
+    // Range comparisons are rejected for booleans
+    bool const is_range_op = FilterOperation::LT == op || FilterOperation::GT == op
+                             || FilterOperation::LTE == op || FilterOperation::GTE == op;
+    if (is_range_op) {
+        return false;
+    }
+    // Only the exact spellings "true" and "false" are accepted
+    bool const is_true = "true" == m_v;
+    if (false == is_true && "false" != m_v) {
+        return false;
+    }
+    ret = is_true;
+    return true;
+}
+
+bool StringLiteral::as_null(FilterOperation op) {
+    return "null" == m_v && (FilterOperation::EQ == op || FilterOperation::NEQ == op);
+}
+
+bool StringLiteral::as_any(FilterOperation op) {
+    return "*" == m_v && (FilterOperation::EQ == op || FilterOperation::NEQ == op);
+}
+}  // namespace clp_s::search
diff --git a/components/core/src/clp_s/search/StringLiteral.hpp b/components/core/src/clp_s/search/StringLiteral.hpp
new file mode 100644
index 000000000..cd05f59fe
--- /dev/null
+++ b/components/core/src/clp_s/search/StringLiteral.hpp
@@ -0,0 +1,78 @@
+#ifndef CLP_S_SEARCH_STRINGLITERAL_HPP
+#define CLP_S_SEARCH_STRINGLITERAL_HPP
+
+#include <memory>
+#include <string>
+
+#include "Literal.hpp"
+
+namespace clp_s::search {
+/**
+ * Class for String literals in the search AST
+ *
+ * StringLiteral will automatically classify itself as possibly matching
+ * a clp style (containing spaces) and/or variable style (not containing spaces)
+ * string at creation time.
+ */
+class StringLiteral : public Literal {
+public:
+    // Deleted copy
+    StringLiteral(StringLiteral const&) = delete;
+    StringLiteral& operator=(StringLiteral const&) = delete;
+
+    /**
+     * Create a StringLiteral from a string
+     * @param v The string value of the literal
+     * @return A new StringLiteral
+     */
+    static std::shared_ptr<Literal> create(std::string const& v);
+
+    /**
+     * @return Reference to underlying string
+     */
+    std::string& get();
+
+    // Methods inherited from Value
+    void print() override;
+
+    // Methods inherited from Literal; each tests `m_string_type`, the mask set by the constructor
+    bool matches_type(LiteralType type) override { return type & m_string_type; }
+
+    bool matches_any(LiteralTypeBitmask mask) override { return mask & m_string_type; }
+
+    bool matches_exactly(LiteralTypeBitmask mask) override { return mask == m_string_type; }
+
+    bool as_clp_string(std::string& ret, FilterOperation op) override;
+
+    bool as_var_string(std::string& ret, FilterOperation op) override;
+
+    bool as_float(double& ret, FilterOperation op) override;
+
+    bool as_int(int64_t& ret, FilterOperation op) override;
+
+    bool as_bool(bool& ret, FilterOperation op) override;
+
+    bool as_null(FilterOperation op) override;
+
+    bool as_any(FilterOperation op) override;
+
+private:
+    std::string m_v;  // The literal's string value
+    LiteralTypeBitmask m_string_type;  // Which string types (clp and/or variable) m_v may match
+
+    // Constructor: classifies the string as clp-style (has a space) or variable-style (no space)
+    explicit StringLiteral(std::string v) : m_v(std::move(v)), m_string_type(0) {
+        if (m_v.find(' ') != std::string::npos) {
+            m_string_type = LiteralType::ClpStringT;
+        } else {
+            m_string_type = LiteralType::VarStringT;
+        }
+        // A wildcard ('*' or '?') can stand in for a space, so such a literal may match clp strings
+        if (m_v.find_first_of("*?") != std::string::npos) {
+            m_string_type |= LiteralType::ClpStringT;
+        }
+    }
+};
+}  // namespace clp_s::search
+
+#endif  // CLP_S_SEARCH_STRINGLITERAL_HPP
diff --git a/components/core/src/clp_s/search/Transformation.hpp b/components/core/src/clp_s/search/Transformation.hpp
new file mode 100644
index 000000000..a200f9ed5
--- /dev/null
+++ b/components/core/src/clp_s/search/Transformation.hpp
@@ -0,0 +1,21 @@
+#ifndef CLP_S_SEARCH_TRANSFORMATION_HPP
+#define CLP_S_SEARCH_TRANSFORMATION_HPP
+
+#include "Expression.hpp"
+
+namespace clp_s::search {
+/**
+ * Generic class representing a transformation on some expression.
+ */
+class Transformation {
+public:
+    virtual ~Transformation() = default;  // Polymorphic base: make destruction via base safe
+    /** Runs the pass. The expression passed as input may be mutated by the pass.
+     * @param expr the expression that the pass will run on
+     * @return a new expression; may be the same as the input expression or different
+     */
+    virtual std::shared_ptr<Expression> run(std::shared_ptr<Expression>& expr) = 0;
+};
+}  // namespace clp_s::search
+
+#endif  // CLP_S_SEARCH_TRANSFORMATION_HPP
diff --git a/components/core/src/clp_s/search/Value.hpp b/components/core/src/clp_s/search/Value.hpp
new file mode 100644
index 000000000..a7e812294
--- /dev/null
+++ b/components/core/src/clp_s/search/Value.hpp
@@ -0,0 +1,33 @@
+#ifndef CLP_S_SEARCH_VALUE_HPP
+#define CLP_S_SEARCH_VALUE_HPP
+
+#include <iostream>
+
+namespace clp_s::search {
+/**
+ * Class representing a generic value in the AST. Key subclasses are Literal and Expression.
+ */
+class Value {
+public:
+    /**
+     * @return The number of operands this value has
+     */
+    virtual unsigned get_num_operands() = 0;
+
+    /**
+     * Print a string representation of the value to standard error.
+     * Useful for debugging in gdb.
+     */
+    virtual void print() = 0;
+
+    virtual ~Value() = default;  // Virtual so that subclasses can be destroyed via a Value*
+
+protected:
+    /**
+     * @return The stream that printing should be directed to; currently std::cerr
+     */
+    static std::ostream& get_print_stream() { return std::cerr; }
+};
+}  // namespace clp_s::search
+
+#endif  // CLP_S_SEARCH_VALUE_HPP
diff --git a/components/core/src/clp_s/search/clp_search/EncodedVariableInterpreter.cpp b/components/core/src/clp_s/search/clp_search/EncodedVariableInterpreter.cpp
new file mode 100644
index 000000000..241f3dde7
--- /dev/null
+++ b/components/core/src/clp_s/search/clp_search/EncodedVariableInterpreter.cpp
@@ -0,0 +1,75 @@
+// Code from CLP
+
+#include "EncodedVariableInterpreter.hpp"
+
+#include <cassert>
+#include <cmath>
+
+#include <spdlog/spdlog.h>
+
+#include "../../VariableEncoder.hpp"
+
+using std::string;
+using std::unordered_set;
+using std::vector;
+
+namespace clp_s::search::clp_search {
+bool EncodedVariableInterpreter::encode_and_search_dictionary(
+        string const& var_str,
+        VariableDictionaryReader const& var_dict,
+        bool ignore_case,
+        string& logtype,
+        SubQuery& sub_query
+) {
+    // An empty variable string is rejected as a bad parameter
+    if (var_str.empty()) {
+        throw OperationFailed(ErrorCodeBadParam, __FILENAME__, __LINE__);
+    }
+
+    encoded_variable_t encoded_var;
+    if (VariableEncoder::convert_string_to_representable_integer_var(var_str, encoded_var)) {
+        LogTypeDictionaryEntry::add_non_double_var(logtype);
+        sub_query.add_non_dict_var(encoded_var);
+        return true;
+    }
+    if (VariableEncoder::convert_string_to_representable_double_var(var_str, encoded_var)) {
+        LogTypeDictionaryEntry::add_double_var(logtype);
+        sub_query.add_non_dict_var(encoded_var);
+        return true;
+    }
+    // Neither an integer nor a double variable, so it must already be in the dictionary
+    auto entry = var_dict.get_entry_matching_value(var_str, ignore_case);
+    if (nullptr == entry) {
+        return false;  // Not in dictionary
+    }
+    encoded_var = VariableEncoder::encode_var_dict_id(entry->get_id());
+    LogTypeDictionaryEntry::add_non_double_var(logtype);
+    sub_query.add_dict_var(encoded_var, entry);
+    return true;
+}
+
+bool EncodedVariableInterpreter::wildcard_search_dictionary_and_get_encoded_matches(
+        std::string const& var_wildcard_str,
+        VariableDictionaryReader const& var_dict,
+        bool ignore_case,
+        SubQuery& sub_query
+) {
+    // Collect every variable dictionary entry that matches the wildcard string
+    unordered_set<VariableDictionaryEntry const*> matching_entries;
+    var_dict.get_entries_matching_wildcard_string(var_wildcard_str, ignore_case, matching_entries);
+    bool const found_match = false == matching_entries.empty();
+
+    if (found_match) {
+        // Encode the dictionary ID of each matching entry
+        unordered_set<encoded_variable_t> encoded_matches;
+        for (auto const* matching_entry : matching_entries) {
+            encoded_matches.insert(VariableEncoder::encode_var_dict_id(matching_entry->get_id()));
+        }
+
+        // Hand both the encoded IDs and the entries themselves to the sub-query
+        sub_query.add_imprecise_dict_var(encoded_matches, matching_entries);
+    }
+
+    return found_match;
+}
+}  // namespace clp_s::search::clp_search
diff --git a/components/core/src/clp_s/search/clp_search/EncodedVariableInterpreter.hpp b/components/core/src/clp_s/search/clp_search/EncodedVariableInterpreter.hpp
new file mode 100644
index 000000000..92e0907a2
--- /dev/null
+++ b/components/core/src/clp_s/search/clp_search/EncodedVariableInterpreter.hpp
@@ -0,0 +1,84 @@
+// Code from CLP
+
+#ifndef CLP_S_SEARCH_CLP_SEARCH_ENCODEDVARIABLEINTERPRETER_HPP
+#define CLP_S_SEARCH_CLP_SEARCH_ENCODEDVARIABLEINTERPRETER_HPP
+
+#include <string>
+#include <vector>
+
+#include "../../DictionaryReader.hpp"
+#include "../../DictionaryWriter.hpp"
+#include "../../TraceableException.hpp"
+#include "Query.hpp"
+
+namespace clp_s::search::clp_search {
+/**
+ * Class to parse and encode strings into encoded variables and to interpret encoded variables
+ * back into strings. An encoded variable is one of: i)   a variable dictionary ID, referring to
+ * an entry in the variable dictionary, or ii)  a value, representing an integer variable
+ * exactly as it appears in the original log message, or iii) a value, representing a base-10,
+ * 16-digit number with a decimal point, where at least one digit is after the decimal point,
+ * encoded with a custom format.
+ *
+ * To decode an encoded variable, the logtype specifies whether the variable is either:
+ * - i/ii, or
+ * - iii
+ * This class differentiates between i & ii by using a certain range of values for variable
+ * dictionary IDs, and the rest for non-dictionary variables.
+ *
+ * We collectively refer to ii & iii as non-dictionary variables.
+ */
+class EncodedVariableInterpreter {
+public:
+    // Types
+    class OperationFailed : public TraceableException {
+    public:
+        // Constructors
+        OperationFailed(ErrorCode error_code, char const* const filename, int line_number)
+                : TraceableException(error_code, filename, line_number) {}
+    };
+
+    /**
+     * Encodes a string-form variable, and if it is dictionary variable, searches for its ID in
+     * the given variable dictionary. Throws OperationFailed if var_str is empty.
+     * @param var_str String form of the variable; must be non-empty
+     * @param var_dict Dictionary searched when var_str is not an integer or double variable
+     * @param ignore_case Passed through to the dictionary lookup
+     * @param logtype Logtype string passed to LogTypeDictionaryEntry::add_*_var
+     * @param sub_query Sub-query that receives the encoded variable
+     * @return true if variable is a non-dictionary variable or was found in the given variable
+     * dictionary, false otherwise
+     */
+    static bool encode_and_search_dictionary(
+            std::string const& var_str,
+            VariableDictionaryReader const& var_dict,
+            bool ignore_case,
+            std::string& logtype,
+            SubQuery& sub_query
+    );
+    /**
+     * Search for the given string-form variable in the variable dictionary, encode any matches,
+     * and add them to the given sub-query
+     * @param var_wildcard_str Wildcard string matched against the dictionary's entries
+     * @param var_dict Dictionary to search
+     * @param ignore_case Passed through to the dictionary search
+     * @param sub_query Sub-query that receives the matches; left untouched if there are none
+     * @return true if any match found, false otherwise
+     */
+    static bool wildcard_search_dictionary_and_get_encoded_matches(
+            std::string const& var_wildcard_str,
+            VariableDictionaryReader const& var_dict,
+            bool ignore_case,
+            SubQuery& sub_query
+    );
+
+private:
+    // Variables
+    // The beginning of the range used for encoding variable dictionary IDs
+    static constexpr encoded_variable_t cVarDictIdRangeBegin = 1LL << 62;
+    // The end (exclusive) of the range used for encoding variable dictionary IDs
+    static constexpr encoded_variable_t cVarDictIdRangeEnd = (1ULL << 63) - 1;
+};
+}  // namespace clp_s::search::clp_search
+
+#endif  // CLP_S_SEARCH_CLP_SEARCH_ENCODEDVARIABLEINTERPRETER_HPP
diff --git a/components/core/src/clp_s/search/clp_search/Grep.cpp b/components/core/src/clp_s/search/clp_search/Grep.cpp
new file mode 100644
index 000000000..54031446a
--- /dev/null
+++ b/components/core/src/clp_s/search/clp_search/Grep.cpp
@@ -0,0 +1,639 @@
+// Code from CLP
+
+#include "Grep.hpp"
+
+#include <algorithm>
+
+#include "../../VariableEncoder.hpp"
+#include "EncodedVariableInterpreter.hpp"
+
+using std::string;
+using std::vector;
+
+namespace clp_s::search::clp_search {
+// Local types
+enum class SubQueryMatchabilityResult {
+    MayMatch,  // The subquery might match a message, so it is added to the query
+    WontMatch,  // The subquery has no chance of matching a message, so it is dropped
+    SupercedesAllSubQueries  // The subquery matches every message, making all others redundant
+};
+
+// Class representing a token in a query. It is used to interpret a token in user's search
+// string.
+class QueryToken {
+public:
+    // Constructors
+    QueryToken(string const& query_string, size_t begin_pos, size_t end_pos, bool is_var);
+
+    // Methods
+    bool cannot_convert_to_non_dict_var() const;
+    bool contains_wildcards() const;
+    bool has_greedy_wildcard_in_middle() const;
+    bool has_prefix_greedy_wildcard() const;
+    bool has_suffix_greedy_wildcard() const;
+    bool is_ambiguous_token() const;
+    bool is_double_var() const;
+    bool is_var() const;
+    bool is_wildcard() const;
+
+    size_t get_begin_pos() const;
+    size_t get_end_pos() const;
+    string const& get_value() const;
+
+    bool change_to_next_possible_type();
+
+private:
+    // Types
+    // Type for the purpose of generating different subqueries. E.g., if a token is of type
+    // DictOrIntVar, it would generate a different subquery than if it was of type Logtype.
+    enum class Type {
+        Wildcard,
+        // Ambiguous indicates the token can be more than one of the types listed below
+        Ambiguous,
+        Logtype,
+        DictOrIntVar,
+        DoubleVar
+    };
+    // Variables. The flags and positions get default member initializers because the constructor
+    // doesn't assign every flag on every path (e.g. m_cannot_convert_to_non_dict_var).
+    bool m_cannot_convert_to_non_dict_var{false};
+    bool m_contains_wildcards{false};
+    bool m_has_greedy_wildcard_in_middle{false};
+    bool m_has_prefix_greedy_wildcard{false};
+    bool m_has_suffix_greedy_wildcard{false};
+
+    size_t m_begin_pos{0};
+    size_t m_end_pos{0};
+    string m_value;
+
+    // Type if variable has unambiguous type
+    Type m_type;
+    // Types if variable type is ambiguous
+    vector<Type> m_possible_types;
+    // Index of the current possible type selected for generating a subquery
+    size_t m_current_possible_type_ix;
+};
+
+QueryToken::QueryToken(
+        string const& query_string,
+        size_t const begin_pos,
+        size_t const end_pos,
+        bool const is_var
+)
+        : m_current_possible_type_ix(0) {
+    // The token is the substring [begin_pos, end_pos) of the query string
+    m_begin_pos = begin_pos;
+    m_end_pos = end_pos;
+    m_value.assign(query_string, begin_pos, end_pos - begin_pos);
+
+    // A lone "*" is a pure wildcard token; no further classification is needed
+    if ("*" == m_value) {
+        m_has_prefix_greedy_wildcard = true;
+        m_has_suffix_greedy_wildcard = false;
+        m_has_greedy_wildcard_in_middle = false;
+        m_contains_wildcards = true;
+        m_type = Type::Wildcard;
+        return;
+    }
+
+    // Record where greedy wildcards occur: first character, last character, or in between
+    auto const value_length = m_value.length();
+    m_has_prefix_greedy_wildcard = ('*' == m_value[0]);
+    m_has_suffix_greedy_wildcard = ('*' == m_value[value_length - 1]);
+    m_has_greedy_wildcard_in_middle = false;
+    for (size_t i = 1; i < value_length - 1 && false == m_has_greedy_wildcard_in_middle; ++i) {
+        m_has_greedy_wildcard_in_middle = ('*' == m_value[i]);
+    }
+    m_contains_wildcards = m_has_prefix_greedy_wildcard || m_has_suffix_greedy_wildcard
+                           || m_has_greedy_wildcard_in_middle;
+
+    if (false == is_var) {
+        // Not a definite variable: without wildcards the token is plain logtype text;
+        // with wildcards it could be logtype text or either kind of variable
+        if (false == m_contains_wildcards) {
+            m_type = Type::Logtype;
+        } else {
+            m_type = Type::Ambiguous;
+            m_possible_types = {Type::Logtype, Type::DictOrIntVar, Type::DoubleVar};
+        }
+        return;
+    }
+
+    // Definite variable: drop the leading/trailing greedy wildcard, then check whether what
+    // remains can be encoded directly as an integer or double variable
+    string value_without_wildcards = m_value;
+    if (m_has_prefix_greedy_wildcard) {
+        value_without_wildcards.erase(0, 1);
+    }
+    if (m_has_suffix_greedy_wildcard) {
+        value_without_wildcards.resize(value_without_wildcards.length() - 1);
+    }
+
+    encoded_variable_t encoded_var;
+    bool const converts_to_non_dict_var
+            = VariableEncoder::convert_string_to_representable_integer_var(
+                      value_without_wildcards,
+                      encoded_var
+              )
+              || VariableEncoder::convert_string_to_representable_double_var(
+                      value_without_wildcards,
+                      encoded_var
+              );
+
+    if (converts_to_non_dict_var) {
+        // Encodable as a number, so the token may be a dict/int variable or a double variable
+        m_type = Type::Ambiguous;
+        m_possible_types.push_back(Type::DictOrIntVar);
+        m_possible_types.push_back(Type::DoubleVar);
+        m_cannot_convert_to_non_dict_var = false;
+    } else {
+        // Dictionary variable
+        m_type = Type::DictOrIntVar;
+        m_cannot_convert_to_non_dict_var = true;
+    }
+
+}
+
+bool QueryToken::cannot_convert_to_non_dict_var() const {
+    return m_cannot_convert_to_non_dict_var;  // Only assigned by the ctor when is_var was true
+}
+
+bool QueryToken::contains_wildcards() const {
+    return m_contains_wildcards;  // Token has a '*' at its start, at its end, or in between
+}
+
+bool QueryToken::has_greedy_wildcard_in_middle() const {
+    return m_has_greedy_wildcard_in_middle;  // '*' somewhere other than the first/last character
+}
+
+bool QueryToken::has_prefix_greedy_wildcard() const {
+    return m_has_prefix_greedy_wildcard;  // First character is '*'
+}
+
+bool QueryToken::has_suffix_greedy_wildcard() const {
+    return m_has_suffix_greedy_wildcard;  // Last character is '*' (false for the lone "*" token)
+}
+
+bool QueryToken::is_ambiguous_token() const {
+    return Type::Ambiguous == m_type;  // Token has more than one candidate type
+}
+
+bool QueryToken::is_double_var() const {
+    // For an ambiguous token, the effective type is the candidate currently selected for
+    // subquery generation; otherwise it is the token's fixed type
+    Type const effective_type = (Type::Ambiguous == m_type)
+                                        ? m_possible_types[m_current_possible_type_ix]
+                                        : m_type;
+
+    return Type::DoubleVar == effective_type;
+}
+
+bool QueryToken::is_var() const {
+    // An ambiguous token is judged by its currently selected candidate type
+    Type const effective_type = (Type::Ambiguous == m_type)
+                                        ? m_possible_types[m_current_possible_type_ix]
+                                        : m_type;
+
+    // Both dictionary/integer variables and double variables count as variables
+    return (Type::DictOrIntVar == effective_type || Type::DoubleVar == effective_type);
+}
+
+bool QueryToken::is_wildcard() const {
+    return Type::Wildcard == m_type;  // Set by the constructor only when the token is exactly "*"
+}
+
+size_t QueryToken::get_begin_pos() const {
+    return m_begin_pos;  // Begin position in the query string, as given to the constructor
+}
+
+size_t QueryToken::get_end_pos() const {
+    return m_end_pos;  // End position in the query string, as given to the constructor
+}
+
+string const& QueryToken::get_value() const {
+    return m_value;  // The query string's characters in [begin position, end position)
+}
+
+bool QueryToken::change_to_next_possible_type() {
+    // `ix + 1 < size` (not `ix < size - 1`) so that an empty candidate list cannot underflow
+    if (m_current_possible_type_ix + 1 < m_possible_types.size()) {
+        ++m_current_possible_type_ix;  // Select the next candidate type
+        return true;
+    }
+    m_current_possible_type_ix = 0;  // Candidates exhausted: wrap around to the first one
+    return false;
+}
+
+// Local prototypes
+/**
+ * Process a QueryToken that is definitely a variable
+ * @param query_token
+ * @param var_dict Variable dictionary searched when the token is a dictionary variable
+ * @param ignore_case
+ * @param sub_query
+ * @param logtype
+ * @return true if this token might match a message, false otherwise
+ */
+static bool process_var_token(
+        QueryToken const& query_token,
+        std::shared_ptr<VariableDictionaryReader> var_dict, /*const Archive& archive,*/
+        bool ignore_case,
+        SubQuery& sub_query,
+        string& logtype
+);
+/**
+ * Finds a message matching the given query
+ * @param query
+ * @param archive
+ * @param matching_sub_query
+ * @param compressed_file
+ * @param compressed_msg
+ * @return true on success, false otherwise
+ */
+// static bool find_matching_message (const Query& query, Archive& archive, const SubQuery*&
+// matching_sub_query, File& compressed_file, Message& compressed_msg);
+/**
+ * Generates logtypes and variables for subquery
+ * @param log_dict,var_dict Dictionaries searched for matching logtypes and variables
+ * @param processed_search_string
+ * @param query_tokens
+ * @param ignore_case
+ * @param sub_query
+ * @return SubQueryMatchabilityResult::SupercedesAllSubQueries if the generated logtype is "*"
+ * @return SubQueryMatchabilityResult::WontMatch if a variable token or the logtype has no match
+ * @return SubQueryMatchabilityResult::MayMatch otherwise
+ */
+static SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery(
+        std::shared_ptr<LogTypeDictionaryReader> log_dict,
+        std::shared_ptr<VariableDictionaryReader> var_dict, /*const Archive& archive,*/
+        string& processed_search_string,
+        vector<QueryToken>& query_tokens,
+        bool ignore_case,
+        SubQuery& sub_query
+);
+
+static bool process_var_token(
+        QueryToken const& query_token,
+        std::shared_ptr<VariableDictionaryReader> var_dict, /*const Archive& archive,*/
+        bool ignore_case,
+        SubQuery& sub_query,
+        string& logtype
+) {
+    // Decompression + wildcard matching is always requested, even for a precise variable,
+    // so that the variable's position within the message can be verified
+    sub_query.mark_wildcard_match_required();
+
+    if (false == query_token.contains_wildcards()) {
+        // Precise variable: encode it (searching the variable dictionary if needed). A false
+        // result means the variable doesn't exist in the dictionary.
+        return EncodedVariableInterpreter::encode_and_search_dictionary(
+                query_token.get_value(),
+                *var_dict,
+                ignore_case,
+                logtype,
+                sub_query
+        );
+    }
+
+    // Variable with wildcards: emit a placeholder into the logtype, wrapped in the same
+    // leading/trailing greedy wildcards as the token
+    if (query_token.has_prefix_greedy_wildcard()) {
+        logtype += '*';
+    }
+
+    if (query_token.is_double_var()) {
+        LogTypeDictionaryEntry::add_double_var(logtype);
+    } else {
+        LogTypeDictionaryEntry::add_non_double_var(logtype);
+
+        if (query_token.cannot_convert_to_non_dict_var()) {
+            // Must be a dictionary variable, so search variable dictionary; if nothing
+            // matches, the variable doesn't exist in the dictionary
+            using Interpreter = EncodedVariableInterpreter;
+            bool const found_in_var_dict
+                    = Interpreter::wildcard_search_dictionary_and_get_encoded_matches(
+                            query_token.get_value(),
+                            *var_dict,
+                            ignore_case,
+                            sub_query
+                    );
+            if (false == found_in_var_dict) {
+                return false;
+            }
+        }
+    }
+
+    if (query_token.has_suffix_greedy_wildcard()) {
+        logtype += '*';
+    }
+
+    return true;
+}
+
+SubQueryMatchabilityResult generate_logtypes_and_vars_for_subquery(
+        std::shared_ptr<LogTypeDictionaryReader> log_dict,
+        std::shared_ptr<VariableDictionaryReader> var_dict, /*const Archive& archive,*/
+        string& processed_search_string,
+        vector<QueryToken>& query_tokens,
+        bool ignore_case,
+        SubQuery& sub_query
+) {
+    // Build a wildcard logtype by stitching together the text between tokens and each token's
+    // logtype representation under its currently selected type
+    string logtype;
+    size_t copied_up_to = 0;
+    for (auto const& token : query_tokens) {
+        // Text between the previous token and this one is copied verbatim
+        auto const token_begin = token.get_begin_pos();
+        logtype.append(processed_search_string, copied_up_to, token_begin - copied_up_to);
+        copied_up_to = token.get_end_pos();
+
+        if (token.is_wildcard()) {
+            logtype += '*';
+            continue;
+        }
+        if (token.has_greedy_wildcard_in_middle()) {
+            // Fallback to decompression + wildcard matching for now to avoid handling queries
+            // where the pieces of the token on either side of each wildcard need to be
+            // processed as ambiguous tokens
+            sub_query.mark_wildcard_match_required();
+            logtype += '*';
+            if (token.is_var()) {
+                LogTypeDictionaryEntry::add_non_double_var(logtype);
+                logtype += '*';
+            }
+            continue;
+        }
+        if (false == token.is_var()) {
+            // Static text is copied into the logtype as-is
+            logtype += token.get_value();
+            continue;
+        }
+        if (false == process_var_token(token, var_dict, ignore_case, sub_query, logtype)) {
+            return SubQueryMatchabilityResult::WontMatch;
+        }
+    }
+
+    // Copy whatever follows the last token
+    if (copied_up_to < processed_search_string.length()) {
+        logtype.append(processed_search_string, copied_up_to, string::npos);
+    }
+
+    // A logtype of just "*" will match all messages
+    if ("*" == logtype) {
+        return SubQueryMatchabilityResult::SupercedesAllSubQueries;
+    }
+
+    // Find matching logtypes; without any, the sub-query can't match
+    std::unordered_set<LogTypeDictionaryEntry const*> matching_logtypes;
+    log_dict->get_entries_matching_wildcard_string(logtype, ignore_case, matching_logtypes);
+    if (matching_logtypes.empty()) {
+        return SubQueryMatchabilityResult::WontMatch;
+    }
+    sub_query.set_possible_logtypes(matching_logtypes);
+
+    // Calculate the IDs of the segments that may contain results for the sub-query now that
+    // we've calculated the matching logtypes and variables
+    // TODO: double check that this can be safely ignored for CLJ
+    // sub_query.calculate_ids_of_matching_segments();
+
+    return SubQueryMatchabilityResult::MayMatch;
+}
+
+bool Grep::process_raw_query(
+        std::shared_ptr<LogTypeDictionaryReader> log_dict,
+        std::shared_ptr<VariableDictionaryReader> var_dict, /*const Archive& archive,*/
+        string const& search_string, /*epochtime_t search_begin_ts, epochtime_t search_end_ts,*/
+        bool ignore_case,
+        Query& query, /* compressor_frontend::lexers::ByteLexer& forward_lexer,
+                         compressor_frontend::lexers::ByteLexer& reverse_lexer,*/
+        bool add_wildcards,
+        bool use_heuristic
+) {
+    // Set properties which require no processing
+    // query.set_search_begin_timestamp(search_begin_ts);
+    // query.set_search_end_timestamp(search_end_ts);
+    query.set_ignore_case(ignore_case);
+
+    // Add prefix and suffix '*' to make the search a sub-string match
+    string processed_search_string;
+    if (add_wildcards) {
+        processed_search_string = "*";
+        processed_search_string += search_string;
+        processed_search_string += '*';
+    } else {
+        processed_search_string = search_string;
+    }
+
+    // Clean-up search string
+    processed_search_string = StringUtils::clean_up_wildcard_search_string(processed_search_string);
+    query.set_search_string(processed_search_string);
+
+    // Replace non-greedy wildcards with greedy wildcards since we currently have no support for
+    // searching compressed files with non-greedy wildcards
+    std::replace(processed_search_string.begin(), processed_search_string.end(), '?', '*');
+    // Clean-up in case any instances of "?*" or "*?" were changed into "**"
+    processed_search_string = StringUtils::clean_up_wildcard_search_string(processed_search_string);
+
+    // Split search_string into tokens with wildcards
+    vector<QueryToken> query_tokens;
+    size_t begin_pos = 0;
+    size_t end_pos = 0;
+    bool is_var;
+    // FIXME: may want to use non-heuristic method of tokenizing query (use_heuristic is unused)
+    // if (use_heuristic) {
+    while (get_bounds_of_next_potential_var(processed_search_string, begin_pos, end_pos, is_var)) {
+        query_tokens.emplace_back(processed_search_string, begin_pos, end_pos, is_var);
+    }
+    /*} else {
+        while (get_bounds_of_next_potential_var(processed_search_string, begin_pos, end_pos,
+    is_var, forward_lexer, reverse_lexer)) { query_tokens.emplace_back(processed_search_string,
+    begin_pos, end_pos, is_var);
+        }
+    }*/
+
+    // Get pointers to all ambiguous tokens. Exclude tokens with wildcards in the middle since
+    // we fall-back to decompression + wildcard matching for those.
+    vector<QueryToken*> ambiguous_tokens;
+    for (auto& query_token : query_tokens) {
+        if (false == query_token.has_greedy_wildcard_in_middle()
+            && query_token.is_ambiguous_token())
+        {
+            ambiguous_tokens.push_back(&query_token);
+        }
+    }
+
+    // Generate a sub-query for each combination of ambiguous tokens
+    // E.g., if there are two ambiguous tokens each of which could be a logtype or variable, we
+    // need to create:
+    // - (token1 as logtype) (token2 as logtype)
+    // - (token1 as logtype) (token2 as var)
+    // - (token1 as var) (token2 as logtype)
+    // - (token1 as var) (token2 as var)
+    // A single SubQuery is reused for every combination; it is cleared at the top of each pass
+    SubQuery sub_query;
+    bool type_of_one_token_changed = true;
+    while (type_of_one_token_changed) {
+        sub_query.clear();
+
+        // Compute logtypes and variables for query
+        auto matchability = generate_logtypes_and_vars_for_subquery(
+                log_dict,
+                var_dict,
+                processed_search_string,
+                query_tokens,
+                query.get_ignore_case(),
+                sub_query
+        );
+        switch (matchability) {
+            case SubQueryMatchabilityResult::SupercedesAllSubQueries:
+                // Clear all sub-queries since they will be superceded by this sub-query
+                query.clear_sub_queries();
+
+                // Since other sub-queries will be superceded by this one, we can stop
+                // processing now
+                return true;
+            case SubQueryMatchabilityResult::MayMatch:
+                query.add_sub_query(sub_query);
+                break;
+            case SubQueryMatchabilityResult::WontMatch:
+            default:
+                // Do nothing
+                break;
+        }
+
+        // Update combination of ambiguous tokens: advance the first token that still has a
+        type_of_one_token_changed = false;  // candidate left; exhausted tokens wrap to their first
+        for (auto* ambiguous_token : ambiguous_tokens) {
+            if (ambiguous_token->change_to_next_possible_type()) {
+                type_of_one_token_changed = true;
+                break;
+            }
+        }
+    }
+
+    return query.contains_sub_queries();
+}
+
+bool Grep::get_bounds_of_next_potential_var(
+        string const& value,
+        size_t& begin_pos,
+        size_t& end_pos,
+        bool& is_var
+) {
+    auto const value_length = value.length();
+    if (end_pos >= value_length) {
+        return false;
+    }
+    // Scan tokens until one is a definite variable or contains a wildcard, or `value` runs out
+    is_var = false;
+    bool contains_wildcard = false;
+    while (false == is_var && false == contains_wildcard && begin_pos < value_length) {
+        // Start search at end of last token
+        begin_pos = end_pos;
+
+        // Find next wildcard or non-delimiter
+        bool is_escaped = false;
+        for (; begin_pos < value_length; ++begin_pos) {
+            char c = value[begin_pos];
+
+            if (is_escaped) {
+                is_escaped = false;
+
+                if (false == StringUtils::is_delim(c)) {
+                    // Found escaped non-delimiter, so reverse the index to retain the escape
+                    // character. (An escaped delimiter is skipped like any other delimiter.)
+                    --begin_pos;
+                    break;
+                }
+            } else if ('\\' == c) {
+                // Escape character
+                is_escaped = true;
+            } else {
+                if (StringUtils::is_wildcard(c)) {
+                    contains_wildcard = true;
+                    break;
+                }
+                if (false == StringUtils::is_delim(c)) {
+                    break;
+                }
+            }
+        }
+
+        bool contains_decimal_digit = false;
+        bool contains_alphabet = false;
+
+        // Find next delimiter
+        is_escaped = false;
+        end_pos = begin_pos;
+        for (; end_pos < value_length; ++end_pos) {
+            char c = value[end_pos];
+
+            if (is_escaped) {
+                is_escaped = false;
+
+                if (StringUtils::is_delim(c)) {
+                    // Found escaped delimiter, so reverse the index to retain the escape
+                    // character
+                    --end_pos;
+                    break;
+                }
+            } else if ('\\' == c) {
+                // Escape character
+                is_escaped = true;
+            } else {
+                if (StringUtils::is_wildcard(c)) {
+                    contains_wildcard = true;
+                } else if (StringUtils::is_delim(c)) {
+                    // Found delimiter that's not also a wildcard
+                    break;
+                }
+            }
+
+            if (StringUtils::is_decimal_digit(c)) {
+                contains_decimal_digit = true;
+            } else if (StringUtils::is_alphabet(c)) {
+                contains_alphabet = true;
+            }
+        }
+
+        // Treat token as a definite variable if:
+        // - it contains a decimal digit, or
+        // - it could be a multi-digit hex value, or
+        // - it's directly preceded by an equals sign and contains an alphabet without a
+        // wildcard between the equals sign and the first alphabet of the token
+        if (contains_decimal_digit
+            || StringUtils::could_be_multi_digit_hex_value(value, begin_pos, end_pos))
+        {
+            is_var = true;
+        } else if (begin_pos > 0 && '=' == value[begin_pos - 1] && contains_alphabet) {
+            // Find first alphabet or wildcard in token. NOTE(review): unescaped alphabets don't end
+            is_escaped = false;  // this scan, so a wildcard after them is still found; confirm intent
+            bool found_wildcard_before_alphabet = false;
+            for (auto i = begin_pos; i < end_pos; ++i) {
+                auto c = value[i];
+
+                if (is_escaped) {
+                    is_escaped = false;
+
+                    if (StringUtils::is_alphabet(c)) {
+                        break;
+                    }
+                } else if ('\\' == c) {
+                    // Escape character
+                    is_escaped = true;
+                } else if (StringUtils::is_wildcard(c)) {
+                    found_wildcard_before_alphabet = true;
+                    break;
+                }
+            }
+
+            if (false == found_wildcard_before_alphabet) {
+                is_var = true;
+            }
+        }
+    }
+
+    return (value_length != begin_pos);
+}
+}  // namespace clp_s::search::clp_search
diff --git a/components/core/src/clp_s/search/clp_search/Grep.hpp b/components/core/src/clp_s/search/clp_search/Grep.hpp
new file mode 100644
index 000000000..baf5bfcd2
--- /dev/null
+++ b/components/core/src/clp_s/search/clp_search/Grep.hpp
@@ -0,0 +1,54 @@
+// Code from CLP
+
+#ifndef CLP_S_SEARCH_CLP_SEARCH_GREP_HPP
+#define CLP_S_SEARCH_CLP_SEARCH_GREP_HPP
+
+#include <string>
+
+#include "../../Defs.hpp"
+#include "../../DictionaryReader.hpp"
+#include "Query.hpp"
+
+namespace clp_s::search::clp_search {
+class Grep {
+public:
+    // Methods
+    /**
+     * Processes a raw user query into a Query
+     * @param log_dict,var_dict Readers for the logtype and variable dictionaries
+     * @param search_string
+     * @param ignore_case
+     * @param query
+     * @param add_wildcards
+     * @param use_heuristic
+     * @return true if query may match messages, false otherwise
+     */
+    static bool process_raw_query(
+            std::shared_ptr<LogTypeDictionaryReader> log_dict,
+            std::shared_ptr<VariableDictionaryReader> var_dict,
+            std::string const& search_string,
+            bool ignore_case,
+            Query& query,
+            bool add_wildcards = true,
+            bool use_heuristic = true
+    );
+
+    /**
+     * Returns bounds of next potential variable (either a definite variable or a token with
+     * wildcards)
+     * @param value String containing token
+     * @param begin_pos Begin position of last token, changes to begin position of next token
+     * @param end_pos End position of last token, changes to end position of next token
+     * @param is_var Whether the token is definitely a variable
+     * @return true if another potential variable was found, false otherwise
+     */
+    static bool get_bounds_of_next_potential_var(
+            std::string const& value,
+            size_t& begin_pos,
+            size_t& end_pos,
+            bool& is_var
+    );
+};
+}  // namespace clp_s::search::clp_search
+
+#endif  // CLP_S_SEARCH_CLP_SEARCH_GREP_HPP
diff --git a/components/core/src/clp_s/search/clp_search/Query.cpp b/components/core/src/clp_s/search/clp_search/Query.cpp
new file mode 100644
index 000000000..507d7a0da
--- /dev/null
+++ b/components/core/src/clp_s/search/clp_search/Query.cpp
@@ -0,0 +1,150 @@
+// Code from CLP
+
+#include "Query.hpp"
+
+using std::set;
+using std::string;
+using std::unordered_set;
+
+namespace clp_s::search::clp_search {
+// Local function prototypes
+/**
+ * Intersects two sets in place: every element of b that is absent from a is removed from b
+ * @tparam SetType Container providing count() and an erase() that returns an iterator
+ * @param a Set to intersect with; it is not modified
+ * @param b Set that is narrowed down to the intersection
+ */
+template <typename SetType>
+static void inplace_set_intersection(SetType const& a, SetType& b);
+
+template <typename SetType>
+static void inplace_set_intersection(SetType const& a, SetType& b) {
+    for (auto cursor = b.cbegin(); b.cend() != cursor;) {
+        if (a.count(*cursor) > 0) {
+            ++cursor;  // Shared by both sets, so it stays
+        } else {
+            cursor = b.erase(cursor);  // erase() yields the iterator after the removed element
+        }
+    }
+}
+
+QueryVar::QueryVar(encoded_variable_t precise_non_dict_var)
+        : m_is_precise_var(true),
+          m_is_dict_var(false),
+          m_precise_var(precise_non_dict_var),
+          // A non-dictionary variable has no dictionary entry behind it
+          m_var_dict_entry(nullptr) {}
+
+QueryVar::QueryVar(
+        encoded_variable_t precise_dict_var,
+        VariableDictionaryEntry const* var_dict_entry
+)
+        // A precise dictionary variable: one encoded value plus the entry it refers to
+        : m_is_precise_var(true),
+          m_is_dict_var(true),
+          m_precise_var(precise_dict_var),
+          m_var_dict_entry(var_dict_entry) {}
+
+QueryVar::QueryVar(
+        unordered_set<encoded_variable_t> const& possible_dict_vars,
+        unordered_set<VariableDictionaryEntry const*> const& possible_var_dict_entries
+)
+        : m_is_precise_var(1 == possible_dict_vars.size()),
+          m_is_dict_var(true),
+          m_precise_var{},  // Defaults so the imprecise case never holds indeterminate values
+          m_var_dict_entry(nullptr) {
+    if (m_is_precise_var) {  // A single possible variable is the same as a precise variable
+        m_precise_var = *possible_dict_vars.cbegin();
+        m_var_dict_entry = *possible_var_dict_entries.cbegin();
+    } else {
+        m_possible_dict_vars = possible_dict_vars;
+        m_possible_var_dict_entries = possible_var_dict_entries;
+    }
+}
+
+bool QueryVar::matches(encoded_variable_t var) const {
+    // Precise variables compare by value; imprecise ones by membership in the possible set
+    return m_is_precise_var ? (m_precise_var == var) : (m_possible_dict_vars.count(var) > 0);
+}
+
+void SubQuery::add_non_dict_var(encoded_variable_t precise_non_dict_var) {
+    m_vars.push_back(QueryVar(precise_non_dict_var));  // Appended in query order
+}
+
+void SubQuery::add_dict_var(
+        encoded_variable_t precise_dict_var,
+        VariableDictionaryEntry const* var_dict_entry
+) {
+    m_vars.push_back(QueryVar(precise_dict_var, var_dict_entry));
+}
+
+void SubQuery::add_imprecise_dict_var(
+        unordered_set<encoded_variable_t> const& possible_dict_vars,
+        unordered_set<VariableDictionaryEntry const*> const& possible_var_dict_entries
+) {
+    m_vars.push_back(QueryVar(possible_dict_vars, possible_var_dict_entries));
+}
+
+void SubQuery::set_possible_logtypes(
+        unordered_set<LogTypeDictionaryEntry const*> const& logtype_entries
+) {
+    // Rebuild the ID set from scratch so that it mirrors exactly the entries given
+    m_possible_logtype_ids.clear();
+    for (auto it = logtype_entries.cbegin(); logtype_entries.cend() != it; ++it) {
+        m_possible_logtype_ids.insert((*it)->get_id());
+    }
+    m_possible_logtype_entries = logtype_entries;
+}
+
+void SubQuery::mark_wildcard_match_required() {
+    m_wildcard_match_required = true;  // Stays set until clear() resets it
+}
+
+void SubQuery::clear() {
+    m_vars.clear();
+    set_possible_logtypes({});  // Empties the logtype IDs *and* the entries they came from
+    m_wildcard_match_required = false;
+}
+
+bool SubQuery::matches_logtype(logtype_dictionary_id_t const logtype) const {
+    return m_possible_logtype_ids.cend() != m_possible_logtype_ids.find(logtype);
+}
+
+// Checks that the query's variables appear, in order, among the given encoded variables
+bool SubQuery::matches_vars(Span<int64_t> vars) const {
+    size_t const num_query_vars = m_vars.size();
+    size_t const num_encoded_vars = vars.size();
+    if (num_encoded_vars < num_query_vars) {
+        // Too few variables to ever contain every query variable
+        return false;
+    }
+
+    // Scan the encoded variables once, consuming the next query variable whenever it matches.
+    // Variables that don't match are skipped, so matches needn't be contiguous.
+    size_t num_matched = 0;
+    for (size_t i = 0; i < num_encoded_vars; ++i) {
+        if (num_query_vars == num_matched) {
+            break;
+        }
+        if (m_vars[num_matched].matches(vars[i])) {
+            ++num_matched;
+        }
+    }
+
+    // Success only if every query variable was consumed
+    return num_query_vars == num_matched;
+}
+
+void Query::set_search_string(string const& search_string) {
+    m_search_string_matches_all = (search_string.empty() || "*" == search_string);
+    m_search_string = search_string;  // Keep the raw string alongside the cached flag
+}
+
+void Query::add_sub_query(SubQuery const& sub_query) {
+    m_sub_queries.emplace_back(sub_query);  // Stores a copy of the caller's sub-query
+}
+
+void Query::clear_sub_queries() {
+    m_sub_queries.clear();  // contains_sub_queries() reports false afterwards
+}
+}  // namespace clp_s::search::clp_search
diff --git a/components/core/src/clp_s/search/clp_search/Query.hpp b/components/core/src/clp_s/search/clp_search/Query.hpp
new file mode 100644
index 000000000..daba27dcc
--- /dev/null
+++ b/components/core/src/clp_s/search/clp_search/Query.hpp
@@ -0,0 +1,192 @@
+// Code from CLP
+
+#ifndef CLP_S_SEARCH_CLP_SEARCH_QUERY_HPP
+#define CLP_S_SEARCH_CLP_SEARCH_QUERY_HPP
+
+#include <set>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include "../../Defs.hpp"
+#include "../../DictionaryEntry.hpp"
+#include "../../Utils.hpp"
+
+namespace clp_s::search::clp_search {
+/**
+ * Class representing a variable in a subquery. It can represent a precise encoded variable or
+ * an imprecise dictionary variable (i.e., a set of possible encoded dictionary variable IDs)
+ */
+class QueryVar {
+public:
+    // Constructors
+    explicit QueryVar(encoded_variable_t precise_non_dict_var);
+    QueryVar(encoded_variable_t precise_dict_var, VariableDictionaryEntry const* var_dict_entry);
+    QueryVar(
+            std::unordered_set<encoded_variable_t> const& possible_dict_vars,
+            std::unordered_set<VariableDictionaryEntry const*> const& possible_var_dict_entries
+    );
+
+    // Methods
+    /**
+     * Checks if the given encoded variable matches this QueryVar
+     * @param var
+     * @return true if matched, false otherwise
+     */
+    bool matches(encoded_variable_t var) const;
+
+    bool is_precise_var() const { return m_is_precise_var; }
+
+    bool is_dict_var() const { return m_is_dict_var; }
+
+    VariableDictionaryEntry const* get_var_dict_entry() const { return m_var_dict_entry; }
+
+    std::unordered_set<VariableDictionaryEntry const*> const& get_possible_var_dict_entries(
+    ) const {
+        return m_possible_var_dict_entries;
+    }
+
+private:
+    // Variables (defaulted so that no constructor path can leave one indeterminate)
+    bool m_is_precise_var{false};
+    bool m_is_dict_var{false};
+
+    encoded_variable_t m_precise_var{};
+    // Only used if the precise variable is a dictionary variable; nullptr otherwise
+    VariableDictionaryEntry const* m_var_dict_entry{nullptr};
+
+    // Only used if the variable is an imprecise dictionary variable
+    std::unordered_set<encoded_variable_t> m_possible_dict_vars;
+    std::unordered_set<VariableDictionaryEntry const*> m_possible_var_dict_entries;
+};
+
+/**
+ * Class representing a subquery (or informally, an interpretation) of a user query. It contains
+ * a series of possible logtypes, an ordered list of QueryVars, and whether the query still
+ * requires wildcard matching after it matches an encoded message.
+ */
+class SubQuery {
+public:
+    // Methods
+    /**
+     * Adds a precise non-dictionary variable to the subquery
+     * @param precise_non_dict_var
+     */
+    void add_non_dict_var(encoded_variable_t precise_non_dict_var);
+    /**
+     * Adds a precise dictionary variable to the subquery
+     * @param precise_dict_var
+     * @param var_dict_entry
+     */
+    void add_dict_var(
+            encoded_variable_t precise_dict_var,
+            VariableDictionaryEntry const* var_dict_entry
+    );
+    /**
+     * Adds an imprecise dictionary variable (i.e., a set of possible precise dictionary
+     * variables) to the subquery
+     * @param possible_dict_vars
+     * @param possible_var_dict_entries
+     */
+    void add_imprecise_dict_var(
+            std::unordered_set<encoded_variable_t> const& possible_dict_vars,
+            std::unordered_set<VariableDictionaryEntry const*> const& possible_var_dict_entries
+    );
+    /**
+     * Replaces the subquery's possible logtypes with the given set
+     * @param logtype_entries
+     */
+    void set_possible_logtypes(
+            std::unordered_set<LogTypeDictionaryEntry const*> const& logtype_entries
+    );
+    void mark_wildcard_match_required();
+
+    /**
+     * Calculates the segment IDs that should contain a match for the subquery's current
+     * logtypes and QueryVars
+     */
+    // void calculate_ids_of_matching_segments ();
+
+    void clear();
+
+    bool wildcard_match_required() const { return m_wildcard_match_required; }
+
+    size_t get_num_possible_logtypes() const { return m_possible_logtype_ids.size(); }
+
+    std::unordered_set<LogTypeDictionaryEntry const*> const& get_possible_logtype_entries() const {
+        return m_possible_logtype_entries;
+    }
+
+    size_t get_num_possible_vars() const { return m_vars.size(); }
+
+    std::vector<QueryVar> const& get_vars() const { return m_vars; }
+
+    std::set<segment_id_t> const& get_ids_of_matching_segments() const {
+        return m_ids_of_matching_segments;
+    }
+
+    /**
+     * Whether the given logtype ID matches one of the possible logtypes in this subquery
+     * @param logtype
+     * @return true if matched, false otherwise
+     */
+    bool matches_logtype(logtype_dictionary_id_t logtype) const;
+    /**
+     * Whether the given variables contain the subquery's variables in order (but not
+     * necessarily contiguously)
+     * @param vars
+     * @return true if matched, false otherwise
+     */
+    bool matches_vars(Span<int64_t> vars) const;
+
+private:
+    // Variables
+    std::unordered_set<LogTypeDictionaryEntry const*> m_possible_logtype_entries;
+    std::unordered_set<logtype_dictionary_id_t> m_possible_logtype_ids;
+    std::set<segment_id_t> m_ids_of_matching_segments;
+    std::vector<QueryVar> m_vars;
+    bool m_wildcard_match_required{false};  // Defaulted: there is no constructor to set it
+};
+
+/**
+ * A user query: the raw search string, its case-sensitivity, and the derived sub-queries.
+ */
+class Query {
+public:
+    // Constructors (the initial state comes from the default member initializers below)
+    Query() = default;
+
+    void set_ignore_case(bool ignore_case) { m_ignore_case = ignore_case; }
+
+    void set_search_string(std::string const& search_string);
+
+    void add_sub_query(SubQuery const& sub_query);
+
+    void clear_sub_queries();
+
+    bool get_ignore_case() const { return m_ignore_case; }
+
+    std::string const& get_search_string() const { return m_search_string; }
+
+    /**
+     * Tells whether the search string matches every message (i.e., it's "" or "*")
+     * @return true if the search string will match all messages
+     * @return false otherwise
+     */
+    bool search_string_matches_all() const { return m_search_string_matches_all; }
+
+    std::vector<SubQuery> const& get_sub_queries() const { return m_sub_queries; }
+
+    bool contains_sub_queries() const { return false == m_sub_queries.empty(); }
+
+private:
+    // Variables
+    bool m_ignore_case{false};
+    std::string m_search_string;
+    std::vector<SubQuery> m_sub_queries;
+    std::vector<SubQuery const*> m_relevant_sub_queries;
+    bool m_search_string_matches_all{true};
+};
+}  // namespace clp_s::search::clp_search
+
+#endif  // CLP_S_SEARCH_CLP_SEARCH_QUERY_HPP
diff --git a/components/core/src/clp_s/search/kql/CMakeLists.txt b/components/core/src/clp_s/search/kql/CMakeLists.txt
new file mode 100644
index 000000000..385bd6571
--- /dev/null
+++ b/components/core/src/clp_s/search/kql/CMakeLists.txt
@@ -0,0 +1,28 @@
+ANTLR_TARGET(
+        KqlParser
+        Kql.g4
+        LEXER PARSER VISITOR
+        PACKAGE kql
+)
+
+add_library(
+        kql
+        ../../Utils.hpp
+        ../AndExpr.hpp
+        ../BooleanLiteral.hpp
+        ../ColumnDescriptor.hpp
+        ../DateLiteral.hpp
+        ../EmptyExpr.hpp
+        ../Expression.hpp
+        ../FilterExpr.hpp
+        ../Integral.hpp
+        ../NullLiteral.hpp
+        ../OrExpr.hpp
+        ../StringLiteral.hpp
+        ${ANTLR_KqlParser_CXX_OUTPUTS}
+        kql.cpp
+        kql.hpp
+)
+target_compile_features(kql PRIVATE cxx_std_17)
+target_include_directories(kql PRIVATE ${ANTLR_KqlParser_OUTPUT_DIR})
+target_link_libraries(kql PRIVATE antlr4_static Boost::filesystem)
diff --git a/components/core/src/clp_s/search/kql/Kql.g4 b/components/core/src/clp_s/search/kql/Kql.g4
new file mode 100644
index 000000000..2649754fa
--- /dev/null
+++ b/components/core/src/clp_s/search/kql/Kql.g4
@@ -0,0 +1,107 @@
+grammar Kql;
+
+start: query EOF ;
+
+query
+    : col=column ':' '{' q=query '}'     #NestedQuery
+    | '(' q=query ')'                    #SubQuery
+    | NOT q=query                        #NotQuery
+    | lhs=query op=(OR | AND) rhs=query  #OrAndQuery
+    | expression                         #Expr
+    ;
+
+expression
+    : column_range_expression 
+    | column_value_expression
+    | value_expression
+    ;
+
+column_range_expression
+    : col=column RANGE_OPERATOR ( date_lit=DATE_LITERAL | lit=LITERAL )
+    ;
+
+column_value_expression
+    : col=column ':' ( list=list_of_values | date_lit=DATE_LITERAL | lit=LITERAL )
+    ;
+
+column: 
+    LITERAL
+    ;
+
+value_expression
+    : LITERAL
+    ;
+
+list_of_values
+    : '(' condition=(AND | OR | NOT)? (literals+=LITERAL)* ')'
+    ;
+
+AND:        [Aa] [Nn] [Dd] ;
+OR:         [Oo] [Rr] ;
+NOT:        [Nn] [Oo] [Tt] ;
+
+DATE_LITERAL: 'date(' (('"' QUOTED_CHARACTER+ '"') | QUOTED_CHARACTER+) ')' ;
+
+LITERAL: QUOTED_STRING | UNQUOTED_LITERAL ;
+
+QUOTED_STRING: '"' QUOTED_CHARACTER* '"' ;
+
+UNQUOTED_LITERAL: UNQUOTED_CHARACTER+ ;
+
+// TODO handle unicode
+fragment QUOTED_CHARACTER
+    : ESCAPED_SPACE
+    | '\\"'
+    | ~'"'
+    ;
+
+// TODO:  handle unicode
+fragment UNQUOTED_CHARACTER
+    : ESCAPED_SPACE
+    | ESCAPED_SPECIAL_CHARACTER
+    | ESCAPED_KEYWORD
+    | WILDCARD
+    |  ~[\\():<>"{} \r\n\t]
+    ;
+
+fragment WILDCARD:   '*';
+
+// TODO: unescape keywords
+fragment ESCAPED_KEYWORD
+    : '\\' KEYWORD
+    ;
+
+fragment KEYWORD
+    : AND
+    | OR
+    | NOT
+    ;
+
+
+RANGE_OPERATOR
+    : '<='
+    | '>='
+    | '<'
+    | '>'
+    ;
+
+fragment ESCAPED_SPECIAL_CHARACTER
+    : '\\' SPECIAL_CHARACTER
+    ;
+
+fragment ESCAPED_SPACE
+    : '\\t'
+    | '\\r'
+    | '\\n'
+    ;
+
+fragment SPECIAL_CHARACTER
+    : [\\():<>"*{}]
+    ;
+
+
+// For unicode hex
+//UNICODE: 'u' HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT ;
+//fragment HEXDIGIT: [0-9a-fA-F]+ ;
+
+SPACE:  [ \t\r\n] -> skip ;
diff --git a/components/core/src/clp_s/search/kql/kql.cpp b/components/core/src/clp_s/search/kql/kql.cpp
new file mode 100644
index 000000000..52dc4603b
--- /dev/null
+++ b/components/core/src/clp_s/search/kql/kql.cpp
@@ -0,0 +1,248 @@
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include <antlr4-runtime.h>
+
+#include "KqlBaseVisitor.h"
+#include "KqlLexer.h"
+#include "KqlParser.h"
+// If redlining may want to add ${workspaceFolder}/build/**
+// to include path for vscode C/C++ utils
+
+#include "../../Utils.hpp"
+#include "../AndExpr.hpp"
+#include "../BooleanLiteral.hpp"
+#include "../ColumnDescriptor.hpp"
+#include "../DateLiteral.hpp"
+#include "../EmptyExpr.hpp"
+#include "../FilterExpr.hpp"
+#include "../Integral.hpp"
+#include "../NullLiteral.hpp"
+#include "../OrExpr.hpp"
+#include "../StringLiteral.hpp"
+
+using namespace antlr4;
+using namespace kql;
+
+namespace clp_s::search::kql {
+class ErrorListener : public BaseErrorListener {
+public:
+    void syntaxError(
+            Recognizer* recognizer,
+            Token* offending_symbol,
+            size_t line,
+            size_t char_position_in_line,
+            std::string const& msg,
+            std::exception_ptr e
+    ) override {
+        m_error = true;  // Only the fact that an error occurred is kept; details are dropped
+    }
+
+    bool error() const { return m_error; }
+
+private:
+    bool m_error{false};
+};
+
+class ParseTreeVisitor : public KqlBaseVisitor {
+private:
+    static void
+    prepend_column(std::shared_ptr<ColumnDescriptor> const& desc, DescriptorList const& prefix) {
+        desc->get_descriptor_list().insert(desc->descriptor_begin(), prefix.begin(), prefix.end());
+    }
+
+    void prepend_column(std::shared_ptr<Expression> const& expr, DescriptorList const& prefix) {
+        for (auto const& op : expr->get_op_list()) {
+            if (auto col = std::dynamic_pointer_cast<ColumnDescriptor>(op)) {
+                prepend_column(col, prefix);
+            } else if (auto subexpr = std::dynamic_pointer_cast<Expression>(op)) {
+                prepend_column(subexpr, prefix);
+            }
+        }
+    }
+
+public:
+    static std::string unquote_string(std::string const& text) {
+        // Strip quotes only when both are present; "" and a lone '"' are returned untouched
+        if (text.size() >= 2 && '"' == text.front() && '"' == text.back()) {
+            return text.substr(1, text.size() - 2);
+        }
+        return text;
+    }
+
+    static std::string unquote_date_string(std::string const& text) {
+        // date(...)
+        // 012345
+        return unquote_string(text.substr(5, text.size() - 6));
+    }
+
+    static std::shared_ptr<Literal> unquote_literal(std::string const& text) {
+        std::string token = unquote_string(text);
+
+        // Try the typed literals first; anything they all reject is treated as a string
+        if (auto integral = Integral::create_from_string(token)) {
+            return integral;
+        } else if (auto boolean = BooleanLiteral::create_from_string(token)) {
+            return boolean;
+        } else if (auto null_literal = NullLiteral::create_from_string(token)) {
+            return null_literal;
+        }
+        return StringLiteral::create(StringUtils::clean_up_wildcard_search_string(token));
+    }
+
+    static std::shared_ptr<Literal> unquote_date_literal(std::string const& text) {
+        std::string token = unquote_date_string(text);
+
+        return DateLiteral::create_from_string(token);
+    }
+
+    std::any visitStart(KqlParser::StartContext* ctx) override {
+        // only go through first child (query) and avoid default
+        // behaviour of returning result from last child (EOF in this case)
+        return ctx->children[0]->accept(this);
+    }
+
+    std::any visitColumn(KqlParser::ColumnContext* ctx) override {
+        std::string column = unquote_string(ctx->LITERAL()->getText());
+
+        std::vector<std::string> descriptor_tokens;
+        StringUtils::tokenize_column_descriptor(column, descriptor_tokens);
+
+        return ColumnDescriptor::create(descriptor_tokens);
+    }
+
+    std::any visitNestedQuery(KqlParser::NestedQueryContext* ctx) override {
+        auto descriptor = std::any_cast<std::shared_ptr<ColumnDescriptor>>(ctx->col->accept(this));
+        DescriptorList prefix = descriptor->get_descriptor_list();
+
+        auto nested_expr = std::any_cast<std::shared_ptr<Expression>>(ctx->q->accept(this));
+        prepend_column(nested_expr, prefix);
+
+        return nested_expr;
+    }
+
+    std::any visitOrAndQuery(KqlParser::OrAndQueryContext* ctx) override {
+        auto lhs = std::any_cast<std::shared_ptr<Expression>>(ctx->lhs->accept(this));
+        auto rhs = std::any_cast<std::shared_ptr<Expression>>(ctx->rhs->accept(this));
+        // The grammar only allows OR or AND here, so anything that isn't AND is OR
+        if (ctx->op->getType() == KqlParser::AND) {
+            return AndExpr::create(lhs, rhs);
+        }
+        return OrExpr::create(lhs, rhs);
+    }
+
+    std::any visitNotQuery(KqlParser::NotQueryContext* ctx) override {
+        auto q = std::any_cast<std::shared_ptr<Expression>>(ctx->q->accept(this));
+        q->invert();
+        return q;
+    }
+
+    std::any visitSubQuery(KqlParser::SubQueryContext* ctx) override {
+        return ctx->q->accept(this);
+    }
+
+    std::any visitColumn_value_expression(KqlParser::Column_value_expressionContext* ctx) override {
+        auto descriptor = std::any_cast<std::shared_ptr<ColumnDescriptor>>(ctx->col->accept(this));
+
+        if (ctx->lit) {
+            auto lit = unquote_literal(ctx->lit->getText());
+            return FilterExpr::create(descriptor, FilterOperation::EQ, lit);
+        } else if (ctx->date_lit) {
+            auto lit = unquote_date_literal(ctx->date_lit->getText());
+            return FilterExpr::create(descriptor, FilterOperation::EQ, lit);
+        } else /*if (ctx->list) */ {
+            auto list_expr = std::any_cast<std::shared_ptr<Expression>>(ctx->list->accept(this));
+            DescriptorList prefix = descriptor->get_descriptor_list();
+            prepend_column(list_expr, prefix);
+            return list_expr;
+        }
+    }
+
+    std::any visitColumn_range_expression(KqlParser::Column_range_expressionContext* ctx) override {
+        auto descriptor = std::any_cast<std::shared_ptr<ColumnDescriptor>>(ctx->col->accept(this));
+        std::shared_ptr<Literal> lit;
+        if (ctx->lit) {
+            lit = unquote_literal(ctx->lit->getText());
+        } else /*if (ctx->date_lit)*/ {
+            lit = unquote_date_literal(ctx->date_lit->getText());
+        }
+        std::string range_op = ctx->RANGE_OPERATOR()->getText();
+
+        FilterOperation op = FilterOperation::EQ;
+        if (range_op == "<=") {
+            op = FilterOperation::LTE;
+        } else if (range_op == ">=") {
+            op = FilterOperation::GTE;
+        } else if (range_op == "<") {
+            op = FilterOperation::LT;
+        } else if (range_op == ">") {
+            op = FilterOperation::GT;
+        }
+
+        return FilterExpr::create(descriptor, op, lit);
+    }
+
+    std::any visitValue_expression(KqlParser::Value_expressionContext* ctx) override {
+        auto lit = unquote_literal(ctx->LITERAL()->getText());
+        auto descriptor = ColumnDescriptor::create("*");
+        return FilterExpr::create(descriptor, FilterOperation::EQ, lit);
+    }
+
+    // Builds the expression for a parenthesized list of values. The filters are created with
+    // empty column descriptors; visitColumn_value_expression prepends the column afterwards.
+    std::any visitList_of_values(KqlParser::List_of_valuesContext* ctx) override {
+        // No condition means OR. NOT is expressed as an AND of individually inverted filters.
+        std::shared_ptr<Expression> base;
+        bool invert_each_filter = false;
+        if (nullptr == ctx->condition || nullptr != ctx->OR()) {
+            base = OrExpr::create();
+        } else {
+            // AND or NOT. Always creating a node here means base can never be left null.
+            base = AndExpr::create();
+            invert_each_filter = (nullptr != ctx->NOT());
+        }
+
+        for (auto* token : ctx->literals) {
+            auto literal = unquote_literal(token->getText());
+            // Give each filter its own descriptor: prepend_column() edits descriptors in place,
+            // so one shared by all filters could get prefixed repeatedly (TODO confirm upstream)
+            auto descriptor = ColumnDescriptor::create(DescriptorList());
+            auto expr = FilterExpr::create(
+                    descriptor,
+                    FilterOperation::EQ,
+                    literal,
+                    invert_each_filter
+            );
+            base->add_operand(expr);
+        }
+        return base;
+    }
+};
+
+std::shared_ptr<Expression> parse_kql_expression(std::istream& in) {
+    ErrorListener lexer_error_listener;
+    ErrorListener parser_error_listener;
+    // Wire up the ANTLR pipeline: stream -> lexer -> token stream -> parser
+    ANTLRInputStream input(in);
+    KqlLexer lexer(&input);
+    lexer.addErrorListener(&lexer_error_listener);
+    CommonTokenStream tokens(&lexer);
+    KqlParser parser(&tokens);
+    parser.addErrorListener(&parser_error_listener);
+    KqlParser::StartContext* tree = parser.start();
+
+    // On failure, report the earliest stage that failed and return an empty expression
+    if (lexer_error_listener.error()) {
+        std::cout << "Lexer Error" << std::endl;
+        return EmptyExpr::create();
+    }
+    if (parser_error_listener.error()) {
+        std::cout << "Parser Error" << std::endl;
+        return EmptyExpr::create();
+    }
+
+    ParseTreeVisitor visitor;
+    return std::any_cast<std::shared_ptr<Expression>>(visitor.visitStart(tree));
+}
+}  // namespace clp_s::search::kql
diff --git a/components/core/src/clp_s/search/kql/kql.hpp b/components/core/src/clp_s/search/kql/kql.hpp
new file mode 100644
index 000000000..ce74157fb
--- /dev/null
+++ b/components/core/src/clp_s/search/kql/kql.hpp
@@ -0,0 +1,17 @@
+#ifndef CLP_S_SEARCH_KQL_KQL_HPP
+#define CLP_S_SEARCH_KQL_KQL_HPP
+
+#include <istream>
+
+#include "../Expression.hpp"
+
+namespace clp_s::search::kql {
+/**
+ * Generate a search AST from a Kibana expression in an input stream
+ * @param in input stream containing a Kibana expression followed by EOF
+ * @return a search AST, or an EmptyExpr if the expression fails to lex or parse
+ */
+std::shared_ptr<Expression> parse_kql_expression(std::istream& in);
+}  // namespace clp_s::search::kql
+
+#endif  // CLP_S_SEARCH_KQL_KQL_HPP
diff --git a/components/core/submodules/abseil-cpp b/components/core/submodules/abseil-cpp
new file mode 160000
index 000000000..fb3621f4f
--- /dev/null
+++ b/components/core/submodules/abseil-cpp
@@ -0,0 +1 @@
+Subproject commit fb3621f4f897824c0dbe0615fa94543df6192f30
diff --git a/components/core/submodules/simdjson b/components/core/submodules/simdjson
new file mode 160000
index 000000000..6060be2fd
--- /dev/null
+++ b/components/core/submodules/simdjson
@@ -0,0 +1 @@
+Subproject commit 6060be2fdf62edf4a8f51a8b0883d57d09397b30
diff --git a/components/core/tools/scripts/deps-download/abseil-cpp.json b/components/core/tools/scripts/deps-download/abseil-cpp.json
new file mode 100644
index 000000000..e38bf8bdb
--- /dev/null
+++ b/components/core/tools/scripts/deps-download/abseil-cpp.json
@@ -0,0 +1,10 @@
+{
+  "url": "https://github.com/abseil/abseil-cpp/archive/refs/tags/20230802.1.zip",
+  "unzip": true,
+  "targets": [
+    {
+      "source": "abseil-cpp-20230802.1",
+      "destination": "submodules/abseil-cpp"
+    }
+  ]
+}
diff --git a/components/core/tools/scripts/deps-download/antlr4.json b/components/core/tools/scripts/deps-download/antlr4.json
new file mode 100644
index 000000000..ff0d4d871
--- /dev/null
+++ b/components/core/tools/scripts/deps-download/antlr4.json
@@ -0,0 +1,14 @@
+{
+  "url": "https://www.antlr.org/download/antlr-4.13.1-complete.jar",
+  "unzip": false,
+  "hash": {
+    "algo": "sha3_256",
+    "digest": "292ba55b3be8443777737e94841cff7a343e7067747c2cb6f58830797b20be65"
+  },
+  "targets": [
+    {
+      "source": "antlr-4.13.1-complete.jar",
+      "destination": "third-party/antlr/antlr-4.13.1-complete.jar"
+    }
+  ]
+}
diff --git a/components/core/tools/scripts/deps-download/download-all.sh b/components/core/tools/scripts/deps-download/download-all.sh
index 3afd60536..ded2b2612 100755
--- a/components/core/tools/scripts/deps-download/download-all.sh
+++ b/components/core/tools/scripts/deps-download/download-all.sh
@@ -13,14 +13,17 @@ mkdir -p submodules
 # We don't use a git submodule for sqlite3 since that would require building the
 # sqlite amalgamation
 python3 "${script_dir}/download-dep.py" "${script_dir}/sqlite3.json"
+python3 "${script_dir}/download-dep.py" "${script_dir}/antlr4.json"
 
 if [ -e "$project_root_dir/.git" ] ; then
   git submodule update --init --recursive
 else
+  python3 "${script_dir}/download-dep.py" "${script_dir}/abseil-cpp.json"
   python3 "${script_dir}/download-dep.py" "${script_dir}/boost-outcome.json"
   python3 "${script_dir}/download-dep.py" "${script_dir}/Catch2.json"
   python3 "${script_dir}/download-dep.py" "${script_dir}/date.json"
   python3 "${script_dir}/download-dep.py" "${script_dir}/json.json"
   python3 "${script_dir}/download-dep.py" "${script_dir}/log-surgeon.json"
+  python3 "${script_dir}/download-dep.py" "${script_dir}/simdjson.json"
   python3 "${script_dir}/download-dep.py" "${script_dir}/yaml-cpp.json"
 fi
diff --git a/components/core/tools/scripts/deps-download/simdjson.json b/components/core/tools/scripts/deps-download/simdjson.json
new file mode 100644
index 000000000..8b9999961
--- /dev/null
+++ b/components/core/tools/scripts/deps-download/simdjson.json
@@ -0,0 +1,11 @@
+{
+  "url": "https://github.com/simdjson/simdjson/archive/refs/tags/v3.6.3.zip",
+  "unzip": true,
+  "targets": [
+    {
+      "source": "simdjson-3.6.3",
+      "destination": "submodules/simdjson"
+    }
+  ]
+}
+
diff --git a/components/core/tools/scripts/lib_install/centos7.4/install-prebuilt-packages.sh b/components/core/tools/scripts/lib_install/centos7.4/install-prebuilt-packages.sh
index 53ce6dc94..e9398083b 100755
--- a/components/core/tools/scripts/lib_install/centos7.4/install-prebuilt-packages.sh
+++ b/components/core/tools/scripts/lib_install/centos7.4/install-prebuilt-packages.sh
@@ -3,6 +3,7 @@
 yum install -y \
   bzip2 \
   centos-release-scl \
+  java-11-openjdk \
   make \
   openssl-devel \
   openssl-static \
diff --git a/components/core/tools/scripts/lib_install/macos-12/install-all.sh b/components/core/tools/scripts/lib_install/macos-12/install-all.sh
index d49b6ee7a..7bac11b54 100755
--- a/components/core/tools/scripts/lib_install/macos-12/install-all.sh
+++ b/components/core/tools/scripts/lib_install/macos-12/install-all.sh
@@ -6,6 +6,7 @@ brew install \
   cmake \
   fmt \
   gcc \
+  java11 \
   libarchive \
   lz4 \
   mariadb-connector-c \
diff --git a/components/core/tools/scripts/lib_install/ubuntu-focal/install-prebuilt-packages.sh b/components/core/tools/scripts/lib_install/ubuntu-focal/install-prebuilt-packages.sh
index 01bc0d321..1fab1ccd9 100755
--- a/components/core/tools/scripts/lib_install/ubuntu-focal/install-prebuilt-packages.sh
+++ b/components/core/tools/scripts/lib_install/ubuntu-focal/install-prebuilt-packages.sh
@@ -15,6 +15,7 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y \
   libboost-program-options-dev \
   libmariadb-dev \
   libssl-dev \
+  openjdk-11-jdk \
   pkg-config \
   python3 \
   python3-pip \
diff --git a/components/core/tools/scripts/lib_install/ubuntu-jammy/install-prebuilt-packages.sh b/components/core/tools/scripts/lib_install/ubuntu-jammy/install-prebuilt-packages.sh
index 9a6125a8b..ab8382fdc 100755
--- a/components/core/tools/scripts/lib_install/ubuntu-jammy/install-prebuilt-packages.sh
+++ b/components/core/tools/scripts/lib_install/ubuntu-jammy/install-prebuilt-packages.sh
@@ -13,6 +13,7 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y \
   libboost-program-options-dev \
   libmariadb-dev \
   libssl-dev \
+  openjdk-11-jdk \
   pkg-config \
   python3 \
   python3-pip \
diff --git a/docs/core/clp-structured.md b/docs/core/clp-structured.md
new file mode 100644
index 000000000..4c919d7f1
--- /dev/null
+++ b/docs/core/clp-structured.md
@@ -0,0 +1,125 @@
+# Using CLP for semi-structured logs
+
+For semi-structured logs (e.g., JSON), you can compress, decompress, and search them using the
+`clp-s` binary described below.
+
+## Contents
+
+* [Compression](#compression)
+* [Decompression](#decompression)
+* [Search](#search)
+* [Current limitations](#current-limitations)
+
+## Compression
+
+Usage:
+
+```shell
+./clp-s c [<options>] <archives-dir> <input-path> [<input-path> ...]
+```
+
+* `archives-dir` is the directory that archives should be written to.
+* `input-path` is any new-line-delimited JSON (ndjson) log file or directory containing such files.
+* `options` allow you to specify things like which field should be considered as the log event's
+  timestamp (`--timestamp-key <field-path>`).
+  * For a complete list, run `./clp-s c --help`
+
+### Examples
+
+**Compress `/mnt/logs/log1.json` and output archives to `/mnt/data/archives1`:**
+
+```bash
+./clp-s c /mnt/data/archives1 /mnt/logs/log1.json
+```
+
+**Treat the field `{"d": {"@timestamp": "..."}}` as each log event's timestamp:**
+
+```bash
+./clp-s c --timestamp-key 'd.@timestamp' /mnt/data/archives1 /mnt/logs/log1.json
+```
+
+> [!TIP]
+> Specifying the timestamp-key will create a range-index for the timestamp column, which can
+> increase compression ratio and search performance.
+
+**Set the target encoded size to 1 GiB and the compression level to 6 (3 by default):**
+
+```bash
+./clp-s c \
+    --target-encoded-size 1073741824 \
+    --compression-level 6 \
+    /mnt/data/archives1 \
+    /mnt/logs/log1.json
+```
+
+## Decompression
+
+Usage:
+
+```bash
+./clp-s x <archives-dir> <output-dir>
+```
+
+* `archives-dir` is a directory containing archives.
+* `output-dir` is the directory that decompressed logs should be written to.
+
+### Examples
+
+**Decompress all logs from `/mnt/data/archives1` into `/mnt/data/archives1-decomp`:**
+
+```bash
+./clp-s x /mnt/data/archives1 /mnt/data/archives1-decomp
+```
+
+## Search
+
+Usage:
+
+```bash
+./clp-s s <archives-dir> <kql-query>
+```
+
+* `archives-dir` is a directory containing archives.
+* `kql-query` is a [KQL][1] query.
+
+### Examples
+
+**Find all log events within a time range:**
+
+```bash
+./clp-s s /mnt/data/archives1 'ts >= 1649923037 AND ts <= 1649923038'
+```
+or
+```bash
+./clp-s s /mnt/data/archives1 \
+    'ts >= date("2022-04-14T07:57:17") AND ts <= date("2022-04-14T07:57:18")'
+```
+
+**Find log events with a given key-value pair:**
+
+```bash
+./clp-s s /mnt/data/archives1 'id: 22149'
+```
+
+**Find ERROR log events containing a substring:**
+
+```bash
+./clp-s s /mnt/data/archives1 'level: ERROR AND message: "job*"'
+```
+
+**Find both FATAL and ERROR log events:**
+
+```bash
+./clp-s s /mnt/data/archives1 'level: FATAL OR level: ERROR'
+```
+
+## Current limitations
+
+* `clp-s` currently only supports *valid* ndjson logs; it does not handle ndjson logs with trailing
+  commas or other JSON syntax errors.
+* Time zone information is not preserved.
+* The order of log events is not preserved.
+* The input directory structure is not preserved; during decompression, the contents of all files
+  are written to the same file.
+
+[1]: https://www.elastic.co/guide/en/kibana/current/kuery-query.html
diff --git a/docs/core/clp-unstructured.md b/docs/core/clp-unstructured.md
new file mode 100644
index 000000000..56613e799
--- /dev/null
+++ b/docs/core/clp-unstructured.md
@@ -0,0 +1,157 @@
+# Using CLP for unstructured logs
+
+For unstructured (plain text) logs, you can compress, decompress, and search them using the `clp`
+and `clg` binaries described below.
+
+## Contents
+
+* [Compression](#compression)
+* [Decompression](#decompression)
+* [Search](#search)
+* [Parallel compression](#parallel-compression)
+* [Utilities](#utilities)
+  * [`make-dictionaries-readable`](#make-dictionaries-readable)
+
+## Compression
+
+### `clp`
+
+Usage:
+
+```shell
+./clp c [<options>] <archives-dir> <input-path> [<input-path> ...]
+```
+
+* `archives-dir` is the directory that archives should be written to.
+  * `clp` will create a number of files and directories within, so it's best if this directory is
+    empty.
+  * You can use the same directory repeatedly and `clp` will add to the compressed logs within.
+* `input-path` is any plain-text log file or directory containing such files.
+* `options` allow you to specify things like a path to a custom
+  [schema](../../components/core/README-Schema.md) file (`--schema-path <file-path>`).
+  * For a complete list, run `./clp c --help`
+
+### Examples
+
+**Compress `/mnt/logs/log1.log` and output archives to `/mnt/data/archives1`:**
+
+```shell
+./clp c /mnt/data/archives1 /mnt/logs/log1.log
+```
+
+**Compress `/mnt/logs/log1.log` using a custom schema specified in `/mnt/conf/schemas.txt`:**
+
+```shell
+./clp c --schema-path /mnt/conf/schemas.txt /mnt/data/archives1 /mnt/logs/log1.log
+```
+
+## Decompression
+
+Usage:
+
+```shell
+./clp x [<options>] <archives-dir> <output-dir> [<file-path>]
+```
+
+* `archives-dir` is a directory containing archives.
+* `output-dir` is the directory that decompressed logs should be written to.
+* `file-path` is an optional path to a specific file to decompress.
+
+### Examples
+
+**Decompress all logs from `/mnt/data/archives1` into `/mnt/data/archives1-decomp`:**
+
+```shell
+./clp x /mnt/data/archives1 /mnt/data/archives1-decomp
+```
+
+**Decompress just `/mnt/logs/file1.log`:**
+
+```shell
+./clp x /mnt/data/archives1 /mnt/data/archives1-decomp /mnt/logs/file1.log
+```
+
+## Search
+
+Usage:
+
+> [!NOTE]
+> Search uses a different executable (`clg`) than compression (`clp`).
+
+```shell
+./clg [<options>] <archives-dir> <wildcard-query> [<file-path>]
+```
+
+* `archives-dir` is a directory containing archives.
+* `wildcard-query` is a wildcard query where:
+  * the `*` wildcard matches 0 or more characters;
+  * the `?` wildcard matches any single character.
+* `options` allow you to specify things like a time-range filter.
+  * For a complete list, run `./clg --help`
+
+### Examples
+
+**Search `/mnt/data/archives1` for specific ERROR logs:**
+
+```shell
+./clg /mnt/data/archives1 " ERROR * container "
+```
+
+**Search for logs in a time range:**
+
+```shell
+./clg /mnt/data/archives1 --tge 1546344654321 --tle 1546344912345 " user1 "
+```
+
+> [!NOTE]
+> Currently, timestamps must be specified as milliseconds since the UNIX epoch.
+
+**Search a single file:**
+
+```shell
+./clg /mnt/data/archives1 " session closed " /mnt/logs/file1
+```
+
+## Parallel compression
+
+By default, `clp` uses an embedded SQLite database, so each directory containing archives can only
+be accessed by a single `clp` instance.
+
+To enable parallel compression to the same archives directory, `clp`/`clg` can be configured to use
+a MySQL-type database (e.g., MariaDB) as follows:
+
+* Install and configure MariaDB using the instructions for your platform
+* Create a user that has privileges to create databases, create tables, insert records, and delete
+  records.
+* Copy and change `config/metadata-db.yml`, setting the type to `mysql` and uncommenting the MySQL
+  parameters.
+* Install the MariaDB and PyYAML Python packages: `pip3 install mariadb PyYAML`
+  * This is necessary to run the database initialization script. If you prefer, you can run the SQL
+    statements in `tools/scripts/db/init-db.py` directly.
+* Run `tools/scripts/db/init-db.py` with the updated config file. This will initialize the database
+  CLP requires.
+* Run `clp` or `clg` as before, with the addition of the `--db-config-file` option pointing at the
+  updated config file.
+* To compress in parallel, simply run another instance of `clp` concurrently.
+
+Note that currently, decompression (`clp x`) and search (`clg`) can only be run with a single
+instance. We are in the process of open-sourcing parallelized versions of these as well.
+
+## Utilities
+
+Below are utilities for working with CLP archives.
+
+### `make-dictionaries-readable`
+
+To convert the dictionaries of an individual archive into a human-readable form, you can use
+`make-dictionaries-readable`.
+
+```shell
+./make-dictionaries-readable <archive-path> <output-dir>
+```
+
+* `archive-path` is a path to a specific archive (inside `archives-dir`)
+
+See the `make-dictionaries-readable` 
+[README](../../components/core/src/clp/make_dictionaries_readable/README.md) for details on the 
+output format.