diff --git a/docs/quick-start.md b/docs/quick-start.md new file mode 100644 index 0000000..a927c84 --- /dev/null +++ b/docs/quick-start.md @@ -0,0 +1,203 @@ +# Quick start + +Spider is a distributed system for executing user-defined tasks. It is designed to achieve low +latency, high throughput, and robust fault tolerance. + +The guide below briefly describes how to get started with running a task on Spider. At a high level, +you'll need to: + +* Write a task +* Build the task into a shared library +* Write a client to manage the task +* Build the client +* Set up a Spider cluster +* Run the client + +The example source code for this guide is in `examples/quick-start`. + +> [!NOTE] In the rest of this guide: +> 1. we specify source file paths relative to `examples/quick-start`. +> 2. all CMake commands should be run from inside `examples/quick-start`. + +# Requirements + +In the guide below, you'll need: + +* CMake 3.22.1+ +* GCC 10+ or Clang 7+ +* [Docker] 20.10+ + * If you're not running as root, ensure `docker` can be run + [without superuser privileges][docker-non-root]. + +# Writing a task + +In Spider, a task is a C++ function that satisfies the following conditions: + +* It is a non-member function. +* It takes one or more parameters: + * The first parameter must be a `TaskContext`. + * All other parameters must have types that conform to the `Serializable` or `Data` interfaces. +* It returns a value that conforms to the `Serializable` or `Data` interfaces. + +> [!NOTE] +> You don't immediately need to understand the TaskContext, Serializable, or Data types as we'll +> explain them in other guides. + +For example, the task in `src/tasks.cpp` computes and returns the sum of two integers. + +> [!NOTE] +> The task is split into a header file and an implementation file so that it can be loaded as a +> library in the worker, as we'll see in later sections. + +The integer parameters and return value are `Serializable` values. 
+ +The `SPIDER_REGISTER_TASK` macro at the bottom of `src/tasks.cpp` is how we inform Spider that a +function should be treated as a task. + +# Building the task into a shared library + +In order for Spider to run a task, the task needs to be compiled into a shared library that Spider +can load. The example's `CMakeLists.txt` demonstrates how to do this. + +To build the shared library, run: + +```shell +cmake -S . -B build +cmake --build build --parallel $(nproc) --target tasks +``` + +# Writing a client to manage the task + +To make Spider run a task, we first need to write a client application. Generally, a client: + +1. connects to Spider; +2. submits the task for execution; +3. waits for its completion—whether it succeeds or fails; +4. and then handles the result. + +For example, the client in `src/client.cpp` runs the `sum` task from the previous section and +verifies its result. + +When we submit a task to Spider, Spider returns a `Job`, which represents a scheduled, running, or +completed task (or `TaskGraph`) in a Spider cluster. + +> [!NOTE] +> `Job`s and `TaskGraph`s will be explained in another guide. + +# Building the client + +The client can be compiled like any normal C++ application, except that we need to link it to the +Spider client library and the `tasks` library. The example's `CMakeLists.txt` demonstrates how to do +this. + +To build the client executable, run: + +```shell +cmake --build build --parallel $(nproc) --target client +``` + +# Setting up a Spider cluster + +Before we can run the client, we need to start a Spider cluster. The simplest Spider cluster +consists of: + +* a storage backend; +* a scheduler instance; +* and a worker instance. + +## Setting up a storage backend + +Spider currently supports using MySQL or MariaDB as a storage backend. 
In this guide, we'll start +MariaDB in a Docker container: + +```shell +docker run \ + --detach \ + --rm \ + --name spider-storage \ + --env MARIADB_USER=spider \ + --env MARIADB_PASSWORD=password \ + --env MARIADB_DATABASE=spider-storage \ + --env MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=true \ + --publish 3306:3306 mariadb:latest +``` + +> [!WARNING] +> When the container above is stopped, the database will be deleted. In production, you should set +> up a database instance with some form of data persistence. + +> [!WARNING] +> The container above is using hardcoded default credentials that shouldn't be used in production. + +Alternatively, if you have an existing MySQL/MariaDB instance, you can use that as well. Simply +create a database and authorize a user to access it. + +## Setting up the scheduler + +To build the scheduler, run: + +```shell +cmake --build build --parallel $(nproc) --target spider_scheduler +``` + +To start the scheduler, run: + +```shell +build/spider/src/spider/spider_scheduler \ + --storage_url \ + "jdbc:mariadb://localhost:3306/spider-storage?user=spider&password=password" \ + --port 6000 +``` + +NOTE: + +* If you used a different set of arguments to set up the storage backend, ensure you update the + `storage_url` argument in the command. +* If the scheduler fails to bind to port `6000`, change the port in the command and try again. + +## Setting up a worker + +To build the worker, run: + +```shell +cmake --build build --parallel $(nproc) --target spider_worker +``` + +To start a worker, run: + +```shell +build/spider/src/spider/spider_worker \ + --storage_url \ + "jdbc:mariadb://localhost:3306/spider-storage?user=spider&password=password" \ + --port 6000 +``` + +NOTE: + +If you used a different set of arguments to set up the storage backend, ensure you update the +`storage_url` argument in the command. + +> [!TIP] +> You can start multiple workers to increase the number of concurrent tasks that can be run on the +> cluster. 
+ +# Running the client + +To run the client: + +```shell +build/client "jdbc:mariadb://localhost:3306/spider-storage?user=spider&password=password" +``` + +NOTE: + +If you used a different set of arguments to set up the storage backend, ensure you update the +storage backend URL in the command. + +# Next steps + +In future guides, we'll explain how to write more complex tasks, as well as how to leverage Spider's +support for fault tolerance. + +[Docker]: https://docs.docker.com/engine/install/ +[docker-non-root]: https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user diff --git a/examples/quick-start/.clang-format b/examples/quick-start/.clang-format new file mode 100644 index 0000000..60d597e --- /dev/null +++ b/examples/quick-start/.clang-format @@ -0,0 +1,20 @@ +BasedOnStyle: "InheritParentConfig" + +IncludeCategories: + # NOTE: A header is grouped by first matching regex + # Project headers + - Regex: "^\"" + Priority: 4 + # Library headers. Update when adding new libraries. + # NOTE: clang-format retains leading white-space on a line in violation of the YAML spec. 
+ # Ex: + # - Regex: "<(fmt|spdlog)" + # Priority: 3 + - Regex: "^<(absl|boost|catch2|fmt|mariadb|msgpack|spdlog|spider)" + Priority: 3 + # C system headers + - Regex: "^<.+\\.h>" + Priority: 1 + # C++ standard libraries + - Regex: "^<.+>" + Priority: 2 diff --git a/examples/quick-start/CMakeLists.txt b/examples/quick-start/CMakeLists.txt new file mode 100644 index 0000000..1190076 --- /dev/null +++ b/examples/quick-start/CMakeLists.txt @@ -0,0 +1,27 @@ +cmake_minimum_required(VERSION 3.22.1) +project(spider_getting_started) + +# Add the Spider library +add_subdirectory(../../ spider EXCLUDE_FROM_ALL) + +# Add the tasks library +add_library( + tasks + SHARED + src/tasks.cpp + src/tasks.hpp +) + +# Link the Spider library to the tasks library +target_link_libraries(tasks PRIVATE spider::spider) + +# Add the client +add_executable(client src/client.cpp) + +# Link the Spider and tasks library to the client +target_link_libraries( + client + PRIVATE + spider::spider + tasks +) diff --git a/examples/quick-start/src/client.cpp b/examples/quick-start/src/client.cpp new file mode 100644 index 0000000..062764d --- /dev/null +++ b/examples/quick-start/src/client.cpp @@ -0,0 +1,61 @@ +#include <iostream> +#include <string> +#include <type_traits> +#include <utility> + +#include <spider/client/spider.hpp> + +#include "tasks.hpp" + +// NOLINTBEGIN(bugprone-exception-escape) +auto main(int argc, char const* argv[]) -> int { + // Parse the storage backend URL from the command line arguments + if (argc < 2) { + std::cerr << "Usage: ./client <storage-backend-url>" << '\n'; + return 1; + } + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + std::string const storage_url{argv[1]}; + if (storage_url.empty()) { + std::cerr << "storage-backend-url cannot be empty." 
<< '\n'; + return 1; + } + + // Create a driver that connects to the Spider cluster + spider::Driver driver{storage_url}; + + // Submit the `sum` task for execution with inputs x and y + int const x = 2; + int const y = 3; + spider::Job<int> job = driver.start(&sum, x, y); + + // Wait for the job to complete + job.wait_complete(); + + // Handle the job's success/failure; exit 0 only if it succeeded and returned x + y + switch (auto job_status = job.get_status()) { + case spider::JobStatus::Succeeded: { + auto result = job.get_result(); + int const expected = x + y; + if (expected == result) { + return 0; + } + std::cerr << "`sum` returned unexpected result. Expected: " << expected + << ". Actual: " << result << '\n'; + return 1; + } + case spider::JobStatus::Failed: { + std::pair<std::string, std::string> const error_and_fn_name = job.get_error(); + std::cerr << "Job failed in function " << error_and_fn_name.second << " - " + << error_and_fn_name.first << '\n'; + return 1; + } + default: + std::cerr << "Job is in unexpected state - " + << static_cast<std::underlying_type_t<decltype(job_status)>>(job_status) + << '\n'; + return 1; + } +} + +// NOLINTEND(bugprone-exception-escape) diff --git a/examples/quick-start/src/tasks.cpp b/examples/quick-start/src/tasks.cpp new file mode 100644 index 0000000..6b3dde3 --- /dev/null +++ b/examples/quick-start/src/tasks.cpp @@ -0,0 +1,12 @@ +#include "tasks.hpp" + +#include <spider/client/spider.hpp> + +// Task function implementation: returns x + y and does not use the task context +auto sum(spider::TaskContext& /*context*/, int x, int y) -> int { + return x + y; +} + +// Register the task with Spider +// NOLINTNEXTLINE(cert-err58-cpp) +SPIDER_REGISTER_TASK(sum); diff --git a/examples/quick-start/src/tasks.hpp b/examples/quick-start/src/tasks.hpp new file mode 100644 index 0000000..1bb54c8 --- /dev/null +++ b/examples/quick-start/src/tasks.hpp @@ -0,0 +1,15 @@ +#ifndef TASKS_HPP +#define TASKS_HPP + +#include <spider/client/spider.hpp> + +// Task function prototype +/** + * @param context Spider task context (unused by this task). + * @param x First addend. + * @param y Second addend. + * @return The sum of x and y. 
+ */ +auto sum(spider::TaskContext& context, int x, int y) -> int; + +#endif // TASKS_HPP diff --git a/lint-tasks.yaml b/lint-tasks.yaml index f42b802..aa88049 100644 --- a/lint-tasks.yaml +++ b/lint-tasks.yaml @@ -48,6 +48,9 @@ tasks: sources: &cpp_format_src_files - "{{.G_LINT_VENV_CHECKSUM_FILE}}" - "{{.G_SRC_SPIDER_DIR}}/.clang-format" + - "{{.G_EXAMPLES_DIR}}/**/*.cpp" + - "{{.G_EXAMPLES_DIR}}/**/*.h" + - "{{.G_EXAMPLES_DIR}}/**/*.hpp" - "{{.G_SRC_SPIDER_DIR}}/**/*.cpp" - "{{.G_SRC_SPIDER_DIR}}/**/*.h" - "{{.G_SRC_SPIDER_DIR}}/**/*.hpp" @@ -62,6 +65,14 @@ tasks: vars: FLAGS: "--dry-run" SRC_DIR: "{{.G_SRC_SPIDER_DIR}}" + - task: "clang-format" + vars: + FLAGS: "--dry-run" + SRC_DIR: "{{.G_TEST_DIR}}" + - task: "clang-format" + vars: + FLAGS: "--dry-run" + SRC_DIR: "{{.G_EXAMPLES_DIR}}" cpp-format-fix: sources: *cpp_format_src_files @@ -75,6 +86,10 @@ tasks: vars: FLAGS: "-i" SRC_DIR: "{{.G_TEST_DIR}}" + - task: "clang-format" + vars: + FLAGS: "-i" + SRC_DIR: "{{.G_EXAMPLES_DIR}}" cpp-static-check: # Alias task to `cpp-static-fix` since we don't currently support automatic fixes. 
@@ -84,6 +99,9 @@ tasks: aliases: ["cpp-static-fix"] sources: - "{{.G_LINT_VENV_CHECKSUM_FILE}}" + - "{{.G_EXAMPLES_DIR}}/**/*.cpp" + - "{{.G_EXAMPLES_DIR}}/**/*.h" + - "{{.G_EXAMPLES_DIR}}/**/*.hpp" - "{{.G_SRC_SPIDER_DIR}}/**/*.cpp" - "{{.G_SRC_SPIDER_DIR}}/**/*.h" - "{{.G_SRC_SPIDER_DIR}}/**/*.hpp" @@ -101,7 +119,14 @@ tasks: vars: FLAGS: "--config-file=.clang-tidy -p {{.G_SPIDER_COMPILE_COMMANDS_DB}}" SRC_DIR: "{{.G_SRC_SPIDER_DIR}}" - TEST_DIR: "{{.G_TEST_DIR}}" + - task: "clang-tidy" + vars: + FLAGS: "--config-file=.clang-tidy -p {{.G_SPIDER_COMPILE_COMMANDS_DB}}" + SRC_DIR: "{{.G_TEST_DIR}}" + - task: "clang-tidy" + vars: + FLAGS: "--config-file=.clang-tidy -p {{.G_EXAMPLES_COMPILE_COMMANDS_DB}}" + SRC_DIR: "{{.G_EXAMPLES_DIR}}" py-check: cmds: @@ -165,10 +190,10 @@ tasks: clang-tidy: internal: true requires: - vars: ["FLAGS", "SRC_DIR", "TEST_DIR"] + vars: ["FLAGS", "SRC_DIR"] cmd: |- . "{{.G_LINT_VENV_DIR}}/bin/activate" - find "{{.SRC_DIR}}" "{{.TEST_DIR}}" \ + find "{{.SRC_DIR}}" \ -type f \ \( -iname "*.cpp" -o -iname "*.h" -o -iname "*.hpp" \) \ -print0 | \ @@ -182,6 +207,7 @@ tasks: - "CMakeLists.txt" - "src/spider/CMakeLists.txt" - "tests/CMakeLists.txt" + - "examples/quick-start/CMakeLists.txt" - "cmake/Modules/*.cmake" cmds: - for: "sources" diff --git a/src/spider/client/Job.hpp b/src/spider/client/Job.hpp index 86629bf..ebbd222 100644 --- a/src/spider/client/Job.hpp +++ b/src/spider/client/Job.hpp @@ -260,7 +260,9 @@ class Job { * - the error message sent from the task through `TaskContext::abort` or from Spider. 
* @throw spider::ConnectionException */ - auto get_error() -> std::pair; + auto get_error() -> std::pair { + throw ConnectionException{"Not implemented"}; + } private: Job(boost::uuids::uuid id, diff --git a/taskfile.yaml b/taskfile.yaml index 19e722d..b398bfc 100644 --- a/taskfile.yaml +++ b/taskfile.yaml @@ -12,8 +12,12 @@ vars: G_BUILD_SPIDER_DIR: "{{.G_BUILD_DIR}}/spider" G_SPIDER_CMAKE_CACHE: "{{.G_BUILD_SPIDER_DIR}}/CMakeCache.txt" G_SPIDER_COMPILE_COMMANDS_DB: "{{.G_BUILD_SPIDER_DIR}}/compile_commands.json" + G_BUILD_EXAMPLES_DIR: "{{.G_BUILD_DIR}}/examples" + G_EXAMPLES_CMAKE_CACHE: "{{.G_BUILD_EXAMPLES_DIR}}/CMakeCache.txt" + G_EXAMPLES_COMPILE_COMMANDS_DB: "{{.G_BUILD_EXAMPLES_DIR}}/compile_commands.json" G_SRC_SPIDER_DIR: "{{.ROOT_DIR}}/src/spider" G_TEST_DIR: "{{.ROOT_DIR}}/tests" + G_EXAMPLES_DIR: "{{.ROOT_DIR}}/examples" tasks: clean: @@ -25,13 +29,20 @@ tasks: sources: - "{{.TASKFILE}}" - "CMakeLists.txt" + - "examples/quick-start/CMakeLists.txt" generates: - "{{.G_SPIDER_CMAKE_CACHE}}" - "{{.G_SPIDER_COMPILE_COMMANDS_DB}}" - cmd: "cmake -S '{{.ROOT_DIR}}' -B '{{.G_BUILD_SPIDER_DIR}}'" + - "{{.G_EXAMPLES_CMAKE_CACHE}}" + - "{{.G_EXAMPLES_COMPILE_COMMANDS_DB}}" + cmds: + - "cmake -S '{{.ROOT_DIR}}' -B '{{.G_BUILD_SPIDER_DIR}}'" + - "cmake -S '{{.ROOT_DIR}}/examples/quick-start' -B '{{.G_BUILD_EXAMPLES_DIR}}'" init: internal: true silent: true run: "once" - cmds: ["mkdir -p '{{.G_BUILD_DIR}}'"] + cmds: + - "mkdir -p '{{.G_BUILD_DIR}}'" + - "mkdir -p '{{.G_BUILD_SPIDER_DIR}}'"