diff --git a/.gitignore b/.gitignore index 722d5e71d..e19c4a7e4 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,13 @@ +cmake +.cache .vscode +.clangd +**/venv +**/_build +build +install.sh +**/.DS_Store +**/Doxyfile.in.bak + +# LLVM debug +launch.json diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 000000000..bff923c84 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,35 @@ +# Read the Docs configuration file for Sphinx projects +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.12" + # You can also specify other tool versions: + # nodejs: "20" + # rust: "1.70" + # golang: "1.20" + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/conf.py + # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs + # builder: "dirhtml" + # Fail on all warnings to avoid broken references + # fail_on_warning: true + +# Optionally build your docs in additional formats such as PDF and ePub +# formats: +# - pdf +# - epub + +# Optional but recommended, declare the Python requirements required +# to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +python: + install: + - requirements: docs/requirements.txt diff --git a/docs/README.rst b/docs/README.rst index 5adca14b4..748c22d9f 100644 --- a/docs/README.rst +++ b/docs/README.rst @@ -27,10 +27,15 @@ Build dependencies: :CMake: For use and installation see http://www.cmake.org/ :ecbuild: ECMWF library of CMake macros () +Optional dependencies for building documentation: + +:sphinx-build: Create documentation from reStructuredText, see https://www.sphinx-doc.org/en/master/man/sphinx-build.html +:breathe: Bridge between Sphinx and Doxygen, see https://breathe.readthedocs.io/en/latest/ + Installation ============ -fdb employs an out-of-source build/install based on CMake. +FDB employs an out-of-source build/install based on CMake. Make sure ecbuild is installed and the ecbuild executable script is found ( ``which ecbuild`` ). diff --git a/docs/conf.py b/docs/conf.py index ee8fe2b24..d8dfb3db7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -61,7 +61,8 @@ def parse_version(ver_str): "sphinx_copybutton", "sphinx_tabs.tabs", "sphinxfortran.fortran_domain", - "breathe", + "sphinx_toolbox.collapse", + "breathe" ] # Add any paths that contain templates here, relative to this directory. @@ -70,7 +71,7 @@ def parse_version(ver_str): # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "venv", "README.rst"] source_suffix = ".rst" master_doc = "index" @@ -121,3 +122,6 @@ def parse_version(ver_str): # -- Sphinx Tabs configuration ----------------------------------------------- sphinx_tabs_disable_tab_closing = True + +# --- Enable figure numbering +numfig = True diff --git a/docs/content/architectural-introduction.rst b/docs/content/architectural-introduction.rst new file mode 100644 index 000000000..9cb735060 --- /dev/null +++ b/docs/content/architectural-introduction.rst @@ -0,0 +1,26 @@ +.. 
_architectural-introduction-label:
+
+Architectural Introduction
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+|Licence|
+
+FDB (Fields DataBase) is a domain-specific object store developed at ECMWF for storing, indexing and retrieving GRIB data. Each GRIB message is stored as a field and indexed through semantic metadata (i.e. physical variables such as temperature, pressure, ...).
+A set of fields can be retrieved by specifying a request in a specific language developed for accessing the :ref:`mars-request-label` archive.
+
+FDB exposes a C++ API as well as CLI :ref:`tools-label`. For further information, see :ref:`technical-introduction-label`.
+
+The overall structure of the system the FDB operates in is described in:
+
+
+.. toctree::
+   :maxdepth: 1
+
+   architecture/introduction
+   architecture/overview
+
+
+.. |Licence| image:: https://img.shields.io/badge/License-Apache%202.0-blue.svg
+   :target: https://github.com/ecmwf/fdb/blob/develop/LICENSE
+   :alt: Apache Licence
+
diff --git a/docs/content/architecture/img/FDB_Frontend_Backend.png b/docs/content/architecture/img/FDB_Frontend_Backend.png
new file mode 100644
index 000000000..a89ab4cd1
Binary files /dev/null and b/docs/content/architecture/img/FDB_Frontend_Backend.png differ
diff --git a/docs/content/architecture/img/FDB_schema.png b/docs/content/architecture/img/FDB_schema.png
new file mode 100644
index 000000000..2321d733a
Binary files /dev/null and b/docs/content/architecture/img/FDB_schema.png differ
diff --git a/docs/content/architecture/img/dist_fdb.svg b/docs/content/architecture/img/dist_fdb.svg
new file mode 100644
index 000000000..91d047b4b
--- /dev/null
+++ b/docs/content/architecture/img/dist_fdb.svg
@@ -0,0 +1,634 @@
[SVG markup not reproduced: schematic of a distributed FDB — several frontends/clients (FDB-1, FDB-2, FDB-3) issuing archive/flush and retrieve/list calls to backends/servers (catalogue + store) over control and data channels, asynchronous via TCP/IP]
diff --git a/docs/content/architecture/img/local_fdb.svg b/docs/content/architecture/img/local_fdb.svg
new file mode 100644
index 000000000..0383b03cc
--- /dev/null
+++ b/docs/content/architecture/img/local_fdb.svg
@@ -0,0 +1,346 @@
[SVG markup not reproduced: schematic of a local FDB — one frontend/client and one backend/server (catalogue + store) on a magnetic disk, connected by archive/flush and retrieve/list paths]
diff --git a/docs/content/architecture/img/remote_fdb.svg b/docs/content/architecture/img/remote_fdb.svg
new file mode 100644
index 000000000..ae4c241bc
--- /dev/null
+++ b/docs/content/architecture/img/remote_fdb.svg
@@ -0,0 +1,362 @@
[SVG markup not reproduced: schematic of a remote FDB — frontends/clients (FDB-1, FDB-2) issuing archive/flush and retrieve/list calls to a backend/server (catalogue + store) over control and data channels, asynchronous via TCP/IP]
diff --git a/docs/content/architecture/img/select_fdb.svg b/docs/content/architecture/img/select_fdb.svg
new file mode 100644
index 000000000..b600a17fe
--- /dev/null
+++ b/docs/content/architecture/img/select_fdb.svg
@@ -0,0 +1,580 @@
[SVG markup not reproduced: schematic of a select FDB — one frontend/client dispatching to two backends/servers (Backend/Server-1, Backend/Server-2) by metadata key (Metadata-Key-1, Metadata-Key-2), each with its own catalogue and store]
diff --git a/docs/content/architecture/introduction.rst b/docs/content/architecture/introduction.rst
new file mode 100644
index 000000000..c25bbf661
--- /dev/null
+++ b/docs/content/architecture/introduction.rst
@@ -0,0 +1,30 @@
+.. _schema-fdb-operational:
+
+Introduction
+============
+
+Numerical weather prediction (NWP) and climate simulations are data-heavy applications.
+Over the course of the last 40 years data output has increased by several orders
+of magnitude and is projected to continue growing. In 1995 ECMWF generated a total of
+14 TiB per year, whereas the ensemble forecast output at the end of 2023
+totaled 60 TiB in just one hour.
+
+The corresponding processing of all output data (archival as well as retrieval)
+is an I/O-intensive operation, putting stress on all involved systems. Additionally, weather
+forecasts decay in value rapidly after their creation (being superseded by newer
+forecasts). There is therefore a huge need to make the generated data available quickly and
+cheaply for the general lifetime of the forecast data, which is typically 3-5 days.
+
+The **Fields DataBase (FDB)** is a domain-specific object store developed at ECMWF for storing,
+indexing and retrieving GRIB data, therefore playing the essential role of a hot cache in
+the context of NWP.
+
+.. image:: img/FDB_schema.png
+   :width: 400
+   :align: center
+   :alt: Schematic overview of the FDB in an operational setup
+
+.. Each GRIB message is stored as a field and indexed
+.. through semantic metadata (i.e. physical variables such as temperature, pressure, ...).
+.. A set of fields can be retrieved specifying a request using a specific language
+.. developed for accessing :doc:`mars` Archive
diff --git a/docs/content/architecture/overview.rst b/docs/content/architecture/overview.rst
new file mode 100644
index 000000000..9b698bcb5
--- /dev/null
+++ b/docs/content/architecture/overview.rst
@@ -0,0 +1,177 @@
+.. _label-architectural-overview:
+
+Architectural overview
+######################
+
+As is typical for a modern application, architectural choices have been made to isolate
+the parts of the FDB that serve specific use cases. The main driver in that decision was
+to split the FDB into a user-facing module, called the **frontend** or **client**, and
+a storage-facing module, called the **backend** or **server**.
+The FDB offers different *frontends*
+to implement different use cases, e.g. whether an FDB instance should run with local
+storage attached or whether data should be routed to several FDB instances.
+The implemented *backends* are the backbone of the application, handling
+how storage/retrieval operations should take place.
+Depending on the storage system that accommodates the **catalogue** and the **store**,
+there are also different backend implementations, each dealing with the technical
+details of the respective underlying storage system.
+
+.. _schema_fdb_frontend_backend:
+
+.. image:: img/local_fdb.svg
+   :width: 600
+   :align: center
+   :class: with-shadow
+   :alt: Schematic of the FDB client/server architecture
+
+This schematic shows the design of the FDB, in particular the split between the client and the server side.
+
+.. _architecture_frontend:
+
+Frontend / Client
+-----------------
+
+The frontend can be thought of as a client connecting to a backend implementation and
+dealing with all possible requests a user can have. There are typically
+four operations (aside from some administrative functions) a frontend needs to implement
+to be able to communicate with a configured
+backend:
+
+* `archive()`
+   Archive a given dataset.
+* `flush()`
+   Guarantee that all data is stored accordingly.
+* `retrieve()`
+   Retrieve stored data specified by a given key (or metadata).
+* `list()`
+   List all data (or all partially matching data, given a key) stored in the FDB.
+
+The FDB adheres to the ACID principles. Data is either visible (and therefore correctly stored)
+or not, so operations are **transactional**. Furthermore, the aforementioned functions have the following properties:
+
+* `archive()` blocks only until data is transferred from the frontend to the backend
+  of the FDB - **Asynchronous**
+* `flush()` blocks until data is made visible by the backend, i.e. until the data has
+  been persisted - **Consistent**
+* All operations append to the storage without altering already stored
+  data (even if it has identical metadata) - **Immutable**
+* In case of identical metadata, the already existing version is masked by the new
+  one - **Versioned**
+
+There are several frontend implementations, each of which serves
+different use cases:
+
+**Local**-Frontend
+******************
+
+   Implements the passage from the frontend to the storage backend. Originally
+   intended for a *local* FDB instance; with the newest changes in modularity
+   and the corresponding configuration options, the backend could also be *non-local*.
+
+   **Schematic of a local FDB instance**:
+
+.. image:: img/local_fdb.svg
+   :width: 600
+   :align: center
+   :alt: Schematic of a local FDB instance
+
+**Remote**-Frontend
+*******************
+
+   Handles access to a remote FDB via TCP/IP. Talks to the FDB backend using an
+   asynchronous, versioned protocol. The schema, which is used for indexing the data,
+   is taken from the server side.
+
+   **Schematic of a remote FDB instance**:
+
+.. image:: img/remote_fdb.svg
+   :width: 600
+   :align: center
+   :alt: Schematic of a remote FDB instance
+
+**Distribute**-Frontend
+***********************
+
+   Implements multi-lane access to multiple FDBs, using rendezvous hashing to
+   avoid synchronisation. The schematic below shows two FDB servers being used
+   for data persistence; in general there could be several.
+
+   **Schematic of a distributed FDB instance**:
+
+.. image:: img/dist_fdb.svg
+   :width: 600
+   :align: center
+   :alt: Schematic of a distributed FDB instance
+
+**Select**-Frontend
+*******************
+
+   Dispatches requests to different FDBs based on the metadata of the associated data.
+   A typical use case is to split data depending on its metadata. In the schematic
+   below the splitting occurs only on the same machine; in general the data
+   could also be sent to a remote FDB instance (or a mixture of multiple local/remote FDBs).
+
+   **Schematic of a select FDB instance**:
+
+.. image:: img/select_fdb.svg
+   :width: 600
+   :align: center
+   :alt: Schematic of a select FDB instance
+
+
+Backend / Server
+----------------
+
+As seen in :ref:`schema-fdb-operational`, the FDB needs to cope with two
+kinds of data flows:
+
+1. Data needs to be efficiently streamed out of the NWP model (or other data
+   producers) and stored.
+2. The data location needs to be associated with a key and indexed, making the data
+   available for retrieval processes.
+
+The FDB consists of two backend components, handling the two
+aforementioned scenarios:
+
+* The **Catalogue**, which handles the metadata-based indexing
+* The **Store**, which handles the persisting of streamed data
+
+Indexing and storing the data needs substantial effort. There are
+several aspects one needs to keep in mind:
+
+* **Performance of the system**
+
+   The indexing scheme heavily influences the overall performance of the system.
+   Depending on the chosen data layout, reading from (and/or writing to) the storage
+   can be affected. In NWP the access pattern of post-processing consumers often
+   differs from the write pattern which would guarantee optimal writing
+   performance. Choosing the right layout (with all aspects like
+   collocation etc.) is therefore a non-trivial task.
+
+* **Distribution of the backend modules**
+
+   Due to the modular nature of the FDB, it is perfectly viable to separate the Catalogue
+   and the Store, e.g. deploying them to different systems. How indexing and storage
+   are split therefore needs to be considered thoroughly: it depends on many
+   characteristics of the underlying systems, especially
+   whether the storage is homogeneous or heterogeneous, and on its bandwidth.
+
+Similar to the frontends, there are several implementations for the backend, i.e.
+individual implementations for the catalogue and the store. These are:
+
+* **Catalogue**
+
+   * **POSIX (aka Toc)**
+
+   * **PostgreSQL (under development)**
+
+   * **Intel DAOS (under development)**
+
+* **Store**
+
+   * **POSIX (aka Toc)**
+
+   * **Ceph**
+
+   * **Intel DAOS (under development)**
+
diff --git a/docs/content/concept.rst b/docs/content/concept.rst
new file mode 100644
index 000000000..8e3c7ac10
--- /dev/null
+++ b/docs/content/concept.rst
@@ -0,0 +1,10 @@
+New Concept
+~~~~~~~~~~~
+
+.. toctree::
+   :maxdepth: 1
+
+   concept/introduction
+   concept/installation
+   concept/architecture
+
diff --git a/docs/content/concept/administration.rst b/docs/content/concept/administration.rst
new file mode 100644
index 000000000..b1ce41438
--- /dev/null
+++ b/docs/content/concept/administration.rst
@@ -0,0 +1,28 @@
+Administration
+==============
+
+Schema
+------
+
+.. include:: administration/schema.rst
+
+Metkit Language
+---------------
+
+Config files
+------------
+
+Config files define a number of parameters for the FDB. There are several different
+frontend types that can be described by an FDB config file, namely
+local, remote, distribute, and select. For an architectural overview, see
+:ref:`architecture_frontend`.
+
+.. include:: administration/configuration.rst
+
+.. include:: administration/config.rst
+
+Behaviour with Respect to Given Configs
+---------------------------------------
+
+Moving of Data
+--------------
diff --git a/docs/content/concept/administration/config.rst b/docs/content/concept/administration/config.rst
new file mode 100644
index 000000000..442c8735e
--- /dev/null
+++ b/docs/content/concept/administration/config.rst
@@ -0,0 +1,75 @@
+Types
+*****
+
+Local
+~~~~~
+
+::
+
+   type: local
+   engine: toc
+   schema: ./schema
+   spaces:
+   - handler: Default
+     roots:
+     - path: /path/to/fdb/root
+
+
+Local implements the passage of data from the frontend to the storage backend, talking to the FDB Store and Catalogue.
+Depending on the backend, the data or metadata may not actually be local.
+
+
+Select
+~~~~~~
+
+::
+
+   type: select
+   fdbs:
+   - select: class=od
+     type: local
+     spaces:
+     - roots:
+       - path: /path/to/fdb/od
+   - select: class=rd,expver=xx.?.?
+     type: local
+     spaces:
+     - roots:
+       - path: /path/to/fdb/rd
+
+Select dispatches requests to different FDBs based on the metadata associated with the messages,
+and can for example be used to separate operational data (OD) from research data (RD).
+
+Remote
+~~~~~~
+
+::
+
+   type: remote
+   host: fdb-minus
+   port: 36604
+
+The remote type handles access to a remote FDB via TCP/IP.
+It talks to the FDB server using an asynchronous protocol.
+It only handles the transmission, not the distribution, of data.
+
+Dist
+~~~~
+
+::
+
+   type: dist
+   lanes:
+   - type: remote
+     host: fdb-minus-1
+     port: 36604
+   - type: remote
+     host: fdb-minus-2
+     port: 36604
+
+The distributed type implements multi-lane access to multiple FDBs.
+It uses rendezvous hashing to avoid synchronisations.
+
+These types can be composed together in the config file when using FDB.
+
+..
+  _## TODO: Get this reviewed and add more information.
diff --git a/docs/content/concept/administration/configuration.rst b/docs/content/concept/administration/configuration.rst
new file mode 100644
index 000000000..397cf5514
--- /dev/null
+++ b/docs/content/concept/administration/configuration.rst
@@ -0,0 +1,76 @@
+Configuration
+*************
+
+The first thing you should do after the installation is to set up the FDB home.
+Export the environment variable as follows:
+
+::
+
+   export FDB_HOME="/path/to/wished/fdb/home"
+
+If you add it to your ``.bashrc`` or ``.zshrc``, source the file or restart the
+terminal session.
+With ``${FDB_HOME}`` exported, create the following
+structure within the FDB home directory:
+
+.. code-block:: text
+
+   FDB_HOME
+   └──etc
+      └──fdb
+         ├──config.yaml
+         └──schema
+
+The ``config.yaml`` and ``schema`` files are described in the corresponding
+sections of this document; see config_ and schema_.
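+
+As an illustration, a minimal ``config.yaml`` for a purely local FDB could look as
+follows (a sketch assembled from the type examples in the config section; the paths
+are placeholders and need to be adapted):
+
+.. code-block:: yaml
+
+   type: local
+   engine: toc
+   schema: /path/to/fdb/home/etc/fdb/schema
+   spaces:
+   - handler: Default
+     roots:
+     - path: /path/to/fdb/root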
+
+FDB looks for the ``schema`` file in the global FDB home directory; the
+specific path it looks for needs to be set as shown above. This global schema is
+used for storing data into the FDB database. In case data has already been stored
+in the FDB, a local copy of the schema used during the storing process is created
+and is used for further storage operations. This guarantees data consistency
+over the course of several storage procedures, even if the global schema is subject
+to changes.
+
+Checking Installation
+*********************
+
+To check whether the installation of the **FDB** was successful, we can execute
+the following command:
+
+.. code-block:: console
+
+   which fdb
+
+This should show the path to the installed FDB instance. If you receive an error
+message at this point, make sure that you added the path where FDB is installed
+to your ``PATH`` environment variable.
+
+.. code-block:: console
+
+   fdb help
+
+The help argument shows a list of options we have to interact with the FDB application.
+The most useful at this stage is ``fdb home``, which checks whether the configuration
+of FDB and the corresponding home directory was successful. Running this command
+results in the following output:
+
+.. code-block:: console
+
+   /path/to/fdb/home
+
+If this isn't the case, make sure you exported the ``FDB_HOME`` environment variable
+as stated in the installation guide. Next we want to show information about
+the FDB instance. We run
+
+.. code-block:: console
+
+   fdb schema
+
+This should print the configured schema.
+
+
+
+
+.. _Config: config-schema.html
+.. _Schema: config-schema.html#schema
diff --git a/docs/content/concept/administration/example-schema b/docs/content/concept/administration/example-schema
new file mode 100644
index 000000000..e38ae70b8
--- /dev/null
+++ b/docs/content/concept/administration/example-schema
@@ -0,0 +1,597 @@
+# * Format of the rules is:
+
+# [a1, a2, a3 ...[b1, b2, b3... [c1, c2, c3...]]]
+
+# - The first level (a) defines which attributes are used to name the top level directory
+# - The second level (b) defines which attributes are used to name the data files
+# - The third level (c) defines which attributes are used as index keys
+
+# * Rules can be grouped
+
+# [a1, a2, a3 ...
+#   [b1, b2, b3... [c1, c2, c3...]]
+#   [B1, B2, B3... [C1, C2, C3...]]
+# ]
+
+# * A list of values can be given for an attribute
+# [ ..., stream=enfo/efov, ... ]
+# This will be used when matching rules.
+
+# * Attributes can be typed
+#   Globally, at the beginning of this file:
+
+# refdate: Date;
+
+#   or in the context of a rule:
+# [type=cl, ... [date:ClimateMonth, ...]]
+
+# Typing attributes is done when the user's requests or the GRIB values need to
+# be modified before directories, files and indexes are created. For example,
+# ClimateMonth will transform 2010-04-01 to 'may' internally.
+
+# * Attributes can be optional
+# [ step, levelist?, param ]
+# They will be replaced internally by an empty value. It is also possible to
+# provide a default substitution value: e.g. [domain?g] will consider the domain
+# to be 'g' if missing.
+
+# * Attributes can be removed:
+# [grid-]
+# This is useful to remove attributes present in the GRIB that should be ignored
+
+# * Rules are matched:
+
+# - If the attributes are present in the GRIB/Request, or marked optional or ignored
+# - If a list of possible values is provided, one of them must match, for example
+#   [ class, expver, stream=enfo/efov, date, time, domain ]
+#   will match either stream=enfo or stream=efov, all other attributes
+#   will be matched if they exist in the GRIB or user's request
+
+# * On archive:
+# - Attributes are extracted from the GRIB (namespace 'mars'), possibly modified
+#   by the attribute type
+# - Only the first rule is used, so order is important
+# - All GRIB attributes must be used by the rules, otherwise an error is raised
+
+# * On retrieve:
+# - Attributes are extracted from the user's request, possibly modified by the
+#   attribute type (e.g.
for handling of U/V) +# - All the matching rules are considered +# - Only attributes listed in the rules are used to extract values from the user's request + + +# Default types + +param: Param; +step: Step; +date: Date; +hdate: Date; +refdate: Date; +latitude: Double; +longitude: Double; +levelist: Double; +grid: Grid; +expver: Expver; + +time: Time; +fcmonth: Integer; + +number: Integer; +frequency: Integer; +direction: Integer; +channel: Integer; + +instrument: Integer; +ident: Integer; + +diagnostic: Integer; +iteration: Integer; +system: Integer; +method: Integer; + +# ??????? + +# reference: Integer; +# fcperiod: Integer; + +# opttime: Integer; +# leadtime: Integer; + +# quantile: ?????? +# range: ?????? + +# band: Integer; + + +######################################################## +# These rules must be first, otherwise fields of These +# classes will be index with the default rule for oper +[ class=ti/s2, expver, stream, date, time, model + [ origin, type, levtype, hdate? + [ step, number?, levelist?, param ]] +] + +[ class=ms, expver, stream, date, time, country=de + [ domain, type, levtype, dbase, rki, rty, ty + [ step, levelist?, param ]] +] + +[ class=ms, expver, stream, date, time, country=it + [ domain, type, levtype, model, bcmodel, icmodel:First3 + [ step, levelist?, param ] + ] +] + +[ class=el, expver, stream, date, time, domain + [ origin, type, levtype + [ step, levelist?, param ]] +] + +######################################################## +# The are the rules matching most of the fields +# oper/dcda +[ class, expver, stream=oper/dcda/scda, date, time, domain? + + [ type=im/sim + [ step?, ident, instrument, channel ]] + + [ type=ssd + [ step, param, ident, instrument, channel ]] + + [ type=4i, levtype + [ step, iteration, levelist, param ]] + + [ type=me, levtype + [ step, number, levelist?, param ]] + + [ type=ef, levtype + [ step, levelist?, param, channel? ]] + + [ type=ofb/mfb + [ obsgroup, reportype ]] + + [ type, levtype + [ step, levelist?, param ]] + +] + +# dcwv/scwv/wave +[ class, expver, stream=dcwv/scwv/wave, date, time, domain + [ type, levtype + [ step, param, frequency?, direction? ]]] + +# enfo +[ class, expver, stream=enfo/efov, date, time, domain + + [ type, levtype=dp, product?, section? + [ step, number?, levelist?, latitude?, longitude?, range?, param ]] + + [ type=tu, levtype, reference + [ step, number, levelist?, param ]] + + [ type, levtype + [ step, quantile?, number?, levelist?, param ]] + +] + +# waef/weov +[ class, expver, stream=waef/weov, date, time, domain + [ type, levtype + [ step, number?, param, frequency?, direction? ]] +] + +######################################################## +# enda +[ class, expver, stream=enda, date, time, domain + + [ type=ef/em/es/ses, levtype + [ step, number?, levelist?, param, channel? ]] + + [ type=ssd + [ step, number, param, ident, instrument, channel ]] + + + [ type, levtype + [ step, number?, levelist?, param ]] +] + +# ewda +[ class, expver, stream=ewda, date, time, domain + [ type, levtype + [ step, number?, param, frequency?, direction? ]] +] + + +######################################################## +# elda +[ class, expver, stream=elda, date, time, domain? + + [ type=ofb/mfb + [ obsgroup, reportype ]] + + [ type, levtype, anoffset + [ step, number?, levelist?, iteration?, param, channel? ]] +] + +# ewda +[ class, expver, stream=ewla, date, time, domain + [ type, levtype, anoffset + [ step, number?, param, frequency?, direction? 
]] +] + +######################################################## +# elda +[ class, expver, stream=lwda, date, time, domain? + + [ type=ssd, anoffset + [ step, param, ident, instrument, channel ]] + + [type=me, levtype, anoffset + [ number, step, levelist?, param]] + + [ type=4i, levtype, anoffset + [ step, iteration, levelist, param ]] + + [ type=ofb/mfb + [ obsgroup, reportype ]] + + [ type, levtype, anoffset + [ step, levelist?, param]] +] + +# ewda +[ class, expver, stream=lwwv, date, time, domain + [ type, levtype, anoffset + [ step, param, frequency?, direction? ]] +] +######################################################## +# amap +[ class, expver, stream=amap, date, time, domain + [ type, levtype, origin + [ step, levelist?, param ]]] + +# maed +[ class, expver, stream=maed, date, time, domain + [ type, levtype, origin + [ step, levelist?, param ]]] + +# mawv +[ class, expver, stream=mawv, date, time, domain + [ type, levtype, origin + [ step, param, frequency?, direction? ]]] + +# cher +[ class, expver, stream=cher, date, time, domain + [ type, levtype + [ step, levelist, param ]]] + + +# efhc +[ class, expver, stream=efhc, refdate, time, domain + [ type, levtype, date + [ step, number?, levelist?, param ]]] + +# efho +[ class, expver, stream=efho, date, time, domain + [ type, levtype, hdate + [ step, number?, levelist?, param ]]] + + +# efhs +[ class, expver, stream=efhs, date, time, domain + [ type, levtype + [ step, quantile?, number?, levelist?, param ]]] + +# wehs +[ class, expver, stream=wehs, date, time, domain + [ type, levtype + [ step, quantile?, number?, levelist?, param ]]] + +# kwbc +[ class, expver, stream=kwbc, date, time, domain + [ type, levtype + [ step, number?, levelist?, param ]]] + +# ehmm +[ class, expver, stream=ehmm, date, time, domain + [ type, levtype, hdate + [ fcmonth, levelist?, param ]]] + + +# ammc/cwao/edzw/egrr/lfpw/rjtd/toga +[ class, expver, stream=ammc/cwao/edzw/egrr/lfpw/rjtd/toga/fgge, date, time, domain + [ type, levtype + [ step, levelist?, param ]]] + +######################################################################## + +# enfh +[ class, expver, stream=enfh, date, time, domain + + [ type, levtype=dp, hdate, product?, section? + [ step, number?, levelist?, latitude?, longitude?, range?, param ]] + + [ type, levtype, hdate + [ step, number?, levelist?, param ]] +] + +# enwh +[ class, expver, stream=enwh, date, time, domain + [ type, levtype, hdate + [ step, number?, param, frequency?, direction? ]] +] + +######################################################################## +# sens +[ class, expver, stream=sens, date, time, domain + [ type, levtype + [ step, diagnostic, iteration, levelist?, param ]]] + +######################################################################## +# esmm +[ class, expver, stream=esmm, date, time, domain + [ type, levtype + [ fcmonth, levelist?, param ]]] +# ewhc +[ class, expver, stream=ewhc, refdate, time, domain + [ type, levtype, date + [ step, number?, param, frequency?, direction? ]]] + +######################################################################## +# ewho +[ class, expver, stream=ewho, date, time, domain + [ type, levtype, hdate + [ step, number?, param, frequency?, direction? 
]]] + +# mfam +[ class, expver, stream=mfam, date, time, domain + + [ type=pb/pd, levtype, origin, system?, method + [ fcperiod, quantile, levelist?, param ]] + + [ type, levtype, origin, system?, method + [ fcperiod, number?, levelist?, param ]] + +] + +# mfhm +[ class, expver, stream=mfhm, refdate, time, domain + [ type, levtype, origin, system?, method, date? + [ fcperiod, number?, levelist?, param ]]] +# mfhw +[ class, expver, stream=mfhw, refdate, time, domain + [ type, levtype, origin, system?, method, date + [ step, number?, param ]]] +# mfwm +[ class, expver, stream=mfwm, date, time, domain + [ type, levtype, origin, system?, method + [ fcperiod, number, param ]]] +# mhwm +[ class, expver, stream=mhwm, refdate, time, domain + [ type, levtype, origin, system?, method, date + [ fcperiod, number, param ]]] + +# mmsf +[ class, expver, stream=mmsf, date, time, domain + + [ type, levtype=dp, origin, product, section, system?, method + [ step, number, levelist?, latitude?, longitude?, range?, param ]] + + [ type, levtype, origin, system?, method + [ step, number, levelist?, param ]] +] + +# mnfc +[ class, expver, stream=mnfc, date, time, domain + + [ type, levtype=dp, origin, product, section, system?, method + [ step, number?, levelist?, latitude?, longitude?, range?, param ]] + + [ type, levtype, origin, system?, method + [ step, number?, levelist?, param ]] +] + +# mnfh +[ class, expver, stream=mnfh, refdate, time, domain + [ type, levtype=dp, origin, product, section, system?, method, date + [ step, number?, levelist?, latitude?, longitude?, range?, param ]] + [ type, levtype, origin, system?, method, date? + [ step, number?, levelist?, param ]] +] + +# mnfm +[ class, expver, stream=mnfm, date, time, domain + [ type, levtype, origin, system?, method + [ fcperiod, number?, levelist?, param ]]] + +# mnfw +[ class, expver, stream=mnfw, date, time, domain + [ type, levtype, origin, system?, method + [ step, number?, param ]]] + +# ea/mnth +[ class=ea, expver, stream=mnth, date, domain + [ type, levtype + [ time, step?, levelist?, param ]]] + +# mnth +[ class, expver, stream=mnth, domain + [ type=cl, levtype + [ date: ClimateMonthly, time, levelist?, param ]] + [ type, levtype + [ date , time, step?, levelist?, param ]]] + +# mofc +[ class, expver, stream=mofc, date, time, domain + [ type, levtype=dp, product, section, system?, method + [ step, number?, levelist?, latitude?, longitude?, range?, param ]] + [ type, levtype, system?, method + [ step, number?, levelist?, param ]] +] + +# mofm +[ class, expver, stream=mofm, date, time, domain + [ type, levtype, system?, method + [ fcperiod, number, levelist?, param ]]] + +# mmsa/msmm +[ class, expver, stream=mmsa, date, time, domain + [ type, levtype, origin, system?, method + [ fcmonth, number?, levelist?, param ]]] + +[ class, expver, stream=msmm, date, time, domain + [ type, levtype, origin, system?, method + [ fcmonth, number?, levelist?, param ]]] + +# ocea +[ class, expver, stream=ocea, date, time, domain + [ type, levtype, product, section, system?, method + [ step, number, levelist?, latitude?, longitude?, range?, param ]] +] + +#=# seas +[ class, expver, stream=seas, date, time, domain + + [ type, levtype=dp, product, section, system?, method + [ step, number, levelist?, latitude?, longitude?, range?, param ]] + + [ type, levtype, system?, method + [ step, number, levelist?, param ]] +] + +# sfmm/smma +[ class, expver, stream=sfmm/smma, date, time, domain + [ type, levtype, system?, method + [ fcmonth, number?, levelist?, param ]]] + 
+# supd +[ class=od, expver, stream=supd, date, time, domain + [ type, levtype, origin?, grid + [ step, levelist?, param ]]] + +# For era +[ class, expver, stream=supd, date, time, domain + [ type, levtype, grid- # The minus sign is here to consume 'grid', but don't index it + [ step, levelist?, param ]]] + +# swmm +[ class, expver, stream=swmm, date, time, domain + [ type, levtype, system?, method + [ fcmonth, number, param ]]] + +# wamf +[ class, expver, stream=wamf, date, time, domain + [ type, levtype, system?, method + [ step, number?, param ]]] + +# ea/wamo +[ class=ea, expver, stream=wamo, date, domain + [ type, levtype + [ time, step?, param ]]] + +# wamo +[ class, expver, stream=wamo, domain + [ type=cl, levtype + [ date: ClimateMonthly, time, param ]] + [ type, levtype + [ date, time, step?, param ]]] + +# wamd +[ class, expver, stream=wamd, date, domain + [ type, levtype + [ param ]]] + +# wasf +[ class, expver, stream=wasf, date, time, domain + [ type, levtype, system?, method + [ step, number, param ]]] +# wmfm +[ class, expver, stream=wmfm, date, time, domain + [ type, levtype, system?, method + [ fcperiod, number, param ]]] + +# moda +[ class, expver, stream=moda, date, domain + [ type, levtype + [ levelist?, param ]]] + +# msdc/mdfa/msda +[ class, expver, stream=msdc/mdfa/msda, domain + [ type, levtype + [ date, time?, step?, levelist?, param ]]] + + + +# seap +[ class, expver, stream=seap, date, time, domain + [ type=sv/svar, levtype, origin, method? + [ step, leadtime, opttime, number, levelist?, param ]] + + [ type=ef, levtype, origin + [ step, levelist?, param, channel? ]] + + [ type, levtype, origin + [ step, levelist?, param ]] + + ] + +[ class, expver, stream=mmaf, date, time, domain + [ type, levtype, origin, system?, method + [ step, number, levelist?, param ]] +] + +[ class, expver, stream=mmam, date, time, domain + [ type, levtype, origin, system?, method + [ fcmonth, number, levelist?, param ]] +] + + +[ class, expver, stream=dacl, domain + [ type=pb, levtype + [ date: ClimateDaily, time, step, quantile, levelist?, param ]] + [ type, levtype + [ date: ClimateDaily, time, step, levelist?, param ]] + +] + +[ class, expver, stream=dacw, domain + [ type=pb, levtype + [ date: ClimateDaily, time, step, quantile, param ]] + [ type, levtype + [ date: ClimateDaily, time, step, param ]] + +] + +[ class, expver, stream=edmm/ewmm, date, time, domain + [ type=ssd + [ step, number, param, ident, instrument, channel ]] + [ type, levtype + [ step, number, levelist?, param ]] +] + +[ class, expver, stream=edmo/ewmo, date, domain + [ type, levtype + [ number, levelist?, param ]] +] + +# stream gfas +[ class=mc/rd, expver, stream=gfas, date, time, domain + [ type=ga, levtype + [ step, param ]] + + [ type=gsd + [ param, ident, instrument ]] + +] + +# class is e2 +[ class, expver, stream=espd, date, time, domain + [ type, levtype, origin, grid + [ step, number, levelist?, param ]]] + +[ class=cs, expver, stream, date:Default, time, domain + [ type, levtype + [ step, levelist?, param ]]] + + + diff --git a/docs/content/concept/administration/schema.rst b/docs/content/concept/administration/schema.rst new file mode 100644 index 000000000..2c1a41088 --- /dev/null +++ b/docs/content/concept/administration/schema.rst @@ -0,0 +1,98 @@ +The schema indexes structure and data collocation policy. It is used to uniquely +describe the data that is being stored and indexed. 
+The schema uses global attributes that describe the underlying data, each
+attribute having a name and a datatype; these are added at the beginning of the file.
+
+::
+
+   param: Param;
+   step: Step;
+   date: Date;
+   latitude: Double;
+   longitude: Double;
+
+The schema then describes rules for accessing all data stored by the FDB.
+
+Each rule is described using three levels. The first level defines the attributes
+of the top level directory, the second level defines the attributes used to name
+the data files, and the third level attributes are used as index keys. The levels
+are subsets of the metadata describing the data.
+
+Example of a rule:
+
+::
+
+   [ class, expver, stream=oper/dcda/scda, date, time, domain?
+      [ type, levtype
+         [ step, levelist?, param ]]]
+
+Take for example the following metadata:
+
+::
+
+   class = od,
+   expver = 0001,
+   stream = oper,
+   date = 20240202,
+   time = 0000,
+   domain = g,
+   type = fc,
+   levtype = pl,
+   step = 1,
+   levelist = 150,
+   param = 130
+
+With the rule from above, a message with the given metadata yields the following key:
+
+::
+
+   {class=od,expver=0001,stream=oper,date=20240202,time=0000,domain=g}{type=fc,levtype=pl}{step=1,levelist=150,param=130}
+
+As you can see, the three levels are represented in the final key, describing the data.
+The individual sub-keys are:
+
+.. code-block:: bash
+
+   {class=od,expver=0001,stream=oper,date=20240202,time=0000,domain=g}  # Dataset Key
+   {type=fc,levtype=pl}                                                 # Collocation Key
+   {step=1,levelist=150,param=130}                                      # Element Key
+
+
+Rules can be grouped in the form:
+
+::
+
+   [a1, a2, a3 ...
+      [b1, b2, b3... [c1, c2, c3...]]
+      [B1, B2, B3... [C1, C2, C3...]]
+   ]
+
+A list of values can be given for an attribute.
+
+::
+
+   [ ..., stream=enfo/efov, ... ]
+
+Attributes in rules can also be made optional using the ? character.
+
+::
+
+   [ step, levelist?, param ]
+
+Attributes can be removed using the - character.
+
+::
+
+   [grid-]
+
+Rules are then matched if:
+   * the attributes are present in the data or request, or marked optional
+   * where a list of values is provided, one of them matches
+
+Example schema
+**************
+
+.. literalinclude:: administration/example-schema
+
+..
+  _## TODO: add more info on the schema
diff --git a/docs/content/concept/architecture.rst b/docs/content/concept/architecture.rst
new file mode 100644
index 000000000..f23eea968
--- /dev/null
+++ b/docs/content/concept/architecture.rst
@@ -0,0 +1,63 @@
+Architecture
+============
+
+Client API & Semantics
+----------------------
+The client-side API of the FDB exposes several functions for data handling. The
+following section lists each of the high-level functions and briefly describes
+its meaning.
+
+archive()
+`````````
+Archives a given message set with respect to a given key and creates a new database if
+necessary.
+
+The key, describing the data in a globally unique and minimal manner,
+is created from the metadata describing the data, e.g. the meteorological
+information attached to a GRIB message. The schema is consulted to determine how
+the data layout in the FDB's store should be created. After the persistence of the data, a new
+entry in the catalogue is created.
+
+flush()
+```````
+Ensures data is persisted on disk. This is especially important in cases where the
+data is stored remotely.
+
+Blocks until all data is persisted into the FDB's store.
+
+list()
+``````
+Lists the contents of the FDB databases.
+
+Duplicate data, i.e. data which has been masked by newer entries given the same key,
+is skipped.
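+
+As an illustration, the sketch below shows how an archival typically combines these
+calls via the C++ API. It is a minimal sketch only: the class and method names follow
+the operations described above, but the exact headers and signatures should be checked
+against the installed FDB version.
+
+.. code-block:: cpp
+
+   #include "eckit/message/Message.h"
+   #include "fdb5/api/FDB.h"
+
+   // Archive one encoded GRIB message and make it visible to readers.
+   void archiveAndFlush(fdb5::FDB& fdb, eckit::message::Message msg) {
+       fdb.archive(msg);  // asynchronous: returns once the message is handed to the backend
+       fdb.flush();       // consistent: blocks until the archived data is persisted and visible
+   }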
+purge()
+```````
+Purges duplicate entries from the catalogue and removes the associated data
+matching a given request.
+
+Data in the FDB is immutable. It is masked, but not damaged or deleted, when
+it is overwritten with new data given the same key. Due to this property, duplicates
+of the data can be present at any time. Purging leads to the removal of redundant
+data without compromising already saved entries.
+
+This only holds for data which was written by the user who tries to delete it.
+
+
+Catalogue
+---------
+
+
+Store
+-----
+
+Data Routing
+------------
+
+Remote
+------
+
+Authentication & Security
+-------------------------
+
diff --git a/docs/content/concept/client-reference.rst b/docs/content/concept/client-reference.rst
new file mode 100644
index 000000000..0ca2b0fdd
--- /dev/null
+++ b/docs/content/concept/client-reference.rst
@@ -0,0 +1,11 @@
+Client Reference
+================
+
+How do you use the FDB from an application
+------------------------------------------
+
+Full Docs of the FDB
+--------------------
+
+Examples
+--------
diff --git a/docs/content/concept/data-governance.rst b/docs/content/concept/data-governance.rst
new file mode 100644
index 000000000..7501f43d0
--- /dev/null
+++ b/docs/content/concept/data-governance.rst
@@ -0,0 +1,8 @@
+Data Governance & Requirements
+==============================
+
+Further detail about the Schema
+-------------------------------
+
+Implications and needs of data governance
+-----------------------------------------
diff --git a/docs/content/DebugTools.rst b/docs/content/concept/fdb-tools/DebugTools.rst
similarity index 89%
rename from docs/content/DebugTools.rst
rename to docs/content/concept/fdb-tools/DebugTools.rst
index 55a1cb373..44d193ffd 100644
--- a/docs/content/DebugTools.rst
+++ b/docs/content/concept/fdb-tools/DebugTools.rst
@@ -1,5 +1,5 @@
 Debug Tools
-===========
+-----------
 
 The debugging tools are found here:
 
diff --git a/docs/content/DebugTools/dump-index.rst b/docs/content/concept/fdb-tools/DebugTools/dump-index.rst
similarity index 97%
rename from docs/content/DebugTools/dump-index.rst
rename to docs/content/concept/fdb-tools/DebugTools/dump-index.rst
index 62c25a069..ac06d87bd 100644
--- a/docs/content/DebugTools/dump-index.rst
+++ b/docs/content/concept/fdb-tools/DebugTools/dump-index.rst
@@ -1,5 +1,5 @@
 fdb dump-index
-==============
+**************
 
 Dump the contents of a particular index file for debugging purposes.
 
@@ -40,4 +40,4 @@ Dump the contents of an index file.
     Contents of index:
     Fingerprint: 0:1000:130, location: FieldRefLocation(pathid=0,offset=0)
     Fingerprint: 0:300:130, location: FieldRefLocation(pathid=0,offset=3280398)
-    ...
\ No newline at end of file
+    ...
diff --git a/docs/content/DebugTools/dump-toc.rst b/docs/content/concept/fdb-tools/DebugTools/dump-toc.rst
similarity index 99%
rename from docs/content/DebugTools/dump-toc.rst
rename to docs/content/concept/fdb-tools/DebugTools/dump-toc.rst
index 664a823a0..e5c878235 100644
--- a/docs/content/DebugTools/dump-toc.rst
+++ b/docs/content/concept/fdb-tools/DebugTools/dump-toc.rst
@@ -1,5 +1,5 @@
 fdb dump-toc
-============
+************
 
 Description
 -----------
@@ -45,4 +45,4 @@ Dump the contents of a toc walking the subtocs logically.
Note that subtocs that TOC_INIT 2019-06-07 14:17:01.032360, version:2, fdb: 50308, uid: , pid 31508, host: Key: {class=rd,expver=xxxx,stream=oper,date=20160907,time=1200,domain=g}, sub-toc: no TOC_INDEX 2019-06-07 14:28:31.850438, version:2, fdb: 50308, uid: , pid 644 , host: Path: an:pl.20190607.142831..2765958938625.index, offset: 0, type: BTreeIndex Prefix: an:pl, key: {type=an,levtype=pl} TOC_INDEX 2019-06-07 14:17:01.943474, version:2, fdb: 50308, uid: , pid 31508, host: Path: an:pl.20190607.141701..135325829562374.index, offset: 0, type: BTreeIndex Prefix: an:pl, key: {type=an,levtype=pl} - ... \ No newline at end of file + ... diff --git a/docs/content/DebugTools/dump.rst b/docs/content/concept/fdb-tools/DebugTools/dump.rst similarity index 99% rename from docs/content/DebugTools/dump.rst rename to docs/content/concept/fdb-tools/DebugTools/dump.rst index 7a1cfdc2e..2552692f3 100644 --- a/docs/content/DebugTools/dump.rst +++ b/docs/content/concept/fdb-tools/DebugTools/dump.rst @@ -1,5 +1,5 @@ fdb dump -======== +******** Dump the structural contents of the FDB. In particular, in the TOC formulation, enumerate the different entries in the Table of Contents (including INIT and CLEAR entries). diff --git a/docs/content/DebugTools/hammer.rst b/docs/content/concept/fdb-tools/DebugTools/hammer.rst similarity index 99% rename from docs/content/DebugTools/hammer.rst rename to docs/content/concept/fdb-tools/DebugTools/hammer.rst index 2c40a22c1..395fef42e 100644 --- a/docs/content/DebugTools/hammer.rst +++ b/docs/content/concept/fdb-tools/DebugTools/hammer.rst @@ -1,5 +1,5 @@ fdb hammer -========== +********** Description ----------- diff --git a/docs/content/DebugTools/patch.rst b/docs/content/concept/fdb-tools/DebugTools/patch.rst similarity index 98% rename from docs/content/DebugTools/patch.rst rename to docs/content/concept/fdb-tools/DebugTools/patch.rst index 4f6cdc3eb..4a4c80324 100644 --- a/docs/content/DebugTools/patch.rst +++ b/docs/content/concept/fdb-tools/DebugTools/patch.rst @@ -1,5 +1,5 @@ fdb patch -========= +********* Description ----------- @@ -53,4 +53,4 @@ Note that this is a global search through all the databases of the FDB that matc 12 fields (37.5412 Mbytes) copied to {expver=xxxz} Rates: 114.971 Mbytes per second, 36.7503 fields/s - fdb patch: 0.946881 second elapsed, 0.159061 second cpu \ No newline at end of file + fdb patch: 0.946881 second elapsed, 0.159061 second cpu diff --git a/docs/content/DebugTools/read.rst b/docs/content/concept/fdb-tools/DebugTools/read.rst similarity index 99% rename from docs/content/DebugTools/read.rst rename to docs/content/concept/fdb-tools/DebugTools/read.rst index 469041a80..cd988f662 100644 --- a/docs/content/DebugTools/read.rst +++ b/docs/content/concept/fdb-tools/DebugTools/read.rst @@ -1,5 +1,5 @@ fdb read -======== +******** Read data from the FDB and write this data into a specified target file. This may involve visiting multiple databases if required by the request. 
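+
+For example, an invocation of this tool could look as follows (an illustrative sketch
+only — the request file name is a placeholder; see the usage section of this page for
+the exact options):
+
+::
+
+   % fdb read example.req target.grib
+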
diff --git a/docs/content/DebugTools/reconsolidate-toc.rst b/docs/content/concept/fdb-tools/DebugTools/reconsolidate-toc.rst similarity index 90% rename from docs/content/DebugTools/reconsolidate-toc.rst rename to docs/content/concept/fdb-tools/DebugTools/reconsolidate-toc.rst index 9544d52f3..9021518a5 100644 --- a/docs/content/DebugTools/reconsolidate-toc.rst +++ b/docs/content/concept/fdb-tools/DebugTools/reconsolidate-toc.rst @@ -1,5 +1,5 @@ fdb reconsolidate-toc -===================== +********************* This is an advanced tool that exists to assist cleanup in a situation where databases have been poorly written. This can occur in a number of contexts such as: @@ -12,4 +12,4 @@ In general it is preferable to wipe such databases, and rerun with correct exper Usage ----- -``fdb reconsolidate-toc [database path]`` \ No newline at end of file +``fdb reconsolidate-toc [database path]`` diff --git a/docs/content/DebugTools/write.rst b/docs/content/concept/fdb-tools/DebugTools/write.rst similarity index 99% rename from docs/content/DebugTools/write.rst rename to docs/content/concept/fdb-tools/DebugTools/write.rst index 5ed793863..22ae85be3 100644 --- a/docs/content/DebugTools/write.rst +++ b/docs/content/concept/fdb-tools/DebugTools/write.rst @@ -1,5 +1,5 @@ fdb write -========= +********* Inserts data into the FDB, creating a new databases if needed. The data is copied into the FDB, and the tool reports the location where it was inserted. diff --git a/docs/content/GeneralPurposeTools.rst b/docs/content/concept/fdb-tools/GeneralPurposeTools.rst similarity index 69% rename from docs/content/GeneralPurposeTools.rst rename to docs/content/concept/fdb-tools/GeneralPurposeTools.rst index 119c48635..5b73971d0 100644 --- a/docs/content/GeneralPurposeTools.rst +++ b/docs/content/concept/fdb-tools/GeneralPurposeTools.rst @@ -1,5 +1,5 @@ General Purpose Tools -===================== +--------------------- The General Purpose tools are found here: @@ -7,4 +7,4 @@ The General Purpose tools are found here: :maxdepth: 1 :glob: - GeneralPurposeTools/* \ No newline at end of file + GeneralPurposeTools/* diff --git a/docs/content/GeneralPurposeTools/fdb-stats.rst b/docs/content/concept/fdb-tools/GeneralPurposeTools/fdb-stats.rst similarity index 98% rename from docs/content/GeneralPurposeTools/fdb-stats.rst rename to docs/content/concept/fdb-tools/GeneralPurposeTools/fdb-stats.rst index 3ab639af8..77948ef6c 100644 --- a/docs/content/GeneralPurposeTools/fdb-stats.rst +++ b/docs/content/concept/fdb-tools/GeneralPurposeTools/fdb-stats.rst @@ -1,5 +1,5 @@ fdb stats -========= +********* Prints information about FDB databases, aggregating the information over all the databases visited into a final summary. @@ -33,7 +33,7 @@ Example 1 You may pass a partial request (as a key) that will print information on all FDB databases that match that key. -:: +.. code-block:: bash % fdb stats class=od,expver=0001,stream=oper,date=20151001 ... @@ -67,7 +67,7 @@ Example 2 The --details flag prints a report per database that is visited, as well as the overall summary -:: +.. code-block:: bash % fdb stats class=od,expver=0001 ... @@ -98,4 +98,4 @@ The --details flag prints a report per database that is visited, as well as the ======== Number of databases : 4 - ... \ No newline at end of file + ... 
diff --git a/docs/content/GeneralPurposeTools/fdb.rst b/docs/content/concept/fdb-tools/GeneralPurposeTools/fdb.rst similarity index 98% rename from docs/content/GeneralPurposeTools/fdb.rst rename to docs/content/concept/fdb-tools/GeneralPurposeTools/fdb.rst index c7555e8cf..554698849 100644 --- a/docs/content/GeneralPurposeTools/fdb.rst +++ b/docs/content/concept/fdb-tools/GeneralPurposeTools/fdb.rst @@ -1,5 +1,5 @@ fdb -=== +*** Description ----------- @@ -38,4 +38,4 @@ fdb list example {class=od,expver=0001,stream=oper,date=20151004,time=1200,domain=g}{type=an,levtype=pl}{step=0,levelist=700,param=155} {class=od,expver=0001,stream=oper,date=20151004,time=1200,domain=g}{type=an,levtype=pl}{step=0,levelist=850,param=129} {class=od,expver=0001,stream=oper,date=20151004,time=1200,domain=g}{type=an,levtype=pl}{step=0,levelist=850,param=130} - ... \ No newline at end of file + ... diff --git a/docs/content/GeneralPurposeTools/grib2fdb.rst b/docs/content/concept/fdb-tools/GeneralPurposeTools/grib2fdb.rst similarity index 99% rename from docs/content/GeneralPurposeTools/grib2fdb.rst rename to docs/content/concept/fdb-tools/GeneralPurposeTools/grib2fdb.rst index a4a6019c5..2995e31b4 100644 --- a/docs/content/GeneralPurposeTools/grib2fdb.rst +++ b/docs/content/concept/fdb-tools/GeneralPurposeTools/grib2fdb.rst @@ -1,5 +1,5 @@ grib2fdb5 -========= +********* Inserts data into the FDB, creating a new databases if needed. @@ -51,4 +51,4 @@ Check that the supplied keys match Processing data.grib Key {class=rd,expver=xxxx,type=an,stream=oper} FDB archive 12 fields, size 37.5412 Mbytes, in 0.086995 second (431.518 Mbytes per second) - fdb::service::archive: 0.087076 second elapsed, 0.087075 second cpu \ No newline at end of file + fdb::service::archive: 0.087076 second elapsed, 0.087075 second cpu diff --git a/docs/content/GeneralPurposeTools/info.rst b/docs/content/concept/fdb-tools/GeneralPurposeTools/info.rst similarity index 98% rename from docs/content/GeneralPurposeTools/info.rst rename to docs/content/concept/fdb-tools/GeneralPurposeTools/info.rst index 1edb3be7b..13d6efaaa 100644 --- a/docs/content/GeneralPurposeTools/info.rst +++ b/docs/content/concept/fdb-tools/GeneralPurposeTools/info.rst @@ -1,5 +1,5 @@ fdb info -======== +******** Get information about the FDB configuration and binaries @@ -61,4 +61,4 @@ Get location of current FDB configuration file in an easily parsable form: :: % fdb info --config - /testcases/fdb5/fdb5_simple.yaml \ No newline at end of file + /testcases/fdb5/fdb5_simple.yaml diff --git a/docs/content/GeneralPurposeTools/list.rst b/docs/content/concept/fdb-tools/GeneralPurposeTools/list.rst similarity index 99% rename from docs/content/GeneralPurposeTools/list.rst rename to docs/content/concept/fdb-tools/GeneralPurposeTools/list.rst index 72d764c53..e4b2aaa65 100644 --- a/docs/content/GeneralPurposeTools/list.rst +++ b/docs/content/concept/fdb-tools/GeneralPurposeTools/list.rst @@ -1,9 +1,10 @@ fdb list -======== +******** Lists the contents of the FDB databases. In the body of the output, one line is given per field that has been archived. These (by default) present the fields that are available and will be retrievable - i.e. masked duplicates are skipped. 
The lines are broken into three segments, which represent the hierarchical nature of the schema: + * The first component identifies the FDB database containing the data * The second component identifies the (set of) indexes * The third component identifies entries collocated within an index diff --git a/docs/content/GeneralPurposeTools/purge.rst b/docs/content/concept/fdb-tools/GeneralPurposeTools/purge.rst similarity index 98% rename from docs/content/GeneralPurposeTools/purge.rst rename to docs/content/concept/fdb-tools/GeneralPurposeTools/purge.rst index 43b1fd636..85a3f83fe 100644 --- a/docs/content/GeneralPurposeTools/purge.rst +++ b/docs/content/concept/fdb-tools/GeneralPurposeTools/purge.rst @@ -1,5 +1,5 @@ fdb purge -========= +********* Purge duplicate entries from the database and remove the associated data (if the data is owned, not adopted). @@ -99,4 +99,4 @@ Additionally pass the --doit flag to delete the duplicates. ... % du -sh /data/fdb/od\:0001\:oper\:20160907\:1200\:g/ - 20M fdb_root/root/od:0001:oper:20160907:1200:g/ \ No newline at end of file + 20M fdb_root/root/od:0001:oper:20160907:1200:g/ diff --git a/docs/content/GeneralPurposeTools/schema.rst b/docs/content/concept/fdb-tools/GeneralPurposeTools/schema.rst similarity index 100% rename from docs/content/GeneralPurposeTools/schema.rst rename to docs/content/concept/fdb-tools/GeneralPurposeTools/schema.rst diff --git a/docs/content/GeneralPurposeTools/where.rst b/docs/content/concept/fdb-tools/GeneralPurposeTools/where.rst similarity index 99% rename from docs/content/GeneralPurposeTools/where.rst rename to docs/content/concept/fdb-tools/GeneralPurposeTools/where.rst index 0a91247aa..29d082ac9 100644 --- a/docs/content/GeneralPurposeTools/where.rst +++ b/docs/content/concept/fdb-tools/GeneralPurposeTools/where.rst @@ -1,5 +1,5 @@ fdb where -========= +********* Print the location of FDB5 database. diff --git a/docs/content/GeneralPurposeTools/wipe.rst b/docs/content/concept/fdb-tools/GeneralPurposeTools/wipe.rst similarity index 99% rename from docs/content/GeneralPurposeTools/wipe.rst rename to docs/content/concept/fdb-tools/GeneralPurposeTools/wipe.rst index 4b19758ad..4874264e0 100644 --- a/docs/content/GeneralPurposeTools/wipe.rst +++ b/docs/content/concept/fdb-tools/GeneralPurposeTools/wipe.rst @@ -1,5 +1,5 @@ fdb wipe -======== +******** Deletes FDB databases and the data therein contained. Uses the passed request to identify the database to delete. @@ -142,4 +142,4 @@ Use --minimum-keys with caution! Setting --minimum-keys=class is a BAD IDEA! 
You Unlink /data/fdb5/rd:wxyz:oper:20151004:1200:g/toc Unlink /data/fdb5/rd:wxyz:oper:20151004:1200:g/schema Unlink /data/fdb5/rd:wxyz:oper:20151004:1200:g/an:pl.20170323.164359.host.30249454665731.data - Unlink /data/fdb5/rd:wxyz:oper:20151004:1200:g/an:pl.20170323.164359.host.30249454665730.index \ No newline at end of file + Unlink /data/fdb5/rd:wxyz:oper:20151004:1200:g/an:pl.20170323.164359.host.30249454665730.index diff --git a/docs/content/SpecialPurposeTools.rst b/docs/content/concept/fdb-tools/SpecialPurposeTools.rst similarity index 69% rename from docs/content/SpecialPurposeTools.rst rename to docs/content/concept/fdb-tools/SpecialPurposeTools.rst index baadc2da5..5ffe63cec 100644 --- a/docs/content/SpecialPurposeTools.rst +++ b/docs/content/concept/fdb-tools/SpecialPurposeTools.rst @@ -1,5 +1,5 @@ Special Purpose Tools -===================== +--------------------- The Special Purpose tools are found here: @@ -7,4 +7,4 @@ The Special Purpose tools are found here: :maxdepth: 1 :glob: - SpecialPurposeTools/* \ No newline at end of file + SpecialPurposeTools/* diff --git a/docs/content/SpecialPurposeTools/hide.rst b/docs/content/concept/fdb-tools/SpecialPurposeTools/hide.rst similarity index 98% rename from docs/content/SpecialPurposeTools/hide.rst rename to docs/content/concept/fdb-tools/SpecialPurposeTools/hide.rst index 5a5aa9fda..f15268515 100644 --- a/docs/content/SpecialPurposeTools/hide.rst +++ b/docs/content/concept/fdb-tools/SpecialPurposeTools/hide.rst @@ -1,5 +1,5 @@ fdb hide -======== +******** Hide the contents of one FDB database. This masks all existing entries in the database such that they are permanently inaccessible, without destructively damaging the data or indexes. @@ -38,4 +38,4 @@ These changes can then be made permanent :: % fdb hide --doit class=rd,expver=xxxx,stream=oper,date=20160907,time=0000,domain=g - Hide contents of DB: TocDBReader(/fdb5/root/rd:xxxx:oper:20160907:0000:g) \ No newline at end of file + Hide contents of DB: TocDBReader(/fdb5/root/rd:xxxx:oper:20160907:0000:g) diff --git a/docs/content/SpecialPurposeTools/move.rst b/docs/content/concept/fdb-tools/SpecialPurposeTools/move.rst similarity index 98% rename from docs/content/SpecialPurposeTools/move.rst rename to docs/content/concept/fdb-tools/SpecialPurposeTools/move.rst index fbadf6108..3f3210379 100644 --- a/docs/content/SpecialPurposeTools/move.rst +++ b/docs/content/concept/fdb-tools/SpecialPurposeTools/move.rst @@ -1,5 +1,5 @@ fdb move -======== +******** Move the content of one FDB database. This locks the source database, make it possible to create a second database in another root, duplicates all data. Source data are not automatically removed. @@ -41,5 +41,7 @@ Example ------- This tool is non-destructive (it only copies data), but it affects the status of the source DB by applying locks. 
+ +Manual Build & Installation +--------------------------- + +FDB employs an out-of-source build/install based on CMake. +Make sure ecbuild is installed and the ecbuild executable script is found ( ``which ecbuild`` ). + +:: + + # Clone repo + git clone https://github.com/ecmwf/fdb + cd fdb + + # Environment --- Edit as needed + srcdir=$(pwd) + builddir=build + installdir=$HOME/local + + # 1. Create the build directory: + mkdir $builddir + cd $builddir + + # 2. Run CMake + ecbuild --prefix=$installdir -- -DCMAKE_INSTALL_PREFIX=</path/to/installation> $srcdir + + # 3. Compile / Install + make -j10 + ctest + make install + +Bundle Build & Installation +--------------------------- +You also have the option to install FDB with all its dependencies as a bundle using +`ecbundle <https://github.com/ecmwf/ecbundle>`_. Install `ecbundle` via + +:: + + git clone https://github.com/ecmwf/ecbundle + export PATH=$(pwd)/ecbundle/bin:${PATH} + +Afterwards you can use the `bundle.yml` below to install FDB and its dependencies. +Simply place it in a directory of your choice. + ..
code-block:: yaml + + --- + name : fdb_bundle # The name given to the bundle + version : 2024.2 # A version given to the bundle + cmake : CMAKE_EXPORT_COMPILE_COMMANDS=ON + + projects : + + - ecbuild : + git : https://github.com/ecmwf/ecbuild + version : 3.8.2 + bundle : false # (do not build/install, only download) + + - eckit : + git : https://github.com/ecmwf/eckit + version : 1.25.2 + require : ecbuild + cmake : > # turn off some unnecessary eckit features + ENABLE_ECKIT_CMD=off + ENABLE_ECKIT_SQL=off + + - metkit : + git : https://github.com/ecmwf/metkit + version : master + require : ecbuild + + - eccodes: + git : https://github.com/ecmwf/eccodes + version : master + require : ecbuild + cmake : > + ENABLE_AEC=ON + ENABLE_MEMFS=ON + + - fdb : + git : https://github.com/ecmwf/fdb + version : develop + require : ecbuild eckit metkit eccodes + + options: + - with-aec: + help : Enable AEC library + cmake : ENABLE_AEC=ON + + - with-memfs: + help : Enable MEMFS library + cmake : ENABLE_MEMFS=ON + +Run + +:: + + ecbundle create && \ + ecbundle build --install-dir=<install-dir> -j10 + +to build FDB with all dependencies. A final `./build/install.sh` will install FDB +to the path you specified. + + +Python API +---------- +There is also a Python interface for accessing FDB. A thin Python wrapper around the existing +FDB functionality can be found here: `PyFDB <https://github.com/ecmwf/pyfdb>`_. diff --git a/docs/content/concept/introduction.rst b/docs/content/concept/introduction.rst new file mode 100644 index 000000000..57f116b07 --- /dev/null +++ b/docs/content/concept/introduction.rst @@ -0,0 +1,70 @@ +Introduction +============ + + +Numerical weather prediction (NWP) and climate simulations are data-heavy applications. +Over the course of the last 40 years data output has increased by several orders +of magnitude and is projected to keep growing. In 1995 ECMWF generated a total of +14 TiB per year, whereas the ensemble forecast output at the end of 2023 +totaled 60 TiB in just one hour. + +The corresponding processing of all output data (archiving as well as retrieving) +consists of I/O-intensive operations, putting stress on all involved systems. Additionally, weather +forecasts rapidly decay in value after their creation (being superseded by newer +forecasts). There is thus a strong need to make the generated data available quickly and +cheaply for the general lifetime of the forecast data, which is typically 3-5 days. + +The **Fields DataBase (FDB)** is a domain-specific object store developed at ECMWF for storing, +indexing and retrieving GRIB data, therefore playing the essential role of a hot cache in +the context of NWP. + +The image below shows the **FDB** running in an operational forecast setting at ECMWF. The **Integrated +Forecasting System (IFS)** writes the output of ensemble forecasts to the FDB, leading to +approximately 1500 I/O nodes writing in parallel to the FDB file system. Numerous +post-processing applications access the output of the IFS during the writing process to +enable the fastest possible creation of corresponding products. + +The long-term storing of data into the **Meteorological Archival and Retrieval System (MARS)** +is also triggered from the FDB side, resulting in enormous I/O-induced stress on the file systems. + +.. image:: /content/img/FDB_schema.png + :width: 400 + :align: center + :alt: Schematic overview of the FDB in a setup + + +Data in the **MARS** ecosystem consists of concatenated **GRIB** files. **GRIB** files are collections +of self-contained records of 2D data without references to other records or an overall data schema. Therefore, +a stream of valid **GRIB** messages is itself a valid **GRIB** message. + +.. image:: /content/img/Grib_msg.png + :align: center + :alt: Schematic of a stream of GRIB messages + + +Dealing with this amount of data in parallel poses many problems, e.g. developing a description +which is minimal, while still uniquely identifying each data field in the entire ecosystem. +The MARS request language is such a semantic description, which is also used in the context of the FDB. +It describes data by specifying meteorological properties. It has two main characteristics, illustrated by the example below: + +- **Globally unique**: A request deterministically returns exactly one set of data. This does not necessarily + have to be a single field, due to the possibility of partial descriptions. +- **Minimal**: Nothing in the request can be omitted without changing the returned dataset.
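+ +A hypothetical request in the MARS request language (see :ref:`mars-request-label`) identifying a single analysis field could look as follows; the key-value pairs mirror those used in the tool examples of this documentation and are purely illustrative: + +:: + + retrieve, + class = od, + expver = 0001, + stream = oper, + date = 20160907, + time = 1200, + domain = g, + type = an, + levtype = pl, + levelist = 500, + param = t + +Omitting any of these directives would either change the returned dataset or leave a partial description matching more than one field.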
+ +What is the FDB +--------------- + +MARS Ecosystem +-------------- + +Semantic Methodic +----------------- + +- Globally unique +- Minimal + +Messages +-------- + +Usage Semantics +--------------- diff --git a/docs/content/concept/reference-docs.rst b/docs/content/concept/reference-docs.rst new file mode 100644 index 000000000..5d9ebc675 --- /dev/null +++ b/docs/content/concept/reference-docs.rst @@ -0,0 +1,2 @@ +Reference Documentation +======================= diff --git a/docs/content/tools.rst b/docs/content/concept/tools.rst similarity index 75% rename from docs/content/tools.rst rename to docs/content/concept/tools.rst index fe155222e..139deea82 100644 --- a/docs/content/tools.rst +++ b/docs/content/concept/tools.rst @@ -1,5 +1,7 @@ -FDB Tools -========= +.. _tools-label: + +Tools +===== FDB has a set of general purpose CLI tools for archiving and retrieving user data, some special purpose tools for administrative maintenance, and developer tools for debugging. @@ -8,19 +10,19 @@ The debugging tools are: .. toctree:: :maxdepth: 1 - DebugTools + fdb-tools/DebugTools The general purpose tools are: .. toctree:: :maxdepth: 1 - GeneralPurposeTools + fdb-tools/GeneralPurposeTools The special purpose tools are: .. toctree:: :maxdepth: 1 - SpecialPurposeTools + fdb-tools/SpecialPurposeTools diff --git a/docs/content/config-schema.rst b/docs/content/config-schema.rst deleted file mode 100644 index 71e1035a3..000000000 --- a/docs/content/config-schema.rst +++ /dev/null @@ -1,114 +0,0 @@ -Config -====== - -Config files define a number of parameters for the FDB. - -The following is of the form local: -:: - type: local - engine: toc - schema: ./schema - spaces: - - handler: Default - roots: - -path: /path/to/fdb/root - -There a number of different types such as local, remote, distributed, and select. - -Local implements the passage of data from the frontend to storage backend, talk to the FDB Store and Catalogue. -Depending on the backend, the data or metadata may not actually be local. - -Select dispatches requests to different FDB's based on the metadata associated with the Messages, and can be used to send split requests OD from RD. - -Select Type: -:: - type: select - fdbs: - - select: class=od - type:local - spaces: - roots: - -path: /path/to/fdb/od - -select: class=rd,expver=xx.?.? - type: local - spaces: - roots: - - path: /path/to/fdb/rd - -The remote type handles access to the remote FDB vis TCP/IP. It talks to the FDB server using an asynchronous protocol. -It only handles the transition.
not the distribution of data. - -Remote type: -:: - type: remote - host: fdb-minus - port: 36604 - -The distributed type implements the multi-lane access to multiple FDB's. It uses rendezvous hashing to avoid synchronisations. - -Dist type: -:: - type: dist - lanes: - -type: remote - host: fdb-minus-1 - port: 36604 - -type: remote - host: fdb-minus-2 - port: 36604 - -These types can be composed together in the config file when using FDB. - -## TODO: Get this reviewed and add more information. - -Schema -====== - -The schema indexes structure and data collocation policy. It is used to uniquely describe the data that is being stored and indexed. - -The schema uses global attributes that describe the underlying data with each attribute having a name and datatype, these are added at the beginning of the file. -:: - param: Param; - step: Step; - date; Date; - latitude; Double; - longitude; Double; - -The schema then describes rules for accessing all data stored by the fdb. - -Each rule is described using three levels. The first level defines the attributes of the top level directory, the second level defines the attributes used to name the data files, and the third level attributes are used as index keys. -Example of a rule: -:: - [ class=ti/s2, expver, stream, date, time, model - [ origin, type, levtype, hdate? - [ step, number?, levelist?, param ]] - ] - -Rules can be grouped in the form: -:: - [a1, a2, a3 ... - b1, b2, b3... [c1, c2, c3...]] - B1, B2, B3... [C1, C2, C3...]] - ] - -A list of values can be given for an attribute. -:: - [ ..., stream=enfo/efov, ... ] - -Attributes in rules can also be optional using the ? character. -:: - [ step, levelist?, param ] - -Attributes can be removed using the - character. -:: - [grid-] - -Rules are then matched if: - * the attributes are present or marked optional - * a list is provided, one of them matched - -An example schema is provided schema_. - -.. 
_schema: schema - -## TODO: add more info on the schema diff --git a/docs/content/genindex.rst b/docs/content/genindex.rst deleted file mode 100644 index 9e530fa2f..000000000 --- a/docs/content/genindex.rst +++ /dev/null @@ -1,2 +0,0 @@ -Index -===== diff --git a/docs/content/img/FDB_Frontend_Backend.png b/docs/content/img/FDB_Frontend_Backend.png new file mode 100644 index 000000000..a89ab4cd1 Binary files /dev/null and b/docs/content/img/FDB_Frontend_Backend.png differ diff --git a/docs/content/img/FDB_schema.png b/docs/content/img/FDB_schema.png new file mode 100644 index 000000000..2321d733a Binary files /dev/null and b/docs/content/img/FDB_schema.png differ diff --git a/docs/content/img/Grib_msg.png b/docs/content/img/Grib_msg.png new file mode 100644 index 000000000..8b7e170b0 Binary files /dev/null and b/docs/content/img/Grib_msg.png differ diff --git a/docs/content/img/dist_fdb.svg b/docs/content/img/dist_fdb.svg new file mode 100644 index 000000000..91d047b4b --- /dev/null +++ b/docs/content/img/dist_fdb.svg @@ -0,0 +1,634 @@ + + + +FDB-1ProcessAny processing function.Frontend/ClientFDB-2ProcessAny processing function.Frontend/Clientarchive/flushretrieve/listProcessAny processing function.Backend/ServerCatalogueStoreControl-ChannelData-ChannelAsynchronous via TCP/IPFDB-3ProcessAny processing function.Frontend/Clientarchive/flushretrieve/listProcessAny processing function.Backend/ServerCatalogueStoreControl-ChannelData-ChannelAsynchronous via TCP/IP diff --git a/docs/content/img/local_fdb.svg b/docs/content/img/local_fdb.svg new file mode 100644 index 000000000..0383b03cc --- /dev/null +++ b/docs/content/img/local_fdb.svg @@ -0,0 +1,346 @@ + + + + + + + + + + + Process + Any processing function. + + + + + + + + + + Magnetic Disk (Database) + A magnetic disk. (ISO) + + + + + + + + + + + + + + + + + FDB + + + + Process + Any processing function. + + + + Frontend/Client + + + + + Process + Any processing function. + + + + Backend/Server + + + + + + Catalogue + Store + + + archive/flush + retrieve/list + + diff --git a/docs/content/img/remote_fdb.svg b/docs/content/img/remote_fdb.svg new file mode 100644 index 000000000..ae4c241bc --- /dev/null +++ b/docs/content/img/remote_fdb.svg @@ -0,0 +1,362 @@ + + + +FDB-1ProcessAny processing function.Frontend/ClientFDB-2ProcessAny processing function.Frontend/Clientarchive/flushretrieve/listProcessAny processing function.Backend/ServerCatalogueStoreControl-ChannelData-ChannelAsynchronous via TCP/IP diff --git a/docs/content/img/select_fdb.svg b/docs/content/img/select_fdb.svg new file mode 100644 index 000000000..b600a17fe --- /dev/null +++ b/docs/content/img/select_fdb.svg @@ -0,0 +1,580 @@ + + + + + + + + + + + Process + Any processing function. + + + + + + + + + + Magnetic Disk (Database) + A magnetic disk. (ISO) + + + + + + + + + + + + + + + + FDB + + + + Process + Any processing function. + + + + Frontend/Client + + + + + + + + + + + + + + + + + + + + Process + Any processing function. + + + + Backend/Server-1 + + + Catalogue + Store + archive/flush + retrieve/list + + + + + + + + + + + + + Process + Any processing function. 
+ + + + Backend/Server-2 + + + Catalogue + Store + archive/flush + retrieve/list + + Metadata-Key-1 + Metadata-Key-2 + + + diff --git a/docs/content/introduction.rst b/docs/content/introduction.rst index 3b2e7fe6f..aa4f249e4 100644 --- a/docs/content/introduction.rst +++ b/docs/content/introduction.rst @@ -1,28 +1,11 @@ -============ -Introduction -============ - -|Licence| - -FDB (Fields DataBase) is a domain-specific object store developed at ECMWF for storing, indexing and retrieving GRIB data. Each GRIB message is stored as a field and indexed trough semantic metadata (i.e. physical variables such as temperature, pressure, ...). -A set of fields can be retrieved specifying a request using a specific language developed for accessing :doc:`mars` Archive - -FDB exposes a C++ API as well as CLI :doc:`tools`. +New Concept +~~~~~~~~~~~ .. toctree:: :maxdepth: 1 - requirements - installation - reference - tools - api - license - + concept/introduction .. |Licence| image:: https://img.shields.io/badge/License-Apache%202.0-blue.svg :target: https://github.com/ecmwf/fdb/blob/develop/LICENSE :alt: Apache Licence - -.. _mars: mars.html -.. _tools: tools.html diff --git a/docs/content/mars.rst b/docs/content/mars.rst index 355361962..a6978dc3a 100644 --- a/docs/content/mars.rst +++ b/docs/content/mars.rst @@ -1,5 +1,7 @@ +.. _mars-request-label: + MARS request -============ +------------ A **MARS request** is the way to specify an action on a set of fields or observations. The directives specified in a MARS request have the following syntax: :: diff --git a/docs/content/operational-introduction.rst b/docs/content/operational-introduction.rst new file mode 100644 index 000000000..5a2e18cad --- /dev/null +++ b/docs/content/operational-introduction.rst @@ -0,0 +1,10 @@ +.. _operational-introduction-label: + +Operational Introduction +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. toctree:: + :maxdepth: 2 + + operational/tools + diff --git a/docs/content/reference.rst b/docs/content/reference.rst index eb90a9fc6..993636369 100644 --- a/docs/content/reference.rst +++ b/docs/content/reference.rst @@ -5,7 +5,8 @@ Two publications, co-authored by Simon D. Smart, Tiago Quintino, Baudouin Raoult describe fdb architecture and have been presented at PASC'17 `A Scalable Object Store for Meteorological and Climate Data`_ and PASC'19 `A High-Performance Distributed Object-Store for Exascale Numerical Weather Prediction and Climate`_ In the following the two BibTeX snippets: -:: + +.. code-block:: latex @inproceedings{10.1145/3093172.3093238, author = {Smart, Simon D. and Quintino, Tiago and Raoult, Baudouin}, @@ -23,8 +24,6 @@ In the following the two BibTeX snippets: series = {PASC ’17} } - - @inproceedings{10.1145/3324989.3325726, author = {Smart, Simon D. and Quintino, Tiago and Raoult, Baudouin}, title = {A High-Performance Distributed Object-Store for Exascale Numerical Weather Prediction and Climate}, @@ -43,4 +42,4 @@ In the following the two BibTeX snippets: .. _A Scalable Object Store for Meteorological and Climate Data: https://dl.acm.org/doi/pdf/10.1145/3093172.3093238 -.. _A High-Performance Distributed Object-Store for Exascale Numerical Weather Prediction and Climate: https://dl.acm.org/doi/pdf/10.1145/3324989.3325726 \ No newline at end of file +.. 
_A High-Performance Distributed Object-Store for Exascale Numerical Weather Prediction and Climate: https://dl.acm.org/doi/pdf/10.1145/3324989.3325726 diff --git a/docs/content/requirements.rst b/docs/content/requirements.rst deleted file mode 100644 index f33fc1fe9..000000000 --- a/docs/content/requirements.rst +++ /dev/null @@ -1,14 +0,0 @@ -Requirements -============ - -Runtime dependencies: - -:eccodes: http://github.com/ecmwf/eccodes -:eckit: http://github.com/ecmwf/eckit -:metkit: http://github.com/ecmwf/metkit - - -Build dependencies: - -:CMake: For use and installation see http://www.cmake.org/ -:ecbuild: ECMWF library of CMake macros () \ No newline at end of file diff --git a/docs/content/technical-introduction.rst b/docs/content/technical-introduction.rst new file mode 100644 index 000000000..28ec7fb7e --- /dev/null +++ b/docs/content/technical-introduction.rst @@ -0,0 +1,28 @@ +.. _technical-introduction-label: + +Technical Introduction +~~~~~~~~~~~~~~~~~~~~~~ + +|Licence| + +FDB (Fields DataBase) is a domain-specific object store developed at ECMWF for storing, indexing and retrieving GRIB data. Each GRIB message is stored as a field and indexed through semantic metadata (i.e. physical variables such as temperature, pressure, ...). +A set of fields can be retrieved by specifying a request using a specific language developed for accessing the :doc:`mars` Archive. + +FDB exposes a C++ API as well as CLI tools, see :doc:`technical/api`. + +.. toctree:: + :maxdepth: 1 + + technical/requirements + technical/build-and-installation + technical/check-installation + technical/configuration + technical/config + technical/schema + technical/api + +.. |Licence| image:: https://img.shields.io/badge/License-Apache%202.0-blue.svg + :target: https://github.com/ecmwf/fdb/blob/develop/LICENSE + :alt: Apache Licence + +.. _mars: mars.html diff --git a/docs/content/technical/Api/api_c++.rst b/docs/content/technical/Api/api_c++.rst new file mode 100644 index 000000000..9cc2d0b3f --- /dev/null +++ b/docs/content/technical/Api/api_c++.rst @@ -0,0 +1,11 @@ +.. index:: Reference; C++ API + :name: c++-reference + +Work on adding documentation for the FDB API is in progress here. + +C++ API +======= +.. doxygenclass:: fdb5::FDB + :project: fdb + :members: + :protected-members: diff --git a/docs/content/api.rst b/docs/content/technical/Api/api_c.rst similarity index 98% rename from docs/content/api.rst rename to docs/content/technical/Api/api_c.rst index 8dd91384d..6e4352102 100644 --- a/docs/content/api.rst +++ b/docs/content/technical/Api/api_c.rst @@ -1,8 +1,8 @@ .. index:: Reference; C API :name: c-reference -API -=== +C API +===== Work on adding documentation for the FDB API is in progress here. diff --git a/docs/content/technical/Api/api_python.rst b/docs/content/technical/Api/api_python.rst new file mode 100644 index 000000000..942aa7587 --- /dev/null +++ b/docs/content/technical/Api/api_python.rst @@ -0,0 +1,9 @@ +.. index:: Reference; Python API + :name: Python-reference + +Work on adding documentation for the FDB Python API is in progress here. + +The Python API relies on a separate project, see https://github.com/ecmwf/pyfdb. + +Python API +========== diff --git a/docs/content/technical/api.rst b/docs/content/technical/api.rst new file mode 100644 index 000000000..526f9dccf --- /dev/null +++ b/docs/content/technical/api.rst @@ -0,0 +1,10 @@ +API Overview +============ + +The different APIs are found here: + +..
toctree:: + :maxdepth: 1 + :glob: + + Api/* diff --git a/docs/content/installation.rst b/docs/content/technical/build-and-installation.rst similarity index 78% rename from docs/content/installation.rst rename to docs/content/technical/build-and-installation.rst index 38290b160..960ef2b01 100644 --- a/docs/content/installation.rst +++ b/docs/content/technical/build-and-installation.rst @@ -1,11 +1,8 @@ -Installation -============ - -fdb employs an out-of-source build/install based on CMake. - +Build & Installation +==================== +FDB employs an out-of-source build/install based on CMake. Make sure ecbuild is installed and the ecbuild executable script is found ( ``which ecbuild`` ). -Now proceed with installation as follows: :: # Clone repo @@ -27,4 +24,5 @@ Now proceed with installation as follows: # 3. Compile / Install make -j10 ctest - make install \ No newline at end of file + make install + diff --git a/docs/content/technical/check-installation.rst b/docs/content/technical/check-installation.rst new file mode 100644 index 000000000..488cf0d65 --- /dev/null +++ b/docs/content/technical/check-installation.rst @@ -0,0 +1,37 @@ +Checking Installation +===================== + +To check whether the installation of the **FDB** was successful, we can execute +the following command + +.. code-block:: console + + which fdb + +This should show the path to the installed FDB instance. If you receive an error +message at this point, make sure that you added the path FDB is located +at to your `PATH` environment variable. + +.. code-block:: console + + fdb help + +The help argument shows a list of options for interacting with the FDB application. +The most useful at this stage is ``fdb home``, which checks whether the configuration +of FDB and the corresponding home directory was successful. Running this command +results in the following output: + +.. code-block:: console + + /path/to/fdb/home + +If this isn't the case, make sure you exported the ``FDB_HOME`` environment variable +as stated in the installation guide. Next we want to show information about +the FDB instance. We run + +.. code-block:: console + + fdb schema + +This should print the configured schema. + diff --git a/docs/content/technical/requirements.rst b/docs/content/technical/requirements.rst new file mode 100644 index 000000000..bbe402d52 --- /dev/null +++ b/docs/content/technical/requirements.rst @@ -0,0 +1,15 @@ +Requirements +============ + +Runtime dependencies: + +:eccodes: http://github.com/ecmwf/eccodes +:eckit: http://github.com/ecmwf/eckit +:metkit: http://github.com/ecmwf/metkit + + +Build dependencies: + +:CMake: For use and installation, see http://www.cmake.org/ +:ecbuild: ECMWF library of CMake macros, see https://github.com/ecmwf/ecbuild +:libaec: DKRZ Adaptive Entropy Coding Library, see https://gitlab.dkrz.de/k202009/libaec diff --git a/docs/img_raw/Grib_msg.svg b/docs/img_raw/Grib_msg.svg new file mode 100644 index 000000000..30072f0ff --- /dev/null +++ b/docs/img_raw/Grib_msg.svg @@ -0,0 +1,294 @@ + + + + + + + + + + + + + + Header + GRIB Msg 1 + Body + + + + + + + + Header + GRIB Msg 2 + Body + + + + + + + ...
+ + + + + + + + Header + GRIB Msg n + Body + + + diff --git a/docs/img_raw/dist_fdb.svg b/docs/img_raw/dist_fdb.svg new file mode 100644 index 000000000..91d047b4b --- /dev/null +++ b/docs/img_raw/dist_fdb.svg @@ -0,0 +1,634 @@ + + + +FDB-1ProcessAny processing function.Frontend/ClientFDB-2ProcessAny processing function.Frontend/Clientarchive/flushretrieve/listProcessAny processing function.Backend/ServerCatalogueStoreControl-ChannelData-ChannelAsynchronous via TCP/IPFDB-3ProcessAny processing function.Frontend/Clientarchive/flushretrieve/listProcessAny processing function.Backend/ServerCatalogueStoreControl-ChannelData-ChannelAsynchronous via TCP/IP diff --git a/docs/img_raw/local_fdb.svg b/docs/img_raw/local_fdb.svg new file mode 100644 index 000000000..0383b03cc --- /dev/null +++ b/docs/img_raw/local_fdb.svg @@ -0,0 +1,346 @@ + + + + + + + + + + + Process + Any processing function. + + + + + + + + + + Magnetic Disk (Database) + A magnetic disk. (ISO) + + + + + + + + + + + + + + + + + FDB + + + + Process + Any processing function. + + + + Frontend/Client + + + + + Process + Any processing function. + + + + Backend/Server + + + + + + Catalogue + Store + + + archive/flush + retrieve/list + + diff --git a/docs/img_raw/remote_fdb.svg b/docs/img_raw/remote_fdb.svg new file mode 100644 index 000000000..ae4c241bc --- /dev/null +++ b/docs/img_raw/remote_fdb.svg @@ -0,0 +1,362 @@ + + + +FDB-1ProcessAny processing function.Frontend/ClientFDB-2ProcessAny processing function.Frontend/Clientarchive/flushretrieve/listProcessAny processing function.Backend/ServerCatalogueStoreControl-ChannelData-ChannelAsynchronous via TCP/IP diff --git a/docs/img_raw/select_fdb.svg b/docs/img_raw/select_fdb.svg new file mode 100644 index 000000000..b600a17fe --- /dev/null +++ b/docs/img_raw/select_fdb.svg @@ -0,0 +1,580 @@ + + + + + + + + + + + Process + Any processing function. + + + + + + + + + + Magnetic Disk (Database) + A magnetic disk. (ISO) + + + + + + + + + + + + + + + + FDB + + + + Process + Any processing function. + + + + Frontend/Client + + + + + + + + + + + + + + + + + + + + Process + Any processing function. + + + + Backend/Server-1 + + + Catalogue + Store + archive/flush + retrieve/list + + + + + + + + + + + + + Process + Any processing function. + + + + Backend/Server-2 + + + Catalogue + Store + archive/flush + retrieve/list + + Metadata-Key-1 + Metadata-Key-2 + + + diff --git a/docs/index.rst b/docs/index.rst index d733ded39..7a5461d49 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,21 +1,60 @@ -Welcome to fbd's documentation! =============================== +Welcome to FDB's documentation! +=============================== + +The FDB (Fields DataBase) is a domain-specific object store developed at ECMWF for +storing, indexing and retrieving GRIB data. Each GRIB message is stored as a +field and indexed through semantic metadata (i.e. physical variables such as +temperature, pressure, ...). A set of fields can be retrieved by specifying a +request using a specific language developed for accessing the MARS Archive. + +The documentation is divided into three parts: + +*************************************** +:ref:`architectural-introduction-label` +*************************************** + +The aim of this part of the documentation is to give an overview of the system landscape, showing +how the FDB integrates into an existing setup, consisting of a data archive, using +the example of MARS.
+ +*********************************** +:ref:`technical-introduction-label` +*********************************** + +The aim of this part of the documentation is to give a broad and technical overview of the +API of the FDB. + +************************************* +:ref:`operational-introduction-label` +************************************* + +This part of the documentation aims at operations: how to configure and deploy the FDB. + +.. index:: Structure + +.. toctree:: + :maxdepth: 2 + :caption: Structure -FDB (Fields DataBase) is a domain-specific object store developed at ECMWF for storing, indexing and retrieving GRIB data. Each GRIB message is stored as a field and indexed trough semantic metadata (i.e. physical variables such as temperature, pressure, ...). -A set of fields can be retrieved specifying a request using a specific language developed for accessing MARS Archive. + content/concept/introduction + content/concept/installation + content/concept/architecture + content/concept/client-reference + content/concept/administration + content/concept/tools + content/concept/data-governance +.. raw:: html + + <hr>
.. toctree:: - :maxdepth: 1 - :caption: Contents + :maxdepth: 2 + :caption: Misc - content/introduction - content/installation - content/requirements content/reference - content/tools - content/config-schema content/license - genindex diff --git a/docs/requirements.in b/docs/requirements.in new file mode 100644 index 000000000..5bf06b848 --- /dev/null +++ b/docs/requirements.in @@ -0,0 +1,7 @@ +sphinx +sphinx-rtd-theme==0.5.2 +sphinx-copybutton==0.3.1 +sphinx-tabs +breathe +six +sphinx-fortran diff --git a/docs/requirements.txt b/docs/requirements.txt index 5bf06b848..5bfab66d9 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,7 +1,71 @@ -sphinx -sphinx-rtd-theme==0.5.2 +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --output-file=requirements.txt docs.in +# +alabaster==0.7.16 + # via sphinx +babel==2.14.0 + # via sphinx +breathe==4.35.0 + # via -r docs.in +certifi==2024.2.2 + # via requests +charset-normalizer==3.3.2 + # via requests +docutils==0.16 + # via + # breathe + # sphinx + # sphinx-rtd-theme + # sphinx-tabs +idna==3.6 + # via requests +imagesize==1.4.1 + # via sphinx +jinja2==3.1.3 + # via sphinx +markupsafe==2.1.5 + # via jinja2 +packaging==23.2 + # via sphinx +pygments==2.17.2 + # via + # sphinx + # sphinx-tabs +requests==2.31.0 + # via sphinx +six==1.16.0 + # via -r docs.in +snowballstemmer==2.2.0 + # via sphinx +sphinx==5.3.0 + # via + # -r docs.in + # breathe + # sphinx-copybutton + # sphinx-rtd-theme + # sphinx-tabs sphinx-copybutton==0.3.1 + # via -r docs.in +sphinx-fortran==1.1.1 + # via -r docs.in +sphinx-rtd-theme==0.5.2 + # via -r docs.in +sphinx-tabs==3.4.5 + # via -r docs.in +sphinxcontrib-applehelp==1.0.8 + # via sphinx +sphinxcontrib-devhelp==1.0.6 + # via sphinx +sphinxcontrib-htmlhelp==2.0.5 + # via sphinx +sphinxcontrib-jsmath==1.0.1 + # via sphinx +sphinxcontrib-qthelp==1.0.7 + # via sphinx +sphinxcontrib-serializinghtml==1.1.10 + # via sphinx +urllib3==2.2.0 + # via requests diff --git a/src/fdb5/api/FDB.h b/src/fdb5/api/FDB.h index cb9879363..42ee9eb42 100644 --- a/src/fdb5/api/FDB.h +++ b/src/fdb5/api/FDB.h @@ -69,16 +69,47 @@ class FDB { // -------------- Primary API functions ---------------------------- + /** @brief Archives binary data to a FDB instance. + * + * @param msg eckit::message::Message holding the data to archive + */ void archive(eckit::message::Message msg); + + /** @brief Archives binary data to a FDB instance. + * + * Reads messages from the eckit::DataHandle and calls archive() on the + * corresponding messages. + * + * @param handle eckit::DataHandle reference to the data to archive + */ void archive(eckit::DataHandle& handle); + + /** @brief Archives binary data to a FDB instance. + * + * Internally creates a DataHandle and calls archive(). + * + * @param data Pointer to the binary data to archive + * @param length Size of the data to archive + */ void archive(const void* data, size_t length); - // warning: not high-perf API - makes sure that all the requested fields are archived and there are no data exceeding the request + + // @warning not a high-performance API - makes sure that all the requested fields are archived and that no data exceeds the request void archive(const metkit::mars::MarsRequest& request, eckit::DataHandle& handle); - // disclaimer: this is a low-level API. The provided key and the corresponding data are not checked for consistency
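+ + // Illustrative usage sketch of the archive/flush workflow (comment only, not + // part of the API; the file name is a placeholder): + // + // fdb5::FDB fdb; // FDB instance with default configuration + // eckit::FileHandle in("data.grib"); // GRIB messages to archive + // fdb.archive(in); // index and store every message in the handle + // fdb.flush(); // commit to a consistent DB state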
+ + /** @brief Archives binary data to a FDB instance. + * + * @warning this is a low-level API. The provided key and the corresponding data are not checked for consistency + * + * @param key Key used for indexing and archiving the data + * @param data Pointer to the binary data to archive + * @param length Size of the data to archive with the given @p key + */ void archive(const Key& key, const void* data, size_t length); - /// Flushes all buffers and closes all data handles into a consistent DB state - /// @note always safe to call + /** @brief Flushes all buffers and closes all data handles into a consistent DB state + * + * @note always safe to call + */ void flush(); eckit::DataHandle* read(const eckit::URI& uri); @@ -87,10 +118,22 @@ class FDB { eckit::DataHandle* read(ListIterator& it, bool sorted = false); + /** @brief Retrieve data which is specified by a MARS request + * + * @param request MarsRequest which describes the data which should be retrieved + * @return DataHandle from which the requested data can be read + */ eckit::DataHandle* retrieve(const metkit::mars::MarsRequest& request); ListIterator inspect(const metkit::mars::MarsRequest& request); + /** @brief List data present in the archive and which can be retrieved + * + * @param request FDBToolRequest stating which data should be queried + * @param deduplicate bool, whether the returned iterator should ignore duplicates + * @return ListIterator for iterating over the set of found items + */ ListIterator list(const FDBToolRequest& request, bool deduplicate=false); DumpIterator dump(const FDBToolRequest& request, bool simple=false); @@ -98,6 +141,14 @@ class FDB { /// TODO: Is this function superfluous given the control() function? StatusIterator status(const FDBToolRequest& request); + /** @brief Wipe data from the database + * + * @param request FDBToolRequest stating which data should be queried + * @param doit flag for committing to the wipe (default is a dry run) + * @param porcelain flag for printing only a minimal, machine-readable output + * @param unsafeWipeAll flag for omitting all security checks and forcing a wipe + * @return WipeIterator for iterating over the set of wiped items + */ WipeIterator wipe(const FDBToolRequest& request, bool doit=false, bool porcelain=false, bool unsafeWipeAll=false); MoveIterator move(const FDBToolRequest& request, const eckit::URI& dest);
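+ + // Illustrative usage sketch of retrieve/list (comment only, not part of the + // API; request construction is elided): + // + // metkit::mars::MarsRequest req("retrieve"); // fill in keys, e.g. class, expver, ... + // std::unique_ptr<eckit::DataHandle> dh(fdb.retrieve(req)); // read GRIB data from *dh + // + // fdb5::ListIterator it = fdb.list(toolRequest, true); // deduplicated listing + // fdb5::ListElement elem; + // while (it.next(elem)) { /* inspect each indexed field */ }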