From 19a5c6cc1dbac898a546bcf7d95b27972f45b51e Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Fri, 28 Jun 2024 07:31:41 +0200 Subject: [PATCH 1/2] Update Todos --- TODO.md | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/TODO.md b/TODO.md index 36b55f4..9317969 100644 --- a/TODO.md +++ b/TODO.md @@ -1,15 +1,33 @@ -# TODOs +# Possible Tasks and Improvements -- Improve checkpoints/archives +- Benchmarking + - Benchmark and compare common timeseries DBs with our data and our queries +- Memory management + - To overcome garbage collection overhead: Reimplement in Rust + - Request memory directly batchwise via mmap (started in branch) +- Archive + - S3 backend for archive - Store information in each buffer if already archived - Do not create new checkpoint if all buffers already archived -- Missing Testcases: +- Checkpoints + - S3 backend for checkpoints + - Combine checkpoints into larger files + - Binary checkpoints (started in branch) +- API + - Redesign query interface + - Introduce JWT authentication for REST and NATS +- Testing - General tests - Check for corner cases that should fail gracefully - - Write a more realistic `ToArchive`/`FromArchive` tests -- Optimization: Once a buffer is full, calculate min, max and avg - - Calculate averages buffer-wise, average weighted by length of buffer + - Write a more realistic `ToArchive`/`FromArchive` Tests +- Aggregation + - Calculate averages buffer-wise as soon as full, average weighted by length of buffer - Only the head-buffer needs to be fully traversed -- Optimization: If aggregating over hwthreads/cores/sockets cache those results - and reuse some of that for new queres aggregating only over the newer data -- ... 
+ - If aggregating over hwthreads/cores/sockets cache those results and reuse + some of that for new queries aggregating only over the newer data +- Compression + - Enable compression for http API requests + - Enable compression for checkpoints/archive +- Sampling + - Support data re sampling to reduce data points + - Support re sampling algorithms that preserve min/max as far as possible From bc77ac4839301b1d65323818653032c1975fb852 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Fri, 28 Jun 2024 08:44:24 +0200 Subject: [PATCH 2/2] Update todos. Cleanup --- TODO.md | 49 +++++++++------- api/openapi.yaml | 148 ----------------------------------------------- 2 files changed, 28 insertions(+), 169 deletions(-) delete mode 100644 api/openapi.yaml diff --git a/TODO.md b/TODO.md index 9317969..bbf827e 100644 --- a/TODO.md +++ b/TODO.md @@ -1,33 +1,40 @@ # Possible Tasks and Improvements +Importance: + +- **I** Important +- **N** Nice to have +- **W** Won't do. Probably not necessary. + - Benchmarking - - Benchmark and compare common timeseries DBs with our data and our queries + - Benchmark and compare common timeseries DBs with our data and our queries (N) - Memory management - - To overcome garbage collection overhead: Reimplement in Rust - - Request memory directly batchwise via mmap (started in branch) + - To overcome garbage collection overhead: Reimplement in Rust (N) + - Request memory directly batchwise via mmap (started in branch) (W) - Archive - - S3 backend for archive - - Store information in each buffer if already archived - - Do not create new checkpoint if all buffers already archived + - S3 backend for archive (I) + - Store information in each buffer if already archived (N) + - Do not create new checkpoint if all buffers already archived (N) - Checkpoints - - S3 backend for checkpoints - - Combine checkpoints into larger files - - Binary checkpoints (started in branch) + - S3 backend for checkpoints (I) + - Combine checkpoints into larger files (I) + 
- Binary checkpoints (started in branch) (W) - API - - Redesign query interface - - Introduce JWT authentication for REST and NATS + - Redesign query interface (N) + - Introduce JWT authentication for REST and NATS (I) - Testing - - General tests - - Check for corner cases that should fail gracefully - - Write a more realistic `ToArchive`/`FromArchive` Tests + - General tests (I) + - Test data generator for regression tests (I) + - Check for corner cases that should fail gracefully (N) + - Write a more realistic `ToArchive`/`FromArchive` Tests (N) - Aggregation - - Calculate averages buffer-wise as soon as full, average weighted by length of buffer - - Only the head-buffer needs to be fully traversed + - Calculate averages buffer-wise as soon as full, average weighted by length of buffer (N) + - Only the head-buffer needs to be fully traversed (N) - If aggregating over hwthreads/cores/sockets cache those results and reuse - some of that for new queries aggregating only over the newer data + some of that for new queries aggregating only over the newer data (W) - Compression - - Enable compression for http API requests - - Enable compression for checkpoints/archive + - Enable compression for http API requests (N) + - Enable compression for checkpoints/archive (I) - Sampling - - Support data re sampling to reduce data points - - Support re sampling algorithms that preserve min/max as far as possible + - Support data resampling to reduce data points (I) + - Use resampling algorithms that preserve min/max as far as possible (I) diff --git a/api/openapi.yaml b/api/openapi.yaml deleted file mode 100644 index 501ff68..0000000 --- a/api/openapi.yaml +++ /dev/null @@ -1,148 +0,0 @@ -# OpenAPI spec describing a subset of the HTTP REST API for the cc-metric-store.
- -openapi: 3.0.3 -info: - title: 'cc-metric-store REST API' - description: 'In-memory time series database for hpc metrics to be used with the [ClusterCockpit](https://github.com/ClusterCockpit) toolsuite' - version: 0.1.0 -paths: - '/api/write': - post: - operationId: 'writeMetrics' - description: 'Recieves metrics in the influx line-protocol using [this format](https://github.com/ClusterCockpit/cc-specifications/blob/master/metrics/lineprotocol_alternative.md)' - parameters: - - name: cluster - in: query - schema: { type: string } - description: "If the lines in the body do not have a cluster tag, use this value instead." - requestBody: - required: true - content: - 'text/plain': - example: - 'flops_any,cluster=emmy,hostname=e1001,type=cpu,type-id=0 value=42.0' - responses: - 200: - description: 'Everything went fine' - 400: - description: 'Bad Request' - '/api/query': - post: - operationId: 'queryMetrics' - description: 'Query metrics' - requestBody: - required: true - content: - 'application/json': - schema: - type: object - required: [cluster, from, to] - properties: - cluster: - type: string - from: - type: integer - to: - type: integer - with-stats: - type: boolean - default: true - with-data: - type: boolean - default: true - queries: - type: array - items: - $ref: '#/components/schemas/ApiQuery' - for-all-nodes: - description: 'If not null, add a new query for every known host on that cluster and every metric (at node-scope) specified in this array to the request. This can be used to get a metric for every host in a cluster without knowing the name of every host.' - type: array - items: - type: string - responses: - 200: - description: 'Requested data and stats as JSON' - content: - 'application/json': - schema: - type: object - properties: - queries: - description: 'Only if for-all-nodes was used, this property exists.' 
- results: - type: array - description: 'Array where each element is a response to the query at that same index in the request' - items: - description: 'If `aggreg` is true, only ever has one element.' - type: array - items: - type: object - properties: - error: - description: 'If not null or undefined, an error happend processing that query' - type: string - nullable: true - data: - type: array - items: - type: number - nullable: true - avg: { type: number } - min: { type: number } - max: { type: number } - 400: - description: 'Bad Request' - '/api/free': - post: - operationId: 'freeBuffers' - description: 'Allow all buffers containing only data older than `to`' - parameters: - - name: to - in: query - description: 'Unix Timestamp' - required: true - schema: - type: integer - requestBody: - required: true - content: - 'application/json': - schema: - type: array - items: - type: array - items: - type: string - responses: - 200: - description: 'Everything went fine' - 400: - description: 'Bad Request' -components: - schemas: - ApiQuery: - description: 'A single query for a specific metric resulting in one series' - type: object - required: [metric, hostname, aggreg] - properties: - metirc: - type: string - hostname: - type: string - type: - description: 'Not required for node-level requests. Usually something like socket, cpu or hwthread.' - type: string - type-ids: - type: array - items: - type: string - aggreg: - type: boolean - description: 'If true, every query result will have exactly one element. Otherwise, the data for every requested type-id/sub-type-id is provided seperately' - securitySchemes: - bearerAuth: - type: http - scheme: bearer - bearerFormat: JWT -security: - - bearerAuth: [] # Applies `bearerAuth` globally \ No newline at end of file