From c258268dcade727a1f6f53c6c95c2e6d48fc9ac8 Mon Sep 17 00:00:00 2001 From: Thomas Minier Date: Thu, 13 Feb 2020 10:14:13 +0100 Subject: [PATCH 01/23] add first draft for a bgp cache --- package.json | 2 + src/engine/cache/bgp-cache.ts | 71 ++++++++++ src/engine/cache/cache-base.ts | 228 +++++++++++++++++++++++++++++++++ src/utils.ts | 26 ++++ yarn.lock | 187 ++++++++++++++++----------- 5 files changed, 439 insertions(+), 75 deletions(-) create mode 100644 src/engine/cache/bgp-cache.ts create mode 100644 src/engine/cache/cache-base.ts diff --git a/package.json b/package.json index 1aebde4e..3823726e 100644 --- a/package.json +++ b/package.json @@ -38,6 +38,7 @@ "homepage": "https://github.com/Callidon/sparql-engine#readme", "devDependencies": { "@types/lodash": "^4.14.116", + "@types/lru-cache": "^5.1.0", "@types/node": "^10.14.17", "@types/uuid": "^3.4.4", "@types/xml": "^1.0.2", @@ -54,6 +55,7 @@ "dependencies": { "@rdfjs/data-model": "^1.1.2", "lodash": "^4.17.15", + "lru-cache": "^5.1.1", "moment": "^2.22.2", "n3": "^0.11.3", "rdf-string": "^1.3.1", diff --git a/src/engine/cache/bgp-cache.ts b/src/engine/cache/bgp-cache.ts new file mode 100644 index 00000000..014d9577 --- /dev/null +++ b/src/engine/cache/bgp-cache.ts @@ -0,0 +1,71 @@ +/* file: bgp-cache.ts +MIT License + +Copyright (c) 2019-2020 Thomas Minier + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the 'Software'), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +'use strict' + +import { AsyncLRUCache, AsyncCache } from './cache-base' +import { Pipeline } from '../pipeline/pipeline' +import { PipelineStage } from '../pipeline/pipeline-engine' +import { Bindings } from '../../rdf/bindings' +import { Algebra } from 'sparqljs' +import { sparql } from '../../utils' + +export default class BGPCache implements AsyncCache { + private readonly _cache: AsyncLRUCache + constructor (maxSize: number, maxAge: number) { + this._cache = new AsyncLRUCache(maxSize, maxAge) + } + + has (bgp: Algebra.TripleObject[]): boolean { + return this._cache.has(sparql.hashBGP(bgp)) + } + + update (bgp: Algebra.TripleObject[], item: Bindings, writerID: string): void { + this._cache.update(sparql.hashBGP(bgp), item, writerID) + } + + get (bgp: Algebra.TripleObject[]): Bindings[] | null { + return this._cache.get(sparql.hashBGP(bgp)) + } + + getAsPipeline (bgp: Algebra.TripleObject[]): PipelineStage { + const bindings = this.get(bgp) + if (bindings === null) { + return Pipeline.getInstance().empty() + } + return Pipeline.getInstance().from(bindings.map(b => b.clone())) + } + + commit (bgp: Algebra.TripleObject[], writerID: string): void { + this._cache.commit(sparql.hashBGP(bgp), writerID) + } + + delete (bgp: Algebra.TripleObject[], writerID: string): void { + this._cache.delete(sparql.hashBGP(bgp), writerID) + } + + count (): number { + return this._cache.count() + } +} diff --git a/src/engine/cache/cache-base.ts b/src/engine/cache/cache-base.ts new file mode 100644 index 
00000000..5a340a12 --- /dev/null +++ b/src/engine/cache/cache-base.ts @@ -0,0 +1,228 @@ +/* file: cache-base.ts +MIT License + +Copyright (c) 2019-2020 Thomas Minier + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the 'Software'), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +'use strict' + +import LRU from 'lru-cache' + +interface AsyncCacheEntry { + content: Array, + writerID: I, + isComplete: boolean +} + +/** + * A cache is a vue that materializes data for latter re-use + * @author Thomas Minier + */ +export interface Cache { + /** + * Put an item into the cache + * @param key - Item's key + * @param item - Item + */ + put (key: K, item: T): void + + /** + * Test if the cache contains an item with a given key + * @param key - Item's key + * @return True if the cache contains the item with the given key, False otherwise + */ + has (key: K): boolean + + /** + * Access an item by its key. + * Each call to get() should be predated by a call to has(), + * to check if the item is in the cache. 
+ * @param key - Item's key + * @return The item with the given key, or null if it was not found + */ + get (key: K): T | null + + /** + * Remove an item from the cache + * @param key - Item's key + */ + delete (key: K): void + + /** + * Get the number of items currently in the cache + * @return The number of items currently in the cache + */ + count (): number +} + +/** + * An async cache is cache which stores collections of items that are built over time. + * Writers will call the update and commit method to update the cache content & mark items as available. + * @author Thomas Minier + */ +export interface AsyncCache { + /** + * Update an item into the cache + * @param key - Item's key + * @param item - Item + * @param writerID - ID of the writer + */ + update (key: K, item: T, writerID: I): void + + /** + * Mark an item as available from the cache + * @param key - Item's key + * @param IwriterID - ID of the writer + */ + commit (key: K, writerID: I): void + + /** + * Test if the cache contains an item with a given key + * @param key - Item's key + * @return True if the cache contains the item with the given key, False otherwise + */ + has (key: K): boolean + + /** + * Access an item by its key. + * Each call to get() should be predated by a call to has() to check if the item is in the cache. 
+ * @param key - Item's key + * @return The values of the item with the given key, or null if it was not found + */ + get (key: K): T[] | null + + /** + * Remove an item from the cache + * @param key - Item's key + */ + delete (key: K, writerID: I): void + + /** + * Get the number of items currently in the cache + * @return The number of items currently in the cache + */ + count (): number +} + +/** + * An in-memory LRU cache + * @author Thomas Minier + */ +export class BaseLRUCache implements Cache { + private readonly _content: LRU + + constructor (maxSize: number, maxAge: number) { + const options = { + max: maxSize, + maxAge + } + this._content = new LRU(options) + } + + put (key: K, item: T): void { + this._content.set(key, item) + } + + has (key: K): boolean { + return this._content.has(key) + } + + get (key: K): T | null { + if (this._content.has(key)) { + return this._content.get(key)! + } + return null + } + + delete (key: K): void { + this.delete(key) + } + + count (): number { + return this._content.itemCount + } +} + +/** + * An in-memory LRU cache that supports async insertion of items + * @author Thomas Minier + */ +export class AsyncLRUCache implements AsyncCache { + private readonly _cache: BaseLRUCache> + + constructor (maxSize: number, maxAge: number) { + this._cache = new BaseLRUCache(maxSize, maxAge) + } + + has (key: K): boolean { + if (this._cache.has(key)) { + const entry = this._cache.get(key)! + return entry.isComplete + } + return false + } + + update (key: K, item: T, writerID: I): void { + if (this.has(key)) { + const entry = this._cache.get(key)! + if (entry.writerID === writerID) { + entry.content.push(item) + this._cache.put(key, entry) + } + } else { + this._cache.put(key, { + content: [item], + writerID, + isComplete: false + }) + } + } + + commit (key: K, writerID: I): void { + if (this._cache.has(key)) { + const entry = this._cache.get(key)! 
+ if (entry.writerID === writerID) { + entry.isComplete = true + this._cache.put(key, entry) + } + } + } + + get (key: K): T[] | null { + if (this.has(key)) { + return this._cache.get(key)!.content + } + return null + } + + delete (key: K, writerID: I): void { + if (this._cache.has(key)) { + const entry = this._cache.get(key)! + if (entry.writerID === writerID) { + this._cache.delete(key) + } + } + } + + count (): number { + return this._cache.count() + } +} diff --git a/src/utils.ts b/src/utils.ts index 9e37b396..f0369e83 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -33,6 +33,7 @@ import { parseZone, Moment, ISO_8601 } from 'moment' import * as DataFactory from '@rdfjs/data-model' import { BlankNode, Literal, NamedNode, Term } from 'rdf-js' import { termToString, stringToTerm } from 'rdf-string' +import * as crypto from 'crypto' /** * RDF related utilities @@ -404,6 +405,15 @@ export namespace rdf { return literal } + /** + * Hash Triple (pattern) to assign it an unique ID + * @param triple - Triple (pattern) to hash + * @return An unique ID to identify the Triple (pattern) + */ + export function hashTriple (triple: Algebra.TripleObject): string { + return `s=${triple.subject}&p=${triple.predicate}&o=${triple.object}` + } + /** * Create an IRI under the XSD namespace * () @@ -449,6 +459,22 @@ export namespace rdf { * SPARQL related utilities */ export namespace sparql { + /** + * Hash Basic Graph pattern to assign them an unique ID + * @param bgp - Basic Graph Pattern to hash + * @param md5 - True if the ID should be hashed to md5, False to keep it as a plain text string + * @return An unique ID to identify the BGP + */ + export function hashBGP (bgp: Algebra.TripleObject[], md5: boolean = false): string { + const hashedBGP = bgp.map(rdf.hashTriple).join(';') + if (!md5) { + return hashedBGP + } + const hash = crypto.createHash('md5') + hash.update(hashedBGP) + return hash.digest('hex') + } + /** * Get the set of SPARQL variables in a triple pattern * @param 
pattern - Triple Pattern diff --git a/yarn.lock b/yarn.lock index 69ea82f1..211b5e49 100644 --- a/yarn.lock +++ b/yarn.lock @@ -25,39 +25,47 @@ dependencies: "@types/rdf-js" "^2.0.1" +"@tootallnate/once@1": + version "1.0.0" + resolved "https://registry.yarnpkg.com/@tootallnate/once/-/once-1.0.0.tgz#9c13c2574c92d4503b005feca8f2e16cc1611506" + integrity sha512-KYyTT/T6ALPkIRd2Ge080X/BsXvy9O0hcWTtMWkPvwAwF99+vn6Dv4GzrFT/Nn1LePr+FFDbRXXlqmsy9lw2zA== + "@types/lodash@^4.14.116": version "4.14.149" resolved "https://registry.yarnpkg.com/@types/lodash/-/lodash-4.14.149.tgz#1342d63d948c6062838fbf961012f74d4e638440" integrity sha512-ijGqzZt/b7BfzcK9vTrS6MFljQRPn5BFWOx8oE0GYxribu6uV+aA9zZuXI1zc/etK9E8nrgdoF2+LgUw7+9tJQ== +"@types/lru-cache@^5.1.0": + version "5.1.0" + resolved "https://registry.yarnpkg.com/@types/lru-cache/-/lru-cache-5.1.0.tgz#57f228f2b80c046b4a1bd5cac031f81f207f4f03" + integrity sha512-RaE0B+14ToE4l6UqdarKPnXwVDuigfFv+5j9Dze/Nqr23yyuqdNvzcZi3xB+3Agvi5R4EOgAksfv3lXX4vBt9w== + "@types/minimatch@3.0.3": version "3.0.3" resolved "https://registry.yarnpkg.com/@types/minimatch/-/minimatch-3.0.3.tgz#3dca0e3f33b200fc7d1139c0cd96c1268cadfd9d" integrity sha512-tHq6qdbT9U1IRSGf14CL0pUlULksvY9OZ+5eEgl1N7t+OA3tGvNpxJCzuKQlsNgCVwbAs670L1vcVQi8j9HjnA== "@types/node@*": - version "13.5.0" - resolved "https://registry.yarnpkg.com/@types/node/-/node-13.5.0.tgz#4e498dbf355795a611a87ae5ef811a8660d42662" - integrity sha512-Onhn+z72D2O2Pb2ql2xukJ55rglumsVo1H6Fmyi8mlU9SvKdBk/pUSUAiBY/d9bAOF7VVWajX3sths/+g6ZiAQ== + version "13.7.1" + resolved "https://registry.yarnpkg.com/@types/node/-/node-13.7.1.tgz#238eb34a66431b71d2aaddeaa7db166f25971a0d" + integrity sha512-Zq8gcQGmn4txQEJeiXo/KiLpon8TzAl0kmKH4zdWctPj05nWwp1ClMdAVEloqrQKfaC48PNLdgN/aVaLqUrluA== "@types/node@^10.14.17": - version "10.17.13" - resolved "https://registry.yarnpkg.com/@types/node/-/node-10.17.13.tgz#ccebcdb990bd6139cd16e84c39dc2fb1023ca90c" - integrity 
sha512-pMCcqU2zT4TjqYFrWtYHKal7Sl30Ims6ulZ4UFXxI4xbtQqK/qqKwkDoBFCfooRqqmRu9vY3xaJRwxSh673aYg== + version "10.17.15" + resolved "https://registry.yarnpkg.com/@types/node/-/node-10.17.15.tgz#bfff4e23e9e70be6eec450419d51e18de1daf8e7" + integrity sha512-daFGV9GSs6USfPgxceDA8nlSe48XrVCJfDeYm7eokxq/ye7iuOH87hKXgMtEAVLFapkczbZsx868PMDT1Y0a6A== "@types/rdf-js@^2.0.1": - version "2.0.9" - resolved "https://registry.yarnpkg.com/@types/rdf-js/-/rdf-js-2.0.9.tgz#4d94b62fa6e36a1dfeb339d5141e709ba70378ba" - integrity sha512-C7YCfZDlWcN/SjZsFa/DjH7aKJjJ48KgnfpvKKWSx2/i8zjp4lLAw4uTrcArd+oAmar8e+Nnxg9w1SPFGzEhXQ== + version "2.0.11" + resolved "https://registry.yarnpkg.com/@types/rdf-js/-/rdf-js-2.0.11.tgz#b9e398504ceb9f00eaa3b3036b643dc3490cf362" + integrity sha512-GC5MZU2HbL5JnlrLAzoxSqLprqtKwocz0TNVugqM04t1ZeeNFpZRqqBQc9Jhev35hEwdH84siRLaCesxHHYlmA== dependencies: "@types/node" "*" "@types/uuid@^3.4.4": - version "3.4.6" - resolved "https://registry.yarnpkg.com/@types/uuid/-/uuid-3.4.6.tgz#d2c4c48eb85a757bf2927f75f939942d521e3016" - integrity sha512-cCdlC/1kGEZdEglzOieLDYBxHsvEOIg7kp/2FYyVR9Pxakq+Qf/inL3RKQ+PA8gOlI/NnL+fXmQH12nwcGzsHw== - dependencies: - "@types/node" "*" + version "3.4.7" + resolved "https://registry.yarnpkg.com/@types/uuid/-/uuid-3.4.7.tgz#51d42247473bc00e38cc8dfaf70d936842a36c03" + integrity sha512-C2j2FWgQkF1ru12SjZJyMaTPxs/f6n90+5G5qNakBxKXjTBc/YTSelHh4Pz1HUDwxFXD9WvpQhOGCDC+/Y4mIQ== "@types/xml@^1.0.2": version "1.0.4" @@ -83,12 +91,17 @@ acorn@^5.5.0: resolved "https://registry.yarnpkg.com/acorn/-/acorn-5.7.3.tgz#67aa231bf8812974b85235a96771eb6bd07ea279" integrity sha512-T/zvzYRfbVojPWahDsE5evJdHb3oJoQfFbsrKM7w5Zcs++Tr257tia3BmMP8XYVjp1S9RZXQMh7gao96BlqZOw== -agent-base@^4.3.0: - version "4.3.0" - resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-4.3.0.tgz#8165f01c436009bccad0b1d122f05ed770efc6ee" - integrity sha512-salcGninV0nPrwpGNn4VTXBb1SOuXQBiqbrNXoeizJsHrsL6ERFM2Ne3JUSBWRE6aeNJI2ROP/WEEIDUiDe3cg== +agent-base@5: + version "5.1.1" + 
resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-5.1.1.tgz#e8fb3f242959db44d63be665db7a8e739537a32c" + integrity sha512-TMeqbNl2fMW0nMjTEPOwe3J/PRFP4vqeoNuQMG0HlMrtm5QxKqdvAkZ1pRBQ/ulIyDD5Yq0nJ7YbdD8ey0TO3g== + +agent-base@6: + version "6.0.0" + resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-6.0.0.tgz#5d0101f19bbfaed39980b22ae866de153b93f09a" + integrity sha512-j1Q7cSCqN+AwrmDd+pzgqc0/NpC655x2bUf5ZjRIO77DcNBFmh+OgRNzF6OKdCC9RSCb19fGd99+bhXFdkRNqw== dependencies: - es6-promisify "^5.0.0" + debug "4" ajv-keywords@^2.1.0: version "2.1.1" @@ -139,7 +152,7 @@ argparse@^1.0.7: dependencies: sprintf-js "~1.0.2" -argv@^0.0.2: +argv@0.0.2: version "0.0.2" resolved "https://registry.yarnpkg.com/argv/-/argv-0.0.2.tgz#ecbd16f8949b157183711b1bda334f37840185ab" integrity sha1-7L0W+JSbFXGDcRsb2jNPN4QBhas= @@ -300,16 +313,15 @@ co@^4.6.0: integrity sha1-bqa989hTrlTMuOR7+gvz+QMfsYQ= codecov@^3.0.4: - version "3.6.2" - resolved "https://registry.yarnpkg.com/codecov/-/codecov-3.6.2.tgz#9503533d744233f6864f8f3ead9435d285ed3f47" - integrity sha512-i1VYZYY3M8Lodk/QRsIWYVimkuhl0oMSiM2itxbTbEIjB0PCSWP1cI7cscu5P0MayggoTl6I/jkXV2go8Ub8/Q== + version "3.6.5" + resolved "https://registry.yarnpkg.com/codecov/-/codecov-3.6.5.tgz#d73ce62e8a021f5249f54b073e6f2d6a513f172a" + integrity sha512-v48WuDMUug6JXwmmfsMzhCHRnhUf8O3duqXvltaYJKrO1OekZWpB/eH6iIoaxMl8Qli0+u3OxptdsBOYiD7VAQ== dependencies: - argv "^0.0.2" - ignore-walk "^3.0.1" - js-yaml "^3.13.1" - teeny-request "^3.11.3" - urlgrey "^0.4.4" - validator "^12.1.0" + argv "0.0.2" + ignore-walk "3.0.3" + js-yaml "3.13.1" + teeny-request "6.0.1" + urlgrey "0.4.4" color-convert@^1.9.0: version "1.9.3" @@ -379,6 +391,13 @@ debug@3.1.0: dependencies: ms "2.0.0" +debug@4: + version "4.1.1" + resolved "https://registry.yarnpkg.com/debug/-/debug-4.1.1.tgz#3b72260255109c6b589cee050f1d516139664791" + integrity sha512-pYAIzeRo8J6KPEaJ0VWOh5Pzkbw/RetuzehGM7QRRX5he4fPHx2rdKMB256ehJCkX+XRQm16eZLqLNS8RSZXZw== + dependencies: + ms 
"^2.1.1" + debug@^2.6.8, debug@^2.6.9: version "2.6.9" resolved "https://registry.yarnpkg.com/debug/-/debug-2.6.9.tgz#5d128515df134ff327e90a4c93f4e077a536341f" @@ -497,18 +516,6 @@ es-to-primitive@^1.2.1: is-date-object "^1.0.1" is-symbol "^1.0.2" -es6-promise@^4.0.3: - version "4.2.8" - resolved "https://registry.yarnpkg.com/es6-promise/-/es6-promise-4.2.8.tgz#4eb21594c972bc40553d276e510539143db53e0a" - integrity sha512-HJDGx5daxeIvxdBxvG2cb9g4tEvwIk3i8+nhX0yGrYmZUzbkdg8QbDevheDB8gd0//uPj4c1EQua8Q+MViT0/w== - -es6-promisify@^5.0.0: - version "5.0.0" - resolved "https://registry.yarnpkg.com/es6-promisify/-/es6-promisify-5.0.0.tgz#5109d62f3e56ea967c4b63505aef08291c8a5203" - integrity sha1-UQnWLz5W6pZ8S2NQWu8IKRyKUgM= - dependencies: - es6-promise "^4.0.3" - escape-string-regexp@1.0.5, escape-string-regexp@^1.0.2, escape-string-regexp@^1.0.5: version "1.0.5" resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz#1b61c0562190a8dff6ae3bb2cf0200ca130b86d4" @@ -656,9 +663,9 @@ esprima@^4.0.0: integrity sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A== esquery@^1.0.0: - version "1.0.1" - resolved "https://registry.yarnpkg.com/esquery/-/esquery-1.0.1.tgz#406c51658b1f5991a5f9b62b1dc25b00e3e5c708" - integrity sha512-SmiyZ5zIWH9VM+SRUReLS5Q8a7GxtRdxEBVZpm98rJM7Sb+A9DVCndXfkeFUd3byderg+EbDkfnevfCwynWaNA== + version "1.1.0" + resolved "https://registry.yarnpkg.com/esquery/-/esquery-1.1.0.tgz#c5c0b66f383e7656404f86b31334d72524eddb48" + integrity sha512-MxYW9xKmROWF672KqjO75sszsA8Mxhw06YFeS5VHlB98KDHbOSurm3ArsjO60Eaf3QmGMCP1yn+0JQkNLo/97Q== dependencies: estraverse "^4.0.0" @@ -819,9 +826,9 @@ growl@1.10.5: integrity sha512-qBr4OuELkhPenW6goKVXiv47US3clb3/IbuWF9KNKEijAy9oeHxU9IgzjvJhHkUzhaj7rOUD7+YGWqUjLp5oSA== handlebars@^4.7.0: - version "4.7.2" - resolved "https://registry.yarnpkg.com/handlebars/-/handlebars-4.7.2.tgz#01127b3840156a0927058779482031afe0e730d7" - integrity 
sha512-4PwqDL2laXtTWZghzzCtunQUTLbo31pcCJrd/B/9JP8XbhVzpS5ZXuKqlOzsd1rtcaLo4KqAn8nl8mkknS4MHw== + version "4.7.3" + resolved "https://registry.yarnpkg.com/handlebars/-/handlebars-4.7.3.tgz#8ece2797826886cf8082d1726ff21d2a022550ee" + integrity sha512-SRGwSYuNfx8DwHD/6InAPzD6RgeruWLT+B8e8a7gGs8FWgHzlExpTFMEq2IA6QpAfOClpKHy6+8IqTjeBCu6Kg== dependencies: neo-async "^2.6.0" optimist "^0.6.1" @@ -859,22 +866,31 @@ he@1.1.1: integrity sha1-k0EP0hsAlzUVH4howvJx80J+I/0= highlight.js@^9.17.1: - version "9.18.0" - resolved "https://registry.yarnpkg.com/highlight.js/-/highlight.js-9.18.0.tgz#6b1763cfcd53744313bd3f31f1210f7beb962c79" - integrity sha512-A97kI1KAUzKoAiEoaGcf2O9YPS8nbDTCRFokaaeBhnqjQTvbAuAJrQMm21zw8s8xzaMtCQBtgbyGXLGxdxQyqQ== + version "9.18.1" + resolved "https://registry.yarnpkg.com/highlight.js/-/highlight.js-9.18.1.tgz#ed21aa001fe6252bb10a3d76d47573c6539fe13c" + integrity sha512-OrVKYz70LHsnCgmbXctv/bfuvntIKDz177h0Co37DQ5jamGZLVmoCVMtjMtNZY3X9DrCcKfklHPNeA0uPZhSJg== hosted-git-info@^2.1.4: version "2.8.5" resolved "https://registry.yarnpkg.com/hosted-git-info/-/hosted-git-info-2.8.5.tgz#759cfcf2c4d156ade59b0b2dfabddc42a6b9c70c" integrity sha512-kssjab8CvdXfcXMXVcvsXum4Hwdq9XGtRD3TteMEvEbq0LXyiNQr6AprqKqfeaDXze7SxWvRxdpwE6ku7ikLkg== -https-proxy-agent@^2.2.1: - version "2.2.4" - resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-2.2.4.tgz#4ee7a737abd92678a293d9b34a1af4d0d08c787b" - integrity sha512-OmvfoQ53WLjtA9HeYP9RNrWMJzzAz1JGaSFr1nijg0PVR1JaD/xbJq1mdEIIlxGpXp9eSe/O2LgU9DJmTPd0Eg== +http-proxy-agent@^4.0.0: + version "4.0.1" + resolved "https://registry.yarnpkg.com/http-proxy-agent/-/http-proxy-agent-4.0.1.tgz#8a8c8ef7f5932ccf953c296ca8291b95aa74aa3a" + integrity sha512-k0zdNgqWTGA6aeIRVpvfVob4fL52dTfaehylg0Y4UvSySvOq/Y+BOyPrgpUrA7HylqvU8vIZGsRuXmspskV0Tg== dependencies: - agent-base "^4.3.0" - debug "^3.1.0" + "@tootallnate/once" "1" + agent-base "6" + debug "4" + +https-proxy-agent@^4.0.0: + version "4.0.0" + resolved 
"https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-4.0.0.tgz#702b71fb5520a132a66de1f67541d9e62154d82b" + integrity sha512-zoDhWrkR3of1l9QAL8/scJZyLu8j/gBkcwcaQOZh7Gyh/+uJQzGVETdgT30akuwkpL8HTRfssqI3BZuV18teDg== + dependencies: + agent-base "5" + debug "4" iconv-lite@^0.4.17: version "0.4.24" @@ -883,7 +899,7 @@ iconv-lite@^0.4.17: dependencies: safer-buffer ">= 2.1.2 < 3" -ignore-walk@^3.0.1: +ignore-walk@3.0.3: version "3.0.3" resolved "https://registry.yarnpkg.com/ignore-walk/-/ignore-walk-3.0.3.tgz#017e2447184bfeade7c238e4aefdd1e8f95b1e37" integrity sha512-m7o6xuOaT1aqheYHKf8W6J5pYH85ZI9w077erOzLje3JsB1gkafkAhHHY19dqjulgIZHFm32Cp5uNZgcQqdJKw== @@ -1017,7 +1033,7 @@ js-tokens@^3.0.2: resolved "https://registry.yarnpkg.com/js-tokens/-/js-tokens-3.0.2.tgz#9866df395102130e38f7f996bceb65443209c25b" integrity sha1-mGbfOVECEw449/mWvOtlRDIJwls= -js-yaml@^3.13.1, js-yaml@^3.9.1: +js-yaml@3.13.1, js-yaml@^3.13.1, js-yaml@^3.9.1: version "3.13.1" resolved "https://registry.yarnpkg.com/js-yaml/-/js-yaml-3.13.1.tgz#aff151b30bfdfa8e49e05da22e7415e9dfa37847" integrity sha512-YfbcO7jXDdyj0DGxYVSlSeQNHbD7XPWvrVWeVUujrQEoZzWJIRrCPoyk6kL6IAjAG2IolMK4T0hNUe0HOUs5Jw== @@ -1111,6 +1127,13 @@ lru-cache@^4.0.1: pseudomap "^1.0.2" yallist "^2.1.2" +lru-cache@^5.1.1: + version "5.1.1" + resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-5.1.1.tgz#1da27e6710271947695daf6848e847f01d84b920" + integrity sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w== + dependencies: + yallist "^3.0.2" + lunr@^2.3.8: version "2.3.8" resolved "https://registry.yarnpkg.com/lunr/-/lunr-2.3.8.tgz#a8b89c31f30b5a044b97d2d28e2da191b6ba2072" @@ -1483,9 +1506,9 @@ resolve-from@^1.0.0: integrity sha1-Jsv+k10a7uq7Kbw/5a6wHpPUQiY= resolve@^1.1.6, resolve@^1.10.0, resolve@^1.13.1, resolve@^1.3.2, resolve@^1.3.3: - version "1.15.0" - resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.15.0.tgz#1b7ca96073ebb52e741ffd799f6b39ea462c67f5" - 
integrity sha512-+hTmAldEGE80U2wJJDC1lebb5jWqvTYAfm3YZ1ckk1gBr0MnCqUKlwK1e+anaFljIl+F5tR5IoZcm4ZDA1zMQw== + version "1.15.1" + resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.15.1.tgz#27bdcdeffeaf2d6244b95bb0f9f4b4653451f3e8" + integrity sha512-84oo6ZTtoTUpjgNEr5SJyzQhzL72gaRodsSfyxC/AXRvwu0Yse9H8eF9IpGo7b8YetZhlI6v7ZQ6bKBFV/6S7w== dependencies: path-parse "^1.0.6" @@ -1654,6 +1677,13 @@ standard@^11.0.1: eslint-plugin-standard "~3.0.1" standard-engine "~8.0.0" +stream-events@^1.0.5: + version "1.0.5" + resolved "https://registry.yarnpkg.com/stream-events/-/stream-events-1.0.5.tgz#bbc898ec4df33a4902d892333d47da9bf1c406d5" + integrity sha512-E1GUzBSgvct8Jsb3v2X15pjzN1tYebtbLaMg+eBOUOAxgbLoSbT2NS91ckc5lJD1KfLjId+jXJRgo0qnV5Nerg== + dependencies: + stubs "^3.0.0" + string-width@^2.1.0, string-width@^2.1.1: version "2.1.1" resolved "https://registry.yarnpkg.com/string-width/-/string-width-2.1.1.tgz#ab93f27a8dc13d28cac815c462143a6d9012ae9e" @@ -1709,6 +1739,11 @@ strip-json-comments@~2.0.1: resolved "https://registry.yarnpkg.com/strip-json-comments/-/strip-json-comments-2.0.1.tgz#3c531942e908c2697c0ec344858c286c7ca0a60a" integrity sha1-PFMZQukIwml8DsNEhYwobHygpgo= +stubs@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/stubs/-/stubs-3.0.0.tgz#e8d2ba1fa9c90570303c030b6900f7d5f89abe5b" + integrity sha1-6NK6H6nJBXAwPAMLaQD31fiavls= + supports-color@5.4.0: version "5.4.0" resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-5.4.0.tgz#1c6b337402c2137605efe19f10fec390f6faab54" @@ -1740,13 +1775,15 @@ table@4.0.2: slice-ansi "1.0.0" string-width "^2.1.1" -teeny-request@^3.11.3: - version "3.11.3" - resolved "https://registry.yarnpkg.com/teeny-request/-/teeny-request-3.11.3.tgz#335c629f7645e5d6599362df2f3230c4cbc23a55" - integrity sha512-CKncqSF7sH6p4rzCgkb/z/Pcos5efl0DmolzvlqRQUNcpRIruOhY9+T1FsIlyEbfWd7MsFpodROOwHYh2BaXzw== +teeny-request@6.0.1: + version "6.0.1" + resolved 
"https://registry.yarnpkg.com/teeny-request/-/teeny-request-6.0.1.tgz#9b1f512cef152945827ba7e34f62523a4ce2c5b0" + integrity sha512-TAK0c9a00ELOqLrZ49cFxvPVogMUFaWY8dUsQc/0CuQPGF+BOxOQzXfE413BAk2kLomwNplvdtMpeaeGWmoc2g== dependencies: - https-proxy-agent "^2.2.1" + http-proxy-agent "^4.0.0" + https-proxy-agent "^4.0.0" node-fetch "^2.2.0" + stream-events "^1.0.5" uuid "^3.3.2" text-table@~0.2.0: @@ -1880,9 +1917,9 @@ typescript@3.7.x, typescript@^3.6.2: integrity sha512-/P5lkRXkWHNAbcJIiHPfRoKqyd7bsyCma1hZNUGfn20qm64T6ZBlrzprymeu918H+mB/0rIg2gGK/BXkhhYgBw== uglify-js@^3.1.4: - version "3.7.6" - resolved "https://registry.yarnpkg.com/uglify-js/-/uglify-js-3.7.6.tgz#0783daa867d4bc962a37cc92f67f6e3238c47485" - integrity sha512-yYqjArOYSxvqeeiYH2VGjZOqq6SVmhxzaPjJC1W2F9e+bqvFL9QXQ2osQuKUFjM2hGjKG2YclQnRKWQSt/nOTQ== + version "3.7.7" + resolved "https://registry.yarnpkg.com/uglify-js/-/uglify-js-3.7.7.tgz#21e52c7dccda80a53bf7cde69628a7e511aec9c9" + integrity sha512-FeSU+hi7ULYy6mn8PKio/tXsdSXN35lm4KgV2asx00kzrLU9Pi3oAslcJT70Jdj7PHX29gGUPOT6+lXGBbemhA== dependencies: commander "~2.20.3" source-map "~0.6.1" @@ -1902,7 +1939,7 @@ universalify@^0.1.0: resolved "https://registry.yarnpkg.com/universalify/-/universalify-0.1.2.tgz#b646f69be3942dabcecc9d6639c80dc105efaa66" integrity sha512-rBJeI5CXAlmy1pV+617WB9J63U6XcazHHF2f2dbJix4XzpUF0RS3Zbj0FGIOCAva5P/d/GBOYaACQ1w+0azUkg== -urlgrey@^0.4.4: +urlgrey@0.4.4: version "0.4.4" resolved "https://registry.yarnpkg.com/urlgrey/-/urlgrey-0.4.4.tgz#892fe95960805e85519f1cd4389f2cb4cbb7652f" integrity sha1-iS/pWWCAXoVRnxzUOJ8stMu3ZS8= @@ -1925,11 +1962,6 @@ validate-npm-package-license@^3.0.1: spdx-correct "^3.0.0" spdx-expression-parse "^3.0.0" -validator@^12.1.0: - version "12.1.0" - resolved "https://registry.yarnpkg.com/validator/-/validator-12.1.0.tgz#a3a7315d5238cbc15e46ad8d5e479aafa7119925" - integrity sha512-gIC2RBuFRi574Rb9vewGCJ7TCLxHXNx6EKthEgs+Iz0pYa9a9Te1VLG/bGLsAyGWrqR5FfR7tbFUI7FEF2LiGA== - which@^1.2.9: version "1.3.1" 
resolved "https://registry.yarnpkg.com/which/-/which-1.3.1.tgz#a45043d54f5805316da8d62f9f50918d3da70b0a" @@ -1986,3 +2018,8 @@ yallist@^2.1.2: version "2.1.2" resolved "https://registry.yarnpkg.com/yallist/-/yallist-2.1.2.tgz#1c11f9218f076089a47dd512f93c6699a6a81d52" integrity sha1-HBH5IY8HYImkfdUS+TxmmaaoHVI= + +yallist@^3.0.2: + version "3.1.1" + resolved "https://registry.yarnpkg.com/yallist/-/yallist-3.1.1.tgz#dbb7daf9bfd8bac9ab45ebf602b8cbad0d5d08fd" + integrity sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g== From c0edbe40f3762b9a9fc0abecd24e43730107fee1 Mon Sep 17 00:00:00 2001 From: Thomas Minier Date: Thu, 13 Feb 2020 13:50:13 +0100 Subject: [PATCH 02/23] add test for AsyncLRUCache --- src/engine/cache/cache-base.ts | 6 +- tests/cache/async-lru-cache-test.js | 95 +++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 3 deletions(-) create mode 100644 tests/cache/async-lru-cache-test.js diff --git a/src/engine/cache/cache-base.ts b/src/engine/cache/cache-base.ts index 5a340a12..71e885a6 100644 --- a/src/engine/cache/cache-base.ts +++ b/src/engine/cache/cache-base.ts @@ -24,7 +24,7 @@ SOFTWARE. 'use strict' -import LRU from 'lru-cache' +import * as LRU from 'lru-cache' interface AsyncCacheEntry { content: Array, @@ -134,7 +134,7 @@ export class BaseLRUCache implements Cache { max: maxSize, maxAge } - this._content = new LRU(options) + this._content = new LRU(options) } put (key: K, item: T): void { @@ -181,7 +181,7 @@ export class AsyncLRUCache implements AsyncCache { } update (key: K, item: T, writerID: I): void { - if (this.has(key)) { + if (this._cache.has(key)) { const entry = this._cache.get(key)! 
if (entry.writerID === writerID) { entry.content.push(item) diff --git a/tests/cache/async-lru-cache-test.js b/tests/cache/async-lru-cache-test.js new file mode 100644 index 00000000..0a147954 --- /dev/null +++ b/tests/cache/async-lru-cache-test.js @@ -0,0 +1,95 @@ +/* file: async-lru-cache-test.js +MIT License + +Copyright (c) 2019-2020 Thomas Minier + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the 'Software'), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+*/ + +'use strict' + +const expect = require('chai').expect +const { AsyncLRUCache } = require('../../dist/engine/cache/cache-base') + +describe('AsyncLRUCache', () => { + let cache = null + beforeEach(() => { + cache = new AsyncLRUCache(Infinity, Infinity) + }) + + describe('#update/commit', () => { + it('should supports insertion of items over time', () => { + const writerID = 1 + cache.update(1, 1, writerID) + cache.update(1, 2, writerID) + cache.update(1, 3, writerID) + cache.commit(1, writerID) + expect(cache.get(1)).to.deep.equals([1, 2, 3]) + }) + + it('should supports concurrent insertions of items from distinct writers', () => { + const firstID = 1 + const secondID = 2 + cache.update(1, 1, firstID) + cache.update(1, '1', secondID) + cache.update(1, 2, firstID) + cache.update(1, '2', secondID) + cache.update(1, '3', secondID) + cache.update(1, 3, firstID) + cache.update(1, '4', secondID) + cache.commit(1, secondID) + cache.commit(1, firstID) + expect(cache.get(1)).to.deep.equals([1, 2, 3]) + }) + }) + + + describe('#has', () => { + it('should returns true when the cache entry is available', () => { + const writerID = 1 + cache.update(1, 1, writerID) + cache.update(1, 2, writerID) + cache.update(1, 3, writerID) + cache.commit(1, writerID) + expect(cache.has(1)).to.deep.equals(true) + }) + + it('should returns false when the cache entry is not available', () => { + const writerID = 1 + cache.update(1, 1, writerID) + cache.update(1, 2, writerID) + cache.update(1, 3, writerID) + expect(cache.has(1)).to.deep.equals(false) + cache.commit(1, writerID) + expect(cache.has(1)).to.deep.equals(true) + }) + }) + + describe('#get', () => { + it('should returns null when the cache entry is not available', () => { + const writerID = 1 + cache.update(1, 1, writerID) + cache.update(1, 2, writerID) + cache.update(1, 3, writerID) + expect(cache.get(1)).to.deep.equals(null) + cache.commit(1, writerID) + expect(cache.get(1)).to.deep.equals([1, 2, 3]) + }) + }) +}) From 
4fba195a834f3879a0782dc17c8f8c08532e3a5a Mon Sep 17 00:00:00 2001 From: Thomas Minier Date: Thu, 13 Feb 2020 13:58:54 +0100 Subject: [PATCH 03/23] add doc + store the current cache in ExecutionContext --- src/engine/cache/bgp-cache.ts | 18 +++++++++++++- src/engine/cache/cache-base.ts | 10 ++++++++ src/engine/context/execution-context.ts | 32 ++++++++++++++++++++++++- 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/src/engine/cache/bgp-cache.ts b/src/engine/cache/bgp-cache.ts index 014d9577..4b86cdac 100644 --- a/src/engine/cache/bgp-cache.ts +++ b/src/engine/cache/bgp-cache.ts @@ -31,8 +31,24 @@ import { Bindings } from '../../rdf/bindings' import { Algebra } from 'sparqljs' import { sparql } from '../../utils' -export default class BGPCache implements AsyncCache { +/** + * An async cache that stores the solution bindings from BGP evaluation + * @author Thomas Minier + */ +export type BGPCache = AsyncCache + +/** + * An implementation of a {@link BGPCache} using an {@link AsyncLRUCache} + * @author Thomas Minier + */ +export class LRUBGPCache implements BGPCache { private readonly _cache: AsyncLRUCache + + /** + * Constructor + * @param maxSize - The maximum size of the cache + * @param maxAge - Maximum age in ms + */ constructor (maxSize: number, maxAge: number) { this._cache = new AsyncLRUCache(maxSize, maxAge) } diff --git a/src/engine/cache/cache-base.ts b/src/engine/cache/cache-base.ts index 71e885a6..419d07fd 100644 --- a/src/engine/cache/cache-base.ts +++ b/src/engine/cache/cache-base.ts @@ -129,6 +129,11 @@ export interface AsyncCache { export class BaseLRUCache implements Cache { private readonly _content: LRU + /** + * Constructor + * @param maxSize - The maximum size of the cache + * @param maxAge - Maximum age in ms + */ constructor (maxSize: number, maxAge: number) { const options = { max: maxSize, @@ -168,6 +173,11 @@ export class BaseLRUCache implements Cache { export class AsyncLRUCache implements AsyncCache { private readonly 
_cache: BaseLRUCache> + /** + * Constructor + * @param maxSize - The maximum size of the cache + * @param maxAge - Maximum age in ms + */ constructor (maxSize: number, maxAge: number) { this._cache = new BaseLRUCache(maxSize, maxAge) } diff --git a/src/engine/context/execution-context.ts b/src/engine/context/execution-context.ts index 8ab4a06e..3bd03892 100644 --- a/src/engine/context/execution-context.ts +++ b/src/engine/context/execution-context.ts @@ -25,6 +25,7 @@ SOFTWARE. 'use strict' import { QueryHints } from './query-hints' +import { BGPCache } from '../cache/bgp-cache' /** * An execution context conatains control information for query execution. @@ -34,12 +35,14 @@ export default class ExecutionContext { protected _hints: QueryHints protected _defaultGraphs: string[] protected _namedGraphs: string[] + protected _cache: BGPCache | null constructor () { this._properties = new Map() this._hints = new QueryHints() this._defaultGraphs = [] this._namedGraphs = [] + this._cache = null } /** @@ -90,6 +93,32 @@ export default class ExecutionContext { this._hints = newHints } + /** + * Get the BGP cache currently used by the query engine. + * returns null if caching is disabled + * @return The BGP cache currently used by the query engine, or null if caching is disabled. + */ + get cache (): BGPCache | null { + return this._cache + } + + /** + * Set the BGP cache currently used by the query engine. + * Use null to disable caching + * @param newCache - The BGP cache to use for caching. 
+ */ + set cache (newCache: BGPCache | null) { + this._cache = newCache + } + + /** + * Test the caching is enabled + * @return True if the caching is enabled, false otherwise + */ + cachingEnabled (): boolean { + return this._cache !== null + } + /** * Get a property associated with a key * @param key - Key associated with the property @@ -121,12 +150,13 @@ export default class ExecutionContext { * Clone the execution context * @return A clone of the execution context */ - clone () : ExecutionContext { + clone (): ExecutionContext { const res = new ExecutionContext() this._properties.forEach((value, key) => res.setProperty(key, value)) res._hints = this.hints.clone() res._defaultGraphs = this._defaultGraphs.slice(0) res._namedGraphs = this._namedGraphs.slice(0) + res._cache = this._cache return res } From 0c40ab82f3e8b5ad2f8827553a2fab4d87e85ae9 Mon Sep 17 00:00:00 2001 From: Thomas Minier Date: Thu, 13 Feb 2020 14:05:58 +0100 Subject: [PATCH 04/23] add methods to enable/disable caching in PlanBuilder --- src/engine/plan-builder.ts | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/engine/plan-builder.ts b/src/engine/plan-builder.ts index 998e6ffd..f807a68c 100644 --- a/src/engine/plan-builder.ts +++ b/src/engine/plan-builder.ts @@ -54,6 +54,8 @@ import OptionalStageBuilder from './stages/optional-stage-builder' import OrderByStageBuilder from './stages/orderby-stage-builder' import UnionStageBuilder from './stages/union-stage-builder' import UpdateStageBuilder from './stages/update-stage-builder' +// caching +import { BGPCache, LRUBGPCache } from './cache/bgp-cache' // utilities import { partition, @@ -113,6 +115,7 @@ export class PlanBuilder { private _optimizer: Optimizer private _stageBuilders: Map private _customFunctions: CustomFunctions | undefined + private _currentCache: BGPCache | null /** * Constructor @@ -123,6 +126,7 @@ export class PlanBuilder { this._dataset = dataset this._parser = new Parser(prefixes) 
this._optimizer = Optimizer.getDefault() + this._currentCache = null + this._customFunctions = customFunctions + this._stageBuilders = new Map() @@ -162,6 +166,28 @@ this._stageBuilders.set(kind, stageBuilder) } + /** + * Enable Basic Graph Patterns caching for SPARQL query evaluation. + * The parameter is optional and used to provide your own cache instance. + * If left undefined, the query engine will use a {@link LRUBGPCache} with + * a maximum of 500 items and a max age of 20 minutes. + * @param customCache - (optional) Custom cache instance + */ + useCache (customCache?: LRUBGPCache): void { + if (customCache === undefined) { + this._currentCache = new LRUBGPCache(500, 1200 * 1000) + } else { + this._currentCache = customCache + } + } + + /** + * Disable Basic Graph Patterns caching for SPARQL query evaluation. + */ + disableCache (): void { + this._currentCache = null + } + + /** + * Build the physical query execution of a SPARQL 1.1 query + * and returns a {@link PipelineStage} or a {@link Consumable} that can be consumed to evaluate the query. 
@@ -176,6 +202,7 @@ export class PlanBuilder { } if (isNull(context) || isUndefined(context)) { context = new ExecutionContext() + context.cache = this._currentCache } // Optimize the logical query execution plan query = this._optimizer.optimize(query) From 5ff525808b9a655a4c7529141682346d374f37aa Mon Sep 17 00:00:00 2001 From: Thomas Minier Date: Thu, 13 Feb 2020 14:06:44 +0100 Subject: [PATCH 05/23] remove uncessary type assertion --- src/engine/plan-builder.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/engine/plan-builder.ts b/src/engine/plan-builder.ts index f807a68c..e398506b 100644 --- a/src/engine/plan-builder.ts +++ b/src/engine/plan-builder.ts @@ -401,7 +401,7 @@ export class PlanBuilder { } // delegate remaining BGP evaluation to the dedicated executor - let iter = this._stageBuilders.get(SPARQL_OPERATION.BGP)!.execute(source, classicTriples as Algebra.TripleObject[], childContext) as PipelineStage + let iter = this._stageBuilders.get(SPARQL_OPERATION.BGP)!.execute(source, classicTriples, childContext) as PipelineStage // filter out variables added by the rewriting of property paths if (tempVariables.length > 0) { From 43a9d45c1dc6aa4f3fdd4ce5bcddfccb8a4fa804 Mon Sep 17 00:00:00 2001 From: Thomas Minier Date: Thu, 13 Feb 2020 14:53:19 +0100 Subject: [PATCH 06/23] cache added for bgp evaluation using index loop joins --- src/engine/stages/bgp-stage-builder.ts | 24 +++++++++- tests/sparql/bgp-cache-test.js | 61 ++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 tests/sparql/bgp-cache-test.js diff --git a/src/engine/stages/bgp-stage-builder.ts b/src/engine/stages/bgp-stage-builder.ts index 93b67c6e..450e6849 100644 --- a/src/engine/stages/bgp-stage-builder.ts +++ b/src/engine/stages/bgp-stage-builder.ts @@ -37,6 +37,7 @@ import { fts } from './rewritings' import ExecutionContext from '../context/execution-context' import { rdf } from '../../utils' import { isNaN, isNull, isInteger } from 
'lodash' +import * as uuid from 'uuid/v4' import boundJoin from '../../operators/join/bound-join' @@ -49,7 +50,28 @@ function bgpEvaluation (source: PipelineStage, bgp: Algebra.TripleObje const engine = Pipeline.getInstance() return engine.mergeMap(source, (bindings: Bindings) => { let boundedBGP = bgp.map(t => bindings.bound(t)) - return engine.map(graph.evalBGP(boundedBGP, context), (item: Bindings) => { + // check the cache + let iterator + if (context.cachingEnabled()) { + if (context.cache!.has(boundedBGP)) { + iterator = Pipeline.getInstance().from(context.cache!.get(boundedBGP)!.map(b => b.clone())) + } else { + // generate an unique writer ID + const writerID = uuid() + // put all solutions into the cache + iterator = Pipeline.getInstance().tap(graph.evalBGP(boundedBGP, context), b => { + context.cache!.update(boundedBGP, b, writerID) + }) + // commit the cache entry when the BGP evaluation is done + iterator = Pipeline.getInstance().finalize(iterator, () => { + context.cache!.commit(boundedBGP, writerID) + }) + } + } else { + iterator = graph.evalBGP(boundedBGP, context) + } + // build join results + return engine.map(iterator, (item: Bindings) => { // if (item.size === 0 && hasVars) return null return item.union(bindings) }) diff --git a/tests/sparql/bgp-cache-test.js b/tests/sparql/bgp-cache-test.js new file mode 100644 index 00000000..1b5d5faf --- /dev/null +++ b/tests/sparql/bgp-cache-test.js @@ -0,0 +1,61 @@ +/* file : union-test.js +MIT License + +Copyright (c) 2018 Thomas Minier + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this 
permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +'use strict' + +const expect = require('chai').expect +const { getGraph, TestEngine } = require('../utils.js') + +describe('Basic Graph Pattern cache', () => { + let engine = null + before(() => { + const g = getGraph('./tests/data/dblp.nt') + engine = new TestEngine(g) + }) + + it('should fill the cache when evaluating a BGP', done => { + const query = ` + SELECT ?s ?p ?o WHERE { + { ?s ?p ?o } UNION { ?s ?p ?o } + }` + engine._builder.useCache() + const results = [] + const iterator = engine.execute(query) + iterator.subscribe(b => { + b = b.toObject() + expect(b).to.have.keys('?s', '?p', '?o') + results.push(b) + }, done, () => { + // we have all results in double + expect(results.length).to.equal(34) + // check for cache hits + const bgp = [ { subject: '?s', predicate: '?p', object: '?o' } ] + const cache = engine._builder._currentCache + expect(cache.count()).to.equal(1) + expect(cache.has(bgp)).to.equal(true) + expect(cache.get(bgp).length).to.equal(17) + done() + }) + }) +}) From 01e689b75ef9ab5b42ffa2e8d30c9acbaa85b34a Mon Sep 17 00:00:00 2001 From: Thomas Minier Date: Thu, 13 Feb 2020 10:14:13 +0100 Subject: [PATCH 07/23] add first draft for a bgp cache --- package.json | 2 + src/engine/cache/bgp-cache.ts | 71 ++++++++++ src/engine/cache/cache-base.ts | 228 +++++++++++++++++++++++++++++++++ src/utils.ts | 26 ++++ yarn.lock | 187 ++++++++++++++++----------- 5 files changed, 439 insertions(+), 75 
deletions(-) create mode 100644 src/engine/cache/bgp-cache.ts create mode 100644 src/engine/cache/cache-base.ts diff --git a/package.json b/package.json index 1aebde4e..3823726e 100644 --- a/package.json +++ b/package.json @@ -38,6 +38,7 @@ "homepage": "https://github.com/Callidon/sparql-engine#readme", "devDependencies": { "@types/lodash": "^4.14.116", + "@types/lru-cache": "^5.1.0", "@types/node": "^10.14.17", "@types/uuid": "^3.4.4", "@types/xml": "^1.0.2", @@ -54,6 +55,7 @@ "dependencies": { "@rdfjs/data-model": "^1.1.2", "lodash": "^4.17.15", + "lru-cache": "^5.1.1", "moment": "^2.22.2", "n3": "^0.11.3", "rdf-string": "^1.3.1", diff --git a/src/engine/cache/bgp-cache.ts b/src/engine/cache/bgp-cache.ts new file mode 100644 index 00000000..014d9577 --- /dev/null +++ b/src/engine/cache/bgp-cache.ts @@ -0,0 +1,71 @@ +/* file: bgp-cache.ts +MIT License + +Copyright (c) 2019-2020 Thomas Minier + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the 'Software'), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+*/ + +'use strict' + +import { AsyncLRUCache, AsyncCache } from './cache-base' +import { Pipeline } from '../pipeline/pipeline' +import { PipelineStage } from '../pipeline/pipeline-engine' +import { Bindings } from '../../rdf/bindings' +import { Algebra } from 'sparqljs' +import { sparql } from '../../utils' + +export default class BGPCache implements AsyncCache { + private readonly _cache: AsyncLRUCache + constructor (maxSize: number, maxAge: number) { + this._cache = new AsyncLRUCache(maxSize, maxAge) + } + + has (bgp: Algebra.TripleObject[]): boolean { + return this._cache.has(sparql.hashBGP(bgp)) + } + + update (bgp: Algebra.TripleObject[], item: Bindings, writerID: string): void { + this._cache.update(sparql.hashBGP(bgp), item, writerID) + } + + get (bgp: Algebra.TripleObject[]): Bindings[] | null { + return this._cache.get(sparql.hashBGP(bgp)) + } + + getAsPipeline (bgp: Algebra.TripleObject[]): PipelineStage { + const bindings = this.get(bgp) + if (bindings === null) { + return Pipeline.getInstance().empty() + } + return Pipeline.getInstance().from(bindings.map(b => b.clone())) + } + + commit (bgp: Algebra.TripleObject[], writerID: string): void { + this._cache.commit(sparql.hashBGP(bgp), writerID) + } + + delete (bgp: Algebra.TripleObject[], writerID: string): void { + this._cache.delete(sparql.hashBGP(bgp), writerID) + } + + count (): number { + return this._cache.count() + } +} diff --git a/src/engine/cache/cache-base.ts b/src/engine/cache/cache-base.ts new file mode 100644 index 00000000..5a340a12 --- /dev/null +++ b/src/engine/cache/cache-base.ts @@ -0,0 +1,228 @@ +/* file: cache-base.ts +MIT License + +Copyright (c) 2019-2020 Thomas Minier + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the 'Software'), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 
+copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +'use strict' + +import LRU from 'lru-cache' + +interface AsyncCacheEntry { + content: Array, + writerID: I, + isComplete: boolean +} + +/** + * A cache is a vue that materializes data for latter re-use + * @author Thomas Minier + */ +export interface Cache { + /** + * Put an item into the cache + * @param key - Item's key + * @param item - Item + */ + put (key: K, item: T): void + + /** + * Test if the cache contains an item with a given key + * @param key - Item's key + * @return True if the cache contains the item with the given key, False otherwise + */ + has (key: K): boolean + + /** + * Access an item by its key. + * Each call to get() should be predated by a call to has(), + * to check if the item is in the cache. + * @param key - Item's key + * @return The item with the given key, or null if it was not found + */ + get (key: K): T | null + + /** + * Remove an item from the cache + * @param key - Item's key + */ + delete (key: K): void + + /** + * Get the number of items currently in the cache + * @return The number of items currently in the cache + */ + count (): number +} + +/** + * An async cache is cache which stores collections of items that are built over time. 
+ * Writers will call the update and commit method to update the cache content & mark items as available. + * @author Thomas Minier + */ +export interface AsyncCache { + /** + * Update an item into the cache + * @param key - Item's key + * @param item - Item + * @param writerID - ID of the writer + */ + update (key: K, item: T, writerID: I): void + + /** + * Mark an item as available from the cache + * @param key - Item's key + * @param IwriterID - ID of the writer + */ + commit (key: K, writerID: I): void + + /** + * Test if the cache contains an item with a given key + * @param key - Item's key + * @return True if the cache contains the item with the given key, False otherwise + */ + has (key: K): boolean + + /** + * Access an item by its key. + * Each call to get() should be predated by a call to has() to check if the item is in the cache. + * @param key - Item's key + * @return The values of the item with the given key, or null if it was not found + */ + get (key: K): T[] | null + + /** + * Remove an item from the cache + * @param key - Item's key + */ + delete (key: K, writerID: I): void + + /** + * Get the number of items currently in the cache + * @return The number of items currently in the cache + */ + count (): number +} + +/** + * An in-memory LRU cache + * @author Thomas Minier + */ +export class BaseLRUCache implements Cache { + private readonly _content: LRU + + constructor (maxSize: number, maxAge: number) { + const options = { + max: maxSize, + maxAge + } + this._content = new LRU(options) + } + + put (key: K, item: T): void { + this._content.set(key, item) + } + + has (key: K): boolean { + return this._content.has(key) + } + + get (key: K): T | null { + if (this._content.has(key)) { + return this._content.get(key)! 
+ } + return null + } + + delete (key: K): void { + this._content.del(key) + } + + count (): number { + return this._content.itemCount + } +} + +/** + * An in-memory LRU cache that supports async insertion of items + * @author Thomas Minier + */ +export class AsyncLRUCache implements AsyncCache { + private readonly _cache: BaseLRUCache> + + constructor (maxSize: number, maxAge: number) { + this._cache = new BaseLRUCache(maxSize, maxAge) + } + + has (key: K): boolean { + if (this._cache.has(key)) { + const entry = this._cache.get(key)! + return entry.isComplete + } + return false + } + + update (key: K, item: T, writerID: I): void { + if (this._cache.has(key)) { + const entry = this._cache.get(key)! + if (entry.writerID === writerID) { + entry.content.push(item) + this._cache.put(key, entry) + } + } else { + this._cache.put(key, { + content: [item], + writerID, + isComplete: false + }) + } + } + + commit (key: K, writerID: I): void { + if (this._cache.has(key)) { + const entry = this._cache.get(key)! + if (entry.writerID === writerID) { + entry.isComplete = true + this._cache.put(key, entry) + } + } + } + + get (key: K): T[] | null { + if (this.has(key)) { + return this._cache.get(key)!.content + } + return null + } + + delete (key: K, writerID: I): void { + if (this._cache.has(key)) { + const entry = this._cache.get(key)! 
+ if (entry.writerID === writerID) { + this._cache.delete(key) + } + } + } + + count (): number { + return this._cache.count() + } +} diff --git a/src/utils.ts b/src/utils.ts index 9e37b396..f0369e83 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -33,6 +33,7 @@ import { parseZone, Moment, ISO_8601 } from 'moment' import * as DataFactory from '@rdfjs/data-model' import { BlankNode, Literal, NamedNode, Term } from 'rdf-js' import { termToString, stringToTerm } from 'rdf-string' +import * as crypto from 'crypto' /** * RDF related utilities @@ -404,6 +405,15 @@ export namespace rdf { return literal } + /** + * Hash Triple (pattern) to assign it an unique ID + * @param triple - Triple (pattern) to hash + * @return An unique ID to identify the Triple (pattern) + */ + export function hashTriple (triple: Algebra.TripleObject): string { + return `s=${triple.subject}&p=${triple.predicate}&o=${triple.object}` + } + /** * Create an IRI under the XSD namespace * () @@ -449,6 +459,22 @@ export namespace rdf { * SPARQL related utilities */ export namespace sparql { + /** + * Hash Basic Graph pattern to assign them an unique ID + * @param bgp - Basic Graph Pattern to hash + * @param md5 - True if the ID should be hashed to md5, False to keep it as a plain text string + * @return An unique ID to identify the BGP + */ + export function hashBGP (bgp: Algebra.TripleObject[], md5: boolean = false): string { + const hashedBGP = bgp.map(rdf.hashTriple).join(';') + if (!md5) { + return hashedBGP + } + const hash = crypto.createHash('md5') + hash.update(hashedBGP) + return hash.digest('hex') + } + /** * Get the set of SPARQL variables in a triple pattern * @param pattern - Triple Pattern diff --git a/yarn.lock b/yarn.lock index 69ea82f1..211b5e49 100644 --- a/yarn.lock +++ b/yarn.lock @@ -25,39 +25,47 @@ dependencies: "@types/rdf-js" "^2.0.1" +"@tootallnate/once@1": + version "1.0.0" + resolved 
"https://registry.yarnpkg.com/@tootallnate/once/-/once-1.0.0.tgz#9c13c2574c92d4503b005feca8f2e16cc1611506" + integrity sha512-KYyTT/T6ALPkIRd2Ge080X/BsXvy9O0hcWTtMWkPvwAwF99+vn6Dv4GzrFT/Nn1LePr+FFDbRXXlqmsy9lw2zA== + "@types/lodash@^4.14.116": version "4.14.149" resolved "https://registry.yarnpkg.com/@types/lodash/-/lodash-4.14.149.tgz#1342d63d948c6062838fbf961012f74d4e638440" integrity sha512-ijGqzZt/b7BfzcK9vTrS6MFljQRPn5BFWOx8oE0GYxribu6uV+aA9zZuXI1zc/etK9E8nrgdoF2+LgUw7+9tJQ== +"@types/lru-cache@^5.1.0": + version "5.1.0" + resolved "https://registry.yarnpkg.com/@types/lru-cache/-/lru-cache-5.1.0.tgz#57f228f2b80c046b4a1bd5cac031f81f207f4f03" + integrity sha512-RaE0B+14ToE4l6UqdarKPnXwVDuigfFv+5j9Dze/Nqr23yyuqdNvzcZi3xB+3Agvi5R4EOgAksfv3lXX4vBt9w== + "@types/minimatch@3.0.3": version "3.0.3" resolved "https://registry.yarnpkg.com/@types/minimatch/-/minimatch-3.0.3.tgz#3dca0e3f33b200fc7d1139c0cd96c1268cadfd9d" integrity sha512-tHq6qdbT9U1IRSGf14CL0pUlULksvY9OZ+5eEgl1N7t+OA3tGvNpxJCzuKQlsNgCVwbAs670L1vcVQi8j9HjnA== "@types/node@*": - version "13.5.0" - resolved "https://registry.yarnpkg.com/@types/node/-/node-13.5.0.tgz#4e498dbf355795a611a87ae5ef811a8660d42662" - integrity sha512-Onhn+z72D2O2Pb2ql2xukJ55rglumsVo1H6Fmyi8mlU9SvKdBk/pUSUAiBY/d9bAOF7VVWajX3sths/+g6ZiAQ== + version "13.7.1" + resolved "https://registry.yarnpkg.com/@types/node/-/node-13.7.1.tgz#238eb34a66431b71d2aaddeaa7db166f25971a0d" + integrity sha512-Zq8gcQGmn4txQEJeiXo/KiLpon8TzAl0kmKH4zdWctPj05nWwp1ClMdAVEloqrQKfaC48PNLdgN/aVaLqUrluA== "@types/node@^10.14.17": - version "10.17.13" - resolved "https://registry.yarnpkg.com/@types/node/-/node-10.17.13.tgz#ccebcdb990bd6139cd16e84c39dc2fb1023ca90c" - integrity sha512-pMCcqU2zT4TjqYFrWtYHKal7Sl30Ims6ulZ4UFXxI4xbtQqK/qqKwkDoBFCfooRqqmRu9vY3xaJRwxSh673aYg== + version "10.17.15" + resolved "https://registry.yarnpkg.com/@types/node/-/node-10.17.15.tgz#bfff4e23e9e70be6eec450419d51e18de1daf8e7" + integrity 
sha512-daFGV9GSs6USfPgxceDA8nlSe48XrVCJfDeYm7eokxq/ye7iuOH87hKXgMtEAVLFapkczbZsx868PMDT1Y0a6A== "@types/rdf-js@^2.0.1": - version "2.0.9" - resolved "https://registry.yarnpkg.com/@types/rdf-js/-/rdf-js-2.0.9.tgz#4d94b62fa6e36a1dfeb339d5141e709ba70378ba" - integrity sha512-C7YCfZDlWcN/SjZsFa/DjH7aKJjJ48KgnfpvKKWSx2/i8zjp4lLAw4uTrcArd+oAmar8e+Nnxg9w1SPFGzEhXQ== + version "2.0.11" + resolved "https://registry.yarnpkg.com/@types/rdf-js/-/rdf-js-2.0.11.tgz#b9e398504ceb9f00eaa3b3036b643dc3490cf362" + integrity sha512-GC5MZU2HbL5JnlrLAzoxSqLprqtKwocz0TNVugqM04t1ZeeNFpZRqqBQc9Jhev35hEwdH84siRLaCesxHHYlmA== dependencies: "@types/node" "*" "@types/uuid@^3.4.4": - version "3.4.6" - resolved "https://registry.yarnpkg.com/@types/uuid/-/uuid-3.4.6.tgz#d2c4c48eb85a757bf2927f75f939942d521e3016" - integrity sha512-cCdlC/1kGEZdEglzOieLDYBxHsvEOIg7kp/2FYyVR9Pxakq+Qf/inL3RKQ+PA8gOlI/NnL+fXmQH12nwcGzsHw== - dependencies: - "@types/node" "*" + version "3.4.7" + resolved "https://registry.yarnpkg.com/@types/uuid/-/uuid-3.4.7.tgz#51d42247473bc00e38cc8dfaf70d936842a36c03" + integrity sha512-C2j2FWgQkF1ru12SjZJyMaTPxs/f6n90+5G5qNakBxKXjTBc/YTSelHh4Pz1HUDwxFXD9WvpQhOGCDC+/Y4mIQ== "@types/xml@^1.0.2": version "1.0.4" @@ -83,12 +91,17 @@ acorn@^5.5.0: resolved "https://registry.yarnpkg.com/acorn/-/acorn-5.7.3.tgz#67aa231bf8812974b85235a96771eb6bd07ea279" integrity sha512-T/zvzYRfbVojPWahDsE5evJdHb3oJoQfFbsrKM7w5Zcs++Tr257tia3BmMP8XYVjp1S9RZXQMh7gao96BlqZOw== -agent-base@^4.3.0: - version "4.3.0" - resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-4.3.0.tgz#8165f01c436009bccad0b1d122f05ed770efc6ee" - integrity sha512-salcGninV0nPrwpGNn4VTXBb1SOuXQBiqbrNXoeizJsHrsL6ERFM2Ne3JUSBWRE6aeNJI2ROP/WEEIDUiDe3cg== +agent-base@5: + version "5.1.1" + resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-5.1.1.tgz#e8fb3f242959db44d63be665db7a8e739537a32c" + integrity sha512-TMeqbNl2fMW0nMjTEPOwe3J/PRFP4vqeoNuQMG0HlMrtm5QxKqdvAkZ1pRBQ/ulIyDD5Yq0nJ7YbdD8ey0TO3g== + +agent-base@6: + 
version "6.0.0" + resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-6.0.0.tgz#5d0101f19bbfaed39980b22ae866de153b93f09a" + integrity sha512-j1Q7cSCqN+AwrmDd+pzgqc0/NpC655x2bUf5ZjRIO77DcNBFmh+OgRNzF6OKdCC9RSCb19fGd99+bhXFdkRNqw== dependencies: - es6-promisify "^5.0.0" + debug "4" ajv-keywords@^2.1.0: version "2.1.1" @@ -139,7 +152,7 @@ argparse@^1.0.7: dependencies: sprintf-js "~1.0.2" -argv@^0.0.2: +argv@0.0.2: version "0.0.2" resolved "https://registry.yarnpkg.com/argv/-/argv-0.0.2.tgz#ecbd16f8949b157183711b1bda334f37840185ab" integrity sha1-7L0W+JSbFXGDcRsb2jNPN4QBhas= @@ -300,16 +313,15 @@ co@^4.6.0: integrity sha1-bqa989hTrlTMuOR7+gvz+QMfsYQ= codecov@^3.0.4: - version "3.6.2" - resolved "https://registry.yarnpkg.com/codecov/-/codecov-3.6.2.tgz#9503533d744233f6864f8f3ead9435d285ed3f47" - integrity sha512-i1VYZYY3M8Lodk/QRsIWYVimkuhl0oMSiM2itxbTbEIjB0PCSWP1cI7cscu5P0MayggoTl6I/jkXV2go8Ub8/Q== + version "3.6.5" + resolved "https://registry.yarnpkg.com/codecov/-/codecov-3.6.5.tgz#d73ce62e8a021f5249f54b073e6f2d6a513f172a" + integrity sha512-v48WuDMUug6JXwmmfsMzhCHRnhUf8O3duqXvltaYJKrO1OekZWpB/eH6iIoaxMl8Qli0+u3OxptdsBOYiD7VAQ== dependencies: - argv "^0.0.2" - ignore-walk "^3.0.1" - js-yaml "^3.13.1" - teeny-request "^3.11.3" - urlgrey "^0.4.4" - validator "^12.1.0" + argv "0.0.2" + ignore-walk "3.0.3" + js-yaml "3.13.1" + teeny-request "6.0.1" + urlgrey "0.4.4" color-convert@^1.9.0: version "1.9.3" @@ -379,6 +391,13 @@ debug@3.1.0: dependencies: ms "2.0.0" +debug@4: + version "4.1.1" + resolved "https://registry.yarnpkg.com/debug/-/debug-4.1.1.tgz#3b72260255109c6b589cee050f1d516139664791" + integrity sha512-pYAIzeRo8J6KPEaJ0VWOh5Pzkbw/RetuzehGM7QRRX5he4fPHx2rdKMB256ehJCkX+XRQm16eZLqLNS8RSZXZw== + dependencies: + ms "^2.1.1" + debug@^2.6.8, debug@^2.6.9: version "2.6.9" resolved "https://registry.yarnpkg.com/debug/-/debug-2.6.9.tgz#5d128515df134ff327e90a4c93f4e077a536341f" @@ -497,18 +516,6 @@ es-to-primitive@^1.2.1: is-date-object "^1.0.1" is-symbol 
"^1.0.2" -es6-promise@^4.0.3: - version "4.2.8" - resolved "https://registry.yarnpkg.com/es6-promise/-/es6-promise-4.2.8.tgz#4eb21594c972bc40553d276e510539143db53e0a" - integrity sha512-HJDGx5daxeIvxdBxvG2cb9g4tEvwIk3i8+nhX0yGrYmZUzbkdg8QbDevheDB8gd0//uPj4c1EQua8Q+MViT0/w== - -es6-promisify@^5.0.0: - version "5.0.0" - resolved "https://registry.yarnpkg.com/es6-promisify/-/es6-promisify-5.0.0.tgz#5109d62f3e56ea967c4b63505aef08291c8a5203" - integrity sha1-UQnWLz5W6pZ8S2NQWu8IKRyKUgM= - dependencies: - es6-promise "^4.0.3" - escape-string-regexp@1.0.5, escape-string-regexp@^1.0.2, escape-string-regexp@^1.0.5: version "1.0.5" resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz#1b61c0562190a8dff6ae3bb2cf0200ca130b86d4" @@ -656,9 +663,9 @@ esprima@^4.0.0: integrity sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A== esquery@^1.0.0: - version "1.0.1" - resolved "https://registry.yarnpkg.com/esquery/-/esquery-1.0.1.tgz#406c51658b1f5991a5f9b62b1dc25b00e3e5c708" - integrity sha512-SmiyZ5zIWH9VM+SRUReLS5Q8a7GxtRdxEBVZpm98rJM7Sb+A9DVCndXfkeFUd3byderg+EbDkfnevfCwynWaNA== + version "1.1.0" + resolved "https://registry.yarnpkg.com/esquery/-/esquery-1.1.0.tgz#c5c0b66f383e7656404f86b31334d72524eddb48" + integrity sha512-MxYW9xKmROWF672KqjO75sszsA8Mxhw06YFeS5VHlB98KDHbOSurm3ArsjO60Eaf3QmGMCP1yn+0JQkNLo/97Q== dependencies: estraverse "^4.0.0" @@ -819,9 +826,9 @@ growl@1.10.5: integrity sha512-qBr4OuELkhPenW6goKVXiv47US3clb3/IbuWF9KNKEijAy9oeHxU9IgzjvJhHkUzhaj7rOUD7+YGWqUjLp5oSA== handlebars@^4.7.0: - version "4.7.2" - resolved "https://registry.yarnpkg.com/handlebars/-/handlebars-4.7.2.tgz#01127b3840156a0927058779482031afe0e730d7" - integrity sha512-4PwqDL2laXtTWZghzzCtunQUTLbo31pcCJrd/B/9JP8XbhVzpS5ZXuKqlOzsd1rtcaLo4KqAn8nl8mkknS4MHw== + version "4.7.3" + resolved "https://registry.yarnpkg.com/handlebars/-/handlebars-4.7.3.tgz#8ece2797826886cf8082d1726ff21d2a022550ee" + integrity 
sha512-SRGwSYuNfx8DwHD/6InAPzD6RgeruWLT+B8e8a7gGs8FWgHzlExpTFMEq2IA6QpAfOClpKHy6+8IqTjeBCu6Kg== dependencies: neo-async "^2.6.0" optimist "^0.6.1" @@ -859,22 +866,31 @@ he@1.1.1: integrity sha1-k0EP0hsAlzUVH4howvJx80J+I/0= highlight.js@^9.17.1: - version "9.18.0" - resolved "https://registry.yarnpkg.com/highlight.js/-/highlight.js-9.18.0.tgz#6b1763cfcd53744313bd3f31f1210f7beb962c79" - integrity sha512-A97kI1KAUzKoAiEoaGcf2O9YPS8nbDTCRFokaaeBhnqjQTvbAuAJrQMm21zw8s8xzaMtCQBtgbyGXLGxdxQyqQ== + version "9.18.1" + resolved "https://registry.yarnpkg.com/highlight.js/-/highlight.js-9.18.1.tgz#ed21aa001fe6252bb10a3d76d47573c6539fe13c" + integrity sha512-OrVKYz70LHsnCgmbXctv/bfuvntIKDz177h0Co37DQ5jamGZLVmoCVMtjMtNZY3X9DrCcKfklHPNeA0uPZhSJg== hosted-git-info@^2.1.4: version "2.8.5" resolved "https://registry.yarnpkg.com/hosted-git-info/-/hosted-git-info-2.8.5.tgz#759cfcf2c4d156ade59b0b2dfabddc42a6b9c70c" integrity sha512-kssjab8CvdXfcXMXVcvsXum4Hwdq9XGtRD3TteMEvEbq0LXyiNQr6AprqKqfeaDXze7SxWvRxdpwE6ku7ikLkg== -https-proxy-agent@^2.2.1: - version "2.2.4" - resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-2.2.4.tgz#4ee7a737abd92678a293d9b34a1af4d0d08c787b" - integrity sha512-OmvfoQ53WLjtA9HeYP9RNrWMJzzAz1JGaSFr1nijg0PVR1JaD/xbJq1mdEIIlxGpXp9eSe/O2LgU9DJmTPd0Eg== +http-proxy-agent@^4.0.0: + version "4.0.1" + resolved "https://registry.yarnpkg.com/http-proxy-agent/-/http-proxy-agent-4.0.1.tgz#8a8c8ef7f5932ccf953c296ca8291b95aa74aa3a" + integrity sha512-k0zdNgqWTGA6aeIRVpvfVob4fL52dTfaehylg0Y4UvSySvOq/Y+BOyPrgpUrA7HylqvU8vIZGsRuXmspskV0Tg== dependencies: - agent-base "^4.3.0" - debug "^3.1.0" + "@tootallnate/once" "1" + agent-base "6" + debug "4" + +https-proxy-agent@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-4.0.0.tgz#702b71fb5520a132a66de1f67541d9e62154d82b" + integrity sha512-zoDhWrkR3of1l9QAL8/scJZyLu8j/gBkcwcaQOZh7Gyh/+uJQzGVETdgT30akuwkpL8HTRfssqI3BZuV18teDg== + dependencies: + 
agent-base "5" + debug "4" iconv-lite@^0.4.17: version "0.4.24" @@ -883,7 +899,7 @@ iconv-lite@^0.4.17: dependencies: safer-buffer ">= 2.1.2 < 3" -ignore-walk@^3.0.1: +ignore-walk@3.0.3: version "3.0.3" resolved "https://registry.yarnpkg.com/ignore-walk/-/ignore-walk-3.0.3.tgz#017e2447184bfeade7c238e4aefdd1e8f95b1e37" integrity sha512-m7o6xuOaT1aqheYHKf8W6J5pYH85ZI9w077erOzLje3JsB1gkafkAhHHY19dqjulgIZHFm32Cp5uNZgcQqdJKw== @@ -1017,7 +1033,7 @@ js-tokens@^3.0.2: resolved "https://registry.yarnpkg.com/js-tokens/-/js-tokens-3.0.2.tgz#9866df395102130e38f7f996bceb65443209c25b" integrity sha1-mGbfOVECEw449/mWvOtlRDIJwls= -js-yaml@^3.13.1, js-yaml@^3.9.1: +js-yaml@3.13.1, js-yaml@^3.13.1, js-yaml@^3.9.1: version "3.13.1" resolved "https://registry.yarnpkg.com/js-yaml/-/js-yaml-3.13.1.tgz#aff151b30bfdfa8e49e05da22e7415e9dfa37847" integrity sha512-YfbcO7jXDdyj0DGxYVSlSeQNHbD7XPWvrVWeVUujrQEoZzWJIRrCPoyk6kL6IAjAG2IolMK4T0hNUe0HOUs5Jw== @@ -1111,6 +1127,13 @@ lru-cache@^4.0.1: pseudomap "^1.0.2" yallist "^2.1.2" +lru-cache@^5.1.1: + version "5.1.1" + resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-5.1.1.tgz#1da27e6710271947695daf6848e847f01d84b920" + integrity sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w== + dependencies: + yallist "^3.0.2" + lunr@^2.3.8: version "2.3.8" resolved "https://registry.yarnpkg.com/lunr/-/lunr-2.3.8.tgz#a8b89c31f30b5a044b97d2d28e2da191b6ba2072" @@ -1483,9 +1506,9 @@ resolve-from@^1.0.0: integrity sha1-Jsv+k10a7uq7Kbw/5a6wHpPUQiY= resolve@^1.1.6, resolve@^1.10.0, resolve@^1.13.1, resolve@^1.3.2, resolve@^1.3.3: - version "1.15.0" - resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.15.0.tgz#1b7ca96073ebb52e741ffd799f6b39ea462c67f5" - integrity sha512-+hTmAldEGE80U2wJJDC1lebb5jWqvTYAfm3YZ1ckk1gBr0MnCqUKlwK1e+anaFljIl+F5tR5IoZcm4ZDA1zMQw== + version "1.15.1" + resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.15.1.tgz#27bdcdeffeaf2d6244b95bb0f9f4b4653451f3e8" + integrity 
sha512-84oo6ZTtoTUpjgNEr5SJyzQhzL72gaRodsSfyxC/AXRvwu0Yse9H8eF9IpGo7b8YetZhlI6v7ZQ6bKBFV/6S7w== dependencies: path-parse "^1.0.6" @@ -1654,6 +1677,13 @@ standard@^11.0.1: eslint-plugin-standard "~3.0.1" standard-engine "~8.0.0" +stream-events@^1.0.5: + version "1.0.5" + resolved "https://registry.yarnpkg.com/stream-events/-/stream-events-1.0.5.tgz#bbc898ec4df33a4902d892333d47da9bf1c406d5" + integrity sha512-E1GUzBSgvct8Jsb3v2X15pjzN1tYebtbLaMg+eBOUOAxgbLoSbT2NS91ckc5lJD1KfLjId+jXJRgo0qnV5Nerg== + dependencies: + stubs "^3.0.0" + string-width@^2.1.0, string-width@^2.1.1: version "2.1.1" resolved "https://registry.yarnpkg.com/string-width/-/string-width-2.1.1.tgz#ab93f27a8dc13d28cac815c462143a6d9012ae9e" @@ -1709,6 +1739,11 @@ strip-json-comments@~2.0.1: resolved "https://registry.yarnpkg.com/strip-json-comments/-/strip-json-comments-2.0.1.tgz#3c531942e908c2697c0ec344858c286c7ca0a60a" integrity sha1-PFMZQukIwml8DsNEhYwobHygpgo= +stubs@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/stubs/-/stubs-3.0.0.tgz#e8d2ba1fa9c90570303c030b6900f7d5f89abe5b" + integrity sha1-6NK6H6nJBXAwPAMLaQD31fiavls= + supports-color@5.4.0: version "5.4.0" resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-5.4.0.tgz#1c6b337402c2137605efe19f10fec390f6faab54" @@ -1740,13 +1775,15 @@ table@4.0.2: slice-ansi "1.0.0" string-width "^2.1.1" -teeny-request@^3.11.3: - version "3.11.3" - resolved "https://registry.yarnpkg.com/teeny-request/-/teeny-request-3.11.3.tgz#335c629f7645e5d6599362df2f3230c4cbc23a55" - integrity sha512-CKncqSF7sH6p4rzCgkb/z/Pcos5efl0DmolzvlqRQUNcpRIruOhY9+T1FsIlyEbfWd7MsFpodROOwHYh2BaXzw== +teeny-request@6.0.1: + version "6.0.1" + resolved "https://registry.yarnpkg.com/teeny-request/-/teeny-request-6.0.1.tgz#9b1f512cef152945827ba7e34f62523a4ce2c5b0" + integrity sha512-TAK0c9a00ELOqLrZ49cFxvPVogMUFaWY8dUsQc/0CuQPGF+BOxOQzXfE413BAk2kLomwNplvdtMpeaeGWmoc2g== dependencies: - https-proxy-agent "^2.2.1" + http-proxy-agent "^4.0.0" + 
https-proxy-agent "^4.0.0" node-fetch "^2.2.0" + stream-events "^1.0.5" uuid "^3.3.2" text-table@~0.2.0: @@ -1880,9 +1917,9 @@ typescript@3.7.x, typescript@^3.6.2: integrity sha512-/P5lkRXkWHNAbcJIiHPfRoKqyd7bsyCma1hZNUGfn20qm64T6ZBlrzprymeu918H+mB/0rIg2gGK/BXkhhYgBw== uglify-js@^3.1.4: - version "3.7.6" - resolved "https://registry.yarnpkg.com/uglify-js/-/uglify-js-3.7.6.tgz#0783daa867d4bc962a37cc92f67f6e3238c47485" - integrity sha512-yYqjArOYSxvqeeiYH2VGjZOqq6SVmhxzaPjJC1W2F9e+bqvFL9QXQ2osQuKUFjM2hGjKG2YclQnRKWQSt/nOTQ== + version "3.7.7" + resolved "https://registry.yarnpkg.com/uglify-js/-/uglify-js-3.7.7.tgz#21e52c7dccda80a53bf7cde69628a7e511aec9c9" + integrity sha512-FeSU+hi7ULYy6mn8PKio/tXsdSXN35lm4KgV2asx00kzrLU9Pi3oAslcJT70Jdj7PHX29gGUPOT6+lXGBbemhA== dependencies: commander "~2.20.3" source-map "~0.6.1" @@ -1902,7 +1939,7 @@ universalify@^0.1.0: resolved "https://registry.yarnpkg.com/universalify/-/universalify-0.1.2.tgz#b646f69be3942dabcecc9d6639c80dc105efaa66" integrity sha512-rBJeI5CXAlmy1pV+617WB9J63U6XcazHHF2f2dbJix4XzpUF0RS3Zbj0FGIOCAva5P/d/GBOYaACQ1w+0azUkg== -urlgrey@^0.4.4: +urlgrey@0.4.4: version "0.4.4" resolved "https://registry.yarnpkg.com/urlgrey/-/urlgrey-0.4.4.tgz#892fe95960805e85519f1cd4389f2cb4cbb7652f" integrity sha1-iS/pWWCAXoVRnxzUOJ8stMu3ZS8= @@ -1925,11 +1962,6 @@ validate-npm-package-license@^3.0.1: spdx-correct "^3.0.0" spdx-expression-parse "^3.0.0" -validator@^12.1.0: - version "12.1.0" - resolved "https://registry.yarnpkg.com/validator/-/validator-12.1.0.tgz#a3a7315d5238cbc15e46ad8d5e479aafa7119925" - integrity sha512-gIC2RBuFRi574Rb9vewGCJ7TCLxHXNx6EKthEgs+Iz0pYa9a9Te1VLG/bGLsAyGWrqR5FfR7tbFUI7FEF2LiGA== - which@^1.2.9: version "1.3.1" resolved "https://registry.yarnpkg.com/which/-/which-1.3.1.tgz#a45043d54f5805316da8d62f9f50918d3da70b0a" @@ -1986,3 +2018,8 @@ yallist@^2.1.2: version "2.1.2" resolved "https://registry.yarnpkg.com/yallist/-/yallist-2.1.2.tgz#1c11f9218f076089a47dd512f93c6699a6a81d52" integrity 
sha1-HBH5IY8HYImkfdUS+TxmmaaoHVI= + +yallist@^3.0.2: + version "3.1.1" + resolved "https://registry.yarnpkg.com/yallist/-/yallist-3.1.1.tgz#dbb7daf9bfd8bac9ab45ebf602b8cbad0d5d08fd" + integrity sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g== From 86eea7de46da46b9e55f4cbb46cb3fadfc964c4d Mon Sep 17 00:00:00 2001 From: Thomas Minier Date: Thu, 13 Feb 2020 13:50:13 +0100 Subject: [PATCH 08/23] add test for AsyncLRUCache --- src/engine/cache/cache-base.ts | 6 +- tests/cache/async-lru-cache-test.js | 95 +++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 3 deletions(-) create mode 100644 tests/cache/async-lru-cache-test.js diff --git a/src/engine/cache/cache-base.ts b/src/engine/cache/cache-base.ts index 5a340a12..71e885a6 100644 --- a/src/engine/cache/cache-base.ts +++ b/src/engine/cache/cache-base.ts @@ -24,7 +24,7 @@ SOFTWARE. 'use strict' -import LRU from 'lru-cache' +import * as LRU from 'lru-cache' interface AsyncCacheEntry { content: Array, @@ -134,7 +134,7 @@ export class BaseLRUCache implements Cache { max: maxSize, maxAge } - this._content = new LRU(options) + this._content = new LRU(options) } put (key: K, item: T): void { @@ -181,7 +181,7 @@ export class AsyncLRUCache implements AsyncCache { } update (key: K, item: T, writerID: I): void { - if (this.has(key)) { + if (this._cache.has(key)) { const entry = this._cache.get(key)! 
if (entry.writerID === writerID) { entry.content.push(item) diff --git a/tests/cache/async-lru-cache-test.js b/tests/cache/async-lru-cache-test.js new file mode 100644 index 00000000..0a147954 --- /dev/null +++ b/tests/cache/async-lru-cache-test.js @@ -0,0 +1,95 @@ +/* file: async-lru-cache-test.js +MIT License + +Copyright (c) 2019-2020 Thomas Minier + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the 'Software'), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+*/ + +'use strict' + +const expect = require('chai').expect +const { AsyncLRUCache } = require('../../dist/engine/cache/cache-base') + +describe('AsyncLRUCache', () => { + let cache = null + beforeEach(() => { + cache = new AsyncLRUCache(Infinity, Infinity) + }) + + describe('#update/commit', () => { + it('should supports insertion of items over time', () => { + const writerID = 1 + cache.update(1, 1, writerID) + cache.update(1, 2, writerID) + cache.update(1, 3, writerID) + cache.commit(1, writerID) + expect(cache.get(1)).to.deep.equals([1, 2, 3]) + }) + + it('should supports concurrent insertions of items from distinct writers', () => { + const firstID = 1 + const secondID = 2 + cache.update(1, 1, firstID) + cache.update(1, '1', secondID) + cache.update(1, 2, firstID) + cache.update(1, '2', secondID) + cache.update(1, '3', secondID) + cache.update(1, 3, firstID) + cache.update(1, '4', secondID) + cache.commit(1, secondID) + cache.commit(1, firstID) + expect(cache.get(1)).to.deep.equals([1, 2, 3]) + }) + }) + + + describe('#has', () => { + it('should returns true when the cache entry is available', () => { + const writerID = 1 + cache.update(1, 1, writerID) + cache.update(1, 2, writerID) + cache.update(1, 3, writerID) + cache.commit(1, writerID) + expect(cache.has(1)).to.deep.equals(true) + }) + + it('should returns false when the cache entry is not available', () => { + const writerID = 1 + cache.update(1, 1, writerID) + cache.update(1, 2, writerID) + cache.update(1, 3, writerID) + expect(cache.has(1)).to.deep.equals(false) + cache.commit(1, writerID) + expect(cache.has(1)).to.deep.equals(true) + }) + }) + + describe('#get', () => { + it('should returns null when the cache entry is not available', () => { + const writerID = 1 + cache.update(1, 1, writerID) + cache.update(1, 2, writerID) + cache.update(1, 3, writerID) + expect(cache.get(1)).to.deep.equals(null) + cache.commit(1, writerID) + expect(cache.get(1)).to.deep.equals([1, 2, 3]) + }) + }) +}) From 
6435fa8c4ce93fa879513609b5d48ac550c42ed7 Mon Sep 17 00:00:00 2001 From: Thomas Minier Date: Thu, 13 Feb 2020 13:58:54 +0100 Subject: [PATCH 09/23] add doc + store the current cache in ExecutionContext --- src/engine/cache/bgp-cache.ts | 18 +++++++++++++- src/engine/cache/cache-base.ts | 10 ++++++++ src/engine/context/execution-context.ts | 32 ++++++++++++++++++++++++- 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/src/engine/cache/bgp-cache.ts b/src/engine/cache/bgp-cache.ts index 014d9577..4b86cdac 100644 --- a/src/engine/cache/bgp-cache.ts +++ b/src/engine/cache/bgp-cache.ts @@ -31,8 +31,24 @@ import { Bindings } from '../../rdf/bindings' import { Algebra } from 'sparqljs' import { sparql } from '../../utils' -export default class BGPCache implements AsyncCache { +/** + * An async cache that stores the solution bindings from BGP evaluation + * @author Thomas Minier + */ +export type BGPCache = AsyncCache + +/** + * An implementation of a {@link BGPCache} using an {@link AsyncLRUCache} + * @author Thomas Minier + */ +export class LRUBGPCache implements BGPCache { private readonly _cache: AsyncLRUCache + + /** + * Constructor + * @param maxSize - The maximum size of the cache + * @param maxAge - Maximum age in ms + */ constructor (maxSize: number, maxAge: number) { this._cache = new AsyncLRUCache(maxSize, maxAge) } diff --git a/src/engine/cache/cache-base.ts b/src/engine/cache/cache-base.ts index 71e885a6..419d07fd 100644 --- a/src/engine/cache/cache-base.ts +++ b/src/engine/cache/cache-base.ts @@ -129,6 +129,11 @@ export interface AsyncCache { export class BaseLRUCache implements Cache { private readonly _content: LRU + /** + * Constructor + * @param maxSize - The maximum size of the cache + * @param maxAge - Maximum age in ms + */ constructor (maxSize: number, maxAge: number) { const options = { max: maxSize, @@ -168,6 +173,11 @@ export class BaseLRUCache implements Cache { export class AsyncLRUCache implements AsyncCache { private readonly 
_cache: BaseLRUCache> + /** + * Constructor + * @param maxSize - The maximum size of the cache + * @param maxAge - Maximum age in ms + */ constructor (maxSize: number, maxAge: number) { this._cache = new BaseLRUCache(maxSize, maxAge) } diff --git a/src/engine/context/execution-context.ts b/src/engine/context/execution-context.ts index 8ab4a06e..3bd03892 100644 --- a/src/engine/context/execution-context.ts +++ b/src/engine/context/execution-context.ts @@ -25,6 +25,7 @@ SOFTWARE. 'use strict' import { QueryHints } from './query-hints' +import { BGPCache } from '../cache/bgp-cache' /** * An execution context conatains control information for query execution. @@ -34,12 +35,14 @@ export default class ExecutionContext { protected _hints: QueryHints protected _defaultGraphs: string[] protected _namedGraphs: string[] + protected _cache: BGPCache | null constructor () { this._properties = new Map() this._hints = new QueryHints() this._defaultGraphs = [] this._namedGraphs = [] + this._cache = null } /** @@ -90,6 +93,32 @@ export default class ExecutionContext { this._hints = newHints } + /** + * Get the BGP cache currently used by the query engine. + * returns null if caching is disabled + * @return The BGP cache currently used by the query engine, or null if caching is disabled. + */ + get cache (): BGPCache | null { + return this._cache + } + + /** + * Set the BGP cache currently used by the query engine. + * Use null to disable caching + * @param newCache - The BGP cache to use for caching. 
+ */ + set cache (newCache: BGPCache | null) { + this._cache = newCache + } + + /** + * Test the caching is enabled + * @return True if the caching is enabled, false otherwise + */ + cachingEnabled (): boolean { + return this._cache !== null + } + /** * Get a property associated with a key * @param key - Key associated with the property @@ -121,12 +150,13 @@ export default class ExecutionContext { * Clone the execution context * @return A clone of the execution context */ - clone () : ExecutionContext { + clone (): ExecutionContext { const res = new ExecutionContext() this._properties.forEach((value, key) => res.setProperty(key, value)) res._hints = this.hints.clone() res._defaultGraphs = this._defaultGraphs.slice(0) res._namedGraphs = this._namedGraphs.slice(0) + res._cache = this._cache return res } From e7c6183cabf86e910ed444f7859f7098f4fae89f Mon Sep 17 00:00:00 2001 From: Thomas Minier Date: Thu, 13 Feb 2020 14:05:58 +0100 Subject: [PATCH 10/23] add methods to enable/disable caching in PlanBuilder --- src/engine/plan-builder.ts | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/engine/plan-builder.ts b/src/engine/plan-builder.ts index 998e6ffd..f807a68c 100644 --- a/src/engine/plan-builder.ts +++ b/src/engine/plan-builder.ts @@ -54,6 +54,8 @@ import OptionalStageBuilder from './stages/optional-stage-builder' import OrderByStageBuilder from './stages/orderby-stage-builder' import UnionStageBuilder from './stages/union-stage-builder' import UpdateStageBuilder from './stages/update-stage-builder' +// caching +import { BGPCache, LRUBGPCache } from './cache/bgp-cache' // utilities import { partition, @@ -113,6 +115,7 @@ export class PlanBuilder { private _optimizer: Optimizer private _stageBuilders: Map private _customFunctions: CustomFunctions | undefined + private _currentCache: BGPCache | null /** * Constructor @@ -123,6 +126,7 @@ export class PlanBuilder { this._dataset = dataset this._parser = new Parser(prefixes) 
this._optimizer = Optimizer.getDefault() + this._currentCache = null this._customFunctions = customFunctions this._stageBuilders = new Map() @@ -162,6 +166,28 @@ export class PlanBuilder { this._stageBuilders.set(kind, stageBuilder) } + /** + * Enable Basic Graph Patterns caching for SPARQL query evaluation. + * The parameter is optional and used to provide your own cache instance. + * If left undefined, the query engine will use a {@link LRUBGPCache} with + * a maximum of 500 items and a max age of 20 minutes. + * @param customCache - (optional) Custom cache instance + */ + useCache (customCache?: LRUBGPCache): void { + if (customCache === undefined) { + this._currentCache = new LRUBGPCache(500, 1200 * 60 * 60) + } else { + this._currentCache = customCache + } + } + + /** + * Disable Basic Graph Patterns caching for SPARQL query evaluation. + */ + disableCache (): void { + this._currentCache = null + } + /** * Build the physical query execution of a SPARQL 1.1 query * and returns a {@link PipelineStage} or a {@link Consumable} that can be consumed to evaluate the query. 
@@ -176,6 +202,7 @@ export class PlanBuilder { } if (isNull(context) || isUndefined(context)) { context = new ExecutionContext() + context.cache = this._currentCache } // Optimize the logical query execution plan query = this._optimizer.optimize(query) From 4b7156d006aedd1ba814688aeae38ca14da5ec4e Mon Sep 17 00:00:00 2001 From: Thomas Minier Date: Thu, 13 Feb 2020 14:06:44 +0100 Subject: [PATCH 11/23] remove uncessary type assertion --- src/engine/plan-builder.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/engine/plan-builder.ts b/src/engine/plan-builder.ts index f807a68c..e398506b 100644 --- a/src/engine/plan-builder.ts +++ b/src/engine/plan-builder.ts @@ -401,7 +401,7 @@ export class PlanBuilder { } // delegate remaining BGP evaluation to the dedicated executor - let iter = this._stageBuilders.get(SPARQL_OPERATION.BGP)!.execute(source, classicTriples as Algebra.TripleObject[], childContext) as PipelineStage + let iter = this._stageBuilders.get(SPARQL_OPERATION.BGP)!.execute(source, classicTriples, childContext) as PipelineStage // filter out variables added by the rewriting of property paths if (tempVariables.length > 0) { From cd43f1662840259737320b4357d112ee2ea3a23b Mon Sep 17 00:00:00 2001 From: Thomas Minier Date: Thu, 13 Feb 2020 14:53:19 +0100 Subject: [PATCH 12/23] cache added for bgp evaluation using index loop joins --- src/engine/stages/bgp-stage-builder.ts | 24 +++++++++- tests/sparql/bgp-cache-test.js | 61 ++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 tests/sparql/bgp-cache-test.js diff --git a/src/engine/stages/bgp-stage-builder.ts b/src/engine/stages/bgp-stage-builder.ts index 93b67c6e..450e6849 100644 --- a/src/engine/stages/bgp-stage-builder.ts +++ b/src/engine/stages/bgp-stage-builder.ts @@ -37,6 +37,7 @@ import { fts } from './rewritings' import ExecutionContext from '../context/execution-context' import { rdf } from '../../utils' import { isNaN, isNull, isInteger } from 
'lodash' +import * as uuid from 'uuid/v4' import boundJoin from '../../operators/join/bound-join' @@ -49,7 +50,28 @@ function bgpEvaluation (source: PipelineStage, bgp: Algebra.TripleObje const engine = Pipeline.getInstance() return engine.mergeMap(source, (bindings: Bindings) => { let boundedBGP = bgp.map(t => bindings.bound(t)) - return engine.map(graph.evalBGP(boundedBGP, context), (item: Bindings) => { + // check the cache + let iterator + if (context.cachingEnabled()) { + if (context.cache!.has(boundedBGP)) { + iterator = Pipeline.getInstance().from(context.cache!.get(boundedBGP)!.map(b => b.clone())) + } else { + // generate an unique writer ID + const writerID = uuid() + // put all solutions into the cache + iterator = Pipeline.getInstance().tap(graph.evalBGP(boundedBGP, context), b => { + context.cache!.update(boundedBGP, b, writerID) + }) + // commit the cache entry when the BGP evaluation is done + iterator = Pipeline.getInstance().finalize(iterator, () => { + context.cache!.commit(boundedBGP, writerID) + }) + } + } else { + iterator = graph.evalBGP(boundedBGP, context) + } + // build join results + return engine.map(iterator, (item: Bindings) => { // if (item.size === 0 && hasVars) return null return item.union(bindings) }) diff --git a/tests/sparql/bgp-cache-test.js b/tests/sparql/bgp-cache-test.js new file mode 100644 index 00000000..1b5d5faf --- /dev/null +++ b/tests/sparql/bgp-cache-test.js @@ -0,0 +1,61 @@ +/* file : union-test.js +MIT License + +Copyright (c) 2018 Thomas Minier + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this 
permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +'use strict' + +const expect = require('chai').expect +const { getGraph, TestEngine } = require('../utils.js') + +describe('Basic Graph Pattern cache', () => { + let engine = null + before(() => { + const g = getGraph('./tests/data/dblp.nt') + engine = new TestEngine(g) + }) + + it('should fill the cache when evaluating a BGP', done => { + const query = ` + SELECT ?s ?p ?o WHERE { + { ?s ?p ?o } UNION { ?s ?p ?o } + }` + engine._builder.useCache() + const results = [] + const iterator = engine.execute(query) + iterator.subscribe(b => { + b = b.toObject() + expect(b).to.have.keys('?s', '?p', '?o') + results.push(b) + }, done, () => { + // we have all results in double + expect(results.length).to.equal(34) + // check for cache hits + const bgp = [ { subject: '?s', predicate: '?p', object: '?o' } ] + const cache = engine._builder._currentCache + expect(cache.count()).to.equal(1) + expect(cache.has(bgp)).to.equal(true) + expect(cache.get(bgp).length).to.equal(17) + done() + }) + }) +}) From 90a766c59fb277a2a6568ab61872e7f937c552e6 Mon Sep 17 00:00:00 2001 From: Thomas Minier Date: Sat, 15 Feb 2020 09:36:21 +0100 Subject: [PATCH 13/23] the async cache now wait for uncommitted entries to become available --- src/engine/cache/bgp-cache.ts | 13 +++- src/engine/cache/cache-base.ts | 86 +++++++++++++++++++------- src/engine/stages/bgp-stage-builder.ts | 2 +- tests/cache/async-lru-cache-test.js | 30 +++++---- 
tests/sparql/bgp-cache-test.js | 7 ++- 5 files changed, 96 insertions(+), 42 deletions(-) diff --git a/src/engine/cache/bgp-cache.ts b/src/engine/cache/bgp-cache.ts index 4b86cdac..2210da6e 100644 --- a/src/engine/cache/bgp-cache.ts +++ b/src/engine/cache/bgp-cache.ts @@ -35,7 +35,14 @@ import { sparql } from '../../utils' * An async cache that stores the solution bindings from BGP evaluation * @author Thomas Minier */ -export type BGPCache = AsyncCache +export interface BGPCache extends AsyncCache { + /** + * Access the cache and returns a pipeline stage that returns the content of the cache for a given BGP + * @param bgp - Cache key, i.e., a Basic Graph pattern + * @return A pipeline stage that returns the content of the cache entry for the given BGP + */ + getAsPipeline (bgp: Algebra.TripleObject[]): PipelineStage +} /** * An implementation of a {@link BGPCache} using an {@link AsyncLRUCache} @@ -61,7 +68,7 @@ export class LRUBGPCache implements BGPCache { this._cache.update(sparql.hashBGP(bgp), item, writerID) } - get (bgp: Algebra.TripleObject[]): Bindings[] | null { + get (bgp: Algebra.TripleObject[]): Promise | null { return this._cache.get(sparql.hashBGP(bgp)) } @@ -70,7 +77,7 @@ export class LRUBGPCache implements BGPCache { if (bindings === null) { return Pipeline.getInstance().empty() } - return Pipeline.getInstance().from(bindings.map(b => b.clone())) + return Pipeline.getInstance().flatMap(Pipeline.getInstance().from(bindings), x => x.map(b => b.clone())) } commit (bgp: Algebra.TripleObject[], writerID: string): void { diff --git a/src/engine/cache/cache-base.ts b/src/engine/cache/cache-base.ts index 419d07fd..019fa7cb 100644 --- a/src/engine/cache/cache-base.ts +++ b/src/engine/cache/cache-base.ts @@ -25,12 +25,7 @@ SOFTWARE. 
'use strict' import * as LRU from 'lru-cache' - -interface AsyncCacheEntry { - content: Array, - writerID: I, - isComplete: boolean -} +import LRUCache = require('lru-cache') /** * A cache is a vue that materializes data for latter re-use @@ -107,7 +102,7 @@ export interface AsyncCache { * @param key - Item's key * @return The values of the item with the given key, or null if it was not found */ - get (key: K): T[] | null + get (key: K): Promise | null /** * Remove an item from the cache @@ -167,27 +162,37 @@ export class BaseLRUCache implements Cache { } /** - * An in-memory LRU cache that supports async insertion of items + * Data-structure used for the base implementation of an asynchronous cache. * @author Thomas Minier */ -export class AsyncLRUCache implements AsyncCache { - private readonly _cache: BaseLRUCache> +interface AsyncCacheEntry { + /** The cache entry's content */ + content: Array, + /** The ID of the writer that is allowed to edit the cache entry */ + writerID: I, + /** All reads that wait for this cache entry to be committed */ + pendingReaders: Array<(items: Array) => void>, + /** Whether the cache entry is availbale for read or not */ + isComplete: boolean +} + +/** + * A base class for implementing an asynchronous cache. + * It simply needs to provides a data structure used to cache items + * @author Thomas Minier + */ +export abstract class BaseAsyncCache implements AsyncCache { + private readonly _cache: Cache> /** * Constructor - * @param maxSize - The maximum size of the cache - * @param maxAge - Maximum age in ms */ - constructor (maxSize: number, maxAge: number) { - this._cache = new BaseLRUCache(maxSize, maxAge) + constructor (cacheInstance: Cache>) { + this._cache = cacheInstance } has (key: K): boolean { - if (this._cache.has(key)) { - const entry = this._cache.get(key)! 
- return entry.isComplete - } - return false + return this._cache.has(key) } update (key: K, item: T, writerID: I): void { @@ -201,7 +206,8 @@ export class AsyncLRUCache implements AsyncCache { this._cache.put(key, { content: [item], writerID, - isComplete: false + isComplete: false, + pendingReaders: [] }) } } @@ -210,15 +216,30 @@ export class AsyncLRUCache implements AsyncCache { if (this._cache.has(key)) { const entry = this._cache.get(key)! if (entry.writerID === writerID) { - entry.isComplete = true - this._cache.put(key, entry) + // update cache entry ot marke it complete + this._cache.put(key, { + content: entry.content, + writerID: entry.writerID, + isComplete: true, + pendingReaders: [] + }) + // resolve all pending readers + entry.pendingReaders.forEach(resolve => resolve(entry.content)) } } } - get (key: K): T[] | null { + get (key: K): Promise | null { if (this.has(key)) { - return this._cache.get(key)!.content + const entry = this._cache.get(key)! + if (entry.isComplete) { + return Promise.resolve(entry.content) + } + // wait until the entry is complete + // all awaiting promises will be resolved by the commit or delete method + return new Promise(resolve => { + entry.pendingReaders.push(resolve) + }) } return null } @@ -227,6 +248,8 @@ export class AsyncLRUCache implements AsyncCache { if (this._cache.has(key)) { const entry = this._cache.get(key)! if (entry.writerID === writerID) { + // resolve all pending readers with an empty result + entry.pendingReaders.forEach(resolve => resolve([])) this._cache.delete(key) } } @@ -236,3 +259,18 @@ export class AsyncLRUCache implements AsyncCache { return this._cache.count() } } + +/** + * An in-memory LRU implementation of an asynchronous cache. 
+ * @author Thomas Minier + */ +export class AsyncLRUCache extends BaseAsyncCache { + /** + * Constructor + * @param maxSize - The maximum size of the cache + * @param maxAge - Maximum age in ms + */ + constructor (maxSize: number, maxAge: number) { + super(new BaseLRUCache>(maxSize, maxAge)) + } +} diff --git a/src/engine/stages/bgp-stage-builder.ts b/src/engine/stages/bgp-stage-builder.ts index 450e6849..a66ce3d5 100644 --- a/src/engine/stages/bgp-stage-builder.ts +++ b/src/engine/stages/bgp-stage-builder.ts @@ -54,7 +54,7 @@ function bgpEvaluation (source: PipelineStage, bgp: Algebra.TripleObje let iterator if (context.cachingEnabled()) { if (context.cache!.has(boundedBGP)) { - iterator = Pipeline.getInstance().from(context.cache!.get(boundedBGP)!.map(b => b.clone())) + iterator = context.cache!.getAsPipeline(boundedBGP) } else { // generate an unique writer ID const writerID = uuid() diff --git a/tests/cache/async-lru-cache-test.js b/tests/cache/async-lru-cache-test.js index 0a147954..98266460 100644 --- a/tests/cache/async-lru-cache-test.js +++ b/tests/cache/async-lru-cache-test.js @@ -34,16 +34,20 @@ describe('AsyncLRUCache', () => { }) describe('#update/commit', () => { - it('should supports insertion of items over time', () => { + it('should supports insertion of items over time', done => { const writerID = 1 cache.update(1, 1, writerID) cache.update(1, 2, writerID) cache.update(1, 3, writerID) cache.commit(1, writerID) - expect(cache.get(1)).to.deep.equals([1, 2, 3]) + cache.get(1).then(content => { + expect(content).to.deep.equals([1, 2, 3]) + done() + }).catch(done) + }) - it('should supports concurrent insertions of items from distinct writers', () => { + it('should supports concurrent insertions of items from distinct writers', done => { const firstID = 1 const secondID = 2 cache.update(1, 1, firstID) @@ -55,7 +59,10 @@ describe('AsyncLRUCache', () => { cache.update(1, '4', secondID) cache.commit(1, secondID) cache.commit(1, firstID) - 
expect(cache.get(1)).to.deep.equals([1, 2, 3]) + cache.get(1).then(content => { + expect(content).to.deep.equals([1, 2, 3]) + done() + }).catch(done) }) }) @@ -72,24 +79,23 @@ describe('AsyncLRUCache', () => { it('should returns false when the cache entry is not available', () => { const writerID = 1 - cache.update(1, 1, writerID) - cache.update(1, 2, writerID) - cache.update(1, 3, writerID) expect(cache.has(1)).to.deep.equals(false) + cache.update(1, 1, writerID) cache.commit(1, writerID) expect(cache.has(1)).to.deep.equals(true) }) }) - describe('#get', () => { + describe('#get', done => { it('should returns null when the cache entry is not available', () => { const writerID = 1 - cache.update(1, 1, writerID) - cache.update(1, 2, writerID) - cache.update(1, 3, writerID) expect(cache.get(1)).to.deep.equals(null) + cache.update(1, 1, writerID) cache.commit(1, writerID) - expect(cache.get(1)).to.deep.equals([1, 2, 3]) + cache.get(1).then(content => { + expect(content).to.deep.equals([1]) + done() + }).catch(done) }) }) }) diff --git a/tests/sparql/bgp-cache-test.js b/tests/sparql/bgp-cache-test.js index 1b5d5faf..b5a4a105 100644 --- a/tests/sparql/bgp-cache-test.js +++ b/tests/sparql/bgp-cache-test.js @@ -54,8 +54,11 @@ describe('Basic Graph Pattern cache', () => { const cache = engine._builder._currentCache expect(cache.count()).to.equal(1) expect(cache.has(bgp)).to.equal(true) - expect(cache.get(bgp).length).to.equal(17) - done() + // check that the cache is accessible + cache.get(bgp).then(content => { + expect(content.length).to.equals(17) + done() + }).catch(done) }) }) }) From cb070b2b9b8ae3539c636dfc38ea06f12004b094 Mon Sep 17 00:00:00 2001 From: Thomas Minier Date: Sat, 15 Feb 2020 09:45:14 +0100 Subject: [PATCH 14/23] extract bgp evaluation with a cache has an utility function --- src/engine/stages/bgp-stage-builder.ts | 18 ++------------ src/utils.ts | 34 ++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 16 deletions(-) diff --git 
a/src/engine/stages/bgp-stage-builder.ts b/src/engine/stages/bgp-stage-builder.ts index a66ce3d5..e779df1e 100644 --- a/src/engine/stages/bgp-stage-builder.ts +++ b/src/engine/stages/bgp-stage-builder.ts @@ -35,9 +35,8 @@ import { GRAPH_CAPABILITY } from '../../rdf/graph_capability' import { parseHints } from '../context/query-hints' import { fts } from './rewritings' import ExecutionContext from '../context/execution-context' -import { rdf } from '../../utils' +import { rdf, evaluation } from '../../utils' import { isNaN, isNull, isInteger } from 'lodash' -import * as uuid from 'uuid/v4' import boundJoin from '../../operators/join/bound-join' @@ -53,20 +52,7 @@ function bgpEvaluation (source: PipelineStage, bgp: Algebra.TripleObje // check the cache let iterator if (context.cachingEnabled()) { - if (context.cache!.has(boundedBGP)) { - iterator = context.cache!.getAsPipeline(boundedBGP) - } else { - // generate an unique writer ID - const writerID = uuid() - // put all solutions into the cache - iterator = Pipeline.getInstance().tap(graph.evalBGP(boundedBGP, context), b => { - context.cache!.update(boundedBGP, b, writerID) - }) - // commit the cache entry when the BGP evaluation is done - iterator = Pipeline.getInstance().finalize(iterator, () => { - context.cache!.commit(boundedBGP, writerID) - }) - } + iterator = evaluation.cacheEvalBGP(boundedBGP, graph, context.cache!, context) } else { iterator = graph.evalBGP(boundedBGP, context) } diff --git a/src/utils.ts b/src/utils.ts index f0369e83..4b45985b 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -33,7 +33,11 @@ import { parseZone, Moment, ISO_8601 } from 'moment' import * as DataFactory from '@rdfjs/data-model' import { BlankNode, Literal, NamedNode, Term } from 'rdf-js' import { termToString, stringToTerm } from 'rdf-string' +import { BGPCache } from './engine/cache/bgp-cache' +import Graph from './rdf/graph' +import ExecutionContext from './engine/context/execution-context' import * as crypto from 'crypto' 
+import * as uuid from 'uuid/v4' /** * RDF related utilities @@ -527,6 +531,36 @@ export namespace sparql { } } +/** + * Utilities related to SPARQL query evaluation + * @author Thomas Minier + */ +export namespace evaluation { + /** + * Evaluate a Basic Graph pattern on a RDF graph using a cache + * @param bgp - Basic Graph pattern to evaluate + * @param graph - RDF graph + * @param cache - Cache used + * @return A pipeline stage that produces the evaluation results + */ + export function cacheEvalBGP (bgp: Algebra.TripleObject[], graph: Graph, cache: BGPCache, context: ExecutionContext): PipelineStage { + if (cache.has(bgp)) { + return cache.getAsPipeline(bgp) + } else { + // generate an unique writer ID + const writerID = uuid() + // put all solutions into the cache + const iterator = Pipeline.getInstance().tap(graph.evalBGP(bgp, context), b => { + cache.update(bgp, b, writerID) + }) + // commit the cache entry when the BGP evaluation is done + return Pipeline.getInstance().finalize(iterator, () => { + cache.commit(bgp, writerID) + }) + } + } +} + /** * Bound a triple pattern using a set of bindings, i.e., substitute variables in the triple pattern * using the set of bindings provided From 1d17dd154647f6b11e9758dec1d9214e26aaa035 Mon Sep 17 00:00:00 2001 From: Thomas Minier Date: Sat, 15 Feb 2020 09:53:20 +0100 Subject: [PATCH 15/23] add cache support for bound join --- src/operators/join/bound-join.ts | 10 ++++++++-- src/operators/join/rewriting-op.ts | 21 ++++++++++++++++++++- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/src/operators/join/bound-join.ts b/src/operators/join/bound-join.ts index 33f66bb2..244f0cc4 100644 --- a/src/operators/join/bound-join.ts +++ b/src/operators/join/bound-join.ts @@ -29,7 +29,7 @@ import { PipelineStage, StreamPipelineInput } from '../../engine/pipeline/pipeli import ExecutionContext from '../../engine/context/execution-context' import Graph from '../../rdf/graph' import { Bindings } from '../../rdf/bindings' 
-import { rdf } from '../../utils' +import { rdf, evaluation } from '../../utils' import rewritingOp from './rewriting-op' import { Algebra } from 'sparqljs' @@ -102,7 +102,13 @@ export default function boundJoin (source: PipelineStage, bgp: Algebra activeIterators++ // simple case: first join in the pipeline if (bucket.length === 1 && bucket[0].isEmpty) { - graph.evalBGP(bgp, context).subscribe((b: Bindings) => { + let iterator + if (context.cachingEnabled()) { + iterator = evaluation.cacheEvalBGP(bgp, graph, context.cache!, context) + } else { + iterator = graph.evalBGP(bgp, context) + } + iterator.subscribe((b: Bindings) => { input.next(b) }, (err: Error) => input.error(err), () => tryClose()) } else { diff --git a/src/operators/join/rewriting-op.ts b/src/operators/join/rewriting-op.ts index 2e564e23..0519cd93 100644 --- a/src/operators/join/rewriting-op.ts +++ b/src/operators/join/rewriting-op.ts @@ -28,7 +28,9 @@ import { Pipeline } from '../../engine/pipeline/pipeline' import ExecutionContext from '../../engine/context/execution-context' import Graph from '../../rdf/graph' import { Bindings } from '../../rdf/bindings' +import { evaluation } from '../../utils' import { Algebra } from 'sparqljs' +import { PipelineStage } from '../../engine/pipeline/pipeline-engine' /** * Find a rewriting key in a list of variables @@ -91,7 +93,24 @@ function rewriteSolutions (bindings: Bindings, rewritingMap: Map, context: ExecutionContext) { - return Pipeline.getInstance().map(graph.evalUnion(bgpBucket, context), bindings => { + let source + if (context.cachingEnabled()) { + // partition the BGPs that can be evaluated using the cache from the others + const stages: PipelineStage[] = [] + const others: Algebra.TripleObject[][] = [] + bgpBucket.forEach(bgp => { + if (context.cache!.has(bgp)) { + stages.push(evaluation.cacheEvalBGP(bgp, graph, context.cache!, context)) + } else { + others.push(bgp) + } + }) + // merge all sources from the cache first, and then the evaluation of 
bgp that are not in the cache + source = Pipeline.getInstance().merge(Pipeline.getInstance().merge(...stages), graph.evalUnion(others, context)) + } else { + source = graph.evalUnion(bgpBucket, context) + } + return Pipeline.getInstance().map(source, bindings => { return rewriteSolutions(bindings, rewritingTable) }) } From 9013e63705704b31c28304971485ea0fa55d7868 Mon Sep 17 00:00:00 2001 From: Thomas Minier Date: Sun, 16 Feb 2020 09:44:48 +0100 Subject: [PATCH 16/23] start to convert the BGPCache to a semantic cache, by adding the #findSubset method --- package.json | 1 + src/engine/cache/bgp-cache.ts | 89 +++++++++++++++++++---- src/engine/cache/cache-base.ts | 17 +++-- src/utils.ts | 10 +++ tests/cache/async-lru-cache-test.js | 32 +++++++-- tests/cache/bgp-cache-test.js | 108 ++++++++++++++++++++++++++++ types/binary-search-tree/index.d.ts | 14 ++++ yarn.lock | 12 ++++ 8 files changed, 260 insertions(+), 23 deletions(-) create mode 100644 tests/cache/bgp-cache-test.js create mode 100644 types/binary-search-tree/index.d.ts diff --git a/package.json b/package.json index 3823726e..5ea70eaa 100644 --- a/package.json +++ b/package.json @@ -54,6 +54,7 @@ }, "dependencies": { "@rdfjs/data-model": "^1.1.2", + "binary-search-tree": "^0.2.6", "lodash": "^4.17.15", "lru-cache": "^5.1.1", "moment": "^2.22.2", diff --git a/src/engine/cache/bgp-cache.ts b/src/engine/cache/bgp-cache.ts index 2210da6e..f7c655e3 100644 --- a/src/engine/cache/bgp-cache.ts +++ b/src/engine/cache/bgp-cache.ts @@ -29,19 +29,39 @@ import { Pipeline } from '../pipeline/pipeline' import { PipelineStage } from '../pipeline/pipeline-engine' import { Bindings } from '../../rdf/bindings' import { Algebra } from 'sparqljs' -import { sparql } from '../../utils' +import { rdf, sparql } from '../../utils' +import { BinarySearchTree } from 'binary-search-tree' +import { findIndex, maxBy } from 'lodash' + +// type alias to simplify the type defintion in this file +type BasicGraphPattern = Algebra.TripleObject[] + 
+interface SavedBGP { + bgp: BasicGraphPattern, + key: string +} /** * An async cache that stores the solution bindings from BGP evaluation * @author Thomas Minier */ -export interface BGPCache extends AsyncCache { +export interface BGPCache extends AsyncCache { + + /** + * Search for a BGP in the cache that is a subset of the input BGP + * This method enable the user to use the Semantic caching technique, + * to evaluate a BGP using one of its cached subset. + * @param bgp - Basic Graph pattern + * @return A pair [subset BGP, set of patterns not in cache] + */ + findSubset (bgp: BasicGraphPattern): [BasicGraphPattern, BasicGraphPattern] + /** * Access the cache and returns a pipeline stage that returns the content of the cache for a given BGP * @param bgp - Cache key, i.e., a Basic Graph pattern * @return A pipeline stage that returns the content of the cache entry for the given BGP */ - getAsPipeline (bgp: Algebra.TripleObject[]): PipelineStage + getAsPipeline (bgp: BasicGraphPattern): PipelineStage } /** @@ -49,6 +69,7 @@ export interface BGPCache extends AsyncCache private readonly _cache: AsyncLRUCache /** @@ -57,22 +78,33 @@ export class LRUBGPCache implements BGPCache { * @param maxAge - Maximum age in ms */ constructor (maxSize: number, maxAge: number) { - this._cache = new AsyncLRUCache(maxSize, maxAge) + this._allKeys = new BinarySearchTree({ + checkValueEquality: (a: SavedBGP, b: SavedBGP) => a.key === b.key + }) + this._cache = new AsyncLRUCache(maxSize, maxAge, (key: string) => { + // remove index entries when they slide out + // replace key by something correct + // this._allKeys.delete(key, { bgp: [], key }) + }) } - has (bgp: Algebra.TripleObject[]): boolean { + has (bgp: BasicGraphPattern): boolean { return this._cache.has(sparql.hashBGP(bgp)) } - update (bgp: Algebra.TripleObject[], item: Bindings, writerID: string): void { - this._cache.update(sparql.hashBGP(bgp), item, writerID) + update (bgp: BasicGraphPattern, item: Bindings, writerID: 
string): void { + const key = sparql.hashBGP(bgp) + if (!this._cache.has(key)) { + bgp.forEach(pattern => this._allKeys.insert(rdf.hashTriple(pattern), { bgp, key })) + } + this._cache.update(key, item, writerID) } - get (bgp: Algebra.TripleObject[]): Promise | null { + get (bgp: BasicGraphPattern): Promise | null { return this._cache.get(sparql.hashBGP(bgp)) } - getAsPipeline (bgp: Algebra.TripleObject[]): PipelineStage { + getAsPipeline (bgp: BasicGraphPattern): PipelineStage { const bindings = this.get(bgp) if (bindings === null) { return Pipeline.getInstance().empty() @@ -80,15 +112,48 @@ export class LRUBGPCache implements BGPCache { return Pipeline.getInstance().flatMap(Pipeline.getInstance().from(bindings), x => x.map(b => b.clone())) } - commit (bgp: Algebra.TripleObject[], writerID: string): void { + commit (bgp: BasicGraphPattern, writerID: string): void { this._cache.commit(sparql.hashBGP(bgp), writerID) } - delete (bgp: Algebra.TripleObject[], writerID: string): void { - this._cache.delete(sparql.hashBGP(bgp), writerID) + delete (bgp: BasicGraphPattern, writerID: string): void { + const key = sparql.hashBGP(bgp) + this._cache.delete(key, writerID) + bgp.forEach(pattern => this._allKeys.delete(rdf.hashTriple(pattern), { bgp, key })) } count (): number { return this._cache.count() } + + findSubset (bgp: BasicGraphPattern): [BasicGraphPattern, BasicGraphPattern] { + // find all bgp matching + let matches = [] + for (let pattern of bgp) { + const searchResults = this._allKeys + .search(rdf.hashTriple(pattern)) + .filter(v => { + // remove all BGps that are not a subset of the input BGP + // we use lodash.findIndex + rdf.tripleEquals to check for triple pattern equality + return v.bgp.every(a => findIndex(bgp, b => rdf.tripleEquals(a, b)) > -1) + }) + matches.push({ pattern, searchResults }) + } + // compute the largest subset BGP and the missing patterns (missingPatterns = input_BGP - subset_BGP) + let foundPatterns: BasicGraphPattern = [] + let 
missingPatterns: BasicGraphPattern = [] + let maxBGPLength = -1 + for (let match of matches) { + if (match.searchResults.length === 0) { + missingPatterns.push(match.pattern) + } else { + const localMax = maxBy(match.searchResults, v => v.bgp.length) + if (localMax !== undefined && localMax.bgp.length > maxBGPLength) { + maxBGPLength = localMax.bgp.length + foundPatterns = localMax.bgp + } + } + } + return [foundPatterns, missingPatterns] + } } diff --git a/src/engine/cache/cache-base.ts b/src/engine/cache/cache-base.ts index 019fa7cb..7e1af566 100644 --- a/src/engine/cache/cache-base.ts +++ b/src/engine/cache/cache-base.ts @@ -128,11 +128,13 @@ export class BaseLRUCache implements Cache { * Constructor * @param maxSize - The maximum size of the cache * @param maxAge - Maximum age in ms + * @param onDispose - Function that is called on items when they are dropped from the cache */ - constructor (maxSize: number, maxAge: number) { + constructor (maxSize: number, maxAge: number, onDispose?: (key: K, item: T) => void) { const options = { max: maxSize, - maxAge + maxAge, + dispose: onDispose } this._content = new LRU(options) } @@ -153,7 +155,7 @@ export class BaseLRUCache implements Cache { } delete (key: K): void { - this.delete(key) + this._content.del(key) } count (): number { @@ -165,7 +167,7 @@ export class BaseLRUCache implements Cache { * Data-structure used for the base implementation of an asynchronous cache. * @author Thomas Minier */ -interface AsyncCacheEntry { +export interface AsyncCacheEntry { /** The cache entry's content */ content: Array, /** The ID of the writer that is allowed to edit the cache entry */ @@ -248,9 +250,9 @@ export abstract class BaseAsyncCache implements AsyncCache { if (this._cache.has(key)) { const entry = this._cache.get(key)! 
if (entry.writerID === writerID) { + this._cache.delete(key) // resolve all pending readers with an empty result entry.pendingReaders.forEach(resolve => resolve([])) - this._cache.delete(key) } } } @@ -269,8 +271,9 @@ export class AsyncLRUCache extends BaseAsyncCache { * Constructor * @param maxSize - The maximum size of the cache * @param maxAge - Maximum age in ms + * @param onDispose - Function that is called on items when they are dropped from the cache */ - constructor (maxSize: number, maxAge: number) { - super(new BaseLRUCache>(maxSize, maxAge)) + constructor (maxSize: number, maxAge: number, onDispose?: (key: K, item: AsyncCacheEntry) => void) { + super(new BaseLRUCache>(maxSize, maxAge, onDispose)) } } diff --git a/src/utils.ts b/src/utils.ts index 4b45985b..74860227 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -43,6 +43,16 @@ import * as uuid from 'uuid/v4' * RDF related utilities */ export namespace rdf { + /** + * Test if two triple (patterns) are equals + * @param a - First triple (pattern) + * @param b - Second triple (pattern) + * @return True if the two triple (patterns) are equals, False otherwise + */ + export function tripleEquals (a: Algebra.TripleObject, b: Algebra.TripleObject): boolean { + return a.subject === b.subject && a.predicate === b.predicate && a.object === b.object + } + /** * Convert an string RDF Term to a RDFJS representation * @see https://rdf.js.org/data-model-spec diff --git a/tests/cache/async-lru-cache-test.js b/tests/cache/async-lru-cache-test.js index 98266460..6cf4cfcf 100644 --- a/tests/cache/async-lru-cache-test.js +++ b/tests/cache/async-lru-cache-test.js @@ -86,16 +86,40 @@ describe('AsyncLRUCache', () => { }) }) - describe('#get', done => { - it('should returns null when the cache entry is not available', () => { - const writerID = 1 + describe('#get', () => { + it('should returns null when the key is not in the cache', () => { expect(cache.get(1)).to.deep.equals(null) + }) + + it('should delay execution until the 
cache entry is committed', done => { + const writerID = 1 cache.update(1, 1, writerID) + cache.get(1).then(content => { + expect(content).to.deep.equals([1, 2]) + done() + }).catch(done) + cache.update(1, 2, writerID) cache.commit(1, writerID) + }) + }) + + describe('#delete', () => { + it('should delete items inserted into the cache', () => { + const writerID = 1 + cache.update(1, 1, writerID) + expect(cache.has(1)).to.deep.equals(true) + cache.delete(1, writerID) + expect(cache.has(1)).to.deep.equals(false) + }) + + it('should resolve get promises to an empty array when an uncommitted entry is deleted', done => { + const writerID = 1 + cache.update(1, 1, writerID) cache.get(1).then(content => { - expect(content).to.deep.equals([1]) + expect(content.length).to.deep.equals(0) done() }).catch(done) + cache.delete(1, writerID) }) }) }) diff --git a/tests/cache/bgp-cache-test.js b/tests/cache/bgp-cache-test.js new file mode 100644 index 00000000..64f3bb1a --- /dev/null +++ b/tests/cache/bgp-cache-test.js @@ -0,0 +1,108 @@ +/* file: bgp-test.js +MIT License + +Copyright (c) 2019-2020 Thomas Minier + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the 'Software'), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +'use strict' + +const expect = require('chai').expect +const { LRUBGPCache } = require('../../dist/engine/cache/bgp-cache') +const { BindingBase } = require('../../dist/api.js') + +describe('LRUBGPCache', () => { + let cache = null + beforeEach(() => { + cache = new LRUBGPCache(Infinity, Infinity) + }) + + describe('#update/commit', () => { + it('should supports insertion of items over time', done => { + const writerID = 1 + const bgp = [ { subject: '?s', predicate: 'rdf:type', object: '?type' } ] + const bindings = [ + BindingBase.fromObject({ '?s': ':s1', '?type': ':c1' }), + BindingBase.fromObject({ '?s': ':s2', '?type': ':c2' }) + ] + cache.update(bgp, bindings[0], writerID) + cache.update(bgp, bindings[1], writerID) + cache.commit(bgp, writerID) + cache.get(bgp).then(content => { + expect(content.map(x => x.toObject())).to.deep.equals(bindings.map(x => x.toObject())) + done() + }).catch(done) + }) + }) + + describe('#findSubset', () => { + it('should find a subset for a Basic Graph Pattern which is partially in the cache', () => { + // populate cache + const subsetBGP = [ { subject: '?s', predicate: 'rdf:type', object: '?type'} ] + cache.update(subsetBGP, BindingBase.fromObject({ '?s': ':s1' }), 1) + cache.commit(subsetBGP, 1) + // search for subset + const bgp = [ + { subject: '?s', predicate: 'rdf:type', object: '?type'}, + { subject: '?s', predicate: 'foaf:name', object: '?name'} + ] + const [computedSubset, computedMissing] = cache.findSubset(bgp) + expect(computedSubset).to.deep.equals(subsetBGP) + expect(computedMissing).to.deep.equals([ bgp[1] ]) + }) + + it('should find an empty subset for a Basic Graph Pattern with no valid subset in the cache', () => { + // populate cache + const 
subsetBGP = [ { subject: '?s', predicate: 'rdf:type', object: '?type'} ] + cache.update(subsetBGP, BindingBase.fromObject({ '?s': ':s1' }), 1) + cache.commit(subsetBGP, 1) + // search for subset + const bgp = [ + { subject: '?s', predicate: 'foaf:knows', object: '?type' }, + { subject: '?s', predicate: 'foaf:name', object: '?name' } + ] + const [computedSubset, computedMissing] = cache.findSubset(bgp) + expect(computedSubset.length).to.equals(0) + expect(computedMissing).to.deep.equals(bgp) + }) + + it('should find the largest subset from the cache entry', () => { + // populate cache + const subsetBGP_a = [ { subject: '?s', predicate: 'rdf:type', object: '?type'} ] + const subsetBGP_b = [ + { subject: '?s', predicate: 'rdf:type', object: '?type' }, + { subject: '?s', predicate: 'foaf:name', object: '?name' } + ] + cache.update(subsetBGP_a, BindingBase.fromObject({ '?s': ':s1' }), 1) + cache.commit(subsetBGP_a, 1) + cache.update(subsetBGP_b, BindingBase.fromObject({ '?s': ':s2' }), 1) + cache.commit(subsetBGP_b, 1) + // search for subset + const bgp = [ + { subject: '?s', predicate: 'rdf:type', object: '?type' }, + { subject: '?s', predicate: 'foaf:knows', object: '?type' }, + { subject: '?s', predicate: 'foaf:name', object: '?name' } + ] + const [computedSubset, computedMissing] = cache.findSubset(bgp) + expect(computedSubset).to.deep.equals(subsetBGP_b) + expect(computedMissing).to.deep.equals([ bgp[1] ]) + }) + }) +}) diff --git a/types/binary-search-tree/index.d.ts b/types/binary-search-tree/index.d.ts new file mode 100644 index 00000000..0e0d5afd --- /dev/null +++ b/types/binary-search-tree/index.d.ts @@ -0,0 +1,14 @@ +// type delcaration for https://www.npmjs.com/package/binary-search-tree +declare module 'binary-search-tree' { + export interface BSTOptions { + unique?: boolean, + compareKeys?: (a: K, b: K) => number, + checkValueEquality?: (a: T, b: T) => boolean + } + export class BinarySearchTree { + constructor (options?: BSTOptions) + insert (key: K, 
item: T): void + search (key: K): T[] + delete (key: K, item?: T): void + } +} diff --git a/yarn.lock b/yarn.lock index 211b5e49..9632d3f4 100644 --- a/yarn.lock +++ b/yarn.lock @@ -197,6 +197,13 @@ balanced-match@^1.0.0: resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.0.tgz#89b4d199ab2bee49de164ea02b89ce462d71b767" integrity sha1-ibTRmasr7kneFk6gK4nORi1xt2c= +binary-search-tree@^0.2.6: + version "0.2.6" + resolved "https://registry.yarnpkg.com/binary-search-tree/-/binary-search-tree-0.2.6.tgz#c6d29194e286827fcffe079010e6bf77def10ce3" + integrity sha1-xtKRlOKGgn/P/geQEOa/d97xDOM= + dependencies: + underscore "~1.4.4" + brace-expansion@^1.1.7: version "1.1.11" resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd" @@ -1929,6 +1936,11 @@ underscore@>=1.8.3, underscore@^1.9.1: resolved "https://registry.yarnpkg.com/underscore/-/underscore-1.9.2.tgz#0c8d6f536d6f378a5af264a72f7bec50feb7cf2f" integrity sha512-D39qtimx0c1fI3ya1Lnhk3E9nONswSKhnffBI0gME9C99fYOkNi04xs8K6pePLhvl1frbDemkaBQ5ikWllR2HQ== +underscore@~1.4.4: + version "1.4.4" + resolved "https://registry.yarnpkg.com/underscore/-/underscore-1.4.4.tgz#61a6a32010622afa07963bf325203cf12239d604" + integrity sha1-YaajIBBiKvoHljvzJSA88SI51gQ= + uniq@^1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/uniq/-/uniq-1.0.1.tgz#b31c5ae8254844a3a8281541ce2b04b865a734ff" From a8f2d86a1306c2c62a239954fb8d99272ae23c28 Mon Sep 17 00:00:00 2001 From: Thomas Minier Date: Sun, 16 Feb 2020 09:45:13 +0100 Subject: [PATCH 17/23] typos in doc --- tests/cache/async-lru-cache-test.js | 2 +- tests/cache/bgp-cache-test.js | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/cache/async-lru-cache-test.js b/tests/cache/async-lru-cache-test.js index 6cf4cfcf..fde27cac 100644 --- a/tests/cache/async-lru-cache-test.js +++ b/tests/cache/async-lru-cache-test.js @@ -1,7 +1,7 @@ /* file: async-lru-cache-test.js MIT 
License -Copyright (c) 2019-2020 Thomas Minier +Copyright (c) 2018-2020 Thomas Minier Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the 'Software'), to deal diff --git a/tests/cache/bgp-cache-test.js b/tests/cache/bgp-cache-test.js index 64f3bb1a..f00fad28 100644 --- a/tests/cache/bgp-cache-test.js +++ b/tests/cache/bgp-cache-test.js @@ -1,7 +1,7 @@ -/* file: bgp-test.js +/* file: bgp-cache-test.js MIT License -Copyright (c) 2019-2020 Thomas Minier +Copyright (c) 2018-2020 Thomas Minier Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the 'Software'), to deal From 1c4be72a4fe815c9e8aec6d609ba785d047662fe Mon Sep 17 00:00:00 2001 From: Thomas Minier Date: Sun, 16 Feb 2020 09:48:16 +0100 Subject: [PATCH 18/23] extract Cache and AsyncCache interfaces into their own file --- src/engine/cache/bgp-cache.ts | 3 +- src/engine/cache/cache-base.ts | 92 +-------------------- src/engine/cache/cache-interfaces.ts | 115 +++++++++++++++++++++++++++ src/utils.ts | 2 +- 4 files changed, 119 insertions(+), 93 deletions(-) create mode 100644 src/engine/cache/cache-interfaces.ts diff --git a/src/engine/cache/bgp-cache.ts b/src/engine/cache/bgp-cache.ts index f7c655e3..fcc07fce 100644 --- a/src/engine/cache/bgp-cache.ts +++ b/src/engine/cache/bgp-cache.ts @@ -24,7 +24,8 @@ SOFTWARE. 'use strict' -import { AsyncLRUCache, AsyncCache } from './cache-base' +import { AsyncLRUCache } from './cache-base' +import { AsyncCache } from './cache-interfaces' import { Pipeline } from '../pipeline/pipeline' import { PipelineStage } from '../pipeline/pipeline-engine' import { Bindings } from '../../rdf/bindings' diff --git a/src/engine/cache/cache-base.ts b/src/engine/cache/cache-base.ts index 7e1af566..dc008dfa 100644 --- a/src/engine/cache/cache-base.ts +++ b/src/engine/cache/cache-base.ts @@ -25,97 +25,7 @@ SOFTWARE. 
'use strict' import * as LRU from 'lru-cache' -import LRUCache = require('lru-cache') - -/** - * A cache is a vue that materializes data for latter re-use - * @author Thomas Minier - */ -export interface Cache { - /** - * Put an item into the cache - * @param key - Item's key - * @param item - Item - */ - put (key: K, item: T): void - - /** - * Test if the cache contains an item with a given key - * @param key - Item's key - * @return True if the cache contains the item with the given key, False otherwise - */ - has (key: K): boolean - - /** - * Access an item by its key. - * Each call to get() should be predated by a call to has(), - * to check if the item is in the cache. - * @param key - Item's key - * @return The item with the given key, or null if it was not found - */ - get (key: K): T | null - - /** - * Remove an item from the cache - * @param key - Item's key - */ - delete (key: K): void - - /** - * Get the number of items currently in the cache - * @return The number of items currently in the cache - */ - count (): number -} - -/** - * An async cache is cache which stores collections of items that are built over time. - * Writers will call the update and commit method to update the cache content & mark items as available. - * @author Thomas Minier - */ -export interface AsyncCache { - /** - * Update an item into the cache - * @param key - Item's key - * @param item - Item - * @param writerID - ID of the writer - */ - update (key: K, item: T, writerID: I): void - - /** - * Mark an item as available from the cache - * @param key - Item's key - * @param IwriterID - ID of the writer - */ - commit (key: K, writerID: I): void - - /** - * Test if the cache contains an item with a given key - * @param key - Item's key - * @return True if the cache contains the item with the given key, False otherwise - */ - has (key: K): boolean - - /** - * Access an item by its key. - * Each call to get() should be predated by a call to has() to check if the item is in the cache. 
- * @param key - Item's key - * @return The values of the item with the given key, or null if it was not found - */ - get (key: K): Promise | null - - /** - * Remove an item from the cache - * @param key - Item's key - */ - delete (key: K, writerID: I): void - - /** - * Get the number of items currently in the cache - * @return The number of items currently in the cache - */ - count (): number -} +import { Cache, AsyncCache } from './cache-interfaces' /** * An in-memory LRU cache diff --git a/src/engine/cache/cache-interfaces.ts b/src/engine/cache/cache-interfaces.ts new file mode 100644 index 00000000..e4dc008f --- /dev/null +++ b/src/engine/cache/cache-interfaces.ts @@ -0,0 +1,115 @@ +/* file: cache-interfaces.ts +MIT License + +Copyright (c) 2019-2020 Thomas Minier + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the 'Software'), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+*/ + +'use strict' + +/** + * A cache is a vue that materializes data for latter re-use + * @author Thomas Minier + */ +export interface Cache { + /** + * Put an item into the cache + * @param key - Item's key + * @param item - Item + */ + put (key: K, item: T): void + + /** + * Test if the cache contains an item with a given key + * @param key - Item's key + * @return True if the cache contains the item with the given key, False otherwise + */ + has (key: K): boolean + + /** + * Access an item by its key. + * Each call to get() should be predated by a call to has(), + * to check if the item is in the cache. + * @param key - Item's key + * @return The item with the given key, or null if it was not found + */ + get (key: K): T | null + + /** + * Remove an item from the cache + * @param key - Item's key + */ + delete (key: K): void + + /** + * Get the number of items currently in the cache + * @return The number of items currently in the cache + */ + count (): number +} + +/** + * An async cache is cache which stores collections of items that are built over time. + * Writers will call the update and commit method to update the cache content & mark items as available. + * @author Thomas Minier + */ +export interface AsyncCache { + /** + * Update an item into the cache + * @param key - Item's key + * @param item - Item + * @param writerID - ID of the writer + */ + update (key: K, item: T, writerID: I): void + + /** + * Mark an item as available from the cache + * @param key - Item's key + * @param IwriterID - ID of the writer + */ + commit (key: K, writerID: I): void + + /** + * Test if the cache contains an item with a given key + * @param key - Item's key + * @return True if the cache contains the item with the given key, False otherwise + */ + has (key: K): boolean + + /** + * Access an item by its key. + * Each call to get() should be predated by a call to has() to check if the item is in the cache. 
+ * @param key - Item's key + * @return The values of the item with the given key, or null if it was not found + */ + get (key: K): Promise | null + + /** + * Remove an item from the cache + * @param key - Item's key + */ + delete (key: K, writerID: I): void + + /** + * Get the number of items currently in the cache + * @return The number of items currently in the cache + */ + count (): number +} diff --git a/src/utils.ts b/src/utils.ts index 74860227..aa67c193 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -52,7 +52,7 @@ export namespace rdf { export function tripleEquals (a: Algebra.TripleObject, b: Algebra.TripleObject): boolean { return a.subject === b.subject && a.predicate === b.predicate && a.object === b.object } - + /** * Convert an string RDF Term to a RDFJS representation * @see https://rdf.js.org/data-model-spec From ccb0fdb0d6127e645dfa65967a513d7e832aace8 Mon Sep 17 00:00:00 2001 From: Thomas Minier Date: Sun, 16 Feb 2020 10:06:41 +0100 Subject: [PATCH 19/23] cleanup LRUBGPCache when items slide out from the cache, using a secondary index --- src/engine/cache/bgp-cache.ts | 26 +++++++++++++++++++++----- src/engine/cache/cache-base.ts | 15 ++++++++++++--- tests/cache/bgp-cache-test.js | 2 +- 3 files changed, 34 insertions(+), 9 deletions(-) diff --git a/src/engine/cache/bgp-cache.ts b/src/engine/cache/bgp-cache.ts index fcc07fce..5ddc10cd 100644 --- a/src/engine/cache/bgp-cache.ts +++ b/src/engine/cache/bgp-cache.ts @@ -24,7 +24,7 @@ SOFTWARE. 'use strict' -import { AsyncLRUCache } from './cache-base' +import { AsyncCacheEntry, AsyncLRUCache } from './cache-base' import { AsyncCache } from './cache-interfaces' import { Pipeline } from '../pipeline/pipeline' import { PipelineStage } from '../pipeline/pipeline-engine' @@ -70,7 +70,13 @@ export interface BGPCache extends AsyncCache + // Secondary index: track the triple patterns of each BGP. 
+ // Used to clear the primary index when items slides out from the cache + private readonly _patternsPerBGP: Map + // AsyncCache used to store set of solution bindings private readonly _cache: AsyncLRUCache /** @@ -79,13 +85,19 @@ export class LRUBGPCache implements BGPCache { * @param maxAge - Maximum age in ms */ constructor (maxSize: number, maxAge: number) { + this._patternsPerBGP = new Map() this._allKeys = new BinarySearchTree({ checkValueEquality: (a: SavedBGP, b: SavedBGP) => a.key === b.key }) - this._cache = new AsyncLRUCache(maxSize, maxAge, (key: string) => { + this._cache = new AsyncLRUCache(maxSize, maxAge, (item: AsyncCacheEntry) => { + return item.content.length + }, (key: string) => { // remove index entries when they slide out - // replace key by something correct - // this._allKeys.delete(key, { bgp: [], key }) + if (this._patternsPerBGP.has(key)) { + const bgp = this._patternsPerBGP.get(key)! + bgp.forEach(pattern => this._allKeys.delete(rdf.hashTriple(pattern), { bgp, key })) + this._patternsPerBGP.delete(key) + } }) } @@ -96,6 +108,8 @@ export class LRUBGPCache implements BGPCache { update (bgp: BasicGraphPattern, item: Bindings, writerID: string): void { const key = sparql.hashBGP(bgp) if (!this._cache.has(key)) { + // update the indexes + this._patternsPerBGP.set(key, bgp) bgp.forEach(pattern => this._allKeys.insert(rdf.hashTriple(pattern), { bgp, key })) } this._cache.update(key, item, writerID) @@ -120,6 +134,8 @@ export class LRUBGPCache implements BGPCache { delete (bgp: BasicGraphPattern, writerID: string): void { const key = sparql.hashBGP(bgp) this._cache.delete(key, writerID) + // clear the indexes + this._patternsPerBGP.delete(key) bgp.forEach(pattern => this._allKeys.delete(rdf.hashTriple(pattern), { bgp, key })) } @@ -134,7 +150,7 @@ export class LRUBGPCache implements BGPCache { const searchResults = this._allKeys .search(rdf.hashTriple(pattern)) .filter(v => { - // remove all BGps that are not a subset of the input BGP + // 
remove all BGPs that are not a subset of the input BGP // we use lodash.findIndex + rdf.tripleEquals to check for triple pattern equality return v.bgp.every(a => findIndex(bgp, b => rdf.tripleEquals(a, b)) > -1) }) diff --git a/src/engine/cache/cache-base.ts b/src/engine/cache/cache-base.ts index dc008dfa..f3ac6f69 100644 --- a/src/engine/cache/cache-base.ts +++ b/src/engine/cache/cache-base.ts @@ -38,14 +38,22 @@ export class BaseLRUCache implements Cache { * Constructor * @param maxSize - The maximum size of the cache * @param maxAge - Maximum age in ms + * @param length - Function that is used to calculate the length of stored items * @param onDispose - Function that is called on items when they are dropped from the cache */ - constructor (maxSize: number, maxAge: number, onDispose?: (key: K, item: T) => void) { + constructor (maxSize: number, maxAge: number, length?: (item: T) => number, onDispose?: (key: K, item: T) => void) { const options = { max: maxSize, maxAge, + length, dispose: onDispose } + // if we set a dispose function, we need to turn 'noDisposeOnSet' to True, + // otherwise onDispose will be called each time an item is updated (instead of when it slide out), + // which will break any class extending BaseAsyncCache + if (onDispose !== undefined) { + options['noDisposeOnSet'] = true + } this._content = new LRU(options) } @@ -181,9 +189,10 @@ export class AsyncLRUCache extends BaseAsyncCache { * Constructor * @param maxSize - The maximum size of the cache * @param maxAge - Maximum age in ms + * @param length - Function that is used to calculate the length of stored items * @param onDispose - Function that is called on items when they are dropped from the cache */ - constructor (maxSize: number, maxAge: number, onDispose?: (key: K, item: AsyncCacheEntry) => void) { - super(new BaseLRUCache>(maxSize, maxAge, onDispose)) + constructor (maxSize: number, maxAge: number, length?: (item: AsyncCacheEntry) => number, onDispose?: (key: K, item: 
AsyncCacheEntry) => void) { + super(new BaseLRUCache>(maxSize, maxAge, length, onDispose)) } } diff --git a/tests/cache/bgp-cache-test.js b/tests/cache/bgp-cache-test.js index f00fad28..cad13ba6 100644 --- a/tests/cache/bgp-cache-test.js +++ b/tests/cache/bgp-cache-test.js @@ -31,7 +31,7 @@ const { BindingBase } = require('../../dist/api.js') describe('LRUBGPCache', () => { let cache = null beforeEach(() => { - cache = new LRUBGPCache(Infinity, Infinity) + cache = new LRUBGPCache(0, Infinity) }) describe('#update/commit', () => { From eadb027730c84b4286b8e0a5872301ffea06ec23 Mon Sep 17 00:00:00 2001 From: Thomas Minier Date: Mon, 17 Feb 2020 09:37:59 +0100 Subject: [PATCH 20/23] fix findSubset to use differenceWith to compute the missing patterns --- src/engine/cache/bgp-cache.ts | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/engine/cache/bgp-cache.ts b/src/engine/cache/bgp-cache.ts index 5ddc10cd..3f5e1826 100644 --- a/src/engine/cache/bgp-cache.ts +++ b/src/engine/cache/bgp-cache.ts @@ -32,7 +32,7 @@ import { Bindings } from '../../rdf/bindings' import { Algebra } from 'sparqljs' import { rdf, sparql } from '../../utils' import { BinarySearchTree } from 'binary-search-tree' -import { findIndex, maxBy } from 'lodash' +import { differenceWith, findIndex, maxBy } from 'lodash' // type alias to simplify the type defintion in this file type BasicGraphPattern = Algebra.TripleObject[] @@ -158,12 +158,9 @@ export class LRUBGPCache implements BGPCache { } // compute the largest subset BGP and the missing patterns (missingPatterns = input_BGP - subset_BGP) let foundPatterns: BasicGraphPattern = [] - let missingPatterns: BasicGraphPattern = [] let maxBGPLength = -1 for (let match of matches) { - if (match.searchResults.length === 0) { - missingPatterns.push(match.pattern) - } else { + if (match.searchResults.length > 0) { const localMax = maxBy(match.searchResults, v => v.bgp.length) if (localMax !== undefined && localMax.bgp.length > 
maxBGPLength) { maxBGPLength = localMax.bgp.length @@ -171,6 +168,6 @@ export class LRUBGPCache implements BGPCache { } } } - return [foundPatterns, missingPatterns] + return [foundPatterns, differenceWith(bgp, foundPatterns, rdf.tripleEquals)] } } From cb3a82e4b322ac3d8fe91eb09bb9c37095068b10 Mon Sep 17 00:00:00 2001 From: Thomas Minier Date: Mon, 17 Feb 2020 09:53:30 +0100 Subject: [PATCH 21/23] wire up the semantic cache into the plan builder --- src/engine/cache/bgp-cache.ts | 6 ++++- src/engine/stages/bgp-stage-builder.ts | 8 +++---- src/operators/join/bound-join.ts | 13 +++++----- src/operators/join/rewriting-op.ts | 5 ++-- src/utils.ts | 33 ++++++++++++++++---------- 5 files changed, 39 insertions(+), 26 deletions(-) diff --git a/src/engine/cache/bgp-cache.ts b/src/engine/cache/bgp-cache.ts index 3f5e1826..c4d81566 100644 --- a/src/engine/cache/bgp-cache.ts +++ b/src/engine/cache/bgp-cache.ts @@ -144,7 +144,11 @@ export class LRUBGPCache implements BGPCache { } findSubset (bgp: BasicGraphPattern): [BasicGraphPattern, BasicGraphPattern] { - // find all bgp matching + // if the bgp is in the cache, then the computation is simple + if (this.has(bgp)) { + return [bgp, []] + } + // otherwise, we search for all candidate subsets let matches = [] for (let pattern of bgp) { const searchResults = this._allKeys diff --git a/src/engine/stages/bgp-stage-builder.ts b/src/engine/stages/bgp-stage-builder.ts index e779df1e..1ece3482 100644 --- a/src/engine/stages/bgp-stage-builder.ts +++ b/src/engine/stages/bgp-stage-builder.ts @@ -45,14 +45,14 @@ import boundJoin from '../../operators/join/bound-join' * available * @private */ -function bgpEvaluation (source: PipelineStage, bgp: Algebra.TripleObject[], graph: Graph, context: ExecutionContext) { +function bgpEvaluation (source: PipelineStage, bgp: Algebra.TripleObject[], graph: Graph, builder: BGPStageBuilder, context: ExecutionContext) { const engine = Pipeline.getInstance() return engine.mergeMap(source, (bindings: 
Bindings) => { let boundedBGP = bgp.map(t => bindings.bound(t)) // check the cache let iterator if (context.cachingEnabled()) { - iterator = evaluation.cacheEvalBGP(boundedBGP, graph, context.cache!, context) + iterator = evaluation.cacheEvalBGP(boundedBGP, graph, context.cache!, builder, context) } else { iterator = graph.evalBGP(boundedBGP, context) } @@ -184,9 +184,9 @@ export default class BGPStageBuilder extends StageBuilder { */ _buildIterator (source: PipelineStage, graph: Graph, patterns: Algebra.TripleObject[], context: ExecutionContext): PipelineStage { if (graph._isCapable(GRAPH_CAPABILITY.UNION)) { - return boundJoin(source, patterns, graph, context) + return boundJoin(source, patterns, graph, this, context) } - return bgpEvaluation(source, patterns, graph, context) + return bgpEvaluation(source, patterns, graph, this, context) } /** diff --git a/src/operators/join/bound-join.ts b/src/operators/join/bound-join.ts index 244f0cc4..4f451e2e 100644 --- a/src/operators/join/bound-join.ts +++ b/src/operators/join/bound-join.ts @@ -24,14 +24,15 @@ SOFTWARE. 
'use strict' +import { Algebra } from 'sparqljs' +import { Bindings } from '../../rdf/bindings' import { Pipeline } from '../../engine/pipeline/pipeline' import { PipelineStage, StreamPipelineInput } from '../../engine/pipeline/pipeline-engine' +import { rdf, evaluation } from '../../utils' +import BGPStageBuilder from '../../engine/stages/bgp-stage-builder' import ExecutionContext from '../../engine/context/execution-context' import Graph from '../../rdf/graph' -import { Bindings } from '../../rdf/bindings' -import { rdf, evaluation } from '../../utils' import rewritingOp from './rewriting-op' -import { Algebra } from 'sparqljs' // The default size of the bucket of Basic Graph Patterns used by the Bound Join algorithm const BOUND_JOIN_BUFFER_SIZE = 15 @@ -72,7 +73,7 @@ function rewriteTriple (triple: Algebra.TripleObject, key: number): Algebra.Trip * @param Context - Query execution context * @return A pipeline stage which evaluates the bound join */ -export default function boundJoin (source: PipelineStage, bgp: Algebra.TripleObject[], graph: Graph, context: ExecutionContext) { +export default function boundJoin (source: PipelineStage, bgp: Algebra.TripleObject[], graph: Graph, builder: BGPStageBuilder, context: ExecutionContext) { return Pipeline.getInstance().fromAsync((input: StreamPipelineInput) => { let sourceClosed = false let activeIterators = 0 @@ -104,7 +105,7 @@ export default function boundJoin (source: PipelineStage, bgp: Algebra if (bucket.length === 1 && bucket[0].isEmpty) { let iterator if (context.cachingEnabled()) { - iterator = evaluation.cacheEvalBGP(bgp, graph, context.cache!, context) + iterator = evaluation.cacheEvalBGP(bgp, graph, context.cache!, builder, context) } else { iterator = graph.evalBGP(bgp, context) } @@ -133,7 +134,7 @@ export default function boundJoin (source: PipelineStage, bgp: Algebra key++ }) // Evaluates the bucket using the Sage server - rewritingOp(graph, bgpBucket, rewritingTable, context) + rewritingOp(graph, 
bgpBucket, rewritingTable, builder, context) .subscribe(b => input.next(b), err => input.error(err), () => tryClose()) } }, err => input.error(err), () => { sourceClosed = true }) diff --git a/src/operators/join/rewriting-op.ts b/src/operators/join/rewriting-op.ts index 0519cd93..575afd33 100644 --- a/src/operators/join/rewriting-op.ts +++ b/src/operators/join/rewriting-op.ts @@ -31,6 +31,7 @@ import { Bindings } from '../../rdf/bindings' import { evaluation } from '../../utils' import { Algebra } from 'sparqljs' import { PipelineStage } from '../../engine/pipeline/pipeline-engine' +import BGPStageBuilder from '../../engine/stages/bgp-stage-builder' /** * Find a rewriting key in a list of variables @@ -92,7 +93,7 @@ function rewriteSolutions (bindings: Bindings, rewritingMap: Map, context: ExecutionContext) { +export default function rewritingOp (graph: Graph, bgpBucket: Algebra.TripleObject[][], rewritingTable: Map, builder: BGPStageBuilder, context: ExecutionContext) { let source if (context.cachingEnabled()) { // partition the BGPs that can be evaluated using the cache from the others @@ -100,7 +101,7 @@ export default function rewritingOp (graph: Graph, bgpBucket: Algebra.TripleObje const others: Algebra.TripleObject[][] = [] bgpBucket.forEach(bgp => { if (context.cache!.has(bgp)) { - stages.push(evaluation.cacheEvalBGP(bgp, graph, context.cache!, context)) + stages.push(evaluation.cacheEvalBGP(bgp, graph, context.cache!, builder, context)) } else { others.push(bgp) } diff --git a/src/utils.ts b/src/utils.ts index aa67c193..f3d2c66a 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -24,20 +24,21 @@ SOFTWARE. 
'use strict' -import { Pipeline } from './engine/pipeline/pipeline' -import { PipelineStage } from './engine/pipeline/pipeline-engine' import { Algebra } from 'sparqljs' -import { Bindings } from './rdf/bindings' +import { BGPCache } from './engine/cache/bgp-cache' +import { Bindings, BindingBase } from './rdf/bindings' +import { BlankNode, Literal, NamedNode, Term } from 'rdf-js' import { includes, union } from 'lodash' import { parseZone, Moment, ISO_8601 } from 'moment' -import * as DataFactory from '@rdfjs/data-model' -import { BlankNode, Literal, NamedNode, Term } from 'rdf-js' +import { Pipeline } from './engine/pipeline/pipeline' +import { PipelineStage } from './engine/pipeline/pipeline-engine' import { termToString, stringToTerm } from 'rdf-string' -import { BGPCache } from './engine/cache/bgp-cache' -import Graph from './rdf/graph' -import ExecutionContext from './engine/context/execution-context' import * as crypto from 'crypto' +import * as DataFactory from '@rdfjs/data-model' import * as uuid from 'uuid/v4' +import BGPStageBuilder from './engine/stages/bgp-stage-builder' +import ExecutionContext from './engine/context/execution-context' +import Graph from './rdf/graph' /** * RDF related utilities @@ -553,13 +554,13 @@ export namespace evaluation { * @param cache - Cache used * @return A pipeline stage that produces the evaluation results */ - export function cacheEvalBGP (bgp: Algebra.TripleObject[], graph: Graph, cache: BGPCache, context: ExecutionContext): PipelineStage { - if (cache.has(bgp)) { - return cache.getAsPipeline(bgp) - } else { + export function cacheEvalBGP (bgp: Algebra.TripleObject[], graph: Graph, cache: BGPCache, builder: BGPStageBuilder, context: ExecutionContext): PipelineStage { + const [subsetBGP, missingBGP] = cache.findSubset(bgp) + // case 1: no subset of the BGP are in cache => classic evaluation (most frequent) + if (subsetBGP.length === 0) { // generate an unique writer ID const writerID = uuid() - // put all solutions into 
the cache + // evaluate the BGP while saving all solutions into the cache const iterator = Pipeline.getInstance().tap(graph.evalBGP(bgp, context), b => { cache.update(bgp, b, writerID) }) @@ -568,6 +569,12 @@ export namespace evaluation { cache.commit(bgp, writerID) }) } + // case 2: no missing patterns => the complete BGP is in the cache + if (missingBGP.length === 0) { + return cache.getAsPipeline(bgp) + } + // case 3: evaluate the subset BGP using the cache, then join with the missing patterns + return builder.execute(cache.getAsPipeline(subsetBGP), missingBGP, context) } } From e5c2abaa8059048cfb3cbe7c9ab2d0a24f81fbcf Mon Sep 17 00:00:00 2001 From: Thomas Minier Date: Mon, 17 Feb 2020 09:55:49 +0100 Subject: [PATCH 22/23] doc --- src/engine/plan-builder.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/engine/plan-builder.ts b/src/engine/plan-builder.ts index e398506b..e65cf2d6 100644 --- a/src/engine/plan-builder.ts +++ b/src/engine/plan-builder.ts @@ -167,7 +167,7 @@ export class PlanBuilder { } /** - * Enable Basic Graph Patterns caching for SPARQL query evaluation. + * Enable Basic Graph Patterns semantic caching for SPARQL query evaluation. * The parameter is optional and used to provide your own cache instance. * If left undefined, the query engine will use a {@link LRUBGPCache} with * a maximum of 500 items and a max age of 20 minutes. @@ -182,7 +182,7 @@ export class PlanBuilder { } /** - * Disable Basic Graph Patterns caching for SPARQL query evaluation. + * Disable Basic Graph Patterns semantic caching for SPARQL query evaluation. 
*/ disableCache (): void { this._currentCache = null From 3373651934c43e398f6e587e47e9dd48d575c90a Mon Sep 17 00:00:00 2001 From: Thomas Minier Date: Mon, 17 Feb 2020 10:11:04 +0100 Subject: [PATCH 23/23] update README --- README.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/README.md b/README.md index 28282d8f..5e4c5add 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ An open-source framework for building SPARQL query engines in Javascript/Typescr * Implements advanced *SPARQL query rewriting techniques* for transparently optimizing SPARQL query processing. * Supports [full text search queries](#full-text-search). * Supports [Custom SPARQL functions](#custom-functions). +* Supports [Semantic Caching](#enable-caching), to speed up query evaluation of reccurent patterns. * Supports the [SPARQL UPDATE protocol](https://www.w3.org/TR/2013/REC-sparql11-update-20130321/). * Supports Basic [Federated SPARQL queries](https://www.w3.org/TR/2013/REC-sparql11-federated-query-20130321/) using **SERVICE clauses**. * Customize every step of SPARQL query processing, thanks to *a modular architecture*. @@ -27,6 +28,7 @@ An open-source framework for building SPARQL query engines in Javascript/Typescr * [RDF Graphs](#rdf-graphs) * [RDF Datasets](#rdf-datasets) * [Running a SPARQL query](#running-a-sparql-query) +* [Enable caching](#enable-caching) * [Full text search](#full-text-search) * [Federated SPARQL Queries](#federated-sparql-queries) * [Custom Functions](#custom-functions) @@ -188,6 +190,23 @@ Finally, to run a SPARQL query on your RDF dataset, you need to use the `PlanBui ) ``` +# Enable caching + +The `sparql-engine` provides support for automatic caching of Basic Graph Pattern evaluation using the [Semantic Cache algorithm](https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=1161590). 
 Basically, the cache will save the results of BGPs already evaluated and, when the engine wants to evaluate a BGP, it will look for the largest subset of the BGP in the cache. If one is available, it will re-use the cached results to speed up query processing. + +By default, semantic caching is disabled. You can turn it on/off using the `PlanBuilder.useCache` and `PlanBuilder.disableCache` methods, respectively. The `useCache` method accepts an optional parameter, so you can provide your own implementation of the semantic cache. By default, it uses an in-memory [LRU cache](https://callidon.github.io/sparql-engine/classes/lrubgpcache.html) which stores up to 500MB of items for 20 minutes. + +```javascript +// get an instance of a PlanBuilder +const builder = new PlanBuilder(/* ... */) + +// activate the cache +builder.useCache() + +// disable the cache +builder.disableCache() +``` + +# Full Text Search  The `sparql-engine` provides a non-standard full text search functionnality,