From 8f615fbdee840e674241212caf568f9dd1fdf303 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Tue, 2 Jan 2024 11:33:14 -0800 Subject: [PATCH 1/8] Removed serializer implementations which should be part of their own PR Signed-off-by: Peter Alfonsi --- .../cache/tier/BytesReferenceSerializer.java | 35 ----------- .../tier/BytesReferenceSerializerTests.java | 61 ------------------- .../tier/EhCacheDiskCachingTierTests.java | 35 ----------- 3 files changed, 131 deletions(-) delete mode 100644 server/src/main/java/org/opensearch/common/cache/tier/BytesReferenceSerializer.java delete mode 100644 server/src/test/java/org/opensearch/common/cache/tier/BytesReferenceSerializerTests.java diff --git a/server/src/main/java/org/opensearch/common/cache/tier/BytesReferenceSerializer.java b/server/src/main/java/org/opensearch/common/cache/tier/BytesReferenceSerializer.java deleted file mode 100644 index 55ffe22c2a339..0000000000000 --- a/server/src/main/java/org/opensearch/common/cache/tier/BytesReferenceSerializer.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.common.cache.tier; - -import org.opensearch.core.common.bytes.BytesArray; -import org.opensearch.core.common.bytes.BytesReference; - -import java.io.IOException; -import java.util.Arrays; - -public class BytesReferenceSerializer implements Serializer { - // This class does not get passed to ehcache itself, so it's not required that classes match after deserialization. - - public BytesReferenceSerializer() {} - @Override - public byte[] serialize(BytesReference object) { - return BytesReference.toBytes(object); - } - - @Override - public BytesReference deserialize(byte[] bytes) { - return new BytesArray(bytes); - } - - @Override - public boolean equals(BytesReference object, byte[] bytes) { - return Arrays.equals(serialize(object), bytes); - } -} diff --git a/server/src/test/java/org/opensearch/common/cache/tier/BytesReferenceSerializerTests.java b/server/src/test/java/org/opensearch/common/cache/tier/BytesReferenceSerializerTests.java deleted file mode 100644 index 2fc9c7cbb2756..0000000000000 --- a/server/src/test/java/org/opensearch/common/cache/tier/BytesReferenceSerializerTests.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.common.cache.tier; - -import org.opensearch.common.Randomness; -import org.opensearch.common.bytes.ReleasableBytesReference; -import org.opensearch.common.util.BigArrays; -import org.opensearch.common.util.PageCacheRecycler; -import org.opensearch.core.common.bytes.BytesArray; -import org.opensearch.core.common.bytes.BytesReference; -import org.opensearch.core.common.bytes.CompositeBytesReference; -import org.opensearch.core.common.util.ByteArray; -import org.opensearch.test.OpenSearchTestCase; - -import java.util.Random; - -public class BytesReferenceSerializerTests extends OpenSearchTestCase { - public void testEquality() throws Exception { - BytesReferenceSerializer ser = new BytesReferenceSerializer(); - // Test that values are equal before and after serialization, for each implementation of BytesReference. - byte[] bytesValue = new byte[1000]; - Random rand = Randomness.get(); - rand.nextBytes(bytesValue); - - BytesReference ba = new BytesArray(bytesValue); - byte[] serialized = ser.serialize(ba); - assertTrue(ser.equals(ba, serialized)); - BytesReference deserialized = ser.deserialize(serialized); - assertEquals(ba, deserialized); - - BytesReference cbr = CompositeBytesReference.of(new BytesArray(bytesValue), new BytesArray(bytesValue)); - serialized = ser.serialize(cbr); - assertTrue(ser.equals(cbr, serialized)); - deserialized = ser.deserialize(serialized); - assertEquals(cbr, deserialized); - - // We need the PagedBytesReference to be larger than the page size (16 KB) in order to actually create it - byte[] pbrValue = new byte[PageCacheRecycler.PAGE_SIZE_IN_BYTES * 2]; - rand.nextBytes(pbrValue); - ByteArray arr = BigArrays.NON_RECYCLING_INSTANCE.newByteArray(pbrValue.length); - arr.set(0L, pbrValue, 0, pbrValue.length); - assert !arr.hasArray(); - BytesReference pbr = BytesReference.fromByteArray(arr, pbrValue.length); - serialized = ser.serialize(pbr); - assertTrue(ser.equals(pbr, serialized)); - deserialized = ser.deserialize(serialized); - assertEquals(pbr, deserialized); - - BytesReference rbr = new ReleasableBytesReference(new BytesArray(bytesValue), ReleasableBytesReference.NO_OP); - serialized = ser.serialize(rbr); - assertTrue(ser.equals(rbr, serialized)); - deserialized = ser.deserialize(serialized); - assertEquals(rbr, deserialized); - } -} diff --git a/server/src/test/java/org/opensearch/common/cache/tier/EhCacheDiskCachingTierTests.java b/server/src/test/java/org/opensearch/common/cache/tier/EhCacheDiskCachingTierTests.java index e6222a9065f94..0f7bf51b65546 100644 --- a/server/src/test/java/org/opensearch/common/cache/tier/EhCacheDiskCachingTierTests.java +++ b/server/src/test/java/org/opensearch/common/cache/tier/EhCacheDiskCachingTierTests.java @@ -65,41 +65,6 @@ public void testBasicGetAndPut() throws IOException { } } - public void testBasicGetAndPutBytesReference() throws Exception { - Settings settings = Settings.builder().build(); - try (NodeEnvironment env = newNodeEnvironment(settings)) { - EhCacheDiskCachingTier ehCacheDiskCachingTier = new EhCacheDiskCachingTier.Builder() - .setKeyType(String.class) - .setValueType(BytesReference.class) - .setExpireAfterAccess(TimeValue.MAX_VALUE) - .setSettings(settings) - .setThreadPoolAlias("ehcacheTest") - .setMaximumWeightInBytes(CACHE_SIZE_IN_BYTES * 2) // bigger so no evictions happen - .setStoragePath(env.nodePaths()[0].indicesPath.toString() + "/request_cache") - .setSettingPrefix(SETTING_PREFIX) - .setKeySerializer(new StringSerializer()) - .setValueSerializer(new BytesReferenceSerializer()) - .build(); - int randomKeys = randomIntBetween(10, 100); - int valueLength = 1000; - Random rand = Randomness.get(); - Map keyValueMap = new HashMap<>(); - for (int i = 0; i < randomKeys; i++) { - byte[] valueBytes = new byte[valueLength]; - rand.nextBytes(valueBytes); - keyValueMap.put(UUID.randomUUID().toString(), new BytesArray(valueBytes)); - } - for (Map.Entry entry : keyValueMap.entrySet()) { - ehCacheDiskCachingTier.put(entry.getKey(), entry.getValue()); - } - for (Map.Entry entry : keyValueMap.entrySet()) { - BytesReference value = ehCacheDiskCachingTier.get(entry.getKey()); - assertEquals(entry.getValue(), value); - } - ehCacheDiskCachingTier.close(); - } - } - public void testConcurrentPut() throws Exception { Settings settings = Settings.builder().build(); try (NodeEnvironment env = newNodeEnvironment(settings)) { From 9ef1254bf47dc3363041a473d2ae7ec3cc792a4e Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Fri, 22 Sep 2023 13:32:47 -0700 Subject: [PATCH 2/8] Implements a memory-efficient roaring bitmap-based keystore for use in disk cache tier Signed-off-by: Peter Alfonsi --- .gitignore | 3 +- server/build.gradle | 3 + server/licenses/RoaringBitmap-0.9.49.jar.sha1 | 1 + server/licenses/RoaringBitmap-LICENSE.txt | 191 ++++++++++++ server/licenses/RoaringBitmap-NOTICE.txt | 0 server/licenses/shims-0.9.49.jar.sha1 | 1 + server/licenses/shims-LICENSE.txt | 191 ++++++++++++ server/licenses/shims-NOTICE.txt | 0 .../opensearch/indices/KeyLookupStore.java | 133 ++++++++ .../indices/RBMIntKeyLookupStore.java | 264 ++++++++++++++++ .../opensearch/indices/RBMSizeEstimator.java | 131 ++++++++ .../indices/RBMIntKeyLookupStoreTests.java | 295 ++++++++++++++++++ 12 files changed, 1212 insertions(+), 1 deletion(-) create mode 100644 server/licenses/RoaringBitmap-0.9.49.jar.sha1 create mode 100644 server/licenses/RoaringBitmap-LICENSE.txt create mode 100644 server/licenses/RoaringBitmap-NOTICE.txt create mode 100644 server/licenses/shims-0.9.49.jar.sha1 create mode 100644 server/licenses/shims-LICENSE.txt create mode 100644 server/licenses/shims-NOTICE.txt create mode 100644 server/src/main/java/org/opensearch/indices/KeyLookupStore.java create mode 100644 server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java create mode 100644 server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java create mode 100644 server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java diff --git a/.gitignore b/.gitignore index 7514d55cc3c9a..291a63cdeef92 100644 --- a/.gitignore +++ b/.gitignore @@ -64,4 +64,5 @@ testfixtures_shared/ .ci/jobs/ # build files generated -doc-tools/missing-doclet/bin/ \ No newline at end of file +doc-tools/missing-doclet/bin/ +server/src/main/java/org/opensearch/indices/KLSPerformanceTest.java diff --git a/server/build.gradle b/server/build.gradle index 2c1b247d46a0e..d81f6df99c335 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -161,6 +161,9 @@ dependencies { api "org.ehcache:ehcache:${versions.ehcache}" api "org.slf4j:slf4j-api:${versions.slf4j}" + // roaring bitmaps + api 'org.roaringbitmap:RoaringBitmap:0.9.49' + runtimeOnly 'org.roaringbitmap:shims:0.9.49' testImplementation(project(":test:framework")) { // tests use the locally compiled version of server diff --git a/server/licenses/RoaringBitmap-0.9.49.jar.sha1 b/server/licenses/RoaringBitmap-0.9.49.jar.sha1 new file mode 100644 index 0000000000000..919a73c074b6a --- /dev/null +++ b/server/licenses/RoaringBitmap-0.9.49.jar.sha1 @@ -0,0 +1 @@ +b45b49c1ec5c5fc48580412d0ca635e1833110ea \ No newline at end of file diff --git a/server/licenses/RoaringBitmap-LICENSE.txt b/server/licenses/RoaringBitmap-LICENSE.txt new file mode 100644 index 0000000000000..a890d4a062fad --- /dev/null +++ b/server/licenses/RoaringBitmap-LICENSE.txt @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. + +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. + + Copyright 2013-2016 the RoaringBitmap authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/server/licenses/RoaringBitmap-NOTICE.txt b/server/licenses/RoaringBitmap-NOTICE.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/server/licenses/shims-0.9.49.jar.sha1 b/server/licenses/shims-0.9.49.jar.sha1 new file mode 100644 index 0000000000000..9e76614ca5207 --- /dev/null +++ b/server/licenses/shims-0.9.49.jar.sha1 @@ -0,0 +1 @@ +8bd7794fbdaa9536354dd2d8d961d9503beb9460 \ No newline at end of file diff --git a/server/licenses/shims-LICENSE.txt b/server/licenses/shims-LICENSE.txt new file mode 100644 index 0000000000000..a890d4a062fad --- /dev/null +++ b/server/licenses/shims-LICENSE.txt @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. + +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. + + Copyright 2013-2016 the RoaringBitmap authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/server/licenses/shims-NOTICE.txt b/server/licenses/shims-NOTICE.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/server/src/main/java/org/opensearch/indices/KeyLookupStore.java b/server/src/main/java/org/opensearch/indices/KeyLookupStore.java new file mode 100644 index 0000000000000..60e1386a460ec --- /dev/null +++ b/server/src/main/java/org/opensearch/indices/KeyLookupStore.java @@ -0,0 +1,133 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.indices; + +/** + * An interface for objects that hold an in-memory record of hashes of keys in the disk cache. + * These objects have some internal data structure which stores some transformation of added + * int values. The internal representations may have collisions. Example transformations include a modulo + * or -abs(value), or some combination. + */ +public interface KeyLookupStore { + + /** + * Transforms the input value into the internal representation for this keystore + * and adds it to the internal data structure. + * @param value The value to add. + * @return true if the value was added, false if it wasn't added because of a + * collision or if it was already present. + */ + boolean add(T value) throws Exception; + + /** + * Checks if the transformation of the value is in the keystore. + * @param value The value to check. + * @return true if the value was found, false otherwise. Due to collisions, false positives are + * possible, but there should be no false negatives unless forceRemove() is called. + */ + boolean contains(T value) throws Exception; + + /** + * Returns the transformed version of the input value, that would be used to stored it in the keystore. + * This transformation should be always be the same for a given instance. + * @param value The value to transform. + * @return The transformed value. + */ + T getInternalRepresentation(T value); + + /** + * Attempts to safely remove a value from the internal structure, maintaining the property that contains(value) + * will never return a false negative. If removing would lead to a false negative, the value won't be removed. + * Classes may not implement safe removal. + * @param value The value to attempt to remove. + * @return true if the value was removed, false if it wasn't. + */ + boolean remove(T value) throws Exception; + + /** + * Returns the number of distinct values stored in the internal data structure. + * Does not count values which weren't successfully added due to collisions. + * @return The number of values + */ + int getSize(); + + /** + * Returns the number of times add() has been run, including unsuccessful attempts. + * @return The number of adding attempts. + */ + int getTotalAdds(); + + /** + * Returns the number of times add() has returned false due to a collision. + * @return The number of collisions. + */ + int getCollisions(); + + + /** + * Checks if two values would collide after being transformed by this store's transformation. + * @param value1 The first value to compare. + * @param value2 The second value to compare. + * @return true if the transformations are equal, false otherwise. + */ + boolean isCollision(T value1, T value2); + + /** + * Returns an estimate of the store's memory usage. + * @return The memory usage, in MB + */ + long getMemorySizeInBytes(); + + /** + * Returns the cap for the store's memory usage. + * @return The cap, in bytes + */ + long getMemorySizeCapInBytes(); + + /** + * Returns whether the store is at memory capacity and can't accept more entries + */ + boolean isFull(); + + /** + * Deletes the internal data structure and regenerates it from the values passed in. + * Also resets all stats related to adding. + * @param newValues The keys that should be in the reset structure. + */ + void regenerateStore(T[] newValues) throws Exception; + + /** + * Deletes all keys and resets all stats related to adding. + */ + void clear() throws Exception; +} diff --git a/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java new file mode 100644 index 0000000000000..3789989b5eaf1 --- /dev/null +++ b/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java @@ -0,0 +1,264 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.indices; + +import org.opensearch.common.metrics.CounterMetric; +import org.roaringbitmap.RoaringBitmap; + +import java.util.HashSet; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +/** + * This class implements KeyLookupStore using a roaring bitmap with a modulo applied to values. + * The modulo increases the density of values, which makes RBMs more memory-efficient. The recommended modulo is ~2^28. + * It also maintains a hash set of values which have had collisions. Values which haven't had collisions can be + * safely removed from the store. The fraction of collided values should be low, + * about 0.5% for a store with 10^7 values and a modulo of 2^28. + * The store estimates its memory footprint and will stop adding more values once it reaches its memory cap. + */ +public class RBMIntKeyLookupStore implements KeyLookupStore { + protected final int modulo; + protected class KeyStoreStats { + protected int size; + protected long memSizeCapInBytes; + protected CounterMetric numAddAttempts; + protected CounterMetric numCollisions; + protected boolean guaranteesNoFalseNegatives; + protected int maxNumEntries; + protected boolean atCapacity; + protected CounterMetric numRemovalAttempts; + protected CounterMetric numSuccessfulRemovals; + protected KeyStoreStats(long memSizeCapInBytes, int maxNumEntries) { + this.size = 0; + this.numAddAttempts = new CounterMetric(); + this.numCollisions = new CounterMetric(); + this.memSizeCapInBytes = memSizeCapInBytes; + this.maxNumEntries = maxNumEntries; + this.atCapacity = false; + this.numRemovalAttempts = new CounterMetric(); + this.numSuccessfulRemovals = new CounterMetric(); + } + } + + protected KeyStoreStats stats; + protected RoaringBitmap rbm; + private HashSet collidedInts; + protected RBMSizeEstimator sizeEstimator; + protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); + protected final Lock readLock = lock.readLock(); + protected final Lock writeLock = lock.writeLock(); + + RBMIntKeyLookupStore(int modulo, long memSizeCapInBytes) { + this.modulo = modulo; + sizeEstimator = new RBMSizeEstimator(modulo); + this.stats = new KeyStoreStats(memSizeCapInBytes, calculateMaxNumEntries(memSizeCapInBytes)); + this.rbm = new RoaringBitmap(); + collidedInts = new HashSet<>(); + } + + protected int calculateMaxNumEntries(long memSizeCapInBytes) { + if (memSizeCapInBytes == 0) { + return Integer.MAX_VALUE; + } + return sizeEstimator.getNumEntriesFromSizeInBytes(memSizeCapInBytes); + } + + protected final int transform(int value) { + return modulo == 0 ? value : value % modulo; + } + + protected void handleCollisions(int transformedValue) { + stats.numCollisions.inc(); + collidedInts.add(transformedValue); + } + + @Override + public boolean add(Integer value) throws Exception { + if (value == null) { + return false; + } + writeLock.lock(); + stats.numAddAttempts.inc(); + try { + if (stats.size == stats.maxNumEntries) { + stats.atCapacity = true; + return false; + } + int transformedValue = transform(value); + boolean alreadyContained = contains(value); + if (!alreadyContained) { + rbm.add(transformedValue); + stats.size++; + return true; + } + handleCollisions(transformedValue); + return false; + } finally { + writeLock.unlock(); + } + } + + @Override + public boolean contains(Integer value) throws Exception { + if (value == null) { + return false; + } + int transformedValue = transform(value); + readLock.lock(); + try { + return rbm.contains(transformedValue); + } finally { + readLock.unlock(); + } + } + + @Override + public Integer getInternalRepresentation(Integer value) { + if (value == null) { + return 0; + } + return Integer.valueOf(transform(value)); + } + + @Override + public boolean remove(Integer value) throws Exception { + if (value == null) { + return false; + } + int transformedValue = transform(value); + readLock.lock(); + try { + if (!contains(value)) { + return false; + } + stats.numRemovalAttempts.inc(); + if (collidedInts.contains(transformedValue)) { + return false; + } + } finally { + readLock.unlock(); + } + writeLock.lock(); + try { + rbm.remove(transformedValue); + stats.size--; + stats.numSuccessfulRemovals.inc(); + return true; + } finally { + writeLock.unlock(); + } + } + + @Override + public int getSize() { + readLock.lock(); + try { + return stats.size; + } finally { + readLock.unlock(); + } + } + + @Override + public int getTotalAdds() { + return (int) stats.numAddAttempts.count(); + } + + @Override + public int getCollisions() { + return (int) stats.numCollisions.count(); + } + + + @Override + public boolean isCollision(Integer value1, Integer value2) { + if (value1 == null || value2 == null) { + return false; + } + return transform(value1) == transform(value2); + } + + @Override + public long getMemorySizeInBytes() { + return sizeEstimator.getSizeInBytes(stats.size) + RBMSizeEstimator.getHashsetMemSizeInBytes(collidedInts.size()); + } + + @Override + public long getMemorySizeCapInBytes() { + return stats.memSizeCapInBytes; + } + + @Override + public boolean isFull() { + return stats.atCapacity; + } + + @Override + public void regenerateStore(Integer[] newValues) throws Exception { + rbm.clear(); + collidedInts = new HashSet<>(); + stats.size = 0; + stats.numAddAttempts = new CounterMetric(); + stats.numCollisions = new CounterMetric(); + stats.guaranteesNoFalseNegatives = true; + stats.numRemovalAttempts = new CounterMetric(); + stats.numSuccessfulRemovals = new CounterMetric(); + for (int i = 0; i < newValues.length; i++) { + if (newValues[i] != null) { + add(newValues[i]); + } + } + } + + + + @Override + public void clear() throws Exception { + regenerateStore(new Integer[]{}); + } + public int getNumRemovalAttempts() { + return (int) stats.numRemovalAttempts.count(); + } + + public int getNumSuccessfulRemovals() { + return (int) stats.numSuccessfulRemovals.count(); + } + + public boolean valueHasHadCollision(Integer value) { + if (value == null) { + return false; + } + return collidedInts.contains(transform(value)); + } +} diff --git a/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java b/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java new file mode 100644 index 0000000000000..6e3b8e581ba9f --- /dev/null +++ b/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java @@ -0,0 +1,131 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.indices; + +/** + * A class used to estimate roaring bitmap memory sizes (and hash set sizes). + * Values based on experiments with adding randomly distributed integers, which matches the use case for KeyLookupStore. + * In this use case, true values are much higher than an RBM's self-reported size, especially for small RBMs: see + * https://github.com/RoaringBitmap/RoaringBitmap/issues/257 + */ +public class RBMSizeEstimator { + public static final int BYTES_IN_MB = 1048576; + public static final double HASHSET_MEM_SLOPE = 6.46 * Math.pow(10, -5); + protected final double slope; + protected final double bufferMultiplier; + protected final double intercept; + + RBMSizeEstimator(int modulo) { + double[] memValues = calculateMemoryCoefficients(modulo); + this.bufferMultiplier = memValues[0]; + this.slope = memValues[1]; + this.intercept = memValues[2]; + } + + public static double[] calculateMemoryCoefficients(int modulo) { + // Sets up values to help estimate RBM size given a modulo + // Returns an array of {bufferMultiplier, slope, intercept} + + double modifiedModulo; + if (modulo == 0) { + modifiedModulo = 32.0; + } else { + modifiedModulo = Math.log(modulo) / Math.log(2); + } + // we "round up" the modulo to the nearest tested value + double highCutoff = 29.001; // Floating point makes 29 not work + double mediumCutoff = 28.0; + double lowCutoff = 26.0; + double bufferMultiplier = 1.0; + double slope; + double intercept; + if (modifiedModulo > highCutoff) { + // modulo > 2^29 + bufferMultiplier = 1.2; + slope = 0.637; + intercept = 3.091; + } else if (modifiedModulo > mediumCutoff) { + // 2^29 >= modulo > 2^28 + slope = 0.619; + intercept = 2.993; + } else if (modifiedModulo > lowCutoff) { + // 2^28 >= modulo > 2^26 + slope = 0.614; + intercept = 2.905; + } else { + slope = 0.628; + intercept = 2.603; + } + return new double[] { bufferMultiplier, slope, intercept }; + } + + public long getSizeInBytes(int numEntries) { + // Based on a linear fit in log-log space, so that we minimize the error as a proportion rather than as + // an absolute value. Should be within ~50% of the true value at worst, and should overestimate rather + // than underestimate the memory usage + return (long) ((long) Math.pow(numEntries, slope) * (long) Math.pow(10, intercept) * bufferMultiplier); + } + + public int getNumEntriesFromSizeInBytes(long sizeInBytes) { + // This function has some precision issues especially when composed with its inverse: + // numEntries = getNumEntriesFromSizeInBytes(getSizeInBytes(numEntries)) + // In this case the result can be off by up to a couple percent + // However, this shouldn't really matter as both functions are based on memory estimates with higher errors than a couple percent + // and this composition won't happen outside of tests + return (int) Math.pow(sizeInBytes / (bufferMultiplier * Math.pow(10, intercept)), 1 / slope); + + } + + public static long getSizeInBytesWithModulo(int numEntries, int modulo) { + double[] memValues = calculateMemoryCoefficients(modulo); + return (long) ((long) Math.pow(numEntries, memValues[1]) * (long) Math.pow(10, memValues[2]) * memValues[0]); + } + + public static int getNumEntriesFromSizeInBytesWithModulo(long sizeInBytes, int modulo) { + double[] memValues = calculateMemoryCoefficients(modulo); + return (int) Math.pow(sizeInBytes / (memValues[0] * Math.pow(10, memValues[2])), 1 / memValues[1]); + } + + + protected static long convertMBToBytes(double valMB) { + return (long) (valMB * BYTES_IN_MB); + } + + protected static double convertBytesToMB(long valBytes) { + return (double) valBytes / BYTES_IN_MB; + } + + protected static long getHashsetMemSizeInBytes(int numEntries) { + return convertMBToBytes(HASHSET_MEM_SLOPE * numEntries); + } +} diff --git a/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java new file mode 100644 index 0000000000000..c857c1ecab768 --- /dev/null +++ b/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java @@ -0,0 +1,295 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.indices; + +import org.opensearch.common.Randomness; +import org.opensearch.test.OpenSearchTestCase; + +import java.util.ArrayList; +import java.util.Random; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadPoolExecutor; + +public class RBMIntKeyLookupStoreTests extends OpenSearchTestCase { + public void testInit() { + long memCap = 100 * RBMSizeEstimator.BYTES_IN_MB; + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), memCap); + assertEquals(0, kls.getSize()); + assertEquals(memCap, kls.getMemorySizeCapInBytes()); + } + public void testTransformationLogic() throws Exception { + int modulo = (int) Math.pow(2, 29); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + int offset = 3; + for (int i = 0; i < 4; i++) { // after this we run into max value, but thats not a flaw with the class design + int posValue = i * modulo + offset; + kls.add(posValue); + int negValue = -(i * modulo + offset); + kls.add(negValue); + } + assertEquals(2, kls.getSize()); + int[] testVals = new int[]{0, 1, -1, -23495, 23058, modulo, -modulo, Integer.MAX_VALUE, Integer.MIN_VALUE}; + for (int value : testVals) { + assertTrue(kls.getInternalRepresentation(value) < modulo); + assertTrue(kls.getInternalRepresentation(value) > -modulo); + } + } + + public void testContains() throws Exception { + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (int i = 0; i < 2000; i++) { + kls.add(i); + assertTrue(kls.contains(i)); + } + } + + public void testAddingStatsGetters() throws Exception { + int modulo = (int) Math.pow(2, 15); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(modulo, 0L); + kls.add(15); + kls.add(-15); + assertEquals(2, kls.getTotalAdds()); + assertEquals(0, kls.getCollisions()); + + int offset = 1; + for (int i = 0; i < 10; i++) { + kls.add(i * modulo + offset); + } + assertEquals(12, kls.getTotalAdds()); + assertEquals(9, kls.getCollisions()); + } + + public void testRegenerateStore() throws Exception { + int numToAdd = 10000000; + Random rand = Randomness.get(); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + for (int i = 0; i < numToAdd; i++) { + kls.add(i); + } + assertEquals(numToAdd, kls.getSize()); + Integer[] newVals = new Integer[1000]; // margin accounts for collisions + for (int j = 0; j < newVals.length; j++) { + newVals[j] = rand.nextInt(); + } + kls.regenerateStore(newVals); + assertTrue(Math.abs(kls.getSize() - newVals.length) < 3); // inexact due to collisions + + // test clear() + kls.clear(); + assertEquals(0, kls.getSize()); + } + + public void testAddingDuplicates() throws Exception { + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + int numToAdd = 4820411; + for (int i = 0; i < numToAdd; i++) { + kls.add(i); + kls.add(i); + } + for (int j = 0; j < 1000; j++) { + kls.add(577); + } + assertEquals(numToAdd, kls.getSize()); + } + + public void testMemoryCapBlocksAdd() throws Exception { + int modulo = (int) Math.pow(2, 29); + for (int maxEntries: new int[]{2342000, 1000, 100000}) { + long memSizeCapInBytes = RBMSizeEstimator.getSizeInBytesWithModulo(maxEntries, modulo); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(modulo, memSizeCapInBytes); + for (int j = 0; j < maxEntries + 1000; j++) { + kls.add(j); + } + assertTrue(Math.abs(maxEntries - kls.getSize()) < (double) maxEntries / 25); + // exact cap varies a small amount bc of floating point, especially when we use bytes instead of MB for calculations + // precision gets much worse when we compose the two functions, as we do here, but this wouldn't happen in an actual use case + } + } + + public void testConcurrency() throws Exception { + Random rand = Randomness.get(); + for (int j = 0; j < 5; j++) { // test with different numbers of threads + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + int numThreads = rand.nextInt(50) + 1; + ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(numThreads); + // In this test we want to add the first 200K numbers and check they're all correctly there. + // We do some duplicates too to ensure those aren't incorrectly added. + int amountToAdd = 200000; + ArrayList> wasAdded = new ArrayList<>(amountToAdd); + ArrayList> duplicatesWasAdded = new ArrayList<>(); + for (int i = 0; i < amountToAdd; i++) { + wasAdded.add(null); + } + for (int i = 0; i < amountToAdd; i++) { + final int val = i; + Future fut = executor.submit(() -> { + boolean didAdd; + try { + didAdd = kls.add(val); + } catch (Exception e) { + throw new RuntimeException(e); + } + return didAdd; + }); + wasAdded.set(val, fut); + if (val % 1000 == 0) { + // do a duplicate add + Future duplicateFut = executor.submit(() -> { + boolean didAdd; + try { + didAdd = kls.add(val); + } catch (Exception e) { + throw new RuntimeException(e); + } + return didAdd; + }); + duplicatesWasAdded.add(duplicateFut); + } + } + int originalAdds = 0; + int duplicateAdds = 0; + for (Future fut : wasAdded) { + if (fut.get()) { + originalAdds++; + } + } + for (Future duplicateFut : duplicatesWasAdded) { + if (duplicateFut.get()) { + duplicateAdds++; + } + } + for (int i = 0; i < amountToAdd; i++) { + assertTrue(kls.contains(i)); + } + assertEquals(amountToAdd, originalAdds + duplicateAdds); + assertEquals(amountToAdd, kls.getSize()); + assertEquals(amountToAdd / 1000, kls.getCollisions()); + executor.shutdown(); + } + } + + public void testRemoveNoCollisions() throws Exception { + long memCap = 100L * RBMSizeEstimator.BYTES_IN_MB; + int numToAdd = 195000; + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(0, memCap); + // there should be no collisions for sequential positive numbers up to modulo + for (int i = 0; i < numToAdd; i++) { + kls.add(i); + } + for (int i = 0; i < 1000; i++) { + assertTrue(kls.remove(i)); + assertFalse(kls.contains(i)); + assertFalse(kls.valueHasHadCollision(i)); + } + assertEquals(numToAdd - 1000, kls.getSize()); + } + + public void testRemoveWithCollisions() throws Exception { + int modulo = (int) Math.pow(2, 26); + long memCap = 100L * RBMSizeEstimator.BYTES_IN_MB; + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(modulo, memCap); + for (int i = 0; i < 10; i++) { + kls.add(i); + if (i % 2 == 1) { + kls.add(-i); + assertFalse(kls.valueHasHadCollision(i)); + kls.add(i + modulo); + assertTrue(kls.valueHasHadCollision(i)); + } else { + assertFalse(kls.valueHasHadCollision(i)); + } + } + assertEquals(15, kls.getSize()); + for (int i = 0; i < 10; i++) { + boolean didRemove = kls.remove(i); + if (i % 2 == 1) { + // we expect a collision with i + modulo, so we can't remove + assertFalse(didRemove); + assertTrue(kls.contains(i)); + // but we should be able to remove -i + boolean didRemoveNegative = kls.remove(-i); + assertTrue(didRemoveNegative); + assertFalse(kls.contains(-i)); + } else { + // we expect no collision + assertTrue(didRemove); + assertFalse(kls.contains(i)); + assertFalse(kls.valueHasHadCollision(i)); + } + } + assertEquals(5, kls.getSize()); + int offset = 12; + kls.add(offset); + for (int j = 1; j < 5; j++) { + kls.add(offset + j * modulo); + } + assertEquals(6, kls.getSize()); + assertFalse(kls.remove(offset + modulo)); + assertTrue(kls.valueHasHadCollision(offset + 15 * modulo)); + assertTrue(kls.contains(offset + 17 * modulo)); + } + + public void testNullInputs() throws Exception { + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + assertFalse(kls.add(null)); + assertFalse(kls.contains(null)); + assertEquals(0, (int) kls.getInternalRepresentation(null)); + assertFalse(kls.remove(null)); + assertFalse(kls.isCollision(null, null)); + assertEquals(0, kls.getTotalAdds()); + Integer[] newVals = new Integer[]{1, 17, -2, null, -4, null}; + kls.regenerateStore(newVals); + assertEquals(4, kls.getSize()); + } + + public void testMemoryCapValueInitialization() { + double[] logModulos = new double[] { 0.0, 31.2, 30, 29, 28, 13 }; + double[] expectedMultipliers = new double[] { 1.2, 1.2, 1.2, 1, 1, 1 }; + double[] expectedSlopes = new double[] { 0.637, 0.637, 0.637, 0.619, 0.614, 0.629 }; + double[] expectedIntercepts = new double[] { 3.091, 3.091, 3.091, 2.993, 2.905, 2.603 }; + long memSizeCapInBytes = (long) 100.0 * RBMSizeEstimator.BYTES_IN_MB; + double delta = 0.01; + for (int i = 0; i < logModulos.length; i++) { + int modulo = 0; + if (logModulos[i] != 0) { + modulo = (int) Math.pow(2, logModulos[i]); + } + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(modulo, memSizeCapInBytes); + assertEquals(kls.stats.memSizeCapInBytes, kls.getMemorySizeCapInBytes(), 1.0); + assertEquals(expectedMultipliers[i], kls.sizeEstimator.bufferMultiplier, delta); + assertEquals(expectedSlopes[i], kls.sizeEstimator.slope, delta); + assertEquals(expectedIntercepts[i], kls.sizeEstimator.intercept, delta); + } + + } +} From 00853060dd531f81ab6d63bf96dbae6207ed51fe Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Thu, 2 Nov 2023 16:30:20 -0700 Subject: [PATCH 3/8] Addressed Sagar's comments besides new counter/arraylist setup for removing keys Signed-off-by: Peter Alfonsi --- .../cache/tier/keystore}/KeyLookupStore.java | 5 +- .../cache/tier/keystore/KeyStoreStats.java | 40 ++++++++ .../tier/keystore}/RBMIntKeyLookupStore.java | 98 ++++++++++--------- .../tier/keystore}/RBMSizeEstimator.java | 10 +- .../keystore}/RBMIntKeyLookupStoreTests.java | 67 +++++++------ 5 files changed, 137 insertions(+), 83 deletions(-) rename server/src/main/java/org/opensearch/{indices => common/cache/tier/keystore}/KeyLookupStore.java (98%) create mode 100644 server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyStoreStats.java rename server/src/main/java/org/opensearch/{indices => common/cache/tier/keystore}/RBMIntKeyLookupStore.java (77%) rename server/src/main/java/org/opensearch/{indices => common/cache/tier/keystore}/RBMSizeEstimator.java (92%) rename server/src/test/java/org/opensearch/{indices => common/cache/tier/keystore}/RBMIntKeyLookupStoreTests.java (80%) diff --git a/server/src/main/java/org/opensearch/indices/KeyLookupStore.java b/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyLookupStore.java similarity index 98% rename from server/src/main/java/org/opensearch/indices/KeyLookupStore.java rename to server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyLookupStore.java index 60e1386a460ec..552698e0293eb 100644 --- a/server/src/main/java/org/opensearch/indices/KeyLookupStore.java +++ b/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyLookupStore.java @@ -30,7 +30,7 @@ * GitHub history for details. */ -package org.opensearch.indices; +package org.opensearch.common.cache.tier.keystore; /** * An interface for objects that hold an in-memory record of hashes of keys in the disk cache. @@ -85,7 +85,7 @@ public interface KeyLookupStore { * Returns the number of times add() has been run, including unsuccessful attempts. * @return The number of adding attempts. */ - int getTotalAdds(); + int getAddAttempts(); /** * Returns the number of times add() has returned false due to a collision. @@ -93,7 +93,6 @@ public interface KeyLookupStore { */ int getCollisions(); - /** * Checks if two values would collide after being transformed by this store's transformation. * @param value1 The first value to compare. diff --git a/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyStoreStats.java b/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyStoreStats.java new file mode 100644 index 0000000000000..b5c95b134990c --- /dev/null +++ b/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyStoreStats.java @@ -0,0 +1,40 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.common.cache.tier.keystore; + +import org.opensearch.common.metrics.CounterMetric; + +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * A stats holder for use in KeyLookupStore implementations. + * Getters should be exposed by the KeyLookupStore which uses it. + */ +public class KeyStoreStats { + protected CounterMetric size; + protected long memSizeCapInBytes; + protected CounterMetric numAddAttempts; + protected CounterMetric numCollisions; + protected boolean guaranteesNoFalseNegatives; + protected final int maxNumEntries; + protected AtomicBoolean atCapacity; + protected CounterMetric numRemovalAttempts; + protected CounterMetric numSuccessfulRemovals; + + protected KeyStoreStats(long memSizeCapInBytes, int maxNumEntries) { + this.size = new CounterMetric(); + this.numAddAttempts = new CounterMetric(); + this.numCollisions = new CounterMetric(); + this.memSizeCapInBytes = memSizeCapInBytes; + this.maxNumEntries = maxNumEntries; + this.atCapacity = new AtomicBoolean(false); + this.numRemovalAttempts = new CounterMetric(); + this.numSuccessfulRemovals = new CounterMetric(); + } +} diff --git a/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java similarity index 77% rename from server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java rename to server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java index 3789989b5eaf1..7e91a64758919 100644 --- a/server/src/main/java/org/opensearch/indices/RBMIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java @@ -30,15 +30,16 @@ * GitHub history for details. */ -package org.opensearch.indices; +package org.opensearch.common.cache.tier.keystore; import org.opensearch.common.metrics.CounterMetric; -import org.roaringbitmap.RoaringBitmap; import java.util.HashSet; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantReadWriteLock; +import org.roaringbitmap.RoaringBitmap; + /** * This class implements KeyLookupStore using a roaring bitmap with a modulo applied to values. * The modulo increases the density of values, which makes RBMs more memory-efficient. The recommended modulo is ~2^28. @@ -48,30 +49,29 @@ * The store estimates its memory footprint and will stop adding more values once it reaches its memory cap. */ public class RBMIntKeyLookupStore implements KeyLookupStore { - protected final int modulo; - protected class KeyStoreStats { - protected int size; - protected long memSizeCapInBytes; - protected CounterMetric numAddAttempts; - protected CounterMetric numCollisions; - protected boolean guaranteesNoFalseNegatives; - protected int maxNumEntries; - protected boolean atCapacity; - protected CounterMetric numRemovalAttempts; - protected CounterMetric numSuccessfulRemovals; - protected KeyStoreStats(long memSizeCapInBytes, int maxNumEntries) { - this.size = 0; - this.numAddAttempts = new CounterMetric(); - this.numCollisions = new CounterMetric(); - this.memSizeCapInBytes = memSizeCapInBytes; - this.maxNumEntries = maxNumEntries; - this.atCapacity = false; - this.numRemovalAttempts = new CounterMetric(); - this.numSuccessfulRemovals = new CounterMetric(); + /** + * An enum representing modulo values for use in the keystore + */ + public enum KeystoreModuloValue { + NONE(0), // No modulo applied + TWO_TO_THIRTY_ONE((int) Math.pow(2, 31)), + TWO_TO_TWENTY_NINE((int) Math.pow(2, 29)), // recommended value + TWO_TO_TWENTY_EIGHT((int) Math.pow(2, 28)), + TWO_TO_TWENTY_SIX((int) Math.pow(2, 26)); + + private final int value; + + private KeystoreModuloValue(int value) { + this.value = value; + } + + public int getValue() { + return this.value; } } - protected KeyStoreStats stats; + protected final int modulo; + KeyStoreStats stats; protected RoaringBitmap rbm; private HashSet collidedInts; protected RBMSizeEstimator sizeEstimator; @@ -79,8 +79,13 @@ protected KeyStoreStats(long memSizeCapInBytes, int maxNumEntries) { protected final Lock readLock = lock.readLock(); protected final Lock writeLock = lock.writeLock(); - RBMIntKeyLookupStore(int modulo, long memSizeCapInBytes) { - this.modulo = modulo; + // Default constructor sets modulo = 2^28 + public RBMIntKeyLookupStore(long memSizeCapInBytes) { + this(KeystoreModuloValue.TWO_TO_TWENTY_EIGHT, memSizeCapInBytes); + } + + public RBMIntKeyLookupStore(KeystoreModuloValue moduloValue, long memSizeCapInBytes) { + this.modulo = moduloValue.getValue(); sizeEstimator = new RBMSizeEstimator(modulo); this.stats = new KeyStoreStats(memSizeCapInBytes, calculateMaxNumEntries(memSizeCapInBytes)); this.rbm = new RoaringBitmap(); @@ -94,11 +99,11 @@ protected int calculateMaxNumEntries(long memSizeCapInBytes) { return sizeEstimator.getNumEntriesFromSizeInBytes(memSizeCapInBytes); } - protected final int transform(int value) { + private final int transform(int value) { return modulo == 0 ? value : value % modulo; } - protected void handleCollisions(int transformedValue) { + private void handleCollisions(int transformedValue) { stats.numCollisions.inc(); collidedInts.add(transformedValue); } @@ -108,18 +113,21 @@ public boolean add(Integer value) throws Exception { if (value == null) { return false; } - writeLock.lock(); stats.numAddAttempts.inc(); + if (stats.size.count() == stats.maxNumEntries) { + stats.atCapacity.set(true); + return false; + } + int transformedValue = transform(value); + + writeLock.lock(); try { - if (stats.size == stats.maxNumEntries) { - stats.atCapacity = true; - return false; - } - int transformedValue = transform(value); - boolean alreadyContained = contains(value); + boolean alreadyContained; + // saves calling transform() an additional time + alreadyContained = rbm.contains(transformedValue); if (!alreadyContained) { rbm.add(transformedValue); - stats.size++; + stats.size.inc(); return true; } handleCollisions(transformedValue); @@ -159,7 +167,7 @@ public boolean remove(Integer value) throws Exception { int transformedValue = transform(value); readLock.lock(); try { - if (!contains(value)) { + if (!rbm.contains(transformedValue)) { // saves additional transform() call return false; } stats.numRemovalAttempts.inc(); @@ -172,7 +180,7 @@ public boolean remove(Integer value) throws Exception { writeLock.lock(); try { rbm.remove(transformedValue); - stats.size--; + stats.size.dec(); stats.numSuccessfulRemovals.inc(); return true; } finally { @@ -184,14 +192,14 @@ public boolean remove(Integer value) throws Exception { public int getSize() { readLock.lock(); try { - return stats.size; + return (int) stats.size.count(); } finally { readLock.unlock(); } } @Override - public int getTotalAdds() { + public int getAddAttempts() { return (int) stats.numAddAttempts.count(); } @@ -200,7 +208,6 @@ public int getCollisions() { return (int) stats.numCollisions.count(); } - @Override public boolean isCollision(Integer value1, Integer value2) { if (value1 == null || value2 == null) { @@ -211,7 +218,7 @@ public boolean isCollision(Integer value1, Integer value2) { @Override public long getMemorySizeInBytes() { - return sizeEstimator.getSizeInBytes(stats.size) + RBMSizeEstimator.getHashsetMemSizeInBytes(collidedInts.size()); + return sizeEstimator.getSizeInBytes((int) stats.size.count()) + RBMSizeEstimator.getHashsetMemSizeInBytes(collidedInts.size()); } @Override @@ -221,14 +228,14 @@ public long getMemorySizeCapInBytes() { @Override public boolean isFull() { - return stats.atCapacity; + return stats.atCapacity.get(); } @Override public void regenerateStore(Integer[] newValues) throws Exception { rbm.clear(); collidedInts = new HashSet<>(); - stats.size = 0; + stats.size = new CounterMetric(); stats.numAddAttempts = new CounterMetric(); stats.numCollisions = new CounterMetric(); stats.guaranteesNoFalseNegatives = true; @@ -241,12 +248,11 @@ public void regenerateStore(Integer[] newValues) throws Exception { } } - - @Override public void clear() throws Exception { - regenerateStore(new Integer[]{}); + regenerateStore(new Integer[] {}); } + public int getNumRemovalAttempts() { return (int) stats.numRemovalAttempts.count(); } diff --git a/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java b/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMSizeEstimator.java similarity index 92% rename from server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java rename to server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMSizeEstimator.java index 6e3b8e581ba9f..e07c645bad0cf 100644 --- a/server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java +++ b/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMSizeEstimator.java @@ -30,7 +30,9 @@ * GitHub history for details. */ -package org.opensearch.indices; +package org.opensearch.common.cache.tier.keystore; + +import org.opensearch.common.cache.tier.keystore.RBMIntKeyLookupStore; /** * A class used to estimate roaring bitmap memory sizes (and hash set sizes). @@ -111,12 +113,16 @@ public static long getSizeInBytesWithModulo(int numEntries, int modulo) { return (long) ((long) Math.pow(numEntries, memValues[1]) * (long) Math.pow(10, memValues[2]) * memValues[0]); } + public static long getSizeInBytesWithModuloValue(int numEntries, RBMIntKeyLookupStore.KeystoreModuloValue moduloValue) { + double[] memValues = calculateMemoryCoefficients(moduloValue.getValue()); + return (long) ((long) Math.pow(numEntries, memValues[1]) * (long) Math.pow(10, memValues[2]) * memValues[0]); + } + public static int getNumEntriesFromSizeInBytesWithModulo(long sizeInBytes, int modulo) { double[] memValues = calculateMemoryCoefficients(modulo); return (int) Math.pow(sizeInBytes / (memValues[0] * Math.pow(10, memValues[2])), 1 / memValues[1]); } - protected static long convertMBToBytes(double valMB) { return (long) (valMB * BYTES_IN_MB); } diff --git a/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java similarity index 80% rename from server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java rename to server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java index c857c1ecab768..08ab08462373b 100644 --- a/server/src/test/java/org/opensearch/indices/RBMIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java @@ -29,9 +29,11 @@ * GitHub history for details. */ -package org.opensearch.indices; +package org.opensearch.common.cache.tier.keystore; import org.opensearch.common.Randomness; +import org.opensearch.common.cache.tier.keystore.RBMIntKeyLookupStore; +import org.opensearch.common.cache.tier.keystore.RBMSizeEstimator; import org.opensearch.test.OpenSearchTestCase; import java.util.ArrayList; @@ -43,13 +45,15 @@ public class RBMIntKeyLookupStoreTests extends OpenSearchTestCase { public void testInit() { long memCap = 100 * RBMSizeEstimator.BYTES_IN_MB; - RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), memCap); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(memCap); assertEquals(0, kls.getSize()); + assertEquals(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_EIGHT.getValue(), kls.modulo); assertEquals(memCap, kls.getMemorySizeCapInBytes()); } + public void testTransformationLogic() throws Exception { int modulo = (int) Math.pow(2, 29); - RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE, 0L); int offset = 3; for (int i = 0; i < 4; i++) { // after this we run into max value, but thats not a flaw with the class design int posValue = i * modulo + offset; @@ -58,7 +62,7 @@ public void testTransformationLogic() throws Exception { kls.add(negValue); } assertEquals(2, kls.getSize()); - int[] testVals = new int[]{0, 1, -1, -23495, 23058, modulo, -modulo, Integer.MAX_VALUE, Integer.MIN_VALUE}; + int[] testVals = new int[] { 0, 1, -1, -23495, 23058, modulo, -modulo, Integer.MAX_VALUE, Integer.MIN_VALUE }; for (int value : testVals) { assertTrue(kls.getInternalRepresentation(value) < modulo); assertTrue(kls.getInternalRepresentation(value) > -modulo); @@ -66,7 +70,7 @@ public void testTransformationLogic() throws Exception { } public void testContains() throws Exception { - RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE, 0L); for (int i = 0; i < 2000; i++) { kls.add(i); assertTrue(kls.contains(i)); @@ -74,25 +78,25 @@ public void testContains() throws Exception { } public void testAddingStatsGetters() throws Exception { - int modulo = (int) Math.pow(2, 15); - RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(modulo, 0L); + RBMIntKeyLookupStore.KeystoreModuloValue moduloValue = RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_SIX; + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(moduloValue, 0L); kls.add(15); kls.add(-15); - assertEquals(2, kls.getTotalAdds()); + assertEquals(2, kls.getAddAttempts()); assertEquals(0, kls.getCollisions()); int offset = 1; for (int i = 0; i < 10; i++) { - kls.add(i * modulo + offset); + kls.add(i * moduloValue.getValue() + offset); } - assertEquals(12, kls.getTotalAdds()); + assertEquals(12, kls.getAddAttempts()); assertEquals(9, kls.getCollisions()); } public void testRegenerateStore() throws Exception { int numToAdd = 10000000; Random rand = Randomness.get(); - RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE, 0L); for (int i = 0; i < numToAdd; i++) { kls.add(i); } @@ -110,7 +114,7 @@ public void testRegenerateStore() throws Exception { } public void testAddingDuplicates() throws Exception { - RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(0L); int numToAdd = 4820411; for (int i = 0; i < numToAdd; i++) { kls.add(i); @@ -123,10 +127,10 @@ public void testAddingDuplicates() throws Exception { } public void testMemoryCapBlocksAdd() throws Exception { - int modulo = (int) Math.pow(2, 29); - for (int maxEntries: new int[]{2342000, 1000, 100000}) { - long memSizeCapInBytes = RBMSizeEstimator.getSizeInBytesWithModulo(maxEntries, modulo); - RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(modulo, memSizeCapInBytes); + RBMIntKeyLookupStore.KeystoreModuloValue moduloValue = RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE; + for (int maxEntries : new int[] { 2342000, 1000, 100000 }) { + long memSizeCapInBytes = RBMSizeEstimator.getSizeInBytesWithModuloValue(maxEntries, moduloValue); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(moduloValue, memSizeCapInBytes); for (int j = 0; j < maxEntries + 1000; j++) { kls.add(j); } @@ -139,7 +143,7 @@ public void testMemoryCapBlocksAdd() throws Exception { public void testConcurrency() throws Exception { Random rand = Randomness.get(); for (int j = 0; j < 5; j++) { // test with different numbers of threads - RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE, 0L); int numThreads = rand.nextInt(50) + 1; ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(numThreads); // In this test we want to add the first 200K numbers and check they're all correctly there. @@ -201,7 +205,7 @@ public void testConcurrency() throws Exception { public void testRemoveNoCollisions() throws Exception { long memCap = 100L * RBMSizeEstimator.BYTES_IN_MB; int numToAdd = 195000; - RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(0, memCap); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.NONE, memCap); // there should be no collisions for sequential positive numbers up to modulo for (int i = 0; i < numToAdd; i++) { kls.add(i); @@ -217,7 +221,7 @@ public void testRemoveNoCollisions() throws Exception { public void testRemoveWithCollisions() throws Exception { int modulo = (int) Math.pow(2, 26); long memCap = 100L * RBMSizeEstimator.BYTES_IN_MB; - RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(modulo, memCap); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_SIX, memCap); for (int i = 0; i < 10; i++) { kls.add(i); if (i % 2 == 1) { @@ -260,31 +264,30 @@ public void testRemoveWithCollisions() throws Exception { } public void testNullInputs() throws Exception { - RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), 0L); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE, 0L); assertFalse(kls.add(null)); assertFalse(kls.contains(null)); assertEquals(0, (int) kls.getInternalRepresentation(null)); assertFalse(kls.remove(null)); assertFalse(kls.isCollision(null, null)); - assertEquals(0, kls.getTotalAdds()); - Integer[] newVals = new Integer[]{1, 17, -2, null, -4, null}; + assertEquals(0, kls.getAddAttempts()); + Integer[] newVals = new Integer[] { 1, 17, -2, null, -4, null }; kls.regenerateStore(newVals); assertEquals(4, kls.getSize()); } public void testMemoryCapValueInitialization() { - double[] logModulos = new double[] { 0.0, 31.2, 30, 29, 28, 13 }; - double[] expectedMultipliers = new double[] { 1.2, 1.2, 1.2, 1, 1, 1 }; - double[] expectedSlopes = new double[] { 0.637, 0.637, 0.637, 0.619, 0.614, 0.629 }; - double[] expectedIntercepts = new double[] { 3.091, 3.091, 3.091, 2.993, 2.905, 2.603 }; + // double[] logModulos = new double[] { 0.0, 31.2, 30, 29, 28, 13 }; + // 0, 31, 29, 28, 26 + RBMIntKeyLookupStore.KeystoreModuloValue[] mods = RBMIntKeyLookupStore.KeystoreModuloValue.values(); + double[] expectedMultipliers = new double[] { 1.2, 1.2, 1, 1, 1 }; + double[] expectedSlopes = new double[] { 0.637, 0.637, 0.619, 0.614, 0.629 }; + double[] expectedIntercepts = new double[] { 3.091, 3.091, 2.993, 2.905, 2.603 }; // check the numbers closer later long memSizeCapInBytes = (long) 100.0 * RBMSizeEstimator.BYTES_IN_MB; double delta = 0.01; - for (int i = 0; i < logModulos.length; i++) { - int modulo = 0; - if (logModulos[i] != 0) { - modulo = (int) Math.pow(2, logModulos[i]); - } - RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(modulo, memSizeCapInBytes); + for (int i = 0; i < mods.length; i++) { + RBMIntKeyLookupStore.KeystoreModuloValue moduloValue = mods[i]; + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(moduloValue, memSizeCapInBytes); assertEquals(kls.stats.memSizeCapInBytes, kls.getMemorySizeCapInBytes(), 1.0); assertEquals(expectedMultipliers[i], kls.sizeEstimator.bufferMultiplier, delta); assertEquals(expectedSlopes[i], kls.sizeEstimator.slope, delta); From cf8a80626f2042ca832bd95db930b503a7aa816d Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 6 Nov 2023 13:57:34 -0800 Subject: [PATCH 4/8] Implemented/tested counter+removal list setup to allow more removals Signed-off-by: Peter Alfonsi --- .../tier/keystore/RBMIntKeyLookupStore.java | 91 +++++++++++++-- .../keystore/RBMIntKeyLookupStoreTests.java | 108 +++++++++++++++++- 2 files changed, 184 insertions(+), 15 deletions(-) diff --git a/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java index 7e91a64758919..f9696e62a88b7 100644 --- a/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java @@ -34,6 +34,8 @@ import org.opensearch.common.metrics.CounterMetric; +import java.util.ArrayList; +import java.util.HashMap; import java.util.HashSet; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ -73,7 +75,8 @@ public int getValue() { protected final int modulo; KeyStoreStats stats; protected RoaringBitmap rbm; - private HashSet collidedInts; + private HashMap collidedIntCounters; + private HashMap> removalSets; protected RBMSizeEstimator sizeEstimator; protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); protected final Lock readLock = lock.readLock(); @@ -89,7 +92,8 @@ public RBMIntKeyLookupStore(KeystoreModuloValue moduloValue, long memSizeCapInBy sizeEstimator = new RBMSizeEstimator(modulo); this.stats = new KeyStoreStats(memSizeCapInBytes, calculateMaxNumEntries(memSizeCapInBytes)); this.rbm = new RoaringBitmap(); - collidedInts = new HashSet<>(); + this.collidedIntCounters = new HashMap<>(); + this.removalSets = new HashMap<>(); } protected int calculateMaxNumEntries(long memSizeCapInBytes) { @@ -105,7 +109,14 @@ private final int transform(int value) { private void handleCollisions(int transformedValue) { stats.numCollisions.inc(); - collidedInts.add(transformedValue); + CounterMetric numCollisions = collidedIntCounters.get(transformedValue); + if (numCollisions == null) { // First time the transformedValue has had a collision + numCollisions = new CounterMetric(); + numCollisions.inc(2); + collidedIntCounters.put(transformedValue, numCollisions); // Initialize the number of colliding keys to 2 + } else { + numCollisions.inc(); + } } @Override @@ -130,6 +141,16 @@ public boolean add(Integer value) throws Exception { stats.size.inc(); return true; } + // If the value is already pending removal, take it out of the removalList + HashSet removalSet = removalSets.get(transformedValue); + if (removalSet != null) { + removalSet.remove(value); + // Don't increment the counter - this is handled by handleCollisions() later + if (removalSet.isEmpty()) { + removalSets.remove(transformedValue); + } + } + handleCollisions(transformedValue); return false; } finally { @@ -159,6 +180,13 @@ public Integer getInternalRepresentation(Integer value) { return Integer.valueOf(transform(value)); } + /** + * Attempts to remove a value from the keystore. WARNING: Removing keys which have not been added to the keystore + * may cause undefined behavior, including future false negatives!! + * @param value The value to attempt to remove. + * @return true if the value was removed, false otherwise + * @throws Exception + */ @Override public boolean remove(Integer value) throws Exception { if (value == null) { @@ -170,24 +198,54 @@ public boolean remove(Integer value) throws Exception { if (!rbm.contains(transformedValue)) { // saves additional transform() call return false; } + // move below into write lock stats.numRemovalAttempts.inc(); - if (collidedInts.contains(transformedValue)) { - return false; - } } finally { readLock.unlock(); } writeLock.lock(); try { - rbm.remove(transformedValue); - stats.size.dec(); - stats.numSuccessfulRemovals.inc(); + CounterMetric numCollisions = collidedIntCounters.get(transformedValue); + if (numCollisions != null) { + // This transformed value has had a collision before + HashSet removalSet = removalSets.get(transformedValue); + if (removalSet == null) { + // First time a removal has been attempted for this transformed value + HashSet newRemovalSet = new HashSet<>(); + newRemovalSet.add(value); // Add the key value, not the transformed value, to the list of attempted removals for this transformedValue + removalSets.put(transformedValue, newRemovalSet); + numCollisions.dec(); + } else { + if (removalSet.contains(value)) { + return false; // We have already attempted to remove this value. Do nothing + } + removalSet.add(value); + numCollisions.dec(); + // If numCollisions has reached zero, we can safely remove all values in removalList + if (numCollisions.count() == 0) { + removeFromRBM(transformedValue); + collidedIntCounters.remove(transformedValue); + removalSets.remove(transformedValue); + return true; + } + } + return false; + } + // Otherwise, there's not been a collision for this transformedValue, so we can safely remove + removeFromRBM(transformedValue); return true; } finally { writeLock.unlock(); } } + // Helper fn for remove() + private void removeFromRBM(int transformedValue) { + rbm.remove(transformedValue); + stats.size.dec(); + stats.numSuccessfulRemovals.inc(); + } + @Override public int getSize() { readLock.lock(); @@ -218,7 +276,7 @@ public boolean isCollision(Integer value1, Integer value2) { @Override public long getMemorySizeInBytes() { - return sizeEstimator.getSizeInBytes((int) stats.size.count()) + RBMSizeEstimator.getHashsetMemSizeInBytes(collidedInts.size()); + return sizeEstimator.getSizeInBytes((int) stats.size.count()); // + RBMSizeEstimator.getHashsetMemSizeInBytes(collidedInts.size()); } @Override @@ -234,7 +292,8 @@ public boolean isFull() { @Override public void regenerateStore(Integer[] newValues) throws Exception { rbm.clear(); - collidedInts = new HashSet<>(); + collidedIntCounters = new HashMap<>(); + removalSets = new HashMap<>(); stats.size = new CounterMetric(); stats.numAddAttempts = new CounterMetric(); stats.numCollisions = new CounterMetric(); @@ -265,6 +324,14 @@ public boolean valueHasHadCollision(Integer value) { if (value == null) { return false; } - return collidedInts.contains(transform(value)); + return collidedIntCounters.containsKey(transform(value)); + } + + CounterMetric getNumCollisionsForValue(int value) { // package private for testing + return collidedIntCounters.get(transform(value)); + } + + HashSet getRemovalSetForValue(int value) { + return removalSets.get(transform(value)); } } diff --git a/server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java index 08ab08462373b..0e9d63a9731a5 100644 --- a/server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java @@ -34,9 +34,11 @@ import org.opensearch.common.Randomness; import org.opensearch.common.cache.tier.keystore.RBMIntKeyLookupStore; import org.opensearch.common.cache.tier.keystore.RBMSizeEstimator; +import org.opensearch.common.metrics.CounterMetric; import org.opensearch.test.OpenSearchTestCase; import java.util.ArrayList; +import java.util.HashSet; import java.util.Random; import java.util.concurrent.Executors; import java.util.concurrent.Future; @@ -277,12 +279,10 @@ public void testNullInputs() throws Exception { } public void testMemoryCapValueInitialization() { - // double[] logModulos = new double[] { 0.0, 31.2, 30, 29, 28, 13 }; - // 0, 31, 29, 28, 26 RBMIntKeyLookupStore.KeystoreModuloValue[] mods = RBMIntKeyLookupStore.KeystoreModuloValue.values(); double[] expectedMultipliers = new double[] { 1.2, 1.2, 1, 1, 1 }; double[] expectedSlopes = new double[] { 0.637, 0.637, 0.619, 0.614, 0.629 }; - double[] expectedIntercepts = new double[] { 3.091, 3.091, 2.993, 2.905, 2.603 }; // check the numbers closer later + double[] expectedIntercepts = new double[] { 3.091, 3.091, 2.993, 2.905, 2.603 }; long memSizeCapInBytes = (long) 100.0 * RBMSizeEstimator.BYTES_IN_MB; double delta = 0.01; for (int i = 0; i < mods.length; i++) { @@ -295,4 +295,106 @@ public void testMemoryCapValueInitialization() { } } + + public void testRemovalLogic() throws Exception { + RBMIntKeyLookupStore.KeystoreModuloValue moduloValue = RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_SIX; + int modulo = moduloValue.getValue(); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(moduloValue, 0L); + + // Test standard sequence: add K1, K2, K3 which all transform to C, then: + // Remove K3 + // Remove K2, re-add it, re-remove it twice (duplicate should do nothing) + // Remove K1, which should finally actually remove everything + int c = -42; + int k1 = c + modulo; + int k2 = c + 2 * modulo; + int k3 = c + 3 * modulo; + kls.add(k1); + assertTrue(kls.contains(k1)); + assertTrue(kls.contains(k3)); + kls.add(k2); + CounterMetric numCollisions = kls.getNumCollisionsForValue(k2); + assertNotNull(numCollisions); + assertEquals(2, numCollisions.count()); + kls.add(k3); + assertEquals(3, numCollisions.count()); + assertEquals(1, kls.getSize()); + + boolean removed = kls.remove(k3); + assertFalse(removed); + HashSet removalSet = kls.getRemovalSetForValue(k3); + assertEquals(1, removalSet.size()); + assertTrue(removalSet.contains(k3)); + assertEquals(2, numCollisions.count()); + assertEquals(1, kls.getSize()); + + removed = kls.remove(k2); + assertFalse(removed); + assertEquals(2, removalSet.size()); + assertTrue(removalSet.contains(k2)); + assertEquals(1, numCollisions.count()); + assertEquals(1, kls.getSize()); + + kls.add(k2); + assertEquals(1, removalSet.size()); + assertFalse(removalSet.contains(k2)); + assertEquals(2, numCollisions.count()); + assertEquals(1, kls.getSize()); + + removed = kls.remove(k2); + assertFalse(removed); + assertEquals(2, removalSet.size()); + assertTrue(removalSet.contains(k2)); + assertEquals(1, numCollisions.count()); + assertEquals(1, kls.getSize()); + + removed = kls.remove(k2); + assertFalse(removed); + assertEquals(2, removalSet.size()); + assertTrue(removalSet.contains(k2)); + assertEquals(1, numCollisions.count()); + assertEquals(1, kls.getSize()); + + removed = kls.remove(k1); + assertTrue(removed); + assertNull(kls.getRemovalSetForValue(k1)); + assertNull(kls.getNumCollisionsForValue(k1)); + assertEquals(0, kls.getSize()); + } + + public void testRemovalLogicWithHashCollision() throws Exception { + RBMIntKeyLookupStore.KeystoreModuloValue moduloValue = RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_SIX; + int modulo = moduloValue.getValue(); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(moduloValue, 0L); + + // Test adding K1 twice (maybe two keys hash to K1), then removing it twice. + // We expect it to be unable to remove the last one, but there should be no false negatives. + int c = 77; + int k1 = c + modulo; + int k2 = c + 2 * modulo; + kls.add(k1); + kls.add(k2); + CounterMetric numCollisions = kls.getNumCollisionsForValue(k1); + assertEquals(2, numCollisions.count()); + kls.add(k1); + assertEquals(3, numCollisions.count()); + + boolean removed = kls.remove(k1); + assertFalse(removed); + HashSet removalSet = kls.getRemovalSetForValue(k1); + assertTrue(removalSet.contains(k1)); + assertEquals(2, numCollisions.count()); + + removed = kls.remove(k2); + assertFalse(removed); + assertTrue(removalSet.contains(k2)); + assertEquals(1, numCollisions.count()); + + removed = kls.remove(k1); + assertFalse(removed); + assertTrue(removalSet.contains(k1)); + assertEquals(1, numCollisions.count()); + assertTrue(kls.contains(k1)); + assertTrue(kls.contains(k2)); + } } From 35a22c2f4ca864e2f99714566cb5523d97621b01 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 6 Nov 2023 15:17:55 -0800 Subject: [PATCH 5/8] Integrated keystore --- .../cache/tier/EhCacheDiskCachingTier.java | 16 +++++++++++++++- .../cache/tier/keystore/KeyLookupStore.java | 10 +++++----- .../tier/keystore/RBMIntKeyLookupStore.java | 11 +++++------ 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/server/src/main/java/org/opensearch/common/cache/tier/EhCacheDiskCachingTier.java b/server/src/main/java/org/opensearch/common/cache/tier/EhCacheDiskCachingTier.java index fad0c5b1f8552..1fcdd6963794a 100644 --- a/server/src/main/java/org/opensearch/common/cache/tier/EhCacheDiskCachingTier.java +++ b/server/src/main/java/org/opensearch/common/cache/tier/EhCacheDiskCachingTier.java @@ -14,6 +14,7 @@ import org.opensearch.common.cache.RemovalListener; import org.opensearch.common.cache.RemovalNotification; import org.opensearch.common.cache.RemovalReason; +import org.opensearch.common.cache.tier.keystore.RBMIntKeyLookupStore; import org.opensearch.common.metrics.CounterMetric; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; @@ -42,6 +43,7 @@ import org.ehcache.event.EventType; import org.ehcache.expiry.ExpiryPolicy; import org.ehcache.impl.config.store.disk.OffHeapDiskStoreConfiguration; +import org.opensearch.core.common.unit.ByteSizeValue; /** * @param The key type of cache entries @@ -92,6 +94,7 @@ public class EhCacheDiskCachingTier implements DiskCachingTier { // Defines how many segments the disk cache is separated into. Higher number achieves greater concurrency but // will hold that many file pointers. public final Setting DISK_SEGMENTS; + private final RBMIntKeyLookupStore keystore; private final Serializer keySerializer; private final Serializer valueSerializer; @@ -124,6 +127,11 @@ private EhCacheDiskCachingTier(Builder builder) { close(); cacheManager = buildCacheManager(); this.cache = buildCache(Duration.ofMillis(expireAfterAccess.getMillis()), builder); + + // IndicesRequestCache gets 1%, of which we allocate 5% to the keystore = 0.05% + // TODO: how do we change this automatically based on INDICES_CACHE_QUERY_SIZE setting? + Setting keystoreSizeSetting = Setting.memorySizeSetting(builder.settingPrefix + ".tiered.disk.keystore_size", "0.05%"); + this.keystore = new RBMIntKeyLookupStore(keystoreSizeSetting.get(this.settings).getBytes()); } private PersistentCacheManager buildCacheManager() { @@ -193,12 +201,16 @@ private CacheEventListenerConfigurationBuilder getListenerConfiguration(Builder< @Override public V get(K key) { - return valueSerializer.deserialize(cache.get(key)); + if (keystore.contains(key.hashCode())) { // Check in-memory store of key hashes to avoid unnecessary disk seek + return valueSerializer.deserialize(cache.get(key)); + } + return null; } @Override public void put(K key, V value) { cache.put(key, valueSerializer.serialize(value)); + keystore.add(key.hashCode()); } @Override @@ -211,6 +223,7 @@ public V computeIfAbsent(K key, TieredCacheLoader loader) throws Exception public void invalidate(K key) { // There seems to be a thread leak issue while calling this and then closing cache. cache.remove(key); + keystore.remove(key.hashCode()); } @Override @@ -227,6 +240,7 @@ public void setRemovalListener(RemovalListener removalListener) { @Override public void invalidateAll() { // Clear up files. + keystore.clear(); } @Override diff --git a/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyLookupStore.java b/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyLookupStore.java index 552698e0293eb..a08174b5b46d3 100644 --- a/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyLookupStore.java +++ b/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyLookupStore.java @@ -47,7 +47,7 @@ public interface KeyLookupStore { * @return true if the value was added, false if it wasn't added because of a * collision or if it was already present. */ - boolean add(T value) throws Exception; + boolean add(T value); /** * Checks if the transformation of the value is in the keystore. @@ -55,7 +55,7 @@ public interface KeyLookupStore { * @return true if the value was found, false otherwise. Due to collisions, false positives are * possible, but there should be no false negatives unless forceRemove() is called. */ - boolean contains(T value) throws Exception; + boolean contains(T value); /** * Returns the transformed version of the input value, that would be used to stored it in the keystore. @@ -72,7 +72,7 @@ public interface KeyLookupStore { * @param value The value to attempt to remove. * @return true if the value was removed, false if it wasn't. */ - boolean remove(T value) throws Exception; + boolean remove(T value); /** * Returns the number of distinct values stored in the internal data structure. @@ -123,10 +123,10 @@ public interface KeyLookupStore { * Also resets all stats related to adding. * @param newValues The keys that should be in the reset structure. */ - void regenerateStore(T[] newValues) throws Exception; + void regenerateStore(T[] newValues); /** * Deletes all keys and resets all stats related to adding. */ - void clear() throws Exception; + void clear(); } diff --git a/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java index f9696e62a88b7..9dba9e8165d40 100644 --- a/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java @@ -120,7 +120,7 @@ private void handleCollisions(int transformedValue) { } @Override - public boolean add(Integer value) throws Exception { + public boolean add(Integer value) { if (value == null) { return false; } @@ -159,7 +159,7 @@ public boolean add(Integer value) throws Exception { } @Override - public boolean contains(Integer value) throws Exception { + public boolean contains(Integer value) { if (value == null) { return false; } @@ -185,10 +185,9 @@ public Integer getInternalRepresentation(Integer value) { * may cause undefined behavior, including future false negatives!! * @param value The value to attempt to remove. * @return true if the value was removed, false otherwise - * @throws Exception */ @Override - public boolean remove(Integer value) throws Exception { + public boolean remove(Integer value) { if (value == null) { return false; } @@ -290,7 +289,7 @@ public boolean isFull() { } @Override - public void regenerateStore(Integer[] newValues) throws Exception { + public void regenerateStore(Integer[] newValues) { rbm.clear(); collidedIntCounters = new HashMap<>(); removalSets = new HashMap<>(); @@ -308,7 +307,7 @@ public void regenerateStore(Integer[] newValues) throws Exception { } @Override - public void clear() throws Exception { + public void clear() { regenerateStore(new Integer[] {}); } From f5a0eb86cf80c2847412fd6ff9b67ac200904249 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Fri, 10 Nov 2023 16:37:14 -0800 Subject: [PATCH 6/8] Simplified RBM size estimator Signed-off-by: Peter Alfonsi --- .../cache/tier/keystore/KeyLookupStore.java | 2 +- .../cache/tier/keystore/KeyStoreStats.java | 4 +- .../tier/keystore/RBMIntKeyLookupStore.java | 41 ++++-- .../cache/tier/keystore/RBMSizeEstimator.java | 137 ------------------ .../keystore/RBMIntKeyLookupStoreTests.java | 64 ++++---- 5 files changed, 63 insertions(+), 185 deletions(-) delete mode 100644 server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMSizeEstimator.java diff --git a/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyLookupStore.java b/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyLookupStore.java index a08174b5b46d3..dc2b7a4ba1234 100644 --- a/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyLookupStore.java +++ b/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyLookupStore.java @@ -103,7 +103,7 @@ public interface KeyLookupStore { /** * Returns an estimate of the store's memory usage. - * @return The memory usage, in MB + * @return The memory usage */ long getMemorySizeInBytes(); diff --git a/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyStoreStats.java b/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyStoreStats.java index b5c95b134990c..ab3055a81d4c9 100644 --- a/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyStoreStats.java +++ b/server/src/main/java/org/opensearch/common/cache/tier/keystore/KeyStoreStats.java @@ -22,17 +22,15 @@ public class KeyStoreStats { protected CounterMetric numAddAttempts; protected CounterMetric numCollisions; protected boolean guaranteesNoFalseNegatives; - protected final int maxNumEntries; protected AtomicBoolean atCapacity; protected CounterMetric numRemovalAttempts; protected CounterMetric numSuccessfulRemovals; - protected KeyStoreStats(long memSizeCapInBytes, int maxNumEntries) { + protected KeyStoreStats(long memSizeCapInBytes) { this.size = new CounterMetric(); this.numAddAttempts = new CounterMetric(); this.numCollisions = new CounterMetric(); this.memSizeCapInBytes = memSizeCapInBytes; - this.maxNumEntries = maxNumEntries; this.atCapacity = new AtomicBoolean(false); this.numRemovalAttempts = new CounterMetric(); this.numSuccessfulRemovals = new CounterMetric(); diff --git a/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java index 9dba9e8165d40..e7370558da80b 100644 --- a/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java @@ -34,7 +34,6 @@ import org.opensearch.common.metrics.CounterMetric; -import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.concurrent.locks.Lock; @@ -77,10 +76,14 @@ public int getValue() { protected RoaringBitmap rbm; private HashMap collidedIntCounters; private HashMap> removalSets; - protected RBMSizeEstimator sizeEstimator; protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); protected final Lock readLock = lock.readLock(); protected final Lock writeLock = lock.writeLock(); + private long mostRecentByteEstimate; + private final int REFRESH_SIZE_EST_INTERVAL = 10000; + // Refresh size estimate every X new elements. Refreshes use the RBM's internal size estimator, which takes ~0.01 ms, + // so we don't want to do it on every get(), and it doesn't matter much if there are +- 10000 keys in this store + // in terms of storage impact // Default constructor sets modulo = 2^28 public RBMIntKeyLookupStore(long memSizeCapInBytes) { @@ -89,18 +92,11 @@ public RBMIntKeyLookupStore(long memSizeCapInBytes) { public RBMIntKeyLookupStore(KeystoreModuloValue moduloValue, long memSizeCapInBytes) { this.modulo = moduloValue.getValue(); - sizeEstimator = new RBMSizeEstimator(modulo); - this.stats = new KeyStoreStats(memSizeCapInBytes, calculateMaxNumEntries(memSizeCapInBytes)); + this.stats = new KeyStoreStats(memSizeCapInBytes); this.rbm = new RoaringBitmap(); this.collidedIntCounters = new HashMap<>(); this.removalSets = new HashMap<>(); - } - - protected int calculateMaxNumEntries(long memSizeCapInBytes) { - if (memSizeCapInBytes == 0) { - return Integer.MAX_VALUE; - } - return sizeEstimator.getNumEntriesFromSizeInBytes(memSizeCapInBytes); + this.mostRecentByteEstimate = 0L; } private final int transform(int value) { @@ -125,7 +121,11 @@ public boolean add(Integer value) { return false; } stats.numAddAttempts.inc(); - if (stats.size.count() == stats.maxNumEntries) { + + if (getSize() % REFRESH_SIZE_EST_INTERVAL == 0) { + mostRecentByteEstimate = getMemorySizeInBytes(); + } + if (getMemorySizeCapInBytes() > 0 && mostRecentByteEstimate > getMemorySizeCapInBytes()) { stats.atCapacity.set(true); return false; } @@ -273,9 +273,24 @@ public boolean isCollision(Integer value1, Integer value2) { return transform(value1) == transform(value2); } + static double getRBMSizeMultiplier(int numEntries, int modulo) { + double x = Math.log10((double) numEntries / modulo); + if (x < -5) { + return 7.0; + } + if (x < -2.75) { + return -2.5 * x - 5.5; + } + if (x <= 0) { + return -3.0 / 22.0 * x + 1; + } + return 1; + } + @Override public long getMemorySizeInBytes() { - return sizeEstimator.getSizeInBytes((int) stats.size.count()); // + RBMSizeEstimator.getHashsetMemSizeInBytes(collidedInts.size()); + double multiplier = getRBMSizeMultiplier((int) stats.size.count(), modulo); + return (long) (rbm.getSizeInBytes() * multiplier); } @Override diff --git a/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMSizeEstimator.java b/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMSizeEstimator.java deleted file mode 100644 index e07c645bad0cf..0000000000000 --- a/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMSizeEstimator.java +++ /dev/null @@ -1,137 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. - */ - -package org.opensearch.common.cache.tier.keystore; - -import org.opensearch.common.cache.tier.keystore.RBMIntKeyLookupStore; - -/** - * A class used to estimate roaring bitmap memory sizes (and hash set sizes). - * Values based on experiments with adding randomly distributed integers, which matches the use case for KeyLookupStore. - * In this use case, true values are much higher than an RBM's self-reported size, especially for small RBMs: see - * https://github.com/RoaringBitmap/RoaringBitmap/issues/257 - */ -public class RBMSizeEstimator { - public static final int BYTES_IN_MB = 1048576; - public static final double HASHSET_MEM_SLOPE = 6.46 * Math.pow(10, -5); - protected final double slope; - protected final double bufferMultiplier; - protected final double intercept; - - RBMSizeEstimator(int modulo) { - double[] memValues = calculateMemoryCoefficients(modulo); - this.bufferMultiplier = memValues[0]; - this.slope = memValues[1]; - this.intercept = memValues[2]; - } - - public static double[] calculateMemoryCoefficients(int modulo) { - // Sets up values to help estimate RBM size given a modulo - // Returns an array of {bufferMultiplier, slope, intercept} - - double modifiedModulo; - if (modulo == 0) { - modifiedModulo = 32.0; - } else { - modifiedModulo = Math.log(modulo) / Math.log(2); - } - // we "round up" the modulo to the nearest tested value - double highCutoff = 29.001; // Floating point makes 29 not work - double mediumCutoff = 28.0; - double lowCutoff = 26.0; - double bufferMultiplier = 1.0; - double slope; - double intercept; - if (modifiedModulo > highCutoff) { - // modulo > 2^29 - bufferMultiplier = 1.2; - slope = 0.637; - intercept = 3.091; - } else if (modifiedModulo > mediumCutoff) { - // 2^29 >= modulo > 2^28 - slope = 0.619; - intercept = 2.993; - } else if (modifiedModulo > lowCutoff) { - // 2^28 >= modulo > 2^26 - slope = 0.614; - intercept = 2.905; - } else { - slope = 0.628; - intercept = 2.603; - } - return new double[] { bufferMultiplier, slope, intercept }; - } - - public long getSizeInBytes(int numEntries) { - // Based on a linear fit in log-log space, so that we minimize the error as a proportion rather than as - // an absolute value. Should be within ~50% of the true value at worst, and should overestimate rather - // than underestimate the memory usage - return (long) ((long) Math.pow(numEntries, slope) * (long) Math.pow(10, intercept) * bufferMultiplier); - } - - public int getNumEntriesFromSizeInBytes(long sizeInBytes) { - // This function has some precision issues especially when composed with its inverse: - // numEntries = getNumEntriesFromSizeInBytes(getSizeInBytes(numEntries)) - // In this case the result can be off by up to a couple percent - // However, this shouldn't really matter as both functions are based on memory estimates with higher errors than a couple percent - // and this composition won't happen outside of tests - return (int) Math.pow(sizeInBytes / (bufferMultiplier * Math.pow(10, intercept)), 1 / slope); - - } - - public static long getSizeInBytesWithModulo(int numEntries, int modulo) { - double[] memValues = calculateMemoryCoefficients(modulo); - return (long) ((long) Math.pow(numEntries, memValues[1]) * (long) Math.pow(10, memValues[2]) * memValues[0]); - } - - public static long getSizeInBytesWithModuloValue(int numEntries, RBMIntKeyLookupStore.KeystoreModuloValue moduloValue) { - double[] memValues = calculateMemoryCoefficients(moduloValue.getValue()); - return (long) ((long) Math.pow(numEntries, memValues[1]) * (long) Math.pow(10, memValues[2]) * memValues[0]); - } - - public static int getNumEntriesFromSizeInBytesWithModulo(long sizeInBytes, int modulo) { - double[] memValues = calculateMemoryCoefficients(modulo); - return (int) Math.pow(sizeInBytes / (memValues[0] * Math.pow(10, memValues[2])), 1 / memValues[1]); - } - - protected static long convertMBToBytes(double valMB) { - return (long) (valMB * BYTES_IN_MB); - } - - protected static double convertBytesToMB(long valBytes) { - return (double) valBytes / BYTES_IN_MB; - } - - protected static long getHashsetMemSizeInBytes(int numEntries) { - return convertMBToBytes(HASHSET_MEM_SLOPE * numEntries); - } -} diff --git a/server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java index 0e9d63a9731a5..a6d076c8250ad 100644 --- a/server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java @@ -32,10 +32,9 @@ package org.opensearch.common.cache.tier.keystore; import org.opensearch.common.Randomness; -import org.opensearch.common.cache.tier.keystore.RBMIntKeyLookupStore; -import org.opensearch.common.cache.tier.keystore.RBMSizeEstimator; import org.opensearch.common.metrics.CounterMetric; import org.opensearch.test.OpenSearchTestCase; +import org.roaringbitmap.RoaringBitmap; import java.util.ArrayList; import java.util.HashSet; @@ -45,8 +44,10 @@ import java.util.concurrent.ThreadPoolExecutor; public class RBMIntKeyLookupStoreTests extends OpenSearchTestCase { + + final int BYTES_IN_MB = 1048576; public void testInit() { - long memCap = 100 * RBMSizeEstimator.BYTES_IN_MB; + long memCap = 100 * BYTES_IN_MB; RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(memCap); assertEquals(0, kls.getSize()); assertEquals(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_EIGHT.getValue(), kls.modulo); @@ -129,16 +130,35 @@ public void testAddingDuplicates() throws Exception { } public void testMemoryCapBlocksAdd() throws Exception { + // Now that we're using a modified version of rbm.getSizeInBytes(), which doesn't provide an inverse function, + // we have to test filling just an RBM with random test values first so that we can get the resulting memory cap limit + // to use with our modified size estimate. + // This is much noisier so the precision is lower. + + // It is necessary to use randomly distributed integers for both parts of this test, as we would do with hashes in the cache, + // as that's what our size estimator is designed for. + // If we add a run of integers, our size estimator is not valid, especially for small RBMs. + + int[] maxEntriesArr = new int[] { 1342000, 100000, 3000000}; + long[] rbmReportedSizes = new long[4]; + Random rand = Randomness.get(); + for (int j = 0; j < maxEntriesArr.length; j++) { + RoaringBitmap rbm = new RoaringBitmap(); + for (int i = 0; i < maxEntriesArr[j]; i++) { + rbm.add(rand.nextInt()); + } + rbmReportedSizes[j] = rbm.getSizeInBytes(); + } RBMIntKeyLookupStore.KeystoreModuloValue moduloValue = RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE; - for (int maxEntries : new int[] { 2342000, 1000, 100000 }) { - long memSizeCapInBytes = RBMSizeEstimator.getSizeInBytesWithModuloValue(maxEntries, moduloValue); + for (int i = 0; i < maxEntriesArr.length; i++) { + double multiplier = RBMIntKeyLookupStore.getRBMSizeMultiplier(maxEntriesArr[i], moduloValue.getValue()); + long memSizeCapInBytes = (long) (rbmReportedSizes[i] * multiplier); + //long memSizeCapInBytes = RBMSizeEstimator.getSizeInBytesWithModuloValue(maxEntries, moduloValue); RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(moduloValue, memSizeCapInBytes); - for (int j = 0; j < maxEntries + 1000; j++) { - kls.add(j); + for (int j = 0; j < maxEntriesArr[i] + 5000; j++) { + kls.add(rand.nextInt()); } - assertTrue(Math.abs(maxEntries - kls.getSize()) < (double) maxEntries / 25); - // exact cap varies a small amount bc of floating point, especially when we use bytes instead of MB for calculations - // precision gets much worse when we compose the two functions, as we do here, but this wouldn't happen in an actual use case + assertTrue(Math.abs(maxEntriesArr[i] - kls.getSize()) < (double) maxEntriesArr[i] / 10); } } @@ -205,7 +225,7 @@ public void testConcurrency() throws Exception { } public void testRemoveNoCollisions() throws Exception { - long memCap = 100L * RBMSizeEstimator.BYTES_IN_MB; + long memCap = 100L * BYTES_IN_MB; int numToAdd = 195000; RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.NONE, memCap); // there should be no collisions for sequential positive numbers up to modulo @@ -222,7 +242,7 @@ public void testRemoveNoCollisions() throws Exception { public void testRemoveWithCollisions() throws Exception { int modulo = (int) Math.pow(2, 26); - long memCap = 100L * RBMSizeEstimator.BYTES_IN_MB; + long memCap = 100L * BYTES_IN_MB; RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_SIX, memCap); for (int i = 0; i < 10; i++) { kls.add(i); @@ -278,24 +298,6 @@ public void testNullInputs() throws Exception { assertEquals(4, kls.getSize()); } - public void testMemoryCapValueInitialization() { - RBMIntKeyLookupStore.KeystoreModuloValue[] mods = RBMIntKeyLookupStore.KeystoreModuloValue.values(); - double[] expectedMultipliers = new double[] { 1.2, 1.2, 1, 1, 1 }; - double[] expectedSlopes = new double[] { 0.637, 0.637, 0.619, 0.614, 0.629 }; - double[] expectedIntercepts = new double[] { 3.091, 3.091, 2.993, 2.905, 2.603 }; - long memSizeCapInBytes = (long) 100.0 * RBMSizeEstimator.BYTES_IN_MB; - double delta = 0.01; - for (int i = 0; i < mods.length; i++) { - RBMIntKeyLookupStore.KeystoreModuloValue moduloValue = mods[i]; - RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(moduloValue, memSizeCapInBytes); - assertEquals(kls.stats.memSizeCapInBytes, kls.getMemorySizeCapInBytes(), 1.0); - assertEquals(expectedMultipliers[i], kls.sizeEstimator.bufferMultiplier, delta); - assertEquals(expectedSlopes[i], kls.sizeEstimator.slope, delta); - assertEquals(expectedIntercepts[i], kls.sizeEstimator.intercept, delta); - } - - } - public void testRemovalLogic() throws Exception { RBMIntKeyLookupStore.KeystoreModuloValue moduloValue = RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_SIX; int modulo = moduloValue.getValue(); @@ -395,6 +397,6 @@ public void testRemovalLogicWithHashCollision() throws Exception { assertTrue(removalSet.contains(k1)); assertEquals(1, numCollisions.count()); assertTrue(kls.contains(k1)); - assertTrue(kls.contains(k2)); + assertTrue(kls.contains(k2)); } } From 835e6c2f6d41fa3f42d096fd8671a31e2af7e165 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Wed, 22 Nov 2023 11:21:33 -0800 Subject: [PATCH 7/8] Optimized modulo calculation with bitmask Signed-off-by: Peter Alfonsi --- .../cache/tier/keystore/RBMIntKeyLookupStore.java | 11 +++++++++-- .../tier/keystore/RBMIntKeyLookupStoreTests.java | 10 +++++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java index e7370558da80b..4e56e36c130c0 100644 --- a/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java @@ -72,6 +72,8 @@ public int getValue() { } protected final int modulo; + protected final int modulo_bitmask; + // Since our modulo is always a power of two we can optimize it by ANDing with a particular bitmask KeyStoreStats stats; protected RoaringBitmap rbm; private HashMap collidedIntCounters; @@ -92,6 +94,11 @@ public RBMIntKeyLookupStore(long memSizeCapInBytes) { public RBMIntKeyLookupStore(KeystoreModuloValue moduloValue, long memSizeCapInBytes) { this.modulo = moduloValue.getValue(); + if (modulo > 0) { + this.modulo_bitmask = modulo - 1; // keep last log_2(modulo) bits + } else { + this.modulo_bitmask = -1; // -1 in twos complement is all ones -> includes all bits -> same as no modulo + } this.stats = new KeyStoreStats(memSizeCapInBytes); this.rbm = new RoaringBitmap(); this.collidedIntCounters = new HashMap<>(); @@ -99,8 +106,8 @@ public RBMIntKeyLookupStore(KeystoreModuloValue moduloValue, long memSizeCapInBy this.mostRecentByteEstimate = 0L; } - private final int transform(int value) { - return modulo == 0 ? value : value % modulo; + private int transform(int value) { + return value & modulo_bitmask; } private void handleCollisions(int transformedValue) { diff --git a/server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java index a6d076c8250ad..c0d16dfbc5744 100644 --- a/server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java @@ -61,14 +61,22 @@ public void testTransformationLogic() throws Exception { for (int i = 0; i < 4; i++) { // after this we run into max value, but thats not a flaw with the class design int posValue = i * modulo + offset; kls.add(posValue); + assertEquals(offset, (int) kls.getInternalRepresentation(posValue)); int negValue = -(i * modulo + offset); kls.add(negValue); + assertEquals(modulo - offset, (int) kls.getInternalRepresentation(negValue)); } assertEquals(2, kls.getSize()); int[] testVals = new int[] { 0, 1, -1, -23495, 23058, modulo, -modulo, Integer.MAX_VALUE, Integer.MIN_VALUE }; for (int value : testVals) { assertTrue(kls.getInternalRepresentation(value) < modulo); - assertTrue(kls.getInternalRepresentation(value) > -modulo); + assertTrue(kls.getInternalRepresentation(value) >= 0); + } + RBMIntKeyLookupStore no_modulo_kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.NONE, 0L); + Random rand = Randomness.get(); + for (int i = 0; i < 100; i++) { + int val = rand.nextInt(); + assertEquals(val, (int) no_modulo_kls.getInternalRepresentation(val)); } } From 3c3712697faf59db758a045f8ab866332ae4eadd Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 27 Nov 2023 11:38:40 -0800 Subject: [PATCH 8/8] Changed memory size estimator to reflect optimized modulo Signed-off-by: Peter Alfonsi --- .../cache/tier/keystore/RBMIntKeyLookupStore.java | 11 +++++++++-- .../tier/keystore/RBMIntKeyLookupStoreTests.java | 6 +++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java b/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java index 4e56e36c130c0..e690deae7b521 100644 --- a/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java +++ b/server/src/main/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStore.java @@ -82,7 +82,7 @@ public int getValue() { protected final Lock readLock = lock.readLock(); protected final Lock writeLock = lock.writeLock(); private long mostRecentByteEstimate; - private final int REFRESH_SIZE_EST_INTERVAL = 10000; + protected final int REFRESH_SIZE_EST_INTERVAL = 10000; // Refresh size estimate every X new elements. Refreshes use the RBM's internal size estimator, which takes ~0.01 ms, // so we don't want to do it on every get(), and it doesn't matter much if there are +- 10000 keys in this store // in terms of storage impact @@ -281,7 +281,14 @@ public boolean isCollision(Integer value1, Integer value2) { } static double getRBMSizeMultiplier(int numEntries, int modulo) { - double x = Math.log10((double) numEntries / modulo); + double effectiveModulo = (double) modulo / 2; + /* This model was created when we used % operator to calculate modulo. This has range (-modulo, modulo). + Now we have optimized to use a bitmask, which has range [0, modulo). So the number of possible values stored + is halved. */ + if (modulo == 0) { + effectiveModulo = Math.pow(2, 32); + } + double x = Math.log10((double) numEntries / effectiveModulo); if (x < -5) { return 7.0; } diff --git a/server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java b/server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java index c0d16dfbc5744..d9b1ece1310ca 100644 --- a/server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java +++ b/server/src/test/java/org/opensearch/common/cache/tier/keystore/RBMIntKeyLookupStoreTests.java @@ -82,9 +82,13 @@ public void testTransformationLogic() throws Exception { public void testContains() throws Exception { RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE, 0L); - for (int i = 0; i < 2000; i++) { + RBMIntKeyLookupStore noModuloKls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.NONE, 0L); + for (int i = 0; i < kls.REFRESH_SIZE_EST_INTERVAL + 1000; i++) { + // set upper bound > number of elements to trigger a size check, ensuring we test that too kls.add(i); assertTrue(kls.contains(i)); + noModuloKls.add(i); + assertTrue(noModuloKls.contains(i)); } }