From 4c78bf8b46b3a1554f2bab2f49d124572a14818e Mon Sep 17 00:00:00 2001 From: brharrington Date: Wed, 3 Jan 2024 18:01:04 -0600 Subject: [PATCH] tune prefix tree implementation (#1105) Updates the prefix tree implementation to improve the performance. Instead of going character by character the nodes will now keep a prefix string for faster matching and less thread contention. Testing on real sample data it reduced the time for matching against the query index by ~6%. --- codequality/checkstyle-suppressions.xml | 3 +- .../spectator/atlas/impl/PrefixTree.java | 386 +++++++++++++----- .../spectator/atlas/impl/PrefixTreeTest.java | 95 ++++- 3 files changed, 375 insertions(+), 109 deletions(-) diff --git a/codequality/checkstyle-suppressions.xml b/codequality/checkstyle-suppressions.xml index d1be766d1..5299cad15 100644 --- a/codequality/checkstyle-suppressions.xml +++ b/codequality/checkstyle-suppressions.xml @@ -6,4 +6,5 @@ - \ No newline at end of file + + diff --git a/spectator-reg-atlas/src/main/java/com/netflix/spectator/atlas/impl/PrefixTree.java b/spectator-reg-atlas/src/main/java/com/netflix/spectator/atlas/impl/PrefixTree.java index 8c8e5c49e..72874ba4f 100644 --- a/spectator-reg-atlas/src/main/java/com/netflix/spectator/atlas/impl/PrefixTree.java +++ b/spectator-reg-atlas/src/main/java/com/netflix/spectator/atlas/impl/PrefixTree.java @@ -1,5 +1,5 @@ /* - * Copyright 2014-2023 Netflix, Inc. + * Copyright 2014-2024 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,11 +15,15 @@ */ package com.netflix.spectator.atlas.impl; +import com.netflix.spectator.impl.Preconditions; + import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; import java.util.List; +import java.util.Objects; import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicReferenceArray; +import java.util.concurrent.CopyOnWriteArraySet; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; import java.util.function.Consumer; @@ -32,40 +36,14 @@ */ final class PrefixTree { - private static final int FIRST_CHAR = ' '; - private static final int LAST_CHAR = '~'; - private static final int TABLE_SIZE = LAST_CHAR - FIRST_CHAR + 1; - - private static int indexOf(char c) { - int i = c - FIRST_CHAR; - return (i >= TABLE_SIZE) ? -1 : i; - } - private final Lock lock = new ReentrantLock(); - private final AtomicReferenceArray> children; + + private volatile Node root; private final Set values; /** Create a new instance. */ PrefixTree() { - children = new AtomicReferenceArray<>(TABLE_SIZE); - values = ConcurrentHashMap.newKeySet(); - } - - private PrefixTree computeIfAbsent(int i) { - PrefixTree child = children.get(i); - if (child == null) { - lock.lock(); - try { - child = children.get(i); - if (child == null) { - child = new PrefixTree<>(); - children.set(i, child); - } - } finally { - lock.unlock(); - } - } - return child; + values = newSet(); } /** @@ -77,23 +55,59 @@ private PrefixTree computeIfAbsent(int i) { * Value to associate with the prefix. */ void put(String prefix, T value) { - if (prefix == null) + if (prefix == null) { values.add(value); - else - put(prefix, 0, value); + } else { + lock.lock(); + try { + Node node = root; + if (node == null) { + root = new Node(prefix, EMPTY, asSet(value)); + } else { + root = putImpl(node, prefix, 0, value); + } + } finally { + lock.unlock(); + } + } } - private void put(String prefix, int pos, T value) { - if (pos == prefix.length()) { - values.add(value); - } else { - int i = indexOf(prefix.charAt(pos)); - if (i < 0) { - values.add(value); + private Node putImpl(Node node, String key, int offset, T value) { + final int prefixLength = node.prefix.length(); + final int keyLength = key.length() - offset; + final int commonLength = commonPrefixLength(node.prefix, key, offset); + if (commonLength == 0 && prefixLength > 0) { + // No common prefix + Node n = new Node(key.substring(offset), EMPTY, asSet(value)); + return new Node("", new Node[] {n, node}); + } else if (keyLength == prefixLength && commonLength == prefixLength) { + // Fully matches, add the value to this node + node.values.add(value); + return node; + } else if (keyLength > prefixLength && commonLength == prefixLength) { + // key.startsWith(prefix), put the value into a child + int childOffset = offset + commonLength; + int pos = find(node.children, key, childOffset); + if (pos >= 0) { + Node n = putImpl(node.children[pos], key, childOffset, value); + return node.replaceChild(n, pos); } else { - PrefixTree child = computeIfAbsent(i); - child.put(prefix, pos + 1, value); + Node n = new Node(key.substring(childOffset), EMPTY, asSet(value)); + return node.addChild(n); } + } else if (prefixLength > keyLength && commonLength == keyLength) { + // prefix.startsWith(key), make new parent node and add this node as a child + int childOffset = offset + commonLength; + Node n = new Node(node.prefix.substring(commonLength), node.children, node.values); + return new Node(key.substring(offset, childOffset), new Node[] {n}, asSet(value)); + } else { + // Common prefix is a subset of both + int childOffset = offset + commonLength; + Node[] children = { + new Node(node.prefix.substring(commonLength), node.children, node.values), + new Node(key.substring(childOffset), EMPTY, asSet(value)) + }; + return new Node(node.prefix.substring(0, commonLength), children); } } @@ -108,44 +122,44 @@ private void put(String prefix, int pos, T value) { * Returns true if a value was removed from the tree. */ boolean remove(String prefix, T value) { - if (prefix == null) - return values.remove(value); - else - return remove(prefix, 0, value); - } - - private boolean remove(String prefix, int pos, T value) { - if (pos == prefix.length()) { + if (prefix == null) { return values.remove(value); } else { - int i = indexOf(prefix.charAt(pos)); - if (i < 0) { - return values.remove(value); - } else { - PrefixTree child = children.get(i); - if (child == null) { - return false; - } else { - boolean result = child.remove(prefix, pos + 1, value); - if (result && child.isEmpty()) { - lock.lock(); - try { - // Check that the children array still has the reference to the - // same child object. The entry may have been replaced by another - // thread. - if (child == children.get(i) && child.isEmpty()) { - children.set(i, null); - } - } finally { - lock.unlock(); - } + lock.lock(); + try { + boolean removed = false; + Node node = root; + if (node != null) { + removed = removeImpl(node, prefix, 0, value); + if (removed) { + node = node.compress(); + root = node.isEmpty() ? null : node; } - return result; } + return removed; + } finally { + lock.unlock(); } } } + private boolean removeImpl(Node node, String key, int offset, T value) { + final int prefixLength = node.prefix.length(); + final int keyLength = key.length() - offset; + final int commonLength = commonPrefixLength(node.prefix, key, offset); + if (keyLength == prefixLength && commonLength == prefixLength) { + // Fully matches, remove the value from this node + return node.values.remove(value); + } else if (keyLength > prefixLength && commonLength == prefixLength) { + // Try to remove from children + int childOffset = offset + commonLength; + int pos = find(node.children, key, childOffset); + return pos >= 0 && removeImpl(node.children[pos], key, childOffset, value); + } else { + return false; + } + } + /** * Get a list of values associated with a prefix of the search key. * @@ -155,9 +169,9 @@ private boolean remove(String prefix, int pos, T value) { * Values associated with a matching prefix. */ List get(String key) { - List results = new ArrayList<>(); - forEach(key, results::add); - return results; + List result = new ArrayList<>(); + forEach(key, result::add); + return result; } /** @@ -169,17 +183,31 @@ List get(String key) { * Function to call for matching values. */ void forEach(String key, Consumer consumer) { - forEach(key, 0, consumer); + values.forEach(consumer); + Node node = root; + if (node != null) { + forEachImpl(node, key, 0, consumer); + } } - private void forEach(String key, int pos, Consumer consumer) { - values.forEach(consumer); - if (pos < key.length()) { - int i = indexOf(key.charAt(pos)); - if (i >= 0) { - PrefixTree child = children.get(i); - if (child != null) { - child.forEach(key, pos + 1, consumer); + @SuppressWarnings("unchecked") + private void forEachImpl(Node node, String key, int offset, Consumer consumer) { + final int prefixLength = node.prefix.length(); + final int keyLength = key.length() - offset; + final int commonLength = commonPrefixLength(node.prefix, key, offset); + + if (commonLength == prefixLength) { + // Prefix matches, consume values for this node + for (Object value : node.values) { + consumer.accept((T) value); + } + + if (commonLength < keyLength) { + // There is more to the key, check if there are also matches for child nodes + int childOffset = offset + commonLength; + int pos = find(node.children, key, childOffset); + if (pos >= 0) { + forEachImpl(node.children[pos], key, childOffset, consumer); } } } @@ -189,16 +217,7 @@ private void forEach(String key, int pos, Consumer consumer) { * Returns true if the tree is empty. */ boolean isEmpty() { - if (values.isEmpty()) { - for (int i = 0; i < TABLE_SIZE; ++i) { - if (children.get(i) != null) { - return false; - } - } - return true; - } else { - return false; - } + return values.isEmpty() && (root == null || root.isEmpty()); } /** @@ -206,13 +225,178 @@ boolean isEmpty() { * by traversing the tree, so this call may be expensive. */ int size() { - int sz = values.size(); - for (int i = 0; i < TABLE_SIZE; ++i) { - PrefixTree child = children.get(i); - if (child != null) { + Node r = root; + return (r == null ? 0 : r.size()) + values.size(); + } + + /** + * Determine the length of the common prefix for two strings. + * + * @param str1 + * First string to compare. + * @param str2 + * Second string to compare. + * @param offset + * Offset in the second string for where to start. + * @return + * Length of the common prefix for the two strings. + */ + static int commonPrefixLength(String str1, String str2, int offset) { + int length = Math.min(str1.length(), str2.length() - offset); + for (int i = 0; i < length; ++i) { + if (str1.charAt(i) != str2.charAt(offset + i)) { + return i; + } + } + return length; + } + + private static int find(Node[] nodes, String key, int offset) { + int s = 0; + int e = nodes.length - 1; + while (s <= e) { + int mid = (s + e) >>> 1; + int cmp = Character.compare(nodes[mid].prefix.charAt(0), key.charAt(offset)); + if (cmp == 0) + return mid; + else if (cmp < 0) + s = mid + 1; + else + e = mid - 1; + } + return -1; + } + + private static Set newSet() { + // The copy on write implementation is used because in the hot path traversing the set of values + // is the most important aspect. + return new CopyOnWriteArraySet<>(); + } + + private static Set asSet(Object value) { + Set set = newSet(); + set.add(value); + return set; + } + + private static final Node[] EMPTY = new Node[0]; + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + PrefixTree that = (PrefixTree) o; + return Objects.equals(root, that.root) + && values.equals(that.values); + } + + @Override + public int hashCode() { + return Objects.hash(root, values); + } + + private static class Node { + + final String prefix; + final Node[] children; + final Set values; + + Node(String prefix, Node[] children, Set values) { + this.prefix = Preconditions.checkNotNull(prefix, "prefix"); + this.children = Preconditions.checkNotNull(children, "children"); + this.values = Preconditions.checkNotNull(values, "values"); + Arrays.sort(children, Comparator.comparing(n -> n.prefix)); + } + + Node(String prefix, Node[] children) { + this(prefix, children, newSet()); + } + + Node replaceChild(Node n, int i) { + Node[] cs = new Node[children.length]; + System.arraycopy(children, 0, cs, 0, i); + cs[i] = n; + System.arraycopy(children, i + 1, cs, i + 1, children.length - i - 1); + return new Node(prefix, cs, values); + } + + Node addChild(Node n) { + Node[] cs = new Node[children.length + 1]; + System.arraycopy(children, 0, cs, 0, children.length); + cs[children.length] = n; + return new Node(prefix, cs, values); + } + + Node compress() { + // Create list of compressed children, avoid allocating the list unless + // there is a change. + List cs = null; + for (int i = 0; i < children.length; ++i) { + Node child = children[i]; + Node c = child.compress(); + if (c != child || c.isEmpty()) { + if (cs == null) { + cs = new ArrayList<>(children.length); + for (int j = 0; j < i; ++j) { + cs.add(children[j]); + } + } + if (!c.isEmpty()) { + cs.add(c); + } + } else if (cs != null) { + cs.add(child); + } + } + + // Return compressed node. Merge nodes if intermediates have no values. + if (cs == null) { + return this; + } else if (values.isEmpty() && cs.size() == 1) { + Node c = cs.get(0); + String p = prefix + c.prefix; + return new Node(p, EMPTY, c.values); + } else { + return new Node(prefix, cs.toArray(EMPTY), values); + } + } + + boolean isEmpty() { + return values.isEmpty() && areAllChildrenEmpty(); + } + + private boolean areAllChildrenEmpty() { + for (Node child : children) { + if (!child.isEmpty()) { + return false; + } + } + return true; + } + + int size() { + int sz = values.size(); + for (Node child : children) { sz += child.size(); } + return sz; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Node node = (Node) o; + return prefix.equals(node.prefix) + && Arrays.equals(children, node.children) + && values.equals(node.values); + } + + @Override + public int hashCode() { + int result = Objects.hash(prefix, values); + result = 31 * result + Arrays.hashCode(children); + return result; } - return sz; } } diff --git a/spectator-reg-atlas/src/test/java/com/netflix/spectator/atlas/impl/PrefixTreeTest.java b/spectator-reg-atlas/src/test/java/com/netflix/spectator/atlas/impl/PrefixTreeTest.java index 84a9796f6..7d4d368c0 100644 --- a/spectator-reg-atlas/src/test/java/com/netflix/spectator/atlas/impl/PrefixTreeTest.java +++ b/spectator-reg-atlas/src/test/java/com/netflix/spectator/atlas/impl/PrefixTreeTest.java @@ -1,5 +1,5 @@ /* - * Copyright 2014-2022 Netflix, Inc. + * Copyright 2014-2024 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -106,20 +106,101 @@ public void multipleMatches() { } @Test - public void unsupportedCharInPrefix() { + public void unicodeCharInPrefix() { PrefixTree tree = new PrefixTree<>(); tree.put("aβc", "1"); assertSize(tree, 1); - Assertions.assertEquals(list("1"), tree.get("abcdef")); - Assertions.assertEquals(list("1"), tree.get("abcghi")); - Assertions.assertEquals(list("1"), tree.get("abc")); - Assertions.assertEquals(list("1"), tree.get("abd")); - Assertions.assertEquals(list("1"), tree.get("ab")); + Assertions.assertEquals(list("1"), tree.get("aβcdef")); + Assertions.assertEquals(list("1"), tree.get("aβcghi")); + Assertions.assertEquals(list("1"), tree.get("aβc")); + Assertions.assertEquals(Collections.emptyList(), tree.get("abcdef")); + Assertions.assertEquals(Collections.emptyList(), tree.get("abcghi")); + Assertions.assertEquals(Collections.emptyList(), tree.get("abc")); + Assertions.assertEquals(Collections.emptyList(), tree.get("abd")); + Assertions.assertEquals(Collections.emptyList(), tree.get("ab")); + Assertions.assertEquals(Collections.emptyList(), tree.get("aβ")); Assertions.assertEquals(Collections.emptyList(), tree.get("b")); Assertions.assertTrue(tree.remove("aβc", "1")); assertSize(tree, 0); Assertions.assertEquals(Collections.emptyList(), tree.get("abcdef")); } + + @Test + public void emptyRootPrefix() { + PrefixTree tree = new PrefixTree<>(); + tree.put("ab", "ab"); + tree.put("cd", "cd"); + + Assertions.assertEquals(list("ab"), tree.get("ab")); + Assertions.assertEquals(list("cd"), tree.get("cd")); + Assertions.assertEquals(Collections.emptyList(), tree.get("ef")); + + Assertions.assertTrue(tree.remove("ab", "ab")); + PrefixTree tree2 = new PrefixTree<>(); + tree2.put("cd", "cd"); + Assertions.assertEquals(tree2, tree); + + Assertions.assertEquals(Collections.emptyList(), tree.get("ab")); + Assertions.assertEquals(list("cd"), tree.get("cd")); + Assertions.assertEquals(Collections.emptyList(), tree.get("ef")); + } + + @Test + public void updateExistingNode() { + PrefixTree tree = new PrefixTree<>(); + tree.put("abcdef", "1"); + tree.put("abcdef", "2"); + + Assertions.assertEquals(list("1", "2"), tree.get("abcdef")); + } + + @Test + public void addChildNode() { + PrefixTree tree = new PrefixTree<>(); + tree.put("abc", "1"); + tree.put("abcdefghi", "2"); + tree.put("abcdef", "3"); + + Assertions.assertEquals(list("1", "3"), tree.get("abcdef")); + Assertions.assertEquals(list("1", "3", "2"), tree.get("abcdefghi")); + } + + @Test + public void splitInteriorNode() { + PrefixTree tree = new PrefixTree<>(); + tree.put("abcdef", "abcdef"); + tree.put("abcghi", "abcghi"); + + Assertions.assertEquals(list("abcdef"), tree.get("abcdef")); + Assertions.assertEquals(list("abcghi"), tree.get("abcghi")); + } + + @Test + public void manyDifferentRootPrefixes() { + PrefixTree tree = new PrefixTree<>(); + tree.put("abc", "abc"); + tree.put("def", "def"); + tree.put("ghi", "ghi"); + Assertions.assertEquals(list("abc"), tree.get("abcdef")); + } + + @Test + public void commonPrefixNoMatch() { + Assertions.assertEquals(0, PrefixTree.commonPrefixLength("abcdef", "defghi", 0)); + Assertions.assertEquals(0, PrefixTree.commonPrefixLength("abcdef", "abcdef", 3)); + } + + @Test + public void commonPrefixEmpty() { + Assertions.assertEquals(0, PrefixTree.commonPrefixLength("abcdef", "", 0)); + Assertions.assertEquals(0, PrefixTree.commonPrefixLength("", "abcdef", 0)); + } + + @Test + public void commonPrefix() { + Assertions.assertEquals(3, PrefixTree.commonPrefixLength("abcdef", "abcghi", 0)); + Assertions.assertEquals(6, PrefixTree.commonPrefixLength("defghi", "abcdefghi", 3)); + } }