From ec270f9a17a84b7edd30a47863c6b43daaa2c692 Mon Sep 17 00:00:00 2001 From: John DeRegnaucourt Date: Sat, 30 Mar 2024 20:56:44 -0400 Subject: [PATCH] * Performance improvement: `DeepEquals.deepHashCode()` - now using `IdentityHashMap()` for cycle (visited) detection. * Modernization: `UniqueIdGenerator` - updated to use `Lock.lock()` and `Lock.unlock()` instead of `synchronized` keyword. * Using json-io 4.14.1 for cloning object in "test" scope, eliminates cycle depedencies when building both json-io and java-util. --- README.md | 4 +- changelog.md | 6 +- pom.xml | 4 +- .../util/CaseInsensitiveMap.java | 12 +- .../util/CaseInsensitiveSet.java | 4 +- .../com/cedarsoftware/util/DeepEquals.java | 9 +- .../com/cedarsoftware/util/MapUtilities.java | 1 + .../cedarsoftware/util/StringUtilities.java | 2 +- .../cedarsoftware/util/UniqueIdGenerator.java | 147 +++++++++--------- .../util/TestCaseInsensitiveMap.java | 66 +++++--- .../util/TestDeepEqualsUnordered.java | 15 +- .../util/TestUniqueIdGenerator.java | 20 ++- 12 files changed, 160 insertions(+), 130 deletions(-) diff --git a/README.md b/README.md index 964e492d..d6aa072b 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ The classes in the`.jar`file are version 52 (`JDK 1.8`). To include in your project: ##### GradleF ``` -implementation 'com.cedarsoftware:java-util:2.4.7' +implementation 'com.cedarsoftware:java-util:2.4.8' ``` ##### Maven @@ -23,7 +23,7 @@ implementation 'com.cedarsoftware:java-util:2.4.7' com.cedarsoftware java-util - 2.4.7 + 2.4.8 ``` --- diff --git a/changelog.md b/changelog.md index 76b33e15..73ae2317 100644 --- a/changelog.md +++ b/changelog.md @@ -1,6 +1,8 @@ ### Revision History -* 2.4.8-SNAPSHOT - * Using json-io 4.14.2 for cloning object in "test" scope, eliminates cycle depedencies +* 2.4.8 + * Performance improvement: `DeepEquals.deepHashCode()` - now using `IdentityHashMap()` for cycle (visited) detection. + * Modernization: `UniqueIdGenerator` - updated to use `Lock.lock()` and `Lock.unlock()` instead of `synchronized` keyword. + * Using json-io 4.14.1 for cloning object in "test" scope, eliminates cycle depedencies when building both json-io and java-util. * 2.4.7 * All 687 conversions supported are now 100% cross-product tested. Converter test suite is complete. * 2.4.6 diff --git a/pom.xml b/pom.xml index 42f82d25..954b6ff6 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ com.cedarsoftware java-util jar - 2.4.8-SNAPSHOT + 2.4.8 Java Utilities https://github.com/jdereg/java-util @@ -33,7 +33,7 @@ 5.10.2 5.10.2 3.25.3 - 4.14.2 + 4.14.1 4.11.0 1.21.1 diff --git a/src/main/java/com/cedarsoftware/util/CaseInsensitiveMap.java b/src/main/java/com/cedarsoftware/util/CaseInsensitiveMap.java index d92ce82c..26336dcb 100644 --- a/src/main/java/com/cedarsoftware/util/CaseInsensitiveMap.java +++ b/src/main/java/com/cedarsoftware/util/CaseInsensitiveMap.java @@ -623,12 +623,12 @@ public boolean equals(Object other) { return true; } - else if (other instanceof CaseInsensitiveString) + if (other instanceof CaseInsensitiveString) { return hash == ((CaseInsensitiveString)other).hash && original.equalsIgnoreCase(((CaseInsensitiveString)other).original); } - else if (other instanceof String) + if (other instanceof String) { return original.equalsIgnoreCase((String)other); } @@ -642,15 +642,13 @@ public int compareTo(Object o) CaseInsensitiveString other = (CaseInsensitiveString) o; return original.compareToIgnoreCase(other.original); } - else if (o instanceof String) + if (o instanceof String) { String other = (String)o; return original.compareToIgnoreCase(other); } - else - { // Strings are less than non-Strings (come before) - return -1; - } + // Strings are less than non-Strings (come before) + return -1; } } } diff --git a/src/main/java/com/cedarsoftware/util/CaseInsensitiveSet.java b/src/main/java/com/cedarsoftware/util/CaseInsensitiveSet.java index b07f08bc..28540a3e 100644 --- a/src/main/java/com/cedarsoftware/util/CaseInsensitiveSet.java +++ b/src/main/java/com/cedarsoftware/util/CaseInsensitiveSet.java @@ -44,11 +44,11 @@ public CaseInsensitiveSet(Collection collection) { if (collection instanceof ConcurrentSkipListSet) { - map = new CaseInsensitiveMap<>(new ConcurrentSkipListMap()); + map = new CaseInsensitiveMap<>(new ConcurrentSkipListMap<>()); } else if (collection instanceof SortedSet) { - map = new CaseInsensitiveMap<>(new TreeMap()); + map = new CaseInsensitiveMap<>(new TreeMap<>()); } else { diff --git a/src/main/java/com/cedarsoftware/util/DeepEquals.java b/src/main/java/com/cedarsoftware/util/DeepEquals.java index 17c84aa4..1685e34e 100644 --- a/src/main/java/com/cedarsoftware/util/DeepEquals.java +++ b/src/main/java/com/cedarsoftware/util/DeepEquals.java @@ -10,6 +10,7 @@ import java.util.Deque; import java.util.HashMap; import java.util.HashSet; +import java.util.IdentityHashMap; import java.util.Iterator; import java.util.LinkedList; import java.util.List; @@ -736,22 +737,22 @@ public static boolean hasCustomEquals(Class c) * @return the 'deep' hashCode value for the passed in object. */ public static int deepHashCode(Object obj) { - Set visited = new HashSet<>(); + Map visited = new IdentityHashMap<>(); return deepHashCode(obj, visited); } - private static int deepHashCode(Object obj, Set visited) { + private static int deepHashCode(Object obj, Map visited) { LinkedList stack = new LinkedList<>(); stack.addFirst(obj); int hash = 0; while (!stack.isEmpty()) { obj = stack.removeFirst(); - if (obj == null || visited.contains(obj)) { + if (obj == null || visited.containsKey(obj)) { continue; } - visited.add(obj); + visited.put(obj, null); // Ensure array order matters to hash if (obj.getClass().isArray()) { diff --git a/src/main/java/com/cedarsoftware/util/MapUtilities.java b/src/main/java/com/cedarsoftware/util/MapUtilities.java index d731b69b..bedd9604 100644 --- a/src/main/java/com/cedarsoftware/util/MapUtilities.java +++ b/src/main/java/com/cedarsoftware/util/MapUtilities.java @@ -12,6 +12,7 @@ * Usefule utilities for Maps * * @author Kenneth Partlow + * @author John DeRegnaucourt *
* Copyright (c) Cedar Software LLC *

diff --git a/src/main/java/com/cedarsoftware/util/StringUtilities.java b/src/main/java/com/cedarsoftware/util/StringUtilities.java index 51f7eba5..423cc17c 100644 --- a/src/main/java/com/cedarsoftware/util/StringUtilities.java +++ b/src/main/java/com/cedarsoftware/util/StringUtilities.java @@ -107,7 +107,7 @@ public static boolean equalsIgnoreCase(CharSequence cs1, CharSequence cs2) { } /** - * @see StringUtilities@equalsIgnoreCase(CharSequence, CharSequence) + * @see StringUtilities#equalsIgnoreCase(CharSequence, CharSequence) */ public static boolean equalsIgnoreCase(String s1, String s2) { return equalsIgnoreCase((CharSequence) s1, (CharSequence) s2); diff --git a/src/main/java/com/cedarsoftware/util/UniqueIdGenerator.java b/src/main/java/com/cedarsoftware/util/UniqueIdGenerator.java index aa69be4c..2accf4c1 100644 --- a/src/main/java/com/cedarsoftware/util/UniqueIdGenerator.java +++ b/src/main/java/com/cedarsoftware/util/UniqueIdGenerator.java @@ -1,9 +1,12 @@ package com.cedarsoftware.util; +import java.nio.charset.StandardCharsets; import java.security.SecureRandom; import java.util.Date; import java.util.LinkedHashMap; import java.util.Map; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; import static java.lang.Integer.parseInt; import static java.lang.Math.abs; @@ -13,18 +16,18 @@ * Generate a unique ID that fits within a long value. The ID will be unique for the given JVM, and it makes a * solid attempt to ensure uniqueness in a clustered environment. An environment variable JAVA_UTIL_CLUSTERID * can be set to a value 0-99 to mark this JVM uniquely in the cluster. If this environment variable is not set, - * then a SecureRandom value from 0-99 is chosen for the machine cluster id.
- *
+ * then hostname, cluster id, and finally a SecureRandom value from 0-99 is chosen for the machine's id within cluster. + *

* There is an API [getUniqueId()] to get a unique ID that will work through the year 5138. This API will generate * unique IDs at a rate of up to 1 million per second. There is another API [getUniqueId19()] that will work through * the year 2286, however this API will generate unique IDs at a rate up to 10 million per second. The trade-off is * the faster API will generate positive IDs only good for about 286 years [after 2000].
*
- * The IDs are guaranteed to be strictly increasing. There is an API you can call (getDate()) that will return the - * date and time (to the millisecond) that they ID was created. + * The IDs are guaranteed to be strictly increasing. There is an API you can call (getDate(unique)) that will return + * the date and time (to the millisecond) that the ID was created. * * @author John DeRegnaucourt (jdereg@gmail.com) - * Roger Judd (@HonorKnight on GitHub) for adding code to ensure increasing order. + * @author Roger Judd (@HonorKnight on GitHub) for adding code to ensure increasing order. *
* Copyright (c) Cedar Software LLC *

@@ -45,29 +48,24 @@ public class UniqueIdGenerator { public static final String JAVA_UTIL_CLUSTERID = "JAVA_UTIL_CLUSTERID"; - private UniqueIdGenerator() - { + private UniqueIdGenerator() { } - private static final Object lock = new Object(); - private static final Object lock19 = new Object(); + private static final Lock lock = new ReentrantLock(); + private static final Lock lock19 = new ReentrantLock(); private static int count = 0; private static int count2 = 0; private static long previousTimeMilliseconds = 0; private static long previousTimeMilliseconds2 = 0; private static final int serverId; - private static final Map lastIds = new LinkedHashMap() - { - protected boolean removeEldestEntry(Map.Entry eldest) - { + private static final Map lastIds = new LinkedHashMap() { + protected boolean removeEldestEntry(Map.Entry eldest) { return size() > 1000; } }; - private static final Map lastIdsFull = new LinkedHashMap() - { - protected boolean removeEldestEntry(Map.Entry eldest) - { - return size() > 10000; + private static final Map lastIdsFull = new LinkedHashMap() { + protected boolean removeEldestEntry(Map.Entry eldest) { + return size() > 10_000; } }; @@ -90,10 +88,17 @@ protected boolean removeEldestEntry(Map.Entry eldest) setVia = "environment variable: CF_INSTANCE_INDEX"; if (id == -1) { - // use random number if all else fails - SecureRandom random = new SecureRandom(); - id = abs(random.nextInt()) % 100; - setVia = "new SecureRandom()"; + String hostName = SystemUtilities.getExternalVariable("HOSTNAME"); + if (StringUtilities.isEmpty(hostName)) { + // use random number if all else fails + SecureRandom random = new SecureRandom(); + id = abs(random.nextInt()) % 100; + setVia = "new SecureRandom()"; + } else { + String hostnameSha256 = EncryptionUtilities.calculateSHA256Hash(hostName.getBytes(StandardCharsets.UTF_8)); + id = (byte) ((hostnameSha256.charAt(0) & 0xFF) % 100); + setVia = "environment variable hostname: " + hostName + " (" + hostnameSha256 + ")"; + } } } } @@ -101,20 +106,15 @@ protected boolean removeEldestEntry(Map.Entry eldest) serverId = id; } - private static int getServerId(String externalVarName) - { - String id = SystemUtilities.getExternalVariable(externalVarName); - try - { - if (StringUtilities.isEmpty(id)) - { + private static int getServerId(String externalVarName) { + try { + String id = SystemUtilities.getExternalVariable(externalVarName); + if (StringUtilities.isEmpty(id)) { return -1; } return abs(parseInt(id)) % 100; - } - catch (NumberFormatException e) - { - System.err.println("Unable to get unique server id or index from environment variable/system property key-value: " + externalVarName + "=" + id); + } catch (Throwable e) { + System.err.println("Unable to get unique server id or index from environment variable/system property key-value: " + externalVarName); e.printStackTrace(System.err); return -1; } @@ -127,12 +127,13 @@ private static int getServerId(String externalVarName) * number. This number is chosen when the JVM is started and then stays fixed until next restart. This is to * ensure cluster uniqueness.
*
- * Because there is the possibility two machines could choose the same random number and be at the same cound, at the + * Because there is the possibility two machines could choose the same random number and be at the same count, at the * same time, a unique machine index is chosen to provide a 00 to 99 value for machine instance within a cluster. * To set the unique machine index value, set the environment variable JAVA_UTIL_CLUSTERID to a unique two-digit - * number on each machine in the cluster. If the machines are managed by CloundFoundry, the uniqueId will use the - * CF_INSTANCE_INDEX to provide unique machine ID. Only if neither of these environment variables are set, will it - * resort to using a random number from 00 to 99 for the machine instance number portion of the unique ID.
+ * number on each machine in the cluster. If the machines are in a managed container, the uniqueId will use the + * hash of the hostname, first byte of hash, modulo 100 to provide unique machine ID. If neither of these + * environment variables are set, will it resort to using a secure random number from 00 to 99 for the machine + * instance number portion of the unique ID.
*
* This API is slower than the 19 digit API. Grabbing a bunch of IDs in a tight loop for example, could cause * delays while it waits for the millisecond to tick over. This API can return up to 1,000 unique IDs per millisecond.
@@ -141,54 +142,52 @@ private static int getServerId(String externalVarName) * * @return long unique ID */ - public static long getUniqueId() - { - synchronized (lock) - { + public static long getUniqueId() { + lock.lock(); + try { long id = getUniqueIdAttempt(); - while (lastIds.containsKey(id)) - { + while (lastIds.containsKey(id)) { id = getUniqueIdAttempt(); } lastIds.put(id, null); return id; + } finally { + lock.unlock(); } } - private static long getUniqueIdAttempt() - { + private static long getUniqueIdAttempt() { count++; - if (count >= 1000) - { + if (count >= 1000) { count = 0; } long currentTimeMilliseconds = currentTimeMillis(); - if (currentTimeMilliseconds > previousTimeMilliseconds) - { + if (currentTimeMilliseconds > previousTimeMilliseconds) { count = 0; previousTimeMilliseconds = currentTimeMilliseconds; } - return currentTimeMilliseconds * 100000 + count * 100L + serverId; + return currentTimeMilliseconds * 100_000 + count * 100L + serverId; } /** * ID format will be 1234567890123.9999.99 (no dots - only there for clarity - the number is a long). There are * 13 digits for time - milliseconds since Jan 1, 1970. This is followed by a count that is 0000 through 9999. * This is followed by a random 2 digit number. This number is chosen when the JVM is started and then stays fixed - * until next restart. This is to ensure cluster uniqueness.
+ * until next restart. This is to ensure uniqueness within cluster.
*
- * Because there is the possibility two machines could choose the same random number and be at the same cound, at the + * Because there is the possibility two machines could choose the same random number and be at the same count, at the * same time, a unique machine index is chosen to provide a 00 to 99 value for machine instance within a cluster. * To set the unique machine index value, set the environment variable JAVA_UTIL_CLUSTERID to a unique two-digit - * number on each machine in the cluster. If the machines are managed by CloundFoundry, the uniqueId will use the - * CF_INSTANCE_INDEX to provide unique machine ID. Only if neither of these environment variables are set, will it - * resort to using a random number from 00 to 99 for the machine instance number portion of the unique ID.
+ * number on each machine in the cluster. If the machines are in a managed container, the uniqueId will use the + * hash of the hostname, first byte of hash, modulo 100 to provide unique machine ID. If neither of these + * environment variables are set, will it resort to using a secure random number from 00 to 99 for the machine + * instance number portion of the unique ID.
*
- * The returned ID will be 19 digits and this API will work through 2286. After then, it would likely return - * negative numbers (still unique).
+ * The returned ID will be 19 digits and this API will work through 2286. After then, it will return negative + * numbers (still unique).
*
* This API is faster than the 18 digit API. This API can return up to 10,000 unique IDs per millisecond.
*
@@ -196,38 +195,34 @@ private static long getUniqueIdAttempt() * * @return long unique ID */ - public static long getUniqueId19() - { - synchronized (lock19) - { + public static long getUniqueId19() { + lock19.lock(); + try { long id = getFullUniqueId19(); - while (lastIdsFull.containsKey(id)) - { + while (lastIdsFull.containsKey(id)) { id = getFullUniqueId19(); } lastIdsFull.put(id, null); return id; + } finally { + lock19.unlock(); } } // Use up to 19 digits (much faster) - private static long getFullUniqueId19() - { + private static long getFullUniqueId19() { count2++; - if (count2 >= 10000) - { + if (count2 >= 10_000) { count2 = 0; } long currentTimeMilliseconds = currentTimeMillis(); - if (currentTimeMilliseconds > previousTimeMilliseconds2) - { + if (currentTimeMilliseconds > previousTimeMilliseconds2) { count2 = 0; previousTimeMilliseconds2 = currentTimeMilliseconds; } - - return currentTimeMilliseconds * 1000000 + count2 * 100L + serverId; + return currentTimeMilliseconds * 1_000_000 + count2 * 100L + serverId; } /** @@ -237,9 +232,8 @@ private static long getFullUniqueId19() * @return Date when the ID was generated, with the time portion accurate to the millisecond. The time * is measured in milliseconds, between the time the id was generated and midnight, January 1, 1970 UTC. */ - public static Date getDate(long uniqueId) - { - return new Date(uniqueId / 100000); + public static Date getDate(long uniqueId) { + return new Date(uniqueId / 100_000); } /** @@ -249,8 +243,7 @@ public static Date getDate(long uniqueId) * @return Date when the ID was generated, with the time portion accurate to the millisecond. The time * is measured in milliseconds, between the time the id was generated and midnight, January 1, 1970 UTC. */ - public static Date getDate19(long uniqueId) - { - return new Date(uniqueId / 1000000); + public static Date getDate19(long uniqueId) { + return new Date(uniqueId / 1_000_000); } } diff --git a/src/test/java/com/cedarsoftware/util/TestCaseInsensitiveMap.java b/src/test/java/com/cedarsoftware/util/TestCaseInsensitiveMap.java index 9434e5c2..0acf7650 100644 --- a/src/test/java/com/cedarsoftware/util/TestCaseInsensitiveMap.java +++ b/src/test/java/com/cedarsoftware/util/TestCaseInsensitiveMap.java @@ -1464,36 +1464,56 @@ public void testCaseInsensitiveStringHashcodeCollision() assert !ciString.equals(ciString2); } - @Disabled + private String current = "0"; + public String getNext() { + int length = current.length(); + StringBuilder next = new StringBuilder(current); + boolean carry = true; + + for (int i = length - 1; i >= 0 && carry; i--) { + char ch = next.charAt(i); + if (ch == 'j') { + next.setCharAt(i, '0'); + } else { + if (ch == '9') { + next.setCharAt(i, 'a'); + } else { + next.setCharAt(i, (char) (ch + 1)); + } + carry = false; + } + } + + // If carry is still true, all digits were 'f', append '1' at the beginning + if (carry) { + next.insert(0, '1'); + } + + current = next.toString(); + return current; + } + @Test - public void testGenHash() - { - final String TEXT = "was stored earlier had the same hash as"; + public void testGenHash() { HashMap hs = new HashMap<>(); long t1 = System.currentTimeMillis(); - long t2 = System.currentTimeMillis(); - for (long l = 0; l < Long.MAX_VALUE; l++) - { - CaseInsensitiveMap.CaseInsensitiveString key = new CaseInsensitiveMap.CaseInsensitiveString("f" + l); - if (hs.containsKey(key.hashCode())) - { - System.out.println("'" + hs.get(key.hashCode()) + "' " + TEXT + " '" + key + "'"); - break; - } - else - { - hs.put(key.hashCode(),key); + int dupe = 0; + + while (true) { + String hash = getNext(); + CaseInsensitiveMap.CaseInsensitiveString key = new CaseInsensitiveMap.CaseInsensitiveString(hash); + if (hs.containsKey(key.hashCode())) { + dupe++; + continue; + } else { + hs.put(key.hashCode(), key); } - t2 = System.currentTimeMillis(); - - if (t2 - t1 > 10000) - { - t1 = System.currentTimeMillis(); - System.out.println("10 seconds gone! size is:"+hs.size()); + if (System.currentTimeMillis() - t1 > 250) { + break; } } - System.out.println("Done"); + System.out.println("Done, ran " + (System.currentTimeMillis() - t1) + " ms, " + dupe + " dupes, CaseInsensitiveMap.size: " + hs.size()); } @Test diff --git a/src/test/java/com/cedarsoftware/util/TestDeepEqualsUnordered.java b/src/test/java/com/cedarsoftware/util/TestDeepEqualsUnordered.java index d3af83b3..5717869f 100644 --- a/src/test/java/com/cedarsoftware/util/TestDeepEqualsUnordered.java +++ b/src/test/java/com/cedarsoftware/util/TestDeepEqualsUnordered.java @@ -1,10 +1,14 @@ package com.cedarsoftware.util; -import org.junit.jupiter.api.Test; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; -import java.util.*; +import org.junit.jupiter.api.Test; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertTrue; public class TestDeepEqualsUnordered { @@ -16,7 +20,7 @@ public void testUnorderedCollectionWithCollidingHashcodesAndParentLinks() elementsA.add(new BadHashingValueWithParentLink(1, 0)); Set elementsB = new HashSet<>(); elementsB.add(new BadHashingValueWithParentLink(0, 1)); - elementsB.add( new BadHashingValueWithParentLink(1, 0)); + elementsB.add(new BadHashingValueWithParentLink(1, 0)); Parent parentA = new Parent(); parentA.addElements(elementsA); @@ -48,6 +52,7 @@ public void addElements(Set a) { } private static class BadHashingValueWithParentLink { private final int i; + private final int j; private Parent parent; public BadHashingValueWithParentLink(int i, int j) { @@ -55,8 +60,6 @@ public BadHashingValueWithParentLink(int i, int j) { this.j = j; } - private final int j; - @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/src/test/java/com/cedarsoftware/util/TestUniqueIdGenerator.java b/src/test/java/com/cedarsoftware/util/TestUniqueIdGenerator.java index 12bdbcc2..b84df42f 100644 --- a/src/test/java/com/cedarsoftware/util/TestUniqueIdGenerator.java +++ b/src/test/java/com/cedarsoftware/util/TestUniqueIdGenerator.java @@ -41,7 +41,7 @@ public class TestUniqueIdGenerator private static final int bucketSize = 200000; @Test - public void testIdLengths() + void testIdLengths() { long id18 = getUniqueId(); long id19 = getUniqueId19(); @@ -51,7 +51,7 @@ public void testIdLengths() } @Test - public void testIDtoDate() + void testIDtoDate() { long id = getUniqueId(); Date date = getDate(id); @@ -63,7 +63,7 @@ public void testIDtoDate() } @Test - public void testUniqueIdGeneration() + void testUniqueIdGeneration() { int maxIdGen = 100000; int testSize = maxIdGen; @@ -112,8 +112,20 @@ private void assertMonotonicallyIncreasing(Long[] ids) } } +// @Test +// void speedTest() +// { +// long start = System.currentTimeMillis(); +// int count = 0; +// while (System.currentTimeMillis() < start + 1000) { +// UniqueIdGenerator.getUniqueId19(); +// count++; +// } +// out.println("count = " + count); +// } + @Test - public void testConcurrency() + void testConcurrency() { final CountDownLatch startLatch = new CountDownLatch(1); int numTests = 4;