From 062cdecc1f02360e05cc609e6b7036cbed16778a Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Tue, 10 Sep 2024 09:18:02 -0400 Subject: [PATCH 1/4] fix: remove hashcode, as it could lead to collisions during serialization. --- .../type/label/LabelMultisetEntryList.java | 31 ++++++++++++++++--- .../imglib2/type/label/LabelMultisetType.java | 10 ++++-- .../net/imglib2/type/label/LabelUtils.java | 17 +++++----- 3 files changed, 44 insertions(+), 14 deletions(-) diff --git a/src/main/java/net/imglib2/type/label/LabelMultisetEntryList.java b/src/main/java/net/imglib2/type/label/LabelMultisetEntryList.java index 3b17b6c..08b9490 100644 --- a/src/main/java/net/imglib2/type/label/LabelMultisetEntryList.java +++ b/src/main/java/net/imglib2/type/label/LabelMultisetEntryList.java @@ -13,7 +13,7 @@ * all add operations should insert appropriately to remain sorted. If `sortByCount` is ever called (or the order is manually changed) * it is the developers responsibility to `sortById()` prior to any additional calls. */ -public class LabelMultisetEntryList extends MappedObjectArrayList { +public class LabelMultisetEntryList extends MappedObjectArrayList implements Comparable { public LabelMultisetEntryList() { @@ -43,9 +43,8 @@ public boolean equals(Object o) { if (!(o instanceof LabelMultisetEntryList)) return false; final LabelMultisetEntryList other = (LabelMultisetEntryList)o; if (other.size() != size()) return false; - final long[] thisData = ((LongMappedAccessData) data).getData(); - final long[] otherData = ((LongMappedAccessData) other.data).getData(); - return Arrays.equals(thisData, otherData); + if (other.hashCode() != hashCode()) return false; + return compareTo(other) == 0; } public LabelMultisetEntryList(final int capacity) { @@ -396,4 +395,28 @@ public void mergeWith(final Set> entrySet) { } releaseRef(e1); } + + @Override public int compareTo(LabelMultisetEntryList o) { + + final int size = size(); + + final int sizeCompare = Integer.compare(size, o.size()); + if (sizeCompare != 0) return sizeCompare; + + final RefIterator thisIter = iterator(); + final RefIterator otherIter = o.iterator(); + + for (int i = 0; i < size; i++) { + final LabelMultisetEntry thisNext = thisIter.next(); + final LabelMultisetEntry otherNext = otherIter.next(); + + final int idCompare = Long.compare(thisNext.getId(), otherNext.getId()); + if (idCompare != 0) return idCompare; + + final int countCompare = Long.compare(thisNext.getCount(), otherNext.getCount()); + if (countCompare != 0) return countCompare; + } + + return 0; + } } diff --git a/src/main/java/net/imglib2/type/label/LabelMultisetType.java b/src/main/java/net/imglib2/type/label/LabelMultisetType.java index 80b9f81..db7616b 100644 --- a/src/main/java/net/imglib2/type/label/LabelMultisetType.java +++ b/src/main/java/net/imglib2/type/label/LabelMultisetType.java @@ -702,10 +702,14 @@ public int compareTo(final LabelMultisetType arg0) { final long ours = argMax(); final long theirs = arg0.argMax(); - final int initialComparison = Long.compare(ours, theirs); - if (initialComparison != 0) return initialComparison; - else return Long.compare(count(ours), count(theirs)); + final int argMaxCompare = Long.compare(ours, theirs); + if (argMaxCompare != 0) return argMaxCompare; + + final int countCompare = Long.compare(count(ours), count(theirs)); + if (countCompare != 0) return countCompare; + + return labelMultisetEntries().compareTo(arg0.labelMultisetEntries()); } @Override diff --git a/src/main/java/net/imglib2/type/label/LabelUtils.java b/src/main/java/net/imglib2/type/label/LabelUtils.java index a2e7eda..fb98194 100644 --- a/src/main/java/net/imglib2/type/label/LabelUtils.java +++ b/src/main/java/net/imglib2/type/label/LabelUtils.java @@ -1,7 +1,5 @@ package net.imglib2.type.label; -import gnu.trove.impl.Constants; -import gnu.trove.map.hash.TIntIntHashMap; import gnu.trove.map.hash.TIntLongHashMap; import net.imglib2.RandomAccessibleInterval; import net.imglib2.converter.Converters; @@ -15,6 +13,7 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.Collection; +import java.util.TreeMap; import static gnu.trove.impl.Constants.DEFAULT_CAPACITY; import static gnu.trove.impl.Constants.DEFAULT_LOAD_FACTOR; @@ -59,7 +58,7 @@ public static byte[] serializeLabelMultisetTypes( * It's now calculated during deserializtaion instead.*/ writeInt(dataBuffer, 0, ByteOrder.BIG_ENDIAN); - final TIntIntHashMap listOffsets = new TIntIntHashMap(DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR, -1, -1); + final TreeMap listOffsets = new TreeMap<>(); int nextListOffset = 0; @@ -67,11 +66,15 @@ public static byte[] serializeLabelMultisetTypes( final ByteArrayOutputStream entryList = new ByteArrayOutputStream(); for (final LabelMultisetType lmt : lmts) { + + // It's not isEmpty() and size() are not equivalent in this case. + // size() refers to the sum of the count() of each entry. + // entries can have a count of `0` + //noinspection SizeReplaceableByIsEmpty if (!(lmt.isEmpty() || (lmt.size() == 0) )) nonEmptyListCount++; - final int listHash = lmt.listHashCode(); - int listOffset = listOffsets.get(listHash); - if (listOffset != listOffsets.getNoEntryValue()) { + final Integer listOffset = listOffsets.get(lmt.labelMultisetEntries()); + if (listOffset != null) { writeInt(dataBuffer, listOffset, ByteOrder.BIG_ENDIAN); } else { writeInt(entryList, lmt.entrySet().size(), ByteOrder.LITTLE_ENDIAN); @@ -79,7 +82,7 @@ public static byte[] serializeLabelMultisetTypes( writeLong(entryList, entry.getElement().id(), ByteOrder.LITTLE_ENDIAN); writeInt(entryList, entry.getCount(), ByteOrder.LITTLE_ENDIAN); } - listOffsets.put(listHash, nextListOffset); + listOffsets.put(lmt.copy().labelMultisetEntries(), nextListOffset); writeInt(dataBuffer, nextListOffset, ByteOrder.BIG_ENDIAN); nextListOffset = entryList.size(); //Another quirk to maintain size compatibility, see list NOTE above. } From ac21b62c58f64339603dc349e3e84d5c08213317 Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Tue, 10 Sep 2024 09:27:28 -0400 Subject: [PATCH 2/4] fix: argmax INVALID if no data --- .../java/net/imglib2/type/label/VolatileLabelMultisetArray.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/net/imglib2/type/label/VolatileLabelMultisetArray.java b/src/main/java/net/imglib2/type/label/VolatileLabelMultisetArray.java index 53fe2e1..850146f 100644 --- a/src/main/java/net/imglib2/type/label/VolatileLabelMultisetArray.java +++ b/src/main/java/net/imglib2/type/label/VolatileLabelMultisetArray.java @@ -112,6 +112,8 @@ public int getArrayLength() { public long argMax(final int offset) { + if (data.length == 0) + return Label.INVALID; return this.argMax[offset]; } From 7cae5c2c0fa48ebc90c4762371a412422332d741 Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Wed, 11 Sep 2024 09:29:08 -0400 Subject: [PATCH 3/4] perf: remove hashcode 'shortcut' It's actually slower in many cases, since hashcode calculation always iterates over all entries. Meanwhile, `compareTo` can return early if it's clear they are not equal. --- src/main/java/net/imglib2/type/label/LabelMultisetEntryList.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/net/imglib2/type/label/LabelMultisetEntryList.java b/src/main/java/net/imglib2/type/label/LabelMultisetEntryList.java index 08b9490..5b1c966 100644 --- a/src/main/java/net/imglib2/type/label/LabelMultisetEntryList.java +++ b/src/main/java/net/imglib2/type/label/LabelMultisetEntryList.java @@ -43,7 +43,6 @@ public boolean equals(Object o) { if (!(o instanceof LabelMultisetEntryList)) return false; final LabelMultisetEntryList other = (LabelMultisetEntryList)o; if (other.size() != size()) return false; - if (other.hashCode() != hashCode()) return false; return compareTo(other) == 0; } From 9d1de362d0a472fe59cb6aea6ca09befa31de5c9 Mon Sep 17 00:00:00 2001 From: Caleb Hulbert Date: Thu, 19 Sep 2024 11:11:57 -0400 Subject: [PATCH 4/4] perf: improve compareTo of LabelMultisetEntryList refactor: Comparable organization the `put` operation of `TreeMap` is expensive, since `compareTo` is expensive. So we want to reduce the number of calls to this. To that end we don't want to check equality BEFORE calling `put` since both will calculate `compareTo`. So we `putIfAbsent` on the list. However, the list at this point is a reference that can change, so we need to copy the data. But we don't want to always copy, since most lists (in most cases) are not unique, and we only need a single copy to reference in the `TreeMap`. So only copy the data `via addAll` when the list is a newly added one. In addition, move the `Comparable` implementation to a package-private subclass that also reuses a single ref from construction to do the comparison. smarter way of detecting when all entries are empty (`size()` was expensive, since it requires iterating over all entries) --- .../ComparableLabelMultisetEntryList.java | 58 +++++++++++++++++++ .../type/label/LabelMultisetEntryList.java | 31 ++-------- .../imglib2/type/label/LabelMultisetType.java | 7 ++- .../net/imglib2/type/label/LabelUtils.java | 37 ++++++------ 4 files changed, 86 insertions(+), 47 deletions(-) create mode 100644 src/main/java/net/imglib2/type/label/ComparableLabelMultisetEntryList.java diff --git a/src/main/java/net/imglib2/type/label/ComparableLabelMultisetEntryList.java b/src/main/java/net/imglib2/type/label/ComparableLabelMultisetEntryList.java new file mode 100644 index 0000000..a70e142 --- /dev/null +++ b/src/main/java/net/imglib2/type/label/ComparableLabelMultisetEntryList.java @@ -0,0 +1,58 @@ +package net.imglib2.type.label; + +class ComparableLabelMultisetEntryList extends LabelMultisetEntryList implements Comparable { + + private final LabelMultisetEntry ref; + + ComparableLabelMultisetEntryList() { + this(new LabelMultisetEntry()); + } + + ComparableLabelMultisetEntryList(final LabelMultisetEntry ref) { + + this.ref = ref; + } + + ComparableLabelMultisetEntryList(final LabelMultisetEntryList list) { + this(list, new LabelMultisetEntry()); + + } + ComparableLabelMultisetEntryList(final LabelMultisetEntryList list, final LabelMultisetEntry ref) { + + this.ref = ref; + referToDataAt(list.data, list.getBaseOffset()); + } + + @Override public LabelMultisetEntry createRef() { + + return ref; + } + + @Override public int compareTo(LabelMultisetEntryList o) { + + final int size = size(); + + /* if different sizes or empty, return result of compare size*/ + final int sizeCompare = Integer.compare(size, o.size()); + if (sizeCompare != 0 || size == 0) + return sizeCompare; + + final RefIterator thisIter = iterator(); + final RefIterator otherIter = o.iterator(); + + for (int i = 0; i < size; i++) { + final LabelMultisetEntry thisNext = thisIter.next(); + final LabelMultisetEntry otherNext = otherIter.next(); + + final int idCompare = Long.compare(thisNext.getId(), otherNext.getId()); + if (idCompare != 0) + return idCompare; + + final int countCompare = Integer.compare(thisNext.getCount(), otherNext.getCount()); + if (countCompare != 0) + return countCompare; + } + + return 0; + } +} diff --git a/src/main/java/net/imglib2/type/label/LabelMultisetEntryList.java b/src/main/java/net/imglib2/type/label/LabelMultisetEntryList.java index 5b1c966..9c35ece 100644 --- a/src/main/java/net/imglib2/type/label/LabelMultisetEntryList.java +++ b/src/main/java/net/imglib2/type/label/LabelMultisetEntryList.java @@ -3,7 +3,6 @@ import net.imglib2.type.label.LabelMultisetType.Entry; import org.apache.commons.lang3.builder.HashCodeBuilder; -import java.util.Arrays; import java.util.Collection; import java.util.Iterator; import java.util.Set; @@ -13,7 +12,7 @@ * all add operations should insert appropriately to remain sorted. If `sortByCount` is ever called (or the order is manually changed) * it is the developers responsibility to `sortById()` prior to any additional calls. */ -public class LabelMultisetEntryList extends MappedObjectArrayList implements Comparable { +public class LabelMultisetEntryList extends MappedObjectArrayList { public LabelMultisetEntryList() { @@ -43,7 +42,9 @@ public boolean equals(Object o) { if (!(o instanceof LabelMultisetEntryList)) return false; final LabelMultisetEntryList other = (LabelMultisetEntryList)o; if (other.size() != size()) return false; - return compareTo(other) == 0; + final ComparableLabelMultisetEntryList thisComparable = new ComparableLabelMultisetEntryList(this); + final ComparableLabelMultisetEntryList otherComparable = new ComparableLabelMultisetEntryList(other); + return thisComparable.compareTo(otherComparable) == 0; } public LabelMultisetEntryList(final int capacity) { @@ -394,28 +395,4 @@ public void mergeWith(final Set> entrySet) { } releaseRef(e1); } - - @Override public int compareTo(LabelMultisetEntryList o) { - - final int size = size(); - - final int sizeCompare = Integer.compare(size, o.size()); - if (sizeCompare != 0) return sizeCompare; - - final RefIterator thisIter = iterator(); - final RefIterator otherIter = o.iterator(); - - for (int i = 0; i < size; i++) { - final LabelMultisetEntry thisNext = thisIter.next(); - final LabelMultisetEntry otherNext = otherIter.next(); - - final int idCompare = Long.compare(thisNext.getId(), otherNext.getId()); - if (idCompare != 0) return idCompare; - - final int countCompare = Long.compare(thisNext.getCount(), otherNext.getCount()); - if (countCompare != 0) return countCompare; - } - - return 0; - } } diff --git a/src/main/java/net/imglib2/type/label/LabelMultisetType.java b/src/main/java/net/imglib2/type/label/LabelMultisetType.java index db7616b..514b0b6 100644 --- a/src/main/java/net/imglib2/type/label/LabelMultisetType.java +++ b/src/main/java/net/imglib2/type/label/LabelMultisetType.java @@ -287,8 +287,7 @@ public LabelMultisetType copy() { access.isValid(), access.argMaxCopy()); /* get a new type instance */ - final LabelMultisetType that = new LabelMultisetType(null, accessCopy); - return that; + return new LabelMultisetType(null, accessCopy); } } @@ -709,7 +708,9 @@ public int compareTo(final LabelMultisetType arg0) { final int countCompare = Long.compare(count(ours), count(theirs)); if (countCompare != 0) return countCompare; - return labelMultisetEntries().compareTo(arg0.labelMultisetEntries()); + final ComparableLabelMultisetEntryList thisComparable = new ComparableLabelMultisetEntryList(labelMultisetEntries()); + final ComparableLabelMultisetEntryList otherComparable = new ComparableLabelMultisetEntryList(arg0.labelMultisetEntries()); + return thisComparable.compareTo(otherComparable); } @Override diff --git a/src/main/java/net/imglib2/type/label/LabelUtils.java b/src/main/java/net/imglib2/type/label/LabelUtils.java index fb98194..b54c22d 100644 --- a/src/main/java/net/imglib2/type/label/LabelUtils.java +++ b/src/main/java/net/imglib2/type/label/LabelUtils.java @@ -13,13 +13,13 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.Collection; +import java.util.Set; import java.util.TreeMap; import static gnu.trove.impl.Constants.DEFAULT_CAPACITY; import static gnu.trove.impl.Constants.DEFAULT_LOAD_FACTOR; import static net.imglib2.type.label.AbstractLabelMultisetLoader.argMaxListSizeInBytes; import static net.imglib2.type.label.AbstractLabelMultisetLoader.listOffsetsSizeInBytes; -import static net.imglib2.type.label.ByteUtils.INT_SIZE; public class LabelUtils { @@ -51,44 +51,47 @@ public static byte[] serializeLabelMultisetTypes( final LabelMultisetEntry entryReference = new LabelMultisetEntry(0, 1); - final int argMaxSize = INT_SIZE; // in reality, the int value is always `0`, with size of 4 bytes - final int offsetListSize = INT_SIZE * numElements; final ByteArrayOutputStream dataBuffer = new ByteArrayOutputStream(); /* No longer serialized out ArgMax; we do this by specifying the ArgMax size as 0; * It's now calculated during deserializtaion instead.*/ writeInt(dataBuffer, 0, ByteOrder.BIG_ENDIAN); - final TreeMap listOffsets = new TreeMap<>(); + final TreeMap listOffsets = new TreeMap<>(); int nextListOffset = 0; - int nonEmptyListCount = 0; + boolean allListsEmpty = true; final ByteArrayOutputStream entryList = new ByteArrayOutputStream(); + ComparableLabelMultisetEntryList listRef = new ComparableLabelMultisetEntryList(); for (final LabelMultisetType lmt : lmts) { - // It's not isEmpty() and size() are not equivalent in this case. - // size() refers to the sum of the count() of each entry. - // entries can have a count of `0` - //noinspection SizeReplaceableByIsEmpty - if (!(lmt.isEmpty() || (lmt.size() == 0) )) - nonEmptyListCount++; - final Integer listOffset = listOffsets.get(lmt.labelMultisetEntries()); + lmt.getAccess().getValue(lmt.index().get(), listRef); + final Integer listOffset = listOffsets.putIfAbsent(listRef, nextListOffset); if (listOffset != null) { writeInt(dataBuffer, listOffset, ByteOrder.BIG_ENDIAN); } else { - writeInt(entryList, lmt.entrySet().size(), ByteOrder.LITTLE_ENDIAN); - for (final Entry