From b7fb6d01b931faeef0555b78f72ccd7e34a4486f Mon Sep 17 00:00:00 2001 From: Tom Raney Date: Tue, 20 Jun 2017 16:30:52 -0700 Subject: [PATCH 1/3] row key update to include the byte prefix - incompatible update! major version bump. --- RELEASE_NOTES.txt | 7 ++++++- pom.xml | 2 +- .../java/com/urbanairship/datacube/Address.java | 16 ++++++++++++++-- .../java/com/urbanairship/datacube/DataCube.java | 4 +++- 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/RELEASE_NOTES.txt b/RELEASE_NOTES.txt index 4229c8f..962888e 100644 --- a/RELEASE_NOTES.txt +++ b/RELEASE_NOTES.txt @@ -1,3 +1,9 @@ +2.0.0 +===== +Non compatible row-key update. In particular, any DataCube objects defined +in previous versions with the useAddressPrefixByteHash parameter set to +true will not be able to access existing buckets with this update. + 1.5.0 ===== Prevent getId calls from creating a new identifier @@ -6,7 +12,6 @@ Optimize id creation. 1.4.0 ===== - Add functionality to optionally add a hash in front of row keys, which permits users to ignore dimension order when considering performance. 
diff --git a/pom.xml b/pom.xml index 55f4c56..dcce5b5 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ com.urbanairship datacube - 1.5.1-SNAPSHOT + 2.0.0-SNAPSHOT jar datacube diff --git a/src/main/java/com/urbanairship/datacube/Address.java b/src/main/java/com/urbanairship/datacube/Address.java index 621a15c..94d7ae0 100644 --- a/src/main/java/com/urbanairship/datacube/Address.java +++ b/src/main/java/com/urbanairship/datacube/Address.java @@ -146,13 +146,25 @@ private Optional toKey(IdService idService, boolean readOnly) throws IOE for (byte[] keyElement : keyElemsInOrder) { totalKeySize += keyElement.length; } - ByteBuffer bb = ByteBuffer.allocate(totalKeySize); - + ByteBuffer bb; + // Add a place holder for the hash byte if it's required + if (this.cube.useAddressPrefixByteHash()) { + bb = ByteBuffer.allocate(totalKeySize + 1); + bb.put((byte) 0x01); + } else { + bb = ByteBuffer.allocate(totalKeySize); + } for (byte[] keyElement : keyElemsInOrder) { bb.put(keyElement); } + // Update the byte prefix placeholder of the hash of the key contents if required. + if (this.cube.useAddressPrefixByteHash()) { + byte hashByte = Util.hashByteArray(bb.array(), 1, totalKeySize + 1); + bb.put(0, hashByte); + } + if (bb.remaining() != 0) { throw new AssertionError("Key length calculation was somehow wrong, " + bb.remaining() + " bytes remaining"); diff --git a/src/main/java/com/urbanairship/datacube/DataCube.java b/src/main/java/com/urbanairship/datacube/DataCube.java index b9c7d5e..0119877 100644 --- a/src/main/java/com/urbanairship/datacube/DataCube.java +++ b/src/main/java/com/urbanairship/datacube/DataCube.java @@ -54,7 +54,9 @@ public DataCube(List> dims, List rollups) { * like HBase, where monotonically increasing row keys can result in * hot spots. * Warning: Do NOT enable or disable this feature for an existing cube or - * the keys will not map properly. + * the keys will not map properly. 
Also, data from versions of + * datacube before 2.0.0, with this feature enabled, is not compatible with + * 2.0.0+. */ public DataCube(List> dims, List rollups, boolean useAddressPrefixByteHash) { this.dims = dims; From aa9f55810fc12e197e37995fc4ba7bc9183b2b97 Mon Sep 17 00:00:00 2001 From: Tom Raney Date: Wed, 21 Jun 2017 11:05:37 -0700 Subject: [PATCH 2/3] changed the constructor signature for DataCube to force a compilation error for anyone using the feature - to avoid accidental data access issues. --- .../com/urbanairship/datacube/DataCube.java | 21 ++++++++++++------- .../datacube/tweetcountexample/TweetCube.java | 2 +- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/main/java/com/urbanairship/datacube/DataCube.java b/src/main/java/com/urbanairship/datacube/DataCube.java index 0119877..63177c5 100644 --- a/src/main/java/com/urbanairship/datacube/DataCube.java +++ b/src/main/java/com/urbanairship/datacube/DataCube.java @@ -29,6 +29,8 @@ public class DataCube { private static final Logger log = LoggerFactory.getLogger(DataCube.class); + public enum PREFIX_MODE { NO_ADDRESS_PREFIX, MOD_ADDRESS_PREFIX} + private final List> dims; private final List rollups; private final Multimap,BucketType> bucketsOfInterest; @@ -42,27 +44,30 @@ public class DataCube { * @param rollups see {@link Rollup} */ public DataCube(List> dims, List rollups) { - this(dims, rollups, false); + this(dims, rollups, PREFIX_MODE.NO_ADDRESS_PREFIX); } /** * * @param dims see {@link Dimension} * @param rollups see {@link Rollup} - * @param useAddressPrefixByteHash Prefix the keys by a hash byte (calculated by hashing each element + * @param prefixMode use MOD_ADDRESS_PREFIX to prefix the keys by a hash byte (calculated by hashing each element * in the key). This is only a storage artifact to benefit systems * like HBase, where monotonically increasing row keys can result in * hot spots. 
- * Warning: Do NOT enable or disable this feature for an existing cube or - * the keys will not map properly. Also, data from versions of - * datacube before 2.0.0, with this feature enabled, is not compatible with - * 2.0.0+. + * Warning: Do NOT switch modes for an existing cube or the keys will + * not map properly. Also, data from versions of datacube before 2.0.0, + * with this feature enabled, is not compatible with 2.0.0+. */ - public DataCube(List> dims, List rollups, boolean useAddressPrefixByteHash) { + public DataCube(List> dims, List rollups, PREFIX_MODE prefixMode) { this.dims = dims; this.rollups = rollups; this.validAddressSet = Sets.newHashSet(); - this.useAddressPrefixByteHash = useAddressPrefixByteHash; + if (PREFIX_MODE.MOD_ADDRESS_PREFIX == prefixMode) { + this.useAddressPrefixByteHash = true; + } else { + this.useAddressPrefixByteHash = false; + } bucketsOfInterest = HashMultimap.create(); diff --git a/src/test/java/com/urbanairship/datacube/tweetcountexample/TweetCube.java b/src/test/java/com/urbanairship/datacube/tweetcountexample/TweetCube.java index f682155..623e8a4 100644 --- a/src/test/java/com/urbanairship/datacube/tweetcountexample/TweetCube.java +++ b/src/test/java/com/urbanairship/datacube/tweetcountexample/TweetCube.java @@ -84,7 +84,7 @@ public TweetCube(DbHarness dbHarness, SyncLevel syncLevel) { * The DataCube defines the core logic that maps input points to database * increments. 
*/ - dataCube = new DataCube(dimensions, rollups, true); + dataCube = new DataCube(dimensions, rollups, DataCube.PREFIX_MODE.MOD_ADDRESS_PREFIX); /* * The DataCubeIo object connects the DataCube logic layer and the From 25f59309b6339505e0eed48aa189cfbec1d2ae56 Mon Sep 17 00:00:00 2001 From: Tom Raney Date: Wed, 21 Jun 2017 11:21:22 -0700 Subject: [PATCH 3/3] made updates to docs for clarity --- RELEASE_NOTES.txt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/RELEASE_NOTES.txt b/RELEASE_NOTES.txt index 962888e..0a50661 100644 --- a/RELEASE_NOTES.txt +++ b/RELEASE_NOTES.txt @@ -1,8 +1,10 @@ 2.0.0 ===== -Non compatible row-key update. In particular, any DataCube objects defined -in previous versions with the useAddressPrefixByteHash parameter set to -true will not be able to access existing buckets with this update. +Non compatible update with existing data cubes. The DataCube constructor in +previous versions with the useAddressPrefixByteHash parameter set to true is +compatible with PREFIX_MODE.NO_ADDRESS_PREFIX, but is not compatible with the +new mode PREFIX_MODE.MOD_ADDRESS_PREFIX because there was a bug since 1.4.0 +that didn't implement this feature correctly. 1.5.0 =====