From 525806ac00b3989d850903d0f4f83b9c049c073a Mon Sep 17 00:00:00 2001 From: Juliano Efson Sales Date: Tue, 11 Jul 2017 12:02:34 +0200 Subject: [PATCH 1/3] casting bugs and minor optimization --- src/main/java/com/spotify/annoy/ANNIndex.java | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/main/java/com/spotify/annoy/ANNIndex.java b/src/main/java/com/spotify/annoy/ANNIndex.java index 25bb13e..edde980 100644 --- a/src/main/java/com/spotify/annoy/ANNIndex.java +++ b/src/main/java/com/spotify/annoy/ANNIndex.java @@ -27,6 +27,7 @@ public class ANNIndex implements AnnoyIndex { private final int INT_SIZE = 4; private final int FLOAT_SIZE = 4; + private final int MAX_NODES_IN_BUFFER; private final int BLOCK_SIZE; private RandomAccessFile memoryMappedFile; @@ -69,9 +70,9 @@ public ANNIndex(final int dimension, // them where the separating plane normally goes) this.MIN_LEAF_SIZE = DIMENSION + 2; this.NODE_SIZE = K_NODE_HEADER_STYLE + FLOAT_SIZE * DIMENSION; - this.BLOCK_SIZE = blockSize == 0 ? + this.MAX_NODES_IN_BUFFER = blockSize == 0 ? Integer.MAX_VALUE / NODE_SIZE : blockSize * NODE_SIZE; - + BLOCK_SIZE = this.MAX_NODES_IN_BUFFER * NODE_SIZE; roots = new ArrayList<>(); load(filename); } @@ -83,7 +84,8 @@ private void load(final String filename) throws IOException { throw new IOException("Index is a 0-byte file?"); } - int buffIndex = (int) ((fileSize - 1) / BLOCK_SIZE); + int numNodes = (int) (fileSize / NODE_SIZE); + int buffIndex = (numNodes - 1) / MAX_NODES_IN_BUFFER; int rest = (int) (fileSize % BLOCK_SIZE); int blockSize = (rest > 0 ? rest : BLOCK_SIZE); long position = fileSize - blockSize; @@ -115,21 +117,21 @@ private void load(final String filename) throws IOException { } private float getFloatInAnnBuf(long pos) { - int b = (int) pos / BLOCK_SIZE; - int f = (int) pos % BLOCK_SIZE; + int b = (int) (pos / BLOCK_SIZE); + int f = (int) (pos % BLOCK_SIZE); return buffers[b].getFloat(f); } private int getIntInAnnBuf(long pos) { - int b = (int) pos / BLOCK_SIZE; - int i = (int) pos % BLOCK_SIZE; + int b = (int) (pos / BLOCK_SIZE); + int i = (int) (pos % BLOCK_SIZE); return buffers[b].getInt(i); } @Override public void getNodeVector(final long nodeOffset, float[] v) { - MappedByteBuffer nodeBuf = buffers[(int) nodeOffset / BLOCK_SIZE]; - int offset = (int) (nodeOffset % BLOCK_SIZE) + K_NODE_HEADER_STYLE; + MappedByteBuffer nodeBuf = buffers[(int) (nodeOffset / BLOCK_SIZE)]; + int offset = (int) ((nodeOffset % BLOCK_SIZE) + K_NODE_HEADER_STYLE); for (int i = 0; i < DIMENSION; i++) { v[i] = nodeBuf.getFloat(offset + i * FLOAT_SIZE); } @@ -145,7 +147,7 @@ private float getNodeBias(final long nodeOffset) { // euclidean-only } public final float[] getItemVector(final int itemIndex) { - return getNodeVector(itemIndex * NODE_SIZE); + return getNodeVector(((long) itemIndex) * NODE_SIZE); } public float[] getNodeVector(final long nodeOffset) { @@ -254,7 +256,7 @@ public final List getNearest(final float[] queryVector, int j = getIntInAnnBuf(topNodeOffset + INDEX_TYPE_OFFSET + i * INT_SIZE); - if (isZeroVec(getNodeVector(j * NODE_SIZE))) + if (isZeroVec(getNodeVector(((long) j) * NODE_SIZE))) continue; nearestNeighbors.add(j); } From 4d1759e5bccb254a986369a5a1f5433465235def Mon Sep 17 00:00:00 2001 From: Juliano Efson Sales Date: Wed, 12 Jul 2017 16:27:28 +0200 Subject: [PATCH 2/3] casting bugs --- src/main/java/com/spotify/annoy/ANNIndex.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/spotify/annoy/ANNIndex.java b/src/main/java/com/spotify/annoy/ANNIndex.java index edde980..2b93f14 100644 --- a/src/main/java/com/spotify/annoy/ANNIndex.java +++ b/src/main/java/com/spotify/annoy/ANNIndex.java @@ -265,8 +265,8 @@ public final List getNearest(final float[] queryVector, cosineMargin(v, queryVector) : euclideanMargin(v, queryVector, getNodeBias(topNodeOffset)); long childrenMemOffset = topNodeOffset + INDEX_TYPE_OFFSET; - long lChild = NODE_SIZE * getIntInAnnBuf(childrenMemOffset); - long rChild = NODE_SIZE * getIntInAnnBuf(childrenMemOffset + 4); + long lChild = ((long) NODE_SIZE) * getIntInAnnBuf(childrenMemOffset); + long rChild = ((long) NODE_SIZE) * getIntInAnnBuf(childrenMemOffset + 4); pq.add(new PQEntry(-margin, lChild)); pq.add(new PQEntry(margin, rChild)); } From 87c1ed637cd286bf46278ea2d81f2bda227896f7 Mon Sep 17 00:00:00 2001 From: Juliano Efson Sales Date: Thu, 13 Jul 2017 22:52:48 +0200 Subject: [PATCH 3/3] avoiding casting problems --- src/main/java/com/spotify/annoy/ANNIndex.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/main/java/com/spotify/annoy/ANNIndex.java b/src/main/java/com/spotify/annoy/ANNIndex.java index 2b93f14..e077384 100644 --- a/src/main/java/com/spotify/annoy/ANNIndex.java +++ b/src/main/java/com/spotify/annoy/ANNIndex.java @@ -23,7 +23,7 @@ public class ANNIndex implements AnnoyIndex { // size of C structs in bytes (initialized in init) private final int K_NODE_HEADER_STYLE; - private final int NODE_SIZE; + private final long NODE_SIZE; private final int INT_SIZE = 4; private final int FLOAT_SIZE = 4; @@ -70,9 +70,9 @@ public ANNIndex(final int dimension, // them where the separating plane normally goes) this.MIN_LEAF_SIZE = DIMENSION + 2; this.NODE_SIZE = K_NODE_HEADER_STYLE + FLOAT_SIZE * DIMENSION; - this.MAX_NODES_IN_BUFFER = blockSize == 0 ? - Integer.MAX_VALUE / NODE_SIZE : blockSize * NODE_SIZE; - BLOCK_SIZE = this.MAX_NODES_IN_BUFFER * NODE_SIZE; + this.MAX_NODES_IN_BUFFER = (int) (blockSize == 0 ? + Integer.MAX_VALUE / NODE_SIZE : blockSize * NODE_SIZE); + BLOCK_SIZE = (int) (this.MAX_NODES_IN_BUFFER * NODE_SIZE); roots = new ArrayList<>(); load(filename); } @@ -101,7 +101,7 @@ private void load(final String filename) throws IOException { buffers[buffIndex--] = annBuf; - for (int i = blockSize - NODE_SIZE; process && i >= 0; i -= NODE_SIZE) { + for (int i = blockSize - (int) NODE_SIZE; process && i >= 0; i -= NODE_SIZE) { index -= NODE_SIZE; int k = annBuf.getInt(i); // node[i].n_descendants if (m == -1 || k == m) { @@ -147,7 +147,7 @@ private float getNodeBias(final long nodeOffset) { // euclidean-only } public final float[] getItemVector(final int itemIndex) { - return getNodeVector(((long) itemIndex) * NODE_SIZE); + return getNodeVector(itemIndex * NODE_SIZE); } public float[] getNodeVector(final long nodeOffset) { @@ -256,7 +256,7 @@ public final List getNearest(final float[] queryVector, int j = getIntInAnnBuf(topNodeOffset + INDEX_TYPE_OFFSET + i * INT_SIZE); - if (isZeroVec(getNodeVector(((long) j) * NODE_SIZE))) + if (isZeroVec(getNodeVector(j * NODE_SIZE))) continue; nearestNeighbors.add(j); } @@ -265,8 +265,8 @@ public final List getNearest(final float[] queryVector, cosineMargin(v, queryVector) : euclideanMargin(v, queryVector, getNodeBias(topNodeOffset)); long childrenMemOffset = topNodeOffset + INDEX_TYPE_OFFSET; - long lChild = ((long) NODE_SIZE) * getIntInAnnBuf(childrenMemOffset); - long rChild = ((long) NODE_SIZE) * getIntInAnnBuf(childrenMemOffset + 4); + long lChild = NODE_SIZE * getIntInAnnBuf(childrenMemOffset); + long rChild = NODE_SIZE * getIntInAnnBuf(childrenMemOffset + 4); pq.add(new PQEntry(-margin, lChild)); pq.add(new PQEntry(margin, rChild)); }