From 0bc4a1cf33406b99b5cc48853ba2aa8760db83c5 Mon Sep 17 00:00:00 2001 From: Claus Stadler Date: Fri, 29 Aug 2025 18:55:51 +0200 Subject: [PATCH 1/3] GH-3404: Canonicalize decimals during inlining for TDB2. --- .../apache/jena/sparql/util/XSDNumUtils.java | 12 ++++ .../apache/jena/tdb2/store/NodeIdInline.java | 1 + .../tdb2/store/value/TestNodeIdInline.java | 67 ++++++++++++++++--- 3 files changed, 72 insertions(+), 8 deletions(-) diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/util/XSDNumUtils.java b/jena-arq/src/main/java/org/apache/jena/sparql/util/XSDNumUtils.java index 453602e6e3c..2354a95a63e 100644 --- a/jena-arq/src/main/java/org/apache/jena/sparql/util/XSDNumUtils.java +++ b/jena-arq/src/main/java/org/apache/jena/sparql/util/XSDNumUtils.java @@ -147,6 +147,18 @@ public static String canonicalDecimalStrNoIntegerDot(BigDecimal bd) { return bd.stripTrailingZeros().toPlainString(); } + /** Return a canonical decimal with a trailing ".0". */ + public static BigDecimal canonicalDecimalWithDot(BigDecimal decimal) { + BigDecimal result = decimal; + if (result.scale() > 1) { + result = decimal.stripTrailingZeros(); + } + if (result.scale() <= 0) { + result = result.setScale(1); + } + return result; + } + /** * Integer-valued decimals have a trailing ".0". * (In XML Schema Datatype 1.1 they did not have a ".0".) diff --git a/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/NodeIdInline.java b/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/NodeIdInline.java index f13cc1f0678..f28da5354cc 100644 --- a/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/NodeIdInline.java +++ b/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/NodeIdInline.java @@ -163,6 +163,7 @@ public static boolean hasInlineDatatype(Node node) { // But at this point we know it's a valid literal so the excessive // chopping by .trim is safe. 
BigDecimal decimal = new BigDecimal(lit.getLexicalForm().trim()); + decimal = XSDNumUtils.canonicalDecimalWithDot(decimal); // Does range checking. DecimalNode56 dn = DecimalNode56.valueOf(decimal); diff --git a/jena-tdb2/src/test/java/org/apache/jena/tdb2/store/value/TestNodeIdInline.java b/jena-tdb2/src/test/java/org/apache/jena/tdb2/store/value/TestNodeIdInline.java index b5b0bc5f924..61792b0cc94 100644 --- a/jena-tdb2/src/test/java/org/apache/jena/tdb2/store/value/TestNodeIdInline.java +++ b/jena-tdb2/src/test/java/org/apache/jena/tdb2/store/value/TestNodeIdInline.java @@ -91,35 +91,68 @@ public class TestNodeIdInline @Test public void nodeId_int_21() { testNoInline("'300'^^xsd:byte"); } - @Test public void nodeId_decimal_1() + @Test public void nodeId_decimal_01() { test("3.14", "3.14"); } - @Test public void nodeId_decimal_2() + @Test public void nodeId_decimal_02() { testNoInline("123456789.123456789"); } // Just this once, directly create the Node. - @Test public void nodeId_decimal_3() + @Test public void nodeId_decimal_03() { test("12.89", NodeFactory.createLiteralDT("12.89", XSDDatatype.XSDdecimal)); } - @Test public void nodeId_decimal_4() + @Test public void nodeId_decimal_04() { test("-1.0", "-1.0"); } // This number has > 47 bits of value : 2412.80478192688 - @Test public void nodeId_decimal_5() + @Test public void nodeId_decimal_05() { testNoInline("2412.80478192688"); } // This number has > 47 bits of value : -2412.80478192688 - @Test public void nodeId_decimal_6() + @Test public void nodeId_decimal_06() { testNoInline("-2412.80478192688"); } - @Test public void nodeId_decimal_7() + @Test public void nodeId_decimal_07() { test("'0.00000001'^^xsd:decimal", NodeFactory.createLiteralDT("0.00000001", XSDDatatype.XSDdecimal)); } - @Test public void nodeId_decimal_8() + @Test public void nodeId_decimal_08() { test("0.00000001", NodeFactory.createLiteralDT("0.00000001", XSDDatatype.XSDdecimal)); } + @Test public void nodeId_decimal_09() + { 
testNodeIdRoundtripDecimal("0"); } + + @Test public void nodeId_decimal_10() + { testNodeIdRoundtripDecimal("-0"); } + + @Test public void nodeId_decimal_11() + { testNodeIdRoundtripDecimal("0.0"); } + + @Test public void nodeId_decimal_12() + { testNodeIdRoundtripDecimal("-0.0"); } + + @Test public void nodeId_decimal_13() + { testNodeIdRoundtripDecimal(".0"); } + + @Test public void nodeId_decimal_14() + { testNodeIdRoundtripDecimal("-.0"); } + + @Test public void nodeId_decimal_15() + { testNodeIdRoundtripDecimal("18"); } + + @Test public void nodeId_decimal_16() + { testNodeIdRoundtripDecimal("18."); } + + @Test public void nodeId_decimal_17() + { testNodeIdRoundtripDecimal("18.0"); } + + @Test public void nodeId_decimal_18() + { testNodeIdRoundtripDecimal("18.00"); } + + @Test public void nodeId_decimal_19() + { testNodeIdRoundtripDecimal("18.000"); } + @Test public void nodeId_dateTime_01() { test("'2008-04-28T15:36:15+01:00'^^xsd:dateTime"); } @@ -306,4 +339,22 @@ private void test(String x, Node correct) { // Term equality. 
assertEquals(correct, n2, ()->"Not same term"); } + + private static void testNodeIdRoundtripDecimal(String decimalStr) { + Node node = NodeFactory.createLiteralDT(decimalStr, XSDDatatype.XSDdecimal); + testNodeIdRoundtrip(node); + } + + /** For a Node n assert: nodeId(n) == nodeId(extract(nodeId(n))) */ + private static void testNodeIdRoundtrip(Node node) { + NodeId nodeId = NodeId.inline(node); + testNodeIdRoundtrip(nodeId); + } + + /** For a NodeId n assert: n == nodeId(extract(n)) */ + private static void testNodeIdRoundtrip(NodeId expected) { + Node extractedNode = NodeId.extract(expected); + NodeId actual = NodeId.inline(extractedNode); + assertEquals(expected, actual); + } } From 1e41a02bd73f3fe15b321a8904fe0d6baf5102e0 Mon Sep 17 00:00:00 2001 From: Andy Seaborne Date: Mon, 8 Sep 2025 14:59:17 +0100 Subject: [PATCH 2/3] GH-3404: Tidy up NodeId and DecimalNode56 --- .../apache/jena/sparql/util/XSDNumUtils.java | 28 +++++++----- .../org/apache/jena/tdb2/store/NodeId.java | 24 +++++++--- .../apache/jena/tdb2/store/NodeIdFactory.java | 2 +- .../apache/jena/tdb2/store/NodeIdInline.java | 2 +- .../jena/tdb2/store/value/DecimalNode56.java | 44 +++++++++---------- 5 files changed, 57 insertions(+), 43 deletions(-) diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/util/XSDNumUtils.java b/jena-arq/src/main/java/org/apache/jena/sparql/util/XSDNumUtils.java index 2354a95a63e..c2e26b53422 100644 --- a/jena-arq/src/main/java/org/apache/jena/sparql/util/XSDNumUtils.java +++ b/jena-arq/src/main/java/org/apache/jena/sparql/util/XSDNumUtils.java @@ -147,18 +147,6 @@ public static String canonicalDecimalStrNoIntegerDot(BigDecimal bd) { return bd.stripTrailingZeros().toPlainString(); } - /** Return a canonical decimal with a trailing ".0".
*/ - public static BigDecimal canonicalDecimalWithDot(BigDecimal decimal) { - BigDecimal result = decimal; - if (result.scale() > 1) { - result = decimal.stripTrailingZeros(); - } - if (result.scale() <= 0) { - result = result.setScale(1); - } - return result; - } - /** * Integer-valued decimals have a trailing ".0". * (In XML Schema Datatype 1.1 they did not have a ".0".) @@ -178,4 +166,20 @@ public static String canonicalDecimalStrWithDot(BigDecimal decimal) { str = str + ".0"; return str; } + + /** + * Return a canonical decimal with a trailing ".0". + * This is the BigDecimal form used to encode NodeIds in TDB2. + * It has a trailing ".0" so it is Turtle compatible. + */ + public static BigDecimal canonicalDecimal(BigDecimal decimal) { + BigDecimal result = decimal; + if (result.scale() > 1) { + result = decimal.stripTrailingZeros(); + } + if (result.scale() <= 0) { + result = result.setScale(1); + } + return result; + } } diff --git a/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/NodeId.java b/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/NodeId.java index 508fe0e7c64..d6bef2b7d90 100644 --- a/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/NodeId.java +++ b/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/NodeId.java @@ -104,15 +104,24 @@ private final void check(NodeIdType type, int v1, long v2) { // public long getPtrLo() { return value2; } // public int getPtrHi() { return value1 & 0x00FFFFFF; } - // 64 bit + /** The pointer part of a NodeId reference. */ public long getPtrLocation() { return value2; } - // Long. + + // 96 bit // public long getPtrLo() { return value2; } // public int getPtrHi() { return value1; } - public int getTypeValue() { return type.type(); } + // 64 bit. 
+ public int getTypeValue() { + return type.type(); + } + + /** The value (encoding) part of an inline literal (56 bits) */ + public long getValue56() { + return value2; + } - public boolean isInline() { + public boolean isInline() { return isInline(this); } @@ -141,8 +150,11 @@ public boolean isValue() { // public static boolean isDefined(NodeId nodeId) { return nodeId == NodeIdDefined; } // public static boolean isUndefined(NodeId nodeId) { return nodeId == NodeIdUndefined; } - /** Create from a long-encoded value */ - /*package*/ static NodeId createRaw(NodeIdType type, long value) { + /** + * Create from a long-encoded value. + * Caution: an illegal value for the long argument will cause serious problems. + */ + public static NodeId createRaw(NodeIdType type, long value) { return new NodeId(type, 0, value); } diff --git a/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/NodeIdFactory.java b/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/NodeIdFactory.java index cfd8acaedb8..04c165d28a5 100644 --- a/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/NodeIdFactory.java +++ b/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/NodeIdFactory.java @@ -68,7 +68,7 @@ public static NodeId createPtr(long lo) { return createNew(PTR, 0, lo); } - /*package*/ /*long*/ static NodeId createPtrLong(int hi, long lo) { + /*package*/ static NodeId createPtrLong(int hi, long lo) { return create(PTR, hi, lo); } diff --git a/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/NodeIdInline.java b/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/NodeIdInline.java index f28da5354cc..5bd55f4e63f 100644 --- a/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/NodeIdInline.java +++ b/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/NodeIdInline.java @@ -163,7 +163,7 @@ public static boolean hasInlineDatatype(Node node) { // But at this point we know it's a valid literal so the excessive // chopping by .trim is safe. 
BigDecimal decimal = new BigDecimal(lit.getLexicalForm().trim()); - decimal = XSDNumUtils.canonicalDecimalWithDot(decimal); + decimal = XSDNumUtils.canonicalDecimal(decimal); // Does range checking. DecimalNode56 dn = DecimalNode56.valueOf(decimal); diff --git a/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/value/DecimalNode56.java b/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/value/DecimalNode56.java index 55c5448e381..f653a983862 100644 --- a/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/value/DecimalNode56.java +++ b/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/value/DecimalNode56.java @@ -23,41 +23,39 @@ import org.apache.jena.atlas.lib.BitsLong; - // Decimal packed into 56 bits. public class DecimalNode56 { //private static Logger log = LoggerFactory.getLogger(DecimalNode.class); - BigDecimal decimal = null; - - // signed 8 bits of scale, signed 48 bits of value. - // Decimal precision is 47 bits (it's signed) or around 14 places. - // Not finance industry accuracy nor XSD (18 places minimum) but still useful. + // 56 bits: signed 8 bits of scale, signed 48 bits of value. + // Decimal precision is 47 bits (it's signed) or around 13 places (canonical form). + // This is not finance industry accuracy nor XSD (18 places minimum) but still useful.
- static final int SCALE_LEN = 8; - static final int VALUE_LEN = 48; - static final int ENC_LEN = 48 + SCALE_LEN; + private static final int SCALE_LEN = 8; + private static final int VALUE_LEN = 48; + private static final int ENC_LEN = 48 + SCALE_LEN; - static final long MAX_VALUE = (1L << (VALUE_LEN - 1)) - 1; - static final long MIN_VALUE = -(1L << (VALUE_LEN - 1)); + private static final long MAX_VALUE = (1L << (VALUE_LEN - 1)) - 1; + private static final long MIN_VALUE = -(1L << (VALUE_LEN - 1)); - static final int MAX_SCALE = (1 << (SCALE_LEN - 1)) - 1; - static final int MIN_SCALE = -(1 << (SCALE_LEN - 1)); + private static final int MAX_SCALE = (1 << (SCALE_LEN - 1)) - 1; + private static final int MIN_SCALE = -(1 << (SCALE_LEN - 1)); - static final BigInteger MAX_I = BigInteger.valueOf(MAX_VALUE); - static final BigInteger MIN_I = BigInteger.valueOf(MIN_VALUE); + private static final BigInteger MAX_I = BigInteger.valueOf(MAX_VALUE); + private static final BigInteger MIN_I = BigInteger.valueOf(MIN_VALUE); - // Bits counts - static private int SCALE_LO = 56 - SCALE_LEN; - static private int SCALE_HI = 56; // Exclusive - // index + // Bit positions [LO, HI) + private static final int SCALE_LO = ENC_LEN - SCALE_LEN; + private static final int SCALE_HI = ENC_LEN; - static private int VALUE_LO = 0; - static private int VALUE_HI = VALUE_LO + VALUE_LEN; - private int scale; - private long value; + // Object fields.
+ private BigDecimal decimal = null; + private int scale; + private long value; public static DecimalNode56 valueOf(BigDecimal decimal) { int scale = decimal.scale(); From cf91b672970a9b7c660e6cfc415a23d8fddc452b Mon Sep 17 00:00:00 2001 From: Andy Seaborne Date: Wed, 10 Sep 2025 12:24:00 +0100 Subject: [PATCH 3/3] GH-3404: Disable BigDecimal normalization --- .../main/java/org/apache/jena/tdb2/store/NodeIdInline.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/NodeIdInline.java b/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/NodeIdInline.java index 5bd55f4e63f..587fb045887 100644 --- a/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/NodeIdInline.java +++ b/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/NodeIdInline.java @@ -163,7 +163,10 @@ public static boolean hasInlineDatatype(Node node) { // But at this point we know it's a valid literal so the excessive // chopping by .trim is safe. BigDecimal decimal = new BigDecimal(lit.getLexicalForm().trim()); - decimal = XSDNumUtils.canonicalDecimal(decimal); + + // [Jena6] - enable for Jena6 + if ( false ) + decimal = XSDNumUtils.canonicalDecimal(decimal); // Does range checking. DecimalNode56 dn = DecimalNode56.valueOf(decimal);