Skip to content

Commit

Permalink
Implementation of loadAllElements with iterative fallback once max de…
Browse files Browse the repository at this point in the history
…pth is reached (#103)
  • Loading branch information
popematt authored Sep 9, 2024
1 parent b635d5c commit 20636b5
Show file tree
Hide file tree
Showing 4 changed files with 246 additions and 17 deletions.
2 changes: 2 additions & 0 deletions src/main/kotlin/com/amazon/ionelement/api/IonUtils.kt
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ public fun IonElement.toIonValue(factory: ValueFactory): IonValue {
* Bridge function that converts from the mutable [IonValue] to an [AnyElement].
*
* New code that does not need to integrate with uses of the mutable DOM should not use this.
*
* This will fail for IonDatagram if the IonDatagram does not contain exactly one user value.
*/
public fun IonValue.toIonElement(): AnyElement =
this.system.newReader(this).use { reader ->
Expand Down
158 changes: 146 additions & 12 deletions src/main/kotlin/com/amazon/ionelement/impl/IonElementLoaderImpl.kt
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,19 @@ import com.amazon.ion.TextSpan
import com.amazon.ion.system.IonReaderBuilder
import com.amazon.ionelement.api.*
import com.amazon.ionelement.impl.collections.*
import java.util.ArrayDeque
import java.util.ArrayList
import kotlinx.collections.immutable.adapters.ImmutableListAdapter

internal class IonElementLoaderImpl(private val options: IonElementLoaderOptions) : IonElementLoader {

// TODO: It seems like some data can be read faster with a recursive approach, but other data is
// faster with an iterative approach. Consider making this configurable. It probably doesn't
// need to be finely-tune-able—just 0 or 100 (i.e. on/off) is probably sufficient.
companion object {
private const val MAX_RECURSION_DEPTH: Int = 100
}

/**
* Catches an [IonException] occurring in [block] and throws an [IonElementLoaderException] with
* the current [IonLocation] of the fault, if one is available. Note that depending on the state of the
Expand Down Expand Up @@ -86,6 +96,10 @@ internal class IonElementLoaderImpl(private val options: IonElementLoaderOptions
IonReaderBuilder.standard().build(ionText).use(::loadAllElements)

override fun loadCurrentElement(ionReader: IonReader): AnyElement {
return loadCurrentElementRecursively(ionReader)
}

private fun loadCurrentElementRecursively(ionReader: IonReader): AnyElement {
return handleReaderException(ionReader) {
val valueType = requireNotNull(ionReader.type) { "The IonReader was not positioned at an element." }

Expand Down Expand Up @@ -128,26 +142,41 @@ internal class IonElementLoaderImpl(private val options: IonElementLoaderOptions
IonType.BLOB -> BlobElementImpl(ionReader.newBytes(), annotations, metas)
IonType.LIST -> {
ionReader.stepIn()
val list = ListElementImpl(loadAllElements(ionReader).toImmutableListUnsafe(), annotations, metas)
val listContent = ArrayList<AnyElement>()
if (ionReader.depth < MAX_RECURSION_DEPTH) {
while (ionReader.next() != null) {
listContent.add(loadCurrentElementRecursively(ionReader))
}
} else {
loadAllElementsIteratively(ionReader, listContent as MutableList<Any>)
}
ionReader.stepOut()
list
ListElementImpl(listContent.toImmutableListUnsafe(), annotations, metas)
}
IonType.SEXP -> {
ionReader.stepIn()
val sexp = SexpElementImpl(loadAllElements(ionReader).toImmutableListUnsafe(), annotations, metas)
val sexpContent = ArrayList<AnyElement>()
if (ionReader.depth < MAX_RECURSION_DEPTH) {
while (ionReader.next() != null) {
sexpContent.add(loadCurrentElementRecursively(ionReader))
}
} else {
loadAllElementsIteratively(ionReader, sexpContent as MutableList<Any>)
}
ionReader.stepOut()
sexp
SexpElementImpl(sexpContent.toImmutableListUnsafe(), annotations, metas)
}
IonType.STRUCT -> {
val fields = mutableListOf<StructField>()
val fields = ArrayList<StructField>()
ionReader.stepIn()
while (ionReader.next() != null) {
fields.add(
StructFieldImpl(
ionReader.fieldName,
loadCurrentElement(ionReader)
)
)
if (ionReader.depth < MAX_RECURSION_DEPTH) {
while (ionReader.next() != null) {
val fieldName = ionReader.fieldName
val element = loadCurrentElementRecursively(ionReader)
fields.add(StructFieldImpl(fieldName, element))
}
} else {
loadAllElementsIteratively(ionReader, fields as MutableList<Any>)
}
ionReader.stepOut()
StructElementImpl(fields.toImmutableListUnsafe(), annotations, metas)
Expand All @@ -158,4 +187,109 @@ internal class IonElementLoaderImpl(private val options: IonElementLoaderOptions
}.asAnyElement()
}
}

private fun loadAllElementsIteratively(ionReader: IonReader, into: MutableList<Any>) {
// Intentionally not using a "recycling" stack because we have mutable lists that we are going to wrap as
// ImmutableLists and then forget about the reference to the mutable list.
val openContainerStack = ArrayDeque<MutableList<Any>>()
var elements: MutableList<Any> = into

while (true) {
val valueType = ionReader.next()

// End of container or input
if (valueType == null) {
// We're at the top (relative to where we started)
if (elements === into) {
return
} else {
ionReader.stepOut()
elements = openContainerStack.pop()
continue
}
}

// Read a value
val annotations = ionReader.typeAnnotations!!.toImmutableListUnsafe()

var metas = EMPTY_METAS
if (options.includeLocationMeta) {
val location = ionReader.currentLocation()
if (location != null) metas = location.toMetaContainer()
}

if (ionReader.isNullValue) {
elements.addContainerElement(ionReader, ionNull(valueType.toElementType(), annotations, metas).asAnyElement())
continue
} else when (valueType) {
IonType.BOOL -> elements.addContainerElement(ionReader, BoolElementImpl(ionReader.booleanValue(), annotations, metas))
IonType.INT -> {
val intValue = when (ionReader.integerSize!!) {
IntegerSize.BIG_INTEGER -> {
val bigIntValue = ionReader.bigIntegerValue()
if (bigIntValue !in RANGE_OF_LONG)
BigIntIntElementImpl(bigIntValue, annotations, metas)
else {
LongIntElementImpl(ionReader.longValue(), annotations, metas)
}
}
IntegerSize.LONG,
IntegerSize.INT -> LongIntElementImpl(ionReader.longValue(), annotations, metas)
}
elements.addContainerElement(ionReader, intValue)
}
IonType.FLOAT -> elements.addContainerElement(ionReader, FloatElementImpl(ionReader.doubleValue(), annotations, metas))
IonType.DECIMAL -> elements.addContainerElement(ionReader, DecimalElementImpl(ionReader.decimalValue(), annotations, metas))
IonType.TIMESTAMP -> elements.addContainerElement(ionReader, TimestampElementImpl(ionReader.timestampValue(), annotations, metas))
IonType.STRING -> elements.addContainerElement(ionReader, StringElementImpl(ionReader.stringValue(), annotations, metas))
IonType.SYMBOL -> elements.addContainerElement(ionReader, SymbolElementImpl(ionReader.stringValue(), annotations, metas))
IonType.CLOB -> elements.addContainerElement(ionReader, ClobElementImpl(ionReader.newBytes(), annotations, metas))
IonType.BLOB -> elements.addContainerElement(ionReader, BlobElementImpl(ionReader.newBytes(), annotations, metas))
IonType.LIST -> {
val listContent = ArrayList<AnyElement>()
// `listContent` gets wrapped in an `ImmutableListWrapper` so that we can create a ListElementImpl
// right away. Then, we add child elements to `ListContent`. Technically, this is a violation of the
// contract for `ImmutableListAdapter`, but it is safe to do so here because no reads will occur
// after we are done modifying the backing list.
// Same thing applies for `sexpContent` and `structContent` in their respective branches.
elements.addContainerElement(ionReader, ListElementImpl(ImmutableListAdapter(listContent), annotations, metas))
ionReader.stepIn()
openContainerStack.push(elements)
elements = listContent as MutableList<Any>
}
IonType.SEXP -> {
val sexpContent = ArrayList<AnyElement>()
elements.addContainerElement(ionReader, SexpElementImpl(ImmutableListAdapter(sexpContent), annotations, metas))
ionReader.stepIn()
openContainerStack.push(elements)
elements = sexpContent as MutableList<Any>
}
IonType.STRUCT -> {
val structContent = ArrayList<StructField>()
elements.addContainerElement(
ionReader,
StructElementImpl(
ImmutableListAdapter(structContent),
annotations,
metas
)
)
ionReader.stepIn()
openContainerStack.push(elements)
elements = structContent as MutableList<Any>
}
IonType.DATAGRAM -> error("IonElementLoaderImpl does not know what to do with IonType.DATAGRAM")
IonType.NULL -> error("IonType.NULL branch should be unreachable")
}
}
}

private fun MutableList<Any>.addContainerElement(ionReader: IonReader, value: AnyElement) {
val fieldName = ionReader.fieldName
if (fieldName != null) {
add(StructFieldImpl(fieldName, value))
} else {
add(value)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ internal class StructElementImpl(
) : AnyElementBase(), StructElement {

override val type: ElementType get() = ElementType.STRUCT
override val size = allFields.size
override val size: Int get() = allFields.size

// Note that we are not using `by lazy` here because it requires 2 additional allocations and
// has been demonstrated to significantly increase memory consumption!
Expand Down
101 changes: 97 additions & 4 deletions src/test/kotlin/com/amazon/ionelement/IonElementLoaderTests.kt
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,17 @@

package com.amazon.ionelement

import com.amazon.ion.Decimal
import com.amazon.ion.system.IonReaderBuilder
import com.amazon.ionelement.api.*
import com.amazon.ionelement.util.INCLUDE_LOCATION_META
import com.amazon.ionelement.util.ION
import com.amazon.ionelement.util.IonElementLoaderTestCase
import com.amazon.ionelement.util.convertToString
import java.math.BigInteger
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.assertThrows
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.MethodSource

Expand All @@ -45,6 +50,8 @@ class IonElementLoaderTests {
// Converting from IonValue to IonElement should result in an IonElement that is equivalent to the
// parsed IonElement
assertEquals(parsedIonValue.toIonElement(), parsedIonElement)

assertEquals(tc.expectedElement, parsedIonElement)
}

companion object {
Expand All @@ -57,15 +64,29 @@ class IonElementLoaderTests {

IonElementLoaderTestCase("1", ionInt(1)),

IonElementLoaderTestCase("existence::42", ionInt(1).withAnnotations("existence")),
IonElementLoaderTestCase("9223372036854775807", ionInt(Long.MAX_VALUE)),

IonElementLoaderTestCase("\"some string\"", ionString("some string")),
IonElementLoaderTestCase("9223372036854775808", ionInt(BigInteger.valueOf(Long.MAX_VALUE) + BigInteger.ONE)),

IonElementLoaderTestCase("existence::42", ionInt(42).withAnnotations("existence")),

IonElementLoaderTestCase("0.", ionDecimal(Decimal.ZERO)),

IonElementLoaderTestCase("1e0", ionFloat(1.0)),

IonElementLoaderTestCase("2019-10-30T04:23:59Z", ionTimestamp("2019-10-30T04:23:59Z")),

IonElementLoaderTestCase("\"some string\"", ionString("some string")),

IonElementLoaderTestCase("'some symbol'", ionSymbol("some symbol")),

IonElementLoaderTestCase("{{\"some clob\"}}", ionClob("some clob".encodeToByteArray())),

IonElementLoaderTestCase("{{ }}", ionBlob(byteArrayOf())),

IonElementLoaderTestCase("[1, 2, 3]", ionListOf(ionInt(1), ionInt(2), ionInt(3))),

IonElementLoaderTestCase("(1 2 3)", ionListOf(ionInt(1), ionInt(2), ionInt(3))),
IonElementLoaderTestCase("(1 2 3)", ionSexpOf(ionInt(1), ionInt(2), ionInt(3))),

IonElementLoaderTestCase(
"{ foo: 1, bar: 2, bat: 3 }",
Expand All @@ -74,7 +95,79 @@ class IonElementLoaderTests {
"bar" to ionInt(2),
"bat" to ionInt(3)
)
)
),

// Nested container cases
IonElementLoaderTestCase("((null.list))", ionSexpOf(ionSexpOf(ionNull(ElementType.LIST)))),
IonElementLoaderTestCase("(1 (2 3))", ionSexpOf(ionInt(1), ionSexpOf(ionInt(2), ionInt(3)))),
IonElementLoaderTestCase("{foo:[1]}", ionStructOf("foo" to ionListOf(ionInt(1)))),
IonElementLoaderTestCase("[{foo:1}]", ionListOf(ionStructOf("foo" to ionInt(1)))),
IonElementLoaderTestCase("{foo:{bar:1}}", ionStructOf("foo" to ionStructOf("bar" to ionInt(1)))),
IonElementLoaderTestCase("{foo:[{}]}", ionStructOf("foo" to ionListOf(ionStructOf(emptyList())))),
IonElementLoaderTestCase("[{}]", ionListOf(ionStructOf(emptyList()))),
IonElementLoaderTestCase("[{}, {}]", ionListOf(ionStructOf(emptyList()), ionStructOf(emptyList()))),
IonElementLoaderTestCase("[{foo:1, bar: 2}]", ionListOf(ionStructOf("foo" to ionInt(1), "bar" to ionInt(2)))),
IonElementLoaderTestCase(
"{foo:[{bar:({})}]}",
ionStructOf("foo" to ionListOf(ionStructOf("bar" to ionSexpOf(ionStructOf(emptyList())))))
),
)
}

@Test
fun `regardless of depth, no StackOverflowError is thrown`() {
// Throws StackOverflowError in [email protected] and prior versions when there's ~4k nested containers
// Run for every container type to ensure that they all correctly fall back to the iterative impl.

val listData = "[".repeat(999999) + "]".repeat(999999)
loadAllElements(listData)

val sexpData = "(".repeat(999999) + ")".repeat(999999)
loadAllElements(sexpData)

val structData = "{a:".repeat(999999) + "b" + "}".repeat(999999)
loadAllElements(structData)
}

@ParameterizedTest
@MethodSource("parametersForDemoTest")
fun `deeply nested values should be loaded correctly`(tc: IonElementLoaderTestCase) {
// Wrap everything in many layers of Ion lists so that we can be sure to trigger the iterative fallback.
val nestingLevels = 500
val textIon = "[".repeat(nestingLevels) + tc.textIon + "]".repeat(nestingLevels)
var expectedElement = tc.expectedElement
repeat(nestingLevels) { expectedElement = ionListOf(expectedElement) }

val parsedIonValue = ION.singleValue(textIon)
val parsedIonElement = loadSingleElement(textIon, INCLUDE_LOCATION_META)

// Text generated from both should match
assertEquals(convertToString(parsedIonValue), parsedIonElement.toString())

// Converting from IonElement to IonValue results in an IonValue that is equivalent to the parsed IonValue
assertEquals(parsedIonElement.toIonValue(ION), parsedIonValue)

// Converting from IonValue to IonElement should result in an IonElement that is equivalent to the
// parsed IonElement
assertEquals(parsedIonValue.toIonElement(), parsedIonElement)

assertEquals(expectedElement, parsedIonElement)
}

@Test
fun `loadCurrentElement throws exception when not positioned on a value`() {
val reader = IonReaderBuilder.standard().build("foo")
// We do not advance to the first value in the reader.
assertThrows<IllegalArgumentException> { loadCurrentElement(reader) }
}

@Test
fun `loadSingleElement throws exception when no values in reader`() {
assertThrows<IllegalArgumentException> { loadSingleElement("") }
}

@Test
fun `loadSingleElement throws exception when more than one values in reader`() {
assertThrows<IllegalArgumentException> { loadSingleElement("a b") }
}
}

0 comments on commit 20636b5

Please sign in to comment.