Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix CharReaderFromSyncStream read characters less than chunkSize (#2108) #2109

Merged
merged 6 commits into from
Jan 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 28 additions & 11 deletions korge-core/src/korlibs/io/stream/CharReader.kt
Original file line number Diff line number Diff line change
Expand Up @@ -13,33 +13,50 @@ fun ByteArray.toCharReader(charset: Charset, chunkSize: Int = 1024): CharReader
fun SyncStream.toCharReader(charset: Charset, chunkSize: Int = 1024): CharReader =
CharReaderFromSyncStream(this, charset, chunkSize)

/**
 * [CharReader] that decodes characters from a [SyncStream] with the given [charset].
 *
 * Bytes are pulled from [stream] in pieces of up to [chunkSize] bytes and staged in an internal
 * byte queue. Decoded characters that exceed what a caller asked for are kept and handed out by
 * the next [read] call.
 *
 * @param stream source of encoded bytes.
 * @param charset charset used to decode the bytes.
 * @param chunkSize size in bytes of the scratch array used for stream reads and decoding;
 *   must be at least [MIN_CHUNK_SIZE].
 * @throws IllegalArgumentException if [chunkSize] is smaller than [MIN_CHUNK_SIZE].
 */
class CharReaderFromSyncStream(
    val stream: SyncStream,
    val charset: Charset,
    val chunkSize: Int = DEFAULT_CHUNK_SIZE
) : CharReader {
    init {
        // Must run before the property initializers below: initializers execute in declaration
        // order, and allocating the scratch array with an invalid size would fail first.
        require(chunkSize >= MIN_CHUNK_SIZE) { "chunkSize must be at least $MIN_CHUNK_SIZE, was $chunkSize" }
    }

    // Scratch array shared by stream reads and decoder input.
    private val temp = ByteArray(chunkSize)

    // Bytes read from the stream that have not been consumed by the decoder yet.
    private val buffer = ByteArrayDeque()

    // Characters already decoded but not yet returned to a caller.
    private var tempStringBuilder = StringBuilder()

    /** Creates a reader over a clone of [stream] with the same [charset] and [chunkSize]. */
    override fun clone(): CharReader = CharReaderFromSyncStream(stream.clone(), charset, chunkSize)

    /**
     * Appends up to [count] decoded characters to [out].
     *
     * @return the number of characters appended; fewer than [count] only when no more characters
     *   could be decoded from the remaining input.
     */
    override fun read(out: StringBuilder, count: Int): Int {
        bufferUp()
        while (tempStringBuilder.length < count) {
            // NOTE(review): peek is assumed to copy without consuming; the explicit skip below relies on that.
            val readCount = buffer.peek(temp)
            val consumed = charset.decode(tempStringBuilder, temp, 0, readCount)
            if (consumed <= 0) {
                // The decoder made no progress with the bytes on hand (for example a character
                // split across chunks). Fetch more; give up only when nothing more arrives.
                if (bufferUp() <= 0) break
            } else {
                buffer.skip(consumed)
            }
        }

        val slice = tempStringBuilder.substring(0, kotlin.math.min(count, tempStringBuilder.length))
        // Keep whatever was decoded beyond the requested count for the next call.
        tempStringBuilder = StringBuilder(slice.length).append(tempStringBuilder.substring(slice.length))

        out.append(slice)
        return slice.length
    }

    /**
     * Reads from [stream] into [buffer] until at least [chunkSize] bytes are queued or the stream
     * returns no more data.
     *
     * @return the number of bytes added to [buffer] by this call.
     */
    private fun bufferUp(): Int {
        var totalReadCount = 0
        while (buffer.availableRead < temp.size) {
            val readCount = stream.read(temp)
            if (readCount <= 0) break
            totalReadCount += readCount
            buffer.write(temp, 0, readCount)
        }

        return totalReadCount
    }

    companion object {
        /** Chunk size used when the caller does not specify one. */
        const val DEFAULT_CHUNK_SIZE = 1024

        /** Smallest accepted chunk size. */
        const val MIN_CHUNK_SIZE = 8
    }
}
39 changes: 39 additions & 0 deletions korge-core/test/korlibs/io/stream/CharReaderTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,43 @@ class CharReaderTest {
val reader = "áéíóúñ".toByteArray(UTF8).toCharReader(UTF8)
assertEquals("á,éí,óúñ", listOf(reader.read(1), reader.read(2), reader.read(10)).joinToString(","))
}


/**
 * Reads from a reader built with a chunk size of 8 over repeated multi-byte text, then checks
 * that a chunk size one below [CharReaderFromSyncStream.MIN_CHUNK_SIZE] is rejected.
 */
@Test
fun test2() {
    val encoded = "áéíóúñ".repeat(10).toByteArray(UTF8)
    val reader = encoded.toCharReader(charset = UTF8, chunkSize = 8)

    // Three consecutive reads of 1, 2 and 3 characters must continue where the previous one stopped.
    val pieces = listOf(reader.read(1), reader.read(2), reader.read(3))
    assertEquals("á,éí,óúñ", pieces.joinToString(","))

    // One below the minimum must fail with IllegalArgumentException.
    val tooSmall = CharReaderFromSyncStream.MIN_CHUNK_SIZE - 1
    assertFailsWith<IllegalArgumentException> {
        "áéíóúñ".toByteArray(UTF8).toCharReader(charset = UTF8, chunkSize = tooSmall)
    }
}

/**
 * Reads a set of mixed-width strings back through a char reader for every combination of
 * read size (1..30) and chunk size (minimum accepted size up to 1999), comparing each read
 * against the corresponding piece produced by `splitInChunks`.
 */
@Test
fun testCharReaderWithRandomStrings() {
    val randomStrings = listOf(
        "abcdefghijklm",
        "ä<a>ä</a>",
        "©頷ӨҤもタ編倏病Ҿ0沑âチ麕üӨҀ🙌とガӃ🙄Ҥzせø觧ҥ",
        "Ђヹ肯みцë匓ンê😺り磬バëӇØ゚琫ら儂脸😨D亢JZEÕキ燗😨🙉ュӼ`ぺ",
        "😳捇😗Ҧヿィ😲😵SӚåぐルҩS😯yѹ=ӪӠÀrキえÄ🙎へ¶Mたじ😞冃😃a\\xa0ÙҒ樗ボ😨",
        "らーçウごネ粦😓õ姗ӏ(",
        "Sm糛҆Òう楢ょ😽ê.",
        "X5O!P%@AP[4\\PZX54(P^)7CC)7}\$EICAR-STANDARD-ANTIVIRUS-TEST-FILE!\$H+H*"
    )
    for (readCount in 1..30) {
        for (inputData in randomStrings) {
            // NOTE(review): splitInChunks is assumed to cut the string into pieces of at most readCount chars.
            val dataSegments: List<String> = inputData.splitInChunks(readCount)

            // Start from the smallest size the reader accepts rather than repeating the literal.
            for (chunkSize in CharReaderFromSyncStream.MIN_CHUNK_SIZE until 2000) {
                // Identifies the failing combination when an assertion trips inside the nested loops.
                val context = "readCount=$readCount, chunkSize=$chunkSize, input=$inputData"
                val charReader = inputData.openSync().toCharReader(charset = Charsets.UTF8, chunkSize = chunkSize)
                for (data in dataSegments) {
                    val strBuilder = StringBuilder()
                    assertEquals(data.length, charReader.read(strBuilder, readCount), context)
                    assertEquals(data, strBuilder.toString(), context)
                }
                // Every character has been read, so a further read must return nothing.
                assertEquals("", charReader.read(1), context)
            }
        }
    }
}
}