From 8395a35f9c09f870be83224ada8fe1f943fac23c Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Sun, 11 Aug 2024 15:58:11 +0200 Subject: [PATCH 01/26] Add msgpack item serializer and validator --- .../msgpack/low/internal/ItemSerializer.scala | 188 ++++++++++++++++++ .../msgpack/low/internal/ItemValidator.scala | 158 +++++++++++++++ .../scala/fs2/data/msgpack/low/package.scala | 34 +++- .../fs2/data/msgpack/SerializerSpec.scala | 135 +++++++++++++ 4 files changed, 514 insertions(+), 1 deletion(-) create mode 100644 msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala create mode 100644 msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala create mode 100644 msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala new file mode 100644 index 000000000..1895f570d --- /dev/null +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala @@ -0,0 +1,188 @@ +/* + * Copyright 2024 fs2-data Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package fs2 +package data +package msgpack +package low +package internal + +import scodec.bits._ + +private[low] object ItemSerializer { + def compressed: MsgpackItem => ByteVector = { + case MsgpackItem.UnsignedInt(bytes) => + if (bytes.size <= 1) + ByteVector(Headers.Uint8).buffer ++ bytes.padLeft(1) + else if (bytes.size <= 2) + ByteVector(Headers.Uint16).buffer ++ bytes.padLeft(2) + else if (bytes.size <= 4) + ByteVector(Headers.Uint32).buffer ++ bytes.padLeft(4) + else + ByteVector(Headers.Uint64).buffer ++ bytes.padLeft(8) + + case MsgpackItem.SignedInt(bytes) => + if (bytes.size <= 1) + // positive fixint or negative fixint + if ((bytes & hex"7f") == bytes || (bytes & hex"c0") == hex"c0") + bytes.padLeft(1) + else + ByteVector(Headers.Int8).buffer ++ bytes.padLeft(1) + else if (bytes.size <= 2) + ByteVector(Headers.Int16).buffer ++ bytes.padLeft(2) + else if (bytes.size <= 4) + ByteVector(Headers.Int32).buffer ++ bytes.padLeft(4) + else + ByteVector(Headers.Int64).buffer ++ bytes.padLeft(8) + + case MsgpackItem.Float32(float) => + ByteVector(Headers.Float32).buffer ++ ByteVector.fromInt(java.lang.Float.floatToIntBits(float)) + + case MsgpackItem.Float64(double) => + ByteVector(Headers.Float64).buffer ++ ByteVector.fromLong(java.lang.Double.doubleToLongBits(double)) + + case MsgpackItem.Str(bytes) => + if (bytes.size <= 31) { + ByteVector(0xa0 | bytes.size).buffer ++ bytes + } else if (bytes.size <= Math.pow(2, 8) - 1) { + ByteVector(Headers.Str8).buffer ++ ByteVector(bytes.size) ++ bytes + } else if (bytes.size <= Math.pow(2, 16) - 1) { + val size = ByteVector.fromShort(bytes.size.toShort) + ByteVector(Headers.Str16).buffer ++ size ++ bytes + } else { + val size = ByteVector.fromInt(bytes.size.toInt) + ByteVector(Headers.Str32).buffer ++ size ++ bytes + } + + case MsgpackItem.Bin(bytes) => + if (bytes.size <= Math.pow(2, 8) - 1) { + ByteVector(Headers.Bin8).buffer ++ ByteVector(bytes.size) ++ bytes + } else if (bytes.size <= Math.pow(2, 16) - 1) { + 
val size = ByteVector.fromShort(bytes.size.toShort) + ByteVector(Headers.Bin16).buffer ++ size ++ bytes + } else { + val size = ByteVector.fromInt(bytes.size.toInt).padLeft(4) + ByteVector(Headers.Bin32).buffer ++ size ++ bytes + } + + case MsgpackItem.Array(size) => + if (size <= 15) + ByteVector(0x90 | size) + else if (size <= Math.pow(2, 16) - 1) + ByteVector(Headers.Array16).buffer ++ ByteVector(size).padLeft(2) + else + ByteVector(Headers.Array32).buffer ++ ByteVector(size).padLeft(4) + + case MsgpackItem.Map(size) => + if (size <= 15) + ByteVector(0x80 | size) + else if (size <= Math.pow(2, 16) - 1) + ByteVector(Headers.Map16).buffer ++ ByteVector(size).padLeft(2) + else + ByteVector(Headers.Map32).buffer ++ ByteVector(size).padLeft(4) + + case MsgpackItem.Extension(tpe, bytes) => + if (bytes.size <= 1) + (ByteVector(Headers.FixExt1).buffer :+ tpe) ++ bytes.padLeft(1) + else if (bytes.size <= 2) + (ByteVector(Headers.FixExt2).buffer :+ tpe) ++ bytes.padLeft(2) + else if (bytes.size <= 4) + (ByteVector(Headers.FixExt4).buffer :+ tpe) ++ bytes.padLeft(4) + else if (bytes.size <= 8) + (ByteVector(Headers.FixExt8).buffer :+ tpe) ++ bytes.padLeft(8) + else if (bytes.size <= 16) + (ByteVector(Headers.FixExt16).buffer :+ tpe) ++ bytes.padLeft(16) + else if (bytes.size <= Math.pow(2, 8) - 1) + (ByteVector(Headers.Ext8).buffer ++ ByteVector(bytes.size) :+ tpe) ++ bytes + else if (bytes.size <= Math.pow(2, 16) - 1) + (ByteVector(Headers.Ext16).buffer ++ ByteVector(bytes.size) :+ tpe) ++ bytes.padLeft(2) + else + (ByteVector(Headers.Ext32).buffer ++ ByteVector(bytes.size) :+ tpe) ++ bytes.padLeft(4) + + case MsgpackItem.Timestamp32(seconds) => + (ByteVector(Headers.FixExt4).buffer :+ Headers.Timestamp.toByte) ++ ByteVector.fromInt(seconds) + + case MsgpackItem.Timestamp64(combined) => + (ByteVector(Headers.FixExt8).buffer :+ Headers.Timestamp.toByte) ++ ByteVector.fromLong(combined) + + case MsgpackItem.Timestamp96(nanoseconds, seconds) => + val ns = 
ByteVector.fromInt(nanoseconds) + val s = ByteVector.fromLong(seconds) + (ByteVector(Headers.Ext8).buffer :+ Headers.Timestamp.toByte) ++ ns ++ s + + case MsgpackItem.Nil => + ByteVector(Headers.Nil) + + case MsgpackItem.False => + ByteVector(Headers.False) + + case MsgpackItem.True => + ByteVector(Headers.True) + } + + def fast: MsgpackItem => ByteVector = { + case item: MsgpackItem.UnsignedInt => + ByteVector(Headers.Uint64) ++ item.bytes.padLeft(8) + + case item: MsgpackItem.SignedInt => + ByteVector(Headers.Int64) ++ item.bytes.padLeft(8) + + case item: MsgpackItem.Float32 => + ByteVector.fromInt(java.lang.Float.floatToIntBits(item.v)) + + case item: MsgpackItem.Float64 => + ByteVector.fromLong(java.lang.Double.doubleToLongBits(item.v)) + + case item: MsgpackItem.Str => + val size = ByteVector.fromInt(item.bytes.size.toInt) + ByteVector(Headers.Str32) ++ size ++ item.bytes + + case item: MsgpackItem.Bin => + val size = ByteVector.fromInt(item.bytes.size.toInt) + ByteVector(Headers.Bin32) ++ size ++ item.bytes + + case item: MsgpackItem.Array => + ByteVector(Headers.Array32) ++ ByteVector.fromInt(item.size).padLeft(4) + + case item: MsgpackItem.Map => + ByteVector(Headers.Map32) ++ ByteVector.fromInt(item.size).padLeft(4) + + case item: MsgpackItem.Extension => + val size = ByteVector.fromInt(item.bytes.size.toInt) + val t = ByteVector(item.tpe) + ByteVector(Headers.Ext32) ++ size ++ t ++ item.bytes + + case item: MsgpackItem.Timestamp32 => + ByteVector(Headers.FixExt4) ++ hex"ff" ++ ByteVector.fromInt(item.seconds) + + case item: MsgpackItem.Timestamp64 => + ByteVector(Headers.FixExt8) ++ hex"ff" ++ ByteVector.fromLong(item.combined) + + case item: MsgpackItem.Timestamp96 => + val ns = ByteVector.fromInt(item.nanoseconds) + val s = ByteVector.fromLong(item.seconds) + ByteVector(Headers.Ext8) ++ hex"0c" ++ hex"ff" ++ ns ++ s + + case MsgpackItem.Nil => + ByteVector(Headers.Nil) + + case MsgpackItem.False => + ByteVector(Headers.False) + + case MsgpackItem.True 
=> + ByteVector(Headers.True) + } +} diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala new file mode 100644 index 000000000..5aac1f2d7 --- /dev/null +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala @@ -0,0 +1,158 @@ +/* + * Copyright 2024 fs2-data Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package fs2 +package data +package msgpack +package low +package internal + +case class ValidationErrorAt(at: Long, msg: String) extends Error(s"at position ${at}: ${msg}") +case class ValidationError(msg: String) extends Exception(msg) + +private[low] object ItemValidator { + + case class Expect(n: Int, from: Long) { + def dec = Expect(n - 1, from) + } + + type ValidationContext = (Chunk[MsgpackItem], Int, Long, List[Expect]) + + def none[F[_]]: Pipe[F, MsgpackItem, MsgpackItem] = in => in + + def simple[F[_]](implicit F: RaiseThrowable[F]): Pipe[F, MsgpackItem, MsgpackItem] = { in => + /** Validates one item from a stream + */ + def step1(chunk: Chunk[MsgpackItem], idx: Int, position: Long): Pull[F, MsgpackItem, Option[Expect]] = + chunk(idx) match { + case MsgpackItem.UnsignedInt(bytes) => + if (bytes.size > 8) Pull.raiseError(new ValidationErrorAt(position, "Unsigned int exceeds 64 bits")) + else Pull.pure(None) + + case MsgpackItem.SignedInt(bytes) => + if (bytes.size > 8) Pull.raiseError(new 
ValidationErrorAt(position, "Signed int exceeds 64 bits")) + else Pull.pure(None) + + case MsgpackItem.Float32(_) => + Pull.pure(None) + + case MsgpackItem.Float64(_) => + Pull.pure(None) + + case MsgpackItem.Str(bytes) => + if (bytes.size > Math.pow(2, 32) - 1) + Pull.raiseError(new ValidationErrorAt(position, "String exceeds (2^32)-1 bytes")) + else + Pull.pure(None) + + case MsgpackItem.Bin(bytes) => + if (bytes.size > Math.pow(2, 32) - 1) + Pull.raiseError(new ValidationErrorAt(position, "Bin exceeds (2^32)-1 bytes")) + else + Pull.pure(None) + + case MsgpackItem.Array(size) => + if (size < 0) + Pull.raiseError(new ValidationErrorAt(position, s"Array has a negative size ${size}")) + else if (size == 0) + Pull.pure(None) + else + Pull.pure(Some(Expect(size, position))) + + case MsgpackItem.Map(size) => + if (size < 0) + Pull.raiseError(new ValidationErrorAt(position, s"Map has a negative size ${size}")) + else if (size == 0) + Pull.pure(None) + else + Pull.pure(Some(Expect(size * 2, position))) + + case MsgpackItem.Extension(_, bytes) => + if (bytes.size > Math.pow(2, 32) - 1) + Pull.raiseError(new ValidationErrorAt(position, "Extension data exceeds (2^32)-1 bytes")) + else + Pull.pure(None) + + case _: MsgpackItem.Timestamp32 => + Pull.pure(None) + + case item: MsgpackItem.Timestamp64 => + if (item.nanoseconds > 999999999) + Pull.raiseError( + new ValidationErrorAt(position, "Timestamp64 nanoseconds cannot be larger than '999999999'")) + else + Pull.pure(None) + + case MsgpackItem.Timestamp96(nanoseconds, _) => + if (nanoseconds > 999999999) + Pull.raiseError( + new ValidationErrorAt(position, "Timestamp96 nanoseconds cannot be larger than '999999999'")) + else + Pull.pure(None) + + case MsgpackItem.Nil => + Pull.pure(None) + + case MsgpackItem.True => + Pull.pure(None) + + case MsgpackItem.False => + Pull.pure(None) + } + + def stepChunk(chunk: Chunk[MsgpackItem], + idx: Int, + stream: Stream[F, MsgpackItem], + position: Long, + state: List[Expect]): Pull[F, 
MsgpackItem, ValidationContext] = { + if (idx >= chunk.size) + Pull.output(chunk).as((Chunk.empty, 0, position, state)) + else + step1(chunk, idx, position, state).flatMap { el => + val stateNew: List[Expect] = + if (state.isEmpty) + state + else if (state.head.n == 1) + state.tail + else + state.head.dec :: state.tail + + val prepended = el match { + case Some(x) => x :: stateNew + case None => stateNew + } + + stepChunk(chunk, idx + 1, stream, position + 1, prepended) + } + } + + def go(stream: Stream[F, MsgpackItem], idx: Int, position: Long, state: List[Expect]): Pull[F, MsgpackItem, Unit] = + stream.pull.uncons.flatMap { + case Some((chunk, stream)) => + stepChunk(chunk, idx, stream, position, state).flatMap { case (_, idx, position, state) => + go(stream, idx, position, state) + } + case None => + if (state.isEmpty) + Pull.done + else + Pull.raiseError(new ValidationError(s"Unexpected end of input (starting at ${state.head.from})")) + } + + go(in, 0, 0, List.empty).stream + } + +} diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala index 118e19df7..2d512faf6 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala @@ -18,11 +18,43 @@ package fs2 package data package msgpack -import low.internal.ItemParser +import low.internal.{ItemParser, ItemSerializer, ItemValidator} /** A low-level representation of the MessagePack format. 
*/ package object low { def items[F[_]](implicit F: RaiseThrowable[F]): Pipe[F, Byte, MsgpackItem] = ItemParser.pipe[F] + + /** Alias for `bytes(compressed = true, validated = true)` + */ + def toBinary[F[_]: RaiseThrowable]: Pipe[F, MsgpackItem, Byte] = + bytes(true, true) + + def bytes[F[_]](compressed: Boolean, validated: Boolean)(implicit + F: RaiseThrowable[F]): Pipe[F, MsgpackItem, Byte] = { in => + in + .through { if (validated) ItemValidator.simple else ItemValidator.none } + .flatMap { x => + val bytes = + if (compressed) + ItemSerializer.compressed(x) + else + ItemSerializer.fast(x) + + /* Maximum size of a `ByteVector` is bigger than the one of a `Chunk` (Long vs Int). The `Chunk.byteVector` + * function returns `Chunk.empty` if it encounters a `ByteVector` that won't fit in a `Chunk`. We have to work + * around this behaviour and explicitly check the `ByteVector` size. + */ + if (bytes.size <= Int.MaxValue) { + Stream.chunk(Chunk.byteVector(bytes)) + } else { + val (lhs, rhs) = bytes.splitAt(Int.MaxValue) + Stream.chunk(Chunk.byteVector(lhs)) ++ Stream.chunk(Chunk.byteVector(rhs)) + } + } + } + + def validated[F[_]](implicit F: RaiseThrowable[F]): Pipe[F, MsgpackItem, MsgpackItem] = + ItemValidator.simple[F] } diff --git a/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala b/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala new file mode 100644 index 000000000..3a826c8c6 --- /dev/null +++ b/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala @@ -0,0 +1,135 @@ +/* + * Copyright 2024 fs2-data Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package fs2 +package data +package msgpack + +import weaver.SimpleIOSuite +import scodec.bits.* +import low.MsgpackItem +import cats.effect.* +import low.internal.ValidationErrorAt +import low.internal.ValidationError + +import java.nio.charset.StandardCharsets + +object SerializerSpec extends SimpleIOSuite { + test("MessagePack item serializer should correctly serialize all formats") { + val cases = List( + // positive fixint + (List(MsgpackItem.SignedInt(hex"7b")), hex"7b", hex"0xd3000000000000007b"), + // fixmap + (List(MsgpackItem.Map(1)), hex"81", hex"0xdf00000001"), + // fixarray + (List(MsgpackItem.Array(1)), hex"91", hex"0xdd00000001"), + // fixstr + (List(MsgpackItem.Str(ByteVector("foobar".getBytes(StandardCharsets.UTF_8)))), + hex"a6666f6f626172", + hex"0xdb00000006666f6f626172"), + // nil, false, true + (List(MsgpackItem.Nil, MsgpackItem.False, MsgpackItem.True), hex"c0c2c3", hex"c0c2c3"), + // bin8, bin16, bin32 + (List(MsgpackItem.Bin(hex"abc")), hex"c4020abc", hex"c6000000020abc"), + (List(MsgpackItem.Bin(hex"abc".padLeft(Math.pow(2, 8).toLong))), + ByteVector(Headers.Bin16) ++ hex"0100" ++ hex"0abc".padLeft(Math.pow(2, 8).toLong), + ByteVector(Headers.Bin32) ++ hex"00000100" ++ hex"0abc".padLeft(Math.pow(2, 8).toLong)), + (List(MsgpackItem.Bin(hex"abc".padLeft(Math.pow(2, 16).toLong))), + ByteVector(Headers.Bin32) ++ hex"00010000" ++ ByteVector.empty.padLeft(Math.pow(2, 16).toLong - 2) ++ hex"0abc", + ByteVector(Headers.Bin32) ++ hex"00010000" ++ ByteVector.empty.padLeft(Math.pow(2, 16).toLong - 2) ++ hex"0abc") + // ext8, 
ext16, ext32 + ) + + Stream + .emits(cases) + .evalMap { case (source, compressed, fast) => + for { + e1 <- + Stream + .emits(source) + .through(low.bytes(true, false)) + .compile + .fold(ByteVector.empty)(_ :+ _) + .map(expect.same(_, compressed)) + + e2 <- + Stream + .emits(source) + .through(low.bytes(false, false)) + .compile + .fold(ByteVector.empty)(_ :+ _) + .map(expect.same(_, fast)) + } yield e1 and e2 + } + .compile + .foldMonoid + } + + test("MessagePack item serializer should be fix point when optimizing for size") { + val cases = List( + hex"CB3FCB5A858793DD98", + hex"d6ffaabbccdd", + hex"81A77765622D61707083A7736572766C65749583AC736572766C65742D6E616D65A8636F666178434453AD736572766C65742D636C617373B86F72672E636F6661782E6364732E434453536572766C6574AA696E69742D706172616DDE002ABD636F6E666967476C6F73736172793A696E7374616C6C6174696F6E4174B05068696C6164656C706869612C205041B9636F6E666967476C6F73736172793A61646D696E456D61696CAD6B736D40706F626F782E636F6DB8636F6E666967476C6F73736172793A706F77657265644279A5436F666178BC636F6E666967476C6F73736172793A706F7765726564427949636F6EB12F696D616765732F636F6661782E676966B9636F6E666967476C6F73736172793A73746174696350617468AF2F636F6E74656E742F737461746963B674656D706C61746550726F636573736F72436C617373B96F72672E636F6661782E5779736977796754656D706C617465B374656D706C6174654C6F61646572436C617373BD6F72672E636F6661782E46696C657354656D706C6174654C6F61646572AC74656D706C61746550617468A974656D706C61746573B474656D706C6174654F7665727269646550617468A0B364656661756C744C69737454656D706C617465B06C69737454656D706C6174652E68746DB364656661756C7446696C6554656D706C617465B361727469636C6554656D706C6174652E68746DA67573654A5350C2AF6A73704C69737454656D706C617465B06C69737454656D706C6174652E6A7370AF6A737046696C6554656D706C617465B361727469636C6554656D706C6174652E6A7370B563616368655061636B61676554616773547261636BCCC8B563616368655061636B6167655461677353746F7265CCC8B763616368655061636B61676554616773526566726573683CB3636163686554656D706C61746573547261636B64B36
36163686554656D706C6174657353746F726532B5636163686554656D706C61746573526566726573680FAF63616368655061676573547261636BCCC8AF6361636865506167657353746F726564B163616368655061676573526566726573680AB3636163686550616765734469727479526561640AB8736561726368456E67696E654C69737454656D706C617465B8666F72536561726368456E67696E65734C6973742E68746DB8736561726368456E67696E6546696C6554656D706C617465B4666F72536561726368456E67696E65732E68746DB4736561726368456E67696E65526F626F74734462B15745422D494E462F726F626F74732E6462AC7573654461746153746F7265C3AE6461746153746F7265436C617373B66F72672E636F6661782E53716C4461746153746F7265B07265646972656374696F6E436C617373B86F72672E636F6661782E53716C5265646972656374696F6EAD6461746153746F72654E616D65A5636F666178AF6461746153746F7265447269766572D92C636F6D2E6D6963726F736F66742E6A6462632E73716C7365727665722E53514C536572766572447269766572AC6461746153746F726555726CD93B6A6462633A6D6963726F736F66743A73716C7365727665723A2F2F4C4F43414C484F53543A313433333B44617461626173654E616D653D676F6F6EAD6461746153746F726555736572A27361B16461746153746F726550617373776F7264B26461746153746F7265546573745175657279B26461746153746F7265546573745175657279D922534554204E4F434F554E54204F4E3B73656C65637420746573743D2774657374273BB06461746153746F72654C6F6746696C65D9242F7573722F6C6F63616C2F746F6D6361742F6C6F67732F6461746173746F72652E6C6F67B26461746153746F7265496E6974436F6E6E730AB16461746153746F72654D6178436F6E6E7364B76461746153746F7265436F6E6E55736167654C696D697464B16461746153746F72654C6F674C6576656CA56465627567AC6D617855726C4C656E677468CD01F483AC736572766C65742D6E616D65AA636F666178456D61696CAD736572766C65742D636C617373BA6F72672E636F6661782E6364732E456D61696C536572766C6574AA696E69742D706172616D82A86D61696C486F7374A56D61696C31B06D61696C486F73744F76657272696465A56D61696C3282AC736572766C65742D6E616D65AA636F66617841646D696EAD736572766C65742D636C617373BA6F72672E636F6661782E6364732E41646D696E536572766C657482AC736572766C65742D6E616D65AB66696C65536572766C6574AD736572766C65742D636C617373B96F72672E636F6
661782E6364732E46696C65536572766C657483AC736572766C65742D6E616D65AA636F666178546F6F6C73AD736572766C65742D636C617373BF6F72672E636F6661782E636D732E436F666178546F6F6C73536572766C6574AA696E69742D706172616D8DAC74656D706C61746550617468AF746F6F6C7374656D706C617465732FA36C6F6701AB6C6F674C6F636174696F6ED9252F7573722F6C6F63616C2F746F6D6361742F6C6F67732F436F666178546F6F6C732E6C6F67AA6C6F674D617853697A65A0A7646174614C6F6701AF646174614C6F674C6F636174696F6ED9222F7573722F6C6F63616C2F746F6D6361742F6C6F67732F646174614C6F672E6C6F67AE646174614C6F674D617853697A65A0AF72656D6F7665506167654361636865D9252F636F6E74656E742F61646D696E2F72656D6F76653F63616368653D70616765732669643DB372656D6F766554656D706C6174654361636865D9292F636F6E74656E742F61646D696E2F72656D6F76653F63616368653D74656D706C617465732669643DB266696C655472616E73666572466F6C646572D9342F7573722F6C6F63616C2F746F6D6361742F776562617070732F636F6E74656E742F66696C655472616E73666572466F6C646572AD6C6F6F6B496E436F6E7465787401AC61646D696E47726F7570494404AA62657461536572766572C3AF736572766C65742D6D617070696E6785A8636F666178434453A12FAA636F666178456D61696CB32F636F6661787574696C2F61656D61696C2F2AAA636F66617841646D696EA82F61646D696E2F2AAB66696C65536572766C6574A92F7374617469632F2AAA636F666178546F6F6C73A82F746F6F6C732F2AA67461676C696282AA7461676C69622D757269A9636F6661782E746C64AF7461676C69622D6C6F636174696F6EB72F5745422D494E462F746C64732F636F6661782E746C64" + ) + + Stream + .emits(cases) + .evalMap { hex => + Stream + .chunk(Chunk.byteVector(hex)) + .through(low.items) + .through(low.toBinary) + .compile + .toList + .map(x => expect.same(ByteVector(x), hex)) + } + .compile + .foldMonoid + } + + test("MessagePack item validator should raise for all checks") { + val cases = List( + List(MsgpackItem.UnsignedInt(hex"10000000000000000")) -> new ValidationErrorAt(0, "Unsigned int exceeds 64 bits"), + List(MsgpackItem.SignedInt(hex"10000000000000000")) -> new ValidationErrorAt(0, "Signed int exceeds 64 bits"), + + // TODO: Float32, Float64 + + 
List(MsgpackItem.Str(ByteVector.fill(Math.pow(2, 32).toLong)(1))) -> new ValidationErrorAt( + 0, + "String exceeds (2^32)-1 bytes"), + List(MsgpackItem.Bin(ByteVector.fill(Math.pow(2, 32).toLong)(1))) -> new ValidationErrorAt( + 0, + "Bin exceeds (2^32)-1 bytes"), + List(MsgpackItem.Array(2), MsgpackItem.True) -> new ValidationError("Unexpected end of input (starting at 0)"), + List(MsgpackItem.Map(1), MsgpackItem.Array(1), MsgpackItem.True) -> new ValidationError( + "Unexpected end of input (starting at 0)") + ) + + Stream + .emits(cases) + .evalMap { case (lhs, rhs) => + Stream + .emits(lhs) + .through(low.validated[IO]) + .compile + .toList + .map(x => failure(s"Expected error for item ${x}")) + .handleErrorWith(err => IO(expect.same(err, rhs))) + } + .compile + .foldMonoid + } +} From 52b48d7122a24a19779cd06073d5d23e5d1fc698 Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Tue, 13 Aug 2024 21:07:22 +0200 Subject: [PATCH 02/26] Fix one additional argument being passed --- .../scala/fs2/data/msgpack/low/internal/ItemValidator.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala index 5aac1f2d7..d9402feec 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala @@ -121,7 +121,7 @@ private[low] object ItemValidator { if (idx >= chunk.size) Pull.output(chunk).as((Chunk.empty, 0, position, state)) else - step1(chunk, idx, position, state).flatMap { el => + step1(chunk, idx, position).flatMap { el => val stateNew: List[Expect] = if (state.isEmpty) state From 7b4246e6e72135719f510b1d5f15656b1952be68 Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Tue, 13 Aug 2024 21:08:29 +0200 Subject: [PATCH 03/26] Add tests to cover all fmts for msgpack serializer And fix issues found during testing 
--- .../msgpack/low/internal/ItemSerializer.scala | 70 +++++---- .../fs2/data/msgpack/SerializerSpec.scala | 134 +++++++++++++++--- 2 files changed, 157 insertions(+), 47 deletions(-) diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala index 1895f570d..cd6a24731 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala @@ -56,9 +56,10 @@ private[low] object ItemSerializer { case MsgpackItem.Str(bytes) => if (bytes.size <= 31) { - ByteVector(0xa0 | bytes.size).buffer ++ bytes + ByteVector.fromByte((0xa0 | bytes.size).toByte).buffer ++ bytes } else if (bytes.size <= Math.pow(2, 8) - 1) { - ByteVector(Headers.Str8).buffer ++ ByteVector(bytes.size) ++ bytes + val size = ByteVector.fromByte(bytes.size.toByte) + ByteVector(Headers.Str8).buffer ++ size ++ bytes } else if (bytes.size <= Math.pow(2, 16) - 1) { val size = ByteVector.fromShort(bytes.size.toShort) ByteVector(Headers.Str16).buffer ++ size ++ bytes @@ -69,7 +70,8 @@ private[low] object ItemSerializer { case MsgpackItem.Bin(bytes) => if (bytes.size <= Math.pow(2, 8) - 1) { - ByteVector(Headers.Bin8).buffer ++ ByteVector(bytes.size) ++ bytes + val size = ByteVector.fromByte(bytes.size.toByte) + ByteVector(Headers.Bin8).buffer ++ size ++ bytes } else if (bytes.size <= Math.pow(2, 16) - 1) { val size = ByteVector.fromShort(bytes.size.toShort) ByteVector(Headers.Bin16).buffer ++ size ++ bytes @@ -79,38 +81,48 @@ private[low] object ItemSerializer { } case MsgpackItem.Array(size) => - if (size <= 15) - ByteVector(0x90 | size) - else if (size <= Math.pow(2, 16) - 1) - ByteVector(Headers.Array16).buffer ++ ByteVector(size).padLeft(2) - else - ByteVector(Headers.Array32).buffer ++ ByteVector(size).padLeft(4) + if (size <= 15) { + ByteVector.fromByte((0x90 | size).toByte) + } else if (size <= Math.pow(2, 
16) - 1) { + val s = ByteVector.fromShort(size.toShort) + ByteVector(Headers.Array16).buffer ++ s + } else { + val s = ByteVector.fromInt(size) + ByteVector(Headers.Array32).buffer ++ s + } case MsgpackItem.Map(size) => - if (size <= 15) - ByteVector(0x80 | size) - else if (size <= Math.pow(2, 16) - 1) - ByteVector(Headers.Map16).buffer ++ ByteVector(size).padLeft(2) - else - ByteVector(Headers.Map32).buffer ++ ByteVector(size).padLeft(4) + if (size <= 15) { + ByteVector.fromByte((0x80 | size).toByte) + } else if (size <= Math.pow(2, 16) - 1) { + val s = ByteVector.fromShort(size.toShort) + ByteVector(Headers.Map16).buffer ++ s + } else { + val s = ByteVector.fromInt(size) + ByteVector(Headers.Map32).buffer ++ s + } case MsgpackItem.Extension(tpe, bytes) => - if (bytes.size <= 1) + if (bytes.size <= 1) { (ByteVector(Headers.FixExt1).buffer :+ tpe) ++ bytes.padLeft(1) - else if (bytes.size <= 2) + } else if (bytes.size <= 2) { (ByteVector(Headers.FixExt2).buffer :+ tpe) ++ bytes.padLeft(2) - else if (bytes.size <= 4) + } else if (bytes.size <= 4) { (ByteVector(Headers.FixExt4).buffer :+ tpe) ++ bytes.padLeft(4) - else if (bytes.size <= 8) + } else if (bytes.size <= 8) { (ByteVector(Headers.FixExt8).buffer :+ tpe) ++ bytes.padLeft(8) - else if (bytes.size <= 16) + } else if (bytes.size <= 16) { (ByteVector(Headers.FixExt16).buffer :+ tpe) ++ bytes.padLeft(16) - else if (bytes.size <= Math.pow(2, 8) - 1) - (ByteVector(Headers.Ext8).buffer ++ ByteVector(bytes.size) :+ tpe) ++ bytes - else if (bytes.size <= Math.pow(2, 16) - 1) - (ByteVector(Headers.Ext16).buffer ++ ByteVector(bytes.size) :+ tpe) ++ bytes.padLeft(2) - else - (ByteVector(Headers.Ext32).buffer ++ ByteVector(bytes.size) :+ tpe) ++ bytes.padLeft(4) + } else if (bytes.size <= Math.pow(2, 8) - 1) { + val size = ByteVector.fromByte(bytes.size.toByte) + (ByteVector(Headers.Ext8).buffer ++ size :+ tpe) ++ bytes + } else if (bytes.size <= Math.pow(2, 16) - 1) { + val size = 
ByteVector.fromShort(bytes.size.toShort) + (ByteVector(Headers.Ext16).buffer ++ size :+ tpe) ++ bytes + } else { + val size = ByteVector.fromInt(bytes.size.toInt) + (ByteVector(Headers.Ext32).buffer ++ size :+ tpe) ++ bytes + } case MsgpackItem.Timestamp32(seconds) => (ByteVector(Headers.FixExt4).buffer :+ Headers.Timestamp.toByte) ++ ByteVector.fromInt(seconds) @@ -121,7 +133,7 @@ private[low] object ItemSerializer { case MsgpackItem.Timestamp96(nanoseconds, seconds) => val ns = ByteVector.fromInt(nanoseconds) val s = ByteVector.fromLong(seconds) - (ByteVector(Headers.Ext8).buffer :+ Headers.Timestamp.toByte) ++ ns ++ s + (ByteVector(Headers.Ext8).buffer :+ 12 :+ Headers.Timestamp.toByte) ++ ns ++ s case MsgpackItem.Nil => ByteVector(Headers.Nil) @@ -141,10 +153,10 @@ private[low] object ItemSerializer { ByteVector(Headers.Int64) ++ item.bytes.padLeft(8) case item: MsgpackItem.Float32 => - ByteVector.fromInt(java.lang.Float.floatToIntBits(item.v)) + ByteVector(Headers.Float32) ++ ByteVector.fromInt(java.lang.Float.floatToIntBits(item.v)) case item: MsgpackItem.Float64 => - ByteVector.fromLong(java.lang.Double.doubleToLongBits(item.v)) + ByteVector(Headers.Float64) ++ ByteVector.fromLong(java.lang.Double.doubleToLongBits(item.v)) case item: MsgpackItem.Str => val size = ByteVector.fromInt(item.bytes.size.toInt) diff --git a/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala b/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala index 3a826c8c6..ba89f6819 100644 --- a/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala +++ b/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala @@ -30,27 +30,121 @@ import java.nio.charset.StandardCharsets object SerializerSpec extends SimpleIOSuite { test("MessagePack item serializer should correctly serialize all formats") { val cases = List( + // nil, false, true + (List(MsgpackItem.Nil, MsgpackItem.False, MsgpackItem.True), hex"c0c2c3", hex"c0c2c3"), + // positive fixint 
(List(MsgpackItem.SignedInt(hex"7b")), hex"7b", hex"0xd3000000000000007b"), - // fixmap - (List(MsgpackItem.Map(1)), hex"81", hex"0xdf00000001"), - // fixarray - (List(MsgpackItem.Array(1)), hex"91", hex"0xdd00000001"), + // negative fixint + (List(MsgpackItem.SignedInt(hex"d6")), hex"d6", hex"0xd300000000000000d6"), + + // uint 8, uint 16, uint 32, uint 64 + (List(MsgpackItem.UnsignedInt(hex"ab")), hex"ccab", hex"cf00000000000000ab"), + (List(MsgpackItem.UnsignedInt(hex"abcd")), hex"cdabcd", hex"cf000000000000abcd"), + (List(MsgpackItem.UnsignedInt(hex"abcdef01")), hex"ceabcdef01", hex"cf00000000abcdef01"), + (List(MsgpackItem.UnsignedInt(hex"abcdef0123456789")), hex"cfabcdef0123456789", hex"cfabcdef0123456789"), + + // int 8, int 16, int 32, int 64 + (List(MsgpackItem.SignedInt(hex"80")), hex"d080", hex"d30000000000000080"), + (List(MsgpackItem.SignedInt(hex"80ab")), hex"d180ab", hex"d300000000000080ab"), + (List(MsgpackItem.SignedInt(hex"80abcdef")), hex"d280abcdef", hex"d30000000080abcdef"), + (List(MsgpackItem.SignedInt(hex"80abcddef0123456")), hex"d380abcddef0123456", hex"d380abcddef0123456"), + + // float 32, float 64 + (List(MsgpackItem.Float32(0.125F)), hex"ca3e000000", hex"ca3e000000"), + (List(MsgpackItem.Float64(0.125)), hex"cb3fc0000000000000", hex"cb3fc0000000000000"), + // fixstr - (List(MsgpackItem.Str(ByteVector("foobar".getBytes(StandardCharsets.UTF_8)))), - hex"a6666f6f626172", - hex"0xdb00000006666f6f626172"), - // nil, false, true - (List(MsgpackItem.Nil, MsgpackItem.False, MsgpackItem.True), hex"c0c2c3", hex"c0c2c3"), - // bin8, bin16, bin32 - (List(MsgpackItem.Bin(hex"abc")), hex"c4020abc", hex"c6000000020abc"), - (List(MsgpackItem.Bin(hex"abc".padLeft(Math.pow(2, 8).toLong))), - ByteVector(Headers.Bin16) ++ hex"0100" ++ hex"0abc".padLeft(Math.pow(2, 8).toLong), - ByteVector(Headers.Bin32) ++ hex"00000100" ++ hex"0abc".padLeft(Math.pow(2, 8).toLong)), - (List(MsgpackItem.Bin(hex"abc".padLeft(Math.pow(2, 16).toLong))), - 
ByteVector(Headers.Bin32) ++ hex"00010000" ++ ByteVector.empty.padLeft(Math.pow(2, 16).toLong - 2) ++ hex"0abc", - ByteVector(Headers.Bin32) ++ hex"00010000" ++ ByteVector.empty.padLeft(Math.pow(2, 16).toLong - 2) ++ hex"0abc") - // ext8, ext16, ext32 + (List(MsgpackItem.Str(ByteVector("abc".getBytes(StandardCharsets.UTF_8)))), + hex"a3616263", + hex"0xdb00000003616263"), + + // str 8 + (List(MsgpackItem.Str(ByteVector("abcd".repeat(8).getBytes(StandardCharsets.UTF_8)))), + hex"d920" ++ ByteVector("abcd".repeat(8).getBytes(StandardCharsets.UTF_8)), + hex"db00000020" ++ ByteVector("abcd".repeat(8).getBytes(StandardCharsets.UTF_8))), + + // str 16 + (List(MsgpackItem.Str(ByteVector("a".repeat(Math.pow(2, 8).toInt).getBytes(StandardCharsets.UTF_8)))), + hex"da0100" ++ ByteVector("a".repeat(Math.pow(2, 8).toInt).getBytes(StandardCharsets.UTF_8)), + hex"db00000100" ++ ByteVector("a".repeat(Math.pow(2, 8).toInt).getBytes(StandardCharsets.UTF_8))), + + // str 32 + (List(MsgpackItem.Str(ByteVector("a".repeat(Math.pow(2, 16).toInt).getBytes(StandardCharsets.UTF_8)))), + hex"db00010000" ++ ByteVector("a".repeat(Math.pow(2, 16).toInt).getBytes(StandardCharsets.UTF_8)), + hex"db00010000" ++ ByteVector("a".repeat(Math.pow(2, 16).toInt).getBytes(StandardCharsets.UTF_8))), + + // bin 8 + (List(MsgpackItem.Bin(ByteVector("abcd".repeat(8).getBytes(StandardCharsets.UTF_8)))), + hex"c420" ++ ByteVector("abcd".repeat(8).getBytes(StandardCharsets.UTF_8)), + hex"c600000020" ++ ByteVector("abcd".repeat(8).getBytes(StandardCharsets.UTF_8))), + + // bin 16 + (List(MsgpackItem.Bin(ByteVector("a".repeat(Math.pow(2, 8).toInt).getBytes(StandardCharsets.UTF_8)))), + hex"c50100" ++ ByteVector("a".repeat(Math.pow(2, 8).toInt).getBytes(StandardCharsets.UTF_8)), + hex"c600000100" ++ ByteVector("a".repeat(Math.pow(2, 8).toInt).getBytes(StandardCharsets.UTF_8))), + + // bin 32 + (List(MsgpackItem.Bin(ByteVector("a".repeat(Math.pow(2, 16).toInt).getBytes(StandardCharsets.UTF_8)))), + hex"c600010000" 
++ ByteVector("a".repeat(Math.pow(2, 16).toInt).getBytes(StandardCharsets.UTF_8)), + hex"c600010000" ++ ByteVector("a".repeat(Math.pow(2, 16).toInt).getBytes(StandardCharsets.UTF_8))), + + // fixarray + (List(MsgpackItem.Array(0)), hex"90", hex"dd00000000"), + (List(MsgpackItem.Array(1)), hex"91", hex"dd00000001"), + // array 16 + (List(MsgpackItem.Array(16)), hex"dc0010", hex"dd00000010"), + // array 32 + (List(MsgpackItem.Array(Math.pow(2, 16).toInt)), hex"dd00010000", hex"dd00010000"), + + // fixmap + (List(MsgpackItem.Map(0)), hex"80", hex"df00000000"), + (List(MsgpackItem.Map(1)), hex"81", hex"df00000001"), + // map 16 + (List(MsgpackItem.Map(16)), hex"de0010", hex"df00000010"), + // map 32 + (List(MsgpackItem.Map(Math.pow(2, 16).toInt)), hex"df00010000", hex"df00010000"), + + // fixext 1 + (List(MsgpackItem.Extension(0x54.toByte, hex"ab")), hex"d454ab", hex"c90000000154ab"), + // fixext 2 + (List(MsgpackItem.Extension(0x54.toByte, hex"abcd")), hex"d554abcd", hex"c90000000254abcd"), + // fixext 4 + (List(MsgpackItem.Extension(0x54.toByte, hex"abcdef01")), hex"d654abcdef01", hex"c90000000454abcdef01"), + // fixext 8 + (List(MsgpackItem.Extension(0x54.toByte, hex"abcdef0123456789")), + hex"d754abcdef0123456789", + hex"c90000000854abcdef0123456789"), + // fixext 8 + (List(MsgpackItem.Extension(0x54.toByte, hex"abcdef0123456789abcdef0123456789")), + hex"d854abcdef0123456789abcdef0123456789", + hex"c90000001054abcdef0123456789abcdef0123456789"), + + // ext 8 + (List(MsgpackItem.Extension(0x54, hex"ab".padLeft(17))), + hex"c71154" ++ hex"ab".padLeft(17), + hex"c90000001154" ++ hex"ab".padLeft(17)), + + // ext 16 + (List(MsgpackItem.Extension(0x54, hex"ab".padLeft(Math.pow(2, 8).toLong))), + hex"c8010054" ++ hex"ab".padLeft(Math.pow(2, 8).toLong), + hex"c90000010054" ++ hex"ab".padLeft(Math.pow(2, 8).toLong)), + + // ext 32 + (List(MsgpackItem.Extension(0x54, hex"ab".padLeft(Math.pow(2, 16).toLong))), + hex"c90001000054" ++ hex"ab".padLeft(Math.pow(2, 16).toLong), + 
hex"c90001000054" ++ hex"ab".padLeft(Math.pow(2, 16).toLong)), + + // timestamp 32 + (List(MsgpackItem.Timestamp32(0x0123abcd)), hex"d6ff0123abcd", hex"d6ff0123abcd"), + + // timestamp 64 + (List(MsgpackItem.Timestamp64(0x0123456789abcdefL)), hex"d7ff0123456789abcdef", hex"d7ff0123456789abcdef"), + + // timestamp 96 + (List(MsgpackItem.Timestamp96(0x0123abcd, 0x0123456789abcdefL)), + hex"c70cff0123abcd0123456789abcdef", + hex"c70cff0123abcd0123456789abcdef") ) Stream @@ -63,6 +157,10 @@ object SerializerSpec extends SimpleIOSuite { .through(low.bytes(true, false)) .compile .fold(ByteVector.empty)(_ :+ _) + .flatTap { got => + if (got != compressed) IO.println((got(0), compressed(0))) + else IO.unit + } .map(expect.same(_, compressed)) e2 <- From 9bc1452a236663e685af99e98a64f654c687e9ed Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Tue, 13 Aug 2024 21:18:38 +0200 Subject: [PATCH 04/26] Remove debug code in serializer test --- msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala | 4 ---- 1 file changed, 4 deletions(-) diff --git a/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala b/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala index ba89f6819..2a9a61545 100644 --- a/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala +++ b/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala @@ -157,10 +157,6 @@ object SerializerSpec extends SimpleIOSuite { .through(low.bytes(true, false)) .compile .fold(ByteVector.empty)(_ :+ _) - .flatTap { got => - if (got != compressed) IO.println((got(0), compressed(0))) - else IO.unit - } .map(expect.same(_, compressed)) e2 <- From eed74a4b04bedefe40c3bc73a7fbca8f2be829cf Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Thu, 15 Aug 2024 21:59:37 +0200 Subject: [PATCH 05/26] Add validation test cases --- .../fs2/data/msgpack/SerializerSpec.scala | 53 ++------- .../fs2/data/msgpack/ValidationSpec.scala | 103 ++++++++++++++++++ 2 files changed, 112 insertions(+), 44 deletions(-) 
create mode 100644 msgpack/src/test/scala/fs2/data/msgpack/ValidationSpec.scala diff --git a/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala b/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala index 2a9a61545..90351d238 100644 --- a/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala +++ b/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala @@ -18,14 +18,12 @@ package fs2 package data package msgpack -import weaver.SimpleIOSuite -import scodec.bits.* -import low.MsgpackItem -import cats.effect.* -import low.internal.ValidationErrorAt -import low.internal.ValidationError +import cats.effect._ +import scodec.bits._ +import weaver._ import java.nio.charset.StandardCharsets +import low.MsgpackItem object SerializerSpec extends SimpleIOSuite { test("MessagePack item serializer should correctly serialize all formats") { @@ -154,7 +152,7 @@ object SerializerSpec extends SimpleIOSuite { e1 <- Stream .emits(source) - .through(low.bytes(true, false)) + .through(low.bytes[IO](true, false)) .compile .fold(ByteVector.empty)(_ :+ _) .map(expect.same(_, compressed)) @@ -162,7 +160,7 @@ object SerializerSpec extends SimpleIOSuite { e2 <- Stream .emits(source) - .through(low.bytes(false, false)) + .through(low.bytes[IO](false, false)) .compile .fold(ByteVector.empty)(_ :+ _) .map(expect.same(_, fast)) @@ -171,7 +169,6 @@ object SerializerSpec extends SimpleIOSuite { .compile .foldMonoid } - test("MessagePack item serializer should be fix point when optimizing for size") { val cases = List( hex"CB3FCB5A858793DD98", @@ -184,8 +181,8 @@ object SerializerSpec extends SimpleIOSuite { .evalMap { hex => Stream .chunk(Chunk.byteVector(hex)) - .through(low.items) - .through(low.toBinary) + .through(low.items[IO]) + .through(low.toBinary[IO]) .compile .toList .map(x => expect.same(ByteVector(x), hex)) @@ -194,36 +191,4 @@ object SerializerSpec extends SimpleIOSuite { .foldMonoid } - test("MessagePack item validator should raise for all checks") { - 
val cases = List( - List(MsgpackItem.UnsignedInt(hex"10000000000000000")) -> new ValidationErrorAt(0, "Unsigned int exceeds 64 bits"), - List(MsgpackItem.SignedInt(hex"10000000000000000")) -> new ValidationErrorAt(0, "Signed int exceeds 64 bits"), - - // TODO: Float32, Float64 - - List(MsgpackItem.Str(ByteVector.fill(Math.pow(2, 32).toLong)(1))) -> new ValidationErrorAt( - 0, - "String exceeds (2^32)-1 bytes"), - List(MsgpackItem.Bin(ByteVector.fill(Math.pow(2, 32).toLong)(1))) -> new ValidationErrorAt( - 0, - "Bin exceeds (2^32)-1 bytes"), - List(MsgpackItem.Array(2), MsgpackItem.True) -> new ValidationError("Unexpected end of input (starting at 0)"), - List(MsgpackItem.Map(1), MsgpackItem.Array(1), MsgpackItem.True) -> new ValidationError( - "Unexpected end of input (starting at 0)") - ) - - Stream - .emits(cases) - .evalMap { case (lhs, rhs) => - Stream - .emits(lhs) - .through(low.validated[IO]) - .compile - .toList - .map(x => failure(s"Expected error for item ${x}")) - .handleErrorWith(err => IO(expect.same(err, rhs))) - } - .compile - .foldMonoid - } -} +} \ No newline at end of file diff --git a/msgpack/src/test/scala/fs2/data/msgpack/ValidationSpec.scala b/msgpack/src/test/scala/fs2/data/msgpack/ValidationSpec.scala new file mode 100644 index 000000000..4431d2886 --- /dev/null +++ b/msgpack/src/test/scala/fs2/data/msgpack/ValidationSpec.scala @@ -0,0 +1,103 @@ +/* + * Copyright 2024 fs2-data Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package fs2 +package data +package msgpack + +import cats.effect._ +import low.MsgpackItem +import fs2.data.msgpack.low.internal.{ValidationError, ValidationErrorAt} +import scodec.bits.ByteVector +import weaver._ +import scodec.bits._ + +import cats.implicits._ + +object ValidationSpec extends SimpleIOSuite { + def validation1[F[_]: Sync](cases: (MsgpackItem, Throwable)*): F[Expectations] = + Stream + .emits(cases) + .evalMap { case (lhs, rhs) => + Stream + .emit(lhs) + .through(low.toBinary[F]) + .compile + .drain + .map(_ => failure(s"Expected error for item ${lhs}")) + .handleError(expect.same(_, rhs)) + } + .compile + .foldMonoid + + def validation[F[_]: Sync](cases: (List[MsgpackItem], Throwable)*): F[Expectations] = + Stream + .emits(cases) + .evalMap { case (lhs, rhs) => + Stream + .emits(lhs) + .through(low.toBinary[F]) + .compile + .drain + .map(_ => failure(s"Expected error for item ${lhs}")) + .handleError(expect.same(_, rhs)) + } + .compile + .foldMonoid + + + test("should raise if integer values exceed 64 bits") { + validation1( + MsgpackItem.UnsignedInt(hex"10000000000000000") -> new ValidationErrorAt(0, "Unsigned int exceeds 64 bits"), + MsgpackItem.SignedInt(hex"10000000000000000")-> new ValidationErrorAt(0, "Signed int exceeds 64 bits") + ) + } + + test("should raise if string or binary values exceed 2^32 - 1 bytes") { + validation1( + MsgpackItem.Str(ByteVector.empty.padLeft(Math.pow(2, 32).toLong)) -> new ValidationErrorAt(0, "String exceeds (2^32)-1 bytes"), + MsgpackItem.Bin(ByteVector.empty.padLeft(Math.pow(2, 32).toLong)) -> new ValidationErrorAt(0, "Bin exceeds (2^32)-1 bytes"), + ) + } + + test("should raise on unexpected end of input") { + validation( + List(MsgpackItem.Array(2), MsgpackItem.True) -> new ValidationError("Unexpected end of input (starting at 0)"), + List(MsgpackItem.Array(2), MsgpackItem.Array(1), MsgpackItem.True) -> new ValidationError("Unexpected end of input (starting at 0)"), + List(MsgpackItem.Array(1), 
MsgpackItem.Array(1)) -> new ValidationError("Unexpected end of input (starting at 1)"), + List(MsgpackItem.Array(0), MsgpackItem.Array(1)) -> new ValidationError("Unexpected end of input (starting at 1)"), + + List(MsgpackItem.Map(1), MsgpackItem.True) -> new ValidationError("Unexpected end of input (starting at 0)"), + List(MsgpackItem.Map(1), MsgpackItem.Map(1), MsgpackItem.True, MsgpackItem.True) -> new ValidationError("Unexpected end of input (starting at 0)"), + List(MsgpackItem.Map(2), MsgpackItem.True, MsgpackItem.Map(1)) -> new ValidationError("Unexpected end of input (starting at 2)"), + List(MsgpackItem.Map(2), MsgpackItem.True, MsgpackItem.Map(1)) -> new ValidationError("Unexpected end of input (starting at 2)"), + List(MsgpackItem.Map(0), MsgpackItem.Map(1)) -> new ValidationError("Unexpected end of input (starting at 1)"), + ) + } + + test("validator should raise if extension data exceeds 2^32 - 1 bytes") { + validation1( + MsgpackItem.Extension(0x54, ByteVector.empty.padLeft(Math.pow(2, 32).toLong)) -> new ValidationErrorAt(0, "Extension data exceeds (2^32)-1 bytes"), + ) + } + + test("should raise if nanoseconds fields exceed 999999999") { + validation1( + MsgpackItem.Timestamp64(0xEE6B280000000000L)-> new ValidationErrorAt(0, "Timestamp64 nanoseconds cannot be larger than '999999999'"), + MsgpackItem.Timestamp96(1000000000, 0)-> new ValidationErrorAt(0, "Timestamp96 nanoseconds cannot be larger than '999999999'"), + ) + } +} From 54d7d5585a79ba0ba192ef1127fee8366cd33ff0 Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Sat, 17 Aug 2024 13:02:59 +0200 Subject: [PATCH 06/26] Make msgpack item serializer omit leading zeros This change applies only to types in which leading zeros are insignificant. 
--- .../msgpack/low/internal/ItemSerializer.scala | 71 ++++++++++--------- 1 file changed, 37 insertions(+), 34 deletions(-) diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala index cd6a24731..9495b7e8f 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala @@ -25,28 +25,30 @@ import scodec.bits._ private[low] object ItemSerializer { def compressed: MsgpackItem => ByteVector = { case MsgpackItem.UnsignedInt(bytes) => - if (bytes.size <= 1) - ByteVector(Headers.Uint8).buffer ++ bytes.padLeft(1) - else if (bytes.size <= 2) - ByteVector(Headers.Uint16).buffer ++ bytes.padLeft(2) - else if (bytes.size <= 4) - ByteVector(Headers.Uint32).buffer ++ bytes.padLeft(4) + val bs = bytes.dropWhile(_ == 0) + if (bs.size <= 1) + ByteVector(Headers.Uint8).buffer ++ bs.padLeft(1) + else if (bs.size <= 2) + ByteVector(Headers.Uint16).buffer ++ bs.padLeft(2) + else if (bs.size <= 4) + ByteVector(Headers.Uint32).buffer ++ bs.padLeft(4) else - ByteVector(Headers.Uint64).buffer ++ bytes.padLeft(8) + ByteVector(Headers.Uint64).buffer ++ bs.padLeft(8) case MsgpackItem.SignedInt(bytes) => - if (bytes.size <= 1) + val bs = bytes.dropWhile(_ == 0) + if (bs.size <= 1) // positive fixint or negative fixint - if ((bytes & hex"7f") == bytes || (bytes & hex"c0") == hex"c0") - bytes.padLeft(1) + if ((bs & hex"7f") == bs || (bs & hex"c0") == hex"c0") + bs.padLeft(1) else - ByteVector(Headers.Int8).buffer ++ bytes.padLeft(1) - else if (bytes.size <= 2) - ByteVector(Headers.Int16).buffer ++ bytes.padLeft(2) - else if (bytes.size <= 4) - ByteVector(Headers.Int32).buffer ++ bytes.padLeft(4) + ByteVector(Headers.Int8).buffer ++ bs.padLeft(1) + else if (bs.size <= 2) + ByteVector(Headers.Int16).buffer ++ bs.padLeft(2) + else if (bs.size <= 4) + ByteVector(Headers.Int32).buffer ++ 
bs.padLeft(4) else - ByteVector(Headers.Int64).buffer ++ bytes.padLeft(8) + ByteVector(Headers.Int64).buffer ++ bs.padLeft(8) case MsgpackItem.Float32(float) => ByteVector(Headers.Float32).buffer ++ ByteVector.fromInt(java.lang.Float.floatToIntBits(float)) @@ -103,25 +105,26 @@ private[low] object ItemSerializer { } case MsgpackItem.Extension(tpe, bytes) => - if (bytes.size <= 1) { - (ByteVector(Headers.FixExt1).buffer :+ tpe) ++ bytes.padLeft(1) - } else if (bytes.size <= 2) { - (ByteVector(Headers.FixExt2).buffer :+ tpe) ++ bytes.padLeft(2) - } else if (bytes.size <= 4) { - (ByteVector(Headers.FixExt4).buffer :+ tpe) ++ bytes.padLeft(4) - } else if (bytes.size <= 8) { - (ByteVector(Headers.FixExt8).buffer :+ tpe) ++ bytes.padLeft(8) - } else if (bytes.size <= 16) { - (ByteVector(Headers.FixExt16).buffer :+ tpe) ++ bytes.padLeft(16) - } else if (bytes.size <= Math.pow(2, 8) - 1) { - val size = ByteVector.fromByte(bytes.size.toByte) - (ByteVector(Headers.Ext8).buffer ++ size :+ tpe) ++ bytes - } else if (bytes.size <= Math.pow(2, 16) - 1) { - val size = ByteVector.fromShort(bytes.size.toShort) - (ByteVector(Headers.Ext16).buffer ++ size :+ tpe) ++ bytes + val bs = bytes.dropWhile(_ == 0) + if (bs.size <= 1) { + (ByteVector(Headers.FixExt1).buffer :+ tpe) ++ bs.padLeft(1) + } else if (bs.size <= 2) { + (ByteVector(Headers.FixExt2).buffer :+ tpe) ++ bs.padLeft(2) + } else if (bs.size <= 4) { + (ByteVector(Headers.FixExt4).buffer :+ tpe) ++ bs.padLeft(4) + } else if (bs.size <= 8) { + (ByteVector(Headers.FixExt8).buffer :+ tpe) ++ bs.padLeft(8) + } else if (bs.size <= 16) { + (ByteVector(Headers.FixExt16).buffer :+ tpe) ++ bs.padLeft(16) + } else if (bs.size <= Math.pow(2, 8) - 1) { + val size = ByteVector.fromByte(bs.size.toByte) + (ByteVector(Headers.Ext8).buffer ++ size :+ tpe) ++ bs + } else if (bs.size <= Math.pow(2, 16) - 1) { + val size = ByteVector.fromShort(bs.size.toShort) + (ByteVector(Headers.Ext16).buffer ++ size :+ tpe) ++ bs } else { - val size = 
ByteVector.fromInt(bytes.size.toInt) - (ByteVector(Headers.Ext32).buffer ++ size :+ tpe) ++ bytes + val size = ByteVector.fromInt(bs.size.toInt) + (ByteVector(Headers.Ext32).buffer ++ size :+ tpe) ++ bs } case MsgpackItem.Timestamp32(seconds) => From 1ed1f73169d1d47d1b4ecd9f5b1042646e91d820 Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Sun, 18 Aug 2024 18:06:28 +0200 Subject: [PATCH 07/26] Make Extension tests use `ByteVector.fill` --- .../fs2/data/msgpack/SerializerSpec.scala | 18 +++++++++--------- .../fs2/data/msgpack/ValidationSpec.scala | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala b/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala index 90351d238..c2e28279b 100644 --- a/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala +++ b/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala @@ -119,19 +119,19 @@ object SerializerSpec extends SimpleIOSuite { hex"c90000001054abcdef0123456789abcdef0123456789"), // ext 8 - (List(MsgpackItem.Extension(0x54, hex"ab".padLeft(17))), - hex"c71154" ++ hex"ab".padLeft(17), - hex"c90000001154" ++ hex"ab".padLeft(17)), + (List(MsgpackItem.Extension(0x54, ByteVector.fill(17)(0xab))), + hex"c71154" ++ ByteVector.fill(17)(0xab), + hex"c90000001154" ++ ByteVector.fill(17)(0xab)), // ext 16 - (List(MsgpackItem.Extension(0x54, hex"ab".padLeft(Math.pow(2, 8).toLong))), - hex"c8010054" ++ hex"ab".padLeft(Math.pow(2, 8).toLong), - hex"c90000010054" ++ hex"ab".padLeft(Math.pow(2, 8).toLong)), + (List(MsgpackItem.Extension(0x54, ByteVector.fill(Math.pow(2, 8).toLong)(0xab))), + hex"c8010054" ++ ByteVector.fill(Math.pow(2, 8).toLong)(0xab), + hex"c90000010054" ++ ByteVector.fill(Math.pow(2, 8).toLong)(0xab)), // ext 32 - (List(MsgpackItem.Extension(0x54, hex"ab".padLeft(Math.pow(2, 16).toLong))), - hex"c90001000054" ++ hex"ab".padLeft(Math.pow(2, 16).toLong), - hex"c90001000054" ++ hex"ab".padLeft(Math.pow(2, 16).toLong)), + 
(List(MsgpackItem.Extension(0x54, ByteVector.fill(Math.pow(2, 16).toLong)(0xab))), + hex"c90001000054" ++ ByteVector.fill(Math.pow(2, 16).toLong)(0xab), + hex"c90001000054" ++ ByteVector.fill(Math.pow(2, 16).toLong)(0xab)), // timestamp 32 (List(MsgpackItem.Timestamp32(0x0123abcd)), hex"d6ff0123abcd", hex"d6ff0123abcd"), diff --git a/msgpack/src/test/scala/fs2/data/msgpack/ValidationSpec.scala b/msgpack/src/test/scala/fs2/data/msgpack/ValidationSpec.scala index 4431d2886..a777ffd5a 100644 --- a/msgpack/src/test/scala/fs2/data/msgpack/ValidationSpec.scala +++ b/msgpack/src/test/scala/fs2/data/msgpack/ValidationSpec.scala @@ -88,7 +88,7 @@ object ValidationSpec extends SimpleIOSuite { ) } - test("validator should raise if extension data exceeds 2^32 - 1 bytes") { + test("should raise if extension data exceeds 2^32 - 1 bytes") { validation1( MsgpackItem.Extension(0x54, ByteVector.empty.padLeft(Math.pow(2, 32).toLong)) -> new ValidationErrorAt(0, "Extension data exceeds (2^32)-1 bytes"), ) From ac1452848d5f72ff5af755acda07655edb9baf9f Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Sun, 18 Aug 2024 18:07:17 +0200 Subject: [PATCH 08/26] Refine msgpack fixpoint test The parser mapping ByteVector to MsgpackItem can be seen as a non-injective morphism, that is, there are many ByteVectors that will map to the same MsgpackItem. Because of this, we cannot possibly guarantee that `serialize(parse(bs))` is a fixpoint for an arbitrary `bs`. However, the currently implemented serializers *are* injective (if we exclude the Timestamp format family as it can be represented with Extension types) and so, we can guarantee `serialize(parse(bs)) == bs` if `bs` is a member of a subset of ByteVector that is emitted by a serializer.
In other words, the following code will be true for any `bs` if `serialize` is injective and we ignore the Timestamp type family: ``` val first = serialize(parse(bs)) val second = serialize(parse(first)) first == second ``` This test makes sure that the above holds. --- .../fs2/data/msgpack/SerializerSpec.scala | 59 +++++++++++++------ 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala b/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala index c2e28279b..59f19fd9e 100644 --- a/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala +++ b/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala @@ -169,26 +169,49 @@ object SerializerSpec extends SimpleIOSuite { .compile .foldMonoid } - test("MessagePack item serializer should be fix point when optimizing for size") { + + test("MessagePack item serializer should be fixpoint for a subset of ByteVector") { + /* The parser mapping ByteVector to MsgpackItem can be seen as a not injective morphism, that is, there + * are many ByteVectors that will map to the same MsgpackItem. Because of this, we cannot possibly guarantee that + * `serialize(parse(bs))` is fixpoint for an arbitrary `bs`. However, currently implemented serializers *are* + * injective (if we exclude the Timestamp format family as it can be represented with Extension types) and so, we + * can guarantee `serialize(parse(bs)) == bs` if `bs` is a member of a subset of ByteVector that is emitted by a + * serializer. + * + * In other words, the following code will be true for any `bs` if `serialize` is injective and we ignore the + * Timestamp type family: + * {{{ + * val first = serialize(parse(bs)) + * val second = serialize(parse(first)) + * first == second + * }}} + * + * This test makes sure that the above holds. 
+ */ + val cases = List( - hex"CB3FCB5A858793DD98", - hex"d6ffaabbccdd", - hex"81A77765622D61707083A7736572766C65749583AC736572766C65742D6E616D65A8636F666178434453AD736572766C65742D636C617373B86F72672E636F6661782E6364732E434453536572766C6574AA696E69742D706172616DDE002ABD636F6E666967476C6F73736172793A696E7374616C6C6174696F6E4174B05068696C6164656C706869612C205041B9636F6E666967476C6F73736172793A61646D696E456D61696CAD6B736D40706F626F782E636F6DB8636F6E666967476C6F73736172793A706F77657265644279A5436F666178BC636F6E666967476C6F73736172793A706F7765726564427949636F6EB12F696D616765732F636F6661782E676966B9636F6E666967476C6F73736172793A73746174696350617468AF2F636F6E74656E742F737461746963B674656D706C61746550726F636573736F72436C617373B96F72672E636F6661782E5779736977796754656D706C617465B374656D706C6174654C6F61646572436C617373BD6F72672E636F6661782E46696C657354656D706C6174654C6F61646572AC74656D706C61746550617468A974656D706C61746573B474656D706C6174654F7665727269646550617468A0B364656661756C744C69737454656D706C617465B06C69737454656D706C6174652E68746DB364656661756C7446696C6554656D706C617465B361727469636C6554656D706C6174652E68746DA67573654A5350C2AF6A73704C69737454656D706C617465B06C69737454656D706C6174652E6A7370AF6A737046696C6554656D706C617465B361727469636C6554656D706C6174652E6A7370B563616368655061636B61676554616773547261636BCCC8B563616368655061636B6167655461677353746F7265CCC8B763616368655061636B61676554616773526566726573683CB3636163686554656D706C61746573547261636B64B3636163686554656D706C6174657353746F726532B5636163686554656D706C61746573526566726573680FAF63616368655061676573547261636BCCC8AF6361636865506167657353746F726564B163616368655061676573526566726573680AB3636163686550616765734469727479526561640AB8736561726368456E67696E654C69737454656D706C617465B8666F72536561726368456E67696E65734C6973742E68746DB8736561726368456E67696E6546696C6554656D706C617465B4666F72536561726368456E67696E65732E68746DB4736561726368456E67696E65526F626F74734462B15745422D494E462F726F626F74732E6462AC7573654461746153746F726
5C3AE6461746153746F7265436C617373B66F72672E636F6661782E53716C4461746153746F7265B07265646972656374696F6E436C617373B86F72672E636F6661782E53716C5265646972656374696F6EAD6461746153746F72654E616D65A5636F666178AF6461746153746F7265447269766572D92C636F6D2E6D6963726F736F66742E6A6462632E73716C7365727665722E53514C536572766572447269766572AC6461746153746F726555726CD93B6A6462633A6D6963726F736F66743A73716C7365727665723A2F2F4C4F43414C484F53543A313433333B44617461626173654E616D653D676F6F6EAD6461746153746F726555736572A27361B16461746153746F726550617373776F7264B26461746153746F7265546573745175657279B26461746153746F7265546573745175657279D922534554204E4F434F554E54204F4E3B73656C65637420746573743D2774657374273BB06461746153746F72654C6F6746696C65D9242F7573722F6C6F63616C2F746F6D6361742F6C6F67732F6461746173746F72652E6C6F67B26461746153746F7265496E6974436F6E6E730AB16461746153746F72654D6178436F6E6E7364B76461746153746F7265436F6E6E55736167654C696D697464B16461746153746F72654C6F674C6576656CA56465627567AC6D617855726C4C656E677468CD01F483AC736572766C65742D6E616D65AA636F666178456D61696CAD736572766C65742D636C617373BA6F72672E636F6661782E6364732E456D61696C536572766C6574AA696E69742D706172616D82A86D61696C486F7374A56D61696C31B06D61696C486F73744F76657272696465A56D61696C3282AC736572766C65742D6E616D65AA636F66617841646D696EAD736572766C65742D636C617373BA6F72672E636F6661782E6364732E41646D696E536572766C657482AC736572766C65742D6E616D65AB66696C65536572766C6574AD736572766C65742D636C617373B96F72672E636F6661782E6364732E46696C65536572766C657483AC736572766C65742D6E616D65AA636F666178546F6F6C73AD736572766C65742D636C617373BF6F72672E636F6661782E636D732E436F666178546F6F6C73536572766C6574AA696E69742D706172616D8DAC74656D706C61746550617468AF746F6F6C7374656D706C617465732FA36C6F6701AB6C6F674C6F636174696F6ED9252F7573722F6C6F63616C2F746F6D6361742F6C6F67732F436F666178546F6F6C732E6C6F67AA6C6F674D617853697A65A0A7646174614C6F6701AF646174614C6F674C6F636174696F6ED9222F7573722F6C6F63616C2F746F6D6361742F6C6F67732F646174614C6F672E6C6F67AE646174614
C6F674D617853697A65A0AF72656D6F7665506167654361636865D9252F636F6E74656E742F61646D696E2F72656D6F76653F63616368653D70616765732669643DB372656D6F766554656D706C6174654361636865D9292F636F6E74656E742F61646D696E2F72656D6F76653F63616368653D74656D706C617465732669643DB266696C655472616E73666572466F6C646572D9342F7573722F6C6F63616C2F746F6D6361742F776562617070732F636F6E74656E742F66696C655472616E73666572466F6C646572AD6C6F6F6B496E436F6E7465787401AC61646D696E47726F7570494404AA62657461536572766572C3AF736572766C65742D6D617070696E6785A8636F666178434453A12FAA636F666178456D61696CB32F636F6661787574696C2F61656D61696C2F2AAA636F66617841646D696EA82F61646D696E2F2AAB66696C65536572766C6574A92F7374617469632F2AAA636F666178546F6F6C73A82F746F6F6C732F2AA67461676C696282AA7461676C69622D757269A9636F6661782E746C64AF7461676C69622D6C6F636174696F6EB72F5745422D494E462F746C64732F636F6661782E746C64" + hex"918FA46461746582A662756666657282A474797065A6427566666572A4646174619401234567A474797065CCFFA35F6964B8363663316233363661333137353434376163346335343165A5696E64657800A467756964D92438666665653537302D353938312D346630362D623635382D653435383163363064373539A86973416374697665C3A762616C616E6365CB40A946956A97C84CA361676516A8657965436F6C6F72A4626C7565A46E616D65AD4D6F72746F6E204C6974746C65A761646472657373D9313933372044656172626F726E20436F7572742C204861726C656967682C204D6173736163687573657474732C2033353936AA72656769737465726564BA323032332D30382D32395431303A34353A3335202D30323A3030A86C61746974756465CB4047551159C49774A96C6F6E676974756465CBC065F94A771C970FA47461677397A54C6F72656DA3657374A86465736572756E74A54C6F72656DA46E697369A76C61626F726973A86465736572756E74A7667269656E64739382A2696400A46E616D65B04865726E616E64657A204C6172736F6E82A2696401A46E616D65AF4D616E6E696E672053617267656E7482A2696402A46E616D65AF536176616E6E6168204E65776D616E" ) - Stream - .emits(cases) - .evalMap { hex => - Stream - .chunk(Chunk.byteVector(hex)) - .through(low.items[IO]) - .through(low.toBinary[IO]) - .compile - .toList - .map(x => 
expect.same(ByteVector(x), hex)) + def round(data: ByteVector, compress: Boolean) = + Stream + .chunk(Chunk.byteVector(data)) + .through(low.items[IO]) + .through(low.bytes[IO](compress, false)) + .fold(ByteVector.empty)(_ :+ _) + + def process(compress: Boolean, serializerName: String) = + for { + data <- Stream.emits(cases) + pre <- round(data, compress) + processed <- round(pre, compress) + } yield { + if (processed == pre) + success + else + failure(s"${serializerName} should be fixpoint for: ${pre} but it emitted ${processed}") } - .compile - .foldMonoid - } -} \ No newline at end of file + (process(true, "ItemSerializer.compressed") ++ process(false, "ItemSerializer.none")).compile.foldMonoid + } +} From 671e693598c9dd142ce07f5eddf70fe0e0d3c010 Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Sun, 18 Aug 2024 21:13:38 +0200 Subject: [PATCH 09/26] Remove redundant `padLeft`s when size is known --- .../fs2/data/msgpack/low/internal/ItemSerializer.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala index 9495b7e8f..9f49ad44f 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala @@ -78,7 +78,7 @@ private[low] object ItemSerializer { val size = ByteVector.fromShort(bytes.size.toShort) ByteVector(Headers.Bin16).buffer ++ size ++ bytes } else { - val size = ByteVector.fromInt(bytes.size.toInt).padLeft(4) + val size = ByteVector.fromInt(bytes.size.toInt) ByteVector(Headers.Bin32).buffer ++ size ++ bytes } @@ -170,10 +170,10 @@ private[low] object ItemSerializer { ByteVector(Headers.Bin32) ++ size ++ item.bytes case item: MsgpackItem.Array => - ByteVector(Headers.Array32) ++ ByteVector.fromInt(item.size).padLeft(4) + ByteVector(Headers.Array32) ++ ByteVector.fromInt(item.size) 
case item: MsgpackItem.Map => - ByteVector(Headers.Map32) ++ ByteVector.fromInt(item.size).padLeft(4) + ByteVector(Headers.Map32) ++ ByteVector.fromInt(item.size) case item: MsgpackItem.Extension => val size = ByteVector.fromInt(item.bytes.size.toInt) From b73258346c80bbfe178cf6a0838fd4e5e9ce22fe Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Sun, 18 Aug 2024 22:00:54 +0200 Subject: [PATCH 10/26] Reformat ValidationSpec.scala --- .../fs2/data/msgpack/ValidationSpec.scala | 44 ++++++++++++------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/msgpack/src/test/scala/fs2/data/msgpack/ValidationSpec.scala b/msgpack/src/test/scala/fs2/data/msgpack/ValidationSpec.scala index a777ffd5a..9ef00a2e4 100644 --- a/msgpack/src/test/scala/fs2/data/msgpack/ValidationSpec.scala +++ b/msgpack/src/test/scala/fs2/data/msgpack/ValidationSpec.scala @@ -58,46 +58,60 @@ object ValidationSpec extends SimpleIOSuite { .compile .foldMonoid - test("should raise if integer values exceed 64 bits") { validation1( MsgpackItem.UnsignedInt(hex"10000000000000000") -> new ValidationErrorAt(0, "Unsigned int exceeds 64 bits"), - MsgpackItem.SignedInt(hex"10000000000000000")-> new ValidationErrorAt(0, "Signed int exceeds 64 bits") + MsgpackItem.SignedInt(hex"10000000000000000") -> new ValidationErrorAt(0, "Signed int exceeds 64 bits") ) } test("should raise if string or binary values exceed 2^32 - 1 bytes") { validation1( - MsgpackItem.Str(ByteVector.empty.padLeft(Math.pow(2, 32).toLong)) -> new ValidationErrorAt(0, "String exceeds (2^32)-1 bytes"), - MsgpackItem.Bin(ByteVector.empty.padLeft(Math.pow(2, 32).toLong)) -> new ValidationErrorAt(0, "Bin exceeds (2^32)-1 bytes"), + MsgpackItem.Str(ByteVector.empty.padLeft(Math.pow(2, 32).toLong)) -> new ValidationErrorAt( + 0, + "String exceeds (2^32)-1 bytes"), + MsgpackItem.Bin(ByteVector.empty.padLeft(Math.pow(2, 32).toLong)) -> new ValidationErrorAt( + 0, + "Bin exceeds (2^32)-1 bytes") ) } test("should raise on unexpected end of 
input") { validation( List(MsgpackItem.Array(2), MsgpackItem.True) -> new ValidationError("Unexpected end of input (starting at 0)"), - List(MsgpackItem.Array(2), MsgpackItem.Array(1), MsgpackItem.True) -> new ValidationError("Unexpected end of input (starting at 0)"), - List(MsgpackItem.Array(1), MsgpackItem.Array(1)) -> new ValidationError("Unexpected end of input (starting at 1)"), - List(MsgpackItem.Array(0), MsgpackItem.Array(1)) -> new ValidationError("Unexpected end of input (starting at 1)"), - + List(MsgpackItem.Array(2), MsgpackItem.Array(1), MsgpackItem.True) -> new ValidationError( + "Unexpected end of input (starting at 0)"), + List(MsgpackItem.Array(1), MsgpackItem.Array(1)) -> new ValidationError( + "Unexpected end of input (starting at 1)"), + List(MsgpackItem.Array(0), MsgpackItem.Array(1)) -> new ValidationError( + "Unexpected end of input (starting at 1)"), List(MsgpackItem.Map(1), MsgpackItem.True) -> new ValidationError("Unexpected end of input (starting at 0)"), - List(MsgpackItem.Map(1), MsgpackItem.Map(1), MsgpackItem.True, MsgpackItem.True) -> new ValidationError("Unexpected end of input (starting at 0)"), - List(MsgpackItem.Map(2), MsgpackItem.True, MsgpackItem.Map(1)) -> new ValidationError("Unexpected end of input (starting at 2)"), - List(MsgpackItem.Map(2), MsgpackItem.True, MsgpackItem.Map(1)) -> new ValidationError("Unexpected end of input (starting at 2)"), - List(MsgpackItem.Map(0), MsgpackItem.Map(1)) -> new ValidationError("Unexpected end of input (starting at 1)"), + List(MsgpackItem.Map(1), MsgpackItem.Map(1), MsgpackItem.True, MsgpackItem.True) -> new ValidationError( + "Unexpected end of input (starting at 0)"), + List(MsgpackItem.Map(2), MsgpackItem.True, MsgpackItem.Map(1)) -> new ValidationError( + "Unexpected end of input (starting at 2)"), + List(MsgpackItem.Map(2), MsgpackItem.True, MsgpackItem.Map(1)) -> new ValidationError( + "Unexpected end of input (starting at 2)"), + List(MsgpackItem.Map(0), MsgpackItem.Map(1)) -> 
new ValidationError("Unexpected end of input (starting at 1)") ) } test("should raise if extension data exceeds 2^32 - 1 bytes") { validation1( - MsgpackItem.Extension(0x54, ByteVector.empty.padLeft(Math.pow(2, 32).toLong)) -> new ValidationErrorAt(0, "Extension data exceeds (2^32)-1 bytes"), + MsgpackItem.Extension(0x54, ByteVector.empty.padLeft(Math.pow(2, 32).toLong)) -> new ValidationErrorAt( + 0, + "Extension data exceeds (2^32)-1 bytes") ) } test("should raise if nanoseconds fields exceed 999999999") { validation1( - MsgpackItem.Timestamp64(0xEE6B280000000000L)-> new ValidationErrorAt(0, "Timestamp64 nanoseconds cannot be larger than '999999999'"), - MsgpackItem.Timestamp96(1000000000, 0)-> new ValidationErrorAt(0, "Timestamp96 nanoseconds cannot be larger than '999999999'"), + MsgpackItem.Timestamp64(0xee6b280000000000L) -> new ValidationErrorAt( + 0, + "Timestamp64 nanoseconds cannot be larger than '999999999'"), + MsgpackItem.Timestamp96(1000000000, 0) -> new ValidationErrorAt( + 0, + "Timestamp96 nanoseconds cannot be larger than '999999999'") ) } } From 3f30e33ef59a5a015726750c0d71e31169730625 Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Sun, 18 Aug 2024 22:24:07 +0200 Subject: [PATCH 11/26] Remove scaladoc from an embedded function --- .../scala/fs2/data/msgpack/low/internal/ItemValidator.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala index d9402feec..20cdfb495 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala @@ -34,8 +34,6 @@ private[low] object ItemValidator { def none[F[_]]: Pipe[F, MsgpackItem, MsgpackItem] = in => in def simple[F[_]](implicit F: RaiseThrowable[F]): Pipe[F, MsgpackItem, MsgpackItem] = { in => - /** Validates one item from a stream - */ def step1(chunk: 
Chunk[MsgpackItem], idx: Int, position: Long): Pull[F, MsgpackItem, Option[Expect]] = chunk(idx) match { case MsgpackItem.UnsignedInt(bytes) => From aa8658ae5f06cd6ce825811e08c2cc447d99fd5a Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Sat, 24 Aug 2024 15:39:23 +0200 Subject: [PATCH 12/26] Add benchmarks for msgpack item serializer --- .../MsgPackItemSerializerBenchmarks.scala | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 benchmarks/src/main/scala/fs2/data/benchmarks/MsgPackItemSerializerBenchmarks.scala diff --git a/benchmarks/src/main/scala/fs2/data/benchmarks/MsgPackItemSerializerBenchmarks.scala b/benchmarks/src/main/scala/fs2/data/benchmarks/MsgPackItemSerializerBenchmarks.scala new file mode 100644 index 000000000..93801e9f3 --- /dev/null +++ b/benchmarks/src/main/scala/fs2/data/benchmarks/MsgPackItemSerializerBenchmarks.scala @@ -0,0 +1,71 @@ +/* + * Copyright 2024 fs2-data Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package fs2 +package data.benchmarks + +import java.util.concurrent.TimeUnit +import org.openjdk.jmh.annotations._ + +import cats.effect.SyncIO + +import scodec.bits._ +import fs2._ + +@OutputTimeUnit(TimeUnit.MICROSECONDS) +@BenchmarkMode(Array(Mode.AverageTime)) +@State(org.openjdk.jmh.annotations.Scope.Benchmark) +@Fork(value = 1) +@Warmup(iterations = 3, time = 2) +@Measurement(iterations = 10, time = 2) +class MsgPackItemSerializerBenchmarks { + val msgpackItems: List[fs2.data.msgpack.low.MsgpackItem] = { + val bytes = + fs2.io + .readClassLoaderResource[SyncIO]("twitter_msgpack.txt", 4096) + .through(fs2.text.utf8.decode) + .compile + .string + .map(ByteVector.fromHex(_).get) + .unsafeRunSync() + + Stream + .chunk(Chunk.byteVector(bytes)) + .through(fs2.data.msgpack.low.items[SyncIO]) + .compile + .toList + .unsafeRunSync() + } + + + @Benchmark + def compressed() = + Stream + .emits(msgpackItems) + .through(fs2.data.msgpack.low.bytes[SyncIO](true, false)) + .compile + .drain + .unsafeRunSync() + + @Benchmark + def fast() = + Stream + .emits(msgpackItems) + .through(fs2.data.msgpack.low.bytes[SyncIO](false, false)) + .compile + .drain + .unsafeRunSync() +} From cd9782e0918d73677d76afefcd2b01af053f0b9f Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Thu, 5 Sep 2024 21:38:39 +0200 Subject: [PATCH 13/26] Merge msgpack serializers - There was very little performance difference between serializers so the `fast` serializer was entirely scrapped. - The current serializer buffers the output in 4KiB segments before emitting it. This change brought a significant speedup. 
--- .../MsgPackItemSerializerBenchmarks.scala | 8 +- .../msgpack/low/internal/ItemSerializer.scala | 226 +++++++++++------- .../msgpack/low/internal/ItemValidator.scala | 4 +- .../scala/fs2/data/msgpack/low/package.scala | 31 +-- .../fs2/data/msgpack/SerializerSpec.scala | 147 +++++------- 5 files changed, 207 insertions(+), 209 deletions(-) diff --git a/benchmarks/src/main/scala/fs2/data/benchmarks/MsgPackItemSerializerBenchmarks.scala b/benchmarks/src/main/scala/fs2/data/benchmarks/MsgPackItemSerializerBenchmarks.scala index 93801e9f3..325bb16dd 100644 --- a/benchmarks/src/main/scala/fs2/data/benchmarks/MsgPackItemSerializerBenchmarks.scala +++ b/benchmarks/src/main/scala/fs2/data/benchmarks/MsgPackItemSerializerBenchmarks.scala @@ -52,19 +52,19 @@ class MsgPackItemSerializerBenchmarks { @Benchmark - def compressed() = + def serialize() = Stream .emits(msgpackItems) - .through(fs2.data.msgpack.low.bytes[SyncIO](true, false)) + .through(fs2.data.msgpack.low.bytes[SyncIO](false)) .compile .drain .unsafeRunSync() @Benchmark - def fast() = + def withValidation() = Stream .emits(msgpackItems) - .through(fs2.data.msgpack.low.bytes[SyncIO](false, false)) + .through(fs2.data.msgpack.low.bytes[SyncIO](true)) .compile .drain .unsafeRunSync() diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala index 9f49ad44f..09bbdf6d9 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala @@ -23,181 +23,225 @@ package internal import scodec.bits._ private[low] object ItemSerializer { - def compressed: MsgpackItem => ByteVector = { + class MalformedItemError extends Error("item exceeds the maximum size of it's format") + class MalformedStringError extends MalformedItemError + class MalformedBinError extends MalformedItemError + class MalformedIntError extends 
MalformedItemError + class MalformedUintError extends MalformedItemError + + /** Checks whether integer `x` fits in `n` bytes. */ + @inline + private def fitsIn(x: Int, n: Long): Boolean = + java.lang.Integer.compareUnsigned(x, (Math.pow(2, n.toDouble).toLong - 1).toInt) <= 0 + + private case class SerializationContext[F[_]](out: Out[F], + chunk: Chunk[MsgpackItem], + idx: Int, + rest: Stream[F, MsgpackItem]) + + /** Buffers [[Chunk]] into 4KiB segments before calling [[Pull.output]]. + * + * @param contents buffered [[Chunk]] + */ + private class Out[F[_]](contents: Chunk[Byte]) { + private val limit = 4096 + + /** Pushes `bv` into the buffer and emits the buffer if it reaches the limit. + */ + @inline + def push(bv: ByteVector): Pull[F, Byte, Out[F]] = + if (contents.size >= limit) + Pull.output(contents).as(new Out(Chunk.byteVector(bv))) + else + Pull.done.as(new Out(contents ++ Chunk.byteVector(bv))) + + /** Splices `bv` into segments and pushes them into the buffer while emitting the buffer at the same time so + * that it never exceeds the limit during the operation. + * + * Use this instead of [[Out.push]] when `bv` may significantly exceed 4KiB. + */ + def pushBuffered(bv: ByteVector): Pull[F, Byte, Out[F]] = { + @inline + def go(chunk: Chunk[Byte], rest: ByteVector): Pull[F, Byte, Out[F]] = + if (rest.isEmpty) + Pull.done.as(new Out(chunk)) + else + Pull.output(chunk) >> go(Chunk.byteVector(rest.take(limit.toLong)), rest.drop(limit.toLong)) + + if (bv.isEmpty) + this.push(bv) + else if (contents.size >= limit) + Pull.output(contents) >> go(Chunk.byteVector(bv.take(limit.toLong)), bv.drop(limit.toLong)) + else + go(contents ++ Chunk.byteVector(bv.take(limit.toLong - contents.size)), bv.drop(limit.toLong - contents.size)) + } + + /** Outputs the whole buffer. 
*/ + @inline + def flush = Pull.output(contents) + } + + @inline + private def step[F[_]: RaiseThrowable](o: Out[F], item: MsgpackItem): Pull[F, Byte, Out[F]] = item match { case MsgpackItem.UnsignedInt(bytes) => val bs = bytes.dropWhile(_ == 0) if (bs.size <= 1) - ByteVector(Headers.Uint8).buffer ++ bs.padLeft(1) + o.push(ByteVector(Headers.Uint8) ++ bs.padLeft(1)) else if (bs.size <= 2) - ByteVector(Headers.Uint16).buffer ++ bs.padLeft(2) + o.push(ByteVector(Headers.Uint16) ++ bs.padLeft(2)) else if (bs.size <= 4) - ByteVector(Headers.Uint32).buffer ++ bs.padLeft(4) + o.push(ByteVector(Headers.Uint32) ++ bs.padLeft(4)) + else if (bs.size <= 8) + o.push(ByteVector(Headers.Uint64) ++ bs.padLeft(8)) else - ByteVector(Headers.Uint64).buffer ++ bs.padLeft(8) + Pull.raiseError(new MalformedUintError) case MsgpackItem.SignedInt(bytes) => val bs = bytes.dropWhile(_ == 0) if (bs.size <= 1) // positive fixint or negative fixint if ((bs & hex"7f") == bs || (bs & hex"c0") == hex"c0") - bs.padLeft(1) + o.push(bs.padLeft(1)) else - ByteVector(Headers.Int8).buffer ++ bs.padLeft(1) + o.push(ByteVector(Headers.Int8) ++ bs.padLeft(1)) else if (bs.size <= 2) - ByteVector(Headers.Int16).buffer ++ bs.padLeft(2) + o.push(ByteVector(Headers.Int16) ++ bs.padLeft(2)) else if (bs.size <= 4) - ByteVector(Headers.Int32).buffer ++ bs.padLeft(4) + o.push(ByteVector(Headers.Int32) ++ bs.padLeft(4)) + else if (bs.size <= 8) + o.push(ByteVector(Headers.Int64) ++ bs.padLeft(8)) else - ByteVector(Headers.Int64).buffer ++ bs.padLeft(8) + Pull.raiseError(new MalformedIntError) case MsgpackItem.Float32(float) => - ByteVector(Headers.Float32).buffer ++ ByteVector.fromInt(java.lang.Float.floatToIntBits(float)) + o.push(ByteVector(Headers.Float32) ++ ByteVector.fromInt(java.lang.Float.floatToIntBits(float))) case MsgpackItem.Float64(double) => - ByteVector(Headers.Float64).buffer ++ ByteVector.fromLong(java.lang.Double.doubleToLongBits(double)) + o.push(ByteVector(Headers.Float64) ++ 
ByteVector.fromLong(java.lang.Double.doubleToLongBits(double))) case MsgpackItem.Str(bytes) => if (bytes.size <= 31) { - ByteVector.fromByte((0xa0 | bytes.size).toByte).buffer ++ bytes + o.push(ByteVector.fromByte((0xa0 | bytes.size).toByte) ++ bytes) } else if (bytes.size <= Math.pow(2, 8) - 1) { val size = ByteVector.fromByte(bytes.size.toByte) - ByteVector(Headers.Str8).buffer ++ size ++ bytes + o.push(ByteVector(Headers.Str8) ++ size ++ bytes) } else if (bytes.size <= Math.pow(2, 16) - 1) { val size = ByteVector.fromShort(bytes.size.toShort) - ByteVector(Headers.Str16).buffer ++ size ++ bytes - } else { + o.push(ByteVector(Headers.Str16) ++ size ++ bytes) + } else if (fitsIn(bytes.size.toInt, 32)) { val size = ByteVector.fromInt(bytes.size.toInt) - ByteVector(Headers.Str32).buffer ++ size ++ bytes + /* Max length of str32 (incl. type and length info) is 2^32 + 4 bytes + * which is more than Chunk can handle at once + */ + o.pushBuffered(ByteVector(Headers.Str32) ++ size ++ bytes) + } else { + Pull.raiseError(new MalformedStringError) } case MsgpackItem.Bin(bytes) => if (bytes.size <= Math.pow(2, 8) - 1) { val size = ByteVector.fromByte(bytes.size.toByte) - ByteVector(Headers.Bin8).buffer ++ size ++ bytes + o.push(ByteVector(Headers.Bin8) ++ size ++ bytes) } else if (bytes.size <= Math.pow(2, 16) - 1) { val size = ByteVector.fromShort(bytes.size.toShort) - ByteVector(Headers.Bin16).buffer ++ size ++ bytes - } else { + o.push(ByteVector(Headers.Bin16) ++ size ++ bytes) + } else if (fitsIn(bytes.size.toInt, 32)) { val size = ByteVector.fromInt(bytes.size.toInt) - ByteVector(Headers.Bin32).buffer ++ size ++ bytes + /* Max length of str32 (incl. 
type and length info) is 2^32 + 4 bytes + * which is more than Chunk can handle at once + */ + o.pushBuffered(ByteVector(Headers.Bin32) ++ size ++ bytes) + } else { + Pull.raiseError(new MalformedBinError) } case MsgpackItem.Array(size) => - if (size <= 15) { - ByteVector.fromByte((0x90 | size).toByte) + if (fitsIn(size, 4)) { + o.push(ByteVector.fromByte((0x90 | size).toByte)) } else if (size <= Math.pow(2, 16) - 1) { val s = ByteVector.fromShort(size.toShort) - ByteVector(Headers.Array16).buffer ++ s + o.push(ByteVector(Headers.Array16) ++ s) } else { val s = ByteVector.fromInt(size) - ByteVector(Headers.Array32).buffer ++ s + o.push(ByteVector(Headers.Array32) ++ s) } case MsgpackItem.Map(size) => if (size <= 15) { - ByteVector.fromByte((0x80 | size).toByte) + o.push(ByteVector.fromByte((0x80 | size).toByte)) } else if (size <= Math.pow(2, 16) - 1) { val s = ByteVector.fromShort(size.toShort) - ByteVector(Headers.Map16).buffer ++ s + o.push(ByteVector(Headers.Map16) ++ s) } else { val s = ByteVector.fromInt(size) - ByteVector(Headers.Map32).buffer ++ s + o.push(ByteVector(Headers.Map32) ++ s) } case MsgpackItem.Extension(tpe, bytes) => val bs = bytes.dropWhile(_ == 0) if (bs.size <= 1) { - (ByteVector(Headers.FixExt1).buffer :+ tpe) ++ bs.padLeft(1) + o.push((ByteVector(Headers.FixExt1) :+ tpe) ++ bs.padLeft(1)) } else if (bs.size <= 2) { - (ByteVector(Headers.FixExt2).buffer :+ tpe) ++ bs.padLeft(2) + o.push((ByteVector(Headers.FixExt2) :+ tpe) ++ bs.padLeft(2)) } else if (bs.size <= 4) { - (ByteVector(Headers.FixExt4).buffer :+ tpe) ++ bs.padLeft(4) + o.push((ByteVector(Headers.FixExt4) :+ tpe) ++ bs.padLeft(4)) } else if (bs.size <= 8) { - (ByteVector(Headers.FixExt8).buffer :+ tpe) ++ bs.padLeft(8) + o.push((ByteVector(Headers.FixExt8) :+ tpe) ++ bs.padLeft(8)) } else if (bs.size <= 16) { - (ByteVector(Headers.FixExt16).buffer :+ tpe) ++ bs.padLeft(16) + o.push((ByteVector(Headers.FixExt16) :+ tpe) ++ bs.padLeft(16)) } else if (bs.size <= Math.pow(2, 8) - 1) 
{ val size = ByteVector.fromByte(bs.size.toByte) - (ByteVector(Headers.Ext8).buffer ++ size :+ tpe) ++ bs + o.push((ByteVector(Headers.Ext8) ++ size :+ tpe) ++ bs) } else if (bs.size <= Math.pow(2, 16) - 1) { val size = ByteVector.fromShort(bs.size.toShort) - (ByteVector(Headers.Ext16).buffer ++ size :+ tpe) ++ bs + o.push((ByteVector(Headers.Ext16) ++ size :+ tpe) ++ bs) } else { val size = ByteVector.fromInt(bs.size.toInt) - (ByteVector(Headers.Ext32).buffer ++ size :+ tpe) ++ bs + /* Max length of ext32 (incl. type and length info) is 2^32 + 5 bytes + * which is more than Chunk can handle at once. + */ + o.pushBuffered((ByteVector(Headers.Ext32) ++ size :+ tpe) ++ bs) } case MsgpackItem.Timestamp32(seconds) => - (ByteVector(Headers.FixExt4).buffer :+ Headers.Timestamp.toByte) ++ ByteVector.fromInt(seconds) + o.push((ByteVector(Headers.FixExt4) :+ Headers.Timestamp.toByte) ++ ByteVector.fromInt(seconds)) case MsgpackItem.Timestamp64(combined) => - (ByteVector(Headers.FixExt8).buffer :+ Headers.Timestamp.toByte) ++ ByteVector.fromLong(combined) + o.push((ByteVector(Headers.FixExt8) :+ Headers.Timestamp.toByte) ++ ByteVector.fromLong(combined)) case MsgpackItem.Timestamp96(nanoseconds, seconds) => val ns = ByteVector.fromInt(nanoseconds) val s = ByteVector.fromLong(seconds) - (ByteVector(Headers.Ext8).buffer :+ 12 :+ Headers.Timestamp.toByte) ++ ns ++ s + o.push((ByteVector(Headers.Ext8) :+ 12 :+ Headers.Timestamp.toByte) ++ ns ++ s) case MsgpackItem.Nil => - ByteVector(Headers.Nil) + o.push(ByteVector(Headers.Nil)) case MsgpackItem.False => - ByteVector(Headers.False) + o.push(ByteVector(Headers.False)) case MsgpackItem.True => - ByteVector(Headers.True) + o.push(ByteVector(Headers.True)) } - def fast: MsgpackItem => ByteVector = { - case item: MsgpackItem.UnsignedInt => - ByteVector(Headers.Uint64) ++ item.bytes.padLeft(8) - - case item: MsgpackItem.SignedInt => - ByteVector(Headers.Int64) ++ item.bytes.padLeft(8) - - case item: MsgpackItem.Float32 => - 
ByteVector(Headers.Float32) ++ ByteVector.fromInt(java.lang.Float.floatToIntBits(item.v)) - - case item: MsgpackItem.Float64 => - ByteVector(Headers.Float64) ++ ByteVector.fromLong(java.lang.Double.doubleToLongBits(item.v)) - - case item: MsgpackItem.Str => - val size = ByteVector.fromInt(item.bytes.size.toInt) - ByteVector(Headers.Str32) ++ size ++ item.bytes - - case item: MsgpackItem.Bin => - val size = ByteVector.fromInt(item.bytes.size.toInt) - ByteVector(Headers.Bin32) ++ size ++ item.bytes - - case item: MsgpackItem.Array => - ByteVector(Headers.Array32) ++ ByteVector.fromInt(item.size) - - case item: MsgpackItem.Map => - ByteVector(Headers.Map32) ++ ByteVector.fromInt(item.size) - - case item: MsgpackItem.Extension => - val size = ByteVector.fromInt(item.bytes.size.toInt) - val t = ByteVector(item.tpe) - ByteVector(Headers.Ext32) ++ size ++ t ++ item.bytes - - case item: MsgpackItem.Timestamp32 => - ByteVector(Headers.FixExt4) ++ hex"ff" ++ ByteVector.fromInt(item.seconds) - - case item: MsgpackItem.Timestamp64 => - ByteVector(Headers.FixExt8) ++ hex"ff" ++ ByteVector.fromLong(item.combined) - - case item: MsgpackItem.Timestamp96 => - val ns = ByteVector.fromInt(item.nanoseconds) - val s = ByteVector.fromLong(item.seconds) - ByteVector(Headers.Ext8) ++ hex"0c" ++ hex"ff" ++ ns ++ s - - case MsgpackItem.Nil => - ByteVector(Headers.Nil) + private def stepChunk[F[_]: RaiseThrowable](ctx: SerializationContext[F]): Pull[F, Byte, SerializationContext[F]] = + if (ctx.idx >= ctx.chunk.size) + Pull.done.as(ctx) + else + step(ctx.out, ctx.chunk(ctx.idx)).flatMap { out => + stepChunk(SerializationContext(out, ctx.chunk, ctx.idx + 1, ctx.rest)) + } - case MsgpackItem.False => - ByteVector(Headers.False) + def pipe[F[_]: RaiseThrowable]: Pipe[F, MsgpackItem, Byte] = { stream => + def go(out: Out[F], rest: Stream[F, MsgpackItem]): Pull[F, Byte, Unit] = + rest.pull.uncons.flatMap { + case None => out.flush + case Some((chunk, rest)) => + 
stepChunk(SerializationContext(out, chunk, 0, rest)).flatMap { case SerializationContext(out, _, _, rest) => + go(out, rest) + } + } - case MsgpackItem.True => - ByteVector(Headers.True) + go(new Out(Chunk.empty), stream).stream } } diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala index 20cdfb495..3a61a4bd1 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala @@ -63,9 +63,7 @@ private[low] object ItemValidator { Pull.pure(None) case MsgpackItem.Array(size) => - if (size < 0) - Pull.raiseError(new ValidationErrorAt(position, s"Array has a negative size ${size}")) - else if (size == 0) + if (size == 0) Pull.pure(None) else Pull.pure(Some(Expect(size, position))) diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala index 2d512faf6..5c8424868 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala @@ -26,33 +26,16 @@ package object low { def items[F[_]](implicit F: RaiseThrowable[F]): Pipe[F, Byte, MsgpackItem] = ItemParser.pipe[F] - /** Alias for `bytes(compressed = true, validated = true)` + /** Alias for `bytes(validated = true)` */ def toBinary[F[_]: RaiseThrowable]: Pipe[F, MsgpackItem, Byte] = - bytes(true, true) + bytes(true) - def bytes[F[_]](compressed: Boolean, validated: Boolean)(implicit - F: RaiseThrowable[F]): Pipe[F, MsgpackItem, Byte] = { in => - in - .through { if (validated) ItemValidator.simple else ItemValidator.none } - .flatMap { x => - val bytes = - if (compressed) - ItemSerializer.compressed(x) - else - ItemSerializer.fast(x) - - /* Maximum size of a `ByteVector` is bigger than the one of a `Chunk` (Long vs Int). 
The `Chunk.byteVector` - * function returns `Chunk.empty` if it encounters a `ByteVector` that won't fit in a `Chunk`. We have to work - * around this behaviour and explicitly check the `ByteVector` size. - */ - if (bytes.size <= Int.MaxValue) { - Stream.chunk(Chunk.byteVector(bytes)) - } else { - val (lhs, rhs) = bytes.splitAt(Int.MaxValue) - Stream.chunk(Chunk.byteVector(lhs)) ++ Stream.chunk(Chunk.byteVector(rhs)) - } - } + def bytes[F[_]: RaiseThrowable](validated: Boolean): Pipe[F, MsgpackItem, Byte] = { + if (validated) + ItemValidator.simple.andThen(ItemSerializer.pipe) + else + ItemSerializer.pipe } def validated[F[_]](implicit F: RaiseThrowable[F]): Pipe[F, MsgpackItem, MsgpackItem] = diff --git a/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala b/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala index 59f19fd9e..2874d6588 100644 --- a/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala +++ b/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala @@ -25,146 +25,119 @@ import weaver._ import java.nio.charset.StandardCharsets import low.MsgpackItem -object SerializerSpec extends SimpleIOSuite { +object SerializerSpec extends SimpleIOSuite with Checkers { test("MessagePack item serializer should correctly serialize all formats") { - val cases = List( + val cases: List[(List[MsgpackItem], ByteVector)] = List( // nil, false, true - (List(MsgpackItem.Nil, MsgpackItem.False, MsgpackItem.True), hex"c0c2c3", hex"c0c2c3"), + (List(MsgpackItem.Nil, MsgpackItem.False, MsgpackItem.True), hex"c0c2c3"), // positive fixint - (List(MsgpackItem.SignedInt(hex"7b")), hex"7b", hex"0xd3000000000000007b"), + (List(MsgpackItem.SignedInt(hex"7b")), hex"7b"), // negative fixint - (List(MsgpackItem.SignedInt(hex"d6")), hex"d6", hex"0xd300000000000000d6"), + (List(MsgpackItem.SignedInt(hex"d6")), hex"d6"), // uint 8, uint 16, uint 32, uint 64 - (List(MsgpackItem.UnsignedInt(hex"ab")), hex"ccab", hex"cf00000000000000ab"), - 
(List(MsgpackItem.UnsignedInt(hex"abcd")), hex"cdabcd", hex"cf000000000000abcd"), - (List(MsgpackItem.UnsignedInt(hex"abcdef01")), hex"ceabcdef01", hex"cf00000000abcdef01"), - (List(MsgpackItem.UnsignedInt(hex"abcdef0123456789")), hex"cfabcdef0123456789", hex"cfabcdef0123456789"), + (List(MsgpackItem.UnsignedInt(hex"ab")), hex"ccab"), + (List(MsgpackItem.UnsignedInt(hex"abcd")), hex"cdabcd"), + (List(MsgpackItem.UnsignedInt(hex"abcdef01")), hex"ceabcdef01"), + (List(MsgpackItem.UnsignedInt(hex"abcdef0123456789")), hex"cfabcdef0123456789"), // int 8, int 16, int 32, int 64 - (List(MsgpackItem.SignedInt(hex"80")), hex"d080", hex"d30000000000000080"), - (List(MsgpackItem.SignedInt(hex"80ab")), hex"d180ab", hex"d300000000000080ab"), - (List(MsgpackItem.SignedInt(hex"80abcdef")), hex"d280abcdef", hex"d30000000080abcdef"), - (List(MsgpackItem.SignedInt(hex"80abcddef0123456")), hex"d380abcddef0123456", hex"d380abcddef0123456"), + (List(MsgpackItem.SignedInt(hex"80")), hex"d080"), + (List(MsgpackItem.SignedInt(hex"80ab")), hex"d180ab"), + (List(MsgpackItem.SignedInt(hex"80abcdef")), hex"d280abcdef"), + (List(MsgpackItem.SignedInt(hex"80abcddef0123456")), hex"d380abcddef0123456"), // float 32, float 64 - (List(MsgpackItem.Float32(0.125F)), hex"ca3e000000", hex"ca3e000000"), - (List(MsgpackItem.Float64(0.125)), hex"cb3fc0000000000000", hex"cb3fc0000000000000"), + (List(MsgpackItem.Float32(0.125F)), hex"ca3e000000"), + (List(MsgpackItem.Float64(0.125)), hex"cb3fc0000000000000"), // fixstr - (List(MsgpackItem.Str(ByteVector("abc".getBytes(StandardCharsets.UTF_8)))), - hex"a3616263", - hex"0xdb00000003616263"), + (List(MsgpackItem.Str(ByteVector("abc".getBytes(StandardCharsets.UTF_8)))), hex"a3616263"), // str 8 (List(MsgpackItem.Str(ByteVector("abcd".repeat(8).getBytes(StandardCharsets.UTF_8)))), - hex"d920" ++ ByteVector("abcd".repeat(8).getBytes(StandardCharsets.UTF_8)), - hex"db00000020" ++ ByteVector("abcd".repeat(8).getBytes(StandardCharsets.UTF_8))), + hex"d920" ++ 
ByteVector("abcd".repeat(8).getBytes(StandardCharsets.UTF_8))), // str 16 (List(MsgpackItem.Str(ByteVector("a".repeat(Math.pow(2, 8).toInt).getBytes(StandardCharsets.UTF_8)))), - hex"da0100" ++ ByteVector("a".repeat(Math.pow(2, 8).toInt).getBytes(StandardCharsets.UTF_8)), - hex"db00000100" ++ ByteVector("a".repeat(Math.pow(2, 8).toInt).getBytes(StandardCharsets.UTF_8))), + hex"da0100" ++ ByteVector("a".repeat(Math.pow(2, 8).toInt).getBytes(StandardCharsets.UTF_8))), // str 32 (List(MsgpackItem.Str(ByteVector("a".repeat(Math.pow(2, 16).toInt).getBytes(StandardCharsets.UTF_8)))), - hex"db00010000" ++ ByteVector("a".repeat(Math.pow(2, 16).toInt).getBytes(StandardCharsets.UTF_8)), hex"db00010000" ++ ByteVector("a".repeat(Math.pow(2, 16).toInt).getBytes(StandardCharsets.UTF_8))), // bin 8 (List(MsgpackItem.Bin(ByteVector("abcd".repeat(8).getBytes(StandardCharsets.UTF_8)))), - hex"c420" ++ ByteVector("abcd".repeat(8).getBytes(StandardCharsets.UTF_8)), - hex"c600000020" ++ ByteVector("abcd".repeat(8).getBytes(StandardCharsets.UTF_8))), + hex"c420" ++ ByteVector("abcd".repeat(8).getBytes(StandardCharsets.UTF_8))), // bin 16 (List(MsgpackItem.Bin(ByteVector("a".repeat(Math.pow(2, 8).toInt).getBytes(StandardCharsets.UTF_8)))), - hex"c50100" ++ ByteVector("a".repeat(Math.pow(2, 8).toInt).getBytes(StandardCharsets.UTF_8)), - hex"c600000100" ++ ByteVector("a".repeat(Math.pow(2, 8).toInt).getBytes(StandardCharsets.UTF_8))), + hex"c50100" ++ ByteVector("a".repeat(Math.pow(2, 8).toInt).getBytes(StandardCharsets.UTF_8))), // bin 32 (List(MsgpackItem.Bin(ByteVector("a".repeat(Math.pow(2, 16).toInt).getBytes(StandardCharsets.UTF_8)))), - hex"c600010000" ++ ByteVector("a".repeat(Math.pow(2, 16).toInt).getBytes(StandardCharsets.UTF_8)), hex"c600010000" ++ ByteVector("a".repeat(Math.pow(2, 16).toInt).getBytes(StandardCharsets.UTF_8))), // fixarray - (List(MsgpackItem.Array(0)), hex"90", hex"dd00000000"), - (List(MsgpackItem.Array(1)), hex"91", hex"dd00000001"), + 
(List(MsgpackItem.Array(0)), hex"90"), + (List(MsgpackItem.Array(1)), hex"91"), // array 16 - (List(MsgpackItem.Array(16)), hex"dc0010", hex"dd00000010"), + (List(MsgpackItem.Array(16)), hex"dc0010"), // array 32 - (List(MsgpackItem.Array(Math.pow(2, 16).toInt)), hex"dd00010000", hex"dd00010000"), + (List(MsgpackItem.Array(Math.pow(2, 16).toInt)), hex"dd00010000"), // fixmap - (List(MsgpackItem.Map(0)), hex"80", hex"df00000000"), - (List(MsgpackItem.Map(1)), hex"81", hex"df00000001"), + (List(MsgpackItem.Map(0)), hex"80"), + (List(MsgpackItem.Map(1)), hex"81"), // map 16 - (List(MsgpackItem.Map(16)), hex"de0010", hex"df00000010"), + (List(MsgpackItem.Map(16)), hex"de0010"), // map 32 - (List(MsgpackItem.Map(Math.pow(2, 16).toInt)), hex"df00010000", hex"df00010000"), + (List(MsgpackItem.Map(Math.pow(2, 16).toInt)), hex"df00010000"), // fixext 1 - (List(MsgpackItem.Extension(0x54.toByte, hex"ab")), hex"d454ab", hex"c90000000154ab"), + (List(MsgpackItem.Extension(0x54.toByte, hex"ab")), hex"d454ab"), // fixext 2 - (List(MsgpackItem.Extension(0x54.toByte, hex"abcd")), hex"d554abcd", hex"c90000000254abcd"), + (List(MsgpackItem.Extension(0x54.toByte, hex"abcd")), hex"d554abcd"), // fixext 4 - (List(MsgpackItem.Extension(0x54.toByte, hex"abcdef01")), hex"d654abcdef01", hex"c90000000454abcdef01"), + (List(MsgpackItem.Extension(0x54.toByte, hex"abcdef01")), hex"d654abcdef01"), // fixext 8 - (List(MsgpackItem.Extension(0x54.toByte, hex"abcdef0123456789")), - hex"d754abcdef0123456789", - hex"c90000000854abcdef0123456789"), + (List(MsgpackItem.Extension(0x54.toByte, hex"abcdef0123456789")), hex"d754abcdef0123456789"), // fixext 8 (List(MsgpackItem.Extension(0x54.toByte, hex"abcdef0123456789abcdef0123456789")), - hex"d854abcdef0123456789abcdef0123456789", - hex"c90000001054abcdef0123456789abcdef0123456789"), + hex"d854abcdef0123456789abcdef0123456789"), // ext 8 - (List(MsgpackItem.Extension(0x54, ByteVector.fill(17)(0xab))), - hex"c71154" ++ ByteVector.fill(17)(0xab), - 
hex"c90000001154" ++ ByteVector.fill(17)(0xab)), + (List(MsgpackItem.Extension(0x54, ByteVector.fill(17)(0xab))), hex"c71154" ++ ByteVector.fill(17)(0xab)), // ext 16 (List(MsgpackItem.Extension(0x54, ByteVector.fill(Math.pow(2, 8).toLong)(0xab))), - hex"c8010054" ++ ByteVector.fill(Math.pow(2, 8).toLong)(0xab), - hex"c90000010054" ++ ByteVector.fill(Math.pow(2, 8).toLong)(0xab)), + hex"c8010054" ++ ByteVector.fill(Math.pow(2, 8).toLong)(0xab)), // ext 32 (List(MsgpackItem.Extension(0x54, ByteVector.fill(Math.pow(2, 16).toLong)(0xab))), - hex"c90001000054" ++ ByteVector.fill(Math.pow(2, 16).toLong)(0xab), hex"c90001000054" ++ ByteVector.fill(Math.pow(2, 16).toLong)(0xab)), // timestamp 32 - (List(MsgpackItem.Timestamp32(0x0123abcd)), hex"d6ff0123abcd", hex"d6ff0123abcd"), + (List(MsgpackItem.Timestamp32(0x0123abcd)), hex"d6ff0123abcd"), // timestamp 64 - (List(MsgpackItem.Timestamp64(0x0123456789abcdefL)), hex"d7ff0123456789abcdef", hex"d7ff0123456789abcdef"), + (List(MsgpackItem.Timestamp64(0x0123456789abcdefL)), hex"d7ff0123456789abcdef"), // timestamp 96 - (List(MsgpackItem.Timestamp96(0x0123abcd, 0x0123456789abcdefL)), - hex"c70cff0123abcd0123456789abcdef", - hex"c70cff0123abcd0123456789abcdef") + (List(MsgpackItem.Timestamp96(0x0123abcd, 0x0123456789abcdefL)), hex"c70cff0123abcd0123456789abcdef") ) Stream .emits(cases) - .evalMap { case (source, compressed, fast) => - for { - e1 <- - Stream - .emits(source) - .through(low.bytes[IO](true, false)) - .compile - .fold(ByteVector.empty)(_ :+ _) - .map(expect.same(_, compressed)) - - e2 <- - Stream - .emits(source) - .through(low.bytes[IO](false, false)) - .compile - .fold(ByteVector.empty)(_ :+ _) - .map(expect.same(_, fast)) - } yield e1 and e2 + .evalMap { case (source, serialized) => + Stream + .emits(source) + .through(low.bytes[IO](false)) + .compile + .fold(ByteVector.empty)(_ :+ _) + .map(expect.same(_, serialized)) + } .compile .foldMonoid @@ -193,25 +166,25 @@ object SerializerSpec extends SimpleIOSuite { 
hex"918FA46461746582A662756666657282A474797065A6427566666572A4646174619401234567A474797065CCFFA35F6964B8363663316233363661333137353434376163346335343165A5696E64657800A467756964D92438666665653537302D353938312D346630362D623635382D653435383163363064373539A86973416374697665C3A762616C616E6365CB40A946956A97C84CA361676516A8657965436F6C6F72A4626C7565A46E616D65AD4D6F72746F6E204C6974746C65A761646472657373D9313933372044656172626F726E20436F7572742C204861726C656967682C204D6173736163687573657474732C2033353936AA72656769737465726564BA323032332D30382D32395431303A34353A3335202D30323A3030A86C61746974756465CB4047551159C49774A96C6F6E676974756465CBC065F94A771C970FA47461677397A54C6F72656DA3657374A86465736572756E74A54C6F72656DA46E697369A76C61626F726973A86465736572756E74A7667269656E64739382A2696400A46E616D65B04865726E616E64657A204C6172736F6E82A2696401A46E616D65AF4D616E6E696E672053617267656E7482A2696402A46E616D65AF536176616E6E6168204E65776D616E" ) - def round(data: ByteVector, compress: Boolean) = + def round(data: ByteVector) = Stream .chunk(Chunk.byteVector(data)) .through(low.items[IO]) - .through(low.bytes[IO](compress, false)) + .through(low.bytes[IO](false)) .fold(ByteVector.empty)(_ :+ _) - def process(compress: Boolean, serializerName: String) = - for { - data <- Stream.emits(cases) - pre <- round(data, compress) - processed <- round(pre, compress) - } yield { - if (processed == pre) - success - else - failure(s"${serializerName} should be fixpoint for: ${pre} but it emitted ${processed}") - } + val out = for { + data <- Stream.emits(cases) + pre <- round(data) + processed <- round(pre) + } yield { + if (processed == pre) + success + else + failure(s"Serializer should be fixpoint for ${pre} but it emitted ${processed}") + } + + out.compile.foldMonoid - (process(true, "ItemSerializer.compressed") ++ process(false, "ItemSerializer.none")).compile.foldMonoid } } From cdc4894e273b947c083c650d7f132118e5f4b106 Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Sat, 7 Sep 2024 12:27:12 
+0200 Subject: [PATCH 14/26] Make `SerializerSpec` no longer extend `Checkers` --- msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala b/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala index 2874d6588..55bd27913 100644 --- a/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala +++ b/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala @@ -25,7 +25,7 @@ import weaver._ import java.nio.charset.StandardCharsets import low.MsgpackItem -object SerializerSpec extends SimpleIOSuite with Checkers { +object SerializerSpec extends SimpleIOSuite { test("MessagePack item serializer should correctly serialize all formats") { val cases: List[(List[MsgpackItem], ByteVector)] = List( // nil, false, true From 309569e48961642af378989709a29637f377759f Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Sat, 7 Sep 2024 12:38:04 +0200 Subject: [PATCH 15/26] Make `msgpack.low` API similar to `cbor.low` API --- .../MsgPackItemSerializerBenchmarks.scala | 4 ++-- .../data/msgpack/low/internal/ItemValidator.scala | 5 +---- .../main/scala/fs2/data/msgpack/low/package.scala | 14 +++++--------- .../scala/fs2/data/msgpack/SerializerSpec.scala | 4 ++-- 4 files changed, 10 insertions(+), 17 deletions(-) diff --git a/benchmarks/src/main/scala/fs2/data/benchmarks/MsgPackItemSerializerBenchmarks.scala b/benchmarks/src/main/scala/fs2/data/benchmarks/MsgPackItemSerializerBenchmarks.scala index 325bb16dd..caf49e347 100644 --- a/benchmarks/src/main/scala/fs2/data/benchmarks/MsgPackItemSerializerBenchmarks.scala +++ b/benchmarks/src/main/scala/fs2/data/benchmarks/MsgPackItemSerializerBenchmarks.scala @@ -55,7 +55,7 @@ class MsgPackItemSerializerBenchmarks { def serialize() = Stream .emits(msgpackItems) - .through(fs2.data.msgpack.low.bytes[SyncIO](false)) + .through(fs2.data.msgpack.low.toNonValidatedBinary[SyncIO]) .compile .drain 
.unsafeRunSync() @@ -64,7 +64,7 @@ class MsgPackItemSerializerBenchmarks { def withValidation() = Stream .emits(msgpackItems) - .through(fs2.data.msgpack.low.bytes[SyncIO](true)) + .through(fs2.data.msgpack.low.toBinary[SyncIO]) .compile .drain .unsafeRunSync() diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala index 3a61a4bd1..a8ae44423 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala @@ -31,9 +31,7 @@ private[low] object ItemValidator { type ValidationContext = (Chunk[MsgpackItem], Int, Long, List[Expect]) - def none[F[_]]: Pipe[F, MsgpackItem, MsgpackItem] = in => in - - def simple[F[_]](implicit F: RaiseThrowable[F]): Pipe[F, MsgpackItem, MsgpackItem] = { in => + def pipe[F[_]](implicit F: RaiseThrowable[F]): Pipe[F, MsgpackItem, MsgpackItem] = { in => def step1(chunk: Chunk[MsgpackItem], idx: Int, position: Long): Pull[F, MsgpackItem, Option[Expect]] = chunk(idx) match { case MsgpackItem.UnsignedInt(bytes) => @@ -150,5 +148,4 @@ private[low] object ItemValidator { go(in, 0, 0, List.empty).stream } - } diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala index 5c8424868..9433b1d63 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala @@ -29,15 +29,11 @@ package object low { /** Alias for `bytes(validated = true)` */ def toBinary[F[_]: RaiseThrowable]: Pipe[F, MsgpackItem, Byte] = - bytes(true) + _.through(ItemValidator.pipe).through(ItemSerializer.pipe) - def bytes[F[_]: RaiseThrowable](validated: Boolean): Pipe[F, MsgpackItem, Byte] = { - if (validated) - ItemValidator.simple.andThen(ItemSerializer.pipe) - else - ItemSerializer.pipe - } + def toNonValidatedBinary[F[_]: 
RaiseThrowable]: Pipe[F, MsgpackItem, Byte] = + ItemSerializer.pipe - def validated[F[_]](implicit F: RaiseThrowable[F]): Pipe[F, MsgpackItem, MsgpackItem] = - ItemValidator.simple[F] + def validate[F[_]](implicit F: RaiseThrowable[F]): Pipe[F, MsgpackItem, MsgpackItem] = + ItemValidator.pipe[F] } diff --git a/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala b/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala index 55bd27913..13bff49ef 100644 --- a/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala +++ b/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala @@ -133,7 +133,7 @@ object SerializerSpec extends SimpleIOSuite { .evalMap { case (source, serialized) => Stream .emits(source) - .through(low.bytes[IO](false)) + .through(low.toNonValidatedBinary) .compile .fold(ByteVector.empty)(_ :+ _) .map(expect.same(_, serialized)) @@ -170,7 +170,7 @@ object SerializerSpec extends SimpleIOSuite { Stream .chunk(Chunk.byteVector(data)) .through(low.items[IO]) - .through(low.bytes[IO](false)) + .through(low.toNonValidatedBinary) .fold(ByteVector.empty)(_ :+ _) val out = for { From 3d717a3ab6fcc66d6264c387f1d4e3d103f7cac0 Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Sat, 7 Sep 2024 12:53:09 +0200 Subject: [PATCH 16/26] Update msgpack serializer spec documentation --- msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala b/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala index 13bff49ef..8fe00a159 100644 --- a/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala +++ b/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala @@ -146,7 +146,7 @@ object SerializerSpec extends SimpleIOSuite { test("MessagePack item serializer should be fixpoint for a subset of ByteVector") { /* The parser mapping ByteVector to MsgpackItem can be seen as a not injective morphism, that is, there * 
are many ByteVectors that will map to the same MsgpackItem. Because of this, we cannot possibly guarantee that - * `serialize(parse(bs))` is fixpoint for an arbitrary `bs`. However, currently implemented serializers *are* + * `serialize(parse(bs))` is fixpoint for an arbitrary `bs`. However, currently implemented serializer *is* * injective (if we exclude the Timestamp format family as it can be represented with Extension types) and so, we * can guarantee `serialize(parse(bs)) == bs` if `bs` is a member of a subset of ByteVector that is emitted by a * serializer. From deede3faf14a0b1a81643cb259c22bf9d5b03bbd Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Tue, 10 Sep 2024 20:08:19 +0200 Subject: [PATCH 17/26] Change `msgpack.low.toBinary` scaladoc Reflects changes made in 309569e --- msgpack/src/main/scala/fs2/data/msgpack/low/package.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala index 9433b1d63..a0558f8be 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala @@ -26,7 +26,9 @@ package object low { def items[F[_]](implicit F: RaiseThrowable[F]): Pipe[F, Byte, MsgpackItem] = ItemParser.pipe[F] - /** Alias for `bytes(validated = true)` + /** Transforms a stream of [[MsgpackItem]]s into a stream of [[Byte]]s. + * + * Will fail with an error if the stream is malformed. 
*/ def toBinary[F[_]: RaiseThrowable]: Pipe[F, MsgpackItem, Byte] = _.through(ItemValidator.pipe).through(ItemSerializer.pipe) From 698f727589d6e41cc5613393148b1d63877df0d1 Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Tue, 10 Sep 2024 20:48:00 +0200 Subject: [PATCH 18/26] Fix msgpack doc generation --- msgpack/src/main/scala/fs2/data/msgpack/low/package.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala index a0558f8be..8d2a63e2f 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala @@ -26,7 +26,7 @@ package object low { def items[F[_]](implicit F: RaiseThrowable[F]): Pipe[F, Byte, MsgpackItem] = ItemParser.pipe[F] - /** Transforms a stream of [[MsgpackItem]]s into a stream of [[Byte]]s. + /** Transforms a stream of [[MsgpackItem]]s into a stream of [[scala.Byte]]s. * * Will fail with an error if the stream is malformed. */ From 248fbc68c7e048502b72d76843a6febe1ee68f64 Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Tue, 10 Sep 2024 20:50:56 +0200 Subject: [PATCH 19/26] Add doc for `msgpack.low` public methods --- msgpack/src/main/scala/fs2/data/msgpack/low/package.scala | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala index 8d2a63e2f..872dfaeb0 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala @@ -23,6 +23,8 @@ import low.internal.{ItemParser, ItemSerializer, ItemValidator} /** A low-level representation of the MessagePack format. */ package object low { + /** Transforms a stream of [[scala.Byte]]s into a stream of [[MsgpackItem]]s. 
+ */ def items[F[_]](implicit F: RaiseThrowable[F]): Pipe[F, Byte, MsgpackItem] = ItemParser.pipe[F] @@ -33,9 +35,15 @@ package object low { def toBinary[F[_]: RaiseThrowable]: Pipe[F, MsgpackItem, Byte] = _.through(ItemValidator.pipe).through(ItemSerializer.pipe) + /** Transforms a stream of [[MsgpackItem]]s into a stream of [[scala.Byte]]s. + * + * Will not validate the input stream and can potentially produce malformed data. Consider using [[toBinary]]. + */ def toNonValidatedBinary[F[_]: RaiseThrowable]: Pipe[F, MsgpackItem, Byte] = ItemSerializer.pipe + /** Validates a stream of [[MsgpackItem]]s, fails when the stream is malformed. + */ def validate[F[_]](implicit F: RaiseThrowable[F]): Pipe[F, MsgpackItem, MsgpackItem] = ItemValidator.pipe[F] } From 4760221d4df1d8c31d78264422853b5e946439a1 Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Tue, 10 Sep 2024 20:54:48 +0200 Subject: [PATCH 20/26] Run prePR --- msgpack/src/main/scala/fs2/data/msgpack/low/package.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala index 872dfaeb0..46525f792 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/package.scala @@ -23,6 +23,7 @@ import low.internal.{ItemParser, ItemSerializer, ItemValidator} /** A low-level representation of the MessagePack format. */ package object low { + /** Transforms a stream of [[scala.Byte]]s into a stream of [[MsgpackItem]]s. 
*/ def items[F[_]](implicit F: RaiseThrowable[F]): Pipe[F, Byte, MsgpackItem] = From 041e1359dc50407a7f48a4091edc06296686abd2 Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Sat, 14 Sep 2024 12:07:00 +0200 Subject: [PATCH 21/26] Extract literals into constants --- .../msgpack/low/internal/ItemSerializer.scala | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala index 09bbdf6d9..cfb9230e6 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala @@ -29,6 +29,13 @@ private[low] object ItemSerializer { class MalformedIntError extends MalformedItemError class MalformedUintError extends MalformedItemError + private final val positiveIntMask = hex"7f" + private final val negativeIntMask = hex"e0" + + private final val mapMask = 0x80 + private final val arrayMask = 0x90 + private final val strMask = 0xa0 + /** Checks whether integer `x` fits in `n` bytes. 
*/ @inline private def fitsIn(x: Int, n: Long): Boolean = @@ -100,7 +107,7 @@ private[low] object ItemSerializer { val bs = bytes.dropWhile(_ == 0) if (bs.size <= 1) // positive fixint or negative fixint - if ((bs & hex"7f") == bs || (bs & hex"c0") == hex"c0") + if ((bs & positiveIntMask) == bs || (bs & negativeIntMask) == negativeIntMask) o.push(bs.padLeft(1)) else o.push(ByteVector(Headers.Int8) ++ bs.padLeft(1)) @@ -121,7 +128,7 @@ private[low] object ItemSerializer { case MsgpackItem.Str(bytes) => if (bytes.size <= 31) { - o.push(ByteVector.fromByte((0xa0 | bytes.size).toByte) ++ bytes) + o.push(ByteVector.fromByte((strMask | bytes.size).toByte) ++ bytes) } else if (bytes.size <= Math.pow(2, 8) - 1) { val size = ByteVector.fromByte(bytes.size.toByte) o.push(ByteVector(Headers.Str8) ++ size ++ bytes) @@ -157,7 +164,7 @@ private[low] object ItemSerializer { case MsgpackItem.Array(size) => if (fitsIn(size, 4)) { - o.push(ByteVector.fromByte((0x90 | size).toByte)) + o.push(ByteVector.fromByte((arrayMask | size).toByte)) } else if (size <= Math.pow(2, 16) - 1) { val s = ByteVector.fromShort(size.toShort) o.push(ByteVector(Headers.Array16) ++ s) @@ -168,7 +175,7 @@ private[low] object ItemSerializer { case MsgpackItem.Map(size) => if (size <= 15) { - o.push(ByteVector.fromByte((0x80 | size).toByte)) + o.push(ByteVector.fromByte((mapMask | size).toByte)) } else if (size <= Math.pow(2, 16) - 1) { val s = ByteVector.fromShort(size.toShort) o.push(ByteVector(Headers.Map16) ++ s) From 2be8831a29bf2669eb7a55412e044bc1bbdece6a Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Sat, 14 Sep 2024 12:08:33 +0200 Subject: [PATCH 22/26] Fix msgpack serialization test of negative fixint The serializer itself was corrected in 041e1359dc50407a7f48a4091edc06296686abd2 --- msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala 
b/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala index 8fe00a159..33025a789 100644 --- a/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala +++ b/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala @@ -34,7 +34,7 @@ object SerializerSpec extends SimpleIOSuite { // positive fixint (List(MsgpackItem.SignedInt(hex"7b")), hex"7b"), // negative fixint - (List(MsgpackItem.SignedInt(hex"d6")), hex"d6"), + (List(MsgpackItem.SignedInt(hex"e6")), hex"e6"), // uint 8, uint 16, uint 32, uint 64 (List(MsgpackItem.UnsignedInt(hex"ab")), hex"ccab"), From fd845e87b9cf5b6c00b6d37e2110881bb6c1da6d Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Sat, 21 Sep 2024 16:14:42 +0200 Subject: [PATCH 23/26] Make msgpack Array and Map use Long for sizes MessagePack Arrays and Maps can hold up to 2^32 - 1 items which is more than the `Int` type can represent without negative values. --- .../fs2/data/msgpack/low/internal/FormatParsers.scala | 4 ++-- .../fs2/data/msgpack/low/internal/ItemParser.scala | 4 ++-- .../fs2/data/msgpack/low/internal/ItemSerializer.scala | 6 +++--- .../fs2/data/msgpack/low/internal/ItemValidator.scala | 10 ++++++++-- .../src/main/scala/fs2/data/msgpack/low/model.scala | 4 ++-- .../test/scala/fs2/data/msgpack/SerializerSpec.scala | 4 ++-- 6 files changed, 19 insertions(+), 13 deletions(-) diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/FormatParsers.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/FormatParsers.scala index b568a6f7d..dde7b2afa 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/FormatParsers.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/FormatParsers.scala @@ -34,14 +34,14 @@ private[internal] object FormatParsers { def parseArray[F[_]](length: Int, ctx: ParserContext[F])(implicit F: RaiseThrowable[F]): Pull[F, MsgpackItem, ParserContext[F]] = { requireBytes(length, ctx).map { res => - res.accumulate(v => MsgpackItem.Array(v.toInt(false, 
ByteOrdering.BigEndian))) + res.accumulate(v => MsgpackItem.Array(v.toLong(false))) } } def parseMap[F[_]](length: Int, ctx: ParserContext[F])(implicit F: RaiseThrowable[F]): Pull[F, MsgpackItem, ParserContext[F]] = { requireBytes(length, ctx).map { res => - res.accumulate(v => MsgpackItem.Map(v.toInt(false, ByteOrdering.BigEndian))) + res.accumulate(v => MsgpackItem.Map(v.toLong(false))) } } diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemParser.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemParser.scala index 4536d936e..cc04016b9 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemParser.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemParser.scala @@ -77,13 +77,13 @@ private[low] object ItemParser { // fixmap else if ((byte & 0xf0) == 0x80) { val length = byte & 0x0f // 0x8f- 0x80 - Pull.pure(ctx.prepend(MsgpackItem.Map(length))) + Pull.pure(ctx.prepend(MsgpackItem.Map(length.toLong))) } // fixarray else if ((byte & 0xf0) == 0x90) { val length = byte & 0x0f // 0x9f- 0x90 - Pull.pure(ctx.prepend(MsgpackItem.Array(length))) + Pull.pure(ctx.prepend(MsgpackItem.Array(length.toLong))) } // fixstr diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala index cfb9230e6..5d471dd26 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala @@ -163,13 +163,13 @@ private[low] object ItemSerializer { } case MsgpackItem.Array(size) => - if (fitsIn(size, 4)) { + if (size <= 15) { o.push(ByteVector.fromByte((arrayMask | size).toByte)) } else if (size <= Math.pow(2, 16) - 1) { val s = ByteVector.fromShort(size.toShort) o.push(ByteVector(Headers.Array16) ++ s) } else { - val s = ByteVector.fromInt(size) + val s = ByteVector.fromLong(size, 4) o.push(ByteVector(Headers.Array32) ++ s) } @@ 
-180,7 +180,7 @@ private[low] object ItemSerializer { val s = ByteVector.fromShort(size.toShort) o.push(ByteVector(Headers.Map16) ++ s) } else { - val s = ByteVector.fromInt(size) + val s = ByteVector.fromLong(size, 4) o.push(ByteVector(Headers.Map32) ++ s) } diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala index a8ae44423..b961568ad 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala @@ -25,7 +25,7 @@ case class ValidationError(msg: String) extends Exception(msg) private[low] object ItemValidator { - case class Expect(n: Int, from: Long) { + case class Expect(n: Long, from: Long) { def dec = Expect(n - 1, from) } @@ -61,7 +61,11 @@ private[low] object ItemValidator { Pull.pure(None) case MsgpackItem.Array(size) => - if (size == 0) + if (size < 0) + Pull.raiseError(new ValidationErrorAt(position, s"Array has a negative size ${size}")) + else if (size >= (1L << 32)) + Pull.raiseError(new ValidationErrorAt(position, s"Array size exceeds (2^32)-1")) + else if (size == 0) Pull.pure(None) else Pull.pure(Some(Expect(size, position))) @@ -69,6 +73,8 @@ private[low] object ItemValidator { case MsgpackItem.Map(size) => if (size < 0) Pull.raiseError(new ValidationErrorAt(position, s"Map has a negative size ${size}")) + else if (size >= (1L << 32)) + Pull.raiseError(new ValidationErrorAt(position, s"Map size exceeds (2^32)-1")) else if (size == 0) Pull.pure(None) else diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/model.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/model.scala index 675a45b71..e191d7a05 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/model.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/model.scala @@ -34,8 +34,8 @@ object MsgpackItem { case class Str(bytes: ByteVector) extends MsgpackItem case class 
Bin(bytes: ByteVector) extends MsgpackItem - case class Array(size: Int) extends MsgpackItem - case class Map(size: Int) extends MsgpackItem + case class Array(size: Long) extends MsgpackItem + case class Map(size: Long) extends MsgpackItem case class Extension(tpe: Byte, bytes: ByteVector) extends MsgpackItem diff --git a/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala b/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala index 33025a789..79a889b54 100644 --- a/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala +++ b/msgpack/src/test/scala/fs2/data/msgpack/SerializerSpec.scala @@ -85,7 +85,7 @@ object SerializerSpec extends SimpleIOSuite { // array 16 (List(MsgpackItem.Array(16)), hex"dc0010"), // array 32 - (List(MsgpackItem.Array(Math.pow(2, 16).toInt)), hex"dd00010000"), + (List(MsgpackItem.Array(Math.pow(2, 16).toLong)), hex"dd00010000"), // fixmap (List(MsgpackItem.Map(0)), hex"80"), @@ -93,7 +93,7 @@ object SerializerSpec extends SimpleIOSuite { // map 16 (List(MsgpackItem.Map(16)), hex"de0010"), // map 32 - (List(MsgpackItem.Map(Math.pow(2, 16).toInt)), hex"df00010000"), + (List(MsgpackItem.Map(Math.pow(2, 16).toLong)), hex"df00010000"), // fixext 1 (List(MsgpackItem.Extension(0x54.toByte, hex"ab")), hex"d454ab"), From 482bf9e5fbbe4bf9cf244d1fa2fc0d6b081ed598 Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Sun, 22 Sep 2024 15:22:46 +0200 Subject: [PATCH 24/26] Make msgpack exceptions public --- .../scala/fs2/data/msgpack/exceptions.scala | 32 +++++++++ .../msgpack/low/internal/FormatParsers.scala | 2 +- .../data/msgpack/low/internal/Helpers.scala | 5 +- .../msgpack/low/internal/ItemParser.scala | 4 +- .../msgpack/low/internal/ItemSerializer.scala | 22 +++---- .../msgpack/low/internal/ItemValidator.scala | 29 ++++---- .../fs2/data/msgpack/ValidationSpec.scala | 66 +++++++++---------- 7 files changed, 93 insertions(+), 67 deletions(-) create mode 100644 msgpack/src/main/scala/fs2/data/msgpack/exceptions.scala diff --git 
a/msgpack/src/main/scala/fs2/data/msgpack/exceptions.scala b/msgpack/src/main/scala/fs2/data/msgpack/exceptions.scala new file mode 100644 index 000000000..d4940a68b --- /dev/null +++ b/msgpack/src/main/scala/fs2/data/msgpack/exceptions.scala @@ -0,0 +1,32 @@ +/* + * Copyright 2024 fs2-data Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package fs2 +package data +package msgpack + +abstract class MsgpackException(msg: String, cause: Throwable = null) extends Exception(msg, cause) + +case class MsgpackMalformedItemException(msg: String, position: Option[Long] = None, inner: Throwable = null) + extends MsgpackException(position.fold(msg)(pos => s"at position $pos: $msg"), inner) + +case class MsgpackUnexpectedEndOfStreamException(position: Option[Long] = None, inner: Throwable = null) + extends MsgpackException( + position.fold("Unexpected end of stream")(pos => s"Unexpected end of stream starting at position $pos"), + inner) + +case class MsgpackMalformedByteStreamException(msg: String, inner: Throwable = null) + extends MsgpackException(msg, inner) diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/FormatParsers.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/FormatParsers.scala index dde7b2afa..896cf1214 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/FormatParsers.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/FormatParsers.scala @@ -63,7 +63,7 @@ private[internal] object 
FormatParsers { res <- requireBytes(8, res.toContext) seconds = res.result.toLong(false) } yield res.toContext.prepend(MsgpackItem.Timestamp96(nanosec, seconds)) - case _ => Pull.raiseError(new MsgpackParsingException(s"Invalid timestamp length: ${length}")) + case _ => Pull.raiseError(MsgpackMalformedByteStreamException(s"Invalid timestamp length: ${length}")) } } diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/Helpers.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/Helpers.scala index 12a884cbe..881c81ce3 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/Helpers.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/Helpers.scala @@ -23,7 +23,6 @@ package internal import scodec.bits.ByteVector private[internal] object Helpers { - case class MsgpackParsingException(str: String) extends Exception /** @param chunk Current chunk * @param idx Index of the current [[Byte]] in `chunk` @@ -67,7 +66,7 @@ private[internal] object Helpers { // Inbounds chunk access is guaranteed by `ensureChunk` Pull.pure(ctx.next.toResult(ctx.chunk(ctx.idx))) } { - Pull.raiseError(new MsgpackParsingException("Unexpected end of input")) + Pull.raiseError(MsgpackUnexpectedEndOfStreamException()) } } @@ -93,7 +92,7 @@ private[internal] object Helpers { go(count - available, ParserContext(chunk, slice.size, rest, acc), newBytes) } } { - Pull.raiseError(new MsgpackParsingException("Unexpected end of input")) + Pull.raiseError(MsgpackUnexpectedEndOfStreamException()) } } diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemParser.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemParser.scala index cc04016b9..f411a48ab 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemParser.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemParser.scala @@ -37,7 +37,7 @@ private[low] object ItemParser { ((byte & 0xff): @switch) match { case Headers.Nil => 
Pull.pure(ctx.prepend(MsgpackItem.Nil)) - case Headers.NeverUsed => Pull.raiseError(new MsgpackParsingException("Reserved value 0xc1 used")) + case Headers.NeverUsed => Pull.raiseError(MsgpackMalformedByteStreamException("Reserved value 0xc1 used")) case Headers.False => Pull.pure(ctx.prepend(MsgpackItem.False)) case Headers.True => Pull.pure(ctx.prepend(MsgpackItem.True)) case Headers.Bin8 => parseBin(1, ctx) @@ -98,7 +98,7 @@ private[low] object ItemParser { else if ((byte & 0xe0) == 0xe0) { Pull.pure(ctx.prepend(MsgpackItem.SignedInt(ByteVector(byte)))) } else { - Pull.raiseError(new MsgpackParsingException(s"Invalid type ${byte}")) + Pull.raiseError(MsgpackMalformedByteStreamException(s"Invalid type ${byte}")) } } } diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala index 5d471dd26..bb405163f 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala @@ -23,12 +23,6 @@ package internal import scodec.bits._ private[low] object ItemSerializer { - class MalformedItemError extends Error("item exceeds the maximum size of it's format") - class MalformedStringError extends MalformedItemError - class MalformedBinError extends MalformedItemError - class MalformedIntError extends MalformedItemError - class MalformedUintError extends MalformedItemError - private final val positiveIntMask = hex"7f" private final val negativeIntMask = hex"e0" @@ -101,7 +95,7 @@ private[low] object ItemSerializer { else if (bs.size <= 8) o.push(ByteVector(Headers.Uint64) ++ bs.padLeft(8)) else - Pull.raiseError(new MalformedUintError) + Pull.raiseError(MsgpackMalformedItemException("Unsigned int exceeds 64 bits")) case MsgpackItem.SignedInt(bytes) => val bs = bytes.dropWhile(_ == 0) @@ -118,7 +112,7 @@ private[low] object ItemSerializer { else if (bs.size <= 8) 
o.push(ByteVector(Headers.Int64) ++ bs.padLeft(8)) else - Pull.raiseError(new MalformedIntError) + Pull.raiseError(MsgpackMalformedItemException("Signed int exceeds 64 bits")) case MsgpackItem.Float32(float) => o.push(ByteVector(Headers.Float32) ++ ByteVector.fromInt(java.lang.Float.floatToIntBits(float))) @@ -142,7 +136,7 @@ private[low] object ItemSerializer { */ o.pushBuffered(ByteVector(Headers.Str32) ++ size ++ bytes) } else { - Pull.raiseError(new MalformedStringError) + Pull.raiseError(MsgpackMalformedItemException("String exceeds (2^32)-1 bytes")) } case MsgpackItem.Bin(bytes) => @@ -159,7 +153,7 @@ private[low] object ItemSerializer { */ o.pushBuffered(ByteVector(Headers.Bin32) ++ size ++ bytes) } else { - Pull.raiseError(new MalformedBinError) + Pull.raiseError(MsgpackMalformedItemException("Binary data exceeds (2^32)-1 bytes")) } case MsgpackItem.Array(size) => @@ -168,9 +162,11 @@ private[low] object ItemSerializer { } else if (size <= Math.pow(2, 16) - 1) { val s = ByteVector.fromShort(size.toShort) o.push(ByteVector(Headers.Array16) ++ s) - } else { + } else if (size <= (1L << 32) - 1) { val s = ByteVector.fromLong(size, 4) o.push(ByteVector(Headers.Array32) ++ s) + } else { + Pull.raiseError(MsgpackMalformedItemException("Array size exceeds (2^32)-1")) } case MsgpackItem.Map(size) => @@ -179,9 +175,11 @@ private[low] object ItemSerializer { } else if (size <= Math.pow(2, 16) - 1) { val s = ByteVector.fromShort(size.toShort) o.push(ByteVector(Headers.Map16) ++ s) - } else { + } else if (size <= (1L << 32) - 1) { val s = ByteVector.fromLong(size, 4) o.push(ByteVector(Headers.Map32) ++ s) + } else { + Pull.raiseError(MsgpackMalformedItemException("Map size exceeds (2^32)-1 pairs")) } case MsgpackItem.Extension(tpe, bytes) => diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala index b961568ad..dfbaade4c 100644 --- 
a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala @@ -20,9 +20,6 @@ package msgpack package low package internal -case class ValidationErrorAt(at: Long, msg: String) extends Error(s"at position ${at}: ${msg}") -case class ValidationError(msg: String) extends Exception(msg) - private[low] object ItemValidator { case class Expect(n: Long, from: Long) { @@ -35,11 +32,13 @@ private[low] object ItemValidator { def step1(chunk: Chunk[MsgpackItem], idx: Int, position: Long): Pull[F, MsgpackItem, Option[Expect]] = chunk(idx) match { case MsgpackItem.UnsignedInt(bytes) => - if (bytes.size > 8) Pull.raiseError(new ValidationErrorAt(position, "Unsigned int exceeds 64 bits")) + if (bytes.size > 8) + Pull.raiseError(MsgpackMalformedItemException("Unsigned int exceeds 64 bits", Some(position))) else Pull.pure(None) case MsgpackItem.SignedInt(bytes) => - if (bytes.size > 8) Pull.raiseError(new ValidationErrorAt(position, "Signed int exceeds 64 bits")) + if (bytes.size > 8) + Pull.raiseError(MsgpackMalformedItemException("Signed int exceeds 64 bits", Some(position))) else Pull.pure(None) case MsgpackItem.Float32(_) => @@ -50,21 +49,21 @@ private[low] object ItemValidator { case MsgpackItem.Str(bytes) => if (bytes.size > Math.pow(2, 32) - 1) - Pull.raiseError(new ValidationErrorAt(position, "String exceeds (2^32)-1 bytes")) + Pull.raiseError(MsgpackMalformedItemException("String exceeds (2^32)-1 bytes", Some(position))) else Pull.pure(None) case MsgpackItem.Bin(bytes) => if (bytes.size > Math.pow(2, 32) - 1) - Pull.raiseError(new ValidationErrorAt(position, "Bin exceeds (2^32)-1 bytes")) + Pull.raiseError(MsgpackMalformedItemException("Bin exceeds (2^32)-1 bytes", Some(position))) else Pull.pure(None) case MsgpackItem.Array(size) => if (size < 0) - Pull.raiseError(new ValidationErrorAt(position, s"Array has a negative size ${size}")) + 
Pull.raiseError(MsgpackMalformedItemException(s"Array has a negative size ${size}", Some(position))) else if (size >= (1L << 32)) - Pull.raiseError(new ValidationErrorAt(position, s"Array size exceeds (2^32)-1")) + Pull.raiseError(MsgpackMalformedItemException(s"Array size exceeds (2^32)-1", Some(position))) else if (size == 0) Pull.pure(None) else @@ -72,9 +71,9 @@ private[low] object ItemValidator { case MsgpackItem.Map(size) => if (size < 0) - Pull.raiseError(new ValidationErrorAt(position, s"Map has a negative size ${size}")) + Pull.raiseError(MsgpackMalformedItemException(s"Map has a negative size ${size}", Some(position))) else if (size >= (1L << 32)) - Pull.raiseError(new ValidationErrorAt(position, s"Map size exceeds (2^32)-1")) + Pull.raiseError(MsgpackMalformedItemException(s"Map size exceeds (2^32)-1", Some(position))) else if (size == 0) Pull.pure(None) else @@ -82,7 +81,7 @@ private[low] object ItemValidator { case MsgpackItem.Extension(_, bytes) => if (bytes.size > Math.pow(2, 32) - 1) - Pull.raiseError(new ValidationErrorAt(position, "Extension data exceeds (2^32)-1 bytes")) + Pull.raiseError(MsgpackMalformedItemException("Extension data exceeds (2^32)-1 bytes", Some(position))) else Pull.pure(None) @@ -92,14 +91,14 @@ private[low] object ItemValidator { case item: MsgpackItem.Timestamp64 => if (item.nanoseconds > 999999999) Pull.raiseError( - new ValidationErrorAt(position, "Timestamp64 nanoseconds cannot be larger than '999999999'")) + MsgpackMalformedItemException("Timestamp64 nanoseconds is larger than '999999999'", Some(position))) else Pull.pure(None) case MsgpackItem.Timestamp96(nanoseconds, _) => if (nanoseconds > 999999999) Pull.raiseError( - new ValidationErrorAt(position, "Timestamp96 nanoseconds cannot be larger than '999999999'")) + MsgpackMalformedItemException("Timestamp96 nanoseconds is larger than '999999999'", Some(position))) else Pull.pure(None) @@ -149,7 +148,7 @@ private[low] object ItemValidator { if (state.isEmpty) Pull.done 
else - Pull.raiseError(new ValidationError(s"Unexpected end of input (starting at ${state.head.from})")) + Pull.raiseError(MsgpackUnexpectedEndOfStreamException(Some(state.head.from))) } go(in, 0, 0, List.empty).stream diff --git a/msgpack/src/test/scala/fs2/data/msgpack/ValidationSpec.scala b/msgpack/src/test/scala/fs2/data/msgpack/ValidationSpec.scala index 9ef00a2e4..5edd065a4 100644 --- a/msgpack/src/test/scala/fs2/data/msgpack/ValidationSpec.scala +++ b/msgpack/src/test/scala/fs2/data/msgpack/ValidationSpec.scala @@ -20,11 +20,9 @@ package msgpack import cats.effect._ import low.MsgpackItem -import fs2.data.msgpack.low.internal.{ValidationError, ValidationErrorAt} import scodec.bits.ByteVector import weaver._ import scodec.bits._ - import cats.implicits._ object ValidationSpec extends SimpleIOSuite { @@ -60,58 +58,58 @@ object ValidationSpec extends SimpleIOSuite { test("should raise if integer values exceed 64 bits") { validation1( - MsgpackItem.UnsignedInt(hex"10000000000000000") -> new ValidationErrorAt(0, "Unsigned int exceeds 64 bits"), - MsgpackItem.SignedInt(hex"10000000000000000") -> new ValidationErrorAt(0, "Signed int exceeds 64 bits") + MsgpackItem.UnsignedInt(hex"10000000000000000") -> + MsgpackMalformedItemException("Unsigned int exceeds 64 bits", Some(0)), + MsgpackItem.SignedInt(hex"10000000000000000") -> + MsgpackMalformedItemException("Signed int exceeds 64 bits", Some(0)) ) } test("should raise if string or binary values exceed 2^32 - 1 bytes") { validation1( - MsgpackItem.Str(ByteVector.empty.padLeft(Math.pow(2, 32).toLong)) -> new ValidationErrorAt( - 0, - "String exceeds (2^32)-1 bytes"), - MsgpackItem.Bin(ByteVector.empty.padLeft(Math.pow(2, 32).toLong)) -> new ValidationErrorAt( - 0, - "Bin exceeds (2^32)-1 bytes") + MsgpackItem.Str(ByteVector.empty.padLeft(Math.pow(2, 32).toLong)) -> + MsgpackMalformedItemException("String exceeds (2^32)-1 bytes", Some(0)), + MsgpackItem.Bin(ByteVector.empty.padLeft(Math.pow(2, 32).toLong)) -> + 
MsgpackMalformedItemException("Bin exceeds (2^32)-1 bytes", Some(0)) ) } test("should raise on unexpected end of input") { validation( - List(MsgpackItem.Array(2), MsgpackItem.True) -> new ValidationError("Unexpected end of input (starting at 0)"), - List(MsgpackItem.Array(2), MsgpackItem.Array(1), MsgpackItem.True) -> new ValidationError( - "Unexpected end of input (starting at 0)"), - List(MsgpackItem.Array(1), MsgpackItem.Array(1)) -> new ValidationError( - "Unexpected end of input (starting at 1)"), - List(MsgpackItem.Array(0), MsgpackItem.Array(1)) -> new ValidationError( - "Unexpected end of input (starting at 1)"), - List(MsgpackItem.Map(1), MsgpackItem.True) -> new ValidationError("Unexpected end of input (starting at 0)"), - List(MsgpackItem.Map(1), MsgpackItem.Map(1), MsgpackItem.True, MsgpackItem.True) -> new ValidationError( - "Unexpected end of input (starting at 0)"), - List(MsgpackItem.Map(2), MsgpackItem.True, MsgpackItem.Map(1)) -> new ValidationError( - "Unexpected end of input (starting at 2)"), - List(MsgpackItem.Map(2), MsgpackItem.True, MsgpackItem.Map(1)) -> new ValidationError( - "Unexpected end of input (starting at 2)"), - List(MsgpackItem.Map(0), MsgpackItem.Map(1)) -> new ValidationError("Unexpected end of input (starting at 1)") + List(MsgpackItem.Array(2), MsgpackItem.True) -> + MsgpackUnexpectedEndOfStreamException(Some(0)), + List(MsgpackItem.Array(2), MsgpackItem.Array(1), MsgpackItem.True) -> + MsgpackUnexpectedEndOfStreamException(Some(0)), + List(MsgpackItem.Array(1), MsgpackItem.Array(1)) -> + MsgpackUnexpectedEndOfStreamException(Some(1)), + List(MsgpackItem.Array(0), MsgpackItem.Array(1)) -> + MsgpackUnexpectedEndOfStreamException(Some(1)), + List(MsgpackItem.Map(1), MsgpackItem.True) -> + MsgpackUnexpectedEndOfStreamException(Some(0)), + List(MsgpackItem.Map(1), MsgpackItem.Map(1), MsgpackItem.True, MsgpackItem.True) -> + MsgpackUnexpectedEndOfStreamException(Some(0)), + List(MsgpackItem.Map(2), MsgpackItem.True, 
MsgpackItem.Map(1)) -> + MsgpackUnexpectedEndOfStreamException(Some(2)), + List(MsgpackItem.Map(2), MsgpackItem.True, MsgpackItem.Map(1)) -> + MsgpackUnexpectedEndOfStreamException(Some(2)), + List(MsgpackItem.Map(0), MsgpackItem.Map(1)) -> + MsgpackUnexpectedEndOfStreamException(Some(1)) ) } test("should raise if extension data exceeds 2^32 - 1 bytes") { validation1( - MsgpackItem.Extension(0x54, ByteVector.empty.padLeft(Math.pow(2, 32).toLong)) -> new ValidationErrorAt( - 0, - "Extension data exceeds (2^32)-1 bytes") + MsgpackItem.Extension(0x54, ByteVector.empty.padLeft(Math.pow(2, 32).toLong)) -> + MsgpackMalformedItemException("Extension data exceeds (2^32)-1 bytes", Some(0)) ) } test("should raise if nanoseconds fields exceed 999999999") { validation1( - MsgpackItem.Timestamp64(0xee6b280000000000L) -> new ValidationErrorAt( - 0, - "Timestamp64 nanoseconds cannot be larger than '999999999'"), - MsgpackItem.Timestamp96(1000000000, 0) -> new ValidationErrorAt( - 0, - "Timestamp96 nanoseconds cannot be larger than '999999999'") + MsgpackItem.Timestamp64(0xee6b280000000000L) -> + MsgpackMalformedItemException("Timestamp64 nanoseconds is larger than '999999999'", Some(0)), + MsgpackItem.Timestamp96(1000000000, 0) -> + MsgpackMalformedItemException("Timestamp96 nanoseconds is larger than '999999999'", Some(0)) ) } } From 8d677682b726c3fb7c5d1f9cf0a0e76ce69f53fc Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Sun, 22 Sep 2024 16:50:06 +0200 Subject: [PATCH 25/26] Move Pull.pure(None) into a constant --- .../msgpack/low/internal/ItemValidator.scala | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala index dfbaade4c..c1f270437 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala +++ 
b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala @@ -26,6 +26,8 @@ private[low] object ItemValidator { def dec = Expect(n - 1, from) } + private val PullNone = Pull.pure(None) + type ValidationContext = (Chunk[MsgpackItem], Int, Long, List[Expect]) def pipe[F[_]](implicit F: RaiseThrowable[F]): Pipe[F, MsgpackItem, MsgpackItem] = { in => @@ -34,30 +36,30 @@ private[low] object ItemValidator { case MsgpackItem.UnsignedInt(bytes) => if (bytes.size > 8) Pull.raiseError(MsgpackMalformedItemException("Unsigned int exceeds 64 bits", Some(position))) - else Pull.pure(None) + else PullNone case MsgpackItem.SignedInt(bytes) => if (bytes.size > 8) Pull.raiseError(MsgpackMalformedItemException("Signed int exceeds 64 bits", Some(position))) - else Pull.pure(None) + else PullNone case MsgpackItem.Float32(_) => - Pull.pure(None) + PullNone case MsgpackItem.Float64(_) => - Pull.pure(None) + PullNone case MsgpackItem.Str(bytes) => if (bytes.size > Math.pow(2, 32) - 1) Pull.raiseError(MsgpackMalformedItemException("String exceeds (2^32)-1 bytes", Some(position))) else - Pull.pure(None) + PullNone case MsgpackItem.Bin(bytes) => if (bytes.size > Math.pow(2, 32) - 1) Pull.raiseError(MsgpackMalformedItemException("Bin exceeds (2^32)-1 bytes", Some(position))) else - Pull.pure(None) + PullNone case MsgpackItem.Array(size) => if (size < 0) @@ -65,7 +67,7 @@ private[low] object ItemValidator { else if (size >= (1L << 32)) Pull.raiseError(MsgpackMalformedItemException(s"Array size exceeds (2^32)-1", Some(position))) else if (size == 0) - Pull.pure(None) + PullNone else Pull.pure(Some(Expect(size, position))) @@ -75,7 +77,7 @@ private[low] object ItemValidator { else if (size >= (1L << 32)) Pull.raiseError(MsgpackMalformedItemException(s"Map size exceeds (2^32)-1", Some(position))) else if (size == 0) - Pull.pure(None) + PullNone else Pull.pure(Some(Expect(size * 2, position))) @@ -83,33 +85,33 @@ private[low] object ItemValidator { if (bytes.size > Math.pow(2, 32) 
- 1) Pull.raiseError(MsgpackMalformedItemException("Extension data exceeds (2^32)-1 bytes", Some(position))) else - Pull.pure(None) + PullNone case _: MsgpackItem.Timestamp32 => - Pull.pure(None) + PullNone case item: MsgpackItem.Timestamp64 => if (item.nanoseconds > 999999999) Pull.raiseError( MsgpackMalformedItemException("Timestamp64 nanoseconds is larger than '999999999'", Some(position))) else - Pull.pure(None) + PullNone case MsgpackItem.Timestamp96(nanoseconds, _) => if (nanoseconds > 999999999) Pull.raiseError( MsgpackMalformedItemException("Timestamp96 nanoseconds is larger than '999999999'", Some(position))) else - Pull.pure(None) + PullNone case MsgpackItem.Nil => - Pull.pure(None) + PullNone case MsgpackItem.True => - Pull.pure(None) + PullNone case MsgpackItem.False => - Pull.pure(None) + PullNone } def stepChunk(chunk: Chunk[MsgpackItem], From 05c4c1cc614a114dff8243276a95a11b5da2bf9c Mon Sep 17 00:00:00 2001 From: Mariusz Jakoniuk Date: Sun, 22 Sep 2024 16:52:14 +0200 Subject: [PATCH 26/26] Use bit shifts instead of `Math.pow(2, n)` Also drop the `fitsIn` function as we now use `Long`s instead of `Int`s and so we don't need to compare unsigned values. --- .../msgpack/low/internal/ItemSerializer.scala | 25 ++++++++----------- .../msgpack/low/internal/ItemValidator.scala | 6 ++--- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala index bb405163f..8cd09b8b4 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemSerializer.scala @@ -30,11 +30,6 @@ private[low] object ItemSerializer { private final val arrayMask = 0x90 private final val strMask = 0xa0 - /** Checks whether integer `x` fits in `n` bytes. 
*/ - @inline - private def fitsIn(x: Int, n: Long): Boolean = - java.lang.Integer.compareUnsigned(x, (Math.pow(2, n.toDouble).toLong - 1).toInt) <= 0 - private case class SerializationContext[F[_]](out: Out[F], chunk: Chunk[MsgpackItem], idx: Int, @@ -123,13 +118,13 @@ private[low] object ItemSerializer { case MsgpackItem.Str(bytes) => if (bytes.size <= 31) { o.push(ByteVector.fromByte((strMask | bytes.size).toByte) ++ bytes) - } else if (bytes.size <= Math.pow(2, 8) - 1) { + } else if (bytes.size <= (1 << 8) - 1) { val size = ByteVector.fromByte(bytes.size.toByte) o.push(ByteVector(Headers.Str8) ++ size ++ bytes) - } else if (bytes.size <= Math.pow(2, 16) - 1) { + } else if (bytes.size <= (1 << 16) - 1) { val size = ByteVector.fromShort(bytes.size.toShort) o.push(ByteVector(Headers.Str16) ++ size ++ bytes) - } else if (fitsIn(bytes.size.toInt, 32)) { + } else if (bytes.size <= (1L << 32) - 1) { val size = ByteVector.fromInt(bytes.size.toInt) /* Max length of str32 (incl. type and length info) is 2^32 + 4 bytes * which is more than Chunk can handle at once @@ -140,13 +135,13 @@ private[low] object ItemSerializer { } case MsgpackItem.Bin(bytes) => - if (bytes.size <= Math.pow(2, 8) - 1) { + if (bytes.size <= (1 << 8) - 1) { val size = ByteVector.fromByte(bytes.size.toByte) o.push(ByteVector(Headers.Bin8) ++ size ++ bytes) - } else if (bytes.size <= Math.pow(2, 16) - 1) { + } else if (bytes.size <= (1 << 16) - 1) { val size = ByteVector.fromShort(bytes.size.toShort) o.push(ByteVector(Headers.Bin16) ++ size ++ bytes) - } else if (fitsIn(bytes.size.toInt, 32)) { + } else if (bytes.size <= (1L << 32) - 1) { val size = ByteVector.fromInt(bytes.size.toInt) /* Max length of str32 (incl. 
type and length info) is 2^32 + 4 bytes * which is more than Chunk can handle at once @@ -159,7 +154,7 @@ private[low] object ItemSerializer { case MsgpackItem.Array(size) => if (size <= 15) { o.push(ByteVector.fromByte((arrayMask | size).toByte)) - } else if (size <= Math.pow(2, 16) - 1) { + } else if (size <= (1L << 16) - 1) { val s = ByteVector.fromShort(size.toShort) o.push(ByteVector(Headers.Array16) ++ s) } else if (size <= (1L << 32) - 1) { @@ -172,7 +167,7 @@ private[low] object ItemSerializer { case MsgpackItem.Map(size) => if (size <= 15) { o.push(ByteVector.fromByte((mapMask | size).toByte)) - } else if (size <= Math.pow(2, 16) - 1) { + } else if (size <= (1L << 16) - 1) { val s = ByteVector.fromShort(size.toShort) o.push(ByteVector(Headers.Map16) ++ s) } else if (size <= (1L << 32) - 1) { @@ -194,10 +189,10 @@ private[low] object ItemSerializer { o.push((ByteVector(Headers.FixExt8) :+ tpe) ++ bs.padLeft(8)) } else if (bs.size <= 16) { o.push((ByteVector(Headers.FixExt16) :+ tpe) ++ bs.padLeft(16)) - } else if (bs.size <= Math.pow(2, 8) - 1) { + } else if (bs.size <= (1 << 8) - 1) { val size = ByteVector.fromByte(bs.size.toByte) o.push((ByteVector(Headers.Ext8) ++ size :+ tpe) ++ bs) - } else if (bs.size <= Math.pow(2, 16) - 1) { + } else if (bs.size <= (1 << 16) - 1) { val size = ByteVector.fromShort(bs.size.toShort) o.push((ByteVector(Headers.Ext16) ++ size :+ tpe) ++ bs) } else { diff --git a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala index c1f270437..59cf0f2a7 100644 --- a/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala +++ b/msgpack/src/main/scala/fs2/data/msgpack/low/internal/ItemValidator.scala @@ -50,13 +50,13 @@ private[low] object ItemValidator { PullNone case MsgpackItem.Str(bytes) => - if (bytes.size > Math.pow(2, 32) - 1) + if (bytes.size > (1L << 32) - 1) Pull.raiseError(MsgpackMalformedItemException("String 
exceeds (2^32)-1 bytes", Some(position))) else PullNone case MsgpackItem.Bin(bytes) => - if (bytes.size > Math.pow(2, 32) - 1) + if (bytes.size > (1L << 32) - 1) Pull.raiseError(MsgpackMalformedItemException("Bin exceeds (2^32)-1 bytes", Some(position))) else PullNone @@ -82,7 +82,7 @@ private[low] object ItemValidator { Pull.pure(Some(Expect(size * 2, position))) case MsgpackItem.Extension(_, bytes) => - if (bytes.size > Math.pow(2, 32) - 1) + if (bytes.size > (1L << 32) - 1) Pull.raiseError(MsgpackMalformedItemException("Extension data exceeds (2^32)-1 bytes", Some(position))) else PullNone