diff --git a/binrep.cabal b/binrep.cabal index 9950649..692a1ba 100644 --- a/binrep.cabal +++ b/binrep.cabal @@ -34,7 +34,7 @@ flag icu library exposed-modules: Binrep - Binrep.BLen.Simple + Binrep.BLen Binrep.CBLen Binrep.CBLen.Generic Binrep.Extra.HexByteString @@ -64,6 +64,7 @@ library Binrep.Via.Prim Data.Aeson.Extra.SizedVector Raehik.Compat.FlatParse.Basic.Prim + Raehik.Compat.FlatParse.Basic.WithLength Util.TypeNats other-modules: Paths_binrep diff --git a/flake.lock b/flake.lock index 636ce2c..22ddcc3 100644 --- a/flake.lock +++ b/flake.lock @@ -3,11 +3,11 @@ "bytezap": { "flake": false, "locked": { - "lastModified": 1710555689, - "narHash": "sha256-G1IFB1HAmxwySLhkU/bkJsOexgYDcLFXFGM2sG8i+7o=", + "lastModified": 1710619263, + "narHash": "sha256-Mz1w2wKhN/WagMZQno9g0yNGzL/j7t0gxmiju6g37pc=", "owner": "raehik", "repo": "bytezap", - "rev": "08ced1055d8bc52c703a96899695496c8f3511cb", + "rev": "8bb9222e16a2a86135326a88c613e0be17c2916b", "type": "github" }, "original": { diff --git a/src/Binrep.hs b/src/Binrep.hs index 00f1a38..8d2d256 100644 --- a/src/Binrep.hs +++ b/src/Binrep.hs @@ -1,12 +1,12 @@ module Binrep - ( module Binrep.CBLen - , module Binrep.BLen.Simple + ( module Binrep.BLen + , module Binrep.CBLen , module Binrep.Put , module Binrep.Get ) where +import Binrep.BLen import Binrep.CBLen -import Binrep.BLen.Simple import Binrep.Put import Binrep.Get diff --git a/src/Binrep/BLen/Simple.hs b/src/Binrep/BLen.hs similarity index 59% rename from src/Binrep/BLen/Simple.hs rename to src/Binrep/BLen.hs index dc75008..4da06f4 100644 --- a/src/Binrep/BLen/Simple.hs +++ b/src/Binrep/BLen.hs @@ -10,9 +10,18 @@ byte length *before* serializing. This is that. It should be very efficient to calculate serialized byte length for most binrep-compatible Haskell types. If it isn't, consider whether the representation is appropriate for binrep. + +Note that you _may_ encode this inside the serializer type (whatever the @Put@ +class stores). I went back and forth on this a couple times. But some binrep +code seems to make more sense when byte length is standalone. And I don't mind +the extra explicitness. So it's here to stay :) -} -module Binrep.BLen.Simple where +module Binrep.BLen + ( BLen(blen) + , blenGenericNonSum, blenGenericSum + , ViaCBLen(..), cblen + ) where import Binrep.CBLen import GHC.TypeNats @@ -32,7 +41,14 @@ import Generic.Data.Function.FoldMap import Generic.Data.Rep.Assert import Generic.Data.Function.Common -class BLen a where blen :: a -> Int +-- | Class for types with easily-calculated length in bytes. +-- +-- If it appears hard to calculate byte length for a given type (e.g. without +-- first serializing it, then measuring serialized byte length), consider +-- whether this type is a good fit for binrep. +class BLen a where + -- | Calculate the serialized byte length of the given value. + blen :: a -> Int -- newtype sum monoid for generic foldMap newtype BLen' a = BLen' { getBLen' :: a } @@ -67,49 +83,38 @@ blenGenericSum f = getBLen' . genericFoldMapSum @'SumOnly @asserts (BLen' <$> f) instance TypeError ENoEmpty => BLen Void where blen = undefined instance TypeError ENoSum => BLen (Either a b) where blen = undefined --- | Unit type has length 0. -instance BLen () where - {-# INLINE blen #-} - blen () = 0 +-- | _O(1)_ Unit type has length 0. +instance BLen () where blen () = 0 --- | Sum tuples. -instance (BLen l, BLen r) => BLen (l, r) where - {-# INLINE blen #-} - blen (l, r) = blen l + blen r +-- | _O(1)_ Sum tuples. +instance (BLen l, BLen r) => BLen (l, r) where blen (l, r) = blen l + blen r -- | _O(n)_ Sum the length of each element of a list. -instance BLen a => BLen [a] where - {-# INLINE blen #-} - blen = sum . map blen - --- | Length of a bytestring is fairly obvious. -instance BLen B.ByteString where - {-# INLINE blen #-} - blen = B.length - --- Machine integers have a constant byte length. -deriving via CBLenly Word8 instance BLen Word8 -deriving via CBLenly Int8 instance BLen Int8 -deriving via CBLenly Word16 instance BLen Word16 -deriving via CBLenly Int16 instance BLen Int16 -deriving via CBLenly Word32 instance BLen Word32 -deriving via CBLenly Int32 instance BLen Int32 -deriving via CBLenly Word64 instance BLen Word64 -deriving via CBLenly Int64 instance BLen Int64 +instance BLen a => BLen [a] where blen = sum . map blen + +-- | _O(1)_ 'B.ByteString's store their own length. +instance BLen B.ByteString where blen = B.length + +-- All words have a constant byte length-- including host-size words, mind you! +deriving via ViaCBLen Word8 instance BLen Word8 +deriving via ViaCBLen Int8 instance BLen Int8 +deriving via ViaCBLen Word16 instance BLen Word16 +deriving via ViaCBLen Int16 instance BLen Int16 +deriving via ViaCBLen Word32 instance BLen Word32 +deriving via ViaCBLen Int32 instance BLen Int32 +deriving via ViaCBLen Word64 instance BLen Word64 +deriving via ViaCBLen Int64 instance BLen Int64 -------------------------------------------------------------------------------- --- | Deriving via wrapper for types which may derive a 'BLen' instance through --- an existing 'IsCBLen' instance. +-- | DerivingVia wrapper for types which may derive a 'BLen' instance through +-- an existing 'IsCBLen' instance (i.e. it is known at compile time) -- -- Examples of such types include machine integers, and explicitly-sized types -- (e.g. "Binrep.Type.Sized"). -newtype CBLenly a = CBLenly { unCBLenly :: a } -instance KnownNat (CBLen a) => BLen (CBLenly a) where - {-# INLINE blen #-} - blen _ = cblen @a +newtype ViaCBLen a = ViaCBLen { unCBLenly :: a } +instance KnownNat (CBLen a) => BLen (ViaCBLen a) where blen _ = cblen @a -- | Reify a type's constant byte length to the term level. cblen :: forall a n. (n ~ CBLen a, KnownNat n) => Int cblen = natValInt @n -{-# INLINE cblen #-} diff --git a/src/Binrep/CBLen/Generic.hs b/src/Binrep/CBLen/Generic.hs index 8d2c063..299fbef 100644 --- a/src/Binrep/CBLen/Generic.hs +++ b/src/Binrep/CBLen/Generic.hs @@ -14,7 +14,7 @@ generics excel, and the one this module targets. You can (attempt to) derive a 'CBLen' type family instance generically for a type via - instance BLen a where type CBLen a = CBLenGeneric w a + instance CBLen a where type CBLen a = CBLenGeneric w a As with deriving @BLen@ generically, you must provide the type used to store the sum tag for sum types. diff --git a/src/Binrep/Put.hs b/src/Binrep/Put.hs index b64b5bd..3a9aadc 100644 --- a/src/Binrep/Put.hs +++ b/src/Binrep/Put.hs @@ -1,20 +1,11 @@ {-# LANGUAGE UndecidableInstances #-} -- required below GHC 9.6 {-# OPTIONS_GHC -fno-warn-orphans #-} -- for generic data op instance -{- | Serialization using the bytezap library. - -bytezap serializers ("pokes") work by writing bytes into a pointer, which is -assumed to have _precisely_ the space required. The user must determine the -post-serialize length before the fact. For that reason, this module requires -that types to be serialized have a 'BLen' instance. In general, we are happy -about this, because a binrep type should always have an efficient and preferably -simple 'BLen' instance (and if not, it shouldn't be a binrep type). --} - module Binrep.Put where +import Binrep.BLen ( BLen(blen) ) import Data.Functor.Identity -import Bytezap.Write +import Bytezap.Poke import Raehik.Compat.Data.Primitive.Types ( Prim' ) import Binrep.Util.ByteOrder import Raehik.Compat.Data.Primitive.Types.Endian ( ByteSwap ) @@ -36,23 +27,23 @@ import Generic.Data.Rep.Assert import Control.Monad.ST ( RealWorld ) -type Write' = Write RealWorld +type Putter = Poke RealWorld -class Put a where put :: a -> Write' +class Put a where put :: a -> Putter -runPut :: Put a => a -> B.ByteString -runPut = runWriteBS . put +runPut :: (BLen a, Put a) => a -> B.ByteString +runPut a = unsafeRunPokeBS (blen a) (put a) -instance GenericFoldMap Write' where - type GenericFoldMapC Write' a = Put a +instance GenericFoldMap Putter where + type GenericFoldMapC Putter a = Put a genericFoldMapF = put -- | Serialize a term of the non-sum type @a@ via its 'Generic' instance. putGenericNonSum :: forall {cd} {f} {asserts} a - . ( Generic a, Rep a ~ D1 cd f, GFoldMapNonSum Write' f + . ( Generic a, Rep a ~ D1 cd f, GFoldMapNonSum Putter f , asserts ~ '[ 'NoEmpty, 'NoSum], ApplyGCAsserts asserts f) - => a -> Write' + => a -> Putter putGenericNonSum = genericFoldMapNonSum @asserts -- | Serialize a term of the sum type @a@ via its 'Generic' instance. @@ -62,9 +53,9 @@ putGenericNonSum = genericFoldMapNonSum @asserts -- if you want better performance! putGenericSum :: forall {cd} {f} {asserts} a - . (Generic a, Rep a ~ D1 cd f, GFoldMapSum 'SumOnly Write' f + . (Generic a, Rep a ~ D1 cd f, GFoldMapSum 'SumOnly Putter f , asserts ~ '[ 'NoEmpty, 'NeedSum], ApplyGCAsserts asserts f) - => (String -> Write') -> a -> Write' + => (String -> Putter) -> a -> Putter putGenericSum = genericFoldMapSum @'SumOnly @asserts instance Prim' a => Put (ViaPrim a) where put = prim . unViaPrim @@ -74,7 +65,7 @@ instance TypeError ENoSum => Put (Either a b) where put = undefined instance Put a => Put (Identity a) where put = put . runIdentity -instance Put Write' where put = id +instance Put Putter where put = id -- | Unit type serializes to nothing. How zen. instance Put () where diff --git a/src/Binrep/Type/Magic.hs b/src/Binrep/Type/Magic.hs index 6dae103..d5d436b 100644 --- a/src/Binrep/Type/Magic.hs +++ b/src/Binrep/Type/Magic.hs @@ -24,10 +24,9 @@ module Binrep.Type.Magic where import Binrep import Binrep.Type.Byte -import Bytezap.Write.Internal ( Write(Write) ) import FlatParse.Basic qualified as FP import Data.ByteString qualified as B -import Util.TypeNats ( type Length, natValInt ) +import Util.TypeNats ( natValInt ) import GHC.TypeLits @@ -36,50 +35,53 @@ import Data.Data ( Data ) import Strongweak --- | A singleton data type representing a "magic number" (a constant bytestring) --- via a phantom type. +-- | A singleton data type representing a "magic number" via a phantom type. -- -- The phantom type variable unambiguously defines a constant bytestring. -- A handful of types are supported for using magics conveniently, e.g. for pure -- ASCII magics, you may use a 'Symbol' type-level string. -data Magic (a :: k) = Magic - deriving stock (Generic, Data, Show, Eq) +data Magic (a :: k) = Magic deriving stock (Generic, Data, Show, Eq) --- | Weaken a 'Magic a' to the unit. Perhaps you prefer pattern matching on @()@ --- over @Magic@, or wish a weak type to be fully divorced from its binrep --- origins. +-- | Weaken a @'Magic' a@ to the unit. instance Weaken (Magic a) where type Weak (Magic a) = () weaken Magic = () --- | Strengthen the unit to some 'Magic a'. -instance Strengthen (Magic a) where - strengthen () = pure Magic +-- | Strengthen the unit to some @'Magic' a@. +instance Strengthen (Magic a) where strengthen () = pure Magic +-- | The byte length of a magic is known at compile time. instance IsCBLen (Magic a) where type CBLen (Magic a) = Length (MagicBytes a) -deriving via CBLenly (Magic a) instance + +-- | The byte length of a magic is obtained via reifying. +deriving via ViaCBLen (Magic a) instance KnownNat (Length (MagicBytes a)) => BLen (Magic a) -instance (bs ~ MagicBytes a, ReifyBytes bs, KnownNat (Length bs)) - => Put (Magic a) where - put Magic = Write (natValInt @(Length bs)) (reifyBytes @bs) +instance (bs ~ MagicBytes a, ReifyBytes bs) => Put (Magic a) where + put Magic = reifyBytes @bs instance (bs ~ MagicBytes a, ReifyBytes bs, KnownNat (Length bs)) => Get (Magic a) where - -- TODO silly optimization: we _could_ skip comparing BS lengths because we - -- know they have to be the same. lmao get = do -- Nice case where we _want_ flatparse's no-copy behaviour, because -- 'actual' is only in scope for this parser. Except, of course, if we -- error, in which case _now_ we copy. Efficient! - actual <- FP.take (blen magic) + actual <- FP.take (natValInt @(Length bs)) + -- silly optimization: we could skip comparing lengths because we know + -- they must be the same. very silly though if actual == expected - then pure Magic + then pure magic else eBase $ EExpected expected (B.copy actual) where expected = runPut magic magic = Magic :: Magic a +-- TODO might wanna move this +-- | The length of a type-level list. +type family Length (a :: [k]) :: Natural where + Length '[] = 0 + Length (a ': as) = 1 + Length as + {- I do lots of functions on lists, because they're structurally simple. But you can't pass type-level functions as arguments between type families. singletons @@ -115,8 +117,7 @@ class Magical (a :: k) where type MagicBytes a :: [Natural] -- | Type-level naturals go as-is. (Make sure you don't go over 255, though!) -instance Magical (ns :: [Natural]) where - type MagicBytes ns = ns +instance Magical (ns :: [Natural]) where type MagicBytes ns = ns -- | Type-level symbols are turned into their Unicode codepoints - but -- multibyte characters aren't handled, so they'll simply be overlarge bytes, diff --git a/src/Binrep/Type/NullPadded.hs b/src/Binrep/Type/NullPadded.hs index f294f73..0da9437 100644 --- a/src/Binrep/Type/NullPadded.hs +++ b/src/Binrep/Type/NullPadded.hs @@ -3,8 +3,9 @@ module Binrep.Type.NullPadded where import Binrep -import Bytezap.Write qualified as BZ +import Bytezap.Poke qualified as BZ import FlatParse.Basic qualified as FP +import Raehik.Compat.FlatParse.Basic.WithLength qualified as FP import Control.Monad.Combinators ( skipCount ) import Binrep.Util ( tshow ) @@ -55,10 +56,10 @@ instance (BLen a, Put a, KnownNat n) => Put (NullPadded n a) where paddingLen = natValInt @n - blen a -- ^ refinement guarantees >=0 -instance (BLen a, Get a, KnownNat n) => Get (NullPadded n a) where +instance (Get a, KnownNat n) => Get (NullPadded n a) where get = do - a <- get - let paddingLen = natValInt @n - blen a + (a, len) <- FP.parseWithLength get + let paddingLen = natValInt @n - len if paddingLen < 0 then eBase $ EFailNamed "TODO used to be EOverlong, cba" else do diff --git a/src/Binrep/Type/Prefix/Count.hs b/src/Binrep/Type/Prefix/Count.hs index c09a507..3908739 100644 --- a/src/Binrep/Type/Prefix/Count.hs +++ b/src/Binrep/Type/Prefix/Count.hs @@ -34,10 +34,12 @@ instance (KnownNat (Max pfx), Foldable f, Typeable pfx) instance IsCBLen (CountPrefixed pfx f a) where type CBLen (CountPrefixed pfx f a) = CBLen pfx + CBLen (f a) +{- instance (Prefix pfx, Foldable f, BLen pfx, BLen (f a)) => BLen (CountPrefixed pfx f a) where blen rfa = blen (lenToPfx @pfx (Foldable.length fa)) + blen fa where fa = unrefine1 rfa +-} instance (Prefix pfx, Foldable f, Put pfx, Put (f a)) => Put (CountPrefixed pfx f a) where diff --git a/src/Binrep/Type/Sized.hs b/src/Binrep/Type/Sized.hs index 9192ba3..675156e 100644 --- a/src/Binrep/Type/Sized.hs +++ b/src/Binrep/Type/Sized.hs @@ -16,7 +16,7 @@ import Data.Typeable ( typeRep ) import GHC.TypeNats import Util.TypeNats ( natValInt ) --- | Essentially reflects a 'BLen' type to 'CBLen'. +-- | Essentially runtime reflection of a 'BLen' type to 'CBLen'. data Size (n :: Natural) type Sized n = Refined (Size n) @@ -31,13 +31,14 @@ instance (BLen a, KnownNat n) => Predicate (Size n) a where len = blen a instance IsCBLen (Sized n a) where type CBLen (Sized n a) = n -deriving via CBLenly (Sized n a) instance KnownNat n => BLen (Sized n a) +deriving via ViaCBLen (Sized n a) instance KnownNat n => BLen (Sized n a) -instance Put a => Put (Sized n a) where +instance (Put a, KnownNat n) => Put (Sized n a) where put = put . unrefine --- TODO safety: isolate consumes all bytes if succeeds instance (Get a, KnownNat n) => Get (Sized n a) where get = do a <- FP.isolate (natValInt @n) get pure $ reallyUnsafeRefine a + -- ^ REFINE SAFETY: 'FP.isolate' consumes precisely the number of bytes + -- requested when it succeeds diff --git a/src/Raehik/Compat/FlatParse/Basic/WithLength.hs b/src/Raehik/Compat/FlatParse/Basic/WithLength.hs new file mode 100644 index 0000000..f1d7a73 --- /dev/null +++ b/src/Raehik/Compat/FlatParse/Basic/WithLength.hs @@ -0,0 +1,14 @@ +module Raehik.Compat.FlatParse.Basic.WithLength where + +import FlatParse.Basic.Parser +import GHC.Exts + +-- | Run a parser, and return the result as well as the number of bytes it +-- consumed. +parseWithLength :: ParserT st e a -> ParserT st e (a, Int) +parseWithLength (ParserT f) = ParserT $ \fp eob s st -> do + case f fp eob s st of + Fail# st' -> Fail# st' + Err# st' e -> Err# st' e + OK# st' a s' -> OK# st' (a, I# (s' `minusAddr#` s)) s' +{-# inline parseWithLength #-}