Skip to content

Commit

Permalink
ok fine we keep BLen
Browse files Browse the repository at this point in the history
  • Loading branch information
raehik committed Mar 16, 2024
1 parent b7f0352 commit 4c7622d
Show file tree
Hide file tree
Showing 11 changed files with 111 additions and 95 deletions.
3 changes: 2 additions & 1 deletion binrep.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ flag icu
library
exposed-modules:
Binrep
Binrep.BLen.Simple
Binrep.BLen
Binrep.CBLen
Binrep.CBLen.Generic
Binrep.Extra.HexByteString
Expand Down Expand Up @@ -64,6 +64,7 @@ library
Binrep.Via.Prim
Data.Aeson.Extra.SizedVector
Raehik.Compat.FlatParse.Basic.Prim
Raehik.Compat.FlatParse.Basic.WithLength
Util.TypeNats
other-modules:
Paths_binrep
Expand Down
6 changes: 3 additions & 3 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions src/Binrep.hs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
module Binrep
( module Binrep.CBLen
, module Binrep.BLen.Simple
( module Binrep.BLen
, module Binrep.CBLen
, module Binrep.Put
, module Binrep.Get
) where

import Binrep.BLen
import Binrep.CBLen
import Binrep.BLen.Simple
import Binrep.Put
import Binrep.Get

Expand Down
75 changes: 40 additions & 35 deletions src/Binrep/BLen/Simple.hs → src/Binrep/BLen.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,18 @@ byte length *before* serializing. This is that.
It should be very efficient to calculate serialized byte length for most
binrep-compatible Haskell types. If it isn't, consider whether the
representation is appropriate for binrep.
Note that you _may_ encode this inside the serializer type (whatever the @Put@
class stores). I went back and forth on this a couple times. But some binrep
code seems to make more sense when byte length is standalone. And I don't mind
the extra explicitness. So it's here to stay :)
-}

module Binrep.BLen.Simple where
module Binrep.BLen
( BLen(blen)
, blenGenericNonSum, blenGenericSum
, ViaCBLen(..), cblen
) where

import Binrep.CBLen
import GHC.TypeNats
Expand All @@ -32,7 +41,14 @@ import Generic.Data.Function.FoldMap
import Generic.Data.Rep.Assert
import Generic.Data.Function.Common

class BLen a where blen :: a -> Int
-- | Class for types with easily-calculated length in bytes.
--
-- If it appears hard to calculate byte length for a given type (e.g. without
-- first serializing it, then measuring serialized byte length), consider
-- whether this type is a good fit for binrep.
class BLen a where
-- | Calculate the serialized byte length of the given value.
blen :: a -> Int

-- newtype sum monoid for generic foldMap
newtype BLen' a = BLen' { getBLen' :: a }
Expand Down Expand Up @@ -67,49 +83,38 @@ blenGenericSum f = getBLen' . genericFoldMapSum @'SumOnly @asserts (BLen' <$> f)
instance TypeError ENoEmpty => BLen Void where blen = undefined
instance TypeError ENoSum => BLen (Either a b) where blen = undefined

-- | Unit type has length 0.
instance BLen () where
{-# INLINE blen #-}
blen () = 0
-- | _O(1)_ Unit type has length 0.
instance BLen () where blen () = 0

-- | Sum tuples.
instance (BLen l, BLen r) => BLen (l, r) where
{-# INLINE blen #-}
blen (l, r) = blen l + blen r
-- | _O(1)_ Sum tuples.
instance (BLen l, BLen r) => BLen (l, r) where blen (l, r) = blen l + blen r

-- | _O(n)_ Sum the length of each element of a list.
instance BLen a => BLen [a] where
{-# INLINE blen #-}
blen = sum . map blen

-- | Length of a bytestring is fairly obvious.
instance BLen B.ByteString where
{-# INLINE blen #-}
blen = B.length

-- Machine integers have a constant byte length.
deriving via CBLenly Word8 instance BLen Word8
deriving via CBLenly Int8 instance BLen Int8
deriving via CBLenly Word16 instance BLen Word16
deriving via CBLenly Int16 instance BLen Int16
deriving via CBLenly Word32 instance BLen Word32
deriving via CBLenly Int32 instance BLen Int32
deriving via CBLenly Word64 instance BLen Word64
deriving via CBLenly Int64 instance BLen Int64
instance BLen a => BLen [a] where blen = sum . map blen

-- | _O(1)_ 'B.ByteString's store their own length.
instance BLen B.ByteString where blen = B.length

-- All words have a constant byte length-- including host-size words, mind you!
deriving via ViaCBLen Word8 instance BLen Word8
deriving via ViaCBLen Int8 instance BLen Int8
deriving via ViaCBLen Word16 instance BLen Word16
deriving via ViaCBLen Int16 instance BLen Int16
deriving via ViaCBLen Word32 instance BLen Word32
deriving via ViaCBLen Int32 instance BLen Int32
deriving via ViaCBLen Word64 instance BLen Word64
deriving via ViaCBLen Int64 instance BLen Int64

--------------------------------------------------------------------------------

-- | Deriving via wrapper for types which may derive a 'BLen' instance through
-- an existing 'IsCBLen' instance.
-- | DerivingVia wrapper for types which may derive a 'BLen' instance through
-- an existing 'IsCBLen' instance (i.e. it is known at compile time)
--
-- Examples of such types include machine integers, and explicitly-sized types
-- (e.g. "Binrep.Type.Sized").
newtype CBLenly a = CBLenly { unCBLenly :: a }
instance KnownNat (CBLen a) => BLen (CBLenly a) where
{-# INLINE blen #-}
blen _ = cblen @a
newtype ViaCBLen a = ViaCBLen { unCBLenly :: a }
instance KnownNat (CBLen a) => BLen (ViaCBLen a) where blen _ = cblen @a

-- | Reify a type's constant byte length to the term level.
cblen :: forall a n. (n ~ CBLen a, KnownNat n) => Int
cblen = natValInt @n
{-# INLINE cblen #-}
2 changes: 1 addition & 1 deletion src/Binrep/CBLen/Generic.hs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ generics excel, and the one this module targets.
You can (attempt to) derive a 'CBLen' type family instance generically for a
type via
instance BLen a where type CBLen a = CBLenGeneric w a
instance CBLen a where type CBLen a = CBLenGeneric w a
As with deriving @BLen@ generically, you must provide the type used to store the
sum tag for sum types.
Expand Down
35 changes: 13 additions & 22 deletions src/Binrep/Put.hs
Original file line number Diff line number Diff line change
@@ -1,20 +1,11 @@
{-# LANGUAGE UndecidableInstances #-} -- required below GHC 9.6
{-# OPTIONS_GHC -fno-warn-orphans #-} -- for generic data op instance

{- | Serialization using the bytezap library.
bytezap serializers ("pokes") work by writing bytes into a pointer, which is
assumed to have _precisely_ the space required. The user must determine the
post-serialize length before the fact. For that reason, this module requires
that types to be serialized have a 'BLen' instance. In general, we are happy
about this, because a binrep type should always have an efficient and preferably
simple 'BLen' instance (and if not, it shouldn't be a binrep type).
-}

module Binrep.Put where

import Binrep.BLen ( BLen(blen) )
import Data.Functor.Identity
import Bytezap.Write
import Bytezap.Poke
import Raehik.Compat.Data.Primitive.Types ( Prim' )
import Binrep.Util.ByteOrder
import Raehik.Compat.Data.Primitive.Types.Endian ( ByteSwap )
Expand All @@ -36,23 +27,23 @@ import Generic.Data.Rep.Assert

import Control.Monad.ST ( RealWorld )

type Write' = Write RealWorld
type Putter = Poke RealWorld

class Put a where put :: a -> Write'
class Put a where put :: a -> Putter

runPut :: Put a => a -> B.ByteString
runPut = runWriteBS . put
runPut :: (BLen a, Put a) => a -> B.ByteString
runPut a = unsafeRunPokeBS (blen a) (put a)

instance GenericFoldMap Write' where
type GenericFoldMapC Write' a = Put a
instance GenericFoldMap Putter where
type GenericFoldMapC Putter a = Put a
genericFoldMapF = put

-- | Serialize a term of the non-sum type @a@ via its 'Generic' instance.
putGenericNonSum
:: forall {cd} {f} {asserts} a
. ( Generic a, Rep a ~ D1 cd f, GFoldMapNonSum Write' f
. ( Generic a, Rep a ~ D1 cd f, GFoldMapNonSum Putter f
, asserts ~ '[ 'NoEmpty, 'NoSum], ApplyGCAsserts asserts f)
=> a -> Write'
=> a -> Putter
putGenericNonSum = genericFoldMapNonSum @asserts

-- | Serialize a term of the sum type @a@ via its 'Generic' instance.
Expand All @@ -62,9 +53,9 @@ putGenericNonSum = genericFoldMapNonSum @asserts
-- if you want better performance!
putGenericSum
:: forall {cd} {f} {asserts} a
. (Generic a, Rep a ~ D1 cd f, GFoldMapSum 'SumOnly Write' f
. (Generic a, Rep a ~ D1 cd f, GFoldMapSum 'SumOnly Putter f
, asserts ~ '[ 'NoEmpty, 'NeedSum], ApplyGCAsserts asserts f)
=> (String -> Write') -> a -> Write'
=> (String -> Putter) -> a -> Putter
putGenericSum = genericFoldMapSum @'SumOnly @asserts

instance Prim' a => Put (ViaPrim a) where put = prim . unViaPrim
Expand All @@ -74,7 +65,7 @@ instance TypeError ENoSum => Put (Either a b) where put = undefined

instance Put a => Put (Identity a) where put = put . runIdentity

instance Put Write' where put = id
instance Put Putter where put = id

-- | Unit type serializes to nothing. How zen.
instance Put () where
Expand Down
45 changes: 23 additions & 22 deletions src/Binrep/Type/Magic.hs
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,9 @@ module Binrep.Type.Magic where

import Binrep
import Binrep.Type.Byte
import Bytezap.Write.Internal ( Write(Write) )
import FlatParse.Basic qualified as FP
import Data.ByteString qualified as B
import Util.TypeNats ( type Length, natValInt )
import Util.TypeNats ( natValInt )

import GHC.TypeLits

Expand All @@ -36,50 +35,53 @@ import Data.Data ( Data )

import Strongweak

-- | A singleton data type representing a "magic number" (a constant bytestring)
-- via a phantom type.
-- | A singleton data type representing a "magic number" via a phantom type.
--
-- The phantom type variable unambiguously defines a constant bytestring.
-- A handful of types are supported for using magics conveniently, e.g. for pure
-- ASCII magics, you may use a 'Symbol' type-level string.
data Magic (a :: k) = Magic
deriving stock (Generic, Data, Show, Eq)
data Magic (a :: k) = Magic deriving stock (Generic, Data, Show, Eq)

-- | Weaken a 'Magic a' to the unit. Perhaps you prefer pattern matching on @()@
-- over @Magic@, or wish a weak type to be fully divorced from its binrep
-- origins.
-- | Weaken a @'Magic' a@ to the unit.
instance Weaken (Magic a) where
type Weak (Magic a) = ()
weaken Magic = ()

-- | Strengthen the unit to some 'Magic a'.
instance Strengthen (Magic a) where
strengthen () = pure Magic
-- | Strengthen the unit to some @'Magic' a@.
instance Strengthen (Magic a) where strengthen () = pure Magic

-- | The byte length of a magic is known at compile time.
instance IsCBLen (Magic a) where type CBLen (Magic a) = Length (MagicBytes a)
deriving via CBLenly (Magic a) instance

-- | The byte length of a magic is obtained via reifying.
deriving via ViaCBLen (Magic a) instance
KnownNat (Length (MagicBytes a)) => BLen (Magic a)

instance (bs ~ MagicBytes a, ReifyBytes bs, KnownNat (Length bs))
=> Put (Magic a) where
put Magic = Write (natValInt @(Length bs)) (reifyBytes @bs)
instance (bs ~ MagicBytes a, ReifyBytes bs) => Put (Magic a) where
put Magic = reifyBytes @bs

instance (bs ~ MagicBytes a, ReifyBytes bs, KnownNat (Length bs))
=> Get (Magic a) where
-- TODO silly optimization: we _could_ skip comparing BS lengths because we
-- know they have to be the same. lmao
get = do
-- Nice case where we _want_ flatparse's no-copy behaviour, because
-- 'actual' is only in scope for this parser. Except, of course, if we
-- error, in which case _now_ we copy. Efficient!
actual <- FP.take (blen magic)
actual <- FP.take (natValInt @(Length bs))
-- silly optimization: we could skip comparing lengths because we know
-- they must be the same. very silly though
if actual == expected
then pure Magic
then pure magic
else eBase $ EExpected expected (B.copy actual)
where
expected = runPut magic
magic = Magic :: Magic a

-- TODO might wanna move this
-- | The length of a type-level list.
type family Length (a :: [k]) :: Natural where
Length '[] = 0
Length (a ': as) = 1 + Length as

{-
I do lots of functions on lists, because they're structurally simple. But you
can't pass type-level functions as arguments between type families. singletons
Expand Down Expand Up @@ -115,8 +117,7 @@ class Magical (a :: k) where
type MagicBytes a :: [Natural]

-- | Type-level naturals go as-is. (Make sure you don't go over 255, though!)
instance Magical (ns :: [Natural]) where
type MagicBytes ns = ns
instance Magical (ns :: [Natural]) where type MagicBytes ns = ns

-- | Type-level symbols are turned into their Unicode codepoints - but
-- multibyte characters aren't handled, so they'll simply be overlarge bytes,
Expand Down
9 changes: 5 additions & 4 deletions src/Binrep/Type/NullPadded.hs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
module Binrep.Type.NullPadded where

import Binrep
import Bytezap.Write qualified as BZ
import Bytezap.Poke qualified as BZ
import FlatParse.Basic qualified as FP
import Raehik.Compat.FlatParse.Basic.WithLength qualified as FP
import Control.Monad.Combinators ( skipCount )

import Binrep.Util ( tshow )
Expand Down Expand Up @@ -55,10 +56,10 @@ instance (BLen a, Put a, KnownNat n) => Put (NullPadded n a) where
paddingLen = natValInt @n - blen a
-- ^ refinement guarantees >=0

instance (BLen a, Get a, KnownNat n) => Get (NullPadded n a) where
instance (Get a, KnownNat n) => Get (NullPadded n a) where
get = do
a <- get
let paddingLen = natValInt @n - blen a
(a, len) <- FP.parseWithLength get
let paddingLen = natValInt @n - len
if paddingLen < 0
then eBase $ EFailNamed "TODO used to be EOverlong, cba"
else do
Expand Down
2 changes: 2 additions & 0 deletions src/Binrep/Type/Prefix/Count.hs
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,12 @@ instance (KnownNat (Max pfx), Foldable f, Typeable pfx)
instance IsCBLen (CountPrefixed pfx f a) where
type CBLen (CountPrefixed pfx f a) = CBLen pfx + CBLen (f a)

{-
instance (Prefix pfx, Foldable f, BLen pfx, BLen (f a))
=> BLen (CountPrefixed pfx f a) where
blen rfa = blen (lenToPfx @pfx (Foldable.length fa)) + blen fa
where fa = unrefine1 rfa
-}

instance (Prefix pfx, Foldable f, Put pfx, Put (f a))
=> Put (CountPrefixed pfx f a) where
Expand Down
9 changes: 5 additions & 4 deletions src/Binrep/Type/Sized.hs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import Data.Typeable ( typeRep )
import GHC.TypeNats
import Util.TypeNats ( natValInt )

-- | Essentially reflects a 'BLen' type to 'CBLen'.
-- | Essentially runtime reflection of a 'BLen' type to 'CBLen'.
data Size (n :: Natural)
type Sized n = Refined (Size n)

Expand All @@ -31,13 +31,14 @@ instance (BLen a, KnownNat n) => Predicate (Size n) a where
len = blen a

instance IsCBLen (Sized n a) where type CBLen (Sized n a) = n
deriving via CBLenly (Sized n a) instance KnownNat n => BLen (Sized n a)
deriving via ViaCBLen (Sized n a) instance KnownNat n => BLen (Sized n a)

instance Put a => Put (Sized n a) where
instance (Put a, KnownNat n) => Put (Sized n a) where
put = put . unrefine

-- TODO safety: isolate consumes all bytes if succeeds
instance (Get a, KnownNat n) => Get (Sized n a) where
get = do
a <- FP.isolate (natValInt @n) get
pure $ reallyUnsafeRefine a
-- ^ REFINE SAFETY: 'FP.isolate' consumes precisely the number of bytes
-- requested when it succeeds
Loading

0 comments on commit 4c7622d

Please sign in to comment.