Skip to content

Commit

Permalink
Serialise Texts without going through ByteString
Browse files Browse the repository at this point in the history
  • Loading branch information
alexbiehl committed Oct 17, 2024
1 parent aee6924 commit ae791f0
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 9 deletions.
17 changes: 11 additions & 6 deletions src/Data/Text.hs
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ import qualified Data.Text.Array as A
import qualified Data.List as L hiding (head, tail)
import qualified Data.List.NonEmpty as NonEmptyList
import Data.Binary (Binary(get, put))
import Data.Binary.Put (putBuilder)
import Data.Monoid (Monoid(..))
import Data.Semigroup (Semigroup(..))
import Data.String (IsString(..))
Expand All @@ -245,7 +246,7 @@ import Data.Text.Internal.Measure (measure_off)
import Data.Text.Internal.Encoding.Utf8 (utf8Length, utf8LengthByLeader, chr3, ord2, ord3, ord4)
import qualified Data.Text.Internal.Fusion as S
import qualified Data.Text.Internal.Fusion.Common as S
import Data.Text.Encoding (decodeUtf8', encodeUtf8)
import Data.Text.Encoding (decodeUtf8', encodeUtf8Builder)
import Data.Text.Internal.Fusion (stream, reverseStream, unstream)
import Data.Text.Internal.Private (span_)
import Data.Text.Internal (Text(..), StrictText, empty, firstf, mul, safe, text, append, pack)
Expand Down Expand Up @@ -394,7 +395,11 @@ instance NFData Text where rnf !_ = ()

-- | @since 1.2.1.0
instance Binary Text where
put t = put (encodeUtf8 t)
put t = do
-- This needs to be in sync with the Binary instance for ByteString
-- in the binary package.
put (lengthWord8 t)
putBuilder (encodeUtf8Builder t)
get = do
bs <- get
case decodeUtf8' bs of
Expand Down Expand Up @@ -556,7 +561,7 @@ null (Text _arr _off len) =
len <= 0
{-# INLINE [1] null #-}

{-# RULES
{-# RULES
"TEXT null/empty -> True" null empty = True
#-}

Expand Down Expand Up @@ -1275,7 +1280,7 @@ take :: Int -> Text -> Text
take n t@(Text arr off len)
| n <= 0 = empty
| n >= len || m >= len || m < 0 = t
| otherwise = Text arr off m
| otherwise = Text arr off m
where
m = measureOff n t
{-# INLINE [1] take #-}
Expand Down Expand Up @@ -1325,7 +1330,7 @@ drop :: Int -> Text -> Text
drop n t@(Text arr off len)
| n <= 0 = t
| n >= len || m >= len || m < 0 = empty
| otherwise = Text arr (off+m) (len-m)
| otherwise = Text arr (off+m) (len-m)
where m = measureOff n t
{-# INLINE [1] drop #-}

Expand Down Expand Up @@ -1434,7 +1439,7 @@ splitAt :: Int -> Text -> (Text, Text)
splitAt n t@(Text arr off len)
| n <= 0 = (empty, t)
| n >= len || m >= len || m < 0 = (t, empty)
| otherwise = (Text arr off m, Text arr (off+m) (len-m))
| otherwise = (Text arr off m, Text arr (off+m) (len-m))
where
m = measureOff n t

Expand Down
9 changes: 7 additions & 2 deletions src/Data/Text/Lazy.hs
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ import Data.Char (isSpace)
import Data.Data (Data(gfoldl, toConstr, gunfold, dataTypeOf), constrIndex,
Constr, mkConstr, DataType, mkDataType, Fixity(Prefix))
import Data.Binary (Binary(get, put))
import Data.Binary.Put (putBuilder)
import Data.List.NonEmpty (NonEmpty(..))
import qualified Data.List.NonEmpty as NE
import Data.Monoid (Monoid(..))
Expand All @@ -241,7 +242,7 @@ import Data.Text.Internal.Lazy (Text(..), chunk, empty, foldlChunks,
import Data.Text.Internal (firstf, safe, text)
import Data.Text.Internal.Reverse (reverseNonEmpty)
import Data.Text.Internal.Transformation (mapNonEmpty, toCaseFoldNonEmpty, toLowerNonEmpty, toUpperNonEmpty, filter_)
import Data.Text.Lazy.Encoding (decodeUtf8', encodeUtf8)
import Data.Text.Lazy.Encoding (decodeUtf8', encodeUtf8Builder)
import Data.Text.Internal.Lazy.Search (indices)
import qualified GHC.CString as GHC
import qualified GHC.Exts as Exts
Expand Down Expand Up @@ -352,7 +353,11 @@ instance NFData Text where

-- | @since 1.2.1.0
instance Binary Text where
put t = put (encodeUtf8 t)
put t = do
-- This needs to be in sync with the Binary instance for ByteString
-- in the binary package.
put (foldlChunks (\n c -> n + T.lengthWord8 c) 0 t)
putBuilder (encodeUtf8Builder t)
get = do
bs <- get
case decodeUtf8' bs of
Expand Down
15 changes: 14 additions & 1 deletion tests/Tests/Properties/Instances.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ module Tests.Properties.Instances
( testInstances
) where

import Data.Binary (encode, decodeOrFail)
import Data.String (IsString(fromString))
import Test.QuickCheck
import Test.Tasty (TestTree, testGroup)
Expand Down Expand Up @@ -43,6 +44,16 @@ tl_mempty = mempty === (unpackS (mempty :: TL.Text))
t_IsString = fromString `eqP` (T.unpack . fromString)
tl_IsString = fromString `eqP` (TL.unpack . fromString)

t_Binary s =
case decodeOrFail . encode $ (s :: T.Text) of
Left _ -> counterexample (show (T.unpack s)) (property False)
Right (_, _, s') -> s === s'

tl_Binary s =
case decodeOrFail . encode $ (s :: TL.Text) of
Left _ -> counterexample (show (TL.unpack s)) (property False)
Right (_, _, s') -> s === s'

testInstances :: TestTree
testInstances =
testGroup "instances" [
Expand All @@ -65,5 +76,7 @@ testInstances =
testProperty "t_mempty" t_mempty,
testProperty "tl_mempty" tl_mempty,
testProperty "t_IsString" t_IsString,
testProperty "tl_IsString" tl_IsString
testProperty "tl_IsString" tl_IsString,
testProperty "t_Binary" t_Binary,
testProperty "tl_Binary" tl_Binary
]
1 change: 1 addition & 0 deletions text.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,7 @@ test-suite tests
build-depends:
QuickCheck >= 2.12.6 && < 2.16,
base <5,
binary,
bytestring,
deepseq,
directory,
Expand Down

0 comments on commit ae791f0

Please sign in to comment.