From b31f7c3f0e12e8c31d46c46fd68c750ee028da54 Mon Sep 17 00:00:00 2001
From: Judah Jacobson <judah@users.noreply.github.com>
Date: Wed, 2 Jan 2019 16:41:07 -0800
Subject: [PATCH] Use a custom parsing monad instead of attoparsec. (#298)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All decoding benchmarks show significant speedups after this change.
The biggest improvement is to decoding packed data which is 4-5x as fast
as before.  (See below for a full list of benchmark diffs.)

This parsing monad follows the approach of, e.g., the `store` and `persist`
packages.  It requires that all data be in a *strict* `ByteString`,
and uses simple pointer arithmetic internally to walk through its bytes.

This effectively works against #62 (streaming parsers) since it
needs to read all the input data before starting the parse.  However,
that issue has already existed since the beginning of this library for,
e.g., submessages; see that bug for more details.  So this change
doesn't appear to be a regression.  We also have freedom to later
try out different implementations without changing the API, since
`Parser` is opaque as of #294.

The implementation of Parser differs from `store` and `persist` by using
`ExceptT` to pass around errors internally, rather than exceptions (or
closures, as in `attoparsec`).  We may want to experiment with this later,
but in my initial experiments I didn't see a significant improvement
from those approaches.

Benchmark results (the "time" output from Criterion):

flat(602B)/decode/whnf:
   13.14 μs   (13.02 μs .. 13.29 μs)
=> 8.686 μs   (8.514 μs .. 8.873 μs)

nested(900B)/decode/whnf:
    26.35 μs   (25.85 μs .. 26.86 μs)
=> 11.66 μs   (11.36 μs .. 11.99 μs)

int32-packed(1003B)/decode/whnf:
   36.23 μs   (35.75 μs .. 36.69 μs)
=> 17.31 μs   (17.11 μs .. 17.50 μs)

int32-unpacked(2000B)/decode/whnf:
   65.18 μs   (64.19 μs .. 66.68 μs)
=> 19.35 μs   (19.13 μs .. 19.58 μs)

float-packed(4003B)/decode/whnf:
   78.61 μs   (77.53 μs .. 79.46 μs)
=> 19.56 μs   (19.40 μs .. 19.76 μs)

float-unpacked(5000B)/decode/whnf:
   108.9 μs   (107.8 μs .. 110.3 μs)
=> 22.29 μs   (22.00 μs .. 22.66 μs)

no-unused(10003B)/decode/whnf:
   571.7 μs   (560.0 μs .. 586.6 μs)
=> 356.5 μs   (349.0 μs .. 365.0 μs)

with-unused(10003B)/decode/whnf:
   786.6 μs   (697.8 μs .. 875.5 μs)
=> 368.3 μs   (361.8 μs .. 376.4 μs)

Also added isolate and used it for parsing messages and packed fields.

This improved the nested benchmark a bit compared to without it:

benchmarking nested(900B)/decode/whnf
   14.32 μs   (14.08 μs .. 14.57 μs)
=> 11.66 μs   (11.36 μs .. 11.99 μs)

It didn't make a significant difference in the packed benchmark,
I think because the effects of using lists currently dominate everything else.
---
 package.yaml                          |  28 +++---
 src/Data/ProtoLens/Encoding/Bytes.hs  |  37 +------
 src/Data/ProtoLens/Encoding/Parser.hs | 137 ++++++++++++++++++++++++++
 tests/parser_test.hs                  |  97 ++++++++++++++++++
 4 files changed, 250 insertions(+), 49 deletions(-)
 create mode 100644 src/Data/ProtoLens/Encoding/Parser.hs
 create mode 100644 tests/parser_test.hs

diff --git a/package.yaml b/package.yaml
index 4852bc39..f94927c7 100644
--- a/package.yaml
+++ b/package.yaml
@@ -29,26 +29,10 @@ data-files:
 
 library:
   source-dirs: src
-  exposed-modules:
-    - Data.ProtoLens
-    - Data.ProtoLens.Combinators
-    - Data.ProtoLens.Default
-    - Data.ProtoLens.Encoding
-    - Data.ProtoLens.Encoding.Bytes
-    - Data.ProtoLens.Encoding.Wire
-    - Data.ProtoLens.Message
-    - Data.ProtoLens.Message.Enum
-    - Data.ProtoLens.Service.Types
-    - Data.ProtoLens.TextFormat
-    - Proto.Google.Protobuf.Compiler.Plugin
-    - Proto.Google.Protobuf.Compiler.Plugin_Fields
-    - Proto.Google.Protobuf.Descriptor
-    - Proto.Google.Protobuf.Descriptor_Fields
 
   other-modules:
     - Data.ProtoLens.TextFormat.Parser
   dependencies:
-    - attoparsec == 0.13.*
     - base >= 4.9 && < 4.13
     - bytestring == 0.10.*
     - containers >= 0.5 && < 0.7
@@ -60,3 +44,15 @@ library:
     - text == 1.2.*
     - transformers >= 0.4 && < 0.6
     - void == 0.7.*
+
+tests:
+  parser_test:
+    main: parser_test.hs
+    source-dirs: tests
+    dependencies:
+    - base
+    - bytestring
+    - proto-lens
+    - QuickCheck
+    - test-framework
+    - test-framework-quickcheck2
diff --git a/src/Data/ProtoLens/Encoding/Bytes.hs b/src/Data/ProtoLens/Encoding/Bytes.hs
index 0ca9bd11..7425871e 100644
--- a/src/Data/ProtoLens/Encoding/Bytes.hs
+++ b/src/Data/ProtoLens/Encoding/Bytes.hs
@@ -15,6 +15,7 @@ module Data.ProtoLens.Encoding.Bytes(
     Parser,
     Builder,
     runParser,
+    isolate,
     runBuilder,
     -- * Bytestrings
     getBytes,
@@ -42,38 +43,24 @@ module Data.ProtoLens.Encoding.Bytes(
     (<?>),
     ) where
 
-import qualified Data.Attoparsec.ByteString as Atto
 import Data.Bits
 import Data.ByteString (ByteString)
 import Data.ByteString.Lazy.Builder as Builder
 import qualified Data.ByteString.Lazy as L
 import Data.Int (Int32, Int64)
 import Data.Monoid ((<>))
-import Data.Word (Word8, Word32, Word64)
+import Data.Word (Word32, Word64)
 import Foreign.Ptr (castPtr)
 import Foreign.Marshal.Alloc (alloca)
 import Foreign.Storable (Storable, peek, poke)
 import System.IO.Unsafe (unsafePerformIO)
 
--- | A parsing monad for decoding the wire format.
-newtype Parser a = Parser (Atto.Parser a)
-    deriving (Functor, Applicative, Monad)
-
--- | Evaluates a parser on the given input.
---
--- If the parser does not consume all of the input, the rest of the
--- input is discarded and the parser still succeeds.
-runParser :: Parser a -> ByteString -> Either String a
-runParser (Parser p) = Atto.parseOnly p
+import Data.ProtoLens.Encoding.Parser
 
 -- | Constructs a strict 'ByteString' from the given 'Builder'.
 runBuilder :: Builder -> ByteString
 runBuilder = L.toStrict . Builder.toLazyByteString
 
--- | Parse a @ByteString@ of the given length.
-getBytes :: Int -> Parser ByteString
-getBytes = Parser . Atto.take
-
 -- | Emit a given @ByteString@.
 putBytes :: ByteString -> Builder
 putBytes = Builder.byteString
@@ -96,18 +83,8 @@ putVarInt n
     | otherwise = Builder.word8 (fromIntegral $ n .&. 127 .|. 128)
                       <> putVarInt (n `shiftR` 7)
 
-getWord8 :: Parser Word8
-getWord8 = Parser Atto.anyWord8
-
--- | Little-endian decoding function.
 getFixed32 :: Parser Word32
-getFixed32 = do
-    b1 <- getWord8
-    b2 <- getWord8
-    b3 <- getWord8
-    b4 <- getWord8
-    return $ ((fromIntegral b4 `shiftL` 8 + fromIntegral b3)
-                `shiftL` 8 + fromIntegral b2) `shiftL` 8 + fromIntegral b1
+getFixed32 = getWord32le
 
 getFixed64 :: Parser Word64
 getFixed64 = do
@@ -163,9 +140,3 @@ wordToSignedInt64 n
 
 runEither :: Either String a -> Parser a
 runEither = either fail return
-
-atEnd :: Parser Bool
-atEnd = Parser Atto.atEnd
-
-(<?>) :: Parser a -> String -> Parser a
-Parser p <?> msg = Parser (p Atto.<?> msg)
diff --git a/src/Data/ProtoLens/Encoding/Parser.hs b/src/Data/ProtoLens/Encoding/Parser.hs
new file mode 100644
index 00000000..e7bd4d68
--- /dev/null
+++ b/src/Data/ProtoLens/Encoding/Parser.hs
@@ -0,0 +1,137 @@
+-- | A custom parsing monad, optimized for speed.
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+module Data.ProtoLens.Encoding.Parser
+    ( Parser
+    , runParser
+    , atEnd
+    , isolate
+    , getWord8
+    , getWord32le
+    , getBytes
+    , (<?>)
+    ) where
+
+import Control.Monad (ap)
+import Data.Bits (shiftL, (.|.))
+import Foreign.Ptr
+import Foreign.Storable
+import Control.Monad.Trans.Except
+import Data.Word (Word8, Word32)
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Unsafe as B
+import Data.ByteString (ByteString)
+import Control.Monad.IO.Class
+import System.IO.Unsafe
+
+-- | A monad for parsing an input buffer.
+newtype Parser a = Parser
+    { unParser :: Ptr Word8 -- End position of the input
+               -> Ptr Word8 -- Current position in the input
+               -> ExceptT String IO (ParserResult a) }
+
+data ParserResult a = ParserResult
+    { _newPos :: !(Ptr Word8) -- ^ New position in the input
+    , unParserResult :: a
+    }
+
+instance Functor ParserResult where
+    fmap f (ParserResult p x) = ParserResult p (f x)
+
+instance Functor Parser where
+    fmap f (Parser g) = Parser $ \end cur -> fmap f <$> g end cur
+
+instance Applicative Parser where
+    pure x = Parser $ \_ cur -> return $ ParserResult cur x
+    (<*>) = ap
+
+instance Monad Parser where
+    fail s = Parser $ \_ _ -> throwE s
+    return = pure
+    Parser f >>= g = Parser $ \end pos -> do
+        ParserResult pos' x <- f end pos
+        unParser (g x) end pos'
+
+-- | Evaluates a parser on the given input.
+--
+-- If the parser does not consume all of the input, the rest of the
+-- input is discarded and the parser still succeeds.  Parsers may use
+-- 'atEnd' to detect whether they are at the end of the input.
+--
+-- Values returned from actions in this monad will not hold onto the original
+-- ByteString, but rather make immutable copies of subsets of its bytes.
+runParser :: Parser a -> ByteString -> Either String a
+runParser (Parser m) b = unsafePerformIO $ B.unsafeUseAsCStringLen b $ \(p, len)
+    -> runExceptT $ fmap unParserResult $ m (p `plusPtr` len) (castPtr p)
+
+-- | Returns True if there is no more input left to consume.
+atEnd :: Parser Bool
+atEnd = Parser $ \end pos -> return $ ParserResult pos (pos == end)
+
+-- | Parse a one-byte word.
+getWord8 :: Parser Word8
+getWord8 = withSized 1 "getWord8: Unexpected end of input" peek
+
+-- | Parser a 4-byte word in little-endian order.
+getWord32le :: Parser Word32
+getWord32le = withSized 4 "getWord32le: Unexpected end of input" $ \pos -> do
+    b1 <- fromIntegral <$> peek pos
+    b2 <- fromIntegral <$> peek (pos `plusPtr'` 1)
+    b3 <- fromIntegral <$> peek (pos `plusPtr'` 2)
+    b4 <- fromIntegral <$> peek (pos `plusPtr'` 3)
+    let f b b' = b `shiftL` 8 .|. b'
+    return $! f (f (f b4 b3) b2) b1
+
+-- | Parse a sequence of zero or more bytes of the given length.
+--
+-- The new ByteString is an immutable copy of the bytes in the input
+-- and will be managed separately on the Haskell heap from the original
+-- input 'B.ByteString'.
+--
+-- Fails the parse if given a negative length.
+getBytes :: Int -> Parser B.ByteString
+getBytes n = withSized n "getBytes: Unexpected end of input"
+                    $ \pos -> B.packCStringLen (castPtr pos, n)
+
+-- | Helper function for reading bytes from the current position and
+-- advancing the pointer.
+--
+-- Fails the parse if given a negative length.  (GHC will elide the check
+-- if the length is a nonnegative constant.)
+--
+-- It is only safe for @f@ to peek between its argument @p@ and
+-- @p `plusPtr` (len - 1)@, inclusive.
+withSized :: Int -> String -> (Ptr Word8 -> IO a) -> Parser a
+withSized len message f
+    | len >= 0 = Parser $ \end pos ->
+        let pos' = pos `plusPtr'` len
+        in if pos' > end
+            then throwE $ message
+            else liftIO $ ParserResult pos' <$> f pos
+    | otherwise = fail "withSized: negative length"
+{-# INLINE withSized #-}
+
+-- | Run the given parsing action as if there are only 
+-- @len@ bytes remaining.  That is, once @len@ bytes have been
+-- consumed, 'atEnd' will return 'True' and other actions
+-- like 'getWord8' will act like there is no input remaining.
+--
+-- Fails the parse if given a negative length.
+isolate :: Int -> Parser a -> Parser a
+isolate len (Parser m)
+    | len >= 0 = Parser $ \end pos ->
+        let end' = pos `plusPtr` len
+        in if end' > end
+            then throwE "isolate: unexpected end of input"
+            else m end' pos
+    | otherwise = fail "isolate: negative length"
+
+-- | If the parser fails, prepend an error message.
+(<?>) :: Parser a -> String -> Parser a
+Parser m <?> msg = Parser $ \end p ->
+    withExceptT (\s -> msg ++ ": " ++ s) $ m end p
+
+-- | Advance a pointer.  Unlike 'plusPtr', preserves the type of the input.
+plusPtr' :: Ptr a -> Int -> Ptr a
+plusPtr' = plusPtr
diff --git a/tests/parser_test.hs b/tests/parser_test.hs
new file mode 100644
index 00000000..4ccb46fe
--- /dev/null
+++ b/tests/parser_test.hs
@@ -0,0 +1,97 @@
+-- | Unit and property tests for our custom parsing monad.
+module Main (main) where
+
+import Control.Applicative (liftA2)
+import qualified Data.ByteString as B
+import Data.Either (isLeft)
+
+import Test.QuickCheck
+import Test.Framework (defaultMain, testGroup, Test)
+import Test.Framework.Providers.QuickCheck2 (testProperty)
+
+import Data.ProtoLens.Encoding.Bytes
+import Data.ProtoLens.Encoding.Parser
+
+main :: IO ()
+main = defaultMain
+    [ testGroup "Parser" testParser
+    , testGroup "getWord8" testGetWord8
+    , testGroup "getBytes" testGetBytes
+    , testGroup "getWord32le" testGetWord32le
+    , testGroup "failure" testFailure
+    , testGroup "isolate" testIsolate
+    ]
+
+testParser :: [Test]
+testParser =
+    -- Test out the Applicative instance by using "traverse" to read the same number of bytes
+    -- as in the input.
+    -- "traverse (const f) g" runs f once for every element of g.
+    [ testProperty "traverse" $ \ws -> runParser (traverse (const getWord8) ws)
+                                        (B.pack ws)
+                                    === Right ws
+    ]
+
+testGetWord8 :: [Test]
+testGetWord8 =
+    [ testProperty "atEnd" $ \ws -> runParser atEnd (B.pack ws) === Right (null ws)
+    , testProperty "manyTillEnd"
+            $ \ws -> runParser (manyTillEnd getWord8) (B.pack ws) === Right ws
+    ]
+
+testGetBytes :: [Test]
+testGetBytes =
+    [ testProperty "many"
+            $ \ws -> let
+                packed = map B.pack ws
+                in runParser (mapM (getBytes . B.length) packed) (B.concat packed)
+                    === Right packed
+    , testProperty "negative length"
+        $ \n ws -> n < 0 ==> counterexampleF isLeft
+                                (runParser (getBytes n) $ B.pack ws)
+    ]
+
+testGetWord32le :: [Test]
+testGetWord32le =
+    [ testProperty "align"
+        $ \ws -> length ws `mod` 4 /= 0 ==>
+                    counterexampleF isLeft (runParser (manyTillEnd getWord32le) (B.pack ws))
+    , testProperty "manyTillEnd" $ \ws ->
+            runParser (manyTillEnd getWord32le) (runBuilder $ foldMap putFixed32 ws)
+                === Right ws
+    ]
+
+testFailure :: [Test]
+testFailure =
+    [ testProperty "fail-fast" $ \bs ->
+        runParser (fail "abcde") (B.pack bs)
+            === (Left "abcde" :: Either String ())
+    , testProperty "<?>" $ \bs ->
+        runParser (fail "abcde" <?> "fghij") (B.pack bs)
+            === (Left "fghij: abcde" :: Either String ())
+    ]
+
+testIsolate :: [Test]
+testIsolate =
+    [ testProperty "many" $ \bs bs' ->
+        runParser (liftA2 (,) (isolate (length bs) $ manyTillEnd getWord8)
+                        (manyTillEnd getWord8))
+            (B.pack (bs ++ bs'))
+            == Right (bs, bs')
+    , testProperty "negative length" $ \n ws ->
+        n < 0 ==> counterexampleF isLeft $ runParser (isolate n getWord8) $ B.pack ws
+    ]
+
+-- Since this is a test, just implement the slow stack-heavy way.
+manyTillEnd :: Parser a -> Parser [a]
+manyTillEnd p = do
+    end <- atEnd
+    if end
+        then return []
+        else do
+            x <- p
+            xs <- manyTillEnd p
+            return $ x : xs
+
+counterexampleF :: (Testable prop, Show a) => (a -> prop) -> a -> Property
+counterexampleF f x = counterexample (show x) $ f x