-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
perf: Use ByteString for lexer; decode to Text immediately.
This makes the end-to-end parsing about 2x faster. We can now parse all of toxcore in 0.45 seconds on my machine, making it parse around 3.4MiB/s (including TreeParser and CommentParser). The lexer consumes 0.11s out of those 0.45s, running at around 9.8MiB/s.
- Loading branch information
Showing
8 changed files
with
116 additions
and
60 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
{-# LANGUAGE Strict #-} | ||
module Main (main) where | ||
|
||
import qualified Data.ByteString.Lazy as LBS | ||
import qualified Data.Text as Text | ||
import Data.Time.Clock (diffUTCTime, getCurrentTime) | ||
import Language.Cimple (Alex, Lexeme (..), LexemeClass (..), | ||
alexMonadScan, runAlex) | ||
import System.Environment (getArgs) | ||
|
||
|
||
-- | Run the lexer over the whole input and tally what it produced.
--
-- Returns @(lexemes, chars)@: the number of lexemes seen before the 'Eof'
-- lexeme, and the sum of 'Text.length' over their texts. A lexer failure is
-- reported as 'Left' by 'runAlex'.
countTokens :: LBS.ByteString -> Either String (Int, Int)
countTokens str = runAlex str (go 0 0)
  where
    -- Pull one lexeme at a time, carrying both running totals.
    go :: Int -> Int -> Alex (Int, Int)
    go nToks nChars = alexMonadScan >>= step
      where
        step (L _ cls txt)
            | cls == Eof = return (nToks, nChars)
            | otherwise  = go (nToks + 1) (nChars + Text.length txt)
|
||
-- | Read one source file as a lazy 'LBS.ByteString' and count its tokens.
--
-- A lexer error ('Left' from 'countTokens') is turned into an 'IO' failure
-- via 'fail'; on success the @(lexemes, chars)@ pair is returned unchanged.
processFile :: FilePath -> IO (Int, Int)
processFile source =
    LBS.readFile source >>= either fail return . countTokens
|
||
-- | Run 'processFile' over every path in order and add up the per-file
-- results component-wise, giving the total lexeme count and total text
-- length across all inputs.
processFiles :: [FilePath] -> IO (Int, Int)
processFiles sources = do
    perFile <- mapM processFile sources
    let (tokCounts, lenCounts) = unzip perFile
    return (sum tokCounts, sum lenCounts)
|
||
-- | Benchmark entry point: lex every file named on the command line and
-- print the number of sources, the lexeme count, the total lexeme text
-- length, the elapsed wall-clock time and the resulting throughput.
main :: IO ()
main = do
    sources <- getArgs
    start <- getCurrentTime
    (toks, len) <- processFiles sources
    end <- getCurrentTime
    let elapsed = diffUTCTime end start
        -- Compute the rate as a plain Double. 'show' on a NominalDiffTime
        -- appends an "s" suffix, which previously rendered the throughput
        -- as e.g. "3.4s MiB/s"; dividing in that fixed-precision type also
        -- raises an exception if the elapsed time is exactly zero.
        mibPerSec :: Double
        mibPerSec = fromIntegral len / 1024 / 1024 / realToFrac elapsed
    -- NOTE(review): 'len' is whatever 'processFiles' sums up; it is labelled
    -- "bytes" here, which presumably only matches for ASCII input - confirm.
    putStrLn $ "Tokenised " <> show (length sources) <> " sources into "
        <> show toks <> " lexemes (" <> show len <> " bytes) in "
        <> show elapsed <> " ("
        <> show mibPerSec <> " MiB/s)"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,23 +1,21 @@ | ||
module Main (main) where | ||
|
||
import qualified Data.ByteString as BS | ||
import qualified Data.Text as Text | ||
import qualified Data.Text.Encoding as Text | ||
import Language.Cimple (alexScanTokens) | ||
import System.Environment (getArgs) | ||
import Text.Groom (groom) | ||
import qualified Data.ByteString.Lazy as LBS | ||
import Language.Cimple (alexScanTokens) | ||
import System.Environment (getArgs) | ||
import Text.Groom (groom) | ||
|
||
-- | Lex a single source file and pretty-print the resulting token list.
--
-- Prints a "Processing ..." banner first. A lexer error ('Left' from
-- 'alexScanTokens') becomes an 'IO' failure via 'fail'; on success the
-- tokens are rendered with 'groom' and written to stdout.
processFile :: FilePath -> IO ()
processFile source = do
    putStrLn $ "Processing " ++ source
    -- Read the file once, as the lazy ByteString handed to the lexer. The
    -- earlier strict-read/decode/unpack binding of the same name was
    -- shadowed immediately and never used, so it has been dropped.
    contents <- LBS.readFile source
    case alexScanTokens contents of
        Left err -> fail err
        Right ok -> putStrLn $ groom ok
|
||
-- | Entry point for the token dumper: expects exactly one source path on
-- the command line and passes it to 'processFile'; any other argument count
-- fails with a usage message.
main :: IO ()
main = do
    -- Dispatch exactly once; the argument handling was previously present
    -- twice in a row, which would have dumped the file a second time.
    args <- getArgs
    case args of
        [src] -> processFile src
        _     -> fail "Usage: dump-tokens <file.c>"