Skip to content

Commit

Permalink
[ re #299 ] refactor: use ShowS in regular expression printer
Browse files Browse the repository at this point in the history
ShowS is the standard technique to avoid quadratic overhead due to
String concatenation.
  • Loading branch information
andreasabel committed Oct 5, 2020
1 parent 295a316 commit feaacf0
Showing 1 changed file with 34 additions and 43 deletions.
77 changes: 34 additions & 43 deletions source/src/BNFC/Backend/Java/RegToJLex.hs
Original file line number Diff line number Diff line change
@@ -1,31 +1,27 @@
module BNFC.Backend.Java.RegToJLex (printRegJLex, escapeChar) where

-- modified from pretty-printer generated by the BNF converter
import Data.Char (ord, showLitChar)

import Data.Char (ord, showLitChar)
import AbsBNF
import BNFC.Options (JavaLexerParser(..))
import AbsBNF (Identifier(..), Reg(..))
import BNFC.Options (JavaLexerParser(..))
import BNFC.Backend.Common (flexEps)

-- the top-level printing method
printRegJLex :: JavaLexerParser -> Reg -> String
printRegJLex lexer = concat . prt lexer 0
-- | Print a regular expression for the Java lexers.

parenth :: [String] -> [String]
parenth ss = ["("] ++ ss ++ [")"]
printRegJLex :: JavaLexerParser -> Reg -> String
printRegJLex lexer reg = prt lexer 0 reg ""

-- the printer class does the job
class Print a where
prt :: JavaLexerParser -> Int -> a -> [String]
prtList :: JavaLexerParser -> [a] -> [String]
prtList lexer = concatMap (prt lexer 0)
prt :: JavaLexerParser -> Int -> a -> ShowS
prtList :: JavaLexerParser -> [a] -> ShowS
prtList lexer xs s = foldr (prt lexer 0) s xs
-- OR: prtList lexer = foldr (.) id . map (prt lexer 0)

instance Print a => Print [a] where
prt lexer _ = prtList lexer

instance Print Char where
prt lexer _ c = [ escapeChar lexer c ]
prtList lexer = map (concat . prt lexer 0)
prt lexer _ c = showString $ escapeChar lexer c

escapeChar :: JavaLexerParser -> Char -> String
escapeChar _ '^' = "\\x5E" -- special case, since \^ is a control character escape
Expand All @@ -42,35 +38,30 @@ jlexReserved = ['?','*','+','|','(',')','^','$','.','[',']','{','}','"','\\']
jflexReserved :: [Char]
jflexReserved = '~':'!':'/':[] -- plus the @jlexReserved@, but they are tested separately


prPrec :: Int -> Int -> [String] -> [String]
prPrec i j = if j<i then parenth else id

instance Print Identifier where
prt _ _ (Identifier i) = [i]
prt _ _ (Identifier x) = showString x

instance Print Reg where
prt lexer i = \case
RSeq reg0 reg -> prPrec i 2 $ concat [ prt lexer 2 reg0, prt lexer 3 reg ]
RAlt reg0 reg -> prPrec i 1 $ concat [ prt lexer 1 reg0, ["|"], prt lexer 2 reg ]

-- JLex does not support set difference
--RMinus reg0 reg -> prPrec i 1 (concat [prt lexer 2 reg0 , ["#"] , prt lexer 2 reg])
RMinus reg0 REps -> prt lexer i reg0 -- REps is identity for set difference
RMinus RAny reg@(RChar _) -> prPrec i 3 $ concat [ ["[^"], prt lexer 0 reg, ["]"] ]
RMinus RAny (RAlts str) -> prPrec i 3 $ concat [ ["[^"], prt lexer 0 str, ["]"] ]
-- FIXME: maybe we could add cases for char - RDigit, RLetter etc.
RMinus _ _ -> error $ "J[F]Lex does not support general set difference"

RStar reg -> prPrec i 3 $ concat [ prt lexer 3 reg, ["*"] ]
RPlus reg -> prPrec i 3 $ concat [ prt lexer 3 reg, ["+"] ]
ROpt reg -> prPrec i 3 $ concat [ prt lexer 3 reg, ["?"] ]
REps -> prPrec i 3 [ flexEps ]
RChar c -> prPrec i 3 $ prt lexer 0 c
RAlts str -> prPrec i 3 $ concat [ ["["], prt lexer 0 str, ["]"] ]
RSeqs str -> prPrec i 2 $ concatMap (prt lexer 0) str
RDigit -> prPrec i 3 [ "{DIGIT}" ]
RLetter -> prPrec i 3 [ "{LETTER}" ]
RUpper -> prPrec i 3 [ "{CAPITAL}" ]
RLower -> prPrec i 3 [ "{SMALL}" ]
RAny -> prPrec i 3 [ "." ]
RSeq reg1 reg2 -> showParen (i > 2) $ prt lexer 2 reg1 . prt lexer 3 reg2
RAlt reg1 reg2 -> showParen (i > 1) $ prt lexer 1 reg1 . showChar '|' . prt lexer 2 reg2

-- JLex does not support set difference in general
RMinus reg0 REps -> prt lexer i reg0 -- REps is identity for set difference
RMinus RAny reg@RChar{} -> showParen (i > 3) $ showString "[^" . prt lexer 0 reg . showString "]"
RMinus RAny (RAlts str) -> showParen (i > 3) $ showString "[^" . prt lexer 0 str . showString "]"
-- FIXME: maybe we could add cases for char - RDigit, RLetter etc.
RMinus _ _ -> error $ "J[F]Lex does not support general set difference"

RStar reg -> showParen (i > 3) $ prt lexer 3 reg . showChar '*'
RPlus reg -> showParen (i > 3) $ prt lexer 3 reg . showChar '+'
ROpt reg -> showParen (i > 3) $ prt lexer 3 reg . showChar '?'
REps -> showParen (i > 3) $ showString flexEps
RChar c -> showParen (i > 3) $ prt lexer 0 c
RAlts str -> showParen (i > 3) $ showChar '[' . prt lexer 0 str . showChar ']'
RSeqs str -> showParen (i > 2) $ prt lexer 0 str
RDigit -> showParen (i > 3) $ showString "{DIGIT}"
RLetter -> showParen (i > 3) $ showString "{LETTER}"
RUpper -> showParen (i > 3) $ showString "{CAPITAL}"
RLower -> showParen (i > 3) $ showString "{SMALL}"
RAny -> showParen (i > 3) $ showChar '.'

0 comments on commit feaacf0

Please sign in to comment.