Skip to content

Commit

Permalink
Adjust emphasis parser for CJK characters.
Browse files Browse the repository at this point in the history
Typst documentation says that `*` strong emphasis
"only works at word boundaries." However, in typst/typst#2648
this was changed for CJK.

Closes #49.
  • Loading branch information
jgm committed May 20, 2024
1 parent b486caf commit dfe0367
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 1 deletion.
41 changes: 40 additions & 1 deletion src/Typst/Parse.hs
Original file line number Diff line number Diff line change
Expand Up @@ -588,11 +588,50 @@ pText :: P Markup
-- Parse a run of plain text into a single Text node by concatenating one
-- or more chunks; each chunk takes one of the two forms described below.
pText = Text . mconcat <$> some
-- Chunk form 1: a run of alphanumerics, optionally extended by a run of
-- '*' / '_' characters that are kept as literal text.
((do xs <- some alphaNum
T.pack . (xs <>) <$>
-- NOTE(review): this listing is a diff rendered without +/- markers. The
-- next line looks like the pre-commit version (lookahead on any
-- alphanumeric) and the line after it like its replacement -- confirm
-- against the repository before treating both as live code.
try (some (char '*' <|> char '_') <* lookAhead alphaNum)
-- The '*' / '_' run is only accepted when the character after it satisfies
-- nonCJKAlphaNum; the lookahead is wrapped in 'try' (presumably so that a
-- failed lookahead consumes nothing -- verify against the parser library).
try (some (char '*' <|> char '_') <* lookAhead (satisfy nonCJKAlphaNum))
-- Fallback: the chunk is just the alphanumeric run itself.
<|> pure (T.pack xs))
-- Chunk form 2: a run of characters that are neither whitespace nor
-- special (per isSpace / isSpecial).
<|> (T.pack <$> some (satisfy (\c -> not (isSpace c || isSpecial c))))
)

-- | True for characters that are alphanumeric (per 'isAlphaNum') and are
-- not classified as CJK by 'isCJK'.
nonCJKAlphaNum :: Char -> Bool
nonCJKAlphaNum ch
  | isCJK ch = False
  | otherwise = isAlphaNum ch

-- | True when the character's code point falls inside one of the
-- inclusive ranges tabulated below (CJK ideographs, kana, bopomofo, Yi,
-- and related symbol/compatibility blocks).
--
-- NOTE(review): Hangul Syllables (U+AC00..U+D7AF) are not in this table --
-- confirm whether that omission is intended.
isCJK :: Char -> Bool
isCJK ch
  | ch < '\x2e80' = False -- fast path: below the lowest tabulated range
  | otherwise = any covers cjkRanges
  where
    -- Inclusive membership test for a single (low, high) range.
    covers (lo, hi) = lo <= ch && ch <= hi
    cjkRanges =
      [ ('\x2e80', '\x2eff') -- CJK Radicals Supplement
      , ('\x2f00', '\x2fdf') -- Kangxi Radicals
      , ('\x2ff0', '\x2fff') -- Ideographic Description Characters
      , ('\x3000', '\x303f') -- CJK Symbols and Punctuation
      , ('\x3040', '\x309f') -- Hiragana
      , ('\x30a0', '\x30ff') -- Katakana
      , ('\x3100', '\x312f') -- Bopomofo
      , ('\x3130', '\x318f') -- Hangul Compatibility Jamo
      , ('\x3190', '\x319f') -- Kanbun
      , ('\x31c0', '\x31ef') -- CJK Strokes
      , ('\x31f0', '\x31ff') -- Katakana Phonetic Extensions
      , ('\x3200', '\x32ff') -- Enclosed CJK Letters and Months
      , ('\x3300', '\x33ff') -- CJK Compatibility
      , ('\x3400', '\x4dbf') -- CJK Unified Ideographs Extension A
      , ('\x4e00', '\x9fff') -- CJK Unified Ideographs
      , ('\xa000', '\xa48f') -- Yi Syllables
      , ('\xa490', '\xa4cf') -- Yi Radicals
      , ('\xf900', '\xfaff') -- CJK Compatibility Ideographs
      , ('\xfe10', '\xfe1f') -- Vertical Forms
      , ('\xfe30', '\xfe4f') -- CJK Compatibility Forms
      , ('\xFE50', '\xFE6F') -- Small Form Variants
      , ('\xFF00', '\xFFEE') -- Halfwidth and Fullwidth Forms
      , ('\x1B000', '\x1B0FF') -- Kana Supplement
      , ('\x1B100', '\x1B12F') -- Kana Extended-A
      , ('\x1B130', '\x1B16F') -- Small Kana Extension
      , ('\x20000', '\x2A6DF') -- CJK Unified Ideographs Extension B
      , ('\x2A700', '\x2B73F') -- CJK Unified Ideographs Extension C
      , ('\x2B740', '\x2B81F') -- CJK Unified Ideographs Extension D
      , ('\x2B820', '\x2CEAF') -- CJK Unified Ideographs Extension E
      , ('\x2CEB0', '\x2EBEF') -- CJK Unified Ideographs Extension F
      , ('\x2F800', '\x2FA1F') -- CJK Compatibility Ideographs Supplement
      , ('\x30000', '\x3134F') -- CJK Unified Ideographs Extension G
      ]

pEscaped :: P Markup
pEscaped = Text . T.singleton <$> pEsc

Expand Down
52 changes: 52 additions & 0 deletions test/out/regression/issue49.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
--- parse tree ---
[ Code
"test/typ/regression/issue49.typ"
( line 1 , column 2 )
(Let
(BasicBind (Just (Identifier "test")))
(FuncExpr
[ NormalParam (Identifier "x") , NormalParam (Identifier "y") ]
(Block
(CodeBlock
[ If
[ ( Equals (Ident (Identifier "x")) (Ident (Identifier "y"))
, Block (Content [ Text "\9989" ])
)
, ( Literal (Boolean True)
, Block
(Content
[ Text "\10060"
, Text "("
, Code
"test/typ/regression/issue49.typ"
( line 1 , column 47 )
(FuncCall
(Ident (Identifier "repr"))
[ NormalArg (Ident (Identifier "x")) ])
, Space
, Text "/"
, Text "="
, Space
, Code
"test/typ/regression/issue49.typ"
( line 1 , column 59 )
(FuncCall
(Ident (Identifier "repr"))
[ NormalArg (Ident (Identifier "y")) ])
, Text ")"
])
)
]
]))))
, SoftBreak
, Text "\27979\35797\25991\26412"
, Strong [ Text "\21152\31895" ]
, Text "\12290"
, ParBreak
]
--- evaluated ---
document(body: { text(body: [
测试文本]),
strong(body: text(body: [加粗])),
text(body: [。]),
parbreak() })
1 change: 1 addition & 0 deletions test/typ/regression/issue49.typ
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
测试文本*加粗*。

0 comments on commit dfe0367

Please sign in to comment.