Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correctly parse shared strings in the streaming implementation #178

Merged
merged 2 commits into from
Jun 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions src/Codec/Xlsx/Parser/Stream.hs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ module Codec.Xlsx.Parser.Stream
, WorkbookInfo(..)
, SheetInfo(..)
, wiSheets
, getOrParseSharedStringss
, getWorkbookInfo
, CellRow
, readSheet
Expand Down Expand Up @@ -181,6 +182,8 @@ makeLenses 'MkSheetState
-- | State for parsing shared strings
data SharedStringsState = MkSharedStringsState
{ _ss_string :: TB.Builder -- ^ String we are parsing
-- TODO: At the moment SharedStrings can be used only to create CellText values.
-- We should add support for CellRich values.
, _ss_list :: DL.DList Text -- ^ list of shared strings
} deriving stock (Generic, Show)
makeLenses 'MkSharedStringsState
Expand Down Expand Up @@ -256,10 +259,11 @@ parseSharedStrings
)
=> HexpatEvent -> m (Maybe Text)
parseSharedStrings = \case
StartElement "t" _ -> Nothing <$ (ss_string .= mempty)
EndElement "t" -> Just . LT.toStrict . TB.toLazyText <$> gets _ss_string
CharacterData txt -> Nothing <$ (ss_string <>= TB.fromText txt)
_ -> pure Nothing
-- TODO: Add parsing of text styles to further create CellRich values.
StartElement "si" _ -> Nothing <$ (ss_string .= mempty)
EndElement "si" -> Just . LT.toStrict . TB.toLazyText <$> gets _ss_string
CharacterData txt -> Nothing <$ (ss_string <>= TB.fromText txt)
_ -> pure Nothing

-- | Run a series of actions on an Xlsx file
runXlsxM :: MonadIO m => FilePath -> XlsxM a -> m a
Expand Down
92 changes: 92 additions & 0 deletions test/StreamTests.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ tests = testGroup
#else

import Control.Exception
import Codec.Archive.Zip as Zip
import Codec.Xlsx
import Codec.Xlsx.Parser.Stream
import Conduit ((.|))
Expand All @@ -31,10 +32,12 @@ import Data.Set.Lens
import qualified Data.ByteString.Lazy as LB
import qualified Data.ByteString as BS
import Data.Map (Map)
import qualified Data.Conduit.Combinators as C
import qualified Data.Map as M
import qualified Data.IntMap.Strict as IM
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Data.Vector as V
import Diff
import Test.Tasty (TestTree, testGroup)
import Test.Tasty.HUnit (testCase)
Expand Down Expand Up @@ -66,6 +69,11 @@ tests =
, testProperty "Set of input texts is as value set length" sharedStringInputTextsIsSameAsValueSetLength
],

testGroup "Reader/shared strings"
[ testCase "Can parse RichText values" richCellTextIsParsed
],


testGroup "Reader/Writer"
[ testCase "Write as stream, see if memory based implementation can read it" $ readWrite simpleWorkbook
, testCase "Write as stream, see if memory based implementation can read it" $ readWrite simpleWorkbookRow
Expand Down Expand Up @@ -234,4 +242,88 @@ untypedCellsAreParsedAsFloats = do
]
expected @==? (_ri_cell_row . _si_row <$> items)


richCellTextIsParsed :: IO ()
richCellTextIsParsed = do
BS.writeFile "testinput.xlsx" (toBs richWorkbook)
runXlsxM "testinput.xlsx" $ do
sharedStrings <- getOrParseSharedStringss
let result = Set.fromList $ V.toList sharedStrings
liftIO $ expected @==? result

where
expected :: Set.Set Text
expected = Set.fromList
[ textA1
, firstClauseB1 <> secondClauseB1
, firstClauseB2 <> secondClauseB2
]

textA1 = "Text at A1"
firstClauseB1 = "First clause at B1;"
firstClauseB2 = "First clause at B2;"
secondClauseB1 = "Second clause at B1"
secondClauseB2 = "Second clause at B2"

richWorkbook :: Xlsx
richWorkbook = def & atSheet "Sheet1" ?~ toWs
[ ((RowIndex 1, ColumnIndex 1), cellValue ?~ CellText textA1 $ def)
, ((RowIndex 2, ColumnIndex 1), cellValue ?~ cellRich firstClauseB1 secondClauseB1 $ def)
, ((RowIndex 2, ColumnIndex 2), cellValue ?~ cellRich firstClauseB2 secondClauseB2 $ def)
]

cellRich :: Text -> Text -> CellValue
cellRich firstClause secondClause = CellRich
[ RichTextRun
{ _richTextRunProperties = Just RunProperties
{ _runPropertiesBold = Nothing
, _runPropertiesCharset = Just 1
, _runPropertiesColor = Just Color
{ _colorAutomatic = Nothing
, _colorARGB = Nothing
, _colorTheme = Just 1
, _colorTint = Nothing
}
, _runPropertiesCondense = Nothing
, _runPropertiesExtend = Nothing
, _runPropertiesFontFamily = Just FontFamilySwiss
, _runPropertiesItalic = Nothing
, _runPropertiesOutline = Nothing
, _runPropertiesFont = Just "Aptos Narrow"
, _runPropertiesScheme = Nothing
, _runPropertiesShadow = Nothing
, _runPropertiesStrikeThrough = Nothing
, _runPropertiesSize = Just 11.0
, _runPropertiesUnderline = Nothing
, _runPropertiesVertAlign = Nothing
}
, _richTextRunText = firstClause
}
, RichTextRun
{ _richTextRunProperties = Just RunProperties
{ _runPropertiesBold = Just True
, _runPropertiesCharset = Just 1
, _runPropertiesColor = Just Color
{ _colorAutomatic = Nothing
, _colorARGB = Just "FFFF0000"
, _colorTheme = Nothing
, _colorTint = Nothing
}
, _runPropertiesCondense = Nothing
, _runPropertiesExtend = Nothing
, _runPropertiesFontFamily = Just FontFamilySwiss
, _runPropertiesItalic = Nothing
, _runPropertiesOutline = Nothing
, _runPropertiesFont = Just "Arial"
, _runPropertiesScheme = Nothing
, _runPropertiesShadow = Nothing
, _runPropertiesStrikeThrough = Nothing
, _runPropertiesSize = Just 8.0
, _runPropertiesUnderline = Nothing
, _runPropertiesVertAlign = Nothing
}
, _richTextRunText = secondClause
}
]

#endif
1 change: 1 addition & 0 deletions xlsx.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ test-suite data-test
, conduit
, filepath
, deepseq
, zip
if flag(microlens)
Build-depends: microlens >= 0.4 && < 0.5
, microlens-mtl
Expand Down