Skip to content

Commit

Permalink
Add property tests for 'chunks', and fix 'metaTextChunks'
Browse files Browse the repository at this point in the history
  ```
  Cardano.Api
    Test.Cardano.Api.Metadata
      valid & rountrip text chunks:  OK (0.03s)
          ✓ valid & roundtrip text chunks passed 100 tests.
            Empty chunks   3% ▌··················· ✓  1%
            Single chunks 26% █████▏·············· ✓  5%
            Many chunks   71% ██████████████▏····· ✓ 25%
      valid & rountrip bytes chunks: OK
          ✓ valid & roundtrip bytes chunks passed 100 tests.
            Empty chunks   3% ▌··················· ✓  1%
            Single chunks 55% ███████████········· ✓  5%
            Many chunks   42% ████████▍··········· ✓ 25%
  ```

  Turns out there were two issues:

  - Empty {text,byte}strings would generate a singleton chunk with an
    empty value; which is okay semantically but ugly; empty strings now
    generate an empty chunk.

  - Metadata values measure the length of UTF-8-encoded strings, which
    means we can't rely on default text functions to split a text
    string. This is likely an overkill in many situation in the context
    of PR#5050 since most questions / answers will be in plain english.

    However, we can now put emojis and crazy unicode characters in there
    without problems.
  • Loading branch information
KtorZ committed Apr 14, 2023
1 parent 4ed6fac commit 160f67b
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 4 deletions.
2 changes: 2 additions & 0 deletions cardano-api/src/Cardano/Api.hs
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,8 @@ module Cardano.Api (
TxMetadataValue(..),
makeTransactionMetadata,
mergeTransactionMetadata,
metaTextChunks,
metaBytesChunks,

-- ** Validating metadata
validateTxMetadata,
Expand Down
40 changes: 37 additions & 3 deletions cardano-api/src/Cardano/Api/TxMetadata.hs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ import qualified Data.Aeson.Key as Aeson
import qualified Data.Aeson.KeyMap as KeyMap
import qualified Data.Aeson.Text as Aeson.Text
import qualified Data.Attoparsec.ByteString.Char8 as Atto
import Data.Bifunctor (first)
import Data.Bifunctor (bimap, first)
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Base16 as Base16
Expand All @@ -75,6 +75,7 @@ import Data.Text (Text)
import qualified Data.Text as Text
import qualified Data.Text.Encoding as Text
import qualified Data.Text.Lazy as Text.Lazy
import qualified Data.Text.Lazy.Builder as Text.Builder
import qualified Data.Vector as Vector
import Data.Word

Expand Down Expand Up @@ -147,7 +148,40 @@ metaTextChunks =
txMetadataTextStringMaxByteLength
TxMetaText
(BS.length . Text.encodeUtf8)
Text.splitAt
utf8SplitAt
where
fromBuilder = Text.Lazy.toStrict . Text.Builder.toLazyText

-- 'Text.splitAt' is no good here, because our measurement is on UTF-8
-- encoded text strings; So a char of size 1 in a text string may be
-- encoded over multiple UTF-8 bytes.
--
-- Thus, no choice than folding over each char and manually implementing
-- splitAt that counts utf8 bytes. Using builders for slightly more
-- efficiency.
utf8SplitAt n =
bimap fromBuilder fromBuilder . snd . Text.foldl
(\(len, (left, right)) char ->
-- NOTE:
-- Starting from text >= 2.0.0.0, one can use:
--
-- Data.Text.Internal.Encoding.Utf8#utf8Length
--
let sz = BS.length (Text.encodeUtf8 (Text.singleton char)) in
if len + sz > n then
( n + 1 -- Higher than 'n' to always trigger the predicate
, ( left
, right <> Text.Builder.singleton char
)
)
else
( len + sz
, ( left <> Text.Builder.singleton char
, right
)
)
)
(0, (mempty, mempty))

-- | Create a 'TxMetadataValue' from a 'ByteString' as a list of chunks of an
-- accaptable size.
Expand Down Expand Up @@ -217,7 +251,7 @@ chunks maxLength strHoist strLength strSplitAt str
let (h, t) = strSplitAt maxLength str
in strHoist h : chunks maxLength strHoist strLength strSplitAt t
| otherwise =
[strHoist str]
[strHoist str | strLength str > 0]

-- ----------------------------------------------------------------------------
-- Validate tx metadata
Expand Down
48 changes: 47 additions & 1 deletion cardano-api/test/Test/Cardano/Api/Metadata.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE LambdaCase #-}

module Test.Cardano.Api.Metadata
( tests
Expand All @@ -9,15 +10,18 @@ module Test.Cardano.Api.Metadata
import Cardano.Api

import Data.ByteString (ByteString)
import Data.Maybe (mapMaybe)
import Data.Word (Word64)
import Hedgehog (Property, property, (===))
import Hedgehog (Gen, Property, property, (===))
import Test.Gen.Cardano.Api.Metadata
import Test.Tasty (TestTree, testGroup)
import Test.Tasty.Hedgehog (testPropertyNamed)

import qualified Data.Aeson as Aeson
import qualified Data.Map.Strict as Map
import qualified Hedgehog
import qualified Hedgehog.Gen as Gen
import qualified Hedgehog.Range as Range

-- ----------------------------------------------------------------------------
-- Golden / unit tests
Expand Down Expand Up @@ -118,6 +122,46 @@ prop_metadata_roundtrip_via_schema_json = Hedgehog.property $ do
Right md === (metadataFromJson TxMetadataJsonDetailedSchema
. metadataToJson TxMetadataJsonDetailedSchema) md

prop_metadata_chunks
:: (Show str, Eq str, Monoid str)
=> Gen str
-> (str -> TxMetadataValue)
-> (TxMetadataValue -> Maybe str)
-> Property
prop_metadata_chunks genStr toMetadataValue extractChunk = Hedgehog.property $ do
str <- Hedgehog.forAll genStr
case toMetadataValue str of
metadataValue@(TxMetaList chunks) -> do
Hedgehog.cover 1 "Empty chunks" (null chunks)
Hedgehog.cover 5 "Single chunks" (length chunks == 1)
Hedgehog.cover 25 "Many chunks" (length chunks > 1)
str === mconcat (mapMaybe extractChunk chunks)
Right () === validateTxMetadata metadata
where
metadata = makeTransactionMetadata (Map.singleton 0 metadataValue)
_ ->
Hedgehog.failure

prop_metadata_text_chunks :: Property
prop_metadata_text_chunks =
prop_metadata_chunks
(Gen.text (Range.linear 0 255) Gen.unicodeAll)
metaTextChunks
(\case
TxMetaText chunk -> Just chunk
_ -> Nothing
)

prop_metadata_bytes_chunks :: Property
prop_metadata_bytes_chunks =
prop_metadata_chunks
(Gen.bytes (Range.linear 0 255))
metaBytesChunks
(\case
TxMetaBytes chunk -> Just chunk
_ -> Nothing
)

-- ----------------------------------------------------------------------------
-- Automagically collecting all the tests
--
Expand All @@ -135,4 +179,6 @@ tests = testGroup "Test.Cardano.Api.Metadata"
, testPropertyNamed "noschema json roundtrip via metadata" "noschema json roundtrip via metadata" prop_noschema_json_roundtrip_via_metadata
, testPropertyNamed "schema json roundtrip via metadata" "schema json roundtrip via metadata" prop_schema_json_roundtrip_via_metadata
, testPropertyNamed "metadata roundtrip via schema json" "metadata roundtrip via schema json" prop_metadata_roundtrip_via_schema_json
, testPropertyNamed "valid & rountrip text chunks" "valid & roundtrip text chunks" prop_metadata_text_chunks
, testPropertyNamed "valid & rountrip bytes chunks" "valid & roundtrip bytes chunks" prop_metadata_bytes_chunks
]

0 comments on commit 160f67b

Please sign in to comment.