diff --git a/src/swarm-engine/Swarm/Game/CESK.hs b/src/swarm-engine/Swarm/Game/CESK.hs index 903ef1100..aa4abbca5 100644 --- a/src/swarm-engine/Swarm/Game/CESK.hs +++ b/src/swarm-engine/Swarm/Game/CESK.hs @@ -169,9 +169,6 @@ data Frame instance ToJSON Frame where toJSON = genericToJSON optionsMinimize -instance FromJSON Frame where - parseJSON = genericParseJSON optionsMinimize - -- | A continuation is just a stack of frames. type Cont = [Frame] @@ -184,7 +181,7 @@ type Addr = Int -- | 'Store' represents a store, /i.e./ memory, indexing integer -- locations to 'Value's. data Store = Store {next :: Addr, mu :: IntMap Value} - deriving (Show, Eq, Generic, FromJSON, ToJSON) + deriving (Show, Eq, Generic, ToJSON) emptyStore :: Store emptyStore = Store 0 IM.empty @@ -269,9 +266,6 @@ data CESK instance ToJSON CESK where toJSON = genericToJSON optionsMinimize -instance FromJSON CESK where - parseJSON = genericParseJSON optionsMinimize - -- | Is the CESK machine in a final (finished) state? If so, extract -- the final value and store. finalValue :: CESK -> Maybe Value diff --git a/src/swarm-engine/Swarm/Game/State/Substate.hs b/src/swarm-engine/Swarm/Game/State/Substate.hs index 85305ab0b..ba98b63ae 100644 --- a/src/swarm-engine/Swarm/Game/State/Substate.hs +++ b/src/swarm-engine/Swarm/Game/State/Substate.hs @@ -132,7 +132,7 @@ data REPLStatus -- entered. The @Maybe Value@ starts out as 'Nothing' and gets -- filled in with a result once the command completes. REPLWorking Polytype (Maybe Value) - deriving (Eq, Show, Generic, FromJSON, ToJSON) + deriving (Eq, Show, Generic, ToJSON) data WinStatus = -- | There are one or more objectives remaining that the player diff --git a/src/swarm-lang/Swarm/Language/Context.hs b/src/swarm-lang/Swarm/Language/Context.hs index ac78de5e7..e44cb85b6 100644 --- a/src/swarm-lang/Swarm/Language/Context.hs +++ b/src/swarm-lang/Swarm/Language/Context.hs @@ -1,81 +1,265 @@ {-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE DerivingVia #-} {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE PatternSynonyms #-} -{-# LANGUAGE ViewPatterns #-} -- | -- SPDX-License-Identifier: BSD-3-Clause -- -- Generic contexts (mappings from variables to other things, such as -- types, values, or capability sets) used throughout the codebase. +-- For example, while typechecking we use a context to store a mapping +-- from variables in scope to their types. As another example, at +-- runtime, robots store an 'Env' which contains several contexts, +-- mapping variables to things like their current value, any +-- requirements associated with using the variable, and so on. +-- +-- The implementation here goes to some effort to make it possible to +-- serialize and deserialize contexts so that sharing is preserved and +-- the encoding of serialized contexts does not blow up due to +-- repeated values. module Swarm.Language.Context where -import Control.Algebra (Has) +import Control.Algebra (Has, run) +import Control.Carrier.State.Strict (execState) import Control.Effect.Reader (Reader, ask, local) -import Control.Lens.Empty (AsEmpty (..), pattern Empty) +import Control.Effect.State (State, get, modify) +import Control.Lens.Empty (AsEmpty (..)) import Control.Lens.Prism (prism) -import Data.Aeson (FromJSON (..), ToJSON (..), genericParseJSON, genericToJSON) +import Control.Monad (unless) +import Data.Aeson (FromJSON (..), FromJSONKey, ToJSON (..), ToJSONKey, genericParseJSON, genericToJSON, withText) import Data.Data (Data) +import Data.Function (on) +import Data.Functor.Const +import Data.Hashable +import Data.List.NonEmpty qualified as NE import Data.Map (Map) import Data.Map qualified as M +import Data.Semigroup (Sum (..)) import Data.Text (Text) +import Data.Text qualified as T import GHC.Generics (Generic) -import Prettyprinter (brackets, emptyDoc, hsep, punctuate) -import Swarm.Pretty (PrettyPrec (..), prettyBinding) -import Swarm.Util.JSON (optionsUnwrapUnary) +import Swarm.Pretty (PrettyPrec (..)) +import Swarm.Util (failT, showT) +import Swarm.Util.JSON (optionsMinimize) +import Swarm.Util.Yaml (FromJSONE, getE, liftE, parseJSONE) +import Text.Printf (printf) +import Text.Read (readMaybe) import Prelude hiding (lookup) -- | We use 'Text' values to represent variables. type Var = Text --- | A context is a mapping from variable names to things. -newtype Ctx t = Ctx {unCtx :: Map Var t} +------------------------------------------------------------ +-- Context hash + +-- | A context hash is a hash value used to identify contexts without +-- having to compare them for equality. Hash values are computed +-- homomorphically, so that two equal contexts will be guaranteed to +-- have the same hash value, even if they were constructed with a +-- different sequence of operations. +-- +-- The downside of this approach is that, /in theory/, there could +-- be hash collisions, that is, two different contexts which +-- nonetheless have the same hash value. However, this is extremely +-- unlikely. The benefit is that everything can be purely +-- functional, without the need to thread around some kind of +-- globally unique ID generation effect. +newtype CtxHash = CtxHash {getCtxHash :: Int} + deriving (Eq, Ord, Data, Generic, ToJSONKey, FromJSONKey) + deriving (Semigroup, Monoid) via Sum Int + deriving (Num) via Int + +instance Show CtxHash where + show (CtxHash h) = printf "%016x" h + +instance ToJSON CtxHash where + toJSON h = toJSON (show h) + +instance FromJSON CtxHash where + parseJSON = withText "hash" $ \t -> case readMaybe ("0x" ++ T.unpack t) of + Nothing -> fail "Could not parse CtxHash" + Just h -> pure (CtxHash h) + +-- | The hash for a single variable -> value binding. +singletonHash :: Hashable t => Var -> t -> CtxHash +singletonHash x t = CtxHash $ hashWithSalt (hash x) t + +-- | The hash for an entire Map's worth of bindings. +mapHash :: Hashable t => Map Var t -> CtxHash +mapHash = M.foldMapWithKey singletonHash + +------------------------------------------------------------ +-- Context structure + +-- | 'CtxF' represents one level of structure of a context: a context +-- is either empty, a singleton, or built via deletion or union. +data CtxF f t + = CtxEmpty + | CtxSingle Var t + | CtxDelete Var t (f t) + | CtxUnion (f t) (f t) deriving (Eq, Show, Functor, Foldable, Traversable, Data, Generic) -instance ToJSON t => ToJSON (Ctx t) where - toJSON = genericToJSON optionsUnwrapUnary +instance (ToJSON t, ToJSON (f t)) => ToJSON (CtxF f t) where + toJSON = genericToJSON optionsMinimize + +instance (FromJSON t, FromJSON (f t)) => FromJSON (CtxF f t) where + parseJSON = genericParseJSON optionsMinimize + +-- | Map over the recursive structure stored in a 'CtxF'. +restructureCtx :: (f t -> g t) -> CtxF f t -> CtxF g t +restructureCtx _ CtxEmpty = CtxEmpty +restructureCtx _ (CtxSingle x t) = CtxSingle x t +restructureCtx h (CtxDelete x t f1) = CtxDelete x t (h f1) +restructureCtx h (CtxUnion f1 f2) = CtxUnion (h f1) (h f2) + +-- | A 'CtxTree' is one possible representation of a context, +-- consisting of a structured record of the process by which a +-- context was constructed. This representation would be terrible +-- for doing efficient variable lookups, but it can be used to +-- efficiently serialize/deserialize the context while recovering +-- sharing. +-- +-- It stores a top-level hash of the context, along with a recursive +-- tree built via 'CtxF'. +data CtxTree t = CtxTree CtxHash (CtxF CtxTree t) + deriving (Eq, Functor, Foldable, Traversable, Data, Generic, ToJSON, FromJSON, Show) + +------------------------------------------------------------ +-- Contexts + +-- | A context is a mapping from variable names to things. We store +-- both a 'Map' (for efficient lookup) as well as a 'CtxTree' (for +-- sharing-aware serializing/deserializing). +data Ctx t = Ctx {unCtx :: Map Var t, ctxStruct :: CtxTree t} + deriving (Functor, Traversable, Data, Generic) + +-- | Get the top-level hash of a context. +ctxHash :: Ctx t -> CtxHash +ctxHash (Ctx _ (CtxTree h _)) = h + +instance Show (Ctx t) where + -- An auto-derived, naive Show instance blows up as it loses all + -- sharing, so have `show` simply output a placeholder. + show _ = "" -instance FromJSON t => FromJSON (Ctx t) where - parseJSON = genericParseJSON optionsUnwrapUnary +-- | Compare contexts for equality just by comparing their hashes. +instance Eq (Ctx t) where + (==) = (==) `on` ctxHash + +instance Hashable t => Hashable (Ctx t) where + hash = getCtxHash . ctxHash + hashWithSalt s = hashWithSalt s . getCtxHash . ctxHash + +instance Foldable Ctx where + foldMap f = foldMap f . unCtx + +-- | Rebuild a complete 'Ctx' from a 'CtxTree'. +ctxFromTree :: CtxTree t -> Ctx t +ctxFromTree tree = Ctx (varMap tree) tree + where + varMap (CtxTree _ s) = case s of + CtxEmpty -> M.empty + CtxSingle x t -> M.singleton x t + CtxDelete x _ s1 -> M.delete x (varMap s1) + CtxUnion s1 s2 -> varMap s2 `M.union` varMap s1 + +-- | "Roll up" one level of context structure while building a new +-- top-level Map and computing an appropriate top-level hash. +-- +-- In other words, the input of type @CtxF Ctx t@ represents a +-- context where the topmost level of structure is split out by +-- itself as 'CtxF', with the rest of the recursive structure stored +-- in the embedded 'Ctx' values. 'rollCtx' takes the single level +-- of structure with recursive subtrees and "rolls them up" into one +-- recursive tree. +rollCtx :: Hashable t => CtxF Ctx t -> Ctx t +rollCtx s = Ctx m (CtxTree h (restructureCtx ctxStruct s)) + where + (m, h) = case s of + CtxEmpty -> (M.empty, 0) + CtxSingle x t -> (M.singleton x t, singletonHash x t) + CtxDelete x _ (Ctx m1 (CtxTree h1 _)) -> case M.lookup x m1 of + Nothing -> (m1, h1) + Just t' -> (M.delete x m1, h1 - singletonHash x t') + CtxUnion (Ctx m1 (CtxTree h1 _)) (Ctx m2 (CtxTree h2 _)) -> (m2 `M.union` m1, h') + where + -- `Data.Map.intersection l r` returns a map with common keys, + -- but values from `l`. The values in m1 are the ones we want + -- to subtract from the hash, since they are the ones that will + -- be overwritten. + overwritten = M.intersection m1 m2 + h' = h1 + h2 - mapHash overwritten + +------------------------------------------------------------ +-- Context instances + +-- | Serialize a context simply as its hash; we assume that a +-- top-level CtxMap has been seralized somewhere, from which we can +-- recover this context by looking it up. +instance ToJSON (Ctx t) where + toJSON = toJSON . ctxHash + +-- | Deserialize a context. We expect to see a hash, and look it up +-- in the provided CtxMap. +instance FromJSONE (CtxMap CtxTree t) (Ctx t) where + parseJSONE v = do + h <- liftE $ parseJSON @CtxHash v + m <- getE + case getCtx h m of + Nothing -> failT ["Encountered unknown context hash", showT h] + Just ctx -> pure ctx instance (PrettyPrec t) => PrettyPrec (Ctx t) where - prettyPrec _ Empty = emptyDoc - prettyPrec _ (assocs -> bs) = brackets (hsep (punctuate "," (map prettyBinding bs))) + prettyPrec _ _ = "" -- | The semigroup operation for contexts is /right/-biased union. -instance Semigroup (Ctx t) where +instance Hashable t => Semigroup (Ctx t) where (<>) = union -instance Monoid (Ctx t) where +instance Hashable t => Monoid (Ctx t) where mempty = empty mappend = (<>) instance AsEmpty (Ctx t) where _Empty = prism (const empty) isEmpty where - isEmpty (Ctx c) - | M.null c = Right () - | otherwise = Left (Ctx c) + isEmpty c + | M.null (unCtx c) = Right () + | otherwise = Left c + +------------------------------------------------------------ +-- Context operations -- | The empty context. empty :: Ctx t -empty = Ctx M.empty +-- We could also define empty = rollCtx CtxEmpty but that would introduce an +-- unnecessary Hashable t constraint. +empty = Ctx M.empty (CtxTree mempty CtxEmpty) -- | A singleton context. -singleton :: Var -> t -> Ctx t -singleton x t = Ctx (M.singleton x t) +singleton :: Hashable t => Var -> t -> Ctx t +singleton x t = rollCtx $ CtxSingle x t + +-- | Create a 'Ctx' from a 'Map'. +fromMap :: Hashable t => Map Var t -> Ctx t +fromMap m = case NE.nonEmpty (M.assocs m) of + Nothing -> empty + Just ne -> foldr1 union (NE.map (uncurry singleton) ne) -- | Look up a variable in a context. lookup :: Var -> Ctx t -> Maybe t -lookup x (Ctx c) = M.lookup x c +lookup x = M.lookup x . unCtx -- | Look up a variable in a context in an ambient Reader effect. lookupR :: Has (Reader (Ctx t)) sig m => Var -> m (Maybe t) lookupR x = lookup x <$> ask -- | Delete a variable from a context. -delete :: Var -> Ctx t -> Ctx t -delete x (Ctx c) = Ctx (M.delete x c) +delete :: Hashable t => Var -> Ctx t -> Ctx t +delete x ctx@(Ctx m _) = case M.lookup x m of + Nothing -> ctx + Just t -> rollCtx $ CtxDelete x t ctx -- | Get the list of key-value associations from a context. assocs :: Ctx t -> [(Var, t)] @@ -87,18 +271,76 @@ vars = M.keys . unCtx -- | Add a key-value binding to a context (overwriting the old one if -- the key is already present). -addBinding :: Var -> t -> Ctx t -> Ctx t -addBinding x t (Ctx c) = Ctx (M.insert x t c) +addBinding :: Hashable t => Var -> t -> Ctx t -> Ctx t +addBinding x t ctx = ctx `union` singleton x t -- | /Right/-biased union of contexts. -union :: Ctx t -> Ctx t -> Ctx t -union (Ctx c1) (Ctx c2) = Ctx (c2 `M.union` c1) +union :: Hashable t => Ctx t -> Ctx t -> Ctx t +union ctx1 ctx2 = rollCtx $ CtxUnion ctx1 ctx2 -- | Locally extend the context with an additional binding. -withBinding :: Has (Reader (Ctx t)) sig m => Var -> t -> m a -> m a +withBinding :: (Has (Reader (Ctx t)) sig m, Hashable t) => Var -> t -> m a -> m a withBinding x ty = local (addBinding x ty) -- | Locally extend the context with an additional context of -- bindings. -withBindings :: Has (Reader (Ctx t)) sig m => Ctx t -> m a -> m a +withBindings :: (Has (Reader (Ctx t)) sig m, Hashable t) => Ctx t -> m a -> m a withBindings ctx = local (`union` ctx) + +------------------------------------------------------------ +-- Context serializing/deserializing + +-- | A 'CtxMap' maps context hashes to context structures. Those +-- structures could either be complete context trees, or just a +-- single level of structure containing more hashes. +type CtxMap f t = Map CtxHash (CtxF f t) + +-- | Reconstitute the context corresponding to a particular hash, by +-- looking it up in a context map. +getCtx :: CtxHash -> CtxMap CtxTree t -> Maybe (Ctx t) +getCtx h m = ctxFromTree . CtxTree h <$> M.lookup h m + +-- | Turn a context into a context map containing every subtree of its +-- structure. +toCtxMap :: Ctx t -> CtxMap CtxTree t +toCtxMap (Ctx m s) = run $ execState M.empty (buildCtxMap m s) + +-- | Build a context map by keeping track of the incrementally built +-- map in a state effect, and traverse the given context structure +-- to add all subtrees to the map---but, of course, stopping without +-- recursing further whenever we see a hash that is already in the +-- map. +buildCtxMap :: + forall t m sig. + Has (State (CtxMap CtxTree t)) sig m => + Map Var t -> + CtxTree t -> + m () +buildCtxMap m (CtxTree h s) = do + cm <- get @(CtxMap CtxTree t) + unless (h `M.member` cm) $ do + modify (M.insert h s) + case s of + CtxEmpty -> pure () + CtxSingle {} -> pure () + CtxDelete x t s1 -> buildCtxMap (M.insert x t m) s1 + CtxUnion s1 s2 -> buildCtxMap m s1 *> buildCtxMap m s2 + +-- | "Dehydrate" a context map by replacing the actual context trees +-- with single structure layers containing only hashes. A +-- dehydrated context map is very suitable for serializing, since it +-- makes sharing completely explicit---even if a given context is +-- referenced multiple times, the references are simply hash values, +-- and the context is stored only once, under its hash. +dehydrate :: CtxMap CtxTree t -> CtxMap (Const CtxHash) t +dehydrate = M.map (restructureCtx (\(CtxTree h1 _) -> Const h1)) + +-- | "Rehydrate" a dehydrated context map by replacing every hash with +-- an actual context structure. We do this by building the result +-- as a lazy, recursive map, replacing each hash by the result we +-- get when looking it up in the map being built. A context which +-- is referenced multiple times will thus be shared in memory. +rehydrate :: CtxMap (Const CtxHash) t -> CtxMap CtxTree t +rehydrate m = m' + where + m' = M.map (restructureCtx (\(Const h) -> CtxTree h (m' M.! h))) m diff --git a/src/swarm-lang/Swarm/Language/JSON.hs b/src/swarm-lang/Swarm/Language/JSON.hs index f906ab25d..2e41b13c8 100644 --- a/src/swarm-lang/Swarm/Language/JSON.hs +++ b/src/swarm-lang/Swarm/Language/JSON.hs @@ -8,12 +8,12 @@ -- to put them all here to avoid circular module dependencies. module Swarm.Language.JSON where -import Data.Aeson (FromJSON (..), ToJSON (..), genericParseJSON, genericToJSON, withText) +import Data.Aeson (FromJSON (..), ToJSON (..), genericToJSON, withText) import Data.Aeson qualified as Ae import Swarm.Language.Pipeline (processTermEither) import Swarm.Language.Syntax (Term) import Swarm.Language.Syntax.Pattern (Syntax, TSyntax) -import Swarm.Language.Value (Env, Value) +import Swarm.Language.Value (Env, Value (..)) import Swarm.Pretty (prettyText) import Swarm.Util.JSON (optionsMinimize) import Witch (into) @@ -32,8 +32,41 @@ instance ToJSON Syntax instance ToJSON Value where toJSON = genericToJSON optionsMinimize -instance FromJSON Value where - parseJSON = genericParseJSON optionsMinimize +-- TODO (#2213): Craft some appropriate FromJSONE instances for things +-- like Value and Env. Below is an early experiment. -deriving instance FromJSON Env -deriving instance ToJSON Env +-- instance FromJSONE (CtxMap CtxTree t) Value where +-- parseJSONE = withObjectE "Value" $ \v -> case Ae.toList v of +-- [("VUnit", _)] -> pure VUnit +-- [("VInt", n)] -> VInt <$> liftE (parseJSON n) +-- [("VText", t)] -> VText <$> liftE (parseJSON t) +-- [("VInj", Ae.Array (V.toList -> [i, x]))] -> VInj <$> liftE (parseJSON i) <*> parseJSONE x +-- [("VPair", Ae.Array (V.toList -> [v1, v2]))] -> VPair <$> parseJSONE v1 <*> parseJSONE v2 +-- [("VClo", Ae.Array (V.toList -> [x, t, e]))] -> +-- VClo <$> liftE (parseJSON x) <*> liftE (parseJSON t) <*> parseJSONE e +-- [("VCApp", Ae.Array (V.toList -> [c, vs]))] -> +-- VCApp <$> liftE (parseJSON c) <*> parseJSONE vs +-- [("VBind", Ae.Array (V.toList -> [x, ty, r, t1, t2, e]))] -> +-- VBind +-- <$> liftE (parseJSON x) +-- <*> liftE (parseJSON ty) +-- <*> liftE (parseJSON r) +-- <*> liftE (parseJSON t1) +-- <*> liftE (parseJSON t2) +-- <*> parseJSONE e +-- [("VDelay", Ae.Array (V.toList -> [t, e]))] -> +-- VDelay <$> liftE (parseJSON t) <*> parseJSONE e +-- [("VRef", n)] -> VRef <$> liftE (parseJSON n) +-- [("VIndir", n)] -> VIndir <$> liftE (parseJSON n) +-- [("VRcd", m)] -> VRcd <$> parseJSONE m +-- [("VKey", k)] -> VKey <$> liftE (parseJSON k) +-- [("VRequirements", Ae.Array (V.toList -> [txt, t, e]))] -> +-- VRequirements <$> liftE (parseJSON txt) <*> liftE (parseJSON t) <*> parseJSONE e +-- [("VSuspend", Ae.Array (V.toList -> [t, e]))] -> +-- VSuspend <$> liftE (parseJSON t) <*> parseJSONE e +-- [("VExc", _)] -> pure VExc +-- [("VBlackhole", _)] -> pure VBlackhole +-- _ -> fail "parseJSONE: Unable to parse Value" + +instance ToJSON Env where + toJSON = genericToJSON optionsMinimize diff --git a/src/swarm-lang/Swarm/Language/Key.hs b/src/swarm-lang/Swarm/Language/Key.hs index 788c6ba27..b4b2b0475 100644 --- a/src/swarm-lang/Swarm/Language/Key.hs +++ b/src/swarm-lang/Swarm/Language/Key.hs @@ -20,6 +20,7 @@ where import Data.Aeson (FromJSON, ToJSON) import Data.Foldable (asum) +import Data.Hashable (Hashable) import Data.Kind qualified import Data.List (sort, (\\)) import Data.Set (Set) @@ -37,10 +38,13 @@ import Witch (from) ------------------------------------------------------------ -- Parsing +deriving instance Hashable V.Modifier +deriving instance Hashable V.Key + -- | A keyboard input, represented as a key + modifiers. Invariant: -- the modifier list is always sorted. data KeyCombo = KeyCombo V.Key [V.Modifier] - deriving (Eq, Ord, Show, Generic, FromJSON, ToJSON) + deriving (Eq, Ord, Show, Generic, Hashable, FromJSON, ToJSON) deriving instance FromJSON V.Key deriving instance FromJSON V.Modifier diff --git a/src/swarm-lang/Swarm/Language/Requirements/Type.hs b/src/swarm-lang/Swarm/Language/Requirements/Type.hs index 5d74bc3b5..628a4b7a2 100644 --- a/src/swarm-lang/Swarm/Language/Requirements/Type.hs +++ b/src/swarm-lang/Swarm/Language/Requirements/Type.hs @@ -75,7 +75,7 @@ data Requirements = Requirements , devReqs :: Set Text , invReqs :: Map Text Int } - deriving (Eq, Ord, Show, Data, Generic, FromJSON, ToJSON) + deriving (Eq, Ord, Show, Data, Generic, FromJSON, ToJSON, Hashable) instance Semigroup Requirements where Requirements c1 d1 i1 <> Requirements c2 d2 i2 = diff --git a/src/swarm-lang/Swarm/Language/Syntax/AST.hs b/src/swarm-lang/Swarm/Language/Syntax/AST.hs index f281bdb3b..753fcaafa 100644 --- a/src/swarm-lang/Swarm/Language/Syntax/AST.hs +++ b/src/swarm-lang/Swarm/Language/Syntax/AST.hs @@ -16,6 +16,7 @@ import Control.Lens (Plated (..)) import Data.Aeson.Types hiding (Key) import Data.Data (Data) import Data.Data.Lens (uniplate) +import Data.Hashable (Hashable) import Data.Map.Strict (Map) import Data.Text (Text) import GHC.Generics (Generic) @@ -37,7 +38,7 @@ data Syntax' ty = Syntax' , _sComments :: Comments , _sType :: ty } - deriving (Eq, Show, Functor, Foldable, Traversable, Data, Generic) + deriving (Eq, Show, Functor, Foldable, Traversable, Data, Generic, Hashable) instance Data ty => Plated (Syntax' ty) where plate = uniplate @@ -46,7 +47,7 @@ instance Data ty => Plated (Syntax' ty) where -- as @def x = e1 end; e2@. This enumeration simply records which it -- was so that we can pretty-print appropriatly. data LetSyntax = LSLet | LSDef - deriving (Eq, Ord, Show, Bounded, Enum, Generic, Data, ToJSON, FromJSON) + deriving (Eq, Ord, Show, Bounded, Enum, Generic, Data, Hashable, ToJSON, FromJSON) ------------------------------------------------------------ -- Term: basic syntax tree @@ -148,6 +149,7 @@ data Term' ty , Foldable , Data , Generic + , Hashable , -- | The Traversable instance for Term (and for Syntax') is used during -- typechecking: during intermediate type inference, many of the type -- annotations placed on AST nodes will have unification variables in diff --git a/src/swarm-lang/Swarm/Language/Syntax/Comments.hs b/src/swarm-lang/Swarm/Language/Syntax/Comments.hs index ecdcd8e24..1e868dcfd 100644 --- a/src/swarm-lang/Swarm/Language/Syntax/Comments.hs +++ b/src/swarm-lang/Swarm/Language/Syntax/Comments.hs @@ -25,6 +25,7 @@ import Control.Lens (AsEmpty, makeLenses, pattern Empty) import Data.Aeson qualified as A import Data.Aeson.Types hiding (Key) import Data.Data (Data) +import Data.Hashable (Hashable) import Data.Sequence (Seq) import Data.Text hiding (filter, length, map) import GHC.Generics (Generic) @@ -34,12 +35,12 @@ import Swarm.Pretty (PrettyPrec (..)) -- | Line vs block comments. data CommentType = LineComment | BlockComment - deriving (Eq, Ord, Read, Show, Enum, Bounded, Generic, Data, ToJSON, FromJSON) + deriving (Eq, Ord, Read, Show, Enum, Bounded, Generic, Data, Hashable, ToJSON, FromJSON) -- | Was a comment all by itself on a line, or did it occur after some -- other tokens on a line? data CommentSituation = StandaloneComment | SuffixComment - deriving (Eq, Ord, Read, Show, Enum, Bounded, Generic, Data, ToJSON, FromJSON) + deriving (Eq, Ord, Read, Show, Enum, Bounded, Generic, Data, Hashable, ToJSON, FromJSON) -- | Test whether a comment is a standalone comment or not. isStandalone :: Comment -> Bool @@ -55,7 +56,7 @@ data Comment = Comment , commentSituation :: CommentSituation , commentText :: Text } - deriving (Eq, Show, Generic, Data, ToJSON, FromJSON) + deriving (Eq, Show, Generic, Data, ToJSON, FromJSON, Hashable) instance PrettyPrec Comment where prettyPrec _ (Comment _ LineComment _ txt) = "//" <> pretty txt @@ -67,7 +68,7 @@ data Comments = Comments { _beforeComments :: Seq Comment , _afterComments :: Seq Comment } - deriving (Eq, Show, Generic, Data) + deriving (Eq, Show, Generic, Data, Hashable) makeLenses ''Comments diff --git a/src/swarm-lang/Swarm/Language/Syntax/Loc.hs b/src/swarm-lang/Swarm/Language/Syntax/Loc.hs index e3bdbf9a6..f09586523 100644 --- a/src/swarm-lang/Swarm/Language/Syntax/Loc.hs +++ b/src/swarm-lang/Swarm/Language/Syntax/Loc.hs @@ -14,6 +14,7 @@ module Swarm.Language.Syntax.Loc ( import Data.Aeson (FromJSON (..), ToJSON (..), genericParseJSON, genericToJSON) import Data.Data (Data) +import Data.Hashable (Hashable) import GHC.Generics (Generic) import Swarm.Language.Context (Var) import Swarm.Util.JSON (optionsUntagged) @@ -28,7 +29,7 @@ data SrcLoc = NoLoc | -- | Half-open interval from start (inclusive) to end (exclusive) SrcLoc Int Int - deriving (Eq, Ord, Show, Data, Generic) + deriving (Eq, Ord, Show, Data, Generic, Hashable) instance ToJSON SrcLoc where toJSON = genericToJSON optionsUntagged @@ -58,4 +59,4 @@ srcLocBefore _ _ = False -- binding sites. (Variable occurrences are a bare TVar which gets -- wrapped in a Syntax node, so we don't need LocVar for those.) data LocVar = LV {lvSrcLoc :: SrcLoc, lvVar :: Var} - deriving (Eq, Ord, Show, Data, Generic, FromJSON, ToJSON) + deriving (Eq, Ord, Show, Data, Generic, Hashable, FromJSON, ToJSON) diff --git a/src/swarm-lang/Swarm/Language/Typecheck.hs b/src/swarm-lang/Swarm/Language/Typecheck.hs index cb89e441b..6d8c702d8 100644 --- a/src/swarm-lang/Swarm/Language/Typecheck.hs +++ b/src/swarm-lang/Swarm/Language/Typecheck.hs @@ -405,7 +405,7 @@ skolemize (unPoly -> (xs, uty)) = do let xs' = map UTyVar skolemNames newSubst = M.fromList $ zip xs xs' s = M.mapKeys Left (newSubst `M.union` unCtx boundSubst) - pure (Ctx newSubst, substU s uty) + pure (Ctx.fromMap newSubst, substU s uty) -- | 'generalize' is the opposite of 'instantiate': add a 'Forall' -- which closes over all free type and unification variables. diff --git a/src/swarm-lang/Swarm/Language/Types.hs b/src/swarm-lang/Swarm/Language/Types.hs index d1198da9c..3ef4e05ae 100644 --- a/src/swarm-lang/Swarm/Language/Types.hs +++ b/src/swarm-lang/Swarm/Language/Types.hs @@ -4,6 +4,7 @@ {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE UndecidableInstances #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} -- | -- SPDX-License-Identifier: BSD-3-Clause @@ -131,6 +132,9 @@ import Data.Data (Data) import Data.Eq.Deriving (deriveEq1) import Data.Fix import Data.Foldable (fold) +import Data.Functor.Classes (Eq1) +import Data.Hashable (Hashable) +import Data.Hashable.Lifted (Hashable1) import Data.Kind qualified import Data.List.NonEmpty ((<|)) import Data.List.NonEmpty qualified as NE @@ -178,7 +182,7 @@ data BaseTy BActor | -- | Keys, i.e. things that can be pressed on the keyboard BKey - deriving (Eq, Ord, Show, Bounded, Enum, Data, Generic, FromJSON, ToJSON) + deriving (Eq, Ord, Show, Bounded, Enum, Data, Generic, Hashable, FromJSON, ToJSON) baseTyName :: BaseTy -> Text baseTyName = into @Text . drop 1 . show @@ -206,7 +210,7 @@ data TyCon TCFun | -- | User-defined type constructor. TCUser Var - deriving (Eq, Ord, Show, Data, Generic) + deriving (Eq, Ord, Show, Data, Generic, Hashable) instance ToJSON TyCon where toJSON = genericToJSON optionsMinimize @@ -227,7 +231,7 @@ instance PrettyPrec TyCon where -- | The arity of a type, /i.e./ the number of type parameters it -- expects. newtype Arity = Arity {getArity :: Int} - deriving (Eq, Ord, Show, Generic, Data) + deriving (Eq, Ord, Show, Generic, Data, Hashable) instance ToJSON Arity where toJSON = genericToJSON optionsUnwrapUnary @@ -246,7 +250,7 @@ instance PrettyPrec Arity where data Nat where NZ :: Nat NS :: Nat -> Nat - deriving (Eq, Ord, Show, Data, Generic, FromJSON, ToJSON) + deriving (Eq, Ord, Show, Data, Generic, Hashable, FromJSON, ToJSON) natToInt :: Nat -> Int natToInt NZ = 0 @@ -275,12 +279,14 @@ data TypeF t -- when pretty-printing; the actual bound variables are represented -- via de Bruijn indices. TyRecF Var t - deriving (Show, Eq, Ord, Functor, Foldable, Traversable, Generic, Generic1, Data) + deriving (Show, Eq, Ord, Functor, Foldable, Traversable, Generic, Generic1, Data, Hashable) deriveEq1 ''TypeF deriveOrd1 ''TypeF deriveShow1 ''TypeF +instance Hashable1 TypeF -- needs the Eq1 instance + instance ToJSON1 TypeF where liftToJSON = genericLiftToJSON optionsMinimize @@ -294,7 +300,7 @@ instance FromJSON1 TypeF where type Type = Fix TypeF newtype IntVar = IntVar Int - deriving (Show, Data, Eq, Ord) + deriving (Show, Data, Eq, Ord, Generic, Hashable) instance PrettyPrec IntVar where prettyPrec _ = pretty . mkVarName "u" @@ -308,6 +314,9 @@ instance PrettyPrec IntVar where -- working with 'UType' as if it were defined directly. type UType = Free TypeF IntVar +-- orphan instance +instance (Eq1 f, Hashable x, Hashable (f (Free f x))) => Hashable (Free f x) + -- | A generic /fold/ for things defined via 'Free' (including, in -- particular, 'UType'). ucata :: Functor t => (v -> a) -> (t a -> a) -> Free t v -> a @@ -498,7 +507,7 @@ data ImplicitQuantification = Unquantified | Quantified -- only way to create a @Poly Quantified@ is through the 'quantify' -- function. data Poly (q :: ImplicitQuantification) t = Forall {_ptVars :: [Var], ptBody :: t} - deriving (Show, Eq, Functor, Foldable, Traversable, Data, Generic, FromJSON, ToJSON) + deriving (Show, Eq, Functor, Foldable, Traversable, Data, Generic, FromJSON, ToJSON, Hashable) -- | Create a raw, unquantified @Poly@ value. mkPoly :: [Var] -> t -> Poly 'Unquantified t @@ -777,7 +786,7 @@ data TydefInfo = TydefInfo { _tydefType :: Polytype , _tydefArity :: Arity } - deriving (Eq, Show, Generic, Data, FromJSON, ToJSON) + deriving (Eq, Show, Generic, Data, FromJSON, ToJSON, Hashable) makeLenses ''TydefInfo diff --git a/src/swarm-lang/Swarm/Language/Value.hs b/src/swarm-lang/Swarm/Language/Value.hs index c1b25e2ca..802146cc5 100644 --- a/src/swarm-lang/Swarm/Language/Value.hs +++ b/src/swarm-lang/Swarm/Language/Value.hs @@ -29,6 +29,7 @@ module Swarm.Language.Value ( import Control.Lens hiding (Const) import Data.Bool (bool) +import Data.Hashable (Hashable) import Data.List (foldl') import Data.Map (Map) import Data.Map qualified as M @@ -118,7 +119,7 @@ data Value where -- .) VBlackhole :: Value - deriving (Eq, Show, Generic) + deriving (Eq, Show, Generic, Hashable) -- | A value context is a mapping from variable names to their runtime -- values. @@ -141,7 +142,7 @@ data Env = Env , _envTydefs :: TDCtx -- ^ Type synonym definitions. } - deriving (Eq, Show, Generic) + deriving (Eq, Show, Generic, Hashable) makeLenses ''Env diff --git a/swarm.cabal b/swarm.cabal index da9e06722..10f8242f5 100644 --- a/swarm.cabal +++ b/swarm.cabal @@ -284,6 +284,9 @@ common parser-combinators common prettyprinter build-depends: prettyprinter >=1.7.0 && <1.8 +common quickcheck-instances + build-depends: quickcheck-instances >=0.3.17 && <0.4 + common random build-depends: random >=1.2.0 && <1.3 @@ -1213,6 +1216,7 @@ test-suite swarm-unit lens, megaparsec, mtl, + quickcheck-instances, tasty, tasty-hunit, tasty-quickcheck, @@ -1220,12 +1224,14 @@ test-suite swarm-unit time, vty, witch, + yaml, main-is: Main.hs type: exitcode-stdio-1.0 other-modules: TestBoolExpr TestCommand + TestContext TestEval TestInventory TestLSP diff --git a/test/unit/Main.hs b/test/unit/Main.hs index a2566a171..dcfc6f6a1 100644 --- a/test/unit/Main.hs +++ b/test/unit/Main.hs @@ -28,6 +28,7 @@ import Test.Tasty.QuickCheck ( ) import TestBoolExpr (testBoolExpr) import TestCommand (testCommands) +import TestContext (testContext) import TestEval (testEval) import TestInventory (testInventory) import TestLSP (testLSP) @@ -82,6 +83,7 @@ statelessTests = , testPrettyConst , testBoolExpr , testCommands + , testContext , testHighScores , testRepl , testRequirements diff --git a/test/unit/TestContext.hs b/test/unit/TestContext.hs new file mode 100644 index 000000000..0cf1ff629 --- /dev/null +++ b/test/unit/TestContext.hs @@ -0,0 +1,91 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} + +-- | +-- SPDX-License-Identifier: BSD-3-Clause +-- +-- Swarm unit tests for contexts +module TestContext where + +import Control.Monad (replicateM) +import Data.Hashable +import Data.List (nub) +import Data.Map qualified as M +import Data.Yaml (decodeEither', encode) +import Swarm.Language.Context +import Swarm.Util (showT) +import Test.QuickCheck.Instances.Text () +import Test.Tasty (TestTree, testGroup) +import Test.Tasty.HUnit (Assertion, assertBool, assertEqual, testCase) +import Test.Tasty.QuickCheck (Arbitrary (..), Gen, generate, testProperty, withMaxSuccess) + +testContext :: TestTree +testContext = + testGroup + "Contexts" + [ testGroup + "Context equality" + [ testCase "idempotence 1" $ ctxsEqual ctx1 (ctx1 <> ctx1) + , testCase "idempotence 2" $ ctxsEqual ctx2 (ctx2 <> ctx2) + , testCase "deletion" $ ctxsEqual ctx1 (delete "z" ctx2) + , testCase "empty/delete" $ ctxsEqual empty (delete "x" ctx1) + , testCase "fromMap" $ ctxsEqual ctx2 (fromMap (M.fromList [("x", 3), ("z", 6)])) + , testCase "right bias" $ ctxsEqual ctx4 (ctx2 <> ctx3) + , testCase "commutativity" $ ctxsEqual (ctx1 <> ctx5) (ctx5 <> ctx1) + ] + , testGroup + "de/rehydrate round-trip" + (map (\(name, ctx) -> testCase name $ hydrationRoundTrip ctx) testCtxs) + , testGroup + "serialize round-trip" + (map (\(name, ctx) -> testCase name $ serializeRoundTrip ctx) testCtxs) + , testProperty + "no paired hash collisions" + (withMaxSuccess 10000 (hashConsistent @Int)) + , testCase + "no hash collisions in a large pool" + $ do + ctxs <- generate (replicateM 100000 (arbitrary :: Gen (Ctx Int))) + let m = M.fromListWith (++) (map (\ctx -> (ctxHash ctx, [unCtx ctx])) ctxs) + assertBool "foo" $ all ((== 1) . length . nub) m + ] + where + ctx1 = singleton "x" 3 + ctx2 = singleton "x" 3 <> singleton "z" 6 + ctx3 = singleton "x" 5 <> singleton "y" 7 + ctx4 = singleton "x" 5 <> singleton "y" 7 <> singleton "z" 6 + ctx5 = singleton "y" 10 + bigCtx = fromMap . M.fromList $ zip (map (("x" <>) . showT) [1 :: Int ..]) [1 .. 10000] + + testCtxs = [("empty", empty), ("ctx1", ctx1), ("ctx2", ctx2), ("ctx3", ctx3), ("ctx4", ctx4), ("ctx5", ctx5), ("large", bigCtx), ("delete", delete "y" ctx4)] + +instance (Hashable a, Arbitrary a) => Arbitrary (Ctx a) where + arbitrary = fromMap <$> arbitrary + +hashConsistent :: Eq a => Ctx a -> Ctx a -> Bool +hashConsistent ctx1 ctx2 = (ctx1 == ctx2) == (ctx1 `ctxStructEqual` ctx2) + +ctxsEqual :: Ctx Int -> Ctx Int -> Assertion +ctxsEqual ctx1 ctx2 = do + -- Contexts are compared by hash for equality + assertEqual "hash equality" ctx1 ctx2 + + -- Make sure they are also structurally equal + assertBool "structural equality" (ctxStructEqual ctx1 ctx2) + +ctxStructEqual :: Eq a => Ctx a -> Ctx a -> Bool +ctxStructEqual (Ctx m1 _) (Ctx m2 _) = m1 == m2 + +hydrationRoundTrip :: Ctx Int -> Assertion +hydrationRoundTrip ctx = do + case getCtx (ctxHash ctx) (rehydrate (dehydrate (toCtxMap ctx))) of + Nothing -> fail "Failed to reconstitute dehydrated context" + Just ctx' -> ctxsEqual ctx ctx' + +serializeRoundTrip :: Ctx Int -> Assertion +serializeRoundTrip ctx = do + case decodeEither' (encode (dehydrate (toCtxMap ctx))) of + Left e -> fail $ "Failed to decode JSON-encoded context: " ++ show e + Right c -> case getCtx (ctxHash ctx) (rehydrate c) of + Nothing -> fail "Failed to reconstitute dehydrated context" + Just ctx' -> ctxsEqual ctx ctx'