diff --git a/ouroboros-consensus-shelley/ouroboros-consensus-shelley.cabal b/ouroboros-consensus-shelley/ouroboros-consensus-shelley.cabal index 570e924edb5..8268dab8fa6 100644 --- a/ouroboros-consensus-shelley/ouroboros-consensus-shelley.cabal +++ b/ouroboros-consensus-shelley/ouroboros-consensus-shelley.cabal @@ -54,10 +54,12 @@ library , cardano-crypto-class , cardano-ledger-core , cardano-protocol-tpraos + , cardano-prelude , cardano-slotting , cborg >=0.2.2 && <0.3 , containers >=0.5 && <0.7 , data-default-class + , deepseq , measures , mtl >=2.2 && <2.3 , nothunks diff --git a/ouroboros-consensus-shelley/src/Ouroboros/Consensus/Shelley/Ledger/PeerSelection.hs b/ouroboros-consensus-shelley/src/Ouroboros/Consensus/Shelley/Ledger/PeerSelection.hs index 4711cf3986c..599a2d2ee0f 100644 --- a/ouroboros-consensus-shelley/src/Ouroboros/Consensus/Shelley/Ledger/PeerSelection.hs +++ b/ouroboros-consensus-shelley/src/Ouroboros/Consensus/Shelley/Ledger/PeerSelection.hs @@ -7,6 +7,7 @@ {-# OPTIONS_GHC -Wno-orphans #-} module Ouroboros.Consensus.Shelley.Ledger.PeerSelection () where +import Control.DeepSeq (force) import Data.Bifunctor (second) import Data.Foldable (toList) import Data.List (sortOn) @@ -33,7 +34,7 @@ import Ouroboros.Consensus.Shelley.Ledger.Ledger instance c ~ EraCrypto era => LedgerSupportsPeerSelection (ShelleyBlock era) where getPeers ShelleyLedgerState { shelleyLedgerState } = catMaybes - [ (poolStake,) <$> Map.lookup stakePool poolRelayAddresses + [ (poolStake,) <$> Map.lookup stakePool poolRelayAccessPoints | (stakePool, poolStake) <- orderByStake poolDistr ] where @@ -63,35 +64,38 @@ instance c ~ EraCrypto era . SL.nesEs $ shelleyLedgerState - relayToRelayAddress :: SL.StakePoolRelay -> Maybe RelayAddress - relayToRelayAddress (SL.SingleHostAddr (SJust (Port port)) (SJust ipv4) _) = - Just $ RelayAddressAddr (IPv4 ipv4) (fromIntegral port) - relayToRelayAddress (SL.SingleHostAddr (SJust (Port port)) SNothing (SJust ipv6)) = - Just $ RelayAddressAddr (IPv6 ipv6) (fromIntegral port) - relayToRelayAddress (SL.SingleHostName (SJust (Port port)) dnsName) = - Just $ RelayAddressDomain $ DomainAddress (encodeUtf8 $ dnsToText dnsName) (fromIntegral port) - relayToRelayAddress _ = + relayToRelayAccessPoint :: SL.StakePoolRelay -> Maybe RelayAccessPoint + relayToRelayAccessPoint (SL.SingleHostAddr (SJust (Port port)) (SJust ipv4) _) = + Just $ RelayAccessAddress (IPv4 ipv4) (fromIntegral port) + relayToRelayAccessPoint (SL.SingleHostAddr (SJust (Port port)) + SNothing + (SJust ipv6)) = + Just $ RelayAccessAddress (IPv6 ipv6) (fromIntegral port) + relayToRelayAccessPoint (SL.SingleHostName (SJust (Port port)) dnsName) = + Just $ RelayAccessDomain (encodeUtf8 $ dnsToText dnsName) (fromIntegral port) + relayToRelayAccessPoint _ = -- This could be an unsupported relay (SRV records) or an unusable -- relay such as a relay with an IP address but without a port number. Nothing -- | Note that a stake pool can have multiple registered relays - pparamsRelayAddresses :: - (RelayAddress -> StakePoolRelay) + pparamsRelayAccessPoints :: + (RelayAccessPoint -> StakePoolRelay) -> SL.PoolParams c -> Maybe (NonEmpty StakePoolRelay) - pparamsRelayAddresses injStakePoolRelay = + pparamsRelayAccessPoints injStakePoolRelay = NE.nonEmpty - . mapMaybe (fmap injStakePoolRelay . relayToRelayAddress) + . force + . mapMaybe (fmap injStakePoolRelay . relayToRelayAccessPoint) . toList . SL._poolRelays -- | Combine the stake pools registered in the future and the current pool -- parameters, and remove duplicates. 
- poolRelayAddresses :: + poolRelayAccessPoints :: Map (SL.KeyHash 'SL.StakePool c) (NonEmpty StakePoolRelay) - poolRelayAddresses = + poolRelayAccessPoints = Map.unionWith (\futureRelays currentRelays -> NE.nub (futureRelays <> currentRelays)) - (Map.mapMaybe (pparamsRelayAddresses FutureRelay) futurePoolParams) - (Map.mapMaybe (pparamsRelayAddresses CurrentRelay) poolParams) + (Map.mapMaybe (pparamsRelayAccessPoints FutureRelay) futurePoolParams) + (Map.mapMaybe (pparamsRelayAccessPoints CurrentRelay) poolParams) diff --git a/ouroboros-consensus/src/Ouroboros/Consensus/Ledger/SupportsPeerSelection.hs b/ouroboros-consensus/src/Ouroboros/Consensus/Ledger/SupportsPeerSelection.hs index 70e9c962dc1..77b1ae3f827 100644 --- a/ouroboros-consensus/src/Ouroboros/Consensus/Ledger/SupportsPeerSelection.hs +++ b/ouroboros-consensus/src/Ouroboros/Consensus/Ledger/SupportsPeerSelection.hs @@ -2,34 +2,39 @@ module Ouroboros.Consensus.Ledger.SupportsPeerSelection ( LedgerSupportsPeerSelection (..) , PoolStake (..) , StakePoolRelay (..) - , stakePoolRelayAddress + , stakePoolRelayAccessPoint -- * Re-exports for convenience - , DomainAddress (..) + , DomainAccessPoint (..) , IP (..) , PortNumber - , RelayAddress (..) + , RelayAccessPoint (..) ) where +import Control.DeepSeq (NFData (..)) import Data.List.NonEmpty (NonEmpty) import Ouroboros.Network.PeerSelection.LedgerPeers - (DomainAddress (..), IP (..), PoolStake (..), PortNumber, - RelayAddress (..)) + (DomainAccessPoint (..), IP (..), PoolStake (..), + PortNumber, RelayAccessPoint (..)) import Ouroboros.Consensus.Ledger.Abstract (LedgerState) -- | A relay registered for a stake pool data StakePoolRelay = -- | One of the current relays - CurrentRelay RelayAddress + CurrentRelay RelayAccessPoint -- | One of the future relays - | FutureRelay RelayAddress + | FutureRelay RelayAccessPoint deriving (Show, Eq) -stakePoolRelayAddress :: StakePoolRelay -> RelayAddress -stakePoolRelayAddress (CurrentRelay ra) = ra -stakePoolRelayAddress (FutureRelay ra) = ra +instance NFData StakePoolRelay where + rnf (CurrentRelay ra) = rnf ra + rnf (FutureRelay ra) = rnf ra + +stakePoolRelayAccessPoint :: StakePoolRelay -> RelayAccessPoint +stakePoolRelayAccessPoint (CurrentRelay ra) = ra +stakePoolRelayAccessPoint (FutureRelay ra) = ra class LedgerSupportsPeerSelection blk where -- | Return peers registered in the ledger ordered by descending 'PoolStake'. 
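For orientation, a minimal usage sketch of the renamed types (a hypothetical example, not part of the patch: only the modules changed above are assumed, and the module name, address and port are invented):

{-# LANGUAGE OverloadedStrings #-}
module RelayExample where   -- hypothetical module, illustration only

import qualified Data.IP as IP
import           Ouroboros.Consensus.Ledger.SupportsPeerSelection
                   (IP (..), RelayAccessPoint (..), StakePoolRelay (..),
                    stakePoolRelayAccessPoint)

-- A pool may register both an IP-based and a DNS-based relay; after this
-- patch both are represented as 'RelayAccessPoint'.
exampleCurrent, exampleFuture :: StakePoolRelay
exampleCurrent = CurrentRelay (RelayAccessAddress (IPv4 (IP.toIPv4 [203,0,113,1])) 3001)
exampleFuture  = FutureRelay  (RelayAccessDomain "relay.example" 3001)

-- Dropping the Current/Future tag yields the plain access points that the
-- node kernel (next hunk) hands to the peer-selection layer.
exampleAccessPoints :: [RelayAccessPoint]
exampleAccessPoints = map stakePoolRelayAccessPoint [exampleCurrent, exampleFuture]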
diff --git a/ouroboros-consensus/src/Ouroboros/Consensus/NodeKernel.hs b/ouroboros-consensus/src/Ouroboros/Consensus/NodeKernel.hs index 2a8bd73e419..e578772f7ea 100644 --- a/ouroboros-consensus/src/Ouroboros/Consensus/NodeKernel.hs +++ b/ouroboros-consensus/src/Ouroboros/Consensus/NodeKernel.hs @@ -23,6 +23,9 @@ module Ouroboros.Consensus.NodeKernel ( , initNodeKernel ) where + + +import Control.DeepSeq (force) import Control.Monad import Control.Monad.Except import Data.Bifunctor (second) @@ -735,14 +738,15 @@ getPeersFromCurrentLedger :: (IOLike m, LedgerSupportsPeerSelection blk) => NodeKernel m remotePeer localPeer blk -> (LedgerState blk -> Bool) - -> STM m (Maybe [(PoolStake, NonEmpty RelayAddress)]) + -> STM m (Maybe [(PoolStake, NonEmpty RelayAccessPoint)]) getPeersFromCurrentLedger kernel p = do immutableLedger <- ledgerState <$> ChainDB.getImmutableLedger (getChainDB kernel) return $ do guard (p immutableLedger) return - $ map (second (fmap stakePoolRelayAddress)) + $ map (second (fmap stakePoolRelayAccessPoint)) + $ force $ getPeers immutableLedger -- | Like 'getPeersFromCurrentLedger' but with a \"after slot number X\" @@ -755,7 +759,7 @@ getPeersFromCurrentLedgerAfterSlot :: ) => NodeKernel m remotePeer localPeer blk -> SlotNo - -> STM m (Maybe [(PoolStake, NonEmpty RelayAddress)]) + -> STM m (Maybe [(PoolStake, NonEmpty RelayAccessPoint)]) getPeersFromCurrentLedgerAfterSlot kernel slotNo = getPeersFromCurrentLedger kernel afterSlotNo where diff --git a/ouroboros-network/ouroboros-network.cabal b/ouroboros-network/ouroboros-network.cabal index 3f48f0586b8..b56cf42700d 100644 --- a/ouroboros-network/ouroboros-network.cabal +++ b/ouroboros-network/ouroboros-network.cabal @@ -77,8 +77,11 @@ library Ouroboros.Network.PeerSelection.KnownPeers Ouroboros.Network.PeerSelection.LedgerPeers Ouroboros.Network.PeerSelection.LocalRootPeers + Ouroboros.Network.PeerSelection.RelayAccessPoint + Ouroboros.Network.PeerSelection.RootPeersDNS.DNSActions Ouroboros.Network.PeerSelection.RootPeersDNS Ouroboros.Network.PeerSelection.Governor + Ouroboros.Network.PeerSelection.Simple Ouroboros.Network.Protocol.ChainSync.Client Ouroboros.Network.Protocol.ChainSync.ClientPipelined Ouroboros.Network.Protocol.ChainSync.Codec @@ -153,11 +156,13 @@ library TypeFamilies, TypeInType build-depends: base >=4.9 && <4.15, + aeson, async >=2.2 && <2.3, base16-bytestring, bytestring >=0.10 && <0.11, cborg >=0.2.1 && <0.3, containers, + deepseq, directory, dns, fingertree >=0.1.4.2 && <0.2, @@ -263,6 +268,7 @@ test-suite test other-modules: Ouroboros.Network.BlockFetch.Examples Ouroboros.Network.MockNode + Data.Signal Test.AnchoredFragment Test.Chain Test.LedgerPeers @@ -273,6 +279,8 @@ test-suite test Test.Ouroboros.Network.PeerSelection Test.Ouroboros.Network.PeerSelection.Instances Test.Ouroboros.Network.PeerSelection.LocalRootPeers + Test.Ouroboros.Network.PeerSelection.RootPeersDNS + Test.Ouroboros.Network.PeerSelection.Json Test.Ouroboros.Network.PeerSelection.MockEnvironment Test.Ouroboros.Network.PeerSelection.PeerGraph Test.Ouroboros.Network.PeerSelection.Script @@ -283,24 +291,32 @@ test-suite test Test.Socket Test.PeerState Test.Version + Test.QuickCheck.Signal + Test.QuickCheck.Utils default-language: Haskell2010 build-depends: base, QuickCheck, + aeson, array, async, bytestring, cborg, containers, dns, + deque, hashable, + iproute, mtl, network, process, + psqueues, random, serialise, tasty, tasty-hunit, tasty-quickcheck, + text, + time, cardano-prelude, cardano-slotting, diff --git 
a/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor.hs b/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor.hs index 110f4bbb278..602d77b6a69 100644 --- a/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor.hs +++ b/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor.hs @@ -28,9 +28,13 @@ module Ouroboros.Network.PeerSelection.Governor ( peerChurnGovernor, -- * Internals exported for testing + assertPeerSelectionState, sanePeerSelectionTargets, establishedPeersStatus, PeerSelectionState(..), + PeerSelectionCounters(..), + nullPeerSelectionTargets, + emptyPeerSelectionState, ) where import Data.Void (Void) @@ -41,10 +45,11 @@ import qualified Control.Concurrent.JobPool as JobPool import Control.Concurrent.JobPool (JobPool) import Control.Monad.Class.MonadAsync import Control.Monad.Class.MonadThrow -import Control.Monad.Class.MonadSTM +import Control.Monad.Class.MonadSTM.Strict import Control.Monad.Class.MonadTime import Control.Monad.Class.MonadTimer -import Control.Tracer (Tracer(..), traceWith) +import Control.Tracer (Tracer(..), traceWith, contramap) +import System.Random import qualified Ouroboros.Network.PeerSelection.EstablishedPeers as EstablishedPeers import qualified Ouroboros.Network.PeerSelection.KnownPeers as KnownPeers @@ -54,6 +59,14 @@ import qualified Ouroboros.Network.PeerSelection.Governor.KnownPeers as Kn import qualified Ouroboros.Network.PeerSelection.Governor.Monitor as Monitor import qualified Ouroboros.Network.PeerSelection.Governor.RootPeers as RootPeers import Ouroboros.Network.PeerSelection.Governor.Types +import Ouroboros.Network.BlockFetch (FetchMode (..)) + + +-- TODO: at a later patch it will be defined in +-- 'Ouroboros.Network.Diffusion.Policies' +-- +closeConnectionTimeout :: DiffTime +closeConnectionTimeout = 120 {- $overview @@ -430,16 +443,21 @@ peerSelectionGovernor :: (MonadAsync m, MonadMask m, MonadTime m, MonadTimer m, Ord peeraddr) => Tracer m (TracePeerSelection peeraddr) -> Tracer m (DebugPeerSelection peeraddr peerconn) + -> Tracer m PeerSelectionCounters + -> StdGen -> PeerSelectionActions peeraddr peerconn m -> PeerSelectionPolicy peeraddr m -> m Void -peerSelectionGovernor tracer debugTracer actions policy = +peerSelectionGovernor tracer debugTracer countersTracer fuzzRng actions policy = JobPool.withJobPool $ \jobPool -> peerSelectionGovernorLoop - tracer debugTracer + tracer (debugTracer <> contramap transform countersTracer) actions policy jobPool - emptyPeerSelectionState + (emptyPeerSelectionState fuzzRng) + where + transform :: Ord peeraddr => DebugPeerSelection peeraddr peerconn -> PeerSelectionCounters + transform (TraceGovernorState _ _ st) = peerStateToCounters st -- | Our pattern here is a loop with two sets of guarded actions: @@ -561,8 +579,128 @@ $peer-churn-governor -- | -- -peerChurnGovernor :: MonadSTM m - => PeerSelectionTargets - -> m () --Void -peerChurnGovernor _ = - return () +peerChurnGovernor :: forall m peeraddr. + ( MonadSTM m + , MonadMonotonicTime m + , MonadDelay m + ) + => Tracer m (TracePeerSelection peeraddr) + -> StdGen + -> STM m FetchMode + -> PeerSelectionTargets + -> StrictTVar m PeerSelectionTargets + -> m Void +peerChurnGovernor tracer inRng getFetchMode base peerSelectionVar = do + -- Wait a while so that not only the closest peers have had the time + -- to become warm. + startTs0 <- getMonotonicTime + -- TODO: revisit the policy once we have local root peers in the governor. 
+ -- The intention is to give local root peers a head start and avoid + -- giving advantage to hostile and quick root peers. + threadDelay 3 + atomically increaseActivePeers + endTs0 <- getMonotonicTime + fuzzyDelay inRng (endTs0 `diffTime` startTs0) >>= go + + where + + -- TODO: #3396 revisit the policy for genesis + increaseActivePeers :: STM m () + increaseActivePeers = do + mode <- getFetchMode + modifyTVar peerSelectionVar (\targets -> targets { + targetNumberOfActivePeers = + case mode of + FetchModeDeadline -> + targetNumberOfActivePeers base + FetchModeBulkSync -> + min 2 (targetNumberOfActivePeers base) + }) + + decreaseActivePeers :: STM m () + decreaseActivePeers = do + mode <- getFetchMode + modifyTVar peerSelectionVar (\targets -> targets { + targetNumberOfActivePeers = + case mode of + FetchModeDeadline -> + decrease $ targetNumberOfActivePeers base + FetchModeBulkSync -> + min 1 (targetNumberOfActivePeers base - 1) + }) + + + go :: StdGen -> m Void + go !rng = do + startTs <- getMonotonicTime + + -- Purge the worst active peer(s). + atomically decreaseActivePeers + + -- Short delay, we may have no active peers right now + threadDelay 1 + + -- Pick new active peer(s) based on the best performing established + -- peers. + atomically increaseActivePeers + + -- Give the promotion process time to start + threadDelay 1 + + -- Forget the worst performing non-active peers. + atomically $ modifyTVar peerSelectionVar (\targets -> targets { + targetNumberOfRootPeers = decrease (targetNumberOfRootPeers base) + , targetNumberOfKnownPeers = decrease (targetNumberOfKnownPeers base) + , targetNumberOfEstablishedPeers = + decrease (targetNumberOfEstablishedPeers base) + }) + + -- Give the governor time to properly demote them. + threadDelay $ 1 + closeConnectionTimeout + + -- Pick new non-active peers + atomically $ modifyTVar peerSelectionVar (\targets -> targets { + targetNumberOfRootPeers = targetNumberOfRootPeers base + , targetNumberOfKnownPeers = targetNumberOfKnownPeers base + , targetNumberOfEstablishedPeers = targetNumberOfEstablishedPeers base + }) + endTs <- getMonotonicTime + + fuzzyDelay rng (endTs `diffTime` startTs) >>= go + + -- Randomly delay between churnInterval and churnInterval + maxFuzz seconds. + fuzzyDelay :: StdGen -> DiffTime -> m StdGen + fuzzyDelay rng execTime = do + mode <- atomically getFetchMode + case mode of + FetchModeDeadline -> longDelay rng execTime + FetchModeBulkSync -> shortDelay rng execTime + + fuzzyDelay' :: DiffTime -> Double -> StdGen -> DiffTime -> m StdGen + fuzzyDelay' baseDelay maxFuzz rng execTime = do + let (fuzz, rng') = randomR (0, maxFuzz) rng + delay = realToFrac fuzz + baseDelay - execTime + traceWith tracer $ TraceChurnWait delay + threadDelay delay + return rng' + + + longDelay :: StdGen -> DiffTime -> m StdGen + longDelay = fuzzyDelay' churnInterval 600 + + + shortDelay :: StdGen -> DiffTime -> m StdGen + shortDelay = fuzzyDelay' churnIntervalBulk 60 + + -- The min time between running the churn governor. + churnInterval :: DiffTime + churnInterval = 3300 + + churnIntervalBulk :: DiffTime + churnIntervalBulk = 300 + + -- Replace 20% or at least one peer every churnInterval. 
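+ -- For example (illustrative): decrease 20 == 16 and decrease 4 == 3, so at least one peer is always replaced.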
+ decrease :: Int -> Int + decrease v = v - max 1 (v `div` 5) + + diff --git a/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor/ActivePeers.hs b/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor/ActivePeers.hs index 21aab194057..ebd10b12fe5 100644 --- a/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor/ActivePeers.hs +++ b/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor/ActivePeers.hs @@ -19,6 +19,7 @@ import Control.Concurrent.JobPool (Job(..)) import Control.Exception (SomeException, assert) import qualified Ouroboros.Network.PeerSelection.EstablishedPeers as EstablishedPeers +import qualified Ouroboros.Network.PeerSelection.LocalRootPeers as LocalRootPeers import Ouroboros.Network.PeerSelection.Governor.Types @@ -34,36 +35,136 @@ belowTarget :: forall peeraddr peerconn m. (MonadSTM m, Ord peeraddr) => PeerSelectionActions peeraddr peerconn m -> MkGuardedDecision peeraddr peerconn m -belowTarget actions - PeerSelectionPolicy { +belowTarget = belowTargetLocal <> belowTargetOther + + +belowTargetLocal :: forall peeraddr peerconn m. + (MonadSTM m, Ord peeraddr) + => PeerSelectionActions peeraddr peerconn m + -> MkGuardedDecision peeraddr peerconn m +belowTargetLocal actions + PeerSelectionPolicy { + policyPickWarmPeersToPromote + } + st@PeerSelectionState { + localRootPeers, + establishedPeers, + activePeers, + inProgressPromoteWarm, + inProgressDemoteWarm + } + -- Are there any groups of local peers that are below target? + | not (null groupsBelowTarget) + -- We need this detailed check because it is not enough to check we are + -- below an aggregate target. We can be above target for some groups + -- and below for others. + + -- Are there any groups where we can pick members to promote? + , let groupsAvailableToPromote = + [ (numMembersToPromote, membersAvailableToPromote) + | let availableToPromote = + (LocalRootPeers.keysSet localRootPeers + `Set.intersection` + EstablishedPeers.readyPeers establishedPeers) + Set.\\ activePeers + Set.\\ inProgressPromoteWarm + Set.\\ inProgressDemoteWarm + numPromoteInProgress = Set.size inProgressPromoteWarm + , not (Set.null availableToPromote) + , (target, members, membersActive) <- groupsBelowTarget + , let membersAvailableToPromote = Set.intersection + members availableToPromote + numMembersToPromote = target + - Set.size membersActive + - numPromoteInProgress + , not (Set.null membersAvailableToPromote) + , numMembersToPromote > 0 + ] + , not (null groupsAvailableToPromote) + = Guarded Nothing $ do + selectedToPromote <- + Set.unions <$> sequence + [ pickPeers policyPickWarmPeersToPromote - } - st@PeerSelectionState { - establishedPeers, - activePeers, - inProgressPromoteWarm, - inProgressDemoteWarm, - targets = PeerSelectionTargets { - targetNumberOfActivePeers - } - } + membersAvailableToPromote + numMembersToPromote + | (numMembersToPromote, + membersAvailableToPromote) <- groupsAvailableToPromote ] + + let selectedToPromote' :: Map peeraddr peerconn + selectedToPromote' = EstablishedPeers.toMap establishedPeers + `Map.restrictKeys` selectedToPromote + return $ \_now -> Decision { + decisionTrace = TracePromoteWarmLocalPeers + [ (target, Set.size membersActive) + | (target, _, membersActive) <- groupsBelowTarget ] + selectedToPromote, + decisionState = st { + inProgressPromoteWarm = inProgressPromoteWarm + <> selectedToPromote + }, + decisionJobs = [ jobPromoteWarmPeer actions peeraddr peerconn + | (peeraddr, peerconn) <- Map.assocs selectedToPromote' ] + } + + + -- If we could promote 
except that there are no peers currently available + -- then we return the next wakeup time (if any) + | not (null groupsBelowTarget) + , let potentialToPromote = + -- These are local peers that are warm but not ready. + (LocalRootPeers.keysSet localRootPeers + `Set.intersection` + EstablishedPeers.toSet establishedPeers) + Set.\\ activePeers + Set.\\ EstablishedPeers.readyPeers establishedPeers + , not (Set.null potentialToPromote) + = GuardedSkip (Min <$> EstablishedPeers.minActivateTime establishedPeers) + + | otherwise + = GuardedSkip Nothing + where + groupsBelowTarget = + [ (target, members, membersActive) + | (target, members) <- LocalRootPeers.toGroupSets localRootPeers + , let membersActive = members `Set.intersection` activePeers + , Set.size membersActive < target + ] + +belowTargetOther :: forall peeraddr peerconn m. + (MonadSTM m, Ord peeraddr) + => PeerSelectionActions peeraddr peerconn m + -> MkGuardedDecision peeraddr peerconn m +belowTargetOther actions + PeerSelectionPolicy { + policyPickWarmPeersToPromote + } + st@PeerSelectionState { + localRootPeers, + establishedPeers, + activePeers, + inProgressPromoteWarm, + inProgressDemoteWarm, + targets = PeerSelectionTargets { + targetNumberOfActivePeers + } + } -- Are we below the target for number of active peers? | numActivePeers + numPromoteInProgress < targetNumberOfActivePeers -- Are there any warm peers we could pick to promote? - , numEstablishedReadyPeers - numActivePeers - - numPromoteInProgress - numDemoteInProgress > 0 + , let availableToPromote :: Set peeraddr + availableToPromote = EstablishedPeers.readyPeers establishedPeers + Set.\\ activePeers + Set.\\ inProgressPromoteWarm + Set.\\ inProgressDemoteWarm + Set.\\ LocalRootPeers.keysSet localRootPeers + numPeersToPromote = targetNumberOfActivePeers + - numActivePeers + - numPromoteInProgress + , not (Set.null availableToPromote) + , numPeersToPromote > 0 = Guarded Nothing $ do - -- The availableToPromote is non-empty due to the second guard. - -- The numPeersToPromote is positive due to the first guard. - let availableToPromote :: Set peeraddr - availableToPromote = EstablishedPeers.readyPeers establishedPeers - Set.\\ activePeers - Set.\\ inProgressPromoteWarm - Set.\\ inProgressDemoteWarm - numPeersToPromote = targetNumberOfActivePeers - - numActivePeers - - numPromoteInProgress selectedToPromote <- pickPeers policyPickWarmPeersToPromote availableToPromote @@ -92,11 +193,8 @@ belowTarget actions | otherwise = GuardedSkip Nothing where - numEstablishedReadyPeers, numActivePeers, numPromoteInProgress :: Int - numEstablishedReadyPeers = EstablishedPeers.sizeReady establishedPeers numActivePeers = Set.size activePeers numPromoteInProgress = Set.size inProgressPromoteWarm - numDemoteInProgress = Set.size inProgressDemoteWarm jobPromoteWarmPeer :: forall peeraddr peerconn m. @@ -161,41 +259,130 @@ jobPromoteWarmPeer PeerSelectionActions{peerStateActions = PeerStateActions {act -- Active peers above target -- --- | If we are above the target of /hot peers/ we demote some of the /warm --- peers/, according to 'policyPickHotPeersToDemote'. +-- | If we are above the target of /hot peers/ we demote some hot peers to be +-- /warm peers/, according to 'policyPickHotPeersToDemote'. -- aboveTarget :: forall peeraddr peerconn m. 
(MonadSTM m, Ord peeraddr) => PeerSelectionActions peeraddr peerconn m -> MkGuardedDecision peeraddr peerconn m -aboveTarget actions - PeerSelectionPolicy { +aboveTarget = aboveTargetLocal <> aboveTargetOther + -- Start with the local root targets, then the general target. This makes + -- sense since we need to hit both and making progress downwards with the + -- local root targets makes progress for the general target too. + + +aboveTargetLocal :: forall peeraddr peerconn m. + (MonadSTM m, Ord peeraddr) + => PeerSelectionActions peeraddr peerconn m + -> MkGuardedDecision peeraddr peerconn m +aboveTargetLocal actions + PeerSelectionPolicy { + policyPickHotPeersToDemote + } + st@PeerSelectionState { + localRootPeers, + establishedPeers, + activePeers, + inProgressDemoteHot + } + -- Are there any groups of local peers that are below target? + | let groupsAboveTarget = + [ (target, members, membersActive) + | (target, members) <- LocalRootPeers.toGroupSets localRootPeers + , let membersActive = members `Set.intersection` activePeers + , Set.size membersActive > target + ] + , not (null groupsAboveTarget) + -- We need this detailed check because it is not enough to check we are + -- above an aggregate target. We can be above target for some groups + -- and below for others. + + -- Are there any groups where we can pick members to demote? + , let groupsAvailableToDemote = + [ (numMembersToDemote, membersAvailableToDemote) + | let availableToDemote = (LocalRootPeers.keysSet localRootPeers + `Set.intersection` + activePeers) + Set.\\ inProgressDemoteHot + numDemoteInProgress = Set.size inProgressDemoteHot + , not (Set.null availableToDemote) + , (target, members, membersActive) <- groupsAboveTarget + , let membersAvailableToDemote = Set.intersection + members availableToDemote + numMembersToDemote = Set.size membersActive + - target + - numDemoteInProgress + , not (Set.null membersAvailableToDemote) + , numMembersToDemote > 0 + ] + , not (null groupsAvailableToDemote) + = Guarded Nothing $ do + selectedToDemote <- + Set.unions <$> sequence + [ pickPeers policyPickHotPeersToDemote - } - st@PeerSelectionState { - establishedPeers, - activePeers, - inProgressDemoteHot, - targets = PeerSelectionTargets { - targetNumberOfActivePeers - } - } - -- Are we above the target for number of active peers? - -- Or more precisely, how many active peers could we demote? - | let numActivePeers, numPeersToDemote :: Int - numActivePeers = Set.size activePeers - -- The main constraint on how many to demote is the difference in the - -- number we have now vs the target. We must also subtract the number - -- we're already demoting so we don't repeat the same work. - numPeersToDemote = numActivePeers + membersAvailableToDemote + numMembersToDemote + | (numMembersToDemote, + membersAvailableToDemote) <- groupsAvailableToDemote ] + let selectedToDemote' :: Map peeraddr peerconn + selectedToDemote' = EstablishedPeers.toMap establishedPeers + `Map.restrictKeys` selectedToDemote + + return $ \_now -> Decision { + decisionTrace = TraceDemoteLocalHotPeers + [ (target, Set.size membersActive) + | (target, _, membersActive) <- groupsAboveTarget ] + selectedToDemote, + decisionState = st { + inProgressDemoteHot = inProgressDemoteHot + <> selectedToDemote + }, + decisionJobs = [ jobDemoteActivePeer actions peeraddr peerconn + | (peeraddr, peerconn) <- Map.assocs selectedToDemote' ] + } + + | otherwise + = GuardedSkip Nothing + + +aboveTargetOther :: forall peeraddr peerconn m. 
+ (MonadSTM m, Ord peeraddr) + => PeerSelectionActions peeraddr peerconn m + -> MkGuardedDecision peeraddr peerconn m +aboveTargetOther actions + PeerSelectionPolicy { + policyPickHotPeersToDemote + } + st@PeerSelectionState { + localRootPeers, + establishedPeers, + activePeers, + inProgressDemoteHot, + targets = PeerSelectionTargets { + targetNumberOfActivePeers + } + } + -- Are we above the general target for number of active peers? + | numActivePeers > targetNumberOfActivePeers + + -- Would we demote any if we could? + , let numPeersToDemote = numActivePeers - targetNumberOfActivePeers - - Set.size inProgressDemoteHot + - numDemoteInProgress , numPeersToDemote > 0 - = Guarded Nothing $ do - let availableToDemote :: Set peeraddr - availableToDemote = activePeers - Set.\\ inProgressDemoteHot + -- Are there any hot peers we actually can pick to demote? + -- For the moment we say we cannot demote local root peers. + -- TODO: review this decision. If we want to be able to demote local root + -- peers, e.g. for churn and improved selection, then we'll need an extra + -- mechanism to avoid promotion/demotion loops for local peers. + , let availableToDemote = activePeers + Set.\\ inProgressDemoteHot + Set.\\ LocalRootPeers.keysSet localRootPeers + , not (Set.null availableToDemote) + = Guarded Nothing $ do selectedToDemote <- pickPeers policyPickHotPeersToDemote availableToDemote @@ -219,6 +406,9 @@ aboveTarget actions | otherwise = GuardedSkip Nothing + where + numActivePeers = Set.size activePeers + numDemoteInProgress = Set.size inProgressDemoteHot jobDemoteActivePeer :: forall peeraddr peerconn m. diff --git a/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor/EstablishedPeers.hs b/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor/EstablishedPeers.hs index 13649135739..17c1c865081 100644 --- a/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor/EstablishedPeers.hs +++ b/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor/EstablishedPeers.hs @@ -16,9 +16,11 @@ import Control.Concurrent.JobPool (Job(..)) import Control.Monad.Class.MonadSTM import Control.Monad.Class.MonadTime import Control.Exception (SomeException) +import System.Random (randomR) import qualified Ouroboros.Network.PeerSelection.EstablishedPeers as EstablishedPeers import qualified Ouroboros.Network.PeerSelection.KnownPeers as KnownPeers +import qualified Ouroboros.Network.PeerSelection.LocalRootPeers as LocalRootPeers import Ouroboros.Network.PeerSelection.Governor.Types @@ -30,22 +32,136 @@ import Ouroboros.Network.PeerSelection.Governor.Types -- | If we are below the target of /warm peers/ we promote /cold peers/ -- according to 'policyPickColdPeersToPromote'. -- +-- There are two targets we are trying to hit here: +-- +-- 1. a target for the overall number of established peers; and +-- 2. the target that all local root peers are established peers. +-- +-- These two targets overlap: the conditions and the actions overlap since local +-- root peers are also known peers. Since they overlap, the order in which we +-- consider these targets is important. We consider the local peers target +-- /before/ the target for promoting other peers. +-- +-- We will /always/ try to establish connections to the local root peers, even +-- if that would put us over target for the number of established peers. If we +-- do go over target then the action to demote will be triggered. The demote +-- action never picks local root peers. +-- belowTarget :: forall peeraddr peerconn m. 
(MonadSTM m, Ord peeraddr) => PeerSelectionActions peeraddr peerconn m -> MkGuardedDecision peeraddr peerconn m -belowTarget actions - PeerSelectionPolicy { - policyPickColdPeersToPromote - } - st@PeerSelectionState { - knownPeers, - establishedPeers, - inProgressPromoteCold, - targets = PeerSelectionTargets { - targetNumberOfEstablishedPeers - } - } +belowTarget = belowTargetLocal <> belowTargetOther + + +-- | For locally configured root peers we have the (implicit) target that they +-- should all be warm peers all the time. +-- +belowTargetLocal :: forall peeraddr peerconn m. + (MonadSTM m, Ord peeraddr) + => PeerSelectionActions peeraddr peerconn m + -> MkGuardedDecision peeraddr peerconn m +belowTargetLocal actions + PeerSelectionPolicy { + policyPickColdPeersToPromote + } + st@PeerSelectionState { + localRootPeers, + knownPeers, + establishedPeers, + inProgressPromoteCold + } + + -- Are we below the target for number of /local/ root peers that are + -- established? Our target for established local root peers is all of them! + -- However we still don't want to go over the number of established peers + -- or we'll end up in a cycle. + | numLocalEstablishedPeers + numLocalConnectInProgress + < targetNumberOfLocalPeers + + -- Are there any /local/ root peers that are cold we could possibly pick to + -- connect to? We can subtract the local established ones because by + -- definition they are not cold and our invariant is that they are always + -- in the connect set. We can also subtract the in progress ones since they + -- are also already in the connect set and we cannot pick them again. + , numLocalAvailableToConnect - numLocalEstablishedPeers + - numLocalConnectInProgress > 0 + --TODO: switch style to checking if the set is empty + = Guarded Nothing $ do + -- The availableToPromote here is non-empty due to the second guard. + -- The known peers map restricted to the connect set is the same size as + -- the connect set (because it is a subset). The establishedPeers is a + -- subset of the connect set and we also know that there is no overlap + -- between inProgressPromoteCold and establishedPeers. QED. + -- + -- The numPeersToPromote is positive based on the first guard. + -- + let availableToPromote :: Set peeraddr + availableToPromote = localAvailableToConnect + Set.\\ localEstablishedPeers + Set.\\ localConnectInProgress + + numPeersToPromote = targetNumberOfLocalPeers + - numLocalEstablishedPeers + - numLocalConnectInProgress + selectedToPromote <- pickPeers + policyPickColdPeersToPromote + availableToPromote + numPeersToPromote + return $ \_now -> Decision { + decisionTrace = TracePromoteColdLocalPeers + targetNumberOfLocalPeers + numLocalEstablishedPeers + selectedToPromote, + decisionState = st { + inProgressPromoteCold = inProgressPromoteCold + <> selectedToPromote + }, + decisionJobs = [ jobPromoteColdPeer actions peer + | peer <- Set.toList selectedToPromote ] + } + + -- If we could connect to a local root peer except that there are no local + -- root peers currently available then we return the next wakeup time (if any) + -- TODO: Note that this may wake up too soon, since it considers non-local + -- known peers too for the purpose of the wakeup time. 
+ | numLocalEstablishedPeers + numLocalConnectInProgress < targetNumberOfLocalPeers + = GuardedSkip (Min <$> KnownPeers.minConnectTime knownPeers) + + | otherwise + = GuardedSkip Nothing + where + localRootPeersSet = LocalRootPeers.keysSet localRootPeers + targetNumberOfLocalPeers = LocalRootPeers.size localRootPeers + + localEstablishedPeers = EstablishedPeers.toSet establishedPeers + `Set.intersection` localRootPeersSet + localAvailableToConnect = KnownPeers.availableToConnect knownPeers + `Set.intersection` localRootPeersSet + localConnectInProgress = inProgressPromoteCold + `Set.intersection` localRootPeersSet + + numLocalEstablishedPeers = Set.size localEstablishedPeers + numLocalAvailableToConnect = Set.size localAvailableToConnect + numLocalConnectInProgress = Set.size localConnectInProgress + + +belowTargetOther :: forall peeraddr peerconn m. + (MonadSTM m, Ord peeraddr) + => PeerSelectionActions peeraddr peerconn m + -> MkGuardedDecision peeraddr peerconn m +belowTargetOther actions + PeerSelectionPolicy { + policyPickColdPeersToPromote + } + st@PeerSelectionState { + knownPeers, + establishedPeers, + inProgressPromoteCold, + targets = PeerSelectionTargets { + targetNumberOfEstablishedPeers + } + } -- Are we below the target for number of established peers? | numEstablishedPeers + numConnectInProgress < targetNumberOfEstablishedPeers @@ -103,6 +219,8 @@ belowTarget actions numAvailableToConnect= Set.size availableToConnect +-- | Must be larger than '2' since we add a random value drawn from '(-2, 2)`. +-- baseColdPeerRetryDiffTime :: Int baseColdPeerRetryDiffTime = 5 @@ -124,6 +242,7 @@ jobPromoteColdPeer PeerSelectionActions { handler e = return $ Completion $ \st@PeerSelectionState { establishedPeers, + fuzzRng, targets = PeerSelectionTargets { targetNumberOfEstablishedPeers } @@ -132,12 +251,15 @@ jobPromoteColdPeer PeerSelectionActions { let (failCount, knownPeers') = KnownPeers.incrementFailCount peeraddr (knownPeers st) + (fuzz, fuzzRng') = randomR (-2, 2 :: Double) fuzzRng -- exponential backoff: 5s, 10s, 20s, 40s, 80s, 160s. delay :: DiffTime - delay = fromIntegral $ - baseColdPeerRetryDiffTime - * 2 ^ (pred failCount `min` maxColdPeerRetryBackoff) + delay = realToFrac fuzz + + fromIntegral + ( baseColdPeerRetryDiffTime + * 2 ^ (pred failCount `min` maxColdPeerRetryBackoff) + ) in Decision { decisionTrace = TracePromoteColdFailed targetNumberOfEstablishedPeers @@ -149,7 +271,8 @@ jobPromoteColdPeer PeerSelectionActions { (delay `addTime` now) knownPeers', inProgressPromoteCold = Set.delete peeraddr - (inProgressPromoteCold st) + (inProgressPromoteCold st), + fuzzRng = fuzzRng' }, decisionJobs = [] } @@ -202,6 +325,7 @@ aboveTarget actions policyPickWarmPeersToDemote } st@PeerSelectionState { + localRootPeers, establishedPeers, activePeers, inProgressDemoteWarm, @@ -217,6 +341,11 @@ aboveTarget actions | let numEstablishedPeers, numActivePeers, numPeersToDemote :: Int numEstablishedPeers = EstablishedPeers.size establishedPeers numActivePeers = Set.size activePeers + numLocalWarmPeers = Set.size localWarmPeers + localWarmPeers = Set.intersection + (LocalRootPeers.keysSet localRootPeers) + (EstablishedPeers.toSet establishedPeers) + Set.\\ activePeers -- One constraint on how many to demote is the difference in the -- number we have now vs the target. The other constraint is that -- we pick established peers that are not also active. 
These @@ -226,6 +355,7 @@ aboveTarget actions numPeersToDemote = min (numEstablishedPeers - targetNumberOfEstablishedPeers) (numEstablishedPeers + - numLocalWarmPeers - numActivePeers) - Set.size inProgressDemoteWarm - Set.size inProgressPromoteWarm @@ -235,6 +365,7 @@ aboveTarget actions let availableToDemote :: Set peeraddr availableToDemote = EstablishedPeers.toSet establishedPeers Set.\\ activePeers + Set.\\ LocalRootPeers.keysSet localRootPeers Set.\\ inProgressDemoteWarm Set.\\ inProgressPromoteWarm selectedToDemote <- pickPeers diff --git a/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor/KnownPeers.hs b/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor/KnownPeers.hs index 29514be0c78..ea3d55b3f3c 100644 --- a/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor/KnownPeers.hs +++ b/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor/KnownPeers.hs @@ -271,16 +271,24 @@ aboveTarget PeerSelectionPolicy { , let numRootPeersCanForget = LocalRootPeers.size localRootPeers + Set.size publicRootPeers - targetNumberOfRootPeers - protectedRootPeers = LocalRootPeers.keysSet localRootPeers - <> Set.drop numRootPeersCanForget publicRootPeers availableToForget = KnownPeers.toSet knownPeers Set.\\ EstablishedPeers.toSet establishedPeers - Set.\\ protectedRootPeers + Set.\\ LocalRootPeers.keysSet localRootPeers + Set.\\ (if numRootPeersCanForget <= 0 + then publicRootPeers else Set.empty) Set.\\ inProgressPromoteCold , not (Set.null availableToForget) = Guarded Nothing $ do - let numPeersToForget = numKnownPeers - targetNumberOfKnownPeers + let numOtherPeersToForget = numKnownPeers + - targetNumberOfKnownPeers + numPeersToForget + | numRootPeersCanForget > 0 = min numRootPeersCanForget + numOtherPeersToForget + | otherwise = numOtherPeersToForget + -- If we /might/ pick a root peer, limit the number to forget so we do + -- not pick too many root peers. This may cause us to go round several + -- times but that is ok. selectedToForget <- pickPeers policyPickColdPeersToForget availableToForget diff --git a/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor/Monitor.hs b/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor/Monitor.hs index c7776a9860c..134dc1b994b 100644 --- a/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor/Monitor.hs +++ b/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor/Monitor.hs @@ -88,11 +88,13 @@ jobs jobPool st = -- reconnectDelay :: DiffTime reconnectDelay = 10 +--TODO: make this a policy param -- | Activation delay after a peer was asynchronously demoted to warm state. -- activateDelay :: DiffTime activateDelay = 60 +--TODO: make this a policy param -- | Monitor connections. @@ -229,7 +231,7 @@ localRoots actions@PeerSelectionActions{readLocalRootPeers} Guarded Nothing $ do -- We have to enforce the invariant that the number of root peers is -- not more than the target number of known peers. It's unlikely in - -- practice so it's ok to resolve it arbitrarily using Map.take. + -- practice so it's ok to resolve it arbitrarily using clampToLimit. 
localRootPeersRaw <- readLocalRootPeers let localRootPeers' = LocalRootPeers.clampToLimit targetNumberOfKnownPeers diff --git a/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor/Types.hs b/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor/Types.hs index 3b207b9f90d..86afa15586c 100644 --- a/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor/Types.hs +++ b/ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor/Types.hs @@ -30,6 +30,8 @@ module Ouroboros.Network.PeerSelection.Governor.Types , TimedDecision , MkGuardedDecision , Completion (..) + , PeerSelectionCounters (..) + , peerStateToCounters -- * Traces , TracePeerSelection (..) @@ -48,6 +50,7 @@ import Control.Concurrent.JobPool (Job) import Control.Monad.Class.MonadSTM import Control.Monad.Class.MonadTime import Control.Exception (assert, SomeException) +import System.Random (StdGen) import qualified Ouroboros.Network.PeerSelection.EstablishedPeers as EstablishedPeers import Ouroboros.Network.PeerSelection.EstablishedPeers (EstablishedPeers) @@ -287,7 +290,10 @@ data PeerSelectionState peeraddr peerconn = PeerSelectionState { inProgressPromoteCold :: !(Set peeraddr), inProgressPromoteWarm :: !(Set peeraddr), inProgressDemoteWarm :: !(Set peeraddr), - inProgressDemoteHot :: !(Set peeraddr) + inProgressDemoteHot :: !(Set peeraddr), + + -- | Rng for fuzzy delay + fuzzRng :: !StdGen -- TODO: need something like this to distinguish between lots of bad peers -- and us getting disconnected from the network locally. We don't want a @@ -298,9 +304,23 @@ data PeerSelectionState peeraddr peerconn = PeerSelectionState { } deriving (Show, Functor) +data PeerSelectionCounters = PeerSelectionCounters { + coldPeers :: !Int, + warmPeers :: !Int, + hotPeers :: !Int + } deriving Show -emptyPeerSelectionState :: PeerSelectionState peeraddr peerconn -emptyPeerSelectionState = +peerStateToCounters :: Ord peeraddr => PeerSelectionState peeraddr peerconn -> PeerSelectionCounters +peerStateToCounters st = PeerSelectionCounters { coldPeers, warmPeers, hotPeers } + where + knownPeersSet = KnownPeers.toSet (knownPeers st) + establishedPeersSet = EstablishedPeers.toSet (establishedPeers st) + coldPeers = Set.size $ knownPeersSet Set.\\ establishedPeersSet + warmPeers = Set.size $ establishedPeersSet Set.\\ activePeers st + hotPeers = Set.size $ activePeers st + +emptyPeerSelectionState :: StdGen -> PeerSelectionState peeraddr peerconn +emptyPeerSelectionState rng = PeerSelectionState { targets = nullPeerSelectionTargets, localRootPeers = LocalRootPeers.empty, @@ -315,7 +335,8 @@ emptyPeerSelectionState = inProgressPromoteCold = Set.empty, inProgressPromoteWarm = Set.empty, inProgressDemoteWarm = Set.empty, - inProgressDemoteHot = Set.empty + inProgressDemoteHot = Set.empty, + fuzzRng = rng } @@ -515,6 +536,8 @@ data TracePeerSelection peeraddr = | TraceForgetColdPeers Int Int (Set peeraddr) -- | target established, actual established, selected peers | TracePromoteColdPeers Int Int (Set peeraddr) + -- | target local established, actual local established, selected peers + | TracePromoteColdLocalPeers Int Int (Set peeraddr) -- | target established, actual established, peer, delay until next -- promotion, reason | TracePromoteColdFailed Int Int peeraddr DiffTime SomeException @@ -522,6 +545,8 @@ data TracePeerSelection peeraddr = | TracePromoteColdDone Int Int peeraddr -- | target active, actual active, selected peers | TracePromoteWarmPeers Int Int (Set peeraddr) + -- | local per-group (target active, actual active), 
selected peers + | TracePromoteWarmLocalPeers [(Int, Int)] (Set peeraddr) -- | target active, actual active, peer, reason | TracePromoteWarmFailed Int Int peeraddr SomeException -- | target active, actual active, peer @@ -534,16 +559,19 @@ data TracePeerSelection peeraddr = | TraceDemoteWarmDone Int Int peeraddr -- | target active, actual active, selected peers | TraceDemoteHotPeers Int Int (Set peeraddr) + -- | local per-group (target active, actual active), selected peers + | TraceDemoteLocalHotPeers [(Int, Int)] (Set peeraddr) -- | target active, actual active, peer, reason | TraceDemoteHotFailed Int Int peeraddr SomeException -- | target active, actual active, peer | TraceDemoteHotDone Int Int peeraddr | TraceDemoteAsynchronous (Map peeraddr PeerStatus) | TraceGovernorWakeup + | TraceChurnWait DiffTime deriving Show data DebugPeerSelection peeraddr peerconn = - TraceGovernorState Time - (Maybe DiffTime) + TraceGovernorState Time -- blocked time + (Maybe DiffTime) -- wait time (PeerSelectionState peeraddr peerconn) deriving (Show, Functor) diff --git a/ouroboros-network/src/Ouroboros/Network/PeerSelection/KnownPeers.hs b/ouroboros-network/src/Ouroboros/Network/PeerSelection/KnownPeers.hs index 6ee1bb6ed90..80a5eb923e0 100644 --- a/ouroboros-network/src/Ouroboros/Network/PeerSelection/KnownPeers.hs +++ b/ouroboros-network/src/Ouroboros/Network/PeerSelection/KnownPeers.hs @@ -352,7 +352,7 @@ minConnectTime KnownPeers { nextConnectTimes } setConnectTime :: Ord peeraddr - => Set peeraddr + => Set peeraddr --TODO: make this a single entry -> Time -> KnownPeers peeraddr -> KnownPeers peeraddr diff --git a/ouroboros-network/src/Ouroboros/Network/PeerSelection/LedgerPeers.hs b/ouroboros-network/src/Ouroboros/Network/PeerSelection/LedgerPeers.hs index 2378ef8ce69..f9208ffb097 100644 --- a/ouroboros-network/src/Ouroboros/Network/PeerSelection/LedgerPeers.hs +++ b/ouroboros-network/src/Ouroboros/Network/PeerSelection/LedgerPeers.hs @@ -1,4 +1,5 @@ {-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE OverloadedStrings #-} @@ -6,21 +7,28 @@ {-# LANGUAGE ScopedTypeVariables #-} module Ouroboros.Network.PeerSelection.LedgerPeers ( - DomainAddress (..), + DomainAccessPoint (..), IP.IP (..), LedgerPeersConsensusInterface (..), - RelayAddress (..), + RelayAccessPoint (..), PoolStake (..), AccPoolStake (..), TraceLedgerPeers (..), - pickPeers, + NumberOfPeers (..), accPoolStake, + withLedgerPeers, + UseLedgerAfter (..), Socket.PortNumber ) where -import Control.Monad.Class.MonadSTM +import Control.DeepSeq (NFData (..)) +import Control.Exception (assert) +import Control.Monad (when) +import Control.Monad.Class.MonadAsync +import Control.Monad.Class.MonadSTM.Strict +import Control.Monad.Class.MonadTime import Control.Tracer (Tracer, traceWith) import qualified Data.IP as IP import Data.List (foldl') @@ -29,29 +37,50 @@ import qualified Data.List.NonEmpty as NonEmpty import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import Data.Ratio +import qualified Data.Set as Set +import Data.Set (Set) import Data.Word +import Data.Void (Void) import qualified Network.Socket as Socket +import Network.Socket (SockAddr) import System.Random import Cardano.Slotting.Slot (SlotNo) import Ouroboros.Network.PeerSelection.RootPeersDNS - (DomainAddress (..)) + (RelayAccessPoint (..), DomainAccessPoint (..)) import Text.Printf +-- | Only use the ledger after the given slot number. 
+data UseLedgerAfter = DontUseLedger | UseLedgerAfter SlotNo deriving (Eq, Show) + +isLedgerPeersEnabled :: UseLedgerAfter -> Bool +isLedgerPeersEnabled DontUseLedger = False +isLedgerPeersEnabled _ = True + +newtype NumberOfPeers = NumberOfPeers Word16 deriving Show + newtype LedgerPeersConsensusInterface m = LedgerPeersConsensusInterface { - lpGetPeers :: SlotNo -> STM m (Maybe [(PoolStake, NonEmpty RelayAddress)]) + lpGetPeers :: SlotNo -> STM m (Maybe [(PoolStake, NonEmpty RelayAccessPoint)]) } -- | Trace LedgerPeers events. data TraceLedgerPeers = - PickedPeer !RelayAddress !AccPoolStake ! PoolStake + PickedPeer !RelayAccessPoint !AccPoolStake !PoolStake -- ^ Trace for a peer picked with accumulated and relative stake of its pool. - | PickedPeers !Word16 ![RelayAddress] + | PickedPeers !NumberOfPeers ![RelayAccessPoint] -- ^ Trace for the number of peers we wanted to pick and the list of peers picked. | FetchingNewLedgerState !Int -- ^ Trace for fetching a new list of peers from the ledger. Int is the number of peers -- returned. + | DisabledLedgerPeers + -- ^ Trace for when getting peers from the ledger is disabled, that is DontUseLedger. + | TraceUseLedgerAfter !UseLedgerAfter + -- ^ Trace UseLedgerAfter value + | WaitingOnRequest + | RequestForPeers !NumberOfPeers + | ReusingLedgerState !Int !DiffTime + | FallingBackToBootstrapPeers instance Show TraceLedgerPeers where @@ -62,22 +91,30 @@ instance Show TraceLedgerPeers where (fromRational (unAccPoolStake ackStake) :: Double) (show $ unPoolStake stake) (fromRational (unPoolStake stake) :: Double) - show (PickedPeers n peers) = + show (PickedPeers (NumberOfPeers n) peers) = printf "PickedPeers %d %s" n (show peers) show (FetchingNewLedgerState cnt) = printf "Fetching new ledgerstate, %d registered pools" cnt - - -data RelayAddress = RelayAddressDomain DomainAddress - | RelayAddressAddr IP.IP Socket.PortNumber - deriving (Show, Eq, Ord) + show (TraceUseLedgerAfter ula) = + printf "UseLedgerAfter state %s" + (show ula) + show WaitingOnRequest = "WaitingOnRequest" + show (RequestForPeers (NumberOfPeers cnt)) = printf "RequestForPeers %d" cnt + show (ReusingLedgerState cnt age) = + printf "ReusingLedgerState %d peers age %s" + cnt + (show age) + show FallingBackToBootstrapPeers = "Falling back to bootstrap peers" + show DisabledLedgerPeers = "LedgerPeers is disabled" -- | The relative stake of a stakepool in relation to the total amount staked. -- A value in the [0, 1] range. -- newtype PoolStake = PoolStake { unPoolStake :: Rational } deriving (Eq, Fractional, Num, Ord, Show) + deriving newtype NFData + -- | The accumulated relative stake of a stake pool, like PoolStake but it also includes the -- relative stake of all preceding pools. A value in the range [0, 1]. @@ -93,16 +130,16 @@ newtype AccPoolStake = AccPoolStake { unAccPoolStake :: Rational } -- O(log n) time by taking advantage of Map.lookupGE (returns the smallest key greater or equal -- to the provided value). 
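-- For example (illustrative): pools with relative stake 0.5, 0.3 and 0.2 get accumulated keys 0.5, 0.8 and 1.0, and a uniformly drawn value of 0.65 selects the 0.3 pool via Map.lookupGE.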
-- -accPoolStake :: [(PoolStake, NonEmpty RelayAddress)] - -> Map AccPoolStake (PoolStake, NonEmpty RelayAddress) +accPoolStake :: [(PoolStake, NonEmpty RelayAccessPoint)] + -> Map AccPoolStake (PoolStake, NonEmpty RelayAccessPoint) accPoolStake pl = let pl' = reRelativeStake pl ackList = foldl' fn [] pl' in Map.fromList ackList where - fn :: [(AccPoolStake, (PoolStake, NonEmpty RelayAddress))] - -> (PoolStake, NonEmpty RelayAddress) - -> [(AccPoolStake, (PoolStake, NonEmpty RelayAddress))] + fn :: [(AccPoolStake, (PoolStake, NonEmpty RelayAccessPoint))] + -> (PoolStake, NonEmpty RelayAccessPoint) + -> [(AccPoolStake, (PoolStake, NonEmpty RelayAccessPoint))] fn [] (s, rs) = [(AccPoolStake (unPoolStake s), (s, rs))] fn ps (s, !rs) = @@ -114,23 +151,39 @@ accPoolStake pl = -- | Not all stake pools have valid \/ usable relay information. This means that we need to -- recalculate the relative stake for each pool. -- -reRelativeStake :: [(PoolStake, NonEmpty RelayAddress)] - -> [(PoolStake, NonEmpty RelayAddress)] +-- The relative stake is scaled by the square root in order to increase the number +-- of downstream peers smaller pools are likely to get. +-- https://en.wikipedia.org/wiki/Penrose_method +-- +reRelativeStake :: [(PoolStake, NonEmpty RelayAccessPoint)] + -> [(PoolStake, NonEmpty RelayAccessPoint)] reRelativeStake pl = - let total = sum $ map fst pl in - map (\(s, rls) -> (s / total, rls)) pl + let total = foldl' (+) 0 $ map (adjustment . fst) pl + pl' = map (\(s, rls) -> (adjustment s / total, rls)) pl + total' = sum $ map fst pl' in + assert (total == 0 || (total' > (PoolStake $ 999999 % 1000000) && + total' < (PoolStake $ 1000001 % 1000000))) pl' + + where + -- We do lose some precision in the conversion. However we only care about precision + -- on the order of 1 block per year and for that a Double is good enough. + adjustment :: PoolStake -> PoolStake + adjustment (PoolStake s) = + let d = fromRational s :: Double in + PoolStake $ toRational $ sqrt d + -- | Try to pick n random peers. pickPeers :: forall m. Monad m => StdGen -> Tracer m TraceLedgerPeers - -> Map AccPoolStake (PoolStake, NonEmpty RelayAddress) - -> Word16 - -> m (StdGen, [RelayAddress]) + -> Map AccPoolStake (PoolStake, NonEmpty RelayAccessPoint) + -> NumberOfPeers + -> m (StdGen, [RelayAccessPoint]) pickPeers inRng _ pools _ | Map.null pools = return (inRng, []) -pickPeers inRng tracer pools cnt = go inRng cnt [] +pickPeers inRng tracer pools (NumberOfPeers cnt) = go inRng cnt [] where - go :: StdGen -> Word16 -> [RelayAddress] -> m (StdGen, [RelayAddress]) + go :: StdGen -> Word16 -> [RelayAccessPoint] -> m (StdGen, [RelayAccessPoint]) go rng 0 picked = return (rng, picked) go rng n picked = let (r :: Word64, rng') = random rng @@ -144,3 +197,128 @@ pickPeers inRng tracer pools cnt = go inRng cnt [] relay = relays NonEmpty.!! ix traceWith tracer $ PickedPeer relay ackStake stake go rng'' (n - 1) (relay : picked) + + +-- | Run the LedgerPeers worker thread. +-- +ledgerPeersThread :: forall m. 
+ ( MonadAsync m + , MonadTime m + ) + => StdGen + -> Tracer m TraceLedgerPeers + -> STM m UseLedgerAfter + -> LedgerPeersConsensusInterface m + -> ([DomainAccessPoint] -> m (Map DomainAccessPoint (Set SockAddr))) + -> STM m NumberOfPeers + -> (Maybe (Set SockAddr, DiffTime) -> STM m ()) + -> m Void +ledgerPeersThread inRng tracer readUseLedgerAfter LedgerPeersConsensusInterface{..} doResolve + getReq putRsp = + go inRng (Time 0) Map.empty + where + go :: StdGen -> Time -> Map AccPoolStake (PoolStake, NonEmpty RelayAccessPoint) + -> m Void + go rng oldTs peerMap = do + useLedgerAfter <- atomically readUseLedgerAfter + traceWith tracer (TraceUseLedgerAfter useLedgerAfter) + + let peerListLifeTime = if Map.null peerMap && isLedgerPeersEnabled useLedgerAfter + then 30 + else 1847 -- Close to but not exactly 30min. + + traceWith tracer WaitingOnRequest + numRequested <- atomically getReq + traceWith tracer $ RequestForPeers numRequested + !now <- getMonotonicTime + let age = diffTime now oldTs + (peerMap', ts) <- if age > peerListLifeTime + then + case useLedgerAfter of + DontUseLedger -> do + traceWith tracer DisabledLedgerPeers + return (Map.empty, now) + UseLedgerAfter slot -> do + peers_m <- atomically $ lpGetPeers slot + let peers = maybe Map.empty accPoolStake peers_m + traceWith tracer $ FetchingNewLedgerState $ Map.size peers + return (peers, now) + + else do + traceWith tracer $ ReusingLedgerState (Map.size peerMap) age + return (peerMap, oldTs) + + if Map.null peerMap' + then do + when (isLedgerPeersEnabled useLedgerAfter) $ + traceWith tracer FallingBackToBootstrapPeers + atomically $ putRsp Nothing + go rng ts peerMap' + else do + let ttl = 5 -- TTL, used as re-request interval by the governor. + + (rng', !pickedPeers) <- pickPeers rng tracer peerMap' numRequested + traceWith tracer $ PickedPeers numRequested pickedPeers + + let (plainAddrs, domains) = foldl' splitPeers (Set.empty, []) pickedPeers + + domainAddrs <- doResolve domains + + let (rng'', rngDomain) = split rng' + pickedAddrs = snd $ foldl' pickDomainAddrs (rngDomain, plainAddrs) + domainAddrs + + atomically $ putRsp $ Just (pickedAddrs, ttl) + go rng'' ts peerMap' + + -- Randomly pick one of the addresses returned in the DNS result. + pickDomainAddrs :: (StdGen, Set SockAddr) + -> Set SockAddr + -> (StdGen, Set SockAddr) + pickDomainAddrs (rng, pickedAddrs) addrs | Set.null addrs = (rng, pickedAddrs) + pickDomainAddrs (rng, pickedAddrs) addrs = + let (ix, rng') = randomR (0, Set.size addrs - 1) rng + !pickedAddr = Set.elemAt ix addrs in + (rng', Set.insert pickedAddr pickedAddrs) + + + -- Divide the picked peers from the ledger into addresses we can use directly and + -- domain names that we need to resolve. + splitPeers :: (Set SockAddr, [DomainAccessPoint]) + -> RelayAccessPoint + -> (Set SockAddr, [DomainAccessPoint]) + splitPeers (addrs, domains) (RelayDomainAccessPoint domain) = (addrs, domain : domains) + splitPeers (addrs, domains) (RelayAccessAddress ip port) = + let !addr = IP.toSockAddr (ip, port) in + (Set.insert addr addrs, domains) + + +-- | Fork a LedgerPeers worker thread and provide a way to submit requests and receive responses. +-- +withLedgerPeers :: forall m a. 
+ ( MonadAsync m + , MonadTime m + ) + => StdGen + -> Tracer m TraceLedgerPeers + -> STM m UseLedgerAfter + -> LedgerPeersConsensusInterface m + -> ([DomainAccessPoint] -> m (Map DomainAccessPoint (Set SockAddr))) + -> ( (NumberOfPeers -> m (Maybe (Set SockAddr, DiffTime))) + -> Async m Void + -> m a ) + -> m a +withLedgerPeers inRng tracer readUseLedgerAfter interface doResolve k = do + reqVar <- newEmptyTMVarIO + respVar <- newEmptyTMVarIO + let getRequest = takeTMVar reqVar + putResponse = putTMVar respVar + request :: NumberOfPeers -> m (Maybe (Set SockAddr, DiffTime)) + request = \numberOfPeers -> do + atomically $ putTMVar reqVar numberOfPeers + atomically $ takeTMVar respVar + withAsync + ( ledgerPeersThread inRng tracer readUseLedgerAfter interface doResolve + getRequest putResponse ) + $ \ thread -> k request thread + diff --git a/ouroboros-network/src/Ouroboros/Network/PeerSelection/LocalRootPeers.hs b/ouroboros-network/src/Ouroboros/Network/PeerSelection/LocalRootPeers.hs index 99edfb669bc..21c4ae06ee3 100644 --- a/ouroboros-network/src/Ouroboros/Network/PeerSelection/LocalRootPeers.hs +++ b/ouroboros-network/src/Ouroboros/Network/PeerSelection/LocalRootPeers.hs @@ -14,6 +14,7 @@ module Ouroboros.Network.PeerSelection.LocalRootPeers ( target, fromGroups, toGroups, + toGroupSets, toMap, keysSet, @@ -44,8 +45,13 @@ data LocalRootPeers peeraddr = -- The groups, but without the associated PeerAdvertise [(Int, Set peeraddr)] - deriving (Eq, Show) + deriving Eq +-- It is an abstract type, so the derived Show is unhelpful, e.g. for replaying +-- test cases. +-- +instance (Show peeraddr, Ord peeraddr) => Show (LocalRootPeers peeraddr) where + show lrps = "fromGroups " ++ show (toGroups lrps) invariant :: Ord peeraddr => LocalRootPeers peeraddr -> Bool invariant (LocalRootPeers m gs) = @@ -56,9 +62,9 @@ invariant (LocalRootPeers m gs) = -- The localRootPeers groups must not overlap with each other && Map.size m == sum [ Set.size g | (_, g) <- gs ] - -- Individual group targets must be zero or more and achievable given the - -- group sizes. - && and [ 0 <= t && t <= Set.size g | (t, g) <- gs ] + -- Individual group targets must be greater than zero and achievable given + -- the group sizes. + && and [ 0 < t && t <= Set.size g | (t, g) <- gs ] empty :: LocalRootPeers peeraddr @@ -79,8 +85,8 @@ toMap (LocalRootPeers m _) = m keysSet :: LocalRootPeers peeraddr -> Set peeraddr keysSet (LocalRootPeers m _) = Map.keysSet m -toGroups :: LocalRootPeers peeraddr -> [(Int, Set peeraddr)] -toGroups (LocalRootPeers _ gs) = gs +toGroupSets :: LocalRootPeers peeraddr -> [(Int, Set peeraddr)] +toGroupSets (LocalRootPeers _ gs) = gs -- | The local root peers info has some invariants that are not directly @@ -104,13 +110,23 @@ fromGroups = -- The groups must not overlap; have achievable targets; and be non-empty. establishStructureInvariant !_ [] = [] establishStructureInvariant !acc ((t, g): gs) - | not (Map.null g') = (t', g') : establishStructureInvariant acc' gs - | otherwise = establishStructureInvariant acc' gs + | t' > 0 = (t', g') : establishStructureInvariant acc' gs + | otherwise = establishStructureInvariant acc' gs where !g' = g `Map.withoutKeys` acc - !t' = min (max 0 t) (Map.size g') + !t' = min t (Map.size g') !acc' = acc <> Map.keysSet g +-- | Inverse of 'fromGroups', for the subset of inputs to 'fromGroups' that +-- satisfy the invariant. 
+-- +toGroups :: Ord peeraddr + => LocalRootPeers peeraddr + -> [(Int, Map peeraddr PeerAdvertise)] +toGroups (LocalRootPeers m gs) = + [ (t, Map.fromSet (m Map.!) g) + | (t, g) <- gs ] + -- | Limit the size of the root peers collection to fit within given bounds. -- diff --git a/ouroboros-network/src/Ouroboros/Network/PeerSelection/RelayAccessPoint.hs b/ouroboros-network/src/Ouroboros/Network/PeerSelection/RelayAccessPoint.hs new file mode 100644 index 00000000000..b6898ae822a --- /dev/null +++ b/ouroboros-network/src/Ouroboros/Network/PeerSelection/RelayAccessPoint.hs @@ -0,0 +1,111 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE PatternSynonyms #-} +{-# LANGUAGE ViewPatterns #-} + +module Ouroboros.Network.PeerSelection.RelayAccessPoint + ( DomainAccessPoint (..) + , RelayAccessPoint (.., RelayDomainAccessPoint) + , IP.IP (..) + + -- * Socket type re-exports + , Socket.PortNumber, + ) where + +import Control.DeepSeq (NFData (..)) + +import Data.Aeson +import qualified Data.IP as IP +import Data.Text (Text) +import qualified Data.Text as Text +import Data.Text.Encoding (decodeUtf8, encodeUtf8) +import Text.Read (readMaybe) + +import qualified Network.DNS as DNS +import qualified Network.Socket as Socket + +-- | A product of a 'DNS.Domain' and 'Socket.PortNumber'. After resolving the +-- domain we will use the 'Socket.PortNumber' to form 'Socket.SockAddr'. +-- +data DomainAccessPoint = DomainAccessPoint { + dapDomain :: !DNS.Domain, + dapPortNumber :: !Socket.PortNumber + } + deriving (Show, Eq, Ord) + +instance FromJSON DomainAccessPoint where + parseJSON = withObject "DomainAccessPoint" $ \v -> + DomainAccessPoint + <$> (encodeUtf8 <$> v .: "addr") + <*> ((fromIntegral :: Int -> Socket.PortNumber) <$> v .: "port") + +instance ToJSON DomainAccessPoint where + toJSON da = + object + [ "addr" .= decodeUtf8 (dapDomain da) + , "port" .= (fromIntegral (dapPortNumber da) :: Int) + ] + +-- | A relay can have either an IP address and a port number or +-- a domain with a port number +-- +data RelayAccessPoint = RelayAccessDomain !DNS.Domain !Socket.PortNumber + | RelayAccessAddress !IP.IP !Socket.PortNumber + deriving (Show, Eq, Ord) + + +-- | 'RelayDomainAccessPoint' a bidirectional pattern which links +-- 'RelayAccessDomain' and 'DomainAccessPoint'. +-- +pattern RelayDomainAccessPoint :: DomainAccessPoint -> RelayAccessPoint +pattern RelayDomainAccessPoint dap <- (viewRelayAccessPoint -> Just dap) + where + RelayDomainAccessPoint DomainAccessPoint {dapDomain, dapPortNumber} = + RelayAccessDomain dapDomain dapPortNumber + +{-# COMPLETE RelayDomainAccessPoint, RelayAccessAddress #-} + +viewRelayAccessPoint :: RelayAccessPoint -> Maybe DomainAccessPoint +viewRelayAccessPoint (RelayAccessDomain dapDomain dapPortNumber) = + Just DomainAccessPoint {dapDomain, dapPortNumber} +viewRelayAccessPoint RelayAccessAddress {} = + Nothing + + +-- 'IP' nor 'IPv6' is strict, 'IPv4' is strict only because it's a newtype for +-- a primitive type ('Word32'). 
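+--
+-- A minimal sketch of the intent (the address literal below is arbitrary):
+-- evaluating @force (RelayAccessAddress (read "2001:db8::1") 3001)@ also
+-- forces the 'Word32' components of the wrapped address, so no thunks are
+-- retained behind the 'IP' value.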
+-- +instance NFData RelayAccessPoint where + rnf (RelayAccessDomain !_domain !_port) = () + rnf (RelayAccessAddress ip !_port) = + case ip of + IP.IPv4 ipv4 -> rnf (IP.fromIPv4w ipv4) + IP.IPv6 ipv6 -> rnf (IP.fromIPv6w ipv6) + +instance FromJSON RelayAccessPoint where + parseJSON = withObject "RelayAccessPoint" $ \v -> do + addr <- v .: "addr" + port <- v .: "port" + return (toRelayAccessPoint addr port) + +instance ToJSON RelayAccessPoint where + toJSON (RelayAccessDomain addr port) = + object + [ "addr" .= decodeUtf8 addr + , "port" .= (fromIntegral port :: Int) + ] + toJSON (RelayAccessAddress ip port) = + object + [ "addr" .= Text.pack (show ip) + , "port" .= (fromIntegral port :: Int) + ] + +-- | Parse a address field as either an IP address or a DNS address. +-- Returns corresponding RelayAccessPoint. +-- +toRelayAccessPoint :: Text -> Int -> RelayAccessPoint +toRelayAccessPoint address port = + case readMaybe (Text.unpack address) of + Nothing -> RelayAccessDomain (encodeUtf8 address) (fromIntegral port) + Just addr -> RelayAccessAddress addr (fromIntegral port) diff --git a/ouroboros-network/src/Ouroboros/Network/PeerSelection/RootPeersDNS.hs b/ouroboros-network/src/Ouroboros/Network/PeerSelection/RootPeersDNS.hs index fb1fba36564..5f44e2d8fbd 100644 --- a/ouroboros-network/src/Ouroboros/Network/PeerSelection/RootPeersDNS.hs +++ b/ouroboros-network/src/Ouroboros/Network/PeerSelection/RootPeersDNS.hs @@ -1,19 +1,24 @@ -{-# LANGUAGE CPP #-} {-# LANGUAGE BangPatterns #-} +{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} - --- 'resolverResource' and 'asyncResolverResource' are not used when compiled --- on @Windows@ -{-# OPTIONS_GHC -Wno-unused-top-binds #-} +{-# LANGUAGE OverloadedStrings #-} module Ouroboros.Network.PeerSelection.RootPeersDNS ( + -- * DNS based actions for local and public root providers + DNSActions (..), + + -- * DNS resolver IO auxiliar functions + constantResource, + -- ** DNSActions IO + ioDNSActions, + -- * DNS based provider for local root peers localRootPeersProvider, - DomainAddress (..), - RelayAddress (..), + DomainAccessPoint (..), + RelayAccessPoint (..), IP.IP (..), TraceLocalRootPeers(..), @@ -21,6 +26,9 @@ module Ouroboros.Network.PeerSelection.RootPeersDNS ( publicRootPeersProvider, TracePublicRootPeers(..), + -- DNS lookup support + resolveDomainAccessPoint, + -- * DNS type re-exports DNS.ResolvConf, DNS.Domain, @@ -31,279 +39,189 @@ module Ouroboros.Network.PeerSelection.RootPeersDNS ( ) where import Data.Word (Word32) +import Data.List (elemIndex, foldl') import Data.List.NonEmpty (NonEmpty (..)) -import qualified Data.List.NonEmpty as NonEmpty import qualified Data.Set as Set import Data.Set (Set) import qualified Data.Map.Strict as Map import Data.Map.Strict (Map) +import Data.Sequence (Seq) +import qualified Data.Sequence as Seq +import Data.Void (Void, absurd) -import Control.Exception (IOException) -import Control.Monad (when, unless) +import Control.Applicative ((<|>)) +import Control.Monad (when) import Control.Monad.Class.MonadAsync import Control.Monad.Class.MonadSTM.Strict import Control.Monad.Class.MonadTime -import Control.Monad.Class.MonadTimer hiding (timeout) +import Control.Monad.Class.MonadTimer import Control.Monad.Class.MonadThrow import Control.Tracer (Tracer(..), contramap, traceWith) -import System.Directory (getModificationTime) import Data.IP (IPv4) import qualified Data.IP as IP -import Network.DNS (DNSError) 
import qualified Network.DNS as DNS import qualified Network.Socket as Socket -import Network.Mux.Timeout import Ouroboros.Network.PeerSelection.Types - --- | A relay can have either an IP address and a port number or --- a domain with a port number. --- TODO: move to a Ledger Peer file. -data RelayAddress = RelayAddressDomain DomainAddress - | RelayAddressAddr IP.IP Socket.PortNumber - deriving (Show, Eq, Ord) - --- | A product of a 'DNS.Domain' and 'Socket.PortNumber'. After resolving the --- domain we will use the 'Socket.PortNumber' to form 'Socket.SockAddr'. --- -data DomainAddress = DomainAddress { - daDomain :: !DNS.Domain, - daPortNumber :: !Socket.PortNumber - } - deriving (Show, Eq, Ord) - - ------------------------------------------------ --- Resource --- - --- | Evolving resource; We use it to reinitialise the dns library if the --- `/etc/resolv.conf` file was modified. --- -data Resource err a = Resource { - withResource :: IO (Either err a, Resource err a) - } - --- | Like 'withResource' but retries untill success. --- -withResource' :: Tracer IO err - -> NonEmpty DiffTime - -- ^ delays between each re-try - -> Resource err a - -> IO (a, Resource err a) -withResource' tracer delays0 = go delays0 - where - dropHead :: NonEmpty a -> NonEmpty a - dropHead as@(_ :| []) = as - dropHead (_ :| a : as) = a :| as - - go !delays resource = do - er <- withResource resource - case er of - (Left err, resource') -> do - traceWith tracer err - threadDelay (NonEmpty.head delays) - withResource' tracer (dropHead delays) resource' - (Right r, resource') -> - pure (r, resource') - - -constantResource :: a -> Resource err a -constantResource a = Resource (pure (Right a, constantResource a)) - -data DNSorIOError - = DNSError !DNSError - | IOError !IOException - deriving Show - -instance Exception DNSorIOError where - - --- | Strict version of 'Maybe' adjusted to the needs ot --- 'asyncResolverResource'. --- -data TimedResolver - = TimedResolver !DNS.Resolver !UTCTime - | NoResolver - --- | --- --- TODO: it could be useful for `publicRootPeersProvider`. --- -resolverResource :: DNS.ResolvConf -> IO (Resource DNSorIOError DNS.Resolver) -resolverResource resolvConf = do - rs <- DNS.makeResolvSeed resolvConf - case DNS.resolvInfo resolvConf of - DNS.RCFilePath filePath -> - pure $ go filePath NoResolver - - _ -> DNS.withResolver rs (pure . 
constantResource) - - where - handlers :: FilePath - -> TimedResolver - -> [Handler IO - ( Either DNSorIOError DNS.Resolver - , Resource DNSorIOError DNS.Resolver)] - handlers filePath tr = - [ Handler $ - \(err :: IOException) -> - pure (Left (IOError err), go filePath tr) - , Handler $ - \(err :: DNS.DNSError) -> - pure (Left (DNSError err), go filePath tr) - ] - - go :: FilePath - -> TimedResolver - -> Resource DNSorIOError DNS.Resolver - go filePath tr@NoResolver = Resource $ - do - modTime <- getModificationTime filePath - rs <- DNS.makeResolvSeed resolvConf - DNS.withResolver rs - (\resolver -> - pure (Right resolver, go filePath (TimedResolver resolver modTime))) - `catches` handlers filePath tr - - go filePath tr@(TimedResolver resolver modTime) = Resource $ - do - modTime' <- getModificationTime filePath - if modTime' <= modTime - then pure (Right resolver, go filePath (TimedResolver resolver modTime)) - else do - rs <- DNS.makeResolvSeed resolvConf - DNS.withResolver rs - (\resolver' -> - pure (Right resolver', go filePath (TimedResolver resolver' modTime'))) - `catches` handlers filePath tr - - --- | `Resource` which passes the 'DNS.Resolver' through a 'StrictTVar'. Better --- than 'resolverResource' when using in multiple threads. --- -asyncResolverResource :: DNS.ResolvConf -> IO (Resource DNSorIOError DNS.Resolver) -asyncResolverResource resolvConf = - case DNS.resolvInfo resolvConf of - DNS.RCFilePath filePath -> do - resourceVar <- newTVarIO NoResolver - pure $ go filePath resourceVar - _ -> do - rs <- DNS.makeResolvSeed resolvConf - DNS.withResolver rs (pure . constantResource) - where - handlers :: FilePath -> StrictTVar IO TimedResolver - -> [Handler IO - ( Either DNSorIOError DNS.Resolver - , Resource DNSorIOError DNS.Resolver)] - handlers filePath resourceVar = - [ Handler $ - \(err :: IOException) -> - pure (Left (IOError err), go filePath resourceVar) - , Handler $ - \(err :: DNS.DNSError) -> - pure (Left (DNSError err), go filePath resourceVar) - ] - - go :: FilePath -> StrictTVar IO TimedResolver - -> Resource DNSorIOError DNS.Resolver - go filePath resourceVar = Resource $ do - r <- atomically (readTVar resourceVar) - case r of - NoResolver -> - do - modTime <- getModificationTime filePath - rs <- DNS.makeResolvSeed resolvConf - DNS.withResolver rs $ \resolver -> do - atomically (writeTVar resourceVar (TimedResolver resolver modTime)) - pure (Right resolver, go filePath resourceVar) - `catches` handlers filePath resourceVar - - TimedResolver resolver modTime -> - do - modTime' <- getModificationTime filePath - if modTime' <= modTime - then pure (Right resolver, go filePath resourceVar) - else do - rs <- DNS.makeResolvSeed resolvConf - DNS.withResolver rs $ \resolver' -> do - atomically (writeTVar resourceVar (TimedResolver resolver' modTime')) - pure (Right resolver', go filePath resourceVar) - `catches` handlers filePath resourceVar - - -#if defined(mingw32_HOST_OS) --- | Returns a newly intiatialised 'DNS.Resolver' at each step; This is only --- for Windows, where we don't have a way to check that the network --- configuration has changed. On /Windows/ the 'dns' library is using --- @GetNetworkParams@ win32 api call to get the list of default dns servers. 
--
-newResolverResource :: DNS.ResolvConf -> Resource DNSorIOError DNS.Resolver
-newResolverResource resolvConf = go
-  where
-    go = Resource $
-      do
-        rs <- DNS.makeResolvSeed resolvConf
-        DNS.withResolver rs $ \resolver -> pure (Right resolver, go)
-      `catches` handlers
-
-    handlers :: [Handler IO
-                  ( Either DNSorIOError DNS.Resolver
-                  , Resource DNSorIOError DNS.Resolver)]
-    handlers =
-      [ Handler $
-          \(err :: IOException) ->
-          pure (Left (IOError err), go)
-      , Handler $
-          \(err :: DNS.DNSError) ->
-          pure (Left (DNSError err), go)
-      ]
-#endif
-
+import Ouroboros.Network.PeerSelection.RelayAccessPoint
+import Ouroboros.Network.PeerSelection.RootPeersDNS.DNSActions
+                  ( DNSorIOError (..)
+                  , DNSActions (..)
+                  , Resource (..)
+                  , ioDNSActions
+                  , constantResource
+                  , withResource'
+                  )

 -----------------------------------------------
 -- local root peer set provider based on DNS
 --

-data TraceLocalRootPeers =
-       TraceLocalRootDomains [(DomainAddress, PeerAdvertise)]
-     | TraceLocalRootWaiting DomainAddress DiffTime
-     | TraceLocalRootResult  DomainAddress [(IPv4, DNS.TTL)]
-     | TraceLocalRootFailure DomainAddress DNSorIOError
+data TraceLocalRootPeers exception =
+       TraceLocalRootDomains [(Int, Map RelayAccessPoint PeerAdvertise)]
+       -- ^ 'Int' is the configured valency for the local producer groups
+     | TraceLocalRootWaiting DomainAccessPoint DiffTime
+     | TraceLocalRootResult  DomainAccessPoint [(IPv4, DNS.TTL)]
+     | TraceLocalRootGroups  (Seq (Int, Map Socket.SockAddr PeerAdvertise))
+       -- ^ This traces the results of the local root peer provider
+     | TraceLocalRootFailure DomainAccessPoint (DNSorIOError exception)
       --TODO: classify DNS errors, config error vs transitory
+     | TraceLocalRootError   DomainAccessPoint SomeException
   deriving Show

+-- | Resolve 'RelayAccessPoint's of local root peers using DNS if needed. Local
+-- roots are provided wrapped in a 'StrictTVar', whose value might change
+-- (re-read from a config file). The resolved DNS names are available through
+-- the output 'StrictTVar'.
 --
--- This action typically runs indefinitely, but can terminate successfully in
--- corner cases where there is nothing to do.
--
-localRootPeersProvider :: Tracer IO TraceLocalRootPeers
-                       -> TimeoutFn IO
-                       -> DNS.ResolvConf
-                       -> StrictTVar IO (Map DomainAddress (Map Socket.SockAddr PeerAdvertise))
-                       -> [(DomainAddress, PeerAdvertise)]
-                       -> IO ()
-localRootPeersProvider tracer timeout resolvConf rootPeersVar domains = do
-    traceWith tracer (TraceLocalRootDomains domains)
-    unless (null domains) $ do
-#if !defined(mingw32_HOST_OS)
-      rr <- asyncResolverResource resolvConf
-#else
-      let rr = newResolverResource resolvConf
-#endif
-      withAsyncAll (map (monitorDomain rr) domains) $ \asyncs ->
-        waitAny asyncs >> return ()
+localRootPeersProvider
+  :: forall m resolver exception.
+     ( MonadAsync m
+     , MonadDelay m
+     , Eq (Async m Void)
+     )
+  => Tracer m (TraceLocalRootPeers exception)
+  -> DNS.ResolvConf
+  -> DNSActions resolver exception m
+  -> STM m [(Int, Map RelayAccessPoint PeerAdvertise)]
+  -- ^ input
+  -> StrictTVar m (Seq (Int, Map Socket.SockAddr PeerAdvertise))
+  -- ^ output 'TVar'
+  -> m Void
+localRootPeersProvider tracer
+                       resolvConf
+                       DNSActions {
+                         dnsAsyncResolverResource,
+                         dnsLookupAWithTTL
+                       }
+                       readDomainsGroups
+                       rootPeersGroupsVar = do
+    atomically readDomainsGroups >>= loop
   where
-    monitorDomain :: Resource DNSorIOError DNS.Resolver -> (DomainAddress, PeerAdvertise) -> IO ()
-    monitorDomain rr0 (domain@DomainAddress {daDomain, daPortNumber}, advertisePeer) =
-        go rr0 0
+    loop domainsGroups = do
+      traceWith tracer (TraceLocalRootDomains domainsGroups)
+      rr <- dnsAsyncResolverResource resolvConf
+      let
+          -- Flatten the local root peer groups and associate each
+          -- DomainAccessPoint to be monitored with the index of its group.
+          -- NOTE: We need to pair the index because the resulting list can be
+          -- sparse.
+          domains :: [(Int, DomainAccessPoint, PeerAdvertise)]
+          domains = [ (index, domain, pa)
+                    | (index, (_, m)) <- zip [0..] domainsGroups
+                    , (RelayDomainAccessPoint domain, pa) <- Map.toList m ]
+          -- Since we want to preserve the number of groups, the targets, and
+          -- the addresses within each group, we fill the TVar with
+          -- a placeholder list, in order for each monitored DomainAccessPoint
+          -- to be updated in the correct group.
+          rootPeersGroups :: Seq (Int, Map Socket.SockAddr PeerAdvertise)
+          rootPeersGroups = Seq.fromList $ map (\(target, m) -> (target, f m)) domainsGroups
+            where
+              f :: Map RelayAccessPoint PeerAdvertise
+                -> Map Socket.SockAddr PeerAdvertise
+              f = Map.mapKeys
+                    (\k -> case k of
+                       RelayAccessAddress ip port ->
+                         IP.toSockAddr (ip, port)
+                       _ ->
+                         error "localRootPeersProvider: impossible happened"
+                    )
+                . Map.filterWithKey
+                    (\k _ -> case k of
+                       RelayAccessAddress {} -> True
+                       RelayAccessDomain {}  -> False
+                    )
+
+      -- Launch DomainAccessPoint monitoring threads and wait for threads to
+      -- error or for local configuration changes.
+      domainsGroups' <-
+        withAsyncAll (monitorDomain rr rootPeersGroups `map` domains) $ \as -> do
+          res <- atomically $
+                  -- wait until any of the monitoring threads errors
+                  ((\(a, res) ->
+                      let domain :: DomainAccessPoint
+                          domain = case a `elemIndex` as of
+                            Nothing  -> error "localRootPeersProvider: impossible happened"
+                            Just idx -> case (domains !! idx) of (_, x, _) -> x
+                      in either (Left . (domain,)) absurd res)
+                    -- the monitoring thread cannot return, it can only error
+                    <$> waitAnyCatchSTM as)
+                  <|>
+                  -- wait for configuration changes
+                  (do a <- readDomainsGroups
+                      -- wait until the input domains groups changes
+                      check (a /= domainsGroups)
+                      return (Right a))
+          case res of
+            Left (domain, err)   -> traceWith tracer (TraceLocalRootError domain err)
+                                    -- current domain groups haven't changed, we
+                                    -- can return them
+                                    >> return domainsGroups
+            Right domainsGroups' -> return domainsGroups'
+      -- We continue the loop outside of 'withAsyncAll'; this makes sure that
+      -- all the monitoring threads are killed.
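+      -- ('withAsyncAll' is built from 'withAsync', which cancels the forked
+      -- thread when the inner action returns or throws.)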
+ loop domainsGroups' + + + resolveDomain + :: resolver + -> DomainAccessPoint + -> PeerAdvertise + -> m (Either DNS.DNSError [((Socket.SockAddr, PeerAdvertise), DNS.TTL)]) + resolveDomain resolver + domain@DomainAccessPoint {dapDomain, dapPortNumber} + advertisePeer = do + reply <- dnsLookupAWithTTL resolvConf resolver dapDomain + case reply of + Left err -> do + traceWith tracer (TraceLocalRootFailure domain (DNSError err)) + return $ Left err + + Right results -> do + traceWith tracer (TraceLocalRootResult domain results) + return $ Right [ (( Socket.SockAddrInet + dapPortNumber + (IP.toHostAddress addr) + , advertisePeer) + , _ttl) + | (addr, _ttl) <- results ] + + monitorDomain + :: Resource m (DNSorIOError exception) resolver + -> Seq (Int, Map Socket.SockAddr PeerAdvertise) + -- ^ local group peers which didnhh + -> (Int, DomainAccessPoint, PeerAdvertise) + -> m Void + monitorDomain rr0 rootPeersGroups0 (index, domain, advertisePeer) = + go rr0 rootPeersGroups0 0 where - go :: Resource DNSorIOError DNS.Resolver -> DiffTime -> IO () - go !rr !ttl = do + go :: Resource m (DNSorIOError exception) resolver + -> Seq (Int, Map Socket.SockAddr PeerAdvertise) + -> DiffTime + -> m Void + go !rr !rootPeersGroups !ttl = do when (ttl > 0) $ do traceWith tracer (TraceLocalRootWaiting domain ttl) threadDelay ttl @@ -312,64 +230,74 @@ localRootPeersProvider tracer timeout resolvConf rootPeersVar domains = do withResource' (TraceLocalRootFailure domain `contramap` tracer) (1 :| [3, 6, 9, 12]) rr - reply <- lookupAWithTTL timeout resolvConf resolver daDomain - case reply of - Left err -> do - traceWith tracer (TraceLocalRootFailure domain (DNSError err)) - go rrNext (ttlForDnsError err ttl) + reply <- resolveDomain resolver domain advertisePeer + case reply of + Left err -> go rrNext rootPeersGroups (ttlForDnsError err ttl) Right results -> do - traceWith tracer (TraceLocalRootResult domain results) - atomically $ do - rootPeers <- readTVar rootPeersVar - let resultsMap :: Map Socket.SockAddr PeerAdvertise - resultsMap = Map.fromList [ ( Socket.SockAddrInet - daPortNumber - (IP.toHostAddress addr) - , advertisePeer) - | (addr, _ttl) <- results ] - rootPeers' :: Map DomainAddress (Map Socket.SockAddr PeerAdvertise) - rootPeers' = Map.insert domain resultsMap rootPeers + rootPeersGroups' <- atomically $ do + let (target, entry) = rootPeersGroups `Seq.index` index + resultsMap = Map.fromList (map fst results) + entry' = resultsMap <> entry + rootPeersGroups' = + Seq.update index + (target, entry') + rootPeersGroups -- Only overwrite if it changed: - when (Map.lookup domain rootPeers /= Just resultsMap) $ - writeTVar rootPeersVar rootPeers' + when (entry /= entry') $ + writeTVar rootPeersGroupsVar rootPeersGroups' - go rrNext (ttlForResults (map snd results)) + return rootPeersGroups' + traceWith tracer (TraceLocalRootGroups rootPeersGroups') + go rrNext rootPeersGroups' (ttlForResults (map snd results)) --------------------------------------------- -- Public root peer set provider using DNS -- data TracePublicRootPeers = - TracePublicRootDomains [DomainAddress] + TracePublicRootRelayAccessPoint [RelayAccessPoint] + | TracePublicRootDomains [DomainAccessPoint] | TracePublicRootResult DNS.Domain [(IPv4, DNS.TTL)] | TracePublicRootFailure DNS.Domain DNS.DNSError --TODO: classify DNS errors, config error vs transitory deriving Show -- | +-- TODO track PeerAdvertise -- -publicRootPeersProvider :: Tracer IO TracePublicRootPeers - -> TimeoutFn IO - -> DNS.ResolvConf - -> [DomainAddress] - -> ((Int -> IO (Set 
Socket.SockAddr, DiffTime)) -> IO a) - -> IO a -publicRootPeersProvider tracer timeout resolvConf domains action = do - traceWith tracer (TracePublicRootDomains domains) -#if !defined(mingw32_HOST_OS) - rr <- resolverResource resolvConf -#else - let rr = newResolverResource resolvConf -#endif +publicRootPeersProvider + :: forall resolver exception a m. + (MonadThrow m, MonadAsync m, Exception exception) + => Tracer m TracePublicRootPeers + -> DNS.ResolvConf + -> STM m [RelayAccessPoint] + -> DNSActions resolver exception m + -> ((Int -> m (Set Socket.SockAddr, DiffTime)) -> m a) + -> m a +publicRootPeersProvider tracer + resolvConf + readDomains + DNSActions { + dnsResolverResource, + dnsLookupAWithTTL + } + action = do + domains <- atomically readDomains + traceWith tracer (TracePublicRootRelayAccessPoint domains) + rr <- dnsResolverResource resolvConf resourceVar <- newTVarIO rr action (requestPublicRootPeers resourceVar) where - requestPublicRootPeers :: StrictTVar IO (Resource DNSorIOError DNS.Resolver) - -> Int -> IO (Set Socket.SockAddr, DiffTime) + requestPublicRootPeers + :: StrictTVar m (Resource m (DNSorIOError exception) resolver) + -> Int + -> m (Set Socket.SockAddr, DiffTime) requestPublicRootPeers resourceVar _numRequested = do + domains <- atomically readDomains + traceWith tracer (TracePublicRootRelayAccessPoint domains) rr <- atomically $ readTVar resourceVar (er, rr') <- withResource rr atomically $ writeTVar resourceVar rr' @@ -378,58 +306,117 @@ publicRootPeersProvider tracer timeout resolvConf domains action = do Left (IOError err) -> throwIO err Right resolver -> do let lookups = - [ lookupAWithTTL timeout resolvConf resolver daDomain - | DomainAddress {daDomain} <- domains ] + [ (,) (DomainAccessPoint domain port) + <$> dnsLookupAWithTTL + resolvConf + resolver + domain + | RelayAccessDomain domain port <- domains ] -- The timeouts here are handled by the 'lookupAWithTTL'. They're -- configured via the DNS.ResolvConf resolvTimeout field and defaults -- to 3 sec. results <- withAsyncAll lookups (atomically . mapM waitSTM) sequence_ [ traceWith tracer $ case result of - Left dnserr -> TracePublicRootFailure daDomain dnserr - Right ipttls -> TracePublicRootResult daDomain ipttls - | (DomainAddress {daDomain}, result) <- zip domains results ] - let successes = [ (Socket.SockAddrInet daPortNumber (IP.toHostAddress ip), ipttl) - | (Right ipttls, DomainAddress {daPortNumber}) <- (zip results domains) + Left dnserr -> TracePublicRootFailure dapDomain dnserr + Right ipttls -> TracePublicRootResult dapDomain ipttls + | (DomainAccessPoint {dapDomain}, result) <- results ] + let successes = [ ( Socket.SockAddrInet dapPortNumber + (IP.toHostAddress ip) + , ipttl) + | ( DomainAccessPoint {dapPortNumber} + , Right ipttls) <- results , (ip, ipttl) <- ipttls ] - !ips = Set.fromList (map fst successes) + !domainsIps = [ IP.toSockAddr (ip, port) + | RelayAccessAddress ip port <- domains ] + !ips = Set.fromList (map fst successes ++ domainsIps) !ttl = ttlForResults (map snd successes) -- If all the lookups failed we'll return an empty set with a minimum -- TTL, and the governor will invoke its exponential backoff. return (ips, ttl) +-- | Provides DNS resolution functionality. +-- +resolveDomainAccessPoint + :: forall exception resolver m. 
+ (MonadThrow m, MonadAsync m, Exception exception) + => Tracer m TracePublicRootPeers + -> DNS.ResolvConf + -> DNSActions resolver exception m + -> [DomainAccessPoint] + -> m (Map DomainAccessPoint (Set Socket.SockAddr)) +resolveDomainAccessPoint tracer + resolvConf + DNSActions { + dnsResolverResource, + dnsLookupAWithTTL + } + domains + = do + traceWith tracer (TracePublicRootDomains domains) + rr <- dnsResolverResource resolvConf + resourceVar <- newTVarIO rr + requestPublicRootPeers resourceVar + where + requestPublicRootPeers + :: StrictTVar m (Resource m (DNSorIOError exception) resolver) + -> m (Map DomainAccessPoint (Set Socket.SockAddr)) + requestPublicRootPeers resourceVar = do + rr <- atomically $ readTVar resourceVar + (er, rr') <- withResource rr + atomically $ writeTVar resourceVar rr' + case er of + Left (DNSError err) -> throwIO err + Left (IOError err) -> throwIO err + Right resolver -> do + let lookups = + [ (,) domain + <$> dnsLookupAWithTTL + resolvConf + resolver + (dapDomain domain) + | domain <- domains ] + -- The timeouts here are handled by the 'lookupAWithTTL'. They're + -- configured via the DNS.ResolvConf resolvTimeout field and defaults + -- to 3 sec. + results <- withAsyncAll lookups (atomically . mapM waitSTM) + sequence_ + [ traceWith tracer $ case result of + Left dnserr -> TracePublicRootFailure dapDomain dnserr + Right ipttls -> TracePublicRootResult dapDomain ipttls + | (DomainAccessPoint {dapDomain}, result) <- results ] + return $ foldl' buildResult Map.empty results + + buildResult :: Map DomainAccessPoint (Set Socket.SockAddr) + -> (DomainAccessPoint, Either DNS.DNSError [(IPv4, DNS.TTL)]) + -> Map DomainAccessPoint (Set Socket.SockAddr) + buildResult mr (_, Left _) = mr + buildResult mr (domain, Right ipsttls) = + Map.alter addFn domain mr + where + addFn :: Maybe (Set Socket.SockAddr) -> Maybe (Set Socket.SockAddr) + addFn Nothing = + let ips = map fst ipsttls + !addrs = + map ( Socket.SockAddrInet (dapPortNumber domain) + . IP.toHostAddress) + ips + !addrSet = Set.fromList addrs in + Just addrSet + addFn (Just addrSet) = + let ips = map fst ipsttls + !addrs = + map ( Socket.SockAddrInet (dapPortNumber domain) + . IP.toHostAddress) + ips + !addrSet' = Set.union addrSet (Set.fromList addrs) in + Just addrSet' --------------------------------------------- -- Shared utils -- --- | Like 'DNS.lookupA' but also return the TTL for the results. --- --- DNS library timeouts do not work reliably on Windows (#1873), hende the --- additional timeout. 
--- -lookupAWithTTL :: TimeoutFn IO - -> DNS.ResolvConf - -> DNS.Resolver - -> DNS.Domain - -> IO (Either DNS.DNSError [(IPv4, DNS.TTL)]) -lookupAWithTTL timeout resolvConf resolver domain = do - reply <- timeout (microsecondsAsIntToDiffTime $ DNS.resolvTimeout resolvConf) $ DNS.lookupRaw resolver domain DNS.A - case reply of - Nothing -> return (Left DNS.TimeoutExpired) - Just (Left err) -> return (Left err) - Just (Right ans) -> return (DNS.fromDNSMessage ans selectA) - --TODO: we can get the SOA TTL on NXDOMAIN here if we want to - where - selectA DNS.DNSMessage { DNS.answer } = - [ (addr, ttl) - | DNS.ResourceRecord { - DNS.rdata = DNS.RD_A addr, - DNS.rrttl = ttl - } <- answer - ] - -- | Policy for TTL for positive results ttlForResults :: [DNS.TTL] -> DiffTime @@ -454,31 +441,28 @@ clipTTLAbove, clipTTLBelow :: DiffTime -> DiffTime clipTTLBelow = max 60 -- between 1min clipTTLAbove = min 86400 -- and 24hrs -withAsyncAll :: [IO a] -> ([Async IO a] -> IO b) -> IO b +withAsyncAll :: MonadAsync m => [m a] -> ([Async m a] -> m b) -> m b withAsyncAll xs0 action = go [] xs0 where - go as [] = action as + go as [] = action (reverse as) go as (x:xs) = withAsync x (\a -> go (a:as) xs) - --------------------------------------------- -- Examples -- {- -exampleLocal :: [DomainAddress] -> IO () +exampleLocal :: [DomainAccessPoint] -> IO () exampleLocal domains = do rootPeersVar <- newTVarIO Map.empty withAsync (observer rootPeersVar Map.empty) $ \_ -> provider rootPeersVar where provider rootPeersVar = - withTimeoutSerial $ \timeout -> - localRootPeersProvider - (showTracing stdoutTracer) - timeout - DNS.defaultResolvConf - rootPeersVar - (map (\d -> (d, DoAdvertisePeer)) domains) + localRootPeersProvider + (showTracing stdoutTracer) + DNS.defaultResolvConf + rootPeersVar + (map (\d -> (d, DoAdvertisePeer)) domains) observer :: (Eq a, Show a) => StrictTVar IO a -> a -> IO () observer var fingerprint = do @@ -489,16 +473,14 @@ exampleLocal domains = do traceWith (showTracing stdoutTracer) x observer var x -examplePublic :: [DomainAddress] -> IO () +examplePublic :: [DomainAccessPoint] -> IO () examplePublic domains = do - withTimeoutSerial $ \timeout -> - publicRootPeersProvider - (showTracing stdoutTracer) - timeout - DNS.defaultResolvConf - domains $ \requestPublicRootPeers -> - forever $ do - (ips, ttl) <- requestPublicRootPeers 42 - traceWith (showTracing stdoutTracer) (ips, ttl) - threadDelay ttl + publicRootPeersProvider + (showTracing stdoutTracer) + DNS.defaultResolvConf + domains $ \requestPublicRootPeers -> + forever $ do + (ips, ttl) <- requestPublicRootPeers 42 + traceWith (showTracing stdoutTracer) (ips, ttl) + threadDelay ttl -} diff --git a/ouroboros-network/src/Ouroboros/Network/PeerSelection/RootPeersDNS/DNSActions.hs b/ouroboros-network/src/Ouroboros/Network/PeerSelection/RootPeersDNS/DNSActions.hs new file mode 100644 index 00000000000..f7bd1fbbfd1 --- /dev/null +++ b/ouroboros-network/src/Ouroboros/Network/PeerSelection/RootPeersDNS/DNSActions.hs @@ -0,0 +1,303 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE NamedFieldPuns #-} + +module Ouroboros.Network.PeerSelection.RootPeersDNS.DNSActions + ( + -- * DNS based actions for local and public root providers + DNSActions (..), + + -- * DNSActions IO + ioDNSActions, + + -- * Utils + -- ** Resource + Resource (..), + withResource', + constantResource, + + -- ** Error type + DNSorIOError (..) 
+  )
+  where
+
+import Data.List.NonEmpty (NonEmpty (..))
+import qualified Data.List.NonEmpty as NonEmpty
+
+import Control.Exception (IOException)
+#if !defined(mingw32_HOST_OS)
+import Control.Monad.Class.MonadSTM.Strict
+#endif
+import Control.Monad.Class.MonadTime
+import Control.Monad.Class.MonadTimer
+import Control.Monad.Class.MonadThrow
+import Control.Tracer (Tracer(..), traceWith)
+
+import System.Directory (getModificationTime)
+
+import Data.IP (IPv4)
+import Network.DNS (DNSError)
+import qualified Network.DNS as DNS
+
+
+data DNSorIOError exception
+    = DNSError !DNSError
+    | IOError  !exception
+  deriving Show
+
+instance Exception exception => Exception (DNSorIOError exception) where
+
+-----------------------------------------------
+-- Resource
+--
+
+-- | Evolving resource; we use it to reinitialise the DNS library if the
+-- `/etc/resolv.conf` file was modified.
+--
+newtype Resource m err a = Resource {
+    withResource :: m (Either err a, Resource m err a)
+  }
+
+-- | Like 'withResource' but retries until success.
+--
+withResource' :: MonadDelay m
+              => Tracer m err
+              -> NonEmpty DiffTime
+              -- ^ delays between each re-try
+              -> Resource m err a
+              -> m (a, Resource m err a)
+withResource' tracer = go
+  where
+    dropHead :: NonEmpty a -> NonEmpty a
+    dropHead as@(_ :| [])  = as
+    dropHead (_ :| a : as) = a :| as
+
+    go !delays resource = do
+      er <- withResource resource
+      case er of
+        (Left err, resource') -> do
+          traceWith tracer err
+          threadDelay (NonEmpty.head delays)
+          withResource' tracer (dropHead delays) resource'
+        (Right r, resource') ->
+          pure (r, resource')
+
+constantResource :: Applicative m => a -> Resource m err a
+constantResource a = Resource (pure (Right a, constantResource a))
+
+-- | Strict version of 'Maybe' adjusted to the needs of
+-- 'asyncResolverResource'.
+--
+data TimedResolver
+    = TimedResolver !DNS.Resolver !UTCTime
+    | NoResolver
+
+-- | Dictionary of DNS actions vocabulary.
+--
+data DNSActions resolver exception m = DNSActions {
+
+    -- |
+    --
+    -- TODO: it could be useful for `publicRootPeersProvider`.
+    --
+    dnsResolverResource      :: DNS.ResolvConf
+                             -> m (Resource m (DNSorIOError exception) resolver),
+
+    -- | `Resource` which passes the 'DNS.Resolver' (or abstract resolver type)
+    -- through a 'StrictTVar'. Better than 'resolverResource' when used in
+    -- multiple threads.
+    --
+    -- On /Windows/ we use a different implementation which always returns
+    -- a newly initialised 'DNS.Resolver' at each step. This is because on
+    -- /Windows/ we don't have a way to check that the network configuration has
+    -- changed. The 'dns' library uses the @GetNetworkParams@ Win32 API call
+    -- to get the list of default DNS servers.
+    dnsAsyncResolverResource :: DNS.ResolvConf
+                             -> m (Resource m (DNSorIOError exception) resolver),
+
+    -- | Like 'DNS.lookupA' but also returns the TTL for the results.
+    --
+    -- DNS library timeouts do not work reliably on Windows (#1873), hence the
+    -- additional timeout.
+    --
+    dnsLookupAWithTTL        :: DNS.ResolvConf
+                             -> resolver
+                             -> DNS.Domain
+                             -> m (Either DNS.DNSError [(IPv4, DNS.TTL)])
+  }
+
+
+
+
+-- |
+--
+-- TODO: it could be useful for `publicRootPeersProvider`.
+--
+resolverResource :: DNS.ResolvConf
+                 -> IO (Resource IO (DNSorIOError IOException) DNS.Resolver)
+resolverResource resolvConf = do
+    rs <- DNS.makeResolvSeed resolvConf
+    case DNS.resolvInfo resolvConf of
+      DNS.RCFilePath filePath ->
+        pure $ go filePath NoResolver
+
+      _ -> DNS.withResolver rs (pure . constantResource)
+
+  where
+    handlers :: FilePath
+             -> TimedResolver
+             -> [Handler IO
+                  ( Either (DNSorIOError IOException) DNS.Resolver
+                  , Resource IO (DNSorIOError IOException) DNS.Resolver)]
+    handlers filePath tr =
+      [ Handler $
+          \(err :: IOException) ->
+            pure (Left (IOError err), go filePath tr)
+      , Handler $
+          \(err :: DNS.DNSError) ->
+            pure (Left (DNSError err), go filePath tr)
+      ]
+
+    go :: FilePath
+       -> TimedResolver
+       -> Resource IO (DNSorIOError IOException) DNS.Resolver
+    go filePath tr@NoResolver = Resource $
+      do
+        modTime <- getModificationTime filePath
+        rs <- DNS.makeResolvSeed resolvConf
+        DNS.withResolver rs
+          (\resolver ->
+            pure (Right resolver, go filePath (TimedResolver resolver modTime)))
+      `catches` handlers filePath tr
+
+    go filePath tr@(TimedResolver resolver modTime) = Resource $
+      do
+        modTime' <- getModificationTime filePath
+        if modTime' <= modTime
+          then pure (Right resolver, go filePath (TimedResolver resolver modTime))
+          else do
+            rs <- DNS.makeResolvSeed resolvConf
+            DNS.withResolver rs
+              (\resolver' ->
+                pure (Right resolver', go filePath (TimedResolver resolver' modTime')))
+      `catches` handlers filePath tr
+
+
+-- | `Resource` which passes the 'DNS.Resolver' through a 'StrictTVar'. Better
+-- than 'resolverResource' when used in multiple threads.
+--
+-- On /Windows/ it returns a newly initialised 'DNS.Resolver' at each step; this
+-- is because on /Windows/ we don't have a way to check that the network
+-- configuration has changed. The 'dns' library uses the @GetNetworkParams@
+-- Win32 API call to get the list of default DNS servers.
+asyncResolverResource :: DNS.ResolvConf
+                      -> IO (Resource IO (DNSorIOError IOException)
+                                         DNS.Resolver)
+#if !defined(mingw32_HOST_OS)
+asyncResolverResource resolvConf =
+    case DNS.resolvInfo resolvConf of
+      DNS.RCFilePath filePath -> do
+        resourceVar <- newTVarIO NoResolver
+        pure $ go filePath resourceVar
+      _ -> do
+        rs <- DNS.makeResolvSeed resolvConf
+        DNS.withResolver rs (pure .
constantResource) + where + handlers :: FilePath -> StrictTVar IO TimedResolver + -> [Handler IO + ( Either (DNSorIOError IOException) DNS.Resolver + , Resource IO (DNSorIOError IOException) DNS.Resolver)] + handlers filePath resourceVar = + [ Handler $ + \(err :: IOException) -> + pure (Left (IOError err), go filePath resourceVar) + , Handler $ + \(err :: DNS.DNSError) -> + pure (Left (DNSError err), go filePath resourceVar) + ] + + go :: FilePath -> StrictTVar IO TimedResolver + -> Resource IO (DNSorIOError IOException) DNS.Resolver + go filePath resourceVar = Resource $ do + r <- atomically (readTVar resourceVar) + case r of + NoResolver -> + do + modTime <- getModificationTime filePath + rs <- DNS.makeResolvSeed resolvConf + DNS.withResolver rs $ \resolver -> do + atomically (writeTVar resourceVar (TimedResolver resolver modTime)) + pure (Right resolver, go filePath resourceVar) + `catches` handlers filePath resourceVar + + TimedResolver resolver modTime -> + do + modTime' <- getModificationTime filePath + if modTime' <= modTime + then pure (Right resolver, go filePath resourceVar) + else do + rs <- DNS.makeResolvSeed resolvConf + DNS.withResolver rs $ \resolver' -> do + atomically (writeTVar resourceVar (TimedResolver resolver' modTime')) + pure (Right resolver', go filePath resourceVar) + `catches` handlers filePath resourceVar +#else +asyncResolverResource resolvConf = return go + where + go = Resource $ + do + rs <- DNS.makeResolvSeed resolvConf + DNS.withResolver rs $ \resolver -> pure (Right resolver, go) + `catches` handlers + + handlers :: [Handler IO + ( Either (DNSorIOError IOException) DNS.Resolver + , Resource IO (DNSorIOError IOException) DNS.Resolver)] + handlers = + [ Handler $ + \(err :: IOException) -> + pure (Left (IOError err), go) + , Handler $ + \(err :: DNS.DNSError) -> + pure (Left (DNSError err), go) + ] +#endif + +-- | Like 'DNS.lookupA' but also return the TTL for the results. +-- +-- DNS library timeouts do not work reliably on Windows (#1873), hence the +-- additional timeout. +-- +lookupAWithTTL :: DNS.ResolvConf + -> DNS.Resolver + -> DNS.Domain + -> IO (Either DNS.DNSError [(IPv4, DNS.TTL)]) +lookupAWithTTL resolvConf resolver domain = do + reply <- timeout (microsecondsAsIntToDiffTime + $ DNS.resolvTimeout resolvConf) + (DNS.lookupRaw resolver domain DNS.A) + case reply of + Nothing -> return (Left DNS.TimeoutExpired) + Just (Left err) -> return (Left err) + Just (Right ans) -> return (DNS.fromDNSMessage ans selectA) + --TODO: we can get the SOA TTL on NXDOMAIN here if we want to + where + selectA DNS.DNSMessage { DNS.answer } = + [ (addr, ttl) + | DNS.ResourceRecord { + DNS.rdata = DNS.RD_A addr, + DNS.rrttl = ttl + } <- answer + ] + +-- | Bundle of DNS Actions that runs in IO +-- +ioDNSActions :: DNSActions DNS.Resolver IOException IO +ioDNSActions = DNSActions { + dnsResolverResource = resolverResource, + dnsAsyncResolverResource = asyncResolverResource, + dnsLookupAWithTTL = lookupAWithTTL + } + diff --git a/ouroboros-network/src/Ouroboros/Network/PeerSelection/Simple.hs b/ouroboros-network/src/Ouroboros/Network/PeerSelection/Simple.hs new file mode 100644 index 00000000000..2dd3d8586ed --- /dev/null +++ b/ouroboros-network/src/Ouroboros/Network/PeerSelection/Simple.hs @@ -0,0 +1,97 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE RankNTypes #-} + +module Ouroboros.Network.PeerSelection.Simple + ( withPeerSelectionActions + -- * Re-exports + , PeerSelectionTargets (..) + , PeerAdvertise (..) 
+  ) where
+
+
+import Data.Foldable (toList)
+import Control.Monad.Class.MonadAsync
+import Control.Monad.Class.MonadTime
+import Control.Monad.Class.MonadSTM.Strict
+import Control.Tracer (Tracer)
+import Control.Exception (IOException)
+
+import Data.Map (Map)
+import Data.Set (Set)
+import Data.Void (Void)
+
+import qualified Network.DNS as DNS
+import qualified Network.Socket as Socket
+
+import Ouroboros.Network.PeerSelection.Types (PeerAdvertise (..))
+import Ouroboros.Network.PeerSelection.Governor.Types
+import Ouroboros.Network.PeerSelection.LedgerPeers
+import Ouroboros.Network.PeerSelection.RootPeersDNS
+
+
+withPeerSelectionActions
+  :: Tracer IO (TraceLocalRootPeers IOException)
+  -> Tracer IO TracePublicRootPeers
+  -> STM IO PeerSelectionTargets
+  -> STM IO [(Int, Map RelayAccessPoint PeerAdvertise)]
+  -- ^ local root peers
+  -> STM IO [RelayAccessPoint]
+  -- ^ public root peers
+  -> PeerStateActions Socket.SockAddr peerconn IO
+  -> (NumberOfPeers -> IO (Maybe (Set Socket.SockAddr, DiffTime)))
+  -> (Maybe (Async IO Void)
+      -> PeerSelectionActions Socket.SockAddr peerconn IO
+      -> IO a)
+  -- ^ continuation; receives a handle to the local root peers provider thread
+  -- (only if the local root peers were non-empty).
+  -> IO a
+withPeerSelectionActions
+  localRootTracer
+  publicRootTracer
+  readTargets
+  readLocalRootPeers
+  readPublicRootPeers
+  peerStateActions
+  getLedgerPeers
+  k = do
+    localRootsVar <- newTVarIO mempty
+    let peerSelectionActions = PeerSelectionActions {
+            readPeerSelectionTargets = readTargets,
+            readLocalRootPeers = toList <$> readTVar localRootsVar,
+            requestPublicRootPeers = requestPublicRootPeers ioDNSActions,
+            requestPeerGossip = \_ -> pure [],
+            peerStateActions
+          }
+    withAsync
+      (localRootPeersProvider
+        localRootTracer
+        DNS.defaultResolvConf
+        ioDNSActions
+        readLocalRootPeers
+        localRootsVar)
+      (\thread -> k (Just thread) peerSelectionActions)
+  where
+    -- We first try to get public root peers from the ledger, but if it fails
+    -- (for example because the node hasn't synced far enough) we fall back
+    -- to using the manually configured bootstrap root peers.
+    requestPublicRootPeers :: DNSActions DNS.Resolver IOException IO
+                           -> Int -> IO (Set Socket.SockAddr, DiffTime)
+    requestPublicRootPeers dnsActions n = do
+      peers_m <- getLedgerPeers (NumberOfPeers $ fromIntegral n)
+      case peers_m of
+        Nothing    -> requestConfiguredRootPeers dnsActions n
+        Just peers -> return peers
+
+    -- For each call we re-initialise the DNS library which forces reading
+    -- `/etc/resolv.conf`:
+    -- https://github.com/input-output-hk/cardano-node/issues/731
+    requestConfiguredRootPeers :: DNSActions DNS.Resolver IOException IO
+                               -> Int -> IO (Set Socket.SockAddr, DiffTime)
+    requestConfiguredRootPeers dnsActions n =
+      publicRootPeersProvider publicRootTracer
+                              DNS.defaultResolvConf
+                              readPublicRootPeers
+                              dnsActions
+                              ($ n)
diff --git a/ouroboros-network/src/Ouroboros/Network/PeerSelection/Types.hs b/ouroboros-network/src/Ouroboros/Network/PeerSelection/Types.hs
index 2655d522119..6d112d5e5c6 100644
--- a/ouroboros-network/src/Ouroboros/Network/PeerSelection/Types.hs
+++ b/ouroboros-network/src/Ouroboros/Network/PeerSelection/Types.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE OverloadedStrings #-}
 
 module Ouroboros.Network.PeerSelection.Types (
     PeerSource(..),
@@ -7,6 +8,8 @@ module Ouroboros.Network.PeerSelection.Types (
   ) where
 
 import GHC.Generics (Generic)
+import Data.Aeson
+import Data.Bool (bool)
 
 -- | Where did this peer come from?
Policy functions can choose to treat @@ -28,6 +31,13 @@ data PeerAdvertise = DoAdvertisePeer | DoNotAdvertisePeer deriving (Eq, Show, Generic) +instance FromJSON PeerAdvertise where + parseJSON = withBool "PeerAdvertise" $ + return . bool DoNotAdvertisePeer DoAdvertisePeer + +instance ToJSON PeerAdvertise where + toJSON DoAdvertisePeer = Bool True + toJSON DoNotAdvertisePeer = Bool False data PeerStatus = PeerCold diff --git a/ouroboros-network/test/Data/Signal.hs b/ouroboros-network/test/Data/Signal.hs new file mode 100644 index 00000000000..4ed369848c4 --- /dev/null +++ b/ouroboros-network/test/Data/Signal.hs @@ -0,0 +1,439 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE ScopedTypeVariables #-} + +module Data.Signal ( + -- * Events + Events, + eventsFromList, + eventsFromListUpToTime, + eventsToList, + selectEvents, + + -- * Low level access + primitiveTransformEvents, + TS(..), + E(..), + + -- * Signals + Signal, + -- ** Construction and conversion + fromChangeEvents, + toChangeEvents, + fromEvents, + + -- * Simple signal transformations + truncateAt, + stable, + nub, + nubBy, + + -- * Temporal operations + linger, + timeout, + until, + difference, + scanl, + + -- * Set-based temporal operations + keyedTimeout, + keyedLinger, + keyedUntil, + + ) where + +import Prelude hiding (until, scanl) + +import Data.Maybe (maybeToList) +import Data.List (groupBy) +import qualified Data.Set as Set +import Data.Set (Set) +import qualified Data.OrdPSQ as PSQ +import Data.OrdPSQ (OrdPSQ) + +import Control.Monad.Class.MonadTime (Time(..), DiffTime, addTime) + + +-- +-- Time stamps and events +-- + +-- The instance Applicative Signal relies on merging event streams. +-- The IO simulator's treatment of time means that we can have many +-- events that occur at the same virtual time, though they are stil +-- causually ordered. +-- +-- We need these compound time stamps to be able to resolve the order +-- of the events that have the same Time when merging event streams. +-- The compound time stamp records the event number from the original +-- trace, for events derivied from the original trace. For artificially +-- constructed events, they can use small or big counters to be ordered +-- before or after other events at the same time. Negative counters are +-- permitted for this purpose. + +data TS = TS !Time !Int + deriving (Eq, Ord, Show) + +-- A single event or entry in a time series, annotated with its timestamp. +-- +data E a = E {-# UNPACK #-} !TS a + deriving (Show, Functor) + + +-- +-- Events +-- + +-- | A time-ordered trace of discrete events that occur at specific times. +-- +-- This corresponds for example to a trace of events or observations from a +-- simulation. +-- +newtype Events a = Events [E a] + deriving (Show, Functor) + +-- | Construct 'Events' from a time series. +-- +eventsFromList :: [(Time, a)] -> Events a +eventsFromList txs = + Events [ E (TS t i) x + | ((t, x), i) <- zip txs [100, 102..] ] + + +-- | Construct 'Events' from a time series. +-- +-- The time series is truncated at (but not including) the given time. This is +-- necessary to check properties over finite prefixes of infinite time series. +-- +eventsFromListUpToTime :: Time -> [(Time, a)] -> Events a +eventsFromListUpToTime horizon txs = + Events [ E (TS t i) x + | let txs' = takeWhile (\(t,_) -> t < horizon) txs + , ((t, x), i) <- zip txs' [100, 102..] 
] + + +eventsToList :: Events a -> [(Time, a)] +eventsToList (Events txs) = [ (t, x) | E (TS t _i) x <- txs ] + +selectEvents :: (a -> Maybe b) -> Events a -> Events b +selectEvents select (Events txs) = + Events [ E t y | E t x <- txs, y <- maybeToList (select x) ] + +primitiveTransformEvents :: ([E a] -> [E b]) -> Events a -> Events b +primitiveTransformEvents f (Events txs) = Events (f txs) + + +-- +-- Signals +-- + +-- | A signal is a time-varying value. It has a value at all times. It changes +-- value at discrete times, i.e. it is not continuous. +-- +data Signal a = Signal a [E a] + deriving (Show, Functor) + +instance Applicative Signal where + pure x = Signal x [] + f <*> x = mergeSignals f x + +mergeSignals :: Signal (a -> b) -> Signal a -> Signal b +mergeSignals (Signal f0 fs0) (Signal x0 xs0) = + Signal (f0 x0) (go f0 x0 (mergeBy compareTimestamp fs0 xs0)) + where + go :: (a -> b) -> a -> [MergeResult (E (a -> b)) (E a)] -> [E b] + go _ _ [] = [] + go _ x (OnlyInLeft (E t f) : rs) = E t (f x) : go f x rs + go f _ (OnlyInRight (E t x) : rs) = E t (f x) : go f x rs + go _ _ (InBoth (E t f) (E _ x) : rs) = E t (f x) : go f x rs + +compareTimestamp :: E a -> E b -> Ordering +compareTimestamp (E ts _) (E ts' _) = compare ts ts' + + +-- | Construct a 'Signal' from an initial value and a time series of events +-- that represent new values of the signal. +-- +-- This only makes sense for events that sample a single time-varying value. +-- +fromChangeEvents :: a -> Events a -> Signal a +fromChangeEvents x (Events xs) = Signal x xs + + +-- | Convert a 'Signal' into a time series of events when the signal value +-- changes. +-- +toChangeEvents :: Signal a -> Events a +toChangeEvents = Events . toTimeSeries + +toTimeSeries :: Signal a -> [E a] +toTimeSeries (Signal x xs) = E (TS (Time 0) 0) x : xs + + +-- | Construct a 'Signal' that represents a time series of discrete events. The +-- signal is @Just@ the event value at the time of the event, and is @Nothing@ +-- at all other times. +-- +-- Note that this signal \"instantaneously\" takes the event value and reverts +-- to @Nothing@ before time moves on. Therefore this kind of signal is not +-- \"stable\" in the sense of 'stableSignal'. +-- +fromEvents :: Events a -> Signal (Maybe a) +fromEvents (Events txs) = + Signal Nothing + [ E (TS t i') s + | E (TS t i) x <- txs + , (i', s) <- [(i, Just x), (i+1, Nothing)] + ] + + +-- | A signal can change value more than once at a single point of time. +-- +-- Sometimes we are interested only in the final \"stable\" value of the signal +-- before time moves on. This function discards the other values, keeping only +-- the final value at each time. +-- +stable :: Signal a -> Signal a +stable (Signal x xs) = + Signal x ((map last . groupBy sameTime) xs) + where + sameTime (E (TS t _) _) (E (TS t' _) _) = t == t' + +-- Truncate a 'Signal' after a given time. This is typically necessary to +-- check properties over finite prefixes of infinite signals. +-- +truncateAt :: Time -> Signal a -> Signal a +truncateAt horizon (Signal x txs) = + Signal x (takeWhile (\(E (TS t _) _) -> t < horizon) txs) + + +-- | Sometimes the way a signal is constructed leads to duplicate signal values +-- which can slow down signal processing. This tidies up the signal by +-- eliminating the duplicates. This does not change the meaning (provided the +-- 'Eq' instance is true equality). 
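+--
+-- For example (illustrative): a signal with initial value @0@ and change
+-- events @0, 1, 1, 2@ is, after 'nub', the same signal with change events
+-- @1, 2@.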
+-- +nub :: Eq a => Signal a -> Signal a +nub = nubBy (==) + +nubBy :: (a -> a -> Bool) -> Signal a -> Signal a +nubBy eq (Signal x0 xs0) = + Signal x0 (go x0 xs0) + where + go _ [] = [] + go x (E t x' : xs) + | x `eq` x' = go x xs + | otherwise = E t x' : go x' xs + + +-- | A linger signal remains @True@ for the given time after the underlying +-- signal is @True@. +-- +linger :: DiffTime + -> (a -> Bool) + -> Signal a + -> Signal Bool +linger = error "TODO: Signal.linger" + + +-- | Make a timeout signal, based on observing an underlying signal. +-- +-- The timeout signal takes the value @True@ when the timeout has occurred, and +-- @False@ otherwise. +-- +-- The timeout is controlled by an \"arming\" function on the underlying signal. +-- The arming function should return @True@ when the timeout should be started, +-- and it returns the time to wait before the timeout fires. The arming function +-- should return @False@ when the timeout should be cancelled or not started. +-- +-- The output signal becomes @True@ when the arming function has been +-- continuously active (i.e. returning @True@) for the given duration. +-- +timeout :: forall a. + DiffTime -- ^ timeout duration + -> (a -> Bool) -- ^ the arming function + -> Signal a + -> Signal Bool +timeout d arm = + Signal False + . disarmed + . toTimeSeries + where + disarmed :: [E a] -> [E Bool] + disarmed [] = [] + disarmed (E ts@(TS t _) x : txs) + | arm x = armed (d `addTime` t) (E ts x : txs) + | otherwise = E ts False : disarmed txs + + armed :: Time -> [E a] -> [E Bool] + armed !expiry [] = [E expiryTS True] where expiryTS = TS expiry 0 + + armed !expiry (E ts@(TS t _) x : txs) + | t > expiry = E expiryTS True : expired (E ts x : txs) + | not (arm x) = E ts False : disarmed txs + | t < expiry = E ts False : armed expiry txs + | otherwise = E expiryTS True : expired txs + where + expiryTS = TS expiry 0 + + expired :: [E a] -> [E Bool] + expired [] = [] + expired (E t x : txs) + | arm x = E t True : expired txs + | otherwise = E t False : disarmed txs + + +until :: (a -> Bool) -- ^ Start + -> (a -> Bool) -- ^ Stop + -> Signal a + -> Signal Bool +until _ = error "TODO: Signal.until" + + +-- | Make a signal that keeps track of recent activity, based on observing an +-- underlying signal. +-- +-- The underlying signal is scrutinised with the provided \"activity interest\" +-- function that tells us if the signal value is activity of interest to track. +-- If it is, the given key is entered into the result signal set for the given +-- time duration. If the same activity occurs again before the duration expires +-- then the expiry will be extended to the new deadline (it is not cumulative). +-- The key will be removed from the result signal set when it expires. +-- +keyedLinger :: forall a b. Ord b + => DiffTime + -> (a -> Set b) -- ^ The activity set signal + -> Signal a + -> Signal (Set b) +keyedLinger d activity = + Signal Set.empty + . go Set.empty PSQ.empty + . 
toTimeSeries + where + go :: Set b + -> OrdPSQ b Time () + -> [E a] + -> [E (Set b)] + go _ _ [] = [] + + go lingerSet lingerPSQ (E ts@(TS t _) xs : txs) + | Just (x, t', _, lingerPSQ') <- PSQ.minView lingerPSQ + , t' < t + , let lingerSet' = Set.delete x lingerSet + = E (TS t' 0) lingerSet' : go lingerSet' lingerPSQ' (E ts xs : txs) + + go lingerSet lingerPSQ (E ts@(TS t _) x : txs) = + let ys = activity x + lingerSet' = lingerSet <> ys + lingerPSQ' = Set.foldl' (\s y -> PSQ.insert y t' () s) lingerPSQ ys + t' = addTime d t + in if lingerSet' /= lingerSet + then E ts lingerSet' : go lingerSet' lingerPSQ' txs + else go lingerSet' lingerPSQ' txs + + +keyedTimeout :: forall a b. Ord b + => DiffTime + -> (a -> Set b) -- ^ The timeout arming set signal + -> Signal a + -> Signal (Set b) +keyedTimeout d arm = + Signal Set.empty + . go Set.empty PSQ.empty Set.empty + . toTimeSeries + where + go :: Set b + -> OrdPSQ b Time () + -> Set b + -> [E a] + -> [E (Set b)] + go _ _ _ [] = [] + + go armedSet armedPSQ timedout (E ts@(TS t _) x : txs) + | Just (y, t', _, armedPSQ') <- PSQ.minView armedPSQ + , t' < t + , let armedSet' = Set.delete y armedSet + timedout' = Set.insert y timedout + = E (TS t' 0) timedout' : go armedSet' armedPSQ' timedout' (E ts x : txs) + + go armedSet armedPSQ timedout (E ts@(TS t _) x : txs) = + let armedSet' = arm x + armedAdd = armedSet' Set.\\ armedSet + armedDel = armedSet Set.\\ armedSet' + armedPSQ' = flip (Set.foldl' (\s y -> PSQ.insert y t' () s)) armedAdd + . flip (Set.foldl' (\s y -> PSQ.delete y s)) armedDel + $ armedPSQ + t' = addTime d t + timedout' = timedout `Set.intersection` armedSet' + in if timedout' /= timedout + then E ts timedout' : go armedSet' armedPSQ' timedout' txs + else go armedSet' armedPSQ' timedout' txs + + +keyedUntil :: forall a b. Ord b + => (a -> Set b) -- ^ Start set signal + -> (a -> Set b) -- ^ Stop set signal + -> (a -> Bool) -- ^ Stop all signal + -> Signal a + -> Signal (Set b) +keyedUntil start stop stopAll = + Signal Set.empty + . go Set.empty + . toTimeSeries + where + + go :: Set b + -> [E a] + -> [E (Set b)] + go _ [] = [] + go active (E t x : txs) + | active' /= active = E t active' : go active' txs + | otherwise = go active' txs + where + active' + | stopAll x = Set.empty + | otherwise = (active <> start x) Set.\\ stop x + + +difference :: (a -> a -> b) + -> Signal a + -> Signal (Maybe b) +difference diff (Signal x0 txs0) = + Signal Nothing (go x0 txs0) + where + go _ [] = [] + go x (E (TS t i) x' : txs) = E (TS t i) (Just (diff x x')) + : E (TS t (i+1)) Nothing + : go x' txs + + +scanl :: (b -> a -> b) -> b -> Signal a -> Signal b +scanl f z (Signal x0 txs0) = + let a0 = f z x0 in + Signal a0 (go a0 txs0) + where + go !_ [] = [] + go !a (E ts x : txs) = E ts a' : go a' txs + where + a' = f a x + + +-- +-- Utils +-- + +-- | Generic merging utility. For sorted input lists this is a full outer join. 
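+--
+-- For example (illustrative): @mergeBy compare [1,3] [2,3]@ evaluates to
+-- @[OnlyInLeft 1, OnlyInRight 2, InBoth 3 3]@.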
+-- +mergeBy :: (a -> b -> Ordering) -> [a] -> [b] -> [MergeResult a b] +mergeBy cmp = merge + where + merge [] ys = [ OnlyInRight y | y <- ys] + merge xs [] = [ OnlyInLeft x | x <- xs] + merge (x:xs) (y:ys) = + case x `cmp` y of + GT -> OnlyInRight y : merge (x:xs) ys + EQ -> InBoth x y : merge xs ys + LT -> OnlyInLeft x : merge xs (y:ys) + +data MergeResult a b = OnlyInLeft a | InBoth a b | OnlyInRight b + deriving (Eq, Show) + diff --git a/ouroboros-network/test/Main.hs b/ouroboros-network/test/Main.hs index b8372c707ec..cc6d5f4bc5e 100644 --- a/ouroboros-network/test/Main.hs +++ b/ouroboros-network/test/Main.hs @@ -26,6 +26,10 @@ import qualified Ouroboros.Network.Protocol.LocalTxSubmission.Test (tests) import qualified Ouroboros.Network.Protocol.KeepAlive.Test (tests) import qualified Ouroboros.Network.Protocol.TipSample.Test (tests) import qualified Test.Ouroboros.Network.PeerSelection (tests) +import qualified Test.Ouroboros.Network.PeerSelection.Json (tests) +import qualified Test.Ouroboros.Network.PeerSelection.LocalRootPeers +import qualified Test.Ouroboros.Network.PeerSelection.MockEnvironment +import qualified Test.Ouroboros.Network.PeerSelection.RootPeersDNS import qualified Test.Socket (tests) main :: IO () @@ -59,6 +63,10 @@ tests = , Test.PeerState.tests , Test.Ouroboros.Network.BlockFetch.tests , Test.Ouroboros.Network.PeerSelection.tests + , Test.Ouroboros.Network.PeerSelection.Json.tests + , Test.Ouroboros.Network.PeerSelection.LocalRootPeers.tests + , Test.Ouroboros.Network.PeerSelection.MockEnvironment.tests + , Test.Ouroboros.Network.PeerSelection.RootPeersDNS.tests , Test.Ouroboros.Network.KeepAlive.tests , Test.Ouroboros.Network.TxSubmission.tests , Test.Ouroboros.Network.NodeToNode.Version.tests diff --git a/ouroboros-network/test/Test/LedgerPeers.hs b/ouroboros-network/test/Test/LedgerPeers.hs index 0cbcd82b438..daafc94240b 100644 --- a/ouroboros-network/test/Test/LedgerPeers.hs +++ b/ouroboros-network/test/Test/LedgerPeers.hs @@ -10,16 +10,26 @@ import Control.Monad.Class.MonadAsync import Control.Monad.Class.MonadFork import Control.Monad.Class.MonadSay import Control.Monad.Class.MonadTime +import Control.Monad.Class.MonadTimer import Control.Monad.Class.MonadThrow import Control.Monad.IOSim import Control.Tracer (showTracing, Tracer (..), traceWith) -import Data.List (foldl', intercalate) +import Data.List (foldl', intercalate, nub) import Data.List.NonEmpty (NonEmpty (..)) +import qualified Data.List.NonEmpty as NonEmpty +import qualified Data.IP as IP +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as Map +import Data.Maybe (fromMaybe) +import Data.Set (Set) +import qualified Data.Set as Set import Data.Word import Data.Ratio import System.Random import Ouroboros.Network.PeerSelection.LedgerPeers +import Network.Socket (SockAddr) +import Network.DNS (Domain) import Test.Tasty (TestTree, testGroup) import Test.Tasty.QuickCheck (testProperty) @@ -32,17 +42,25 @@ tests = testGroup "LedgerPeers" , testProperty "Pick" prop_pick ] -newtype ArbitraryRelayAddress = ArbitraryRelayAddress RelayAddress +newtype ArbitraryPortNumber = ArbitraryPortNumber { getArbitraryPortNumber :: PortNumber } -instance Arbitrary ArbitraryRelayAddress where - arbitrary = do - ArbitraryRelayAddress <$> elements [ RelayAddressAddr (read "1.1.1.1") 1234 - , RelayAddressDomain (DomainAddress "relay.iohk.example" 1234) - ] +instance Arbitrary ArbitraryPortNumber where + arbitrary = elements + $ map (ArbitraryPortNumber . read . 
show) + $ ([1000..1100] :: [Int]) + +newtype ArbitraryRelayAccessPoint = ArbitraryRelayAccessPoint RelayAccessPoint + +instance Arbitrary ArbitraryRelayAccessPoint where + arbitrary = + ArbitraryRelayAccessPoint <$> + oneof [ RelayAccessAddress (read "1.1.1.1") . getArbitraryPortNumber <$> arbitrary + , RelayAccessDomain "relay.iohk.example" . getArbitraryPortNumber <$> arbitrary + ] data StakePool = StakePool { spStake :: !Word64 - , spRelay :: NonEmpty RelayAddress + , spRelay :: NonEmpty RelayAccessPoint } deriving Show @@ -50,19 +68,31 @@ data StakePool = StakePool { instance Arbitrary StakePool where arbitrary = do stake <- choose (0, 1000000) - (ArbitraryRelayAddress firstRelay) <- arbitrary - moreRelays <- map unAddr <$> arbitrary + (ArbitraryRelayAccessPoint firstRelay) <- arbitrary + moreRelays <- filter (/= firstRelay) . nub . map unAddr <$> arbitrary return $ StakePool stake (firstRelay :| moreRelays) where - unAddr (ArbitraryRelayAddress a) = a + unAddr (ArbitraryRelayAccessPoint a) = a -newtype LedgerPools = LedgerPools [(PoolStake, NonEmpty RelayAddress)] deriving Show + shrink sp@StakePool { spStake, spRelay } = + [ sp { spStake = spStake' } + | spStake' <- shrink spStake + ] + ++ + [ sp { spRelay = NonEmpty.fromList spRelay' } + | spRelay'@(_ : _) <- shrinkList (const []) + (NonEmpty.toList spRelay) + ] + +newtype LedgerPools = LedgerPools [(PoolStake, NonEmpty RelayAccessPoint)] + deriving Show instance Arbitrary LedgerPools where arbitrary = LedgerPools . calculateRelativeStake <$> arbitrary where - calculateRelativeStake :: [StakePool] -> [(PoolStake, NonEmpty RelayAddress)] + calculateRelativeStake :: [StakePool] + -> [(PoolStake, NonEmpty RelayAccessPoint)] calculateRelativeStake sps = let totalStake = foldl' (\s p -> s + spStake p) 0 sps in map (\p -> ( PoolStake (fromIntegral (spStake p) % fromIntegral totalStake) @@ -73,21 +103,39 @@ prop_pick100 :: Word16 -> Property prop_pick100 seed = let rng = mkStdGen $ fromIntegral seed - sps = [ (1, RelayAddressAddr (read "1.1.1.1") 1 :| []) - , (0, RelayAddressAddr (read "0.0.0.0") 0 :| []) + sps = [ (1, RelayAccessAddress (read "1.1.1.1") 1 :| []) + , (0, RelayAccessAddress (read "0.0.0.0") 0 :| []) ] - peerMap = accPoolStake sps - tr = (runSimTrace $ pickPeers rng verboseTracer peerMap 1) in - ioProperty $ do - tr' <- evaluateTrace tr + + sim :: IOSim s [RelayAccessPoint] + sim = withLedgerPeers + rng verboseTracer + (pure (UseLedgerAfter 0)) + interface + (\_ -> pure Map.empty) -- we're not relying on domain name resolution in this simulation + (\request _ -> do + threadDelay 1900 -- we need to invalidate ledger peer's cache + resp <- request (NumberOfPeers 1) + pure $ case resp of + Nothing -> [] + Just (peers, _) -> [ RelayAccessAddress ip port + | Just (ip, port) <- IP.fromSockAddr + <$> Set.toList peers + ] + ) + where + interface = LedgerPeersConsensusInterface $ \_ -> pure (Just (Map.elems (accPoolStake sps))) + + in ioProperty $ do + tr' <- evaluateTrace (runSimTrace sim) case tr' of SimException e trace -> do return $ counterexample (intercalate "\n" $ show e : trace) False SimDeadLock trace -> do return $ counterexample (intercalate "\n" $ "Deadlock" : trace) False - SimReturn (_, peers) _trace -> do + SimReturn peers _trace -> do -- printf "Log: %s\n" (intercalate "\n" _trace) - return $ peers === [ RelayAddressAddr (read "1.1.1.1") 1 ] + return $ peers === [ RelayAccessAddress (read "1.1.1.1") 1 ] -- | Veify that given at least one peer we manage to pick `count` peers. 
prop_pick :: LedgerPools @@ -96,21 +144,73 @@ prop_pick :: LedgerPools -> Property prop_pick (LedgerPools lps) count seed = let rng = mkStdGen $ fromIntegral seed - peerMap = accPoolStake lps - tr = runSimTrace (pickPeers rng verboseTracer peerMap count) in - ioProperty $ do - tr' <- evaluateTrace tr + + sim :: IOSim s [RelayAccessPoint] + sim = withLedgerPeers + rng (verboseTracer) -- <> Tracer Debug.traceShowM) + (pure (UseLedgerAfter 0)) + interface resolve + (\request _ -> do + threadDelay 1900 -- we need to invalidate ledger peer's cache + resp <- request (NumberOfPeers count) + pure $ case resp of + Nothing -> [] + Just (peers, _) -> [ reverseLookup (RelayAccessAddress ip port) + | Just (ip, port) <- IP.fromSockAddr + `fmap` Set.toList peers + ] + ) + where + interface :: LedgerPeersConsensusInterface (IOSim s) + interface = LedgerPeersConsensusInterface $ \_ -> pure (Just (Map.elems (accPoolStake lps))) + + domainMap :: Map Domain (Set IP) + domainMap = Map.fromList [("relay.iohk.example", Set.singleton (read "2.2.2.2"))] + + resolve :: [DomainAccessPoint] + -> IOSim s (Map DomainAccessPoint (Set SockAddr)) + resolve = \daps -> + pure $ Map.fromList + [ (dap, addrs) + | dap@(DomainAccessPoint domain port) <- daps + , let addrs = Set.map (\ip -> IP.toSockAddr (ip, port)) + . fromMaybe Set.empty + $ Map.lookup domain domainMap + ] + + reverseLookup :: RelayAccessPoint -> RelayAccessPoint + reverseLookup ap@(RelayAccessAddress ip port) + = case [ domain + | (domain, addrs) <- Map.assocs domainMap + , ip `Set.member` addrs + ] of + (domain : _) -> RelayAccessDomain domain port + _ -> ap + reverseLookup ap = ap + + + + in ioProperty $ do + tr' <- evaluateTrace (runSimTrace sim) case tr' of SimException e trace -> do return $ counterexample (intercalate "\n" $ show e : trace) False SimDeadLock trace -> do return $ counterexample (intercalate "\n" $ "Deadlock" : trace) False - SimReturn (_, peers) trace -> do + SimReturn peers trace -> do + let numOfPeers = length peers if null lps then return $ property $ null peers - else return $ counterexample (intercalate "\n" $ "Lenght missmatch" : trace) - (length peers == fromIntegral count) + else return $ counterexample (intercalate "\n" $ + ( "Lenght missmatch " + ++ show (length peers) + ) + : trace) + (numOfPeers + == fromIntegral count `min` numOfPeers) +prop :: Property +prop = prop_pick (LedgerPools [(PoolStake {unPoolStake = 1 % 1},RelayAccessAddress (read "1.1.1.1") 1016 :| [])]) 0 2 -- TODO: Belongs in iosim. 
data SimResult a = SimReturn a [String] diff --git a/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection.hs b/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection.hs index 6f8ae56d392..a9312af91db 100644 --- a/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection.hs +++ b/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection.hs @@ -1,9 +1,10 @@ {-# LANGUAGE BangPatterns #-} -{-# LANGUAGE DeriveTraversable #-} +{-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE DerivingVia #-} {-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GeneralisedNewtypeDeriving #-} +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE NumericUnderscores #-} {-# LANGUAGE RecordWildCards #-} @@ -16,133 +17,179 @@ module Test.Ouroboros.Network.PeerSelection (tests) where import qualified Data.ByteString.Char8 as BS import Data.Function (on) -import Data.List (groupBy) -import Data.Maybe (listToMaybe) +import Data.List (groupBy, foldl') +import Data.Maybe (listToMaybe, isNothing, fromMaybe) import Data.Set (Set) import qualified Data.Set as Set +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as Map import Data.Void (Void) +import qualified Data.Signal as Signal +import Data.Signal (Signal, Events, E(E), TS(TS)) +import qualified Data.OrdPSQ as PSQ +import System.Random (mkStdGen) + +import Control.Monad.Class.MonadSTM.Strict (STM) import Control.Monad.Class.MonadTime import Control.Tracer (Tracer (..)) import qualified Network.DNS as DNS (defaultResolvConf) import Network.Socket (SockAddr) -import Network.Mux.Timeout import Ouroboros.Network.PeerSelection.Governor hiding (PeerSelectionState (..)) import qualified Ouroboros.Network.PeerSelection.Governor as Governor import qualified Ouroboros.Network.PeerSelection.KnownPeers as KnownPeers import qualified Ouroboros.Network.PeerSelection.LocalRootPeers as LocalRootPeers +import qualified Ouroboros.Network.PeerSelection.EstablishedPeers as EstablishedPeers import Ouroboros.Network.PeerSelection.RootPeersDNS import Test.Ouroboros.Network.PeerSelection.Instances -import qualified Test.Ouroboros.Network.PeerSelection.LocalRootPeers import Test.Ouroboros.Network.PeerSelection.MockEnvironment hiding (tests) -import qualified Test.Ouroboros.Network.PeerSelection.MockEnvironment +import Test.Ouroboros.Network.PeerSelection.PeerGraph import Test.QuickCheck -import Test.Tasty (TestTree, testGroup) +import Test.QuickCheck.Signal +import Test.Tasty (TestTree, testGroup, after, DependencyType(..)) import Test.Tasty.QuickCheck (testProperty) tests :: TestTree tests = testGroup "Ouroboros.Network.PeerSelection" - [ Test.Ouroboros.Network.PeerSelection.LocalRootPeers.tests - , Test.Ouroboros.Network.PeerSelection.MockEnvironment.tests + [ testGroup "basic" + [ testProperty "has output" prop_governor_hasoutput + , testProperty "no failure" prop_governor_nofail + , testProperty "no livelock" prop_governor_nolivelock + ] + + -- The no livelock property is needed to ensure other tests terminate + , after AllSucceed "Ouroboros.Network.PeerSelection.basic" $ + testGroup "safety" + [ testProperty "no excess busyness" prop_governor_nobusyness + , testProperty "event coverage" prop_governor_trace_coverage + , testProperty "connection status" prop_governor_connstatus + ] + + -- The no livelock property is needed to ensure other tests terminate + , after AllSucceed "Ouroboros.Network.PeerSelection.basic" $ + testGroup "progress" + [ testProperty "gossip reachable" prop_governor_gossip_1hr + +-- , testProperty 
"progresses towards public root peers target (from below)" +-- prop_governor_target_publicroots + + , testProperty "progresses towards known peers target (from below)" + prop_governor_target_known_below + , testProperty "progresses towards known peers target (from above)" + prop_governor_target_known_above + + , testProperty "progresses towards established peers target (from below)" + prop_governor_target_established_below + , testProperty "progresses towards established peers target (from above)" + prop_governor_target_established_above + + , testProperty "progresses towards active peers target (from below)" + prop_governor_target_active_below + , testProperty "progresses towards active peers target (from above)" + prop_governor_target_active_above + + , testProperty "progresses towards established local root peers target" + prop_governor_target_established_local + , testProperty "progresses towards active local root peers target (from below)" + prop_governor_target_active_local_below + , testProperty "progresses towards active local root peers target (from above)" + prop_governor_target_active_local_above + ] , testProperty "governor gossip reachable in 1hr" prop_governor_gossip_1hr , testProperty "governor connection status" prop_governor_connstatus , testProperty "governor no livelock" prop_governor_nolivelock ] + --TODO: We should add separate properties to check that we do not overshoot + -- our targets: known peers from below can overshoot, but all the others + -- should be precise and not overshoot. The public root target from below + -- is a one-sided target and we can and will overshoot, but we should not + -- overshoot by too much. -- -- QuickCheck properties -- --- Things we might like to test... +-- We start with basic properties in the style of "never does bad things" +-- and progress to properties that check that it "eventually does good things". +-- +-- In the "never does bad things" category we have: +-- +-- * A basic state space exploration property that checks we don't encounter +-- internal errors. This includes some limited checking that we get adequate +-- coverage of the different actions, by looking for coverage of all the +-- trace events. The coverage checks here are useful to give us confidence +-- about coverage for some of the other properties. +-- +-- * A no-livelock property. This checks that the governor does not get stuck +-- doing too many steps at a single moment in (virtual) time. It's quite easy +-- to write bugs that don't cause the governor to fail, but cause it to go +-- into a busy cycle. See also the "no excessive busyness" property for a +-- more advanced version. +-- +-- * A "no excessive busyness" property. This checks that the governor does not +-- remain too busy for too long. It's quite easy to write bugs that don't +-- cause the governor to fail, but cause it to go into fairly-busy cycles. +-- +-- * A state consistency property that the governor's view of part of the state +-- and the "true" state of the mock environment are maintained in an +-- appropriate correspondence. +-- +-- In the "eventually does good things" category we have: +-- +-- * A basic property to check the governor does produce non-trivial traces. +-- +-- * A cold peer gossip "reachable" property: that the governor either hits +-- its target for the number of cold peers, or finds all the reachable peers. +-- +-- * A known peer target progress property: that the governor makes progress +-- within a bounded time towards its known peers target, from below and above. 
+-- +-- * An established peer target property: the same as above but for established +-- peers. +-- +-- * An active peer target property: the same as above but for active peers. +-- +-- Properties that we would like to have: +-- +-- * A public root peers target property: that the governor hits its target for +-- for the number of public root peers (or as near as possible), and does +-- not "grossly" overshoot. Since the public roots is a one sided target, but +-- we don't want to overshoot excessively. -- --- * for even insane environments, there is no insane behaviour --- trace properties: --- * progress: all actions should make monotonic progress --- * no busy work: limit on number of governor iterations before time advances --- * trace sanity: model of state can be reconstructed from trace events +-- * A local root peers target property: that the governor hits its target for +-- getting all its local root peers into the established state, and a target +-- number of them into the active state (or as near as possible). +-- +-- Other properties we might like to think about -- --- * for vaguely stable envs, we do stablise at our target number of cold peers --- * we stabilise without going insane even if the available nodes are fewer than the target --- * time to stabilise after a change is not crazy -- * time to find new nodes after a graph change is ok -- * targets or root peer set dynamic --- * check local root peers are what we expect --- * check governor view of connection status does not lag reality too much --- | Run the governor for up to 24 hours (simulated obviously) and see if it --- throws any exceptions (assertions such as invariant violations) or if it --- encounters a livelock situation. --- --- | It is easy to get bugs where the governor is stuck in a busy loop working --- but not making progress. This kind of bug would result in the governor --- thread consuming all the cpu, but wouldn't actually stop the node, so might --- not be easily noticed. --- --- We check for this condition by requiring that trace events a certain number --- of events apart are sufficiently far apart in time too. This will be --- violated if the governor starts making very slow forward progress. +-- | As the most basic property we run the governor and check that it produces +-- any trace output at all. It should elicit some activity, unless the test +-- environment is actually empty. -- --- This uses static targets and root peers. --- --- TODO: Reenable this testcase. -prop_governor_nolivelock :: GovernorMockEnvironment -> Property -prop_governor_nolivelock env = - within 10_000_000 $ - let trace = takeFirstNHours 24 . - selectGovernorEvents . - selectPeerSelectionTraceEvents $ +prop_governor_hasoutput :: GovernorMockEnvironment -> Bool +prop_governor_hasoutput env = + let trace = selectPeerSelectionTraceEvents $ runGovernorInMockEnvironment env - in -{- - -- uncomment to check expected distribution - tabulate "env size" [renderRanges 10 envSize] $ - tabulate "max events" [renderRanges 10 (maxEvents 5 trace)] $ - tabulate "events/graph ratio" - [show (maxEvents 5 trace `div` envSize)] $ --} - hasOutput trace - - -- Check we don't get too many events within a given time span. - -- How many events is too many? It scales with the graph size. - -- The ratio between them is from experimental evidence. - .&&. 
let maxevents = (2+envSize) * 8 -- ratio from experiments - timespan = 5 -- seconds - actual = maxEvents (floor timespan) trace - in counterexample ("Too many events in a span of time!\n" - ++ " time span: " ++ show timespan ++ " seconds\n" - ++ " env size: " ++ show envSize ++ "\n" - ++ " num events: " ++ show actual) $ - - property (makesAdequateProgress maxevents timespan - (map fst trace)) - where - hasOutput :: [(Time, TracePeerSelection PeerAddr)] -> Property - hasOutput (_:_) = property True - hasOutput [] = counterexample "no trace output" $ - property (isEmptyEnv env) - - envSize = length g + length (targets env) - where PeerGraph g = peerGraph env - maxEvents n = maximum - . (0:) - . map length - . timeSpans n - - timeSpans :: Int -> [(Time, a)] -> [[(Time, a)]] - timeSpans _ [] = [] - timeSpans n (x@(t,_):xs) = - let (xs', xs'') = span (\(t',_) -> t' <= addTime (fromIntegral n) t) (x:xs) - in xs' : timeSpans n xs'' + in hasOutput env (selectGovernorEvents trace) + +hasOutput :: GovernorMockEnvironment + -> [(Time, TracePeerSelection PeerAddr)] + -> Bool +hasOutput _ (_:_) = True +hasOutput env [] = isEmptyEnv env isEmptyEnv :: GovernorMockEnvironment -> Bool isEmptyEnv GovernorMockEnvironment { @@ -156,16 +203,323 @@ isEmptyEnv GovernorMockEnvironment { || all (\(t,_) -> targetNumberOfRootPeers t == 0) targets) --- Check that events that are 100 events apart have an adequate time --- between them, to indicate we're not in a busy livelock situation. -makesAdequateProgress :: Int -> DiffTime -> [Time] -> Bool -makesAdequateProgress n adequate ts = - go ts (drop n ts) +-- | As a basic property we run the governor to explore its state space a bit +-- and check it does not throw any exceptions (assertions such as invariant +-- violations). +-- +-- We do /not/ assume freedom from livelock for this property, so we run the +-- governor for a maximum number of trace events rather than for a fixed +-- simulated time. +-- +prop_governor_nofail :: GovernorMockEnvironment -> Bool +prop_governor_nofail env = + let trace = take 5000 . + selectPeerSelectionTraceEvents $ + runGovernorInMockEnvironment env + + in foldl' (flip seq) True + [ assertPeerSelectionState st () + | (_, GovernorDebug (TraceGovernorState _ _ st)) <- trace ] + + +-- | It is relatively easy to write bugs where the governor is stuck in a tight +-- cycle of continuous activity. Due to the way the I\/O simulator manages +-- virtual time, these bugs exhibits themselves by infinite trace activity +-- without time advancing. +-- +-- It is important to catch these bugs early in the set of tests, since it is +-- hard to write many of the other more interesting properties if there are +-- these kinds of livelock bugs. Or to put it another way, the other properties +-- can be expressed more simply if they can assume within event traces that the +-- time always advances after some finite number of events. +-- +prop_governor_nolivelock :: GovernorMockEnvironment -> Property +prop_governor_nolivelock env = + let trace = take 5000 . + selectGovernorEvents . + selectPeerSelectionTraceEvents $ + runGovernorInMockEnvironment env + in case tooManyEventsBeforeTimeAdvances 1000 trace of + Nothing -> property True + Just (t, es) -> + counterexample + ("over 1000 events at time: " ++ show t ++ "\n" ++ + "first 50 events: " ++ (unlines . map show . take 50 $ es)) $ + property False + + +-- | Scan the trace and return any occurrence where we have at least threshold +-- events before virtual time moves on. 
Return the tail of the trace from that
+-- point on.
+--
+tooManyEventsBeforeTimeAdvances :: Int -> [(Time, e)] -> Maybe (Time, [e])
+tooManyEventsBeforeTimeAdvances _         []     = Nothing
+tooManyEventsBeforeTimeAdvances threshold trace0 =
+    go [ (t, diffTime t' t, e)
+       | ((t, e), (t', _)) <- zip trace0 (tail trace0) ]
+  where
+    go []                = Nothing
+    go trace@((t,_,_):_) = case countdown threshold trace of
+                             Just es' -> go es'
+                             Nothing  -> Just (t, trace')
+      where
+        trace' = take threshold [ e | (_,_,e) <- trace ]
+
+    countdown 0 _  = Nothing
+    countdown _ [] = Just []
+    countdown n ((_t,dt,_e):es)
+      | dt == 0   = countdown (n-1) es
+      | otherwise = Just es
+
+
+-- | It is easy to get bugs where the governor is stuck in a cycle, working but
+-- not making progress. This kind of bug would result in the governor thread
+-- being excessively busy, so it might not be easily noticed.
+--
+-- This is more subtle and general than a simple livelock test that just checks
+-- we don't get completely stuck. This property is about the possibility that
+-- the governor is excessively busy over some period of time. This includes
+-- "slow" livelocks where time advances during some of the steps in the cycle.
+-- More interestingly this is also about a failure to converge and return to
+-- being idle sufficiently quickly.
+--
+-- For example the governor could get stuck in a cycle promoting and demoting
+-- a peer once a second. In such a failure mode it will have a continuous level
+-- of activity and will not return to being idle (perhaps ever, or perhaps for
+-- an extended period until some other environment perturbation gets us out of
+-- the cycle).
+--
+-- The approach we take is based on the observation that the governor can
+-- (quite reasonably) start with a big burst of activity (e.g. as it gossips
+-- to discover a big graph) but that in the long term it settles down and only
+-- has small bursts of activity in reaction to perturbations in the environment
+-- such as failures or changes in targets.
+--
+-- The approach we take is to look at spans of busy activity followed by
+-- periods of idleness. If the spans of busy activity are too long then we
+-- fail. So this counts the time of busyness, not the number of events. We
+-- account for activity in the environment that the governor needs to respond
+-- to by counting \"perturbation credits\": more credits means we allow longer
+-- spans of busyness.
+--
+-- More specifically: we look at runs of events where the time between events
+-- is less than a threshold. This implies that a threshold level of idleness
+-- follows the run. At the start of, or within, that run of events there can be
+-- environment events. These are the perturbations from the environment that we
+-- expect to trigger a series of responses from the governor. So we expect
+-- longer periods of busyness for bigger perturbations. We sum all the
+-- perturbation credits included in a run of events. We use a formula that
+-- relates the credits to the permitted time span of the run. If the run is
+-- within the permitted time span then it is ok, otherwise it is a failure (and
+-- the run is the counterexample).
+--
+-- TODO: This test uses static root peers, but we should move to dynamic root
+-- peers.
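To make the credit formula concrete, here is a hypothetical worked example using the initialEventCredits and envEventCredits values defined further below: a busy span always starts with 65 seconds of credit, and a single TraceEnvSetTargets event that sets, say, the known/established/active targets to 10/5/2 contributes a further 80 + 10 * (10 + 5 + 2) = 250 credits, so a burst of governor activity triggered by that one target change may last up to 65 + 250 = 315 seconds before this property reports a violation.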
+-- +prop_governor_nobusyness :: GovernorMockEnvironment -> Property +prop_governor_nobusyness env = + let trace = selectPeerSelectionTraceEvents $ + runGovernorInMockEnvironment env + + in case tooBusyForTooLong (takeFirstNHours 10 trace) of + Nothing -> property True + Just (busyStartTime, busyEndTime, credits, trace') -> + counterexample + ("busy span too long\n" ++ + "start time: " ++ show busyStartTime ++ "\n" ++ + "end time: " ++ show busyEndTime ++ "\n" ++ + "span credits: " ++ show credits ++ "\n" ++ + "first 50 events:\n" ++ + (unlines . map show . take 50 $ trace')) $ + property False + +-- +tooBusyForTooLong :: [(Time, TestTraceEvent)] + -> Maybe (Time, Time, DiffTime, + [(Time, TestTraceEvent)]) +tooBusyForTooLong trace0 = + -- Pass in each timed event, with the diff-time to the next event + idle [ (t, diffTime t' t, e) + | ((t, e), (t', _)) <- zip trace0 (tail trace0) ] + where + -- How long between events before we say it's the end of a busy span + sameSpanThreshold :: DiffTime + sameSpanThreshold = 45 + + -- Starting credits for a busy span, even if there are no triggering + -- environment events. The value chosen here takes account of the normal + -- exponential backoff is 2+4+8+16+32 = 62, before a gap of 64 that's + -- bigger than the sameSpanThreshold of 45. + initialEventCredits :: DiffTime + initialEventCredits = 65 + + -- We process the event trace linearly, flipping between two states: idle + -- and busy. In the idle state, the next (non-debug) event flips us into + -- the busy state, starting with some minimal initial credits. + + idle :: [(Time, DiffTime, TestTraceEvent)] + -> Maybe (Time, Time, DiffTime, [(Time, TestTraceEvent)]) + idle [] = Nothing + idle ((_, _, GovernorDebug{}):trace') = idle trace' + idle trace@((busyStartTime,_,_):_) = + case busy busyStartTime initialEventCredits trace of + Right trace' -> idle trace' + Left (busyEndTime, credits) -> + Just (busyStartTime, busyEndTime, credits, trace') + where + trace' = [ (t, e) + | (t,_dt, e) <- + takeWhile (\(t,_,_) -> t <= busyEndTime) trace + , case e of + GovernorDebug{} -> False + _ -> True + ] + + busy :: Time -> DiffTime -> [(Time, DiffTime, TestTraceEvent)] + -> Either (Time, DiffTime) [(Time, DiffTime, TestTraceEvent)] + + -- For normal governor events we check if the length of the busy time span + -- is now too big (adjusted for any perturbation credits). If so we've + -- found a violation. + busy !busyStartTime !credits ((busyEndTime, _dt, GovernorEvent{}) : _trace') + | busySpanLength > credits = Left (busyEndTime, credits) + where + busySpanLength = diffTime busyEndTime busyStartTime + + -- We also look at how long it is to the next event to see if this is the + -- last event in the busy span, and if so we return to idle. + busy !_busyStartTime !_credits ((_t, dt, _event) : trace') + | dt > sameSpanThreshold = Right trace' + + -- For environment events we calculate the perturbation credits this + -- contributes and add it to our running total. + busy !busyStartTime !credits ((_, _, MockEnvEvent e) : trace') = + busy busyStartTime (credits + fromIntegral (envEventCredits e)) trace' + + -- Otherwise we move on to the next event, updating the length of this busy + -- time span. 
+    busy !busyStartTime !credits (_ : trace') =
+      busy busyStartTime credits trace'
+
+    -- running out of events before we find a violation is ok
+    busy !_ !_ [] = Right []
+
+
+envEventCredits :: TraceMockEnv -> Int
+envEventCredits (TraceEnvAddPeers peerGraph) = 80 * 5 + length adjacency * 5
+  where
+    PeerGraph adjacency = peerGraph
+
+envEventCredits (TraceEnvSetLocalRoots  peers)  = LocalRootPeers.size peers
+envEventCredits (TraceEnvSetPublicRoots peers)  = Set.size peers
+envEventCredits  TraceEnvPublicRootTTL          = 60
+envEventCredits (TraceEnvGossipTTL _)           = 30
+
+envEventCredits (TraceEnvSetTargets PeerSelectionTargets {
+                   targetNumberOfRootPeers = _,
+                   targetNumberOfKnownPeers,
+                   targetNumberOfEstablishedPeers,
+                   targetNumberOfActivePeers
+                 }) = 80
+                    + 10 * (targetNumberOfKnownPeers
+                          + targetNumberOfEstablishedPeers
+                          + targetNumberOfActivePeers)
+
+envEventCredits (TraceEnvPeersDemote Noop   _)  = 10
+envEventCredits (TraceEnvPeersDemote ToWarm _)  = 30
+envEventCredits (TraceEnvPeersDemote ToCold _)  = 30
+
+envEventCredits  TraceEnvPeersStatus{}          = 0
+-- These events are visible in the environment but are the result of actions
+-- initiated by the governor, hence they get no credit.
+envEventCredits  TraceEnvRootsResult{}          = 0
+envEventCredits  TraceEnvGossipRequest{}        = 0
+envEventCredits  TraceEnvGossipResult{}         = 0
+
+
+
+-- | A coverage property, much like 'prop_governor_nofail' but we look to see
+-- that we get adequate coverage of the state space. We look for all the trace
+-- events that the governor can produce, and tabulate which ones we see.
+--
+prop_governor_trace_coverage :: GovernorMockEnvironment -> Property
+prop_governor_trace_coverage env =
+    let trace = take 5000 .
+                selectPeerSelectionTraceEvents $
+                  runGovernorInMockEnvironment env
+
+        traceNumsSeen  = collectTraces trace
+        traceNamesSeen = allTraceNames `Map.restrictKeys` traceNumsSeen
+
+     in coverTable "trace events" [ (n, 1) | n <- Map.elems allTraceNames ] $
+        tabulate   "trace events" (Map.elems traceNamesSeen)
+        True
+        --TODO: use cover to check we do indeed get them all. There are a few
+        -- cases we do not cover yet. These should be fixed first.
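As a side note for readers unfamiliar with QuickCheck's label-coverage combinators used in prop_governor_trace_coverage above, the following minimal sketch (not part of the patch; the property name and labels are made up) shows how tabulate and coverTable fit together: tabulate records which labels each test case produced, and coverTable declares how often each label is expected to occur, which checkCoverage then enforces statistically.

import Test.QuickCheck

-- Label every test case with the parity of its input and require that both
-- labels each show up in at least 30% of the generated cases.
prop_parity_coverage :: Int -> Property
prop_parity_coverage n =
    checkCoverage $
    coverTable "parity" [("even", 30), ("odd", 30)] $
    tabulate   "parity" [if even n then "even" else "odd"] $
    property True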
+ +collectTraces :: [(Time, TestTraceEvent)] -> Set Int +collectTraces trace = + Set.fromList [ traceNum e | (_, GovernorEvent e) <- trace ] + +traceNum :: TracePeerSelection peeraddr -> Int +traceNum TraceLocalRootPeersChanged{} = 00 +traceNum TraceTargetsChanged{} = 01 +traceNum TracePublicRootsRequest{} = 02 +traceNum TracePublicRootsResults{} = 03 +traceNum TracePublicRootsFailure{} = 04 +traceNum TraceGossipRequests{} = 05 +traceNum TraceGossipResults{} = 06 +traceNum TraceForgetColdPeers{} = 07 +traceNum TracePromoteColdPeers{} = 08 +traceNum TracePromoteColdLocalPeers{} = 09 +traceNum TracePromoteColdFailed{} = 10 +traceNum TracePromoteColdDone{} = 11 +traceNum TracePromoteWarmPeers{} = 12 +traceNum TracePromoteWarmLocalPeers{} = 13 +traceNum TracePromoteWarmFailed{} = 14 +traceNum TracePromoteWarmDone{} = 15 +traceNum TraceDemoteWarmPeers{} = 16 +traceNum TraceDemoteWarmFailed{} = 17 +traceNum TraceDemoteWarmDone{} = 18 +traceNum TraceDemoteHotPeers{} = 19 +traceNum TraceDemoteLocalHotPeers{} = 20 +traceNum TraceDemoteHotFailed{} = 21 +traceNum TraceDemoteHotDone{} = 22 +traceNum TraceDemoteAsynchronous{} = 23 +traceNum TraceGovernorWakeup{} = 24 +traceNum TraceChurnWait{} = 25 + +allTraceNames :: Map Int String +allTraceNames = + Map.fromList + [ (00, "TraceLocalRootPeersChanged") + , (01, "TraceTargetsChanged") + , (02, "TracePublicRootsRequest") + , (03, "TracePublicRootsResults") + , (04, "TracePublicRootsFailure") + , (05, "TraceGossipRequests") + , (06, "TraceGossipResults") + , (07, "TraceForgetColdPeers") + , (08, "TracePromoteColdPeers") + , (09, "TracePromoteColdLocalPeers") + , (10, "TracePromoteColdFailed") + , (11, "TracePromoteColdDone") + , (12, "TracePromoteWarmPeers") + , (13, "TracePromoteWarmLocalPeers") + , (14, "TracePromoteWarmFailed") + , (15, "TracePromoteWarmDone") + , (16, "TraceDemoteWarmPeers") + , (17, "TraceDemoteWarmFailed") + , (18, "TraceDemoteWarmDone") + , (19, "TraceDemoteHotPeers") + , (20, "TraceDemoteLocalHotPeers") + , (21, "TraceDemoteHotFailed") + , (22, "TraceDemoteHotDone") + , (23, "TraceDemoteAsynchronous") + , (24, "TraceGovernorWakeup") + , (25, "TraceChurnWait") + ] + -- | Run the governor for up to 1 hour (simulated obviously) and look at the -- set of known peers it has selected. This uses static targets and root peers. @@ -178,13 +532,13 @@ makesAdequateProgress n adequate ts = -- must find all the reachable ones, or if the target for the number of known -- peers to find is too low then it should at least find the target number. -- -prop_governor_gossip_1hr :: GovernorMockEnvironmentWithoutAsyncDemotion -> Property -prop_governor_gossip_1hr (GovernorMockEnvironmentWAD env@GovernorMockEnvironment{ - peerGraph, - localRootPeers, - publicRootPeers, - targets - }) = +prop_governor_gossip_1hr :: GovernorMockEnvironment -> Property +prop_governor_gossip_1hr env@GovernorMockEnvironment { + peerGraph, + localRootPeers, + publicRootPeers, + targets + } = let trace = selectPeerSelectionTraceEvents $ runGovernorInMockEnvironment env { targets = singletonScript (targets', NoDelay) @@ -243,13 +597,14 @@ prop_governor_gossip_1hr (GovernorMockEnvironmentWAD env@GovernorMockEnvironment -- | Check the governor's view of connection status does not lag behind reality -- by too much. 
-- -prop_governor_connstatus :: GovernorMockEnvironmentWithoutAsyncDemotion -> Bool -prop_governor_connstatus (GovernorMockEnvironmentWAD env) = +prop_governor_connstatus :: GovernorMockEnvironment -> Property +prop_governor_connstatus env = let trace = takeFirstNHours 1 . selectPeerSelectionTraceEvents $ runGovernorInMockEnvironment env - --TODO: check any actually get a true status output and try some deliberate bugs - in all ok (groupBy ((==) `on` fst) trace) + --TODO: check any actually get a true status output and try some + -- deliberate bugs + in conjoin (map ok (groupBy ((==) `on` fst) trace)) where -- We look at events when the environment's view of the state of all the -- peer connections changed, and check that before simulated time advances @@ -257,24 +612,1224 @@ prop_governor_connstatus (GovernorMockEnvironmentWAD env) = -- -- We do that by finding the env events and then looking for the last -- governor state event before time moves on. - ok :: [(Time, TestTraceEvent)] -> Bool + ok :: [(Time, TestTraceEvent)] -> Property ok trace = - case (lastTrueStatus, lastTestStatus) of - (Nothing, _) -> True - (Just trueStatus, Just testStatus) -> trueStatus == testStatus - (Just _, Nothing) -> False + counterexample ("last few events:\n" ++ (unlines . map show) trace) $ + case (lastEnvStatus, lastGovStatus) of + (Nothing, _) -> property True + (Just envStatus, Just govStatus) -> envStatus === govStatus + (Just envStatus, Nothing) -> envStatus === Map.empty where - lastTrueStatus = + lastEnvStatus = listToMaybe - [ status + [ Map.filter (not . isPeerCold) status | (_, MockEnvEvent (TraceEnvPeersStatus status)) <- reverse trace ] - lastTestStatus = + isPeerCold PeerCold = True + isPeerCold _ = False + + lastGovStatus = listToMaybe [ Governor.establishedPeersStatus st | (_, GovernorDebug (TraceGovernorState _ _ st)) <- reverse trace ] +-- +-- Progress properties +-- + +-- | The main progress property for known peers: that we make progress towards +-- the target for known peers from below. See 'prop_governor_target_known_above' +-- for the (simpler) corresponding property for hitting the target from above. +-- +-- Intuitively the property we want is that the governor either hits its target +-- for the number of known peers, or gets as close as reasonably possible. The +-- environment may be such that it prevents the governor from reaching its +-- target, e.g. because the target is too high, or not all peers may be +-- reachable by the gossip graph. +-- +-- We approach this property as the conjunction of several simpler properties. +-- We take this approach for three main reasons. +-- +-- 1. Firstly modularity help us break down a complex problem into simpler +-- problems. Overall this progress idea turns out to be quite subtle and +-- tricky to express precisely in a way that actually works. +-- 2. Secondly, modularity can give us opportunities to reuse code in other +-- properties and we want to have progress properties for all the governor +-- targets. +-- 3. Thirdly, it turns out to be hard to dictate in a universal way precisely +-- what the governor can be expected to do. It is simpler to specify looser +-- constraints on what it must and must not do. We can then argue informally +-- that the combination of properties must lead to the kinds of outcomes we +-- intend. +-- +-- We decompose the progress property into the following (informally stated) +-- properties: +-- +-- 1. 
The set of peers the governor knows about is a subset of the peers the
+--    environment has told the governor about.
+--
+--    This is a weak property since it simply says that the governor does not
+--    invent things out of thin air. We might expect that we could strengthen
+--    this property to require that the subset be maximal in some sense,
+--    however such a property is violated by dynamic targets. There are also
+--    timing issues which would complicate such a strengthened property: the
+--    governor has legitimate reasons to update its internal state some time
+--    after the environment informs it about new peers.
+--
+-- 2. If the governor is below target and has the opportunity to gossip then
+--    within a bounded time it should perform a gossip with one of its known
+--    peers.
+--
+--    This is the primary progress property. It is a relatively weak property:
+--    we do not require that progress is actually made, just that opportunities
+--    for progress are taken when available. We cannot always demand actual
+--    progress since there are environments where it is not possible to make
+--    progress, even though opportunities for gossip remain available. Examples
+--    include environments where the total set of peers in the graph is less
+--    than the target for known peers.
+--
+-- 3. The governor should not gossip too frequently with any individual peer,
+--    except when the governor forgets known peers.
+--
+--    This is useful in its own right, but it also helps to strengthen the
+--    primary property by helping to ensure that the choices of which peers to
+--    gossip with are reasonable. In the primary property we do not require
+--    that the peer that the governor chooses to gossip with is one of the
+--    opportunities as defined by the property. We do not require this because
+--    the set of opportunities is a lower bound not an upper bound, and trying
+--    to make it a tight bound becomes complex and over-specifies behaviour.
+--    There is the danger however that the governor could appear to try to make
+--    progress by gossiping but always picking useless choices that avoid
+--    making actual progress. By requiring that the governor not gossip with
+--    any individual peer too often we can shrink the set of peers the governor
+--    can choose and thus force the governor to eventually pick other peers to
+--    gossip with, which should mean the governor eventually picks peers that
+--    can enable progress.
+--
+-- 4. When the governor does perform a gossip, within a bounded time it should
+--    include the results into its known peer set, or the known peer set should
+--    reach its target size.
+--
+--    This helps to strengthen the primary progress property by ensuring the
+--    results of gossip are used to make progress when that is possible.
+--
+-- 5. The governor should not shrink its known peer set except when it is above
+--    the target size.
+--
+--    This also helps to strengthen the second property by ensuring monotonic
+--    progress, except when we overshoot targets or when targets are reduced.
+--
+-- The overall progress argument is then a semi-formal argument, structured
+-- much like classic proofs about loops. A classic loop proof has two parts:
+-- 1. if the loop does terminate it gets the right result, and 2. it must
+-- eventually terminate because it makes progress in some measure that is
+-- bounded.
+--
+-- Of course in our setting there is no termination, but we can reach a goal
+-- and remain in a steady state until the environment changes.
Our argument is +-- that the governor makes progress to increase the size of its set of known +-- peers until either it hits its target number of known peers, or it reaches a +-- maximal possible set. As a corollary if the targets do not change too +-- frequently then it will eventually hit the target or reach a maximal set. +-- +-- Property number 1 above tells us that if we do reach our goal condition that +-- we will have a correct result, as property 1 tells us that all the governors' +-- known peers are ones supplied by the environment. +-- +-- Progress from below relies on the combination of property 2, 3, 4 and 5. +-- Property 2 tells us that we eventually do some gossip with some peer, but +-- does not by itself establish that we make progress in a bounded measure. +-- Property 3 gives us the bounded measure. Property 3 gives us a set of peers +-- that we have not gossiped with recently. When the governor does gossip with +-- a peer then it is removed from this set (but scheduled to be added back some +-- time later). So the measure is the size of this set of peers. It is clearly +-- bounded below by the empty set. So the combination of 2 and 3 tells us we +-- make progress in this bounded measure, but that does not directly translate +-- into increasing the size of the known peers set. Properties 4 and 5 tell us +-- that progress with gossiping will eventually translate into increasing the +-- size of the known peers set if that is possible. +-- +-- There is one known wrinkle to this argument to do with property 3 that when +-- the governor gossips with a peer it is removed from the tracking set however +-- it gets added back some time later. If they get added back too soon then it +-- would undermine the progress argument because it would break the argument +-- about decreasing the bounded measure. This is readily solved however: we +-- simply need to make sure the time scale for gossip frequency is relatively +-- long, and the other progress bounds are relatively short. +-- +prop_governor_target_known_below :: GovernorMockEnvironment -> Property +prop_governor_target_known_below env = + prop_governor_target_known_1_valid_subset env + .&&. prop_governor_target_known_2_opportunity_taken env + .&&. prop_governor_target_known_3_not_too_chatty env + .&&. prop_governor_target_known_4_results_used env + .&&. prop_governor_target_known_5_no_shrink_below env + + +-- | The set of peers the governor knows about is a subset of the peers the +-- environment has told the governor about. +-- +-- We derive a number of signals: +-- +-- 1. A signal of the accumulation of all the peers the environment has ever +-- told the governor about, based on the environment trace. +-- +-- 2. A signal of the set of known peers in the governor state. +-- +-- Based on these signals we check: +-- +-- * That the governor known peers is a subset of the accumulated environment +-- known peers. +-- +prop_governor_target_known_1_valid_subset :: GovernorMockEnvironment + -> Property +prop_governor_target_known_1_valid_subset env = + let events = Signal.eventsFromListUpToTime (Time (60 * 60)) + . selectPeerSelectionTraceEvents + . runGovernorInMockEnvironment + $ env + + envKnownPeersSig :: Signal (Set PeerAddr) + envKnownPeersSig = + Signal.nubBy ((==) `on` Set.size) + . Signal.scanl Set.union Set.empty + . Signal.fromChangeEvents Set.empty + . 
Signal.selectEvents + (\case + TraceEnvSetLocalRoots x -> Just (LocalRootPeers.keysSet x) + TraceEnvRootsResult x -> Just (Set.fromList x) + TraceEnvGossipResult _ x -> Just (Set.fromList x) + _ -> Nothing + ) + . selectEnvEvents + $ events + + govKnownPeersSig :: Signal (Set PeerAddr) + govKnownPeersSig = + selectGovState (KnownPeers.toSet . Governor.knownPeers) events + + validState :: Set PeerAddr -> Set PeerAddr -> Bool + validState knownPeersEnv knownPeersGov = + knownPeersGov `Set.isSubsetOf` knownPeersEnv + + in counterexample + "Signal key: (environment known peers, governor known peers)" $ + + signalProperty 20 show (uncurry validState) $ + (,) <$> envKnownPeersSig + <*> govKnownPeersSig + + +-- | If the governor is below target and has the opportunity to gossip then +-- within a bounded time it should perform a gossip with one of its known peers. +-- +-- We derive a number of signals: +-- +-- 1. A signal of the target for known peers from the environment +-- +-- 2. A signal of the set of known peers in the governor state. +-- +-- 3. A signal of the set of peers with which the governor has gossiped +-- recently, based on the requests to the environment +-- +-- 4. Based on 2 and 3, a signal of the set of gossip opportunities: the +-- current known peers that are not in the recent gossip set. +-- +-- 5. A signal of the environment gossip request events. +-- +-- 6. Based on 1, 2, 4 and 5, a signal that becomes False if for 30 seconds: +-- the number of known peers is below target; the set of opportunities is +-- non empty; and no gossip request event has occurred. +-- +-- Based on these signals we check: +-- +-- * That the signal 6 remains True at all times. +-- +prop_governor_target_known_2_opportunity_taken :: GovernorMockEnvironment + -> Property +prop_governor_target_known_2_opportunity_taken env = + + let events = Signal.eventsFromListUpToTime (Time (10 * 60 * 60)) + . selectPeerSelectionTraceEvents + . runGovernorInMockEnvironment + $ env + + envTargetsSig :: Signal Int + envTargetsSig = + selectEnvTargets targetNumberOfKnownPeers events + + govKnownPeersSig :: Signal (Set PeerAddr) + govKnownPeersSig = + selectGovState (KnownPeers.toSet . Governor.knownPeers) events + + envGossipUnavailableSig :: Signal (Set PeerAddr) + envGossipUnavailableSig = + Signal.keyedLinger + -- peers are unavailable for gossip for at least an + -- hour after each gossip interaction + (60 * 60) + (maybe Set.empty Set.singleton) + . Signal.fromEvents + . Signal.selectEvents + (\case TraceEnvGossipRequest peer _ -> Just peer + _ -> Nothing) + . selectEnvEvents + $ events + + -- We define the governor's gossip opportunities at any point in time + -- to be the governor's set of known peers, less the ones we can see + -- that it has gossiped with recently. + -- + gossipOpportunitiesSig :: Signal (Set PeerAddr) + gossipOpportunitiesSig = + (Set.\\) <$> govKnownPeersSig <*> envGossipUnavailableSig + + -- Note that we only require that the governor try to gossip, it does + -- not have to succeed. + envGossipsEventsAsSig :: Signal (Maybe PeerAddr) + envGossipsEventsAsSig = + Signal.fromEvents + . Signal.selectEvents + (\case TraceEnvGossipRequest addr _ -> Just addr + _ -> Nothing) + . selectEnvEvents + $ events + + -- The signal of all the things of interest for this property. + -- This is used to compute the final predicate, and is also what + -- we want to report if there is a property violation. 
+ combinedSig :: Signal (Int, + Set PeerAddr, + Set PeerAddr, + Maybe PeerAddr) + combinedSig = + (,,,) <$> envTargetsSig + <*> govKnownPeersSig + <*> gossipOpportunitiesSig + <*> envGossipsEventsAsSig + + -- This is the ultimate predicate signal + gossipOpportunitiesOkSig :: Signal Bool + gossipOpportunitiesOkSig = + Signal.truncateAt (Time (60 * 60 * 10)) $ + governorEventuallyTakesGossipOpportunities combinedSig + + in counterexample + "Signal key: (target, known peers, opportunities, gossip event)" $ + + -- Check the predicate signal but for failures report the input signal + signalProperty 20 (show . snd) fst $ + (,) <$> gossipOpportunitiesOkSig + <*> combinedSig + + +governorEventuallyTakesGossipOpportunities + :: Signal (Int, Set PeerAddr, Set PeerAddr, Maybe PeerAddr) + -> Signal Bool +governorEventuallyTakesGossipOpportunities = + -- Time out and fail after 30 seconds if we enter and remain in a bad state + fmap not + . Signal.timeout timeLimit badState + where + timeLimit :: DiffTime + timeLimit = 30 + + badState (target, govKnownPeers, gossipOpportunities, gossipEvent) = + + -- A bad state is one where we are below target; + Set.size govKnownPeers < target + + -- where we do have opportunities; and + && not (Set.null gossipOpportunities) + + -- are not performing an action to take the opportunity. + && isNothing gossipEvent + + -- Note that if a gossip does take place, we do /not/ require the gossip + -- target to be a member of the gossipOpportunities. This is because + -- the gossip opportunities set is a lower bound not an upper bound. + -- There is a separate property to check that we do not gossip too + -- frequently with any individual peer. + + + +-- | The governor should not gossip too frequently with any individual peer, +-- except when the governor forgets known peers. +-- +-- We derive a number of signals: +-- +-- Based on these signals we check: +-- +prop_governor_target_known_3_not_too_chatty :: GovernorMockEnvironment + -> Property +prop_governor_target_known_3_not_too_chatty env = + let events = Signal.eventsFromListUpToTime (Time (10 * 60 * 60)) + . selectPeerSelectionTraceEvents + . runGovernorInMockEnvironment + $ env + + gossipOk Nothing _ = True + gossipOk (Just peers) unavailable = + Set.null (peers `Set.intersection` unavailable) + + in signalProperty 20 show (uncurry gossipOk) $ + recentGossipActivity 3600 events + + +recentGossipActivity :: DiffTime + -> Events TestTraceEvent + -> Signal (Maybe (Set PeerAddr), Set PeerAddr) +recentGossipActivity d = + Signal.fromChangeEvents (Nothing, Set.empty) + . Signal.primitiveTransformEvents (go Set.empty PSQ.empty) + --TODO: we should be able to avoid primitiveTransformEvents and express + -- this as some combo of keyed linger and keyed until. + where + go :: Set PeerAddr + -> PSQ.OrdPSQ PeerAddr Time () + -> [E TestTraceEvent] + -> [E (Maybe (Set PeerAddr), Set PeerAddr)] + go !recentSet !recentPSQ txs@(E (TS t _) _ : _) + | Just (k, t', _, recentPSQ') <- PSQ.minView recentPSQ + , t' <= t + , let recentSet' = Set.delete k recentSet + = E (TS t' 0) (Nothing, recentSet') + : go recentSet' recentPSQ' txs + + -- When we see a gossip request we add it to the recent set and schedule + -- it to be removed again at time d+t. We arrange for the change in the + -- recent set to happen after the gossip event. 
+ go !recentSet !recentPSQ + (E (TS t i) (GovernorEvent (TraceGossipRequests _ _ _ addrs)) : txs) = + let recentSet' = recentSet <> addrs + recentPSQ' = foldl' (\q a -> PSQ.insert a t' () q) recentPSQ addrs + t' = d `addTime` t + in E (TS t i) (Just addrs, recentSet) + : E (TS t (i+1)) (Nothing, recentSet') -- updated in next change at same time + : go recentSet' recentPSQ' txs + + -- When the governor is forced to forget known peers, we drop it from + -- the recent activity tracking, which means if it is added back again + -- later then we can gossip with it again earlier than the normal limit. + -- + -- Alternatively we could track this more coarsely by dropping all tracking + -- when the targets are adjusted downwards, but we use small target + -- adjustments to perform churn. + -- + -- There is a separate property to check that the governor does not forget + -- peers unnecessarily. + -- + go !recentSet !recentPSQ + (E t (GovernorEvent (TraceForgetColdPeers _ _ addrs)) : txs) = + let recentSet' = foldl' (flip Set.delete) recentSet addrs + recentPSQ' = foldl' (flip PSQ.delete) recentPSQ addrs + in E t (Nothing, recentSet') + : go recentSet' recentPSQ' txs + + go !recentSet !recentPSQ (_ : txs) = + go recentSet recentPSQ txs + + go !_ !_ [] = [] + + +-- | When the governor does perform a gossip, within a bounded time it should +-- include the results into its known peer set, or the known peer set should +-- reach its target size. +-- +-- We derive a number of signals: +-- +-- 1. A signal of the target for known peers from the environment +-- +-- 2. A signal of the set of known peers in the governor state. +-- +-- 3. A signal of the environment gossip result events, as the set of results +-- at any point in time. +-- +-- 4. Based on 1, 2 and 3, a signal that tracks a set of peers that we have +-- gossiped with, such that the peers remain in the set until either they +-- appear in the governor known peers set or until the known peer set +-- reaches its target size. +-- +-- 5. Based on 4, a signal of the subset of elements that have been a member +-- continuously for at least X seconds duration. +-- +-- Based on these signals we assert: +-- +-- * That the signal 4 above is always empty. +-- +prop_governor_target_known_4_results_used :: GovernorMockEnvironment -> Property +prop_governor_target_known_4_results_used env = + let events = Signal.eventsFromListUpToTime (Time (10 * 60 * 60)) + . selectPeerSelectionTraceEvents + . runGovernorInMockEnvironment + $ env + + envTargetsSig :: Signal Int + envTargetsSig = + selectEnvTargets targetNumberOfKnownPeers events + + govKnownPeersSig :: Signal (Set PeerAddr) + govKnownPeersSig = + selectGovState (KnownPeers.toSet . Governor.knownPeers) events + + envGossipResultsSig :: Signal (Set PeerAddr) + envGossipResultsSig = + fmap (maybe Set.empty Set.fromList) + . Signal.fromEvents + . Signal.selectEvents + (\case TraceEnvGossipResult _ addrs -> Just addrs + _ -> Nothing) + . 
selectEnvEvents + $ events + + gossipResultsUntilKnown :: Signal (Set PeerAddr) + gossipResultsUntilKnown = + Signal.keyedUntil + (\(_, _, gossips) -> gossips) -- start set + (\(_, known, _) -> known) -- stop set + (\(target, known, _) -> Set.size known <= target) -- reset condition + ((,,) <$> envTargetsSig + <*> govKnownPeersSig + <*> envGossipResultsSig) + + gossipResultsUnknownTooLong :: Signal (Set PeerAddr) + gossipResultsUnknownTooLong = + Signal.keyedTimeout + (10 + 1) -- policyGossipOverallTimeout + id + gossipResultsUntilKnown + + in counterexample + ("\nSignal key: (known peers, gossip result, results unknown, " ++ + "results unknown too long)") $ + + signalProperty 20 show + (\(_,_,_,_,x) -> Set.null x) $ + (,,,,) <$> envTargetsSig + <*> govKnownPeersSig + <*> envGossipResultsSig + <*> gossipResultsUntilKnown + <*> gossipResultsUnknownTooLong + + +-- | The governor should not shrink its known peer set except when it is above +-- the target size. +-- +-- We derive a number of signals: +-- +-- 1. A signal of the target for known peers from the environment +-- +-- 2. A signal of the set of known peers in the governor state. +-- +-- 3. Based on 2, a signal of change events when the set of known peers shrinks. +-- +-- 4. Based on 1, 2 and 3, a signal of unexpected shrink events: a signal that +-- is True when there is a shrink event and the new size of the set of known +-- peers is below the target. +-- +-- Based on these signals we assert: +-- +-- * That the signal 4 above is always False. +-- +prop_governor_target_known_5_no_shrink_below :: GovernorMockEnvironment -> Property +prop_governor_target_known_5_no_shrink_below env = + let events = Signal.eventsFromListUpToTime (Time (10 * 60 * 60)) + . selectPeerSelectionTraceEvents + . runGovernorInMockEnvironment + $ env + + envTargetsSig :: Signal Int + envTargetsSig = + selectEnvTargets targetNumberOfKnownPeers events + + govKnownPeersSig :: Signal (Set PeerAddr) + govKnownPeersSig = + selectGovState (KnownPeers.toSet . Governor.knownPeers) events + + knownPeersShrinksSig :: Signal (Set PeerAddr) + knownPeersShrinksSig = + Signal.nub + . fmap (fromMaybe Set.empty) + $ Signal.difference + (\x x' -> x Set.\\ x') + govKnownPeersSig + + unexpectedShrink :: Signal Bool + unexpectedShrink = + -- Note that when we observe a shrink, the known peers set at the + -- same time is the new shrunk value. This means our test has to be + -- Set.size known < target rather than Set.size known <= target + -- It also has the bonus of checking that we are checking that the + -- size of the known peer set after the shrink is not strictly + -- smaller than the target, which means we're checking that we do + -- not undershoot the target: from above we hit the target exactly. + (\target known shrinks -> + not (Set.null shrinks) + && Set.size known < target + ) <$> envTargetsSig + <*> govKnownPeersSig + <*> knownPeersShrinksSig + + in counterexample + "\nSignal key: (target, known peers, shrinks, unexpected)" $ + + signalProperty 20 show + (\(_,_,_,unexpected) -> not unexpected) + ((,,,) <$> envTargetsSig + <*> govKnownPeersSig + <*> knownPeersShrinksSig + <*> unexpectedShrink) + +-- | The governor should shrink its known peer set within a bounded time when +-- it is above the target size. +-- +-- This deals with hitting the target from above. We have to allow some bounded +-- time rather than demand instant shrinking because in some situation the +-- governor must demote active or established peers before it can forget known +-- peers. 
+-- +-- We derive a number of signals: +-- +-- 1. A signal of the effective target for known peers from the environment, +-- based on both the given target and the local root peers. +-- +-- 2. A signal of the set of known peers in the governor state. +-- +-- 3. Based on 2, a signal of change events when the set of known peers shrinks. +-- +-- 5. Based on 1, 2 and 3, a signal that becomes True if for X seconds, the +-- known peers is above target and there is no shrink event. +-- +-- Based on these signals we check: +-- +-- * That the signal 5 above is always False. +-- +prop_governor_target_known_above :: GovernorMockEnvironment -> Property +prop_governor_target_known_above env = + let events = Signal.eventsFromListUpToTime (Time (10 * 60 * 60)) + . selectPeerSelectionTraceEvents + . runGovernorInMockEnvironment + $ env + + envTargetsSig :: Signal PeerSelectionTargets + envTargetsSig = + selectEnvTargets id events + + govLocalRootPeersSig :: Signal (Set PeerAddr) + govLocalRootPeersSig = + selectGovState (LocalRootPeers.keysSet . Governor.localRootPeers) + events + + govPublicRootPeersSig :: Signal (Set PeerAddr) + govPublicRootPeersSig = + selectGovState Governor.publicRootPeers events + + govKnownPeersSig :: Signal (Set PeerAddr) + govKnownPeersSig = + selectGovState (KnownPeers.toSet . Governor.knownPeers) events + + govEstablishedPeersSig :: Signal (Set PeerAddr) + govEstablishedPeersSig = + selectGovState (EstablishedPeers.toSet . Governor.establishedPeers) + events + + -- There are no demotion opportunities if we're at or below target. + -- Otherwise, the opportunities for demotion are known peers that + -- are not currently established and are not local. + -- + demotionOpportunity targets local public known established + | Set.size known <= targetNumberOfKnownPeers targets + = Set.empty + + | otherwise + = known Set.\\ established + Set.\\ local + Set.\\ publicProtected + where + -- Furthermore, public roots are protected from demotion if we are + -- at or below target for roots peers. + publicProtected + | Set.size local + Set.size public + <= targetNumberOfRootPeers targets + = public + + | otherwise + = Set.empty + + demotionOpportunities :: Signal (Set PeerAddr) + demotionOpportunities = + demotionOpportunity + <$> envTargetsSig + <*> govLocalRootPeersSig + <*> govPublicRootPeersSig + <*> govKnownPeersSig + <*> govEstablishedPeersSig + + demotionOpportunitiesIgnoredTooLong :: Signal (Set PeerAddr) + demotionOpportunitiesIgnoredTooLong = + Signal.keyedTimeout + 10 -- seconds + id + demotionOpportunities + + in counterexample + ("\nSignal key: (target (root, known), local peers, public peers, known peers, " ++ + "established peers, demotion opportunities, ignored too long)") $ + + signalProperty 20 show + (\(_,_,_,_,_,_,toolong) -> Set.null toolong) + ((,,,,,,) <$> ((\t -> (targetNumberOfRootPeers t, + targetNumberOfKnownPeers t)) <$> envTargetsSig) + <*> govLocalRootPeersSig + <*> govPublicRootPeersSig + <*> govKnownPeersSig + <*> govEstablishedPeersSig + <*> demotionOpportunities + <*> demotionOpportunitiesIgnoredTooLong) + + +-- | Check that the governor can hit (but not overshoot) its target for the +-- number of warm peers. This has to be bounded by what is possible: we cannot +-- always find enough peers, and when we can, some of them fail. +-- +-- This is a somewhat tricky property to express because it is non-trivial to +-- find the maximum number of possible established connections by inspecting +-- the mock environment. 
+-- +-- We approach it in three parts: from above, from below and statistically. +-- +-- The simplest is from above: the environment knows how many established +-- connections there are at any point in (virtual) time, and what the targets +-- are. So we can easily compare the two. This can be a tight bound above. +-- When the target is stable, the governor should never overshoot the target. +-- When the target changes to be smaller, the governor should shrink the number +-- of established connections to be within the target within a relatively short +-- period of time. +-- +-- +-- +-- Tracking very precisely the maximum number of peers we could reasonably +-- establish connections to is tricky and hence prone to mistakes in the test +-- definition. So as an extra sanity check we take a simpler but fuzzy approach. +-- In some fraction of test runs, the environment should be such that it is +-- possible to actually hit the target for the number of established peers. So +-- we label the cases where this happens, and then we can use a statistical +-- test to assert that this happens in some fraction of test cases. +-- +prop_governor_target_established_below :: GovernorMockEnvironment -> Property +prop_governor_target_established_below env = + let events = Signal.eventsFromListUpToTime (Time (10 * 60 * 60)) + . selectPeerSelectionTraceEvents + . runGovernorInMockEnvironment + $ env + + envTargetsSig :: Signal Int + envTargetsSig = + selectEnvTargets targetNumberOfEstablishedPeers events + + govKnownPeersSig :: Signal (Set PeerAddr) + govKnownPeersSig = + selectGovState (KnownPeers.toSet . Governor.knownPeers) events + + govEstablishedPeersSig :: Signal (Set PeerAddr) + govEstablishedPeersSig = + selectGovState + (EstablishedPeers.toSet . Governor.establishedPeers) + events + + govEstablishedFailuresSig :: Signal (Set PeerAddr) + govEstablishedFailuresSig = + Signal.keyedLinger + 180 -- 3 minutes -- TODO: too eager to reconnect? + (fromMaybe Set.empty) + . Signal.fromEvents + . Signal.selectEvents + (\case TracePromoteColdFailed _ _ peer _ _ -> + --TODO: the environment does not yet cause this to happen + -- it requires synchronous failure in the establish action + Just (Set.singleton peer) + --TODO: what about TraceDemoteWarmDone ? + -- these are also not immediate candidates + -- why does the property not fail for not tracking these? + TraceDemoteAsynchronous status + | Set.null failures -> Nothing + | otherwise -> Just failures + where + failures = Map.keysSet (Map.filter (==PeerCold) status) + _ -> Nothing + ) + . 
selectGovEvents + $ events + + -- There are no opportunities if we're at or above target + -- + promotionOpportunity target known established recentFailures + | Set.size established >= target + = Set.empty + + | otherwise + = known Set.\\ established + Set.\\ recentFailures + + promotionOpportunities :: Signal (Set PeerAddr) + promotionOpportunities = + promotionOpportunity + <$> envTargetsSig + <*> govKnownPeersSig + <*> govEstablishedPeersSig + <*> govEstablishedFailuresSig + + promotionOpportunitiesIgnoredTooLong :: Signal (Set PeerAddr) + promotionOpportunitiesIgnoredTooLong = + Signal.keyedTimeout + 10 -- seconds + id + promotionOpportunities + + in counterexample + ("\nSignal key: (target, known peers, established peers, recent failures, " ++ + "opportunities, ignored too long)") $ + + signalProperty 20 show + (\(_,_,_,_,_,toolong) -> Set.null toolong) + ((,,,,,) <$> envTargetsSig + <*> govKnownPeersSig + <*> govEstablishedPeersSig + <*> govEstablishedFailuresSig + <*> promotionOpportunities + <*> promotionOpportunitiesIgnoredTooLong) + +prop_governor_target_active_below :: GovernorMockEnvironment -> Property +prop_governor_target_active_below env = + let events = Signal.eventsFromListUpToTime (Time (10 * 60 * 60)) + . selectPeerSelectionTraceEvents + . runGovernorInMockEnvironment + $ env + + envTargetsSig :: Signal Int + envTargetsSig = + selectEnvTargets targetNumberOfActivePeers events + + govLocalRootPeersSig :: Signal (LocalRootPeers.LocalRootPeers PeerAddr) + govLocalRootPeersSig = + selectGovState Governor.localRootPeers events + + govEstablishedPeersSig :: Signal (Set PeerAddr) + govEstablishedPeersSig = + selectGovState + (EstablishedPeers.toSet . Governor.establishedPeers) + events + + govActivePeersSig :: Signal (Set PeerAddr) + govActivePeersSig = + selectGovState Governor.activePeers events + + govActiveFailuresSig :: Signal (Set PeerAddr) + govActiveFailuresSig = + Signal.keyedLinger + 180 -- 3 minutes -- TODO: too eager to reconnect? + (fromMaybe Set.empty) + . Signal.fromEvents + . Signal.selectEvents + (\case TracePromoteWarmFailed _ _ peer _ -> + --TODO: the environment does not yet cause this to happen + -- it requires synchronous failure in the establish action + Just (Set.singleton peer) + --TODO + TraceDemoteAsynchronous status + | Set.null failures -> Nothing + | otherwise -> Just failures + where + failures = Map.keysSet (Map.filter (==PeerWarm) status) + _ -> Nothing + ) + . selectGovEvents + $ events + + -- There are no opportunities if we're at or above target. + -- + -- We define local root peers not to be promotion opportunities for + -- the purpose of the general target of active peers. + -- The local root peers have a separate target with a separate property. + -- And we cannot count local peers since we can have corner cases where + -- the only choices are local roots in a group that is already at target + -- but the general target is still higher. In such situations we do not + -- want to promote any, since we'd then be above target for the local + -- root peer group. 
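+        -- As a purely illustrative example (made-up peers, not part of the
+        -- test): with established peers {1,2,3,4}, active peers {1}, a local
+        -- root {2}, a recent failure {3}, and an active target that is not
+        -- yet met, the only promotion opportunity is peer 4:
+        --
+        -- > Set.fromList [PeerAddr 2, PeerAddr 3, PeerAddr 4] -- established \\ active
+        -- >       Set.\\ Set.fromList [PeerAddr 2]            -- local roots
+        -- >       Set.\\ Set.fromList [PeerAddr 3]            -- recent failures
+        -- >   == Set.fromList [PeerAddr 4]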
+ -- + promotionOpportunity target local established active recentFailures + | Set.size active >= target + = Set.empty + + | otherwise + = established Set.\\ active + Set.\\ LocalRootPeers.keysSet local + Set.\\ recentFailures + + promotionOpportunities :: Signal (Set PeerAddr) + promotionOpportunities = + promotionOpportunity + <$> envTargetsSig + <*> govLocalRootPeersSig + <*> govEstablishedPeersSig + <*> govActivePeersSig + <*> govActiveFailuresSig + + promotionOpportunitiesIgnoredTooLong :: Signal (Set PeerAddr) + promotionOpportunitiesIgnoredTooLong = + Signal.keyedTimeout + 10 -- seconds + id + promotionOpportunities + + in counterexample + ("\nSignal key: (target, local peers, established peers, " ++ + "active peers, recent failures, opportunities, ignored too long)") $ + + signalProperty 20 show + (\(_,_,_,_,_,_,toolong) -> Set.null toolong) + ((,,,,,,) <$> envTargetsSig + <*> govLocalRootPeersSig + <*> govEstablishedPeersSig + <*> govActivePeersSig + <*> govActiveFailuresSig + <*> promotionOpportunities + <*> promotionOpportunitiesIgnoredTooLong) + +prop_governor_target_established_above :: GovernorMockEnvironment -> Property +prop_governor_target_established_above env = + let events = Signal.eventsFromListUpToTime (Time (10 * 60 * 60)) + . selectPeerSelectionTraceEvents + . runGovernorInMockEnvironment + $ env + + envTargetsSig :: Signal Int + envTargetsSig = + selectEnvTargets targetNumberOfEstablishedPeers events + + govLocalRootPeersSig :: Signal (LocalRootPeers.LocalRootPeers PeerAddr) + govLocalRootPeersSig = + selectGovState Governor.localRootPeers events + + govEstablishedPeersSig :: Signal (Set PeerAddr) + govEstablishedPeersSig = + selectGovState + (EstablishedPeers.toSet . Governor.establishedPeers) + events + + govActivePeersSig :: Signal (Set PeerAddr) + govActivePeersSig = + selectGovState Governor.activePeers events + + -- There are no demotion opportunities if we're at or below target. + -- Otherwise the demotion opportunities are the established peers that + -- are not active and not local root peers. + -- + demotionOpportunity target local established active + | Set.size established <= target + = Set.empty + + | otherwise + = established Set.\\ active + Set.\\ LocalRootPeers.keysSet local + demotionOpportunities :: Signal (Set PeerAddr) + demotionOpportunities = + demotionOpportunity + <$> envTargetsSig + <*> govLocalRootPeersSig + <*> govEstablishedPeersSig + <*> govActivePeersSig + + demotionOpportunitiesIgnoredTooLong :: Signal (Set PeerAddr) + demotionOpportunitiesIgnoredTooLong = + Signal.keyedTimeout + 10 -- seconds + id + demotionOpportunities + + in counterexample + ("\nSignal key: (target, local peers, established peers, active peers, " ++ + "demotion opportunities, ignored too long)") $ + + signalProperty 20 show + (\(_,_,_,_,_,toolong) -> Set.null toolong) + ((,,,,,) <$> envTargetsSig + <*> (LocalRootPeers.toGroupSets <$> govLocalRootPeersSig) + <*> govEstablishedPeersSig + <*> govActivePeersSig + <*> demotionOpportunities + <*> demotionOpportunitiesIgnoredTooLong) + +prop_governor_target_active_above :: GovernorMockEnvironment -> Property +prop_governor_target_active_above env = + let events = Signal.eventsFromListUpToTime (Time (10 * 60 * 60)) + . selectPeerSelectionTraceEvents + . 
runGovernorInMockEnvironment + $ env + + envTargetsSig :: Signal Int + envTargetsSig = + selectEnvTargets targetNumberOfActivePeers events + + govLocalRootPeersSig :: Signal (LocalRootPeers.LocalRootPeers PeerAddr) + govLocalRootPeersSig = + selectGovState Governor.localRootPeers events + + govActivePeersSig :: Signal (Set PeerAddr) + govActivePeersSig = + selectGovState Governor.activePeers events + + demotionOpportunity target local active + | Set.size active <= target + = Set.empty + + | otherwise + = active Set.\\ LocalRootPeers.keysSet local + + demotionOpportunities :: Signal (Set PeerAddr) + demotionOpportunities = + demotionOpportunity + <$> envTargetsSig + <*> govLocalRootPeersSig + <*> govActivePeersSig + + demotionOpportunitiesIgnoredTooLong :: Signal (Set PeerAddr) + demotionOpportunitiesIgnoredTooLong = + Signal.keyedTimeout + 10 -- seconds + id + demotionOpportunities + + in counterexample + ("\nSignal key: (target, local peers, active peers, " ++ + "demotion opportunities, ignored too long)") $ + + signalProperty 20 show + (\(_,_,_,_,toolong) -> Set.null toolong) + ((,,,,) <$> envTargetsSig + <*> (LocalRootPeers.toGroupSets <$> govLocalRootPeersSig) + <*> govActivePeersSig + <*> demotionOpportunities + <*> demotionOpportunitiesIgnoredTooLong) + +-- | A variant of 'prop_governor_target_established_below' but for the target +-- that all local root peers should become established. +-- +-- We do not need separate above and below variants of this property since it +-- is not possible to exceed the target. +-- +prop_governor_target_established_local :: GovernorMockEnvironment -> Property +prop_governor_target_established_local env = + let events = Signal.eventsFromListUpToTime (Time (10 * 60 * 60)) + . selectPeerSelectionTraceEvents + . runGovernorInMockEnvironment + $ env + + govLocalRootPeersSig :: Signal (Set PeerAddr) + govLocalRootPeersSig = + selectGovState (LocalRootPeers.keysSet . Governor.localRootPeers) + events + + govEstablishedPeersSig :: Signal (Set PeerAddr) + govEstablishedPeersSig = + selectGovState + (EstablishedPeers.toSet . Governor.establishedPeers) + events + + govEstablishedFailuresSig :: Signal (Set PeerAddr) + govEstablishedFailuresSig = + Signal.keyedLinger + 180 -- 3 minutes -- TODO: too eager to reconnect? + (fromMaybe Set.empty) + . Signal.fromEvents + . Signal.selectEvents + (\case TracePromoteColdFailed _ _ peer _ _ -> + --TODO: the environment does not yet cause this to happen + -- it requires synchronous failure in the establish action + Just (Set.singleton peer) + --TODO: what about TraceDemoteWarmDone ? + -- these are also not immediate candidates + -- why does the property not fail for not tracking these? + TraceDemoteAsynchronous status + | Set.null failures -> Nothing + | otherwise -> Just failures + where + failures = Map.keysSet (Map.filter (==PeerCold) status) + _ -> Nothing + ) + . 
selectGovEvents + $ events + + promotionOpportunities :: Signal (Set PeerAddr) + promotionOpportunities = + (\local established recentFailures -> + local Set.\\ established + Set.\\ recentFailures + ) <$> govLocalRootPeersSig + <*> govEstablishedPeersSig + <*> govEstablishedFailuresSig + + promotionOpportunitiesIgnoredTooLong :: Signal (Set PeerAddr) + promotionOpportunitiesIgnoredTooLong = + Signal.keyedTimeout + 10 -- seconds + id + promotionOpportunities + + in counterexample + ("\nSignal key: (local root peers, established peers, " ++ + "recent failures, opportunities, ignored too long)") $ + + signalProperty 20 show + (\(_,_,_,_,toolong) -> Set.null toolong) + ((,,,,) <$> govLocalRootPeersSig + <*> govEstablishedPeersSig + <*> govEstablishedFailuresSig + <*> promotionOpportunities + <*> promotionOpportunitiesIgnoredTooLong) + + +-- | A variant of 'prop_governor_target_active_below' but for the target that +-- certain numbers out of groups of local root peers should become active. +-- +-- We do not need separate above and below variants of this property because +-- the target for active local root peers is one-sided: it is ok if we are +-- above target for any individual group. It is the overall active peers target +-- that can cause us to demote local roots if that's possible for any group +-- without going under target. +-- +-- TODO: perhaps we do need a below property that we do not demote active peers +-- causing us to undershoot the target for local root peers being active. +-- +prop_governor_target_active_local_below :: GovernorMockEnvironment -> Property +prop_governor_target_active_local_below env = + let events = Signal.eventsFromListUpToTime (Time (10 * 60 * 60)) + . selectPeerSelectionTraceEvents + . runGovernorInMockEnvironment + $ env + + govLocalRootPeersSig :: Signal (LocalRootPeers.LocalRootPeers PeerAddr) + govLocalRootPeersSig = + selectGovState Governor.localRootPeers events + + govEstablishedPeersSig :: Signal (Set PeerAddr) + govEstablishedPeersSig = + selectGovState + (EstablishedPeers.toSet . Governor.establishedPeers) + events + + govActivePeersSig :: Signal (Set PeerAddr) + govActivePeersSig = + selectGovState Governor.activePeers events + + govActiveFailuresSig :: Signal (Set PeerAddr) + govActiveFailuresSig = + Signal.keyedLinger + 180 -- 3 minutes -- TODO: too eager to reconnect? + (fromMaybe Set.empty) + . Signal.fromEvents + . Signal.selectEvents + (\case TracePromoteWarmFailed _ _ peer _ -> + --TODO: the environment does not yet cause this to happen + -- it requires synchronous failure in the establish action + Just (Set.singleton peer) + --TODO + TraceDemoteAsynchronous status + | Set.null failures -> Nothing + | otherwise -> Just failures + where + failures = Map.keysSet (Map.filter (==PeerWarm) status) + _ -> Nothing + ) + . 
selectGovEvents
+            $ events
+
+        promotionOpportunities :: Signal (Set PeerAddr)
+        promotionOpportunities =
+          (\local established active recentFailures ->
+              Set.unions
+                [ -- There are no opportunities if we're at or above target
+                  if Set.size groupActive >= target
+                     then Set.empty
+                     else groupEstablished Set.\\ active
+                                           Set.\\ recentFailures
+                | (target, group) <- LocalRootPeers.toGroupSets local
+                , let groupActive      = group `Set.intersection` active
+                      groupEstablished = group `Set.intersection` established
+                ]
+          ) <$> govLocalRootPeersSig
+            <*> govEstablishedPeersSig
+            <*> govActivePeersSig
+            <*> govActiveFailuresSig
+
+        promotionOpportunitiesIgnoredTooLong :: Signal (Set PeerAddr)
+        promotionOpportunitiesIgnoredTooLong =
+          Signal.keyedTimeout
+            10 -- seconds
+            id
+            promotionOpportunities
+
+     in counterexample
+          ("\nSignal key: (local, established peers, active peers, " ++
+           "recent failures, opportunities, ignored too long)") $
+
+        signalProperty 20 show
+          (\(_,_,_,_,_,toolong) -> Set.null toolong)
+          ((,,,,,) <$> (LocalRootPeers.toGroupSets <$> govLocalRootPeersSig)
+                   <*> govEstablishedPeersSig
+                   <*> govActivePeersSig
+                   <*> govActiveFailuresSig
+                   <*> promotionOpportunities
+                   <*> promotionOpportunitiesIgnoredTooLong)
+
+prop_governor_target_active_local_above :: GovernorMockEnvironment -> Property
+prop_governor_target_active_local_above env =
+    let events = Signal.eventsFromListUpToTime (Time (10 * 60 * 60))
+               . selectPeerSelectionTraceEvents
+               . runGovernorInMockEnvironment
+               $ env
+
+        govLocalRootPeersSig :: Signal (LocalRootPeers.LocalRootPeers PeerAddr)
+        govLocalRootPeersSig =
+          selectGovState Governor.localRootPeers events
+
+        govActivePeersSig :: Signal (Set PeerAddr)
+        govActivePeersSig =
+          selectGovState Governor.activePeers events
+
+        demotionOpportunities :: Signal (Set PeerAddr)
+        demotionOpportunities =
+          (\local active ->
+              Set.unions
+                [ -- There are no opportunities if we're at or below target
+                  if Set.size groupActive <= target
+                     then Set.empty
+                     else groupActive
+                | (target, group) <- LocalRootPeers.toGroupSets local
+                , let groupActive = group `Set.intersection` active
+                ]
+          ) <$> govLocalRootPeersSig
+            <*> govActivePeersSig
+
+        demotionOpportunitiesIgnoredTooLong :: Signal (Set PeerAddr)
+        demotionOpportunitiesIgnoredTooLong =
+          Signal.keyedTimeout
+            10 -- seconds
+            id
+            demotionOpportunities
+
+     in counterexample
+          ("\nSignal key: (local peers, active peers, " ++
+           "demotion opportunities, ignored too long)") $
+
+        signalProperty 20 show
+          (\(_,_,_,toolong) -> Set.null toolong)
+          ((,,,) <$> (LocalRootPeers.toGroupSets <$> govLocalRootPeersSig)
+                 <*> govActivePeersSig
+                 <*> demotionOpportunities
+                 <*> demotionOpportunitiesIgnoredTooLong)
+
 --
 -- Utils for properties
 --
@@ -282,22 +1837,65 @@ prop_governor_connstatus (GovernorMockEnvironmentWAD env) =
 takeFirstNHours :: DiffTime -> [(Time, a)] -> [(Time, a)]
 takeFirstNHours h = takeWhile (\(t,_) -> t < Time (60*60*h))
 
+selectEnvEvents :: Events TestTraceEvent -> Events TraceMockEnv
+selectEnvEvents = Signal.selectEvents
+                    (\case MockEnvEvent e -> Just e
+                           _              -> Nothing)
+
+selectGovEvents :: Events TestTraceEvent
+                -> Events (TracePeerSelection PeerAddr)
+selectGovEvents = Signal.selectEvents
+                    (\case GovernorEvent e -> Just e
+                           _               -> Nothing)
+
+selectGovState :: Eq a
+               => (Governor.PeerSelectionState PeerAddr () -> a)
+               -> Events TestTraceEvent
+               -> Signal a
+selectGovState f =
+    Signal.nub
+  . fmap f
+    -- TODO: #3182 Rng seed should come from quickcheck.
+  . 
Signal.fromChangeEvents (Governor.emptyPeerSelectionState $ mkStdGen 42) + . Signal.selectEvents + (\case GovernorDebug (TraceGovernorState _ _ st) -> Just st + _ -> Nothing) + +selectEnvTargets :: Eq a + => (PeerSelectionTargets -> a) + -> Events TestTraceEvent + -> Signal a +selectEnvTargets f = + Signal.nub + . fmap f + . Signal.fromChangeEvents nullPeerSelectionTargets + . Signal.selectEvents + (\case TraceEnvSetTargets targets -> Just targets + _ -> Nothing) + . selectEnvEvents -- -- Live examples -- -_governorFindingPublicRoots :: Int -> [DomainAddress] -> IO Void -_governorFindingPublicRoots targetNumberOfRootPeers domains = - withTimeoutSerial $ \timeout -> +-- | Run the 'publicRootPeersProvider' in IO with a stdout tracer to observe +-- what it does. +-- +-- This is a manual test that runs in IO and has to be observed to see that it +-- is doing something sensible. It is not run automatically. +-- +_governorFindingPublicRoots :: Int -> STM IO [RelayAccessPoint] -> IO Void +_governorFindingPublicRoots targetNumberOfRootPeers readDomains = publicRootPeersProvider tracer - timeout DNS.defaultResolvConf - domains $ \requestPublicRootPeers -> + readDomains + ioDNSActions $ \requestPublicRootPeers -> peerSelectionGovernor - tracer tracer + tracer tracer tracer + -- TODO: #3182 Rng seed should come from quickcheck. + (mkStdGen 42) actions { requestPublicRootPeers } policy where diff --git a/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/Instances.hs b/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/Instances.hs index 7a298c350ed..67508950415 100644 --- a/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/Instances.hs +++ b/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/Instances.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE OverloadedStrings #-} {-# OPTIONS_GHC -Wno-orphans #-} @@ -7,22 +8,23 @@ module Test.Ouroboros.Network.PeerSelection.Instances ( -- test types PeerAddr(..), - -- test utils - renderRanges, - -- generator tests prop_arbitrary_PeerSelectionTargets, prop_shrink_PeerSelectionTargets, ) where -import Data.List.NonEmpty (NonEmpty (..)) -import qualified Data.List.NonEmpty as NonEmpty +import Data.Word (Word32) +import Data.Text.Encoding (encodeUtf8) +import Ouroboros.Network.PeerSelection.RootPeersDNS + (DomainAccessPoint (..), RelayAccessPoint (..)) import Ouroboros.Network.PeerSelection.Governor import Ouroboros.Network.PeerSelection.Types import Test.QuickCheck +import Test.QuickCheck.Utils +import qualified Data.IP as IP -- @@ -39,21 +41,10 @@ newtype PeerAddr = PeerAddr Int -- here for the few cases that need it, and it is used for (lack-of) shrinking. -- instance Arbitrary PeerAddr where - arbitrary = PeerAddr . getNonNegative <$> arbitrary + arbitrary = PeerAddr <$> arbitrarySizedNatural shrink _ = [] -instance Arbitrary a => Arbitrary (NonEmpty a) where - arbitrary = NonEmpty.fromList <$> listOf1 arbitrary - - shrink = shrinkMap from to - where - to :: NonEmpty a -> NonEmptyList a - to xs = NonEmpty (NonEmpty.toList xs) - - from :: NonEmptyList a -> NonEmpty a - from (NonEmpty xs) = NonEmpty.fromList xs - instance Arbitrary PeerAdvertise where arbitrary = elements [ DoAdvertisePeer, DoNotAdvertisePeer ] @@ -81,23 +72,47 @@ instance Arbitrary PeerSelectionTargets where , let targets' = PeerSelectionTargets r' k' e' a' , sanePeerSelectionTargets targets' ] +instance Arbitrary DomainAccessPoint where + arbitrary = + DomainAccessPoint . 
encodeUtf8 + <$> elements domains + <*> (fromIntegral <$> (arbitrary :: Gen Int)) + where + domains = [ "test1" + , "test2" + , "test3" + , "test4" + , "test5" + ] + +genIPv4 :: Gen IP.IP +genIPv4 = + IP.IPv4 . IP.toIPv4w <$> arbitrary + +genIPv6 :: Gen IP.IP +genIPv6 = + IP.IPv6 . IP.toIPv6w <$> genFourWord32 + where + genFourWord32 :: Gen (Word32, Word32, Word32, Word32) + genFourWord32 = + (,,,) <$> arbitrary + <*> arbitrary + <*> arbitrary + <*> arbitrary + +instance Arbitrary RelayAccessPoint where + arbitrary = + oneof [ RelayDomainAccessPoint <$> arbitrary + , RelayAccessAddress <$> oneof [genIPv4, genIPv6] + <*> (fromIntegral <$> (arbitrary :: Gen Int)) + ] prop_arbitrary_PeerSelectionTargets :: PeerSelectionTargets -> Bool prop_arbitrary_PeerSelectionTargets = sanePeerSelectionTargets -prop_shrink_PeerSelectionTargets :: PeerSelectionTargets -> Bool -prop_shrink_PeerSelectionTargets = - all sanePeerSelectionTargets . shrink - - --- --- QuickCheck utils --- - -renderRanges :: Int -> Int -> String -renderRanges r n = show lower ++ " -- " ++ show upper - where - lower = n - n `mod` r - upper = lower + (r-1) +prop_shrink_PeerSelectionTargets :: Fixed PeerSelectionTargets -> Property +prop_shrink_PeerSelectionTargets x = + prop_shrink_valid sanePeerSelectionTargets x + .&&. prop_shrink_nonequal x diff --git a/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/Json.hs b/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/Json.hs new file mode 100644 index 00000000000..ed78b3a739f --- /dev/null +++ b/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/Json.hs @@ -0,0 +1,39 @@ +module Test.Ouroboros.Network.PeerSelection.Json + ( tests ) where + +import Data.Aeson (decode, encode, fromJSON, toJSON) +import Ouroboros.Network.PeerSelection.RootPeersDNS + (DomainAccessPoint (..), RelayAccessPoint (..)) +import Ouroboros.Network.PeerSelection.Types (PeerAdvertise) +import Test.Ouroboros.Network.PeerSelection.Instances() + +import Test.QuickCheck +import Test.Tasty (TestTree, testGroup) +import Test.Tasty.QuickCheck (testProperty) + +tests :: TestTree +tests = + testGroup "Ouroboros.Network.PeerSelection.Json" + [ testProperty "DomainAccessPoint roundtrip" prop_roundtrip_DomainAccessPoint_JSON + , testProperty "RelayAccessPoint roundtrip" prop_roundtrip_RelayAccessPoint_JSON + , testProperty "PeerAdvertise roundtrip" prop_roundtrip_PeerAdvertise_JSON + ] + +prop_roundtrip_DomainAccessPoint_JSON :: DomainAccessPoint -> Property +prop_roundtrip_DomainAccessPoint_JSON da = + decode (encode da) === Just da + .&. + fromJSON (toJSON da) === pure da + +prop_roundtrip_RelayAccessPoint_JSON :: RelayAccessPoint -> Property +prop_roundtrip_RelayAccessPoint_JSON ra = + decode (encode ra) === Just ra + .&. + fromJSON (toJSON ra) === pure ra + +prop_roundtrip_PeerAdvertise_JSON :: PeerAdvertise -> Property +prop_roundtrip_PeerAdvertise_JSON pa = + decode (encode pa) === Just pa + .&. 
+ fromJSON (toJSON pa) === pure pa + diff --git a/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/LocalRootPeers.hs b/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/LocalRootPeers.hs index f216b2de572..2afef27b2c3 100644 --- a/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/LocalRootPeers.hs +++ b/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/LocalRootPeers.hs @@ -1,11 +1,9 @@ -{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE NamedFieldPuns #-} {-# OPTIONS_GHC -Wno-orphans #-} module Test.Ouroboros.Network.PeerSelection.LocalRootPeers ( arbitraryLocalRootPeers, - toGroups', restrictKeys, tests, ) where @@ -25,18 +23,21 @@ import Test.Ouroboros.Network.PeerSelection.Instances import Test.QuickCheck +import Test.QuickCheck.Utils import Test.Tasty (TestTree, testGroup) import Test.Tasty.QuickCheck (testProperty) tests :: TestTree tests = - testGroup "LocalRootPeers" - [ testProperty "arbitrary" prop_arbitrary_LocalRootPeers - , testProperty "fromToGroups" prop_fromToGroups - , testProperty "fromGroups" prop_fromGroups - , testProperty "shrink" prop_shrink_LocalRootPeers - , testProperty "clampToLimit" prop_clampToLimit + testGroup "Ouroboros.Network.PeerSelection" + [ testGroup "LocalRootPeers" + [ testProperty "arbitrary" prop_arbitrary_LocalRootPeers + , testProperty "fromToGroups" prop_fromToGroups + , testProperty "fromGroups" prop_fromGroups + , testProperty "shrink" prop_shrink_LocalRootPeers + , testProperty "clampToLimit" prop_clampToLimit + ] ] @@ -59,7 +60,8 @@ prop_clampToLimit localRootPeers targets = (LocalRootPeers.size localRootPeers) -arbitraryLocalRootPeers :: Set PeerAddr -> Gen (LocalRootPeers PeerAddr) +arbitraryLocalRootPeers :: Ord peeraddr + => Set peeraddr -> Gen (LocalRootPeers peeraddr) arbitraryLocalRootPeers peeraddrs = do -- divide into a few disjoint groups ngroups <- choose (1, 5 :: Int) @@ -77,22 +79,14 @@ arbitraryLocalRootPeers peeraddrs = do return (LocalRootPeers.fromGroups (zip targets groups)) -instance Arbitrary (LocalRootPeers PeerAddr) where +instance (Arbitrary peeraddr, Ord peeraddr) => + Arbitrary (LocalRootPeers peeraddr) where arbitrary = do - peeraddrs <- Set.map (PeerAddr . getNonNegative) - <$> scale (`div` 4) arbitrary + peeraddrs <- scale (`div` 4) arbitrary arbitraryLocalRootPeers peeraddrs shrink lrps = - map LocalRootPeers.fromGroups (shrink (toGroups' lrps)) - -toGroups' :: Ord peeraddr - => LocalRootPeers peeraddr - -> [(Int, Map peeraddr PeerAdvertise)] -toGroups' lrps = - [ (t, Map.fromSet (m Map.!) g) - | let m = LocalRootPeers.toMap lrps - , (t, g) <- LocalRootPeers.toGroups lrps ] + map LocalRootPeers.fromGroups (shrink (LocalRootPeers.toGroups lrps)) restrictKeys :: Ord peeraddr => LocalRootPeers peeraddr @@ -101,7 +95,7 @@ restrictKeys :: Ord peeraddr restrictKeys lrps ks = LocalRootPeers.fromGroups . map (\(t,g) -> (t, Map.restrictKeys g ks)) - . toGroups' + . LocalRootPeers.toGroups $ lrps prop_arbitrary_LocalRootPeers :: LocalRootPeers PeerAddr -> Property @@ -115,22 +109,23 @@ prop_arbitrary_LocalRootPeers lrps = where size = renderRanges 5 (LocalRootPeers.size lrps) numGroups = show (length (LocalRootPeers.toGroups lrps)) - sizeGroups = map (show . Set.size . snd) (LocalRootPeers.toGroups lrps) + sizeGroups = map (show . Set.size . 
snd) (LocalRootPeers.toGroupSets lrps) targets = [ case () of _ | t == 0 -> "none" | t == Set.size g -> "all" | otherwise -> "some" - | (t, g) <- LocalRootPeers.toGroups lrps ] + | (t, g) <- LocalRootPeers.toGroupSets lrps ] -prop_shrink_LocalRootPeers :: LocalRootPeers PeerAddr -> Bool -prop_shrink_LocalRootPeers = - all LocalRootPeers.invariant . shrink +prop_shrink_LocalRootPeers :: Fixed (LocalRootPeers PeerAddr) -> Property +prop_shrink_LocalRootPeers x = + prop_shrink_valid LocalRootPeers.invariant x + .&&. prop_shrink_nonequal x prop_fromGroups :: [(Int, Map PeerAddr PeerAdvertise)] -> Bool prop_fromGroups = LocalRootPeers.invariant . LocalRootPeers.fromGroups prop_fromToGroups :: LocalRootPeers PeerAddr -> Bool prop_fromToGroups lrps = - LocalRootPeers.fromGroups (toGroups' lrps) == lrps + LocalRootPeers.fromGroups (LocalRootPeers.toGroups lrps) == lrps diff --git a/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/MockEnvironment.hs b/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/MockEnvironment.hs index 78b097bb54b..89a61c902e3 100644 --- a/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/MockEnvironment.hs +++ b/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/MockEnvironment.hs @@ -20,28 +20,25 @@ module Test.Ouroboros.Network.PeerSelection.MockEnvironment ( selectPeerSelectionTraceEvents, firstGossipReachablePeers, - Script, - ScriptDelay(..), - TimedScript, - scriptHead, - singletonScript, + module Test.Ouroboros.Network.PeerSelection.Script, + module Ouroboros.Network.PeerSelection.Types, tests, ) where import Data.Dynamic (fromDynamic) -import Data.Functor (($>)) import Data.List (nub) -import Data.List.NonEmpty (NonEmpty (..)) import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import Data.Set (Set) import qualified Data.Set as Set import Data.Typeable (Typeable) import Data.Void (Void) +import System.Random (mkStdGen) import Control.Exception (throw) +import Control.Monad (forM_) import Control.Monad.Class.MonadAsync import Control.Monad.Class.MonadSTM import Control.Monad.Class.MonadTime @@ -65,21 +62,26 @@ import Test.Ouroboros.Network.PeerSelection.LocalRootPeers as LocalRootPeers hiding (tests) import Test.QuickCheck +import Test.QuickCheck.Utils import Test.Tasty (TestTree, localOption, testGroup) import Test.Tasty.QuickCheck (QuickCheckMaxSize (..), testProperty) tests :: TestTree tests = - testGroup "Mock environment" - [ testProperty "arbitrary for PeerSelectionTargets" prop_arbitrary_PeerSelectionTargets - , testProperty "shrink for PeerSelectionTargets" prop_shrink_PeerSelectionTargets - , testProperty "arbitrary for PeerGraph" prop_arbitrary_PeerGraph - , localOption (QuickCheckMaxSize 30) $ - testProperty "shrink for PeerGraph" prop_shrink_PeerGraph - , testProperty "arbitrary for GovernorMockEnvironment" prop_arbitrary_GovernorMockEnvironment - , localOption (QuickCheckMaxSize 30) $ - testProperty "shrink for GovernorMockEnvironment" prop_shrink_GovernorMockEnvironment + testGroup "Ouroboros.Network.PeerSelection" + [ testGroup "MockEnvironment" + [ testProperty "shrink for Script" prop_shrink_Script + , testProperty "shrink for GovernorScripts" prop_shrink_GovernorScripts + , testProperty "arbitrary for PeerSelectionTargets" prop_arbitrary_PeerSelectionTargets + , testProperty "shrink for PeerSelectionTargets" prop_shrink_PeerSelectionTargets + , testProperty "arbitrary for PeerGraph" prop_arbitrary_PeerGraph + , localOption (QuickCheckMaxSize 30) $ + testProperty "shrink for PeerGraph" 
prop_shrink_PeerGraph + , testProperty "arbitrary for GovernorMockEnvironment" prop_arbitrary_GovernorMockEnvironment + , localOption (QuickCheckMaxSize 30) $ + testProperty "shrink for GovernorMockEnvironment" prop_shrink_GovernorMockEnvironment + ] ] @@ -99,14 +101,14 @@ data GovernorMockEnvironment = GovernorMockEnvironment { localRootPeers :: LocalRootPeers PeerAddr, publicRootPeers :: Set PeerAddr, targets :: TimedScript PeerSelectionTargets, - pickKnownPeersForGossip :: PickScript, - pickColdPeersToPromote :: PickScript, - pickWarmPeersToPromote :: PickScript, - pickHotPeersToDemote :: PickScript, - pickWarmPeersToDemote :: PickScript, - pickColdPeersToForget :: PickScript + pickKnownPeersForGossip :: PickScript PeerAddr, + pickColdPeersToPromote :: PickScript PeerAddr, + pickWarmPeersToPromote :: PickScript PeerAddr, + pickHotPeersToDemote :: PickScript PeerAddr, + pickWarmPeersToDemote :: PickScript PeerAddr, + pickColdPeersToForget :: PickScript PeerAddr } - deriving Show + deriving (Show, Eq) data PeerConn m = PeerConn !PeerAddr !(TVar m PeerStatus) @@ -163,36 +165,64 @@ validGovernorMockEnvironment GovernorMockEnvironment { runGovernorInMockEnvironment :: GovernorMockEnvironment -> Trace Void runGovernorInMockEnvironment mockEnv = runSimTrace $ do - actions <- mockPeerSelectionActions tracerMockEnv mockEnv policy <- mockPeerSelectionPolicy mockEnv + actions <- mockPeerSelectionActions tracerMockEnv mockEnv policy peerSelectionGovernor tracerTracePeerSelection tracerDebugPeerSelection + tracerTracePeerSelectionCounters + (mkStdGen 42) actions policy -data TraceMockEnv = TraceEnvPeersStatus (Map PeerAddr PeerStatus) +data TraceMockEnv = TraceEnvAddPeers PeerGraph + | TraceEnvSetLocalRoots (LocalRootPeers PeerAddr) + | TraceEnvSetPublicRoots (Set PeerAddr) + | TraceEnvPublicRootTTL + | TraceEnvGossipTTL PeerAddr + | TraceEnvSetTargets PeerSelectionTargets + | TraceEnvPeersDemote AsyncDemotion PeerAddr + + | TraceEnvRootsResult [PeerAddr] + | TraceEnvGossipRequest PeerAddr (Maybe ([PeerAddr], GossipTime)) + | TraceEnvGossipResult PeerAddr [PeerAddr] + | TraceEnvPeersStatus (Map PeerAddr PeerStatus) deriving Show mockPeerSelectionActions :: (MonadAsync m, MonadTimer m, Fail.MonadFail m, MonadThrow (STM m)) => Tracer m TraceMockEnv -> GovernorMockEnvironment + -> PeerSelectionPolicy PeerAddr m -> m (PeerSelectionActions PeerAddr (PeerConn m) m) mockPeerSelectionActions tracer env@GovernorMockEnvironment { - peerGraph = PeerGraph adjacency, + peerGraph, + localRootPeers, + publicRootPeers, targets - } = do + } + policy = do scripts <- Map.fromList <$> - sequence [ (\a b -> (addr, (a, b))) - <$> initScript gossipScript - <*> initScript connectionScript - | (addr, _, GovernorScripts { gossipScript, connectionScript }) <- adjacency ] - targetsVar <- playTimedScript targets + sequence + [ (\a b -> (addr, (a, b))) + <$> initScript gossipScript + <*> initScript connectionScript + | let PeerGraph adjacency = peerGraph + , (addr, _, GovernorScripts { + gossipScript, + connectionScript + }) <- adjacency + ] + targetsVar <- playTimedScript (contramap TraceEnvSetTargets tracer) targets peerConns <- newTVarIO Map.empty + traceWith tracer (TraceEnvAddPeers peerGraph) + traceWith tracer (TraceEnvSetLocalRoots localRootPeers) --TODO: make dynamic + traceWith tracer (TraceEnvSetPublicRoots publicRootPeers) --TODO: make dynamic + snapshot <- atomically (snapshotPeersStatus peerConns) + traceWith tracer (TraceEnvPeersStatus snapshot) return $ mockPeerSelectionActions' - tracer env + tracer env policy 
scripts targetsVar peerConns @@ -209,6 +239,7 @@ mockPeerSelectionActions' :: forall m. MonadThrow (STM m)) => Tracer m TraceMockEnv -> GovernorMockEnvironment + -> PeerSelectionPolicy PeerAddr m -> Map PeerAddr (TVar m GossipScript, TVar m ConnectionScript) -> TVar m PeerSelectionTargets -> TVar m (Map PeerAddr (TVar m PeerStatus)) @@ -218,12 +249,15 @@ mockPeerSelectionActions' tracer localRootPeers, publicRootPeers } + PeerSelectionPolicy { + policyGossipRetryTime + } scripts targetsVar connsVar = PeerSelectionActions { - readLocalRootPeers = return (LocalRootPeers.toGroups' localRootPeers), - requestPublicRootPeers = \_ -> return (publicRootPeers, 60), + readLocalRootPeers = return (LocalRootPeers.toGroups localRootPeers), + requestPublicRootPeers, readPeerSelectionTargets = readTVar targetsVar, requestPeerGossip, peerStateActions = PeerStateActions { @@ -235,24 +269,43 @@ mockPeerSelectionActions' tracer } } where + -- TODO: make this dynamic + requestPublicRootPeers _n = do + let ttl :: Num n => n + ttl = 60 + _ <- async $ do + threadDelay ttl + traceWith tracer TraceEnvPublicRootTTL + traceWith tracer (TraceEnvRootsResult (Set.toList publicRootPeers)) + return (publicRootPeers, ttl) + requestPeerGossip addr = do let Just (gossipScript, _) = Map.lookup addr scripts mgossip <- stepScript gossipScript + traceWith tracer (TraceEnvGossipRequest addr mgossip) + _ <- async $ do + threadDelay policyGossipRetryTime + traceWith tracer (TraceEnvGossipTTL addr) case mgossip of - Nothing -> fail "no peers" + Nothing -> do + threadDelay 1 + traceWith tracer (TraceEnvGossipResult addr []) + fail "no peers" Just (peeraddrs, time) -> do threadDelay (interpretGossipTime time) + traceWith tracer (TraceEnvGossipResult addr peeraddrs) return peeraddrs establishPeerConnection :: PeerAddr -> m (PeerConn m) establishPeerConnection peeraddr = do + --TODO: add support for variable delays and synchronous failure threadDelay 1 (conn@(PeerConn _ v), snapshot) <- atomically $ do conn <- newTVar PeerWarm conns <- readTVar connsVar let !conns' = Map.insert peeraddr conn conns writeTVar connsVar conns' - snapshot <- traverse readTVar conns' + snapshot <- snapshotPeersStatus connsVar return (PeerConn peeraddr conn, snapshot) traceWith tracer (TraceEnvPeersStatus snapshot) let Just (_, connectScript) = Map.lookup peeraddr scripts @@ -268,21 +321,32 @@ mockPeerSelectionActions' tracer let interpretScriptDelay NoDelay = 1 interpretScriptDelay ShortDelay = 60 interpretScriptDelay LongDelay = 600 - done <- + (done, msnapshot) <- case demotion of - Noop -> return True + Noop -> return (True, Nothing) ToWarm -> do threadDelay (interpretScriptDelay delay) atomically $ do s <- readTVar v case s of - PeerHot -> writeTVar v PeerWarm - $> False - _ -> return (PeerCold == s) + PeerHot -> do writeTVar v PeerWarm + snapshot' <- snapshotPeersStatus connsVar + return (False, Just snapshot') + PeerWarm -> return (False, Nothing) + PeerCold -> return (True, Nothing) ToCold -> do threadDelay (interpretScriptDelay delay) - atomically $ writeTVar v PeerCold - $> True + atomically $ do + s <- readTVar v + case s of + PeerCold -> return (True, Nothing) + _ -> do writeTVar v PeerCold + snapshot' <- snapshotPeersStatus connsVar + return (True, Just snapshot') + + traceWith tracer (TraceEnvPeersDemote demotion peeraddr) + forM_ msnapshot $ \snapshot' -> + traceWith tracer (TraceEnvPeersStatus snapshot') if done then return () @@ -308,8 +372,7 @@ mockPeerSelectionActions' tracer -- state as if the transition went fine which will violate -- 
'invariantPeerSelectionState'.
           PeerCold -> throwIO ActivationError
-        conns <- readTVar connsVar
-        traverse readTVar conns
+        snapshotPeersStatus connsVar
       traceWith tracer (TraceEnvPeersStatus snapshot)
 
     deactivatePeerConnection :: PeerConn m -> m ()
@@ -323,8 +386,7 @@ mockPeerSelectionActions' tracer
             -- See the note in 'activatePeerConnection' why we throw an exception
             -- here.
             PeerCold -> throwIO DeactivationError
-          conns <- readTVar connsVar
-          traverse readTVar conns
+          snapshotPeersStatus connsVar
       traceWith tracer (TraceEnvPeersStatus snapshot)
 
     closePeerConnection :: PeerConn m -> m ()
@@ -339,13 +401,21 @@ mockPeerSelectionActions' tracer
           conns <- readTVar connsVar
           let !conns' = Map.delete peeraddr conns
          writeTVar connsVar conns'
-          traverse readTVar conns'
+          snapshotPeersStatus connsVar
       traceWith tracer (TraceEnvPeersStatus snapshot)
 
     monitorPeerConnection :: PeerConn m -> STM m PeerStatus
     monitorPeerConnection (PeerConn _peeraddr conn) = readTVar conn
 
 
+snapshotPeersStatus :: MonadSTMTx stm
+                    => TVar_ stm (Map PeerAddr (TVar_ stm PeerStatus))
+                    -> stm (Map PeerAddr PeerStatus)
+snapshotPeersStatus connsVar = do
+    conns <- readTVar connsVar
+    traverse readTVar conns
+
+
 mockPeerSelectionPolicy  :: MonadSTM m
                          => GovernorMockEnvironment
                          -> m (PeerSelectionPolicy PeerAddr m)
@@ -382,9 +452,17 @@ mockPeerSelectionPolicy GovernorMockEnvironment {
 --
 -- Utils for properties
 --
 
-data TestTraceEvent = GovernorDebug (DebugPeerSelection PeerAddr ())
-                    | GovernorEvent (TracePeerSelection PeerAddr)
-                    | MockEnvEvent  TraceMockEnv
+data TestTraceEvent = GovernorDebug    (DebugPeerSelection PeerAddr ())
+                    | GovernorEvent    (TracePeerSelection PeerAddr)
+                    | GovernorCounters PeerSelectionCounters
+                    | MockEnvEvent     TraceMockEnv
+  -- Warning: be careful with writing properties that rely
+  -- on trace events from both the governor and from the
+  -- environment. These events typically occur in separate
+  -- threads and so are not causally ordered. It is ok to use
+  -- them for timeout/eventually properties, but not for
+  -- properties that check conditions synchronously.
+  -- The governor debug vs other events are fully ordered.
   deriving Show
 
 tracerTracePeerSelection :: Tracer (IOSim s) (TracePeerSelection PeerAddr)
@@ -394,6 +472,9 @@ tracerDebugPeerSelection :: Tracer (IOSim s) (DebugPeerSelection PeerAddr peerco
 tracerDebugPeerSelection = contramap (GovernorDebug . 
fmap (const ())) tracerTestTraceEvent +tracerTracePeerSelectionCounters :: Tracer (IOSim s) PeerSelectionCounters +tracerTracePeerSelectionCounters = contramap GovernorCounters tracerTestTraceEvent + tracerMockEnv :: Tracer (IOSim s) TraceMockEnv tracerMockEnv = contramap MockEnvEvent tracerTestTraceEvent @@ -426,17 +507,20 @@ instance Arbitrary GovernorMockEnvironment where arbitrary = do -- Dependency of the root set on the graph peerGraph <- arbitrary + let peersSet = allPeers peerGraph (localRootPeers, - publicRootPeers) <- arbitraryRootPeers (allPeers peerGraph) + publicRootPeers) <- arbitraryRootPeers peersSet -- But the others are independent targets <- arbitrary - pickKnownPeersForGossip <- arbitrary - pickColdPeersToPromote <- arbitrary - pickWarmPeersToPromote <- arbitrary - pickHotPeersToDemote <- arbitrary - pickWarmPeersToDemote <- arbitrary - pickColdPeersToForget <- arbitrary + + let arbitrarySubsetOfPeers = arbitrarySubset peersSet + pickKnownPeersForGossip <- arbitraryPickScript arbitrarySubsetOfPeers + pickColdPeersToPromote <- arbitraryPickScript arbitrarySubsetOfPeers + pickWarmPeersToPromote <- arbitraryPickScript arbitrarySubsetOfPeers + pickHotPeersToDemote <- arbitraryPickScript arbitrarySubsetOfPeers + pickWarmPeersToDemote <- arbitraryPickScript arbitrarySubsetOfPeers + pickColdPeersToForget <- arbitraryPickScript arbitrarySubsetOfPeers return GovernorMockEnvironment{..} where arbitraryRootPeers :: Set PeerAddr @@ -545,7 +629,8 @@ prop_arbitrary_GovernorMockEnvironment env = (LocalRootPeers.keysSet (localRootPeers env)) (publicRootPeers env) -prop_shrink_GovernorMockEnvironment :: GovernorMockEnvironment -> Bool -prop_shrink_GovernorMockEnvironment = - all validGovernorMockEnvironment . shrink +prop_shrink_GovernorMockEnvironment :: Fixed GovernorMockEnvironment -> Property +prop_shrink_GovernorMockEnvironment x = + prop_shrink_valid validGovernorMockEnvironment x + .&&. prop_shrink_nonequal x diff --git a/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/PeerGraph.hs b/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/PeerGraph.hs index 1be7b3c513c..9c31ff37317 100644 --- a/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/PeerGraph.hs +++ b/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/PeerGraph.hs @@ -14,9 +14,10 @@ module Test.Ouroboros.Network.PeerSelection.PeerGraph ( GossipScript, ConnectionScript, AsyncDemotion(..), - GossipTime, + GossipTime(..), interpretGossipTime, + prop_shrink_GovernorScripts, prop_arbitrary_PeerGraph, prop_shrink_PeerGraph, @@ -24,7 +25,6 @@ module Test.Ouroboros.Network.PeerSelection.PeerGraph ( import Data.Graph (Graph) import qualified Data.Graph as Graph -import Data.List.NonEmpty (NonEmpty (..)) import qualified Data.List.NonEmpty as NonEmpty import qualified Data.Map.Strict as Map import Data.Set (Set) @@ -37,6 +37,7 @@ import Test.Ouroboros.Network.PeerSelection.Instances import Test.Ouroboros.Network.PeerSelection.Script import Test.QuickCheck +import Test.QuickCheck.Utils -- @@ -205,8 +206,11 @@ instance Arbitrary GovernorScripts where | gossipScript' <- shrink gossipScript ] ++ - [ GovernorScripts gossipScript (fixConnectionScript connectionScript') - | connectionScript' <- shrink connectionScript + [ GovernorScripts gossipScript connectionScript' + | connectionScript' <- map fixConnectionScript (shrink connectionScript) + -- fixConnectionScript can result in re-creating the same script + -- which would cause shrinking to loop. Filter out such cases. 
+ , connectionScript' /= connectionScript ] -- | We ensure that eventually the connection script will allow to connect to @@ -251,7 +255,8 @@ instance Arbitrary PeerGraph where arbitraryGossipScript :: [PeerAddr] -> Gen GossipScript arbitraryGossipScript peers = - arbitraryShortScriptOf gossipResult + sized $ \sz -> + arbitraryScriptOf (isqrt sz) gossipResult where gossipResult :: Gen (Maybe ([PeerAddr], GossipTime)) gossipResult = @@ -264,6 +269,9 @@ arbitraryGossipScript peers = picked <- vectorOf (length xs) arbitrary return [ x | (x, True) <- zip xs picked ] +isqrt :: Int -> Int +isqrt = floor . sqrt . (fromIntegral :: Int -> Double) + -- | Remove dangling graph edges and gossip results. -- prunePeerGraphEdges :: [(PeerAddr, [PeerAddr], PeerInfo)] @@ -305,6 +313,10 @@ instance Arbitrary GossipTime where -- Tests for the QC Arbitrary instances -- +prop_shrink_GovernorScripts :: Fixed GovernorScripts -> Property +prop_shrink_GovernorScripts = + prop_shrink_nonequal + prop_arbitrary_PeerGraph :: PeerGraph -> Property prop_arbitrary_PeerGraph pg = -- We are interested in the distribution of the graph size (in nodes) @@ -333,7 +345,8 @@ peerGraphNumStronglyConnectedComponents pg = where (g,_,_) = peerGraphAsGraph pg -prop_shrink_PeerGraph :: PeerGraph -> Bool -prop_shrink_PeerGraph = - all validPeerGraph . shrink +prop_shrink_PeerGraph :: Fixed PeerGraph -> Property +prop_shrink_PeerGraph x = + prop_shrink_valid validPeerGraph x + .&&. prop_shrink_nonequal x diff --git a/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/RootPeersDNS.hs b/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/RootPeersDNS.hs new file mode 100644 index 00000000000..c211be7a912 --- /dev/null +++ b/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/RootPeersDNS.hs @@ -0,0 +1,606 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE NumericUnderscores #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} + +{-# OPTIONS_GHC -Wno-unused-top-binds #-} + module Test.Ouroboros.Network.PeerSelection.RootPeersDNS ( + tests + ) where + +import Ouroboros.Network.PeerSelection.RootPeersDNS +import Ouroboros.Network.PeerSelection.Types (PeerAdvertise (..)) + +import Data.Dynamic (fromDynamic, Typeable) +import Data.Foldable (toList, foldl') +import Data.Functor (void) +import qualified Data.List.NonEmpty as NonEmpty +import qualified Data.Map.Strict as Map +import Data.Map.Strict (Map) +import Data.Maybe (catMaybes) +import qualified Data.Sequence as Seq +import Data.Sequence (Seq) +import Data.IP (IPv4, toIPv4w, fromHostAddress) +import Data.Time.Clock (picosecondsToDiffTime) +import Data.ByteString.Char8 (pack) +import Data.Set (Set) +import qualified Data.Set as Set +import Data.Time (DiffTime) +import qualified Network.DNS.Resolver as DNSResolver +import Network.DNS (DNSError(NameError, TimeoutExpired)) +import Network.Socket (SockAddr (..)) + +import Control.Exception (throw) +import Control.Monad.IOSim +import qualified Control.Monad.Class.MonadTimer as MonadTimer +import Control.Tracer (Tracer(Tracer), contramap) +import Control.Monad.Class.MonadSTM.Strict (newTVarIO, readTVar) +import qualified Control.Monad.Class.MonadSTM as LazySTM +import Control.Monad.Class.MonadTime (Time) + +import Test.Ouroboros.Network.PeerSelection.Script +import Test.Ouroboros.Network.PeerSelection.Instances() +import Test.QuickCheck +import Test.Tasty (TestTree, testGroup) +import Test.Tasty.QuickCheck 
(testProperty) + + +tests :: TestTree +tests = + testGroup "Ouroboros.Network.PeerSelection" + [ testGroup "RootPeersDNS" + [ testGroup "localRootPeersProvider" + [ testProperty "preserves groups and targets" + prop_local_preservesGroupNumberAndTargets + , testProperty "resolves domains correctly" + prop_local_resolvesDomainsCorrectly + , testProperty "updates domains correctly" + prop_local_updatesDomainsCorrectly + ] + , testGroup "publicRootPeersProvider" + [ testProperty "resolves domains correctly" + prop_public_resolvesDomainsCorrectly + ] + ] + ] + +-- +-- Mock Environment and Utils +-- + +data MockRoots = MockRoots { + mockLocalRootPeers :: [(Int, Map RelayAccessPoint PeerAdvertise)] + , mockDNSMap :: Map Domain [IPv4] + } + deriving Show + +-- | Generates MockRoots environments +-- +genMockRoots :: Gen MockRoots +genMockRoots = sized $ \relaysNumber -> do + relaysPerGroup <- chooseEnum (1, relaysNumber) + + relays <- vectorOf relaysNumber arbitrary + targets <- vectorOf relaysNumber (chooseEnum (1, 5)) + + peerAdvertise <- blocks relaysPerGroup + <$> vectorOf relaysNumber (arbitrary @PeerAdvertise) + + -- concat unique identifier to DNS domains to simplify tests + let taggedRelays = + zipWith + (\tag rel + -> case rel of + RelayAccessDomain domain port + -> RelayAccessDomain (domain <> (pack . show) tag) port + x -> x + ) + [(0 :: Int), 1 .. ] + relays + relaysBlocks = blocks relaysPerGroup taggedRelays + relaysMap = map Map.fromList $ zipWith zip relaysBlocks peerAdvertise + + localRootPeers = zip targets relaysMap + + domains = [ domain | RelayAccessDomain domain _ <- taggedRelays ] + + ipsPerDomain = 2 + + ips <- blocks ipsPerDomain + <$> vectorOf (ipsPerDomain * length domains) (toIPv4w <$> arbitrary) + + let dnsMap = Map.fromList $ zip domains ips + + return (MockRoots { + mockLocalRootPeers = localRootPeers, + mockDNSMap = dnsMap + }) + where + blocks _ [] = [] + blocks s l = take s l : blocks s (drop s l) + +instance Arbitrary MockRoots where + arbitrary = genMockRoots + shrink roots@MockRoots { mockLocalRootPeers, mockDNSMap } = + [ roots { mockLocalRootPeers = peers} + | peers <- shrinkList (const []) mockLocalRootPeers + ] + ++ + [ roots { mockDNSMap = Map.fromList dnsMap } + | dnsMap <- shrinkList (const []) (Map.assocs mockDNSMap) + ] + +-- | Used for debugging in GHCI +-- +simpleMockRoots :: MockRoots +simpleMockRoots = MockRoots localRootPeers dnsMap + where + localRootPeers = + [ ( 2 + , Map.fromList + [ ( RelayAccessAddress (read "192.0.2.1") (read "3333") + , DoAdvertisePeer + ) + , ( RelayAccessDomain "test.domain" (read "4444") + , DoNotAdvertisePeer + ) + ] + ) + ] + dnsMap = Map.fromList [ ("test.domain", [read "192.1.1.1", read "192.2.2.2"]) + ] + + +genDiffTime :: Integer + -> Integer + -> Gen DiffTime +genDiffTime lo hi = + picosecondsToDiffTime + . (lo * 1_000_000_000 +) + . (1_000_000_000 *) + . 
getNonNegative + <$> resize (fromIntegral hi) arbitrary + + +newtype DNSTimeout = DNSTimeout { getDNSTimeout :: DiffTime } + deriving Show + +instance Arbitrary DNSTimeout where + arbitrary = DNSTimeout <$> genDiffTime 110 300 + shrink (DNSTimeout delta) = + [ DNSTimeout (fromRational delta') + | delta' <- shrink (realToFrac delta) + , delta' >= 110 + ] + + +newtype DNSLookupDelay = DNSLookupDelay { getDNSLookupDelay :: DiffTime } + deriving Show + +instance Arbitrary DNSLookupDelay where + arbitrary = DNSLookupDelay <$> genDiffTime 20 120 + shrink (DNSLookupDelay delta) = + [ DNSLookupDelay (fromRational delta') + | delta' <- shrink (realToFrac delta) + , delta' >= 20 + ] + +-- | Mock DNSActions data structure for testing purposes. +-- Adds DNS Lookup function for IOSim with different timeout and lookup +-- delays for every attempt. +mockDNSActions :: forall exception s. + Map Domain [IPv4] + -> LazySTM.TVar (IOSim s) (Script DNSTimeout) + -> LazySTM.TVar (IOSim s) (Script DNSLookupDelay) + -> DNSActions () exception (IOSim s) +mockDNSActions dnsMap dnsTimeoutScript dnsLookupDelayScript = + DNSActions { + dnsResolverResource, + dnsAsyncResolverResource, + dnsLookupAWithTTL + } + where + + dnsResolverResource _ = return (constantResource ()) + dnsAsyncResolverResource _ = return (constantResource ()) + + dnsLookupAWithTTL :: resolvConf + -> resolver + -> Domain + -> IOSim s (Either DNSError [(IPv4, TTL)]) + dnsLookupAWithTTL _ _ domain = do + DNSTimeout dnsTimeout <- stepScript dnsTimeoutScript + DNSLookupDelay dnsLookupDelay <- stepScript dnsLookupDelayScript + + dnsLookup <- + MonadTimer.timeout dnsTimeout $ do + MonadTimer.threadDelay dnsLookupDelay + case Map.lookup domain dnsMap of + Nothing -> return (Left NameError) + Just x -> return (Right (map (\a -> (a, 0)) x)) + + case dnsLookup of + Nothing -> return (Left TimeoutExpired) + Just a -> return a + +-- | 'localRootPeersProvider' running with a given MockRoots env +-- +mockLocalRootPeersProvider :: forall s. + MockRoots + -> Script DNSTimeout + -> Script DNSLookupDelay + -> IOSim s () +mockLocalRootPeersProvider (MockRoots localRootPeers dnsMap) + dnsTimeoutScript dnsLookupDelayScript = do + dnsTimeoutScriptVar <- initScript dnsTimeoutScript + dnsLookupDelayScriptVar <- initScript dnsLookupDelayScript + localRootPeersVar <- newTVarIO localRootPeers + resultVar <- newTVarIO mempty + + void $ MonadTimer.timeout 3600 $ + localRootPeersProvider tracerTraceLocalRoots + DNSResolver.defaultResolvConf + (mockDNSActions dnsMap + dnsTimeoutScriptVar + dnsLookupDelayScriptVar) + (readTVar localRootPeersVar) + resultVar + +-- | 'publicRootPeersProvider' running with a given MockRoots env +-- +mockPublicRootPeersProvider :: forall s. + MockRoots + -> Script DNSTimeout + -> Script DNSLookupDelay + -> Int + -> IOSim s (Set SockAddr, DiffTime) +mockPublicRootPeersProvider (MockRoots localRootPeers dnsMap) + dnsTimeoutScript dnsLookupDelayScript n = do + dnsTimeoutScriptVar <- initScript dnsTimeoutScript + dnsLookupDelayScriptVar <- initScript dnsLookupDelayScript + localRootPeersVar <- newTVarIO (concatMap (Map.keys . snd) localRootPeers) + + publicRootPeersProvider tracerTracePublicRoots + DNSResolver.defaultResolvConf + (readTVar localRootPeersVar) + (mockDNSActions @Failure dnsMap + dnsTimeoutScriptVar + dnsLookupDelayScriptVar) + ($ n) + +-- | 'resolveDomainAddresses' running with a given MockRoots env +-- +mockResolveDomainAddresses :: forall s. 
+                              MockRoots
+                           -> Script DNSTimeout
+                           -> Script DNSLookupDelay
+                           -> IOSim s (Map DomainAccessPoint (Set SockAddr))
+mockResolveDomainAddresses (MockRoots localRootPeers dnsMap)
+                           dnsTimeoutScript dnsLookupDelayScript = do
+    dnsTimeoutScriptVar <- initScript dnsTimeoutScript
+    dnsLookupDelayScriptVar <- initScript dnsLookupDelayScript
+    resolveDomainAccessPoint tracerTracePublicRoots
+                             DNSResolver.defaultResolvConf
+                             (mockDNSActions @Failure dnsMap
+                                             dnsTimeoutScriptVar
+                                             dnsLookupDelayScriptVar)
+                             [ domain
+                             | (_, m) <- localRootPeers
+                             , RelayDomainAccessPoint domain <- Map.keys m ]
+
+--
+-- Utils for properties
+--
+
+data TestTraceEvent exception = RootPeerDNSLocal (TraceLocalRootPeers exception)
+                              | RootPeerDNSPublic TracePublicRootPeers
+  deriving Show
+
+tracerTraceLocalRoots :: Tracer (IOSim s) (TraceLocalRootPeers Failure)
+tracerTraceLocalRoots = contramap RootPeerDNSLocal tracerTestTraceEvent
+
+tracerTracePublicRoots :: Tracer (IOSim s) TracePublicRootPeers
+tracerTracePublicRoots = contramap RootPeerDNSPublic tracerTestTraceEvent
+
+tracerTestTraceEvent :: Tracer (IOSim s) (TestTraceEvent Failure)
+tracerTestTraceEvent = dynamicTracer
+
+dynamicTracer :: Typeable a => Tracer (IOSim s) a
+dynamicTracer = Tracer traceM
+
+selectRootPeerDNSTraceEvents :: Trace a -> [(Time, TestTraceEvent Failure)]
+selectRootPeerDNSTraceEvents = go
+  where
+    go (Trace t _ _ (EventLog e) trace)
+     | Just x <- fromDynamic e    = (t,x) : go trace
+    go (Trace _ _ _ _ trace)      =         go trace
+    go (TraceMainException _ e _) = throw e
+    go (TraceDeadlock    _   _)   = [] -- expected result in many cases
+    go (TraceMainReturn  _ _ _)   = []
+
+selectLocalRootPeersEvents :: [(Time, TestTraceEvent Failure)]
+                           -> [(Time, TraceLocalRootPeers Failure)]
+selectLocalRootPeersEvents trace = [ (t, e) | (t, RootPeerDNSLocal e) <- trace ]
+
+selectLocalRootGroupsEvents :: [(Time, TraceLocalRootPeers Failure)]
+                            -> [(Time, Seq (Int, Map SockAddr PeerAdvertise))]
+selectLocalRootGroupsEvents trace = [ (t, e) | (t, TraceLocalRootGroups e) <- trace ]
+
+selectLocalRootResultEvents :: [(Time, TraceLocalRootPeers Failure)]
+                            -> [(Time, (Domain, [IPv4]))]
+selectLocalRootResultEvents trace = [ (t, (domain, map fst r))
+                                    | (t, TraceLocalRootResult (DomainAccessPoint domain _) r) <- trace ]
+
+selectPublicRootPeersEvents :: [(Time, TestTraceEvent Failure)]
+                            -> [(Time, TracePublicRootPeers)]
+selectPublicRootPeersEvents trace = [ (t, e) | (t, RootPeerDNSPublic e) <- trace ]
+
+selectPublicRootFailureEvents :: [(Time, TracePublicRootPeers)]
+                              -> [(Time, Domain)]
+selectPublicRootFailureEvents trace = [ (t, domain)
+                                      | (t, TracePublicRootFailure domain _) <- trace ]
+
+selectPublicRootResultEvents :: [(Time, TracePublicRootPeers)]
+                             -> [(Time, (Domain, [IPv4]))]
+selectPublicRootResultEvents trace = [ (t, (domain, map fst r))
+                                     | (t, TracePublicRootResult domain r) <- trace ]
+
+--
+-- Local Root Peers Provider Tests
+--
+
+-- | The 'localRootPeersProvider' should preserve the local root peers
+-- group number and respective targets. This property tests that local root
+-- peer group updates, triggered by DNS resolution results, do not alter the
+-- initial group configuration.
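+--
+-- For example (illustrative values only): if the configuration is the two
+-- groups @[(2, {a, b}), (1, {c})]@ then every 'TraceLocalRootGroups' event in
+-- the trace must still contain exactly two groups, with targets @2@ and @1@
+-- in that order; only the membership of each group may change as domain
+-- names are resolved to addresses.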
+-- +prop_local_preservesGroupNumberAndTargets :: MockRoots + -> Script DNSTimeout + -> Script DNSLookupDelay + -> Property +prop_local_preservesGroupNumberAndTargets mockRoots@(MockRoots lrp _) + dnsTimeoutScript + dnsLookupDelayScript = + let tr = selectLocalRootGroupsEvents + $ selectLocalRootPeersEvents + $ selectRootPeerDNSTraceEvents + $ runSimTrace + $ mockLocalRootPeersProvider mockRoots + dnsTimeoutScript + dnsLookupDelayScript + + -- For all LocalRootGroup results, the number of groups should be + -- preserved, i.e. no new groups are added nor deleted along the + -- trace by localRootPeersProvider. + preservesGroupNumber = all ((== length lrp) . length . snd) tr + + -- For all LocalRootGroup results, the targets for each group + -- should be preserved, i.e. targets are not modified along the + -- trace by localRootPeersProvider. + preservesTargets = all (all (\(a, b) -> fst a == fst b)) + [ zip lrp (toList r) | r <- map snd tr ] + + in label (show $ length tr `div` 100 * 100) $ + preservesGroupNumber .&&. preservesTargets + +-- | The 'localRootPeersProvider' should be able to resolve DNS domains +-- correctly, assuming the domain maps to any IP address. This property +-- tests whether 'localRootPeersProvider' is capable of eventually resolving +-- domain addresses even after having failed to do so in the first attempt. +-- +prop_local_resolvesDomainsCorrectly :: MockRoots + -> Script DNSTimeout + -> Script DNSLookupDelay + -> Property +prop_local_resolvesDomainsCorrectly mockRoots@(MockRoots localRoots dnsMap) + dnsTimeoutScript + dnsLookupDelayScript = + let tr = selectLocalRootPeersEvents + $ selectRootPeerDNSTraceEvents + $ runSimTrace + $ mockLocalRootPeersProvider mockRoots + dnsTimeoutScript + dnsLookupDelayScript + + -- local root domains + localRootDomains :: Set Domain + localRootDomains = + Set.fromList + [ domain + | (_, m) <- localRoots + , RelayAccessDomain domain _ <- Map.keys m + ] + + -- domains that were resolved during simulation + resultMap :: Map Domain [IPv4] + resultMap = Map.fromList + $ map snd + $ selectLocalRootResultEvents + $ tr + + -- all domains that could have been resolved + maxResultMap :: Map Domain [IPv4] + maxResultMap = dnsMap `Map.restrictKeys` localRootDomains + + -- all domains that were tried to resolve during the simulation + allTriedDomains :: Set Domain + allTriedDomains + = Set.fromList + $ catMaybes + [ mbDomain + | (_, ev) <- tr + , let mbDomain = case ev of + TraceLocalRootResult (DomainAccessPoint domain _) _ -> Just domain + TraceLocalRootFailure (DomainAccessPoint domain _) _ -> Just domain + TraceLocalRootError (DomainAccessPoint _domain _) _ -> Nothing + _ -> Nothing + + ] + + + in + -- we verify that we tried to resolve all local root domains, and that the + -- resolved ones are a subset of `maxResultMap` + localRootDomains === allTriedDomains + .&&. property (resultMap `Map.isSubmapOf` maxResultMap) + + +-- | The 'localRootPeersProvider' after resolving a DNS domain address +-- should update the local result group list correctly, i.e. add the +-- resolved ip addresses to the correct group where the domain address was +-- (in the initial configuration specification). This property tests whether +-- after a successful DNS lookup the result list is updated correctly. 
+prop_local_updatesDomainsCorrectly :: MockRoots
+                                   -> Script DNSTimeout
+                                   -> Script DNSLookupDelay
+                                   -> Property
+prop_local_updatesDomainsCorrectly mockRoots@(MockRoots lrp _)
+                                   dnsTimeoutScript
+                                   dnsLookupDelayScript =
+    let tr = selectLocalRootPeersEvents
+           $ selectRootPeerDNSTraceEvents
+           $ runSimTrace
+           $ mockLocalRootPeersProvider mockRoots
+                                        dnsTimeoutScript
+                                        dnsLookupDelayScript
+
+        r = foldl' (\(b, (t, x)) (t', y) ->
+                     case (x, y) of
+                       -- Last DNS lookup result , Current result groups value
+                       (TraceLocalRootResult da res, TraceLocalRootGroups lrpg) ->
+                         -- create an indexed db for each group
+                         let db = zip [0,1..] lrp
+                             -- since our MockRoots generator generates
+                             -- unique domain addresses we can look up
+                             -- the group index to which a particular
+                             -- domain address belongs
+                             index = foldr (\(i, (_, m)) prev ->
+                                             case Map.lookup (RelayDomainAccessPoint da) m of
+                                               Nothing -> prev
+                                               Just _  -> i
+                                           ) (-1) db
+                             -- Get all IPv4 addresses present in the group
+                             -- at position 'index'
+                             ipsAtIndex = map (\sockAddr ->
+                                                case sockAddr of
+                                                  SockAddrInet _ hostAddr
+                                                    -> fromHostAddress hostAddr
+                                                  _ -> error "Impossible happened!"
+
+                                              ) $ Map.keys
+                                                $ snd
+                                                $ lrpg `Seq.index` index
+                             -- Check that all IPs from the previous DNS
+                             -- lookup result are present in the current
+                             -- result group at the correct index
+                             arePresent = all ((`elem` ipsAtIndex) . fst) res
+                          in (arePresent && b, (t', y))
+
+                       (TraceLocalRootResult _ _, _) -> (b, (t, x))
+                       (_, _)                        -> (b, (t', y))
+                   )
+                   (True, head tr)
+                   (tail tr)
+    in property (fst r)
+
+
+--
+-- Public Root Peers Provider Tests
+--
+
+-- | Delay and timeout scripts which make sure that eventually the DNS
+-- lookup will not time out.
+--
+data DelayAndTimeoutScripts = DelayAndTimeoutScripts
+                                (Script DNSLookupDelay)
+                                (Script DNSTimeout)
+  deriving Show
+
+fixupDelayAndTimeoutScripts :: DelayAndTimeoutScripts
+                            -> DelayAndTimeoutScripts
+fixupDelayAndTimeoutScripts (DelayAndTimeoutScripts lookupScript@(Script delays)
+                                                    timeoutScript@(Script timeouts)) =
+    let lastTimeout :: DiffTime
+        lastTimeout = getDNSTimeout $ NonEmpty.last timeouts
+
+        lookupScript' =
+          if getDNSLookupDelay (NonEmpty.last delays) >= lastTimeout
+            then Script (delays <> (DNSLookupDelay (lastTimeout / 2) :| []))
+            else lookupScript
+
+    in (DelayAndTimeoutScripts lookupScript' timeoutScript)
+
+instance Arbitrary DelayAndTimeoutScripts where
+    arbitrary = fmap fixupDelayAndTimeoutScripts
+              $ DelayAndTimeoutScripts
+                  <$> arbitrary
+                  <*> arbitrary
+
+    shrink (DelayAndTimeoutScripts lookupScript timeoutScript) =
+      [ fixupDelayAndTimeoutScripts
+          (DelayAndTimeoutScripts lookupScript timeoutScript')
+      | timeoutScript' <- shrink timeoutScript
+      ]
+      ++
+      [ fixupDelayAndTimeoutScripts
+          (DelayAndTimeoutScripts lookupScript' timeoutScript)
+      | lookupScript' <- shrink lookupScript
+      ]
+
+
+
+-- | The 'publicRootPeersProvider' should be able to resolve DNS domains
+-- correctly, assuming the domain maps to any IP address. This property
+-- tests whether 'publicRootPeersProvider' is capable of eventually resolving
+-- domain addresses, even after having failed to do so on the first attempt,
+-- within a bounded amount of time.
+-- +prop_public_resolvesDomainsCorrectly :: MockRoots + -> DelayAndTimeoutScripts + -> Int + -> Property +prop_public_resolvesDomainsCorrectly mockRoots@(MockRoots _ dnsMap) + (DelayAndTimeoutScripts dnsLookupDelayScript dnsTimeoutScript) + n = + lookupLoop mockRoots dnsMap === dnsMap + where + -- Perform public root DNS lookup until no failures + lookupLoop :: MockRoots -> Map Domain [IPv4] -> Map Domain [IPv4] + lookupLoop mr res = + let tr = runSimTrace + $ mockPublicRootPeersProvider mr + dnsTimeoutScript + dnsLookupDelayScript + n + + successes = selectPublicRootResultEvents + $ selectPublicRootPeersEvents + $ selectRootPeerDNSTraceEvents + $ tr + + failures = selectPublicRootFailureEvents + $ selectPublicRootPeersEvents + $ selectRootPeerDNSTraceEvents + $ tr + + successesMap = Map.fromList $ map snd successes + + -- Update MockRoots with only the RelayAccessPoint that failed the + -- DNS timeouts + failuresMap = + [ ( i + , Map.fromList [ (RelayAccessDomain domain port, pa) + | (RelayAccessDomain domain port, pa) <- Map.toList m + , domain `elem` map snd failures + ] + ) + | (i, m) <- mockLocalRootPeers mr + ] + newMR = mr { mockLocalRootPeers = failuresMap } + + in if null failures || res == dnsMap + then res + else lookupLoop newMR (res <> successesMap) diff --git a/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/Script.hs b/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/Script.hs index 809916f204c..b4ccd438efb 100644 --- a/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/Script.hs +++ b/ouroboros-network/test/Test/Ouroboros/Network/PeerSelection/Script.hs @@ -5,12 +5,14 @@ module Test.Ouroboros.Network.PeerSelection.Script ( -- * Test scripts Script(..), + NonEmpty(..), scriptHead, singletonScript, initScript, stepScript, stepScriptSTM, - arbitraryShortScriptOf, + arbitraryScriptOf, + prop_shrink_Script, -- * Timed scripts ScriptDelay(..), @@ -19,7 +21,9 @@ module Test.Ouroboros.Network.PeerSelection.Script ( -- * Pick scripts PickScript, - interpretPickScript + PickMembers(..), + arbitraryPickScript, + interpretPickScript, ) where @@ -31,10 +35,12 @@ import qualified Data.List.NonEmpty as NonEmpty import Control.Monad.Class.MonadAsync import Control.Monad.Class.MonadSTM import Control.Monad.Class.MonadTimer +import Control.Tracer (Tracer, traceWith) import Test.Ouroboros.Network.PeerSelection.Instances () import Test.QuickCheck +import Test.QuickCheck.Utils -- @@ -43,7 +49,6 @@ import Test.QuickCheck newtype Script a = Script (NonEmpty a) deriving (Eq, Show, Functor, Foldable, Traversable) - deriving Arbitrary via NonEmpty a singletonScript :: a -> Script a singletonScript x = (Script (x :| [])) @@ -51,10 +56,11 @@ singletonScript x = (Script (x :| [])) scriptHead :: Script a -> a scriptHead (Script (x :| _)) = x -arbitraryShortScriptOf :: Gen a -> Gen (Script a) -arbitraryShortScriptOf a = - sized $ \sz -> - (Script . NonEmpty.fromList) <$> vectorOf (min 5 (sz+1)) a +arbitraryScriptOf :: Int -> Gen a -> Gen (Script a) +arbitraryScriptOf maxSz a = + sized $ \sz -> do + n <- choose (1, max 1 (min maxSz sz)) + (Script . 
NonEmpty.fromList) <$> vectorOf n a initScript :: MonadSTM m => Script a -> m (TVar m (Script a)) initScript = newTVarIO @@ -70,6 +76,20 @@ stepScriptSTM scriptVar = do x':xs' -> writeTVar scriptVar (Script (x' :| xs')) return x +instance Arbitrary a => Arbitrary (Script a) where + arbitrary = sized $ \sz -> arbitraryScriptOf sz arbitrary + + shrink (Script (x :| [])) = [ Script (x' :| []) | x' <- shrink x ] + shrink (Script (x :| xs)) = + Script (x :| []) -- drop whole tail + : Script (x :| take (length xs `div` 2) xs) -- drop half the tail + : Script (x :| init xs) -- drop only last + + -- drop none, shrink only elements + : [ Script (x' :| xs) | x' <- shrink x ] + ++ [ Script (x :| xs') | xs' <- shrinkListElems shrink xs ] + + -- -- Timed scripts -- @@ -89,12 +109,14 @@ instance Arbitrary ScriptDelay where shrink NoDelay = [] playTimedScript :: (MonadAsync m, MonadTimer m) - => TimedScript a -> m (TVar m a) -playTimedScript (Script ((x0,d0) :| script)) = do + => Tracer m a -> TimedScript a -> m (TVar m a) +playTimedScript tracer (Script ((x0,d0) :| script)) = do v <- newTVarIO x0 + traceWith tracer x0 _ <- async $ do threadDelay (interpretScriptDelay d0) sequence_ [ do atomically (writeTVar v x) + traceWith tracer x threadDelay (interpretScriptDelay d) | (x,d) <- script ] return v @@ -113,27 +135,39 @@ playTimedScript (Script ((x0,d0) :| script)) = do -- choices by their index (modulo the number of choices). This representation -- was chosen because it allows easy shrinking. -- -type PickScript = Script PickMembers +type PickScript peeraddr = Script (PickMembers peeraddr) -data PickMembers = PickFirst - | PickAll - | PickSome [Int] +data PickMembers peeraddr = PickFirst + | PickAll + | PickSome (Set peeraddr) deriving (Eq, Show) -instance Arbitrary PickMembers where - arbitrary = frequency [ (1, pure PickFirst) - , (1, pure PickAll) - , (2, PickSome <$> listOf1 arbitrarySizedNatural) ] +instance (Arbitrary peeraddr, Ord peeraddr) => + Arbitrary (PickMembers peeraddr) where + arbitrary = arbitraryPickMembers (Set.fromList <$> listOf1 arbitrary) - shrink (PickSome ixs) = PickAll + shrink (PickSome ixs) = PickFirst + : PickAll : [ PickSome ixs' | ixs' <- shrink ixs - , not (null ixs') ] + , not (Set.null ixs') ] shrink PickAll = [PickFirst] shrink PickFirst = [] +arbitraryPickMembers :: Gen (Set peeraddr) -> Gen (PickMembers peeraddr) +arbitraryPickMembers pickSome = + frequency [ (1, pure PickFirst) + , (1, pure PickAll) + , (2, PickSome <$> pickSome) + ] + +arbitraryPickScript :: Gen (Set peeraddr) -> Gen (PickScript peeraddr) +arbitraryPickScript pickSome = + sized $ \sz -> + arbitraryScriptOf sz (arbitraryPickMembers pickSome) + interpretPickScript :: (MonadSTMTx stm, Ord peeraddr) - => TVar_ stm PickScript + => TVar_ stm (PickScript peeraddr) -> Set peeraddr -> Int -> stm (Set peeraddr) @@ -143,22 +177,26 @@ interpretPickScript scriptVar available pickNum | pickNum <= 0 = error "interpretPickScript: given invalid pickNum" - | Set.size available <= pickNum - = return available - | otherwise = do pickmembers <- stepScriptSTM scriptVar return (interpretPickMembers pickmembers available pickNum) interpretPickMembers :: Ord peeraddr - => PickMembers -> Set peeraddr -> Int -> Set peeraddr -interpretPickMembers PickFirst ps _ = Set.singleton (Set.elemAt 0 ps) -interpretPickMembers PickAll ps n = Set.take n ps -interpretPickMembers (PickSome ixs) ps n = pickMapKeys ps (take n ixs) - -pickMapKeys :: Ord a => Set a -> [Int] -> Set a -pickMapKeys m ns = - Set.fromList (map pick ns) + => 
PickMembers peeraddr + -> Set peeraddr -> Int -> Set peeraddr +interpretPickMembers PickFirst ps _ = Set.singleton (Set.elemAt 0 ps) +interpretPickMembers PickAll ps n = Set.take n ps +interpretPickMembers (PickSome as) ps n + | Set.null ps' = Set.singleton (Set.elemAt 0 ps) + | otherwise = Set.take n ps' where - pick n = Set.elemAt i m where i = n `mod` Set.size m + ps' = Set.intersection ps as + + +-- +-- Tests for the QC Arbitrary instances +-- + +prop_shrink_Script :: Fixed (Script Int) -> Property +prop_shrink_Script = prop_shrink_nonequal diff --git a/ouroboros-network/test/Test/QuickCheck/Signal.hs b/ouroboros-network/test/Test/QuickCheck/Signal.hs new file mode 100644 index 00000000000..c58410dd36c --- /dev/null +++ b/ouroboros-network/test/Test/QuickCheck/Signal.hs @@ -0,0 +1,43 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE ScopedTypeVariables #-} + +module Test.QuickCheck.Signal where + +import qualified Deque.Lazy as Deque +import Deque.Lazy (Deque) +import qualified Data.Foldable as Deque (toList) +import qualified Data.Signal as Signal +import Data.Signal (Signal) + +import Control.Monad.Class.MonadTime (Time) + +import Test.QuickCheck + + +-- | Check a property over a 'Signal'. The property should be true at all times. +-- +-- On failure it shows the @n@ most recent signal values. +-- +signalProperty :: forall a. Int -> (a -> String) + -> (a -> Bool) -> Signal a -> Property +signalProperty atMost showSignalValue p = + go 0 mempty . Signal.eventsToList . Signal.toChangeEvents + where + go :: Int -> Deque (Time, a) -> [(Time, a)] -> Property + go !_ !_ [] = property True + go !n !q ((t, x) : txs) | p x = next + where + next + | n < atMost = go (n+1) ( Deque.snoc (t,x) q) txs + | otherwise = go n ((Deque.tail . Deque.snoc (t,x)) q) txs + + go !_ !recent ((t, x) : _) = counterexample details (property False) + where + details = + unlines [ "Last " ++ show atMost ++ " signal values:" + , unlines [ show t' ++ "\t: " ++ showSignalValue x' + | (t',x') <- Deque.toList recent ] + , "Property violated at: " ++ show t + , "Invalid signal value:" + , showSignalValue x + ] diff --git a/ouroboros-network/test/Test/QuickCheck/Utils.hs b/ouroboros-network/test/Test/QuickCheck/Utils.hs new file mode 100644 index 00000000000..77b34a30cdb --- /dev/null +++ b/ouroboros-network/test/Test/QuickCheck/Utils.hs @@ -0,0 +1,81 @@ + +-- | QuickCheck utils +-- +module Test.QuickCheck.Utils ( + + -- * Generator and shrinker utils + arbitrarySubset, + shrinkListElems, + prop_shrink_valid, + prop_shrink_nonequal, + + -- * Reporting utils + renderRanges, + + ) where + +import Data.Set (Set) +import qualified Data.Set as Set + +import Test.QuickCheck + +-- | Pick a subset of a set, using a 50:50 chance for each set element. +-- +arbitrarySubset :: Ord a => Set a -> Gen (Set a) +arbitrarySubset s = do + picks <- vectorOf (Set.size s) (arbitrary :: Gen Bool) + let s' = Set.fromList + . map snd + . filter fst + . zip picks + . Set.toList + $ s + return s' + + +-- | Like 'shrinkList' but only shrink the elems, don't drop elements. +-- +-- Useful when you want a custom strategy for dropping elements. +-- +shrinkListElems :: (a -> [a]) -> [a] -> [[a]] +shrinkListElems _ [] = [] +shrinkListElems shr (x:xs) = [ x':xs | x' <- shr x ] + ++ [ x:xs' | xs' <- shrinkListElems shr xs ] + + +-- | Check that each shrink satisfies some invariant or validity condition. 
+--
+prop_shrink_valid :: (Arbitrary a, Eq a, Show a)
+                  => (a -> Bool) -> Fixed a -> Property
+prop_shrink_valid valid (Fixed x) =
+    let invalid = [ x' | x' <- shrink x, not (valid x') ]
+     in case invalid of
+          []      -> property True
+          (x':_)  -> counterexample ("shrink result invalid:\n" ++ show x') $
+                     property False
+
+
+-- | The 'shrink' function needs to give a valid value that is /smaller/ than
+-- the original, otherwise the shrinking procedure is not well-founded and can
+-- cycle.
+--
+-- This property does not check size, as that would need significant extra
+-- infrastructure to define an appropriate measure. Instead this property
+-- simply checks that each shrink is not the same as the original. This
+-- catches simple 1-cycles, but not bigger cycles. These are fortunately the
+-- most common case, so it is still a useful property in practice.
+--
+prop_shrink_nonequal :: (Arbitrary a, Eq a) => Fixed a -> Property
+prop_shrink_nonequal (Fixed x) =
+    counterexample "A shrink result equals the original.\n" $
+    counterexample "This will cause non-termination for shrinking." $
+    all (x /=) (shrink x)
+
+
+-- | Use in 'tabulate' to help summarise data into buckets.
+--
+renderRanges :: Int -> Int -> String
+renderRanges r n = show lower ++ " -- " ++ show upper
+  where
+    lower = n - n `mod` r
+    upper = lower + (r-1)
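
For reference, a minimal usage sketch (not part of the diff above) of how the new generator combinators are expected to compose, assuming the modules and exports introduced by this patch; the helper name genPickScriptFor is hypothetical.

import           Data.Set (Set)
import           Test.Ouroboros.Network.PeerSelection.Script
                    (PickScript, arbitraryPickScript)
import           Test.QuickCheck (Gen)
import           Test.QuickCheck.Utils (arbitrarySubset)

-- Generate a pick script whose 'PickSome' entries only ever mention peers
-- drawn from a known set, so every scripted pick refers to peers that exist.
genPickScriptFor :: Ord peeraddr => Set peeraddr -> Gen (PickScript peeraddr)
genPickScriptFor peers = arbitraryPickScript (arbitrarySubset peers)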
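
Similarly, a sketch of how prop_shrink_nonequal and renderRanges are meant to be used from a test module, assuming a QuickCheck version that provides 'tabulate'; the property names below are hypothetical.

import           Test.Ouroboros.Network.PeerSelection.Script (Script)
import           Test.QuickCheck
import           Test.QuickCheck.Utils (prop_shrink_nonequal, renderRanges)

-- Shrinking a 'Script' must never return the original value, otherwise the
-- shrinking loop would not terminate; 'Fixed' stops QuickCheck from shrinking
-- the counterexample argument itself.
prop_example_shrink_Script :: Fixed (Script Int) -> Property
prop_example_shrink_Script = prop_shrink_nonequal

-- Bucket the list length into ranges of 10 when reporting coverage.
prop_example_listLength :: [Int] -> Property
prop_example_listLength xs =
    tabulate "list length" [renderRanges 10 (length xs)] (property True)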