-
Notifications
You must be signed in to change notification settings - Fork 178
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
test that instances for Eq and Ord agree with going via toAscList #670
Changes from all commits
45d19b1
9e87998
c0fb190
dee87a5
3eb7c58
0606cfd
392bf60
b9d9608
81bae9c
9454e35
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,52 +1,123 @@ | ||
{-# LANGUAGE BangPatterns #-} | ||
{-# LANGUAGE CPP #-} | ||
{-# LANGUAGE BangPatterns, GeneralizedNewtypeDeriving #-} | ||
|
||
module Main where | ||
|
||
import Control.DeepSeq (rnf) | ||
import Control.Exception (evaluate) | ||
import Gauge (bench, defaultMain, whnf) | ||
import Data.List (foldl') | ||
import qualified Data.IntSet as S | ||
import Data.Monoid (Sum(..)) | ||
#if !MIN_VERSION_base(4,8,0) | ||
import Data.Foldable (foldMap) | ||
#endif | ||
import qualified Data.IntSet as IS | ||
-- benchmarks for "instance Ord IntSet" | ||
-- uses IntSet as keys of maps, and elements of sets | ||
import qualified Data.Set as S | ||
import qualified Data.IntMap as IM | ||
import qualified Data.Map.Strict as M | ||
|
||
main = do | ||
let s = S.fromAscList elems :: S.IntSet | ||
s_even = S.fromAscList elems_even :: S.IntSet | ||
s_odd = S.fromAscList elems_odd :: S.IntSet | ||
let s = IS.fromAscList elems :: IS.IntSet | ||
s_even = IS.fromAscList elems_even :: IS.IntSet | ||
s_odd = IS.fromAscList elems_odd :: IS.IntSet | ||
evaluate $ rnf [s, s_even, s_odd] | ||
defaultMain | ||
[ bench "member" $ whnf (member elems) s | ||
, bench "insert" $ whnf (ins elems) S.empty | ||
, bench "map" $ whnf (S.map (+ 1)) s | ||
, bench "filter" $ whnf (S.filter ((== 0) . (`mod` 2))) s | ||
, bench "partition" $ whnf (S.partition ((== 0) . (`mod` 2))) s | ||
, bench "fold" $ whnf (S.fold (:) []) s | ||
, bench "insert" $ whnf (ins elems) IS.empty | ||
, bench "map" $ whnf (IS.map (+ 1)) s | ||
, bench "filter" $ whnf (IS.filter ((== 0) . (`mod` 2))) s | ||
, bench "partition" $ whnf (IS.partition ((== 0) . (`mod` 2))) s | ||
, bench "fold" $ whnf (IS.fold (:) []) s | ||
, bench "delete" $ whnf (del elems) s | ||
, bench "findMin" $ whnf S.findMin s | ||
, bench "findMax" $ whnf S.findMax s | ||
, bench "deleteMin" $ whnf S.deleteMin s | ||
, bench "deleteMax" $ whnf S.deleteMax s | ||
, bench "unions" $ whnf S.unions [s_even, s_odd] | ||
, bench "union" $ whnf (S.union s_even) s_odd | ||
, bench "difference" $ whnf (S.difference s) s_even | ||
, bench "intersection" $ whnf (S.intersection s) s_even | ||
, bench "fromList" $ whnf S.fromList elems | ||
, bench "fromAscList" $ whnf S.fromAscList elems | ||
, bench "fromDistinctAscList" $ whnf S.fromDistinctAscList elems | ||
, bench "disjoint:false" $ whnf (S.disjoint s) s_even | ||
, bench "disjoint:true" $ whnf (S.disjoint s_odd) s_even | ||
, bench "null.intersection:false" $ whnf (S.null. S.intersection s) s_even | ||
, bench "null.intersection:true" $ whnf (S.null. S.intersection s_odd) s_even | ||
, bench "findMin" $ whnf IS.findMin s | ||
, bench "findMax" $ whnf IS.findMax s | ||
, bench "deleteMin" $ whnf IS.deleteMin s | ||
, bench "deleteMax" $ whnf IS.deleteMax s | ||
, bench "unions" $ whnf IS.unions [s_even, s_odd] | ||
, bench "union" $ whnf (IS.union s_even) s_odd | ||
, bench "difference" $ whnf (IS.difference s) s_even | ||
, bench "intersection" $ whnf (IS.intersection s) s_even | ||
, bench "fromList" $ whnf IS.fromList elems | ||
, bench "fromAscList" $ whnf IS.fromAscList elems | ||
, bench "fromDistinctAscList" $ whnf IS.fromDistinctAscList elems | ||
, bench "disjoint:false" $ whnf (IS.disjoint s) s_even | ||
, bench "disjoint:true" $ whnf (IS.disjoint s_odd) s_even | ||
, bench "null.intersection:false" $ whnf (IS.null. IS.intersection s) s_even | ||
, bench "null.intersection:true" $ whnf (IS.null. IS.intersection s_odd) s_even | ||
, bench "instanceOrd:dense" -- the IntSet will just use one Tip | ||
$ whnf (num_transitions . det 2 0) $ hard_nfa 1 16 | ||
, bench "instanceOrd:sparse" -- many Bin, each Tip is singleton | ||
$ whnf (num_transitions . det 2 0) $ hard_nfa 1111 16 | ||
] | ||
where | ||
elems = [1..2^12] | ||
elems_even = [2,4..2^12] | ||
elems_odd = [1,3..2^12] | ||
|
||
member :: [Int] -> S.IntSet -> Int | ||
member xs s = foldl' (\n x -> if S.member x s then n + 1 else n) 0 xs | ||
member :: [Int] -> IS.IntSet -> Int | ||
member xs s = foldl' (\n x -> if IS.member x s then n + 1 else n) 0 xs | ||
|
||
ins :: [Int] -> S.IntSet -> S.IntSet | ||
ins xs s0 = foldl' (\s a -> S.insert a s) s0 xs | ||
ins :: [Int] -> IS.IntSet -> IS.IntSet | ||
ins xs s0 = foldl' (\s a -> IS.insert a s) s0 xs | ||
|
||
del :: [Int] -> S.IntSet -> S.IntSet | ||
del xs s0 = foldl' (\s k -> S.delete k s) s0 xs | ||
del :: [Int] -> IS.IntSet -> IS.IntSet | ||
del xs s0 = foldl' (\s k -> IS.delete k s) s0 xs | ||
|
||
|
||
|
||
-- | Automata contain just the transitions | ||
type NFA = IM.IntMap (IM.IntMap IS.IntSet) | ||
type DFA = IM.IntMap (M.Map IS.IntSet IS.IntSet) | ||
|
||
newtype State = State Int deriving (Num, Enum) | ||
instance Show State where show (State s) = show s | ||
newtype Sigma = Sigma Int deriving (Num, Enum, Eq) | ||
|
||
num_transitions :: DFA -> Int | ||
num_transitions = getSum . foldMap (Sum . M.size) | ||
|
||
det :: Sigma -> State -> NFA -> DFA | ||
det sigma (State initial) aut = | ||
let get :: State -> Sigma -> IS.IntSet | ||
get (State p) (Sigma s) = IM.findWithDefault IS.empty p | ||
$ IM.findWithDefault IM.empty s aut | ||
go :: DFA -> S.Set IS.IntSet -> S.Set IS.IntSet -> DFA | ||
go !accu !done !todo = case S.minView todo of | ||
Nothing -> accu | ||
Just (t, odo) -> | ||
if S.member t done | ||
then go accu done odo | ||
else let ts = do | ||
s <- [0 .. sigma-1] | ||
let next :: IS.IntSet | ||
next = foldMap (\p -> get (State p) s) $ IS.toList t | ||
return (t, s, next) | ||
in go (union_dfa (dfa ts) accu) | ||
(S.insert t done) | ||
(Data.List.foldl' (\ o (_,_,q) -> S.insert q o) odo ts) | ||
in go IM.empty S.empty $ S.singleton $ IS.singleton initial | ||
|
||
nfa :: [(State,Sigma,State)] -> NFA | ||
nfa ts = IM.fromListWith ( IM.unionWith IS.union ) | ||
$ Prelude.map (\(State p,Sigma s,State q) -> | ||
(s, IM.singleton p (IS.singleton q))) ts | ||
|
||
dfa :: [(IS.IntSet, Sigma, IS.IntSet)] -> DFA | ||
dfa ts = IM.fromListWith ( M.unionWith ( error "WAT") ) | ||
$ Prelude.map (\( p, Sigma s, q) -> | ||
(s, M.singleton p q)) ts | ||
|
||
union_dfa a b = IM.unionWith (M.unionWith (error "WAT")) a b | ||
|
||
-- | for the language Sigma^* 1 Sigma^{n-2} where Sigma={0,1}. | ||
-- this NFA has n states. DFA has 2^(n-1) states | ||
-- since it needs to remember the last n characters. | ||
-- Extra parameter delta: the automaton will use states [0, delta .. ] | ||
-- for IntSet, larger deltas should be harder, | ||
-- since for delta=1, all the states do fit in one Tip | ||
hard_nfa :: State -> Int -> NFA | ||
hard_nfa delta n = nfa | ||
$ [ (0, 0, 0), (0,1,0), (0, 1, delta) ] | ||
++ do k <- [1 .. State n - 2] ; c <- [0,1] ; return (delta * k,c,delta *(k+1)) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -210,7 +210,8 @@ import Utils.Containers.Internal.BitUtil | |
import Utils.Containers.Internal.StrictPair | ||
|
||
#if __GLASGOW_HASKELL__ | ||
import Data.Data (Data(..), Constr, mkConstr, constrIndex, Fixity(Prefix), DataType, mkDataType) | ||
import Data.Data (Data(..), Constr, mkConstr, constrIndex, DataType, mkDataType) | ||
import qualified Data.Data | ||
import Text.Read | ||
#endif | ||
|
||
|
@@ -310,7 +311,7 @@ instance Data IntSet where | |
dataTypeOf _ = intSetDataType | ||
|
||
fromListConstr :: Constr | ||
fromListConstr = mkConstr intSetDataType "fromList" [] Prefix | ||
fromListConstr = mkConstr intSetDataType "fromList" [] Data.Data.Prefix | ||
|
||
intSetDataType :: DataType | ||
intSetDataType = mkDataType "Data.IntSet.Internal.IntSet" [fromListConstr] | ||
|
@@ -1157,8 +1158,153 @@ nequal _ _ = True | |
--------------------------------------------------------------------} | ||
|
||
instance Ord IntSet where | ||
compare s1 s2 = compare (toAscList s1) (toAscList s2) | ||
-- tentative implementation. See if more efficient exists. | ||
compare Nil Nil = EQ | ||
compare Nil _ = LT | ||
compare _ Nil = GT | ||
compare t1@(Tip _ _) t2@(Tip _ _) | ||
= orderingOf $ relateTipTip t1 t2 | ||
compare xs ys | ||
| (xsNeg, xsNonNeg) <- splitSign xs | ||
, (ysNeg, ysNonNeg) <- splitSign ys | ||
= case relate xsNeg ysNeg of | ||
Less -> LT | ||
Prefix -> if null xsNonNeg then LT else GT | ||
Equals -> orderingOf (relate xsNonNeg ysNonNeg) | ||
FlipPrefix -> if null ysNonNeg then GT else LT | ||
Greater -> GT | ||
|
||
-- | detailed outcome of lexicographic comparison of lists. | ||
-- w.r.t. Ordering, there are two extra cases, | ||
-- since (++) is not monotonic w.r.t. lex. order on lists | ||
-- (which is used by definition): | ||
-- consider comparison of (Bin [0,3,4] [ 6] ) to (Bin [0,3] [7] ) | ||
-- where [0,3,4] > [0,3] but [0,3,4,6] < [0,3,7]. | ||
|
||
data Relation | ||
= Less -- ^ holds for [0,3,4] [0,3,5,1] | ||
| Prefix -- ^ holds for [0,3,4] [0,3,4,5] | ||
| Equals -- ^ holds for [0,3,4] [0,3,4] | ||
| FlipPrefix -- ^ holds for [0,3,4] [0,3] | ||
| Greater -- ^ holds for [0,3,4] [0,2,5] | ||
deriving (Show, Eq) | ||
|
||
orderingOf :: Relation -> Ordering | ||
{-# INLINE orderingOf #-} | ||
orderingOf r = case r of | ||
Less -> LT | ||
Prefix -> LT | ||
Equals -> EQ | ||
FlipPrefix -> GT | ||
Greater -> GT | ||
|
||
-- | precondition: each argument is non-mixed | ||
relate :: IntSet -> IntSet -> Relation | ||
relate Nil Nil = Equals | ||
relate Nil t2 = Prefix | ||
relate t1 Nil = FlipPrefix | ||
relate t1@(Tip p1 bm1) t2@(Tip p2 bm2) = relateTipTip t1 t2 | ||
relate t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can't we play any tricks here? I have to think there's some way to use the prefixes and masks. For example, if we have t1 = Bin 0100000 0000100 l1 r1
t2 = Bin 0010000 0001000 l2 r2 I think we can reach a conclusion immediately. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In a bit more detail.... It seems to me that if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I hope that this happens automatically since the compiler should inline There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, I mean pulling tricks in the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. prefixes are tricky: assume wordSize is one (else multiply everthing by some large enough power of two), then fromList [2,3] has prefix 10 (binary) but fromList[3,4] has smaller prefix 0 (?) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Okay, but can't you use your lower bound/upper bound calculation anyway? If the upper bound of one tree is less than the lower bound of the other, I think that's it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. lower/upper-bound: yes this should be correct for any Tip/Bin combo:
For now, this is inside the guards of some of the pattern matches but this could also be inverted (compare bounds first, and pattern match only if needed). This needs benchmarking (not today). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think that computing "lowerbound" inside a branch of a pattern match is better: the inliner then should be able to remove the pattern match that is in the implementation of "lowerbound" (does it really?) I inserted the lower-upper test in the Bin/Bin case as helps to avoid recursion. |
||
| succUpperbound t1 <= lowerbound t2 = Less | ||
| lowerbound t1 >= succUpperbound t2 = Greater | ||
| otherwise = case compare (natFromInt m1) (natFromInt m2) of | ||
GT -> combine_left (relate l1 t2) | ||
EQ -> combine (relate l1 l2) (relate r1 r2) | ||
LT -> combine_right (relate t1 l2) | ||
relate t1@(Bin p1 m1 l1 r1) t2@(Tip p2 _) | ||
| succUpperbound t1 <= lowerbound t2 = Less | ||
| lowerbound t1 >= succUpperbound t2 = Greater | ||
| 0 == (m1 .&. p2) = combine_left (relate l1 t2) | ||
| otherwise = Less | ||
relate t1@(Tip p1 _) t2@(Bin p2 m2 l2 r2) | ||
| succUpperbound t1 <= lowerbound t2 = Less | ||
| lowerbound t1 >= succUpperbound t2 = Greater | ||
| 0 == (p1 .&. m2) = combine_right (relate t1 l2) | ||
| otherwise = Greater | ||
|
||
relateTipTip :: IntSet -> IntSet -> Relation | ||
{-# INLINE relateTipTip #-} | ||
relateTipTip t1@(Tip p1 bm1) t2@(Tip p2 bm2) = case compare p1 p2 of | ||
LT -> Less | ||
EQ -> relateBM bm1 bm2 | ||
GT -> Greater | ||
|
||
relateBM :: BitMap -> BitMap -> Relation | ||
{-# inline relateBM #-} | ||
relateBM w1 w2 | w1 == w2 = Equals | ||
relateBM w1 w2 = | ||
let delta = xor w1 w2 | ||
lowest_diff_mask = delta .&. complement (delta-1) | ||
prefix = (complement lowest_diff_mask + 1) | ||
.&. (complement lowest_diff_mask) | ||
in if 0 == lowest_diff_mask .&. w1 | ||
then if 0 == w1 .&. prefix | ||
then Prefix else Greater | ||
else if 0 == w2 .&. prefix | ||
then FlipPrefix else Less | ||
|
||
-- | This function has the property | ||
-- relate t1@(Bin p m l1 r1) t2@(Bin p m l2 r2) = combine (relate l1 l2) (relate r1 r2) | ||
-- It is important that `combine` is lazy in the second argument (achieved by inlining) | ||
combine :: Relation -> Relation -> Relation | ||
{-# inline combine #-} | ||
combine r eq = case r of | ||
Less -> Less | ||
Prefix -> Greater | ||
Equals -> eq | ||
FlipPrefix -> Less | ||
Greater -> Greater | ||
|
||
-- | This function has the property | ||
-- relate t1@(Bin p1 m1 l1 r1) t2 = combine_left (relate l1 t2) | ||
-- under the precondition that the range of l1 contains the range of t2, | ||
-- and r1 is non-empty | ||
combine_left :: Relation -> Relation | ||
{-# inline combine_left #-} | ||
combine_left r = case r of | ||
Less -> Less | ||
Prefix -> Greater | ||
Equals -> FlipPrefix | ||
FlipPrefix -> FlipPrefix | ||
Greater -> Greater | ||
|
||
-- | This function has the property | ||
-- relate t1 t2@(Bin p2 m2 l2 r2) = combine_right (relate t1 l2) | ||
-- under the precondition that the range of t1 is included in the range of l2, | ||
-- and r2 is non-empty | ||
combine_right :: Relation -> Relation | ||
{-# inline combine_right #-} | ||
combine_right r = case r of | ||
Less -> Less | ||
Prefix -> Prefix | ||
Equals -> Prefix | ||
FlipPrefix -> Less | ||
Greater -> Greater | ||
|
||
-- | shall only be applied to non-mixed non-Nil trees | ||
lowerbound :: IntSet -> Int | ||
{-# INLINE lowerbound #-} | ||
lowerbound (Tip p _) = p | ||
lowerbound (Bin p _ _ _) = p | ||
|
||
-- | this is one more than the actual upper bound (to save one operation) | ||
-- shall only be applied to non-mixed non-Nil trees | ||
succUpperbound :: IntSet -> Int | ||
{-# INLINE succUpperbound #-} | ||
succUpperbound (Tip p _) = p + wordSize | ||
succUpperbound (Bin p m _ _) = p + shiftR m 1 | ||
|
||
-- | split a set into subsets of negative and non-negative elements | ||
splitSign :: IntSet -> (IntSet,IntSet) | ||
{-# INLINE splitSign #-} | ||
splitSign t@(Tip kx _) | ||
| kx >= 0 = (Nil, t) | ||
| otherwise = (t, Nil) | ||
splitSign t@(Bin p m l r) | ||
-- m < 0 is the usual way to find out if we have positives and negatives (see findMax) | ||
| m < 0 = (r, l) | ||
| p < 0 = (t, Nil) | ||
| otherwise = (Nil, t) | ||
splitSign Nil = (Nil, Nil) | ||
|
||
{-------------------------------------------------------------------- | ||
Show | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We already have
prop_ord
for this. I don't see a similarprop_eq
though.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
not exactly "this" (
prop_ord
usestoList
, I think it should usetoAscList
#470 (comment) )There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, yes,
toAscList
is a bit more correct. That said, I would be astonished if we madetoList
do something different. While it's true that it's not documented as producing keys in order, it's also not explicitly documented as potentially producing keys in some other order. There are almost certainly people whose code will break in horrible ways if we change that.