-
Notifications
You must be signed in to change notification settings - Fork 1
/
SpellChecker.hs
89 lines (68 loc) · 2.45 KB
/
SpellChecker.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeApplications #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
-- |
-- Module: Main
-- Copyright: Copyright © 2019 Lars Kuhtz <lakuhtz@gmail.com>
-- License: BSD3
-- Maintainer: Lars Kuhtz <lakuhtz@gmail.com>
-- Stability: experimental
--
-- This code is an adaptation of the respective code from Chris Coffey's cuckoo filter package,
-- which in turn is borrowed from Bryan O'Sullivan's bloom filter package.
--
module Main
( main
, runSpellCheck
) where
import Control.Monad (filterM, unless)
import Control.StopWatch
import qualified Data.ByteString.Char8 as B
import qualified Data.Cuckoo as C
import Data.List ((\\))
import Prelude hiding (words)
-- Just changing the salt isn't enough to make the hash functions independent
-- enough.
--
-- instance C.CuckooFilterHash B.ByteString where
-- cuckooHash (C.Salt s) a = fromIntegral $! hashWithSalt s a
-- cuckooFingerprint (C.Salt s) a = fromIntegral $! hashWithSalt (s + 23) a
-- {-# INLINE cuckooHash #-}
-- {-# INLINE cuckooFingerprint #-}
instance C.CuckooFilterHash B.ByteString where
cuckooHash (C.Salt s) = C.saltedFnv1aByteString s
cuckooFingerprint (C.Salt s) = C.saltedSipHashByteString s
{-# INLINE cuckooHash #-}
{-# INLINE cuckooFingerprint #-}
main :: IO ()
main = runSpellCheck
dict :: String
dict = "/usr/share/dict/words"
runSpellCheck :: IO ()
runSpellCheck = do
-- read and count words
(words, t0) <- stopWatch $ do
words <- B.lines `fmap` B.readFile dict
putStrLn $ show (length words) ++ " words"
return words
putStrLn $ show t0 <> "s to count words"
((f, failed), t1) <- stopWatch $ do
f <- C.newCuckooFilter @4 @8 0 500000
failed <- filterM (fmap not . C.insert f) words
return (f, failed)
putStrLn $ show t1 ++ "s to construct filter"
-- check words
(missing, t2) <- stopWatch $
filterM (fmap not . C.member f) words
putStrLn $ show t2 ++ "s to query every element"
-- report results
unless (null failed) $
putStrLn $ "failed inserts: " <> show (length failed)
let unexpectedMissing = missing \\ failed
unless (null $ unexpectedMissing) $
putStrLn $ "FAILURE: missing " <> show (length unexpectedMissing)
let falsePositives = failed \\ missing
unless (null $ falsePositives) $
putStrLn $ "false positives: " <> show (length falsePositives)
lf <- C.loadFactor f
putStrLn $ "load factor: " <> show lf