-
Notifications
You must be signed in to change notification settings - Fork 0
/
Main.hs
199 lines (170 loc) · 6.08 KB
/
Main.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
{-
| = CLI application
Generates an expression approximating an unknown
multivariable function f(x1,x2,x3,...,x_8) using data from an external file.
File format: comma-separated values with the function evaluations
in the last column, i.e.
x1 x2 x3 ... x_8 f(x1,x2,x3,...,x_8)
For example, run 2000 iterations expressions of maximal length 30
with mutation probability 0.2 and the population of 400 chromosomes.
$ stack exec hmep -- -f Data.csv -t 2000 --var 0.2 -l 30 -m 0.2 -p 400
-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ExtendedDefaultRules #-}
module Main where
-- CLI options from `optparse-applicative`
import Options.Applicative as Op
import Data.Semigroup ( (<>) )
import qualified Data.Vector as V
-- CSV files from `cassava`
import Data.Csv ( decode
, HasHeader (..)
)
import Data.List ( foldl' )
import qualified Data.ByteString.Lazy as BS
import Control.Monad ( foldM )
import Text.Printf ( printf )
-- Random operations from `probable`
import Math.Probable.Random ( double )
import AI.MEP
-- | (Mean) square error
dist :: Double -> Double -> Double
dist x y = (x - y)^2
-- Functions available to genetically produced programs.
-- Modify this to your needs.
ops = V.fromList [
('*', (*)),
('+', (+)),
-- Avoid division by zero
('/', \x y -> x / (y + 1e-6)),
('-', (-)),
('s', \x -> sin. const x),
('f', \x -> fromIntegral. floor. const x),
-- Power; invalid operation results in zero
('^', \x y -> let z = x**y in if isNaN z || isInfinite z then 0 else z),
('e', \x _ -> let z = exp x in if isNaN z || isInfinite z then 0 else z),
('a', \x -> abs. const x)
-- ('n', min),
-- ('x', max)
]
-- | CLI options are parsed to this data structure
data ProgOptions = ProgOptions
{ _inputFile :: FilePath
, _chromosomeLength :: Int
, _mutationProb :: Double
, _varProb :: Double
, _constProb :: Double
, _popSize :: Int
, _maxIter :: Int
}
-- | CLI options parser
progOptions :: Parser ProgOptions
progOptions = ProgOptions
-- Mandatory field (no default value)
<$> strOption
( short 'f'
<> metavar "<input file>"
<> help "Input file path. Format: comma-separated, two columns." )
<*> option auto
( long "length"
<> short 'l'
<> metavar "30"
<> value 30
<> help "Chromosome length" )
<*> option auto
( long "mutation"
<> short 'm'
<> metavar "0.05"
<> value 0.05
<> help "Mutation probability" )
<*> option auto
( long "var"
<> short 'r'
<> metavar "0.1"
<> value 0.1
<> help "Probability to generate a new variable gene" )
<*> option auto
( long "const"
<> short 'c'
<> metavar "0.05"
<> value 0.05
<> help "Probability to generate a new constant gene" )
<*> option auto
( long "population"
<> short 'p'
<> metavar "200"
<> value 200
<> help "Population size" )
<*> option auto
( long "total"
<> short 't'
<> metavar "200"
<> value 200
<> help "Total number of iterations" )
main :: IO ()
main = Op.execParser opts >>= run
where
opts = Op.info (Op.helper <*> progOptions)
( fullDesc
<> header "A CLI interface to Haskell multi expression programming" )
last9 (_, _, _, _, _, _, _, _, y) = y
init9 (x1,x2,x3,x4,x5,x6,x7,x8,_) = V.fromList [x1,x2,x3,x4,x5,x6,x7,x8]
run :: ProgOptions -> IO ()
run arg = do
let pVar = _varProb arg
pConst = _constProb arg
pMut = _mutationProb arg
config0 = defaultConfig
{ c'ops = ops
, c'length = _chromosomeLength arg
, p'mutation = pMut
, p'var = pVar
, p'const = pConst
, c'popSize = _popSize arg
}
maxIter = _maxIter arg
let f = _inputFile arg
putStrLn $ printf "Chromosome length: %d" (_chromosomeLength arg)
putStrLn $ printf "Population size: %d" (_popSize arg)
putStrLn $ "Mutation probability: " ++ show pMut
putStrLn $ "Probability to generate a new variable gene: " ++ show pVar
putStrLn $ "Probability to generate a new constant gene: " ++ show pConst
let pNew = 1 - pConst - pVar
putStrLn $ "Probability to generate a new operator: " ++ show pNew
putStrLn ""
putStrLn $ "Reading file " ++ f
bs <- BS.readFile f
-- TODO: Improve parsing with Cassava
let result = decode NoHeader bs :: Either String (V.Vector (Double, Double, Double, Double, Double, Double, Double, Double, Double))
case result of
Left err -> error err
Right parsed -> do
let dtaY = V.map last9 parsed -- Last column
dtaX = V.map init9 parsed
dataset = (dtaX, dtaY) :: (V.Vector (V.Vector Double), V.Vector Double)
let datasetSize = V.length dtaX
dim = V.length (V.head dtaX) -- Input dimensionality
config = config0 { c'vars = dim }
putStrLn $ printf "Fetched %d records" datasetSize
putStrLn $ printf "Input dimensions: %d\n" dim
-- Randomly create a population of chromosomes
pop <- runRandIO $ initialize config
let loss = regressionLoss dist dataset
-- Evaluate the initial population
let popEvaluated = evaluatePopulation loss pop
norm = fromIntegral datasetSize
putStrLn $ "Average loss in the initial population " ++ show (avgLoss popEvaluated / norm)
-- Declare how to produce the new generation
let nextGeneration = evolve config loss (mutation3 config) crossover binaryTournament
-- Specify the I/O loop, which logs every 5 generation
runIO pop i = do
newPop <- runRandIO $ foldM (\xg _ -> nextGeneration xg) pop [1..generations]
putStrLn $ "Population " ++ show (i * generations) ++ ": average loss " ++ show (avgLoss newPop / norm)
return newPop
where generations = 5
-- The final population
final <- foldM runIO popEvaluated [1..(maxIter `div` 5)]
putStrLn $ show $ best final
putStrLn "Interpreted expression:"
putStrLn $ generateCode (best final)