From 8fa909e5c3db2160628891d1e36f5c7522988583 Mon Sep 17 00:00:00 2001 From: David van Balen Date: Fri, 7 Jun 2024 14:24:58 +0200 Subject: [PATCH] minor --- accelerate-llvm-native/benchmarksoutput | 4 - accelerate-llvm-native/benchmarksoutput.csv | 37 ++ .../benchmarksoutputmatmulcontext.csv | 14 + accelerate-llvm-native/greediesarebad.csv | 10 + .../LLVM/Native/Execute/Scheduler.hs | 4 +- .../Array/Accelerate/LLVM/Native/Operation.hs | 2 +- accelerate-llvm-native/test/nofib/Main.hs | 97 ++-- debug | 505 ++++++++++++++++++ debug2 | 217 ++++++++ runovernight | 1 + stack.yaml | 4 +- 11 files changed, 844 insertions(+), 51 deletions(-) delete mode 100644 accelerate-llvm-native/benchmarksoutput create mode 100644 accelerate-llvm-native/benchmarksoutput.csv create mode 100644 accelerate-llvm-native/benchmarksoutputmatmulcontext.csv create mode 100644 accelerate-llvm-native/greediesarebad.csv create mode 100644 debug create mode 100644 debug2 create mode 100644 runovernight diff --git a/accelerate-llvm-native/benchmarksoutput b/accelerate-llvm-native/benchmarksoutput deleted file mode 100644 index 90e8f04d1..000000000 --- a/accelerate-llvm-native/benchmarksoutput +++ /dev/null @@ -1,4 +0,0 @@ -Name,Mean,MeanLB,MeanUB,Stddev,StddevLB,StddevUB -Name,Mean,MeanLB,MeanUB,Stddev,StddevLB,StddevUB -Name,Mean,MeanLB,MeanUB,Stddev,StddevLB,StddevUB -Name,Mean,MeanLB,MeanUB,Stddev,StddevLB,StddevUB diff --git a/accelerate-llvm-native/benchmarksoutput.csv b/accelerate-llvm-native/benchmarksoutput.csv new file mode 100644 index 000000000..c666ba54f --- /dev/null +++ b/accelerate-llvm-native/benchmarksoutput.csv @@ -0,0 +1,37 @@ +Name,Mean,MeanLB,MeanUB,Stddev,StddevLB,StddevUB +Name,Mean,MeanLB,MeanUB,Stddev,StddevLB,StddevUB +Name,Mean,MeanLB,MeanUB,Stddev,StddevLB,StddevUB +Name,Mean,MeanLB,MeanUB,Stddev,StddevLB,StddevUB +Name,Mean,MeanLB,MeanUB,Stddev,StddevLB,StddevUB +Name,Mean,MeanLB,MeanUB,Stddev,StddevLB,StddevUB +Name,Mean,MeanLB,MeanUB,Stddev,StddevLB,StddevUB +Name,Mean,MeanLB,MeanUB,Stddev,StddevLB,StddevUB +Name,Mean,MeanLB,MeanUB,Stddev,StddevLB,StddevUB +Name,Mean,MeanLB,MeanUB,Stddev,StddevLB,StddevUB +Everything/matmul,1.3502863333959634,1.1352471112198137,1.5836369612627916,0.5166027023744023,0.4773110762815058,0.5541380164732501 +Everything/forwardbad,2.135629816375433,1.9835745340867694,2.1782784115099623,0.1430969556362045,3.551289499907248e-2,0.2681958061015379 +Everything/backwardbad,1.0551923127077802e-3,1.0261420849649963e-3,1.0958729321550516e-3,2.051263952984784e-4,1.6473543205892374e-4,2.7257451310358337e-4 +NumClusters/matmul,11.177424687738247,9.434310233020751,13.831702931356544,2.985588614519109,0.10055508320940643,3.510774557572271 +NumClusters/forwardbad,2.25244210987047,2.1497158256104503,2.379189923318892,0.22541186633942642,0.1365404355394495,0.3637130859566092 +NumClusters/backwardbad,1.057029102627378e-3,1.0310487959908478e-3,1.09433183384675e-3,1.8921342407944932e-4,1.4907138419933142e-4,2.733204551943844e-4 +ArrayReads/matmul,1.5577622447058046,1.3268997734611412,1.7854141088451783,0.5190333284851716,0.427042718275227,0.6007962874729096 +ArrayReads/forwardbad,2.1982514686737473,2.115899636603832,2.2503309081844427,0.12966701743235057,7.756102769596677e-2,0.20867315297657577 +ArrayReads/backwardbad,1.0745693357682164e-3,1.0450592265811754e-3,1.1180884700089406e-3,2.0659821377486248e-4,1.5098528876200906e-4,3.4589886646053583e-4 +ArrayReadsWrites/matmul,1.5520527678653597,1.312849051866864,1.7645711547829666,0.5141306770637573,0.4200955129885183,0.5769996081933351 +ArrayReadsWrites/forwardbad,2.159520071278085,2.0536919323524656,2.2132468016236717,0.15102593067790593,5.089549539671408e-2,0.23473463477861548 +ArrayReadsWrites/backwardbad,1.058858740395056e-3,1.0281876056398946e-3,1.104298828022603e-3,2.1516486495026034e-4,1.680669828417058e-4,3.098210564486659e-4 +IntermediateArrays/matmul,1.8815438792659256,1.8020228474261344,1.9491330888376235,0.1573562468716222,0.11099702118234331,0.23762538821961376 +IntermediateArrays/forwardbad,2.1397595408185333,2.0392605143802283,2.202945425006698,0.1646028225684085,9.756754164913639e-2,0.27955646281115776 +IntermediateArrays/backwardbad,1.342566624775104e-3,1.306561358672572e-3,1.4100863680135008e-3,2.8315918718603946e-4,1.7942888621475949e-4,4.4356949885908555e-4 +FusedEdges/matmul,1.8097483036093143,1.7261518561346025,1.8555774865753982,0.13558004562001946,6.1419180163578946e-2,0.2379305821034696 +FusedEdges/forwardbad,2.1523152658494737,2.0815238243628125,2.197059913302162,0.11889223327610768,6.680062576628132e-2,0.20214967991078375 +FusedEdges/backwardbad,1.338251381054407e-3,1.3026872804275701e-3,1.3895123598019184e-3,2.585858168996678e-4,1.9478748208066435e-4,3.839086336209633e-4 +GreedyUp/matmul,1.49913782838233,1.2772552717068815,1.6915842199231563,0.44618612146875203,0.34515717660218326,0.503435763209746 +GreedyUp/forwardbad,2.119755043971352,2.0393467377343506,2.1672191670342227,0.13200222834579184,7.772362665274478e-2,0.20494500313499486 +GreedyUp/backwardbad,1.3143907367389337e-3,1.2857562222436075e-3,1.3543731897805908e-3,2.085487623716435e-4,1.607039415702079e-4,2.880011890283828e-4 +GreedyDown/matmul,1.9029468707813861,1.8094239767461178,1.9458241517784156,0.13594866879593942,7.086191990036372e-2,0.2396218440818592 +GreedyDown/forwardbad,2.0907710299874513,2.020291554320154,2.125162175793583,0.10152349445910949,4.779829280982774e-2,0.16917958670872063 +GreedyDown/backwardbad,1.3632174109589011e-3,1.320516651583929e-3,1.4423832089292341e-3,3.363860534582522e-4,2.2169706782569553e-4,6.117641965212207e-4 +NoFusion/matmul,36.94635295214963,23.794245673023397,42.306299438642476,9.280767539952567,2.1448638993339664,12.378266187274532 +NoFusion/forwardbad,23.082870092373923,13.966668136996304,30.377858369876776,9.946468428403195,7.626137228559429,11.8131826047403 +NoFusion/backwardbad,1.2562148565046821e-3,1.2374307930216395e-3,1.2794919398975556e-3,1.1897057507418666e-4,9.208582341065748e-5,1.53156664048873e-4 diff --git a/accelerate-llvm-native/benchmarksoutputmatmulcontext.csv b/accelerate-llvm-native/benchmarksoutputmatmulcontext.csv new file mode 100644 index 000000000..1de3b00cc --- /dev/null +++ b/accelerate-llvm-native/benchmarksoutputmatmulcontext.csv @@ -0,0 +1,14 @@ +Name,Mean,MeanLB,MeanUB,Stddev,StddevLB,StddevUB +Everything/matmulcontext,1.4927236663803578,1.2755927797986089,1.678658488589967,0.4548664041570711,0.37971487045890856,0.5078559875955694 +NumClusters/matmulcontext,1.8325178209487925,1.7398651374806102,1.876154716194433,0.13832272351779853,6.250007407765376e-2,0.239475863016437 +ArrayReads/matmulcontext,1.883758925488431,1.8378746566304092,1.9389994799861252,0.10619803585517566,6.904933676884673e-2,0.1842927319984395 +ArrayReadsWrites/matmulcontext,1.8939725908350415,1.782264430033007,1.9475988234939863,0.16462798590063105,8.023921400842501e-2,0.2991357811440669 +IntermediateArrays/matmulcontext,1.9523295348482317,1.9026205709997153,2.014329231671332,0.12153372290003468,8.347302220324947e-2,0.1739060378605435 +FusedEdges/matmulcontext,1.881919365639304,1.8296029580174067,1.9211448945664553,0.10130754849415059,5.3794290065749664e-2,0.16200595993134195 +GreedyUp/matmulcontext,1.8649471036547869,1.7818282015992775,1.9187583252469462,0.1431059375714343,7.948952053399606e-2,0.22142100366316092 +GreedyDown/matmulcontext,1.8671849327230834,1.7594629544373048,1.9193299434069204,0.14999187831696095,5.503048568128746e-2,0.24102162049493203 +NoFusion/matmulcontext,41.504644953303796,34.50971416023094,49.54096629580454,9.23499848089001,3.044820105277328,12.586003324066512 +Name,Mean,MeanLB,MeanUB,Stddev,StddevLB,StddevUB +GreedyUp/matmulcontext,1.6093282945164777,1.3085210545754826,1.9071802607872101,0.6510957527814312,0.5345549321259793,0.8738456454003665 +GreedyDown/matmulcontext,1.9551582802534269,1.8432695898449039,2.097925818701378,0.27910084704621174,0.19899241404417303,0.36182159618438475 +NoFusion/matmulcontext,39.786535659053094,33.738748545210306,43.467295845378736,6.151677142169614,3.033937002465801,8.500351537590266 diff --git a/accelerate-llvm-native/greediesarebad.csv b/accelerate-llvm-native/greediesarebad.csv new file mode 100644 index 000000000..a214388d0 --- /dev/null +++ b/accelerate-llvm-native/greediesarebad.csv @@ -0,0 +1,10 @@ +Name,Mean,MeanLB,MeanUB,Stddev,StddevLB,StddevUB +Everything/forwardbad,2.373783422499138e-2,2.347745205308043e-2,2.4627366176390424e-2,2.114908708044531e-3,6.531908115589382e-4,4.819508352668887e-3 +Everything/backwardbad,3.606691781374293e-2,3.571940999474364e-2,3.74534520066784e-2,2.6358486764775264e-3,9.747949186503443e-4,5.879831014618713e-3 +GreedyUp/forwardbad,3.6626181695724444e-2,3.502026117114083e-2,3.9755432256981235e-2,1.017618798045282e-2,6.001991690996569e-3,1.755050507896749e-2 +GreedyUp/backwardbad,3.632229297610867e-2,3.6057610401319315e-2,3.6921268357541406e-2,1.7583316147213716e-3,9.704259641190331e-4,3.5721254246404667e-3 +GreedyDown/forwardbad,2.3970060685063254e-2,2.3701581756535476e-2,2.4991189089748546e-2,2.304345990307629e-3,3.852235860432923e-4,4.973315861257144e-3 +GreedyDown/backwardbad,0.708492649517366,0.6981189270352098,0.7464575397676662,5.161654698391952e-2,5.4325858778790995e-3,0.10636994048126226 +NoFusion/forwardbad,9.706273619993701e-2,9.441343113486038e-2,0.10235459452755384,1.469075659904803e-2,8.002086943955398e-3,2.5279871688187384e-2 +NoFusion/backwardbad,1.341660145146297,1.304144801615958,1.3596349875245755,5.49257811758058e-2,1.3556280370411532e-2,8.886663759599717e-2 +Name,Mean,MeanLB,MeanUB,Stddev,StddevLB,StddevUB diff --git a/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/Execute/Scheduler.hs b/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/Execute/Scheduler.hs index e10c53744..184066a94 100644 --- a/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/Execute/Scheduler.hs +++ b/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/Execute/Scheduler.hs @@ -183,9 +183,9 @@ hireWorkers :: IO Workers hireWorkers = do nproc <- getNumProcessors ncaps <- getNumCapabilities - -- menv <- (readMaybe =<<) <$> lookupEnv "ACCELERATE_LLVM_NATIVE_THREADS" + menv <- (readMaybe =<<) <$> lookupEnv "ACCELERATE_LLVM_NATIVE_THREADS" - let nthreads = 1 --fromMaybe nproc menv + let nthreads = fromMaybe nproc menv workers <- hireWorkersOn [0 .. nthreads-1] return workers diff --git a/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/Operation.hs b/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/Operation.hs index 699714662..f5847a31f 100644 --- a/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/Operation.hs +++ b/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/Operation.hs @@ -275,7 +275,7 @@ instance MakesILP NativeOp where ( inputConstraints l lIns <> ILP.c (InDir l) .==. ILP.c (OutDir l) <> ILP.c (InDims l) .==. int 1 .+. ILP.c (OutDims l) - <> foldMap (\lin -> fused lin l .==. int 1) lIns + -- <> foldMap (\lin -> fused lin l .==. int 1) lIns <> inrankifmanifest (ShapeRsnoc shr) l) (defaultBounds l) diff --git a/accelerate-llvm-native/test/nofib/Main.hs b/accelerate-llvm-native/test/nofib/Main.hs index e3ad0cb61..db1a22214 100644 --- a/accelerate-llvm-native/test/nofib/Main.hs +++ b/accelerate-llvm-native/test/nofib/Main.hs @@ -5,8 +5,8 @@ -- -- Maintainer : Trevor L. McDonell -- Stability : experimental --- Portability : non-portable (GHC extensions) -- +-- Portability : non-portable (GHC extensions) {-# LANGUAGE TypeApplications #-} {-# LANGUAGE ViewPatterns #-} {-# LANGUAGE FlexibleContexts #-} @@ -34,40 +34,31 @@ import Control.DeepSeq -- import Quickhull import Data.Array.Accelerate.Trafo.Partitioning.ILP (Benchmarking(..)) import Criterion.Types +import Data.Array.Accelerate (testWithObjective) main :: IO () main = do - let loop :: [a] -> [a] - loop xs = xs Prelude.<> loop xs - - let histogram :: Acc (Vector Int) -> Acc (Vector Int) - histogram xs = - let zeros = fill (constant (Z:.10)) 0 - ones = fill (shape xs) 1 - in - permute (+) zeros (\ix -> Just_ (I1 (xs!ix))) ones - - let xs = fromList (Z :. 50) $ loop $ [1 :: Int .. 9] Prelude.<> [2 .. 8] - - putStrLn $ test @UniformScheduleFun @NativeKernel histogram - print $ runN @Native histogram xs - - - - let xs = fromList (Z:.1000:.1000) [1::Int ..] - let greedyForwardBad xs = - let largexs = replicate (Z_ ::. (1000 :: Exp Int) ::. All_ ::. (1000 :: Exp Int)) xs - ys = generate (Z_ ::. (1000 :: Exp Int)) (\(I1 i) -> i + xs ! (I1 $ 999 - i)) - largeys = replicate (Z_ ::. (1000 :: Exp Int) ::. All_ ::. (1000 :: Exp Int)) ys - result = sum $ flatten $ zipWith (+) largexs largeys + let xs'= fromList (Z:.1000) [1::Int ..] + xs = fromList (Z:.10000000) [1::Int ..] + ys = fromList (Z:.1000:.1000) [1::Int ..] + zs = fromList (Z:.1000:.1000) [0::Int ..] + + -- The forwards greedy algorithm can't (easily?) be fooled into manifesting a large array, + -- but it does do this example 1.5-2x as slow + let greedyForwardBad as = + let (I1 x) = shape as + bs = map (*2) as + cs = map (+1) bs + xs = generate (Z_ ::. x) (\(I1 i) -> i + bs ! (I1 0)) + result = sum $ zipWith (+) cs xs in result - + -- backwards greedy manifests a huge array let greedyBackwardBad (xs :: Acc (Vector Int)) = - let large = replicate (Z_ ::. All_ ::. (1000 :: Exp Int)) xs + let large = replicate (Z_ ::. All_ ::. (1000000 :: Exp Int)) xs ys = sum large zs = product large - result = imap (\(I1 i) y -> if zs ! (I1 $ 999 - i) == 0 then y else 1+y) ys + result = imap (\(I1 i) y -> if zs ! (I1 0) == 0 then y else 1+y) ys in result let transpose' x = @@ -81,25 +72,45 @@ main = do in sum $ transpose' $ zipWith (*) (replicate (Z_ ::. All_ ::. All_ ::. cols) xs) (replicate (Z_ ::. rows ::. All_ ::. All_) ys) - - let testcase' :: (Arrays b, NFData b) => ((Acc (Matrix Int, Matrix Int) -> Acc b) -> (Matrix Int, Matrix Int) -> b) -> (Prelude.String, Acc (Matrix Int, Matrix Int) -> Acc b) -> Benchmark - testcase' f (name, p) = env (Prelude.pure (f p, xs)) $ \ ~(p', xs') -> bench name $ nf p' (xs',xs') - - let testcase :: Prelude.String -> (forall b. Arrays b =>(Acc (Matrix Int, Matrix Int) -> Acc b) -> (Matrix Int, Matrix Int) -> b) -> Benchmark - testcase name f = bgroup name $ [ - testcase' f ("matmul", matmul) - --, testcase' f ("forwardbad",greedyForwardBad) - --, testcase' f ("backwardbad", greedyBackwardBad) + + let matmulcontext :: Acc (Matrix Int) -> Acc (Matrix Int) + matmulcontext xs' = + let xs = map (*2) xs' + ys = map (+1) xs + zs = map (`div` xs ! (Z_::.0::.0)) ys + in matmul (T2 ys zs) + + let testcase' :: (Arrays a, Arrays b, NFData a, NFData b) => ((Acc a -> Acc b) -> a -> b) -> (Prelude.String, Acc a -> Acc b, a) -> Benchmark + testcase' f (name, p, input) = env (Prelude.pure (f p, input)) $ \ ~(p', xs') -> bench name $ nf p' xs' + + let testcase :: Prelude.String -> (forall a b. (Arrays a, Arrays b) =>(Acc a -> Acc b) -> a -> b) -> Benchmark + testcase name f = bgroup name + [ + -- testcase' f ("matmulcontext", matmulcontext, ys) + -- testcase' f ("matmul", matmul, (ys, zs)) + testcase' f ("forwardbad",greedyForwardBad, xs) + , testcase' f ("backwardbad", greedyBackwardBad, xs') ] - - defaultMainWith (defaultConfig { timeLimit = 5*60, resamples = 10000, csvFile = Just "benchmarksoutput"}) $ Prelude.map (\obj -> testcase (show obj) (runNWithObj @Native obj)) + + -- benchmarkmain + + -- putStrLn "readswrites" + -- putStrLn $ testWithObjective @UniformScheduleFun @NativeKernel IntermediateArrays (greedyForwardBad) + -- putStrLn "down" + -- putStrLn $ testBench @UniformScheduleFun @NativeKernel GreedyDown (greedyForwardBad) + -- putStrLn "up" + -- putStrLn $ testBench @UniformScheduleFun @NativeKernel GreedyUp (greedyForwardBad) + + + + defaultMainWith (defaultConfig { timeLimit = 5*60, resamples = 10000, csvFile = Just "greediesarebad.csv"}) $ Prelude.map (\obj -> testcase (show obj) (runNWithObj @Native obj)) [ Everything - , NumClusters - , ArrayReads - , ArrayReadsWrites - , IntermediateArrays - , FusedEdges + -- , NumClusters + -- , ArrayReads + -- ArrayReadsWrites + -- , IntermediateArrays + -- , FusedEdges ] Prelude.<> Prelude.map (\b -> testcase (show b) (runNBench @Native b)) [ GreedyUp diff --git a/debug b/debug new file mode 100644 index 000000000..dc746923a --- /dev/null +++ b/debug @@ -0,0 +1,505 @@ +DEPRECATED: The package at Archive from https://github.com/msakai/haskell-MIP/archive/4295aa21a24a30926b55770c55ac00f749fb8a39.tar.gz in subdir MIP does not include a cabal file. +Instead, it includes an hpack package.yaml file for generating a cabal file. +This usage is deprecated; please see https://github.com/commercialhaskell/stack/issues/5210. +Support for this workflow will be removed in the future. + +Stack has not been tested with GHC versions above 9.0, and using 9.4.5, this may fail +Stack has not been tested with Cabal versions above 3.4, but version 3.8.1.0 was found, this may fail +accelerate-llvm-native> test (suite: nofib-llvm-native) + +readswrites +OriginalAcc: +\a0 -> fold (\x0 x1 -> x0 + x1) 0 (let a1 = map (\x0 -> x0 * 2) a0 in zipWith (\x0 x1 -> x0 + x1) (replicate (let () = () in T1 (), 1000000) (map (\x0 -> x0 + 1) a1)) (replicate (let () = () in T1 (), 1000000) (generate (T1 1000) (\(T1 x0) -> x0 + (a1 ! (T1 0)))))) + +Desugared OperationAcc: +\T2 (e0: Int) (b0: [Int]) -> + b1: [Int]¹ = alloc Int[Z :. e0] + execute map (\x0 -> x0 * 2, in (Z :. e0) b0, out (Z :. e0) b1) + b2: [Int]¹ = alloc Int[Z :. e0] + execute map (\x0 -> x0 + 1, in (Z :. e0) b1, out (Z :. e0) b2) + T1 () = compute let () = () in T1 () + e1: Int = compute 1000000 + T2 (e2: Int) (e3: Int) = compute indexFull (T2 () e1) (T1 e0) + b3: [Int]¹ = alloc Int[Z :. e2 :. e3] + execute backpermute (\(T2 x0 x1) -> indexSlice (T2 () e1) (T2 x0 x1), in (Z :. e0) b2, out (Z :. e2 :. e3) b3) + e4: Int = compute 1000 + b4: [Int]¹ = alloc Int[Z :. e4] + execute generate (\(T1 x0) -> x0 + (let x1 = toIndex (T1 e0) (T1 0) in (b1 !! x1)), out (Z :. e4) b4) + T1 () = compute let () = () in T1 () + e5: Int = compute 1000000 + T2 (e6: Int) (e7: Int) = compute indexFull (T2 () e5) (T1 e4) + b5: [Int]¹ = alloc Int[Z :. e6 :. e7] + execute backpermute (\(T2 x0 x1) -> indexSlice (T2 () e5) (T2 x0 x1), in (Z :. e4) b4, out (Z :. e6 :. e7) b5) + e8: Int = compute min e2 e6 + e9: Int = compute min e3 e7 + b6: [Int]¹ = alloc Int[Z :. e8 :. e9] + b7: [Int]¹ = alloc Int[Z :. e8 :. e9] + b8: [Int]¹ = alloc Int[Z :. e8 :. e9] + execute backpermute (\(T2 x0 x1) -> T2 x0 x1, in (Z :. e2 :. e3) b3, out (Z :. e8 :. e9) b7) + execute backpermute (\(T2 x0 x1) -> T2 x0 x1, in (Z :. e6 :. e7) b5, out (Z :. e8 :. e9) b8) + execute map (\(x0, x1) -> x0 + x1, in (Z :. e8 :. e9) (b7, b8), out (Z :. e8 :. e9) b6) + b9: [Int]¹ = alloc Int[Z :. e8] + b10: [Int]¹ = alloc Int[Z :. e8] + execute fold-2 (\x0 x1 -> x0 + x1, in (Z :. e8 :. e9) b6, out (Z :. e8) b10) + execute map (\x0 -> let (x1, x2) = (0, x0) in x1 + x2, in (Z :. e8) b10, out (Z :. e8) b9) + return (T2 e8 b9) + +Simplified OperationAcc: +\T2 (e0: Int) (b0: [Int]) -> + b1: [Int]¹ = alloc Int[Z :. e0] + execute map (\x0 -> 2 * x0, in (Z :. e0) b0, out (Z :. e0) b1) + b2: [Int]¹ = alloc Int[Z :. e0] + execute map (\x0 -> 1 + x0, in (Z :. e0) b1, out (Z :. e0) b2) + e1: Int = compute 1000000 + b3: [Int]¹ = alloc Int[Z :. e0 :. e1] + execute backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e0) b2, out (Z :. e0 :. e1) b3) + e2: Int = compute 1000 + b4: [Int]¹ = alloc Int[Z :. e2] + execute generate (\(T1 x0) -> x0 + (b1 !! 0), out (Z :. e2) b4) + e3: Int = compute 1000 + e4: Int = compute 1000000 + b5: [Int]¹ = alloc Int[Z :. e3 :. e4] + execute backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e2) b4, out (Z :. e3 :. e4) b5) + e5: Int = compute min 1000 e0 + e6: Int = compute 1000000 + b6: [Int]¹ = alloc Int[Z :. e5 :. e6] + b7: [Int]¹ = alloc Int[Z :. e5 :. e6] + b8: [Int]¹ = alloc Int[Z :. e5 :. e6] + execute backpermute (\(T2 x0 x1) -> T2 x0 x1, in (Z :. e0 :. e1) b3, out (Z :. e5 :. e6) b7) + execute backpermute (\(T2 x0 x1) -> T2 x0 x1, in (Z :. e3 :. e4) b5, out (Z :. e5 :. e6) b8) + execute map (\(x0, x1) -> x0 + x1, in (Z :. e5 :. e6) (b7, b8), out (Z :. e5 :. e6) b6) + b9: [Int]¹ = alloc Int[Z :. e5] + execute fold-2 (\x0 x1 -> x0 + x1, in (Z :. e5 :. e6) b6, out (Z :. e5) b9) + return (T2 e5 b9) + +PartitionedAcc: +\T2 (e0: Int) (b0: [Int]) -> + b1: [Int]¹ = alloc Int[Z :. e0] + b2: [Int]¹ = alloc Int[Z :. e0] + e1: Int = compute 1000000 + e2: Int = compute 1000 + e3: Int = compute 1000 + e4: Int = compute 1000000 + e5: Int = compute min 1000 e0 + e6: Int = compute 1000000 + execute map (\x0 -> 2 * x0, in (Z :. e0) b0, out (Z :. e0) b1) + b3: [Int]¹ = alloc Int[Z :. e0 :. e1] + b4: [Int]¹ = alloc Int[Z :. e2] + b5: [Int]¹ = alloc Int[Z :. e3 :. e4] + b6: [Int]¹ = alloc Int[Z :. e5 :. e6] + b7: [Int]¹ = alloc Int[Z :. e5 :. e6] + b8: [Int]¹ = alloc Int[Z :. e5 :. e6] + b9: [Int]¹ = alloc Int[Z :. e5] + execute { + generate (\(T1 x0) -> x0 + (b1 !! 0), out (Z :. e2) b4) + fused diagonally: (b4) + backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e2) b4, out (Z :. e3 :. e4) b5) + fused diagonally: (b5) + backpermute (\(T2 x0 x1) -> T2 x0 x1, in (Z :. e3 :. e4) b5, out (Z :. e5 :. e6) b8) + fused diagonally: (b8) + map (\x0 -> 1 + x0, in (Z :. e0) b1, out (Z :. e0) b2) + fused diagonally: (b2) + backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e0) b2, out (Z :. e0 :. e1) b3) + fused diagonally: (b3) + backpermute (\(T2 x0 x1) -> T2 x0 x1, in (Z :. e0 :. e1) b3, out (Z :. e5 :. e6) b7) + fused diagonally: (b7) + map (\(x0, x1) -> x0 + x1, in (Z :. e5 :. e6) (b7, b8), out (Z :. e5 :. e6) b6) + fused diagonally: (b6) + fold-2 (\x0 x1 -> x0 + x1, in (Z :. e5 :. e6) b6, out (Z :. e5) b9) + } + return (T2 e5 b9) +SLV'd PartitionedAcc: +\T2 (e0: Int) (b0: [Int]) -> + b1: [Int]¹ = alloc Int[Z :. e0] + e1: Int = compute 1000000 + e2: Int = compute 1000 + e3: Int = compute 1000 + e4: Int = compute 1000000 + e5: Int = compute min 1000 e0 + e6: Int = compute 1000000 + execute map (\x0 -> 2 * x0, in (Z :. e0) b0, out (Z :. e0) b1) + b2: [Int]¹ = alloc Int[Z :. e5] + execute { + generate (\(T1 x0) -> x0 + (b1 !! 0), out (Z :. e2) %0) + fused vertically: (%0) + backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e2) %0, out (Z :. e3 :. e4) %1) + fused vertically: (%1) + backpermute (\(T2 x0 x1) -> T2 x0 x1, in (Z :. e3 :. e4) %1, out (Z :. e5 :. e6) %2) + fused vertically: (%2) + map (\x0 -> 1 + x0, in (Z :. e0) b1, out (Z :. e0) %3) + fused vertically: (%3) + backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e0) %3, out (Z :. e0 :. e1) %4) + fused vertically: (%4) + backpermute (\(T2 x0 x1) -> T2 x0 x1, in (Z :. e0 :. e1) %4, out (Z :. e5 :. e6) %5) + fused vertically: (%5) + map (\(x0, x1) -> x0 + x1, in (Z :. e5 :. e6) (%5, %2), out (Z :. e5 :. e6) %6) + fused vertically: (%6) + fold-2 (\x0 x1 -> x0 + x1, in (Z :. e5 :. e6) %6, out (Z :. e5) b2) + } + return (T2 e5 b2) + +Schedule: +\T2 (s0: Signal, r0: *Int) (s1: Signal, r1: *[Int]) -> + \T2 (s2': Resolver, r2': *out Int) (s3': Resolver, r3': *out [Int]) -> + await [s0] + e0: Int = compute 1000000 + e1: Int = compute 1000 + e2: Int = compute 1000 + e3: Int = compute 1000000 + e4: Int = compute 1000000 + e5: Int = *r0 + e6: Int = compute min 1000 e5 + *r2' = e6 + resolve [s2'] + await [s1] + b0 = alloc Int[Z :. e5] + b1: [Int] = *r1 + execute map-953fc2721145c55433eb2e01ce07 (out b0, in b1, e5) + b2 = alloc Int[Z :. e6] + execute fold-maps-etc-279dfcb93aed5661c4 {- Cluster with 1 generate, then 2 backpermutes, then 1 map, then 2 backpermutes, then 1 map and finally 1 fold-} (out b2, e4, e6, e3, e2, e1, e0, in b0, e5) + *r3' = b2 + resolve [s3'] +down +OriginalAcc: +\a0 -> fold (\x0 x1 -> x0 + x1) 0 (let a1 = map (\x0 -> x0 * 2) a0 in zipWith (\x0 x1 -> x0 + x1) (replicate (let () = () in T1 (), 1000000) (map (\x0 -> x0 + 1) a1)) (replicate (let () = () in T1 (), 1000000) (generate (T1 1000) (\(T1 x0) -> x0 + (a1 ! (T1 0)))))) + +Desugared OperationAcc: +\T2 (e0: Int) (b0: [Int]) -> + b1: [Int]¹ = alloc Int[Z :. e0] + execute map (\x0 -> x0 * 2, in (Z :. e0) b0, out (Z :. e0) b1) + b2: [Int]¹ = alloc Int[Z :. e0] + execute map (\x0 -> x0 + 1, in (Z :. e0) b1, out (Z :. e0) b2) + T1 () = compute let () = () in T1 () + e1: Int = compute 1000000 + T2 (e2: Int) (e3: Int) = compute indexFull (T2 () e1) (T1 e0) + b3: [Int]¹ = alloc Int[Z :. e2 :. e3] + execute backpermute (\(T2 x0 x1) -> indexSlice (T2 () e1) (T2 x0 x1), in (Z :. e0) b2, out (Z :. e2 :. e3) b3) + e4: Int = compute 1000 + b4: [Int]¹ = alloc Int[Z :. e4] + execute generate (\(T1 x0) -> x0 + (let x1 = toIndex (T1 e0) (T1 0) in (b1 !! x1)), out (Z :. e4) b4) + T1 () = compute let () = () in T1 () + e5: Int = compute 1000000 + T2 (e6: Int) (e7: Int) = compute indexFull (T2 () e5) (T1 e4) + b5: [Int]¹ = alloc Int[Z :. e6 :. e7] + execute backpermute (\(T2 x0 x1) -> indexSlice (T2 () e5) (T2 x0 x1), in (Z :. e4) b4, out (Z :. e6 :. e7) b5) + e8: Int = compute min e2 e6 + e9: Int = compute min e3 e7 + b6: [Int]¹ = alloc Int[Z :. e8 :. e9] + b7: [Int]¹ = alloc Int[Z :. e8 :. e9] + b8: [Int]¹ = alloc Int[Z :. e8 :. e9] + e + +(fromList [L5{Just L2{Nothing} } :-> L8{Just L2{Nothing} } ,L8{Just L2{Nothing} } :-> L13{Just L2{Nothing} } ,L9{Just L2{Nothing} } :-> L36{Just L2{Nothing} } ,L13{Just L2{Nothing} } :-> L36{Just L2{Nothing} } ,L14{Just L2{Nothing} } :-> L25{Just L2{Nothing} } ,L18{Just L2{Nothing} } :-> L25{Just L2{Nothing} } ,L19{Just L2{Nothing} } :-> L37{Just L2{Nothing} } ,L21{Just L2{Nothing} } :-> L37{Just L2{Nothing} } ,L25{Just L2{Nothing} } :-> L37{Just L2{Nothing} } ,L28{Just L2{Nothing} } :-> L41{Just L2{Nothing} } ,L36{Just L2{Nothing} } :-> L38{Just L2{Nothing} } ,L37{Just L2{Nothing} } :-> L38{Just L2{Nothing} } ,L38{Just L2{Nothing} } :-> L41{Just L2{Nothing} } ],13,fromList [(L1{Nothing} ,fun),(L2{Nothing} ,bod),(L3{Just L2{Nothing} } ,lhs),(L4{Just L2{Nothing} } ,alc),(L5{Just L2{Nothing} } ,exe'),(L6{Just L2{Nothing} } ,lhs),(L7{Just L2{Nothing} } ,alc),(L8{Just L2{Nothing} } ,exe'),(L9{Just L2{Nothing} } ,lhs),(L10{Just L2{Nothing} } ,cmp),(L11{Just L2{Nothing} } ,lhs),(L12{Just L2{Nothing} } ,alc),(L13{Just L2{Nothing} } ,exe'),(L14{Just L2{Nothing} } ,lhs),(L15{Just L2{Nothing} } ,cmp),(L16{Just L2{Nothing} } ,lhs),(L17{Just L2{Nothing} } ,alc),(L18{Just L2{Nothing} } ,exe'),(L19{Just L2{Nothing} } ,lhs),(L20{Just L2{Nothing} } ,cmp),(L21{Just L2{Nothing} } ,lhs),(L22{Just L2{Nothing} } ,cmp),(L23{Just L2{Nothing} } ,lhs),(L24{Just L2{Nothing} } ,alc),(L25{Just L2{Nothing} } ,exe'),(L26{Just L2{Nothing} } ,lhs),(L27{Just L2{Nothing} } ,cmp),(L28{Just L2{Nothing} } ,lhs),(L29{Just L2{Nothing} } ,cmp),(L30{Just L2{Nothing} } ,lhs),(L31{Just L2{Nothing} } ,alc),(L32{Just L2{Nothing} } ,lhs),(L33{Just L2{Nothing} } ,alc),(L34{Just L2{Nothing} } ,lhs),(L35{Just L2{Nothing} } ,alc),(L36{Just L2{Nothing} } ,exe'),(L37{Just L2{Nothing} } ,exe'),(L38{Just L2{Nothing} } ,exe'),(L39{Just L2{Nothing} } ,lhs),(L40{Just L2{Nothing} } ,alc),(L41{Just L2{Nothing} } ,exe'),(L42{Just L2{Nothing} } ,ret)]) +True +True +True +False +True +False +False +True +False +True +False +True +False + + +(fromList [L5{Just L2{Nothing} } :-> L8{Just L2{Nothing} } ,L8{Just L2{Nothing} } :-> L13{Just L2{Nothing} } ,L9{Just L2{Nothing} } :-> L36{Just L2{Nothing} } ,L13{Just L2{Nothing} } :-> L36{Just L2{Nothing} } ,L14{Just L2{Nothing} } :-> L25{Just L2{Nothing} } ,L18{Just L2{Nothing} } :-> L25{Just L2{Nothing} } ,L19{Just L2{Nothing} } :-> L37{Just L2{Nothing} } ,L21{Just L2{Nothing} } :-> L37{Just L2{Nothing} } ,L25{Just L2{Nothing} } :-> L37{Just L2{Nothing} } ,L28{Just L2{Nothing} } :-> L41{Just L2{Nothing} } ,L36{Just L2{Nothing} } :-> L38{Just L2{Nothing} } ,L37{Just L2{Nothing} } :-> L38{Just L2{Nothing} } ,L38{Just L2{Nothing} } :-> L41{Just L2{Nothing} } ],13,fromList [(L1{Nothing} ,fun),(L2{Nothing} ,bod),(L3{Just L2{Nothing} } ,lhs),(L4{Just L2{Nothing} } ,alc),(L5{Just L2{Nothing} } ,exe'),(L6{Just L2{Nothing} } ,lhs),(L7{Just L2{Nothing} } ,alc),(L8{Just L2{Nothing} } ,exe'),(L9{Just L2{Nothing} } ,lhs),(L10{Just L2{Nothing} } ,cmp),(L11{Just L2{Nothing} } ,lhs),(L12{Just L2{Nothing} } ,alc),(L13{Just L2{Nothing} } ,exe'),(L14{Just L2{Nothing} } ,lhs),(L15{Just L2{Nothing} } ,cmp),(L16{Just L2{Nothing} } ,lhs),(L17{Just L2{Nothing} } ,alc),(L18{Just L2{Nothing} } ,exe'),(L19{Just L2{Nothing} } ,lhs),(L20{Just L2{Nothing} } ,cmp),(L21{Just L2{Nothing} } ,lhs),(L22{Just L2{Nothing} } ,cmp),(L23{Just L2{Nothing} } ,lhs),(L24{Just L2{Nothing} } ,alc),(L25{Just L2{Nothing} } ,exe'),(L26{Just L2{Nothing} } ,lhs),(L27{Just L2{Nothing} } ,cmp),(L28{Just L2{Nothing} } ,lhs),(L29{Just L2{Nothing} } ,cmp),(L30{Just L2{Nothing} } ,lhs),(L31{Just L2{Nothing} } ,alc),(L32{Just L2{Nothing} } ,lhs),(L33{Just L2{Nothing} } ,alc),(L34{Just L2{Nothing} } ,lhs),(L35{Just L2{Nothing} } ,alc),(L36{Just L2{Nothing} } ,exe'),(L37{Just L2{Nothing} } ,exe'),(L38{Just L2{Nothing} } ,exe'),(L39{Just L2{Nothing} } ,lhs),(L40{Just L2{Nothing} } ,alc),(L41{Just L2{Nothing} } ,exe'),(L42{Just L2{Nothing} } ,ret)]) +True +False +False +True +False +True +False +False +True +False +True +True +True +xecute backpermute (\(T2 x0 x1) -> T2 x0 x1, in (Z :. e2 :. e3) b3, out (Z :. e8 :. e9) b7) + execute backpermute (\(T2 x0 x1) -> T2 x0 x1, in (Z :. e6 :. e7) b5, out (Z :. e8 :. e9) b8) + execute map (\(x0, x1) -> x0 + x1, in (Z :. e8 :. e9) (b7, b8), out (Z :. e8 :. e9) b6) + b9: [Int]¹ = alloc Int[Z :. e8] + b10: [Int]¹ = alloc Int[Z :. e8] + execute fold-2 (\x0 x1 -> x0 + x1, in (Z :. e8 :. e9) b6, out (Z :. e8) b10) + execute map (\x0 -> let (x1, x2) = (0, x0) in x1 + x2, in (Z :. e8) b10, out (Z :. e8) b9) + return (T2 e8 b9) + +Simplified OperationAcc: +\T2 (e0: Int) (b0: [Int]) -> + b1: [Int]¹ = alloc Int[Z :. e0] + execute map (\x0 -> 2 * x0, in (Z :. e0) b0, out (Z :. e0) b1) + b2: [Int]¹ = alloc Int[Z :. e0] + execute map (\x0 -> 1 + x0, in (Z :. e0) b1, out (Z :. e0) b2) + e1: Int = compute 1000000 + b3: [Int]¹ = alloc Int[Z :. e0 :. e1] + execute backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e0) b2, out (Z :. e0 :. e1) b3) + e2: Int = compute 1000 + b4: [Int]¹ = alloc Int[Z :. e2] + execute generate (\(T1 x0) -> x0 + (b1 !! 0), out (Z :. e2) b4) + e3: Int = compute 1000 + e4: Int = compute 1000000 + b5: [Int]¹ = alloc Int[Z :. e3 :. e4] + execute backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e2) b4, out (Z :. e3 :. e4) b5) + e5: Int = compute min 1000 e0 + e6: Int = compute 1000000 + b6: [Int]¹ = alloc Int[Z :. e5 :. e6] + b7: [Int]¹ = alloc Int[Z :. e5 :. e6] + b8: [Int]¹ = alloc Int[Z :. e5 :. e6] + execute backpermute (\(T2 x0 x1) -> T2 x0 x1, in (Z :. e0 :. e1) b3, out (Z :. e5 :. e6) b7) + execute backpermute (\(T2 x0 x1) -> T2 x0 x1, in (Z :. e3 :. e4) b5, out (Z :. e5 :. e6) b8) + execute map (\(x0, x1) -> x0 + x1, in (Z :. e5 :. e6) (b7, b8), out (Z :. e5 :. e6) b6) + b9: [Int]¹ = alloc Int[Z :. e5] + execute fold-2 (\x0 x1 -> x0 + x1, in (Z :. e5 :. e6) b6, out (Z :. e5) b9) + return (T2 e5 b9) + +PartitionedAcc: +\T2 (e0: Int) (b0: [Int]) -> + b1: [Int]¹ = alloc Int[Z :. e0] + b2: [Int]¹ = alloc Int[Z :. e0] + e1: Int = compute 1000000 + e2: Int = compute 1000 + e3: Int = compute 1000 + e4: Int = compute 1000000 + e5: Int = compute min 1000 e0 + e6: Int = compute 1000000 + execute map (\x0 -> 2 * x0, in (Z :. e0) b0, out (Z :. e0) b1) + b3: [Int]¹ = alloc Int[Z :. e0 :. e1] + b4: [Int]¹ = alloc Int[Z :. e2] + b5: [Int]¹ = alloc Int[Z :. e3 :. e4] + b6: [Int]¹ = alloc Int[Z :. e5 :. e6] + b7: [Int]¹ = alloc Int[Z :. e5 :. e6] + b8: [Int]¹ = alloc Int[Z :. e5 :. e6] + b9: [Int]¹ = alloc Int[Z :. e5] + execute { + generate (\(T1 x0) -> x0 + (b1 !! 0), out (Z :. e2) b4) + fused diagonally: (b4) + backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e2) b4, out (Z :. e3 :. e4) b5) + fused diagonally: (b5) + backpermute (\(T2 x0 x1) -> T2 x0 x1, in (Z :. e3 :. e4) b5, out (Z :. e5 :. e6) b8) + fused diagonally: (b8) + map (\x0 -> 1 + x0, in (Z :. e0) b1, out (Z :. e0) b2) + fused diagonally: (b2) + backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e0) b2, out (Z :. e0 :. e1) b3) + fused diagonally: (b3) + backpermute (\(T2 x0 x1) -> T2 x0 x1, in (Z :. e0 :. e1) b3, out (Z :. e5 :. e6) b7) + fused diagonally: (b7) + map (\(x0, x1) -> x0 + x1, in (Z :. e5 :. e6) (b7, b8), out (Z :. e5 :. e6) b6) + fused diagonally: (b6) + fold-2 (\x0 x1 -> x0 + x1, in (Z :. e5 :. e6) b6, out (Z :. e5) b9) + } + return (T2 e5 b9) +SLV'd PartitionedAcc: +\T2 (e0: Int) (b0: [Int]) -> + b1: [Int]¹ = alloc Int[Z :. e0] + e1: Int = compute 1000000 + e2: Int = compute 1000 + e3: Int = compute 1000 + e4: Int = compute 1000000 + e5: Int = compute min 1000 e0 + e6: Int = compute 1000000 + execute map (\x0 -> 2 * x0, in (Z :. e0) b0, out (Z :. e0) b1) + b2: [Int]¹ = alloc Int[Z :. e5] + execute { + generate (\(T1 x0) -> x0 + (b1 !! 0), out (Z :. e2) %0) + fused vertically: (%0) + backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e2) %0, out (Z :. e3 :. e4) %1) + fused vertically: (%1) + backpermute (\(T2 x0 x1) -> T2 x0 x1, in (Z :. e3 :. e4) %1, out (Z :. e5 :. e6) %2) + fused vertically: (%2) + map (\x0 -> 1 + x0, in (Z :. e0) b1, out (Z :. e0) %3) + fused vertically: (%3) + backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e0) %3, out (Z :. e0 :. e1) %4) + fused vertically: (%4) + backpermute (\(T2 x0 x1) -> T2 x0 x1, in (Z :. e0 :. e1) %4, out (Z :. e5 :. e6) %5) + fused vertically: (%5) + map (\(x0, x1) -> x0 + x1, in (Z :. e5 :. e6) (%5, %2), out (Z :. e5 :. e6) %6) + fused vertically: (%6) + fold-2 (\x0 x1 -> x0 + x1, in (Z :. e5 :. e6) %6, out (Z :. e5) b2) + } + return (T2 e5 b2) + +Schedule: +\T2 (s0: Signal, r0: *Int) (s1: Signal, r1: *[Int]) -> + \T2 (s2': Resolver, r2': *out Int) (s3': Resolver, r3': *out [Int]) -> + await [s0] + e0: Int = compute 1000000 + e1: Int = compute 1000 + e2: Int = compute 1000 + e3: Int = compute 1000000 + e4: Int = compute 1000000 + e5: Int = *r0 + e6: Int = compute min 1000 e5 + *r2' = e6 + resolve [s2'] + await [s1] + b0 = alloc Int[Z :. e5] + b1: [Int] = *r1 + execute map-953fc2721145c55433eb2e01ce07 (out b0, in b1, e5) + b2 = alloc Int[Z :. e6] + execute fold-maps-etc-279dfcb93aed5661c4 {- Cluster with 1 generate, then 2 backpermutes, then 1 map, then 2 backpermutes, then 1 map and finally 1 fold-} (out b2, e4, e6, e3, e2, e1, e0, in b0, e5) + *r3' = b2 + resolve [s3'] +up +OriginalAcc: +\a0 -> fold (\x0 x1 -> x0 + x1) 0 (let a1 = map (\x0 -> x0 * 2) a0 in zipWith (\x0 x1 -> x0 + x1) (replicate (let () = () in T1 (), 1000000) (map (\x0 -> x0 + 1) a1)) (replicate (let () = () in T1 (), 1000000) (generate (T1 1000) (\(T1 x0) -> x0 + (a1 ! (T1 0)))))) + +Desugared OperationAcc: +\T2 (e0: Int) (b0: [Int]) -> + b1: [Int]¹ = alloc Int[Z :. e0] + execute map (\x0 -> x0 * 2, in (Z :. e0) b0, out (Z :. e0) b1) + b2: [Int]¹ = alloc Int[Z :. e0] + execute map (\x0 -> x0 + 1, in (Z :. e0) b1, out (Z :. e0) b2) + T1 () = compute let () = () in T1 () + e1: Int = compute 1000000 + T2 (e2: Int) (e3: Int) = compute indexFull (T2 () e1) (T1 e0) + b3: [Int]¹ = alloc Int[Z :. e2 :. e3] + execute backpermute (\(T2 x0 x1) -> indexSlice (T2 () e1) (T2 x0 x1), in (Z :. e0) b2, out (Z :. e2 :. e3) b3) + e4: Int = compute 1000 + b4: [Int]¹ = alloc Int[Z :. e4] + execute generate (\(T1 x0) -> x0 + (let x1 = toIndex (T1 e0) (T1 0) in (b1 !! x1)), out (Z :. e4) b4) + T1 () = compute let () = () in T1 () + e5: Int = compute 1000000 + T2 (e6: Int) (e7: Int) = compute indexFull (T2 () e5) (T1 e4) + b5: [Int]¹ = alloc Int[Z :. e6 :. e7] + execute backpermute (\(T2 x0 x1) -> indexSlice (T2 () e5) (T2 x0 x1), in (Z :. e4) b4, out (Z :. e6 :. e7) b5) + e8: Int = compute min e2 e6 + e9: Int = compute min e3 e7 + b6: [Int]¹ = alloc Int[Z :. e8 :. e9] + b7: [Int]¹ = alloc Int[Z :. e8 :. e9] + b8: [Int]¹ = alloc Int[Z :. e8 :. e9] + execute backpermute (\(T2 x0 x1) -> T2 x0 x1, in (Z :. e2 :. e3) b3, out (Z :. e8 :. e9) b7) + execute backpermute (\(T2 x0 x1) -> T2 x0 x1, in (Z :. e6 :. e7) b5, out (Z :. e8 :. e9) b8) + execute map (\(x0, x1) -> x0 + x1, in (Z :. e8 :. e9) (b7, b8), out (Z :. e8 :. e9) b6) + b9: [Int]¹ = alloc Int[Z :. e8] + b10: [Int]¹ = alloc Int[Z :. e8] + execute fold-2 (\x0 x1 -> x0 + x1, in (Z :. e8 :. e9) b6, out (Z :. e8) b10) + execute map (\x0 -> let (x1, x2) = (0, x0) in x1 + x2, in (Z :. e8) b10, out (Z :. e8) b9) + return (T2 e8 b9) + +Simplified OperationAcc: +\T2 (e0: Int) (b0: [Int]) -> + b1: [Int]¹ = alloc Int[Z :. e0] + execute map (\x0 -> 2 * x0, in (Z :. e0) b0, out (Z :. e0) b1) + b2: [Int]¹ = alloc Int[Z :. e0] + execute map (\x0 -> 1 + x0, in (Z :. e0) b1, out (Z :. e0) b2) + e1: Int = compute 1000000 + b3: [Int]¹ = alloc Int[Z :. e0 :. e1] + execute backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e0) b2, out (Z :. e0 :. e1) b3) + e2: Int = compute 1000 + b4: [Int]¹ = alloc Int[Z :. e2] + execute generate (\(T1 x0) -> x0 + (b1 !! 0), out (Z :. e2) b4) + e3: Int = compute 1000 + e4: Int = compute 1000000 + b5: [Int]¹ = alloc Int[Z :. e3 :. e4] + execute backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e2) b4, out (Z :. e3 :. e4) b5) + e5: Int = compute min 1000 e0 + e6: Int = compute 1000000 + b6: [Int]¹ = alloc Int[Z :. e5 :. e6] + b7: [Int]¹ = alloc Int[Z :. e5 :. e6] + b8: [Int]¹ = alloc Int[Z :. e5 :. e6] + execute backpermute (\(T2 x0 x1) -> T2 x0 x1, in (Z :. e0 :. e1) b3, out (Z :. e5 :. e6) b7) + execute backpermute (\(T2 x0 x1) -> T2 x0 x1, in (Z :. e3 :. e4) b5, out (Z :. e5 :. e6) b8) + execute map (\(x0, x1) -> x0 + x1, in (Z :. e5 :. e6) (b7, b8), out (Z :. e5 :. e6) b6) + b9: [Int]¹ = alloc Int[Z :. e5] + execute fold-2 (\x0 x1 -> x0 + x1, in (Z :. e5 :. e6) b6, out (Z :. e5) b9) + return (T2 e5 b9) + +PartitionedAcc: +\T2 (e0: Int) (b0: [Int]) -> + b1: [Int]¹ = alloc Int[Z :. e0] + b2: [Int]¹ = alloc Int[Z :. e0] + e1: Int = compute 1000000 + e2: Int = compute 1000 + e3: Int = compute 1000 + e4: Int = compute 1000000 + e5: Int = compute min 1000 e0 + e6: Int = compute 1000000 + execute { + map (\x0 -> 2 * x0, in (Z :. e0) b0, out (Z :. e0) b1) + fused diagonally: (b1) + map (\x0 -> 1 + x0, in (Z :. e0) b1, out (Z :. e0) b2) + } + b3: [Int]¹ = alloc Int[Z :. e0 :. e1] + b4: [Int]¹ = alloc Int[Z :. e2] + b5: [Int]¹ = alloc Int[Z :. e3 :. e4] + b6: [Int]¹ = alloc Int[Z :. e5 :. e6] + b7: [Int]¹ = alloc Int[Z :. e5 :. e6] + b8: [Int]¹ = alloc Int[Z :. e5 :. e6] + b9: [Int]¹ = alloc Int[Z :. e5] + execute { + generate (\(T1 x0) -> x0 + (b1 !! 0), out (Z :. e2) b4) + fused diagonally: (b4) + backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e2) b4, out (Z :. e3 :. e4) b5) + fused diagonally: (b5) + backpermute (\(T2 x0 x1) -> T2 x0 x1, in (Z :. e3 :. e4) b5, out (Z :. e5 :. e6) b8) + fused diagonally: (b8) + backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e0) b2, out (Z :. e0 :. e1) b3) + fused diagonally: (b3) + backpermute (\(T2 x0 x1) -> T2 x0 x1, in (Z :. e0 :. e1) b3, out (Z :. e5 :. e6) b7) + fused diagonally: (b7) + map (\(x0, x1) -> x0 + x1, in (Z :. e5 :. e6) (b7, b8), out (Z :. e5 :. e6) b6) + fused diagonally: (b6) + fold-2 (\x0 x1 -> x0 + x1, in (Z :. e5 :. e6) b6, out (Z :. e5) b9) + } + return (T2 e5 b9) +SLV'd PartitionedAcc: +\T2 (e0: Int) (b0: [Int]) -> + b1: [Int]¹ = alloc Int[Z :. e0] + b2: [Int]¹ = alloc Int[Z :. e0] + e1: Int = compute 1000000 + e2: Int = compute 1000 + e3: Int = compute 1000 + e4: Int = compute 1000000 + e5: Int = compute min 1000 e0 + e6: Int = compute 1000000 + execute { + map (\x0 -> 2 * x0, in (Z :. e0) b0, out (Z :. e0) b1) + fused diagonally: (b1) + map (\x0 -> 1 + x0, in (Z :. e0) b1, out (Z :. e0) b2) + } + b3: [Int]¹ = alloc Int[Z :. e5] + execute { + generate (\(T1 x0) -> x0 + (b1 !! 0), out (Z :. e2) %0) + fused vertically: (%0) + backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e2) %0, out (Z :. e3 :. e4) %1) + fused vertically: (%1) + backpermute (\(T2 x0 x1) -> T2 x0 x1, in (Z :. e3 :. e4) %1, out (Z :. e5 :. e6) %2) + fused vertically: (%2) + backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e0) b2, out (Z :. e0 :. e1) %3) + fused vertically: (%3) + backpermute (\(T2 x0 x1) -> T2 x0 x1, in (Z :. e0 :. e1) %3, out (Z :. e5 :. e6) %4) + fused vertically: (%4) + map (\(x0, x1) -> x0 + x1, in (Z :. e5 :. e6) (%4, %2), out (Z :. e5 :. e6) %5) + fused vertically: (%5) + fold-2 (\x0 x1 -> x0 + x1, in (Z :. e5 :. e6) %5, out (Z :. e5) b3) + } + return (T2 e5 b3) + +Schedule: +\T2 (s0: Signal, r0: *Int) (s1: Signal, r1: *[Int]) -> + \T2 (s2': Resolver, r2': *out Int) (s3': Resolver, r3': *out [Int]) -> + await [s0] + e0: Int = compute 1000000 + e1: Int = compute 1000 + e2: Int = compute 1000 + e3: Int = compute 1000000 + e4: Int = compute 1000000 + e5: Int = *r0 + e6: Int = compute min 1000 e5 + *r2' = e6 + b0 = alloc Int[Z :. e5] + resolve [s2'] + await [s1] + b1 = alloc Int[Z :. e5] + b2: [Int] = *r1 + execute maps-fa2a84921035b7ec79e4b990b85 {- Cluster with 2 maps-} (out b1, out b0, in b2, e5) + b3 = alloc Int[Z :. e6] + execute fold-map-etc-1a20fd7bd0c61dc78f9 {- Cluster with 1 generate, then 4 backpermutes, then 1 map and finally 1 fold-} (out b3, e4, e6, e3, e2, e1, e0, in b1, in b0, e5) + *r3' = b3 + resolve [s3'] + +accelerate-llvm-native> Test suite nofib-llvm-native passed diff --git a/debug2 b/debug2 new file mode 100644 index 000000000..bd51a810b --- /dev/null +++ b/debug2 @@ -0,0 +1,217 @@ +DEPRECATED: The package at Archive from https://github.com/msakai/haskell-MIP/archive/4295aa21a24a30926b55770c55ac00f749fb8a39.tar.gz in subdir MIP does not include a cabal file. +Instead, it includes an hpack package.yaml file for generating a cabal file. +This usage is deprecated; please see https://github.com/commercialhaskell/stack/issues/5210. +Support for this workflow will be removed in the future. + +Stack has not been tested with GHC versions above 9.0, and using 9.4.5, this may fail +Stack has not been tested with Cabal versions above 3.4, but version 3.8.1.0 was found, this may fail +accelerate-llvm-native> test (suite: nofib-llvm-native) + + + +(fromList [L5{Just L2{Nothing} } :-> L10{Just L2{Nothing} } ],1,fromList [(L1{Nothing} ,fun),(L2{Nothing} ,bod),(L3{Just L2{Nothing} } ,lhs),(L4{Just L2{Nothing} } ,alc),(L5{Just L2{Nothing} } ,exe'),(L6{Just L2{Nothing} } ,lhs),(L7{Just L2{Nothing} } ,cmp),(L8{Just L2{Nothing} } ,lhs),(L9{Just L2{Nothing} } ,alc),(L10{Just L2{Nothing} } ,exe'),(L11{Just L2{Nothing} } ,ret)]) +True + + +(fromList [L5{Just L2{Nothing} } :-> L10{Just L2{Nothing} } ],1,fromList [(L1{Nothing} ,fun),(L2{Nothing} ,bod),(L3{Just L2{Nothing} } ,lhs),(L4{Just L2{Nothing} } ,alc),(L5{Just L2{Nothing} } ,exe'),(L6{Just L2{Nothing} } ,lhs),(L7{Just L2{Nothing} } ,cmp),(L8{Just L2{Nothing} } ,lhs),(L9{Just L2{Nothing} } ,alc),(L10{Just L2{Nothing} } ,exe'),(L11{Just L2{Nothing} } ,ret)]) +True +readswrites +OriginalAcc: +\a0 -> replicate (let () = () in T1 (), 1000000) (map (\x0 -> x0 + 1) a0) + +Desugared OperationAcc: +\T2 (e0: Int) (b0: [Int]) -> + b1: [Int]¹ = alloc Int[Z :. e0] + execute map (\x0 -> x0 + 1, in (Z :. e0) b0, out (Z :. e0) b1) + T1 () = compute let () = () in T1 () + e1: Int = compute 1000000 + T2 (e2: Int) (e3: Int) = compute indexFull (T2 () e1) (T1 e0) + b2: [Int]¹ = alloc Int[Z :. e2 :. e3] + execute backpermute (\(T2 x0 x1) -> indexSlice (T2 () e1) (T2 x0 x1), in (Z :. e0) b1, out (Z :. e2 :. e3) b2) + return (T3 e2 e3 b2) + +Simplified OperationAcc: +\T2 (e0: Int) (b0: [Int]) -> + b1: [Int]¹ = alloc Int[Z :. e0] + execute map (\x0 -> 1 + x0, in (Z :. e0) b0, out (Z :. e0) b1) + e1: Int = compute 1000000 + b2: [Int]¹ = alloc Int[Z :. e0 :. e1] + execute backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e0) b1, out (Z :. e0 :. e1) b2) + return (T3 e0 e1 b2) + +PartitionedAcc: +\T2 (e0: Int) (b0: [Int]) -> + b1: [Int]¹ = alloc Int[Z :. e0] + e1: Int = compute 1000000 + b2: [Int]¹ = alloc Int[Z :. e0 :. e1] + execute { + map (\x0 -> 1 + x0, in (Z :. e0) b0, out (Z :. e0) b1) + fused diagonally: (b1) + backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e0) b1, out (Z :. e0 :. e1) b2) + } + return (T3 e0 e1 b2) +SLV'd PartitionedAcc: +\T2 (e0: Int) (b0: [Int]) -> + e1: Int = compute 1000000 + b1: [Int]¹ = alloc Int[Z :. e0 :. e1] + execute { + map (\x0 -> 1 + x0, in (Z :. e0) b0, out (Z :. e0) %0) + fused vertically: (%0) + backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e0) %0, out (Z :. e0 :. e1) b1) + } + return (T3 e0 e1 b1) + +Schedule: +\T2 (s0: Signal, r0: *Int) (s1: Signal, r1: *[Int]) -> + \T3 (s2': Resolver, r2': *out Int) (s3': Resolver, r3': *out Int) (s4': Resolver, r4': *out [Int]) -> + e0: Int = compute 1000000 + spawn await [s0] { + e1: Int = *r0 + *r2' = e1 + resolve [s2'] + } + spawn { + *r3' = e0 + resolve [s3'] + } + await [s1, s0] + e1: Int = *r0 + b0 = alloc Int[Z :. e1 :. e0] + b1: [Int] = *r1 + execute map-backpermute-0a6fc0f096763b5b {- Cluster with 1 map and then 1 backpermute-} (out b0, e0, in b1, e1) + *r4' = b0 + resolve [s4'] +down +OriginalAcc: +\a0 -> replicate (let () = () in T1 (), 1000000) (map (\x0 -> x0 + 1) a0) + +Desugared OperationAcc: +\T2 (e0: Int) (b0: [Int]) -> + b1: [Int]¹ = alloc Int[Z :. e0] + execute map (\x0 -> x0 + 1, in (Z :. e0) b0, out (Z :. e0) b1) + T1 () = compute let () = () in T1 () + e1: Int = compute 1000000 + T2 (e2: Int) (e3: Int) = compute indexFull (T2 () e1) (T1 e0) + b2: [Int]¹ = alloc Int[Z :. e2 :. e3] + execute backpermute (\(T2 x0 x1) -> indexSlice (T2 () e1) (T2 x0 x1), in (Z :. e0) b1, out (Z :. e2 :. e3) b2) + return (T3 e2 e3 b2) + +Simplified OperationAcc: +\T2 (e0: Int) (b0: [Int]) -> + b1: [Int]¹ = alloc Int[Z :. e0] + execute map (\x0 -> 1 + x0, in (Z :. e0) b0, out (Z :. e0) b1) + e1: Int = compute 1000000 + b2: [Int]¹ = alloc Int[Z :. e0 :. e1] + execute backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e0) b1, out (Z :. e0 :. e1) b2) + return (T3 e0 e1 b2) + +PartitionedAcc: +\T2 (e0: Int) (b0: [Int]) -> + b1: [Int]¹ = alloc Int[Z :. e0] + e1: Int = compute 1000000 + b2: [Int]¹ = alloc Int[Z :. e0 :. e1] + execute { + map (\x0 -> 1 + x0, in (Z :. e0) b0, out (Z :. e0) b1) + fused diagonally: (b1) + backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e0) b1, out (Z :. e0 :. e1) b2) + } + return (T3 e0 e1 b2) +SLV'd PartitionedAcc: +\T2 (e0: Int) (b0: [Int]) -> + e1: Int = compute 1000000 + b1: [Int]¹ = alloc Int[Z :. e0 :. e1] + execute { + map (\x0 -> 1 + x0, in (Z :. e0) b0, out (Z :. e0) %0) + fused vertically: (%0) + backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e0) %0, out (Z :. e0 :. e1) b1) + } + return (T3 e0 e1 b1) + +Schedule: +\T2 (s0: Signal, r0: *Int) (s1: Signal, r1: *[Int]) -> + \T3 (s2': Resolver, r2': *out Int) (s3': Resolver, r3': *out Int) (s4': Resolver, r4': *out [Int]) -> + e0: Int = compute 1000000 + spawn await [s0] { + e1: Int = *r0 + *r2' = e1 + resolve [s2'] + } + spawn { + *r3' = e0 + resolve [s3'] + } + await [s1, s0] + e1: Int = *r0 + b0 = alloc Int[Z :. e1 :. e0] + b1: [Int] = *r1 + execute map-backpermute-0a6fc0f096763b5b {- Cluster with 1 map and then 1 backpermute-} (out b0, e0, in b1, e1) + *r4' = b0 + resolve [s4'] +up +OriginalAcc: +\a0 -> replicate (let () = () in T1 (), 1000000) (map (\x0 -> x0 + 1) a0) + +Desugared OperationAcc: +\T2 (e0: Int) (b0: [Int]) -> + b1: [Int]¹ = alloc Int[Z :. e0] + execute map (\x0 -> x0 + 1, in (Z :. e0) b0, out (Z :. e0) b1) + T1 () = compute let () = () in T1 () + e1: Int = compute 1000000 + T2 (e2: Int) (e3: Int) = compute indexFull (T2 () e1) (T1 e0) + b2: [Int]¹ = alloc Int[Z :. e2 :. e3] + execute backpermute (\(T2 x0 x1) -> indexSlice (T2 () e1) (T2 x0 x1), in (Z :. e0) b1, out (Z :. e2 :. e3) b2) + return (T3 e2 e3 b2) + +Simplified OperationAcc: +\T2 (e0: Int) (b0: [Int]) -> + b1: [Int]¹ = alloc Int[Z :. e0] + execute map (\x0 -> 1 + x0, in (Z :. e0) b0, out (Z :. e0) b1) + e1: Int = compute 1000000 + b2: [Int]¹ = alloc Int[Z :. e0 :. e1] + execute backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e0) b1, out (Z :. e0 :. e1) b2) + return (T3 e0 e1 b2) + +PartitionedAcc: +\T2 (e0: Int) (b0: [Int]) -> + b1: [Int]¹ = alloc Int[Z :. e0] + e1: Int = compute 1000000 + b2: [Int]¹ = alloc Int[Z :. e0 :. e1] + execute { + map (\x0 -> 1 + x0, in (Z :. e0) b0, out (Z :. e0) b1) + fused diagonally: (b1) + backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e0) b1, out (Z :. e0 :. e1) b2) + } + return (T3 e0 e1 b2) +SLV'd PartitionedAcc: +\T2 (e0: Int) (b0: [Int]) -> + e1: Int = compute 1000000 + b1: [Int]¹ = alloc Int[Z :. e0 :. e1] + execute { + map (\x0 -> 1 + x0, in (Z :. e0) b0, out (Z :. e0) %0) + fused vertically: (%0) + backpermute (\(T2 x0 _) -> T1 x0, in (Z :. e0) %0, out (Z :. e0 :. e1) b1) + } + return (T3 e0 e1 b1) + +Schedule: +\T2 (s0: Signal, r0: *Int) (s1: Signal, r1: *[Int]) -> + \T3 (s2': Resolver, r2': *out Int) (s3': Resolver, r3': *out Int) (s4': Resolver, r4': *out [Int]) -> + e0: Int = compute 1000000 + spawn await [s0] { + e1: Int = *r0 + *r2' = e1 + resolve [s2'] + } + spawn { + *r3' = e0 + resolve [s3'] + } + await [s1, s0] + e1: Int = *r0 + b0 = alloc Int[Z :. e1 :. e0] + b1: [Int] = *r1 + execute map-backpermute-0a6fc0f096763b5b {- Cluster with 1 map and then 1 backpermute-} (out b0, e0, in b1, e1) + *r4' = b0 + resolve [s4'] + +accelerate-llvm-native> Test suite nofib-llvm-native passed diff --git a/runovernight b/runovernight new file mode 100644 index 000000000..1396b3f46 --- /dev/null +++ b/runovernight @@ -0,0 +1 @@ +benchmarking Left NumClusters/complex/32768 diff --git a/stack.yaml b/stack.yaml index 7ab664fa2..51a84694e 100644 --- a/stack.yaml +++ b/stack.yaml @@ -26,7 +26,9 @@ extra-deps: subdirs: - llvm-hs - llvm-hs-pure - +# - highs-lp-0.0 +# - linear-programming-0.0.1 +# - comfort-array-0.5.5 # Override default flag values for local packages and extra-deps # flags: