benchmarks

dpvanbalen · Aug 2, 2023 · c3accce · c3accce
1 parent 4ae01ba
commit c3accce
Show file tree

Hide file tree

Showing 3 changed files with 1,181 additions and 166 deletions.
diff --git a/accelerate-llvm-native/test/nofib/Main.hs b/accelerate-llvm-native/test/nofib/Main.hs
@@ -32,32 +32,31 @@ import Data.Array.Accelerate.Unsafe
 main :: IO ()
 main = do
   -- benchmarking:
-  -- defaultMain $ Prelude.map benchOption [minBound :: Objective .. maxBound]
-
-  -- problem: just combining the lists (taking True over False) as now only works if the backpermute isn't rank changing
-  -- need to actually keep track of which array, which dimension, we need at which point.. stacking directions (the clean solution from paper) seems easiest tbh
-  -- main problems with that approach: 1. could break bunch of stuff, 2. direction bounds get wide af
-  -- Ah, alternatively: I'll just make each 'False' loopsize the right rank using the depth in bcan2: trimming the outermost loops
-  -- or adding 0s on the outermost loops.
-  Prelude.print $ runNWithObj @Native ArrayReadsWrites $ quicksort $ use $ fromList (Z :. 5) [100::Int, 200, 3, 5, 4]
+  defaultMain $ 
+    Prelude.map (benchOption . Prelude.Left) [minBound :: Objective .. maxBound] 
+    Prelude.++ 
+    Prelude.map (benchOption . Prelude.Right) [NoFusion, GreedyFusion]
+
+  -- Prelude.print $ runNWithObj @Native ArrayReadsWrites $ quicksort $ use $ fromList (Z :. 5) [100::Int, 200, 3, 5, 4]
   where
+    benchOption :: Prelude.Either Objective Benchmarking -> Benchmark
     benchOption obj = bgroup (show obj)
-      [ benchProgram "diagonal " diagonal  obj
-      , benchProgram "diagonal'" diagonal' obj
-      , benchProgram "complex" complex obj
-      , benchProgram "complex'" complex' obj
+      [ 
+      --   benchProgram "diagonal " diagonal  obj
+      -- , benchProgram "diagonal'" diagonal' obj
+        benchProgram "complex" complex obj
+      -- , benchProgram "complex'" complex' obj
       , benchProgram "complexAdd" complexAdd obj
-      , benchProgram "complexAdd'" complexAdd' obj
+      -- , benchProgram "complexAdd'" complexAdd' obj
       , benchProgram "singleLoop" singleLoop obj
       , benchProgram "singleLoop'" singleLoop' obj
       , benchProgram "futharkbadaccelerategood" futharkbadaccelerategood obj
       , benchProgram "reverses" reverses obj
       ]
-    benchProgram str pr obj = env (return $ runNWithObj @Native obj pr) $ \p -> bgroup str
-      [ benchsize 32         p
-      , benchsize (32*32)    p
-      , benchsize (32*32*32) p
-      ]
+    benchProgram str pr (Prelude.Left obj) = env (return $ runNWithObj @Native obj pr) $ \p -> bgroup str
+      [ benchsize (32*32*32) p ]
+    benchProgram str pr (Prelude.Right obj) = env (return $ runNBench @Native obj pr) $ \p -> bgroup str
+      [ benchsize (32*32*32) p ] 
     xs n = fromList (Z:.n) $ Prelude.map (`Prelude.mod` (n `div` 2)) [1 :: Int ..]
     benchsize n p = env (return $ xs n) $ \xs -> bench (show n) $ nf p xs
       -- we force the result by indexing into a result array and forcing that number. 
@@ -138,7 +137,7 @@ diagonal' :: Acc (Vector Int) -> Acc (Vector Int, Vector Int)
 diagonal' xs = let ys = A.map (+2) xs in T2 ys (A.map (+3) $ barrier ys)
 
 futharkbadaccelerategood :: Acc (Vector Int) -> Acc (Vector Int, Vector Int)
-futharkbadaccelerategood = complex . map (*4)
+futharkbadaccelerategood = complex . map (\x -> x - 1)
 
 
 
@@ -197,146 +196,3 @@ awhileFuse c f x = asnd $ A.awhile c' f' x'
 --   in permute (+) (\i -> i/2) as bs
 
 
-
-
-
-quicksort :: Ord a => Acc (Vector a) -> Acc (Vector a)
-quicksort input = result
-  where
-    -- Initially, we have one segment, namely the whole array
-    initialFlags = scatter (fill (I1 1) 0 ++ fill (I1 1) (length input)) emptyFlags fullFlags
-    emptyFlags   = fill (I1 (1 + length input)) False_
-    fullFlags    = fill (I1 2) True_
-
-    -- We stop when each segment contains just one element, as segments of
-    -- one element are sorted.
-    T2 result _ = awhile condition step $ T2 input initialFlags
-
-type State a =
-  ( Vector a      -- Values
-  , Vector Bool   -- Head flags, denoting the starting points of the unsorted segments
-  )
-
-step :: Ord a => Acc (State a) -> Acc (State a)
-step (T2 values headFlags) = (T2 values' headFlags')
-  where
-    -- Per element, the pivot of the segment of that element
-    -- For each segment, we just take the first element as pivot
-    pivots = propagateSegmentHead headFlags values
-
-    -- Find which elements are larger than the pivot
-    isLarger = zipWith (>=) values pivots
-
-    -- Propagate the start index of a segment to all elements
-    startIndex = propagateSegmentHead headFlags (generate (shape values) unindex1)
-
-    -- Compute the offsets to which the elements must be moved using a scan
-    indicesLarger, indicesSmaller :: Acc (Vector Int)
-    indicesLarger  = map (\x -> x - 1) $ postscanSegHead (+) headFlags $ map (? (1, 0)) isLarger
-    indicesSmaller = map (\x -> x - 1) $ postscanSegHead (+) headFlags $ map (? (0, 1)) isLarger
-
-    -- Propagate the number of smaller elements to each segment
-    -- This is needed as an offset for the larger elements
-    countSmaller :: Acc (Vector Int)
-    countSmaller = map (+1) $ propagateSegmentLast headFlags indicesSmaller
-
-    -- Compute the new indices of the elements
-    permutation = zipWith5 partitionPermuteIndex isLarger startIndex indicesSmaller indicesLarger countSmaller
-
-    -- Perform the permutation
-    values' = scatter permutation (fill (shape values) undef) values
-
-    -- Update the head flags for the next iteration (the 'recursive call' in a traditional implementation)
-    -- Mark new section starts at:
-    --  * the position of the pivot
-    --  * the position of the pivot + 1
-    headFlags' =
-      let
-          f :: Int -> Exp Bool -> Exp Int -> Exp Int -> Exp (Maybe DIM1)
-          f inc headF start countSmall =
-            headF ? (Just_ (I1 $ start + countSmall + constant inc), Nothing_)
-
-          writes :: Int -> Acc (Vector (Maybe DIM1))
-          writes inc = zipWith3 (f inc) headFlags startIndex countSmaller
-      in
-      -- Note that (writes 1) may go out of bounds of the values array.
-      -- We made the headFlags array one larger, such that this gives no problems.
-      writeFlags (writes 0) $ writeFlags (writes 1) $ headFlags
-
--- Checks whether all segments have length 1. If that is the case, then the
--- loop may terminate.
---
-condition :: Elt a => Acc (State a) -> Acc (Scalar Bool)
-condition (T2 _ headFlags) = map not $ fold (&&) True_ headFlags
-
--- Finds the new index of an element of the list, as the result of the
--- partition
---
-partitionPermuteIndex :: Exp Bool -> Exp Int -> Exp Int -> Exp Int -> Exp Int -> Exp Int
-partitionPermuteIndex isLarger start indexIfSmaller indexIfLarger countSmaller =
-  start + (isLarger ? (countSmaller + indexIfLarger, indexIfSmaller))
-
--- Given head flags, propagates the value of the head to all elements in
--- the segment
---
-propagateSegmentHead
-    :: Elt a
-    => Acc (Vector Bool)
-    -> Acc (Vector a)
-    -> Acc (Vector a)
-propagateSegmentHead headFlags values
-  = map fst
-  $ postscanl f (T2 undef True_)
-  $ zip values headFlags
-  where
-    f left (T2 rightValue rightFlag) =
-      if rightFlag
-         then T2 rightValue True_
-         else left
-
--- Given head flags, propagates the value of the head to all elements in
--- the segment
---
-propagateSegmentLast
-    :: Elt a
-    => Acc (Vector Bool)
-    -> Acc (Vector a)
-    -> Acc (Vector a)
-propagateSegmentLast headFlags values
-  = map fst
-  $ reverse
-  $ postscanl f (T2 undef True_)
-  $ reverse
-  $ zip values
-  $ tail headFlags
-  where
-    f (T2 leftValue leftFlag) right =
-      if leftFlag
-         then T2 leftValue True_
-         else right
-
--- Segmented postscan, where the segments are defined with head flags
---
-postscanSegHead
-    :: Elt a
-    => (Exp a -> Exp a -> Exp a)
-    -> Acc (Vector Bool)
-    -> Acc (Vector a)
-    -> Acc (Vector a)
-postscanSegHead f headFlags values
-  = map fst
-  $ postscanl g (T2 undef True_)
-  $ zip values headFlags
-  where
-    g (T2 leftValue leftFlag) (T2 rightValue rightFlag)
-      = T2
-          (rightFlag ? (rightValue, f leftValue rightValue))
-          (leftFlag .|. rightFlag)
-
--- Writes True to the specified indices in a flags arrays
---
-writeFlags
-    :: Acc (Vector (Maybe DIM1))
-    -> Acc (Vector Bool)
-    -> Acc (Vector Bool)
-writeFlags writes flags = permute const flags (writes !) (fill (shape writes) True_)
diff --git a/accelerate-llvm-native/withNoAndGreedy.html b/accelerate-llvm-native/withNoAndGreedy.html
diff --git a/hie.yaml b/hie.yaml
@@ -6,11 +6,11 @@ cradle:
     - path: "accelerate-llvm-native/src"
       component: "accelerate-llvm-native:lib"
 
-    # - path: "accelerate-llvm-native/test/nofib"
-    #   component: "accelerate-llvm-native:test:nofib-llvm-native"
+    - path: "accelerate-llvm-native/test/nofib"
+      component: "accelerate-llvm-native:test:nofib-llvm-native"
 
-    - path: "accelerate-llvm-ptx/src"
-      component: "accelerate-llvm-ptx:lib"
+    # - path: "accelerate-llvm-ptx/src"
+    #   component: "accelerate-llvm-ptx:lib"
 
     # - path: "accelerate-llvm-ptx/test/nofib"
     #   component: "accelerate-llvm-ptx:test:nofib-llvm-ptx"