PackBits.hs 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. {-
  2. - Copyright (C) 2019 Koz Ross <koz.ross@retro-freedom.nz>
  3. -
  4. - This program is free software: you can redistribute it and/or modify
  5. - it under the terms of the GNU General Public License as published by
  6. - the Free Software Foundation, either version 3 of the License, or
  7. - (at your option) any later version.
  8. -
  9. - This program is distributed in the hope that it will be useful,
  10. - but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. - GNU General Public License for more details.
  13. -
  14. - You should have received a copy of the GNU General Public License
  15. - along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. -}
  17. {-# OPTIONS_GHC -fplugin GHC.TypeLits.Extra.Solver #-}
  18. {-# OPTIONS_GHC -fplugin GHC.TypeLits.KnownNat.Solver #-}
  19. {-# LANGUAGE ViewPatterns #-}
  20. {-# LANGUAGE PatternSynonyms #-}
  21. {-# LANGUAGE RoleAnnotations #-}
  22. {-# LANGUAGE AllowAmbiguousTypes #-}
  23. {-# LANGUAGE ScopedTypeVariables #-}
  24. {-# LANGUAGE TypeInType #-}
  25. {-# LANGUAGE TypeOperators #-}
  26. {-# LANGUAGE TypeFamilies #-}
  27. {-# LANGUAGE TypeApplications #-}
  28. {-# LANGUAGE Trustworthy #-}
  29. {-# LANGUAGE MultiParamTypeClasses #-}
  30. {-# LANGUAGE UndecidableInstances #-}
  31. {-# LANGUAGE StandaloneDeriving #-}
  32. {-# LANGUAGE GeneralizedNewtypeDeriving #-}
  33. -- |
  34. -- Module: Data.Finitary.PackBits
  35. -- Description: Scheme for bit-packing @Finitary@ types.
  36. -- Copyright: (C) Koz Ross 2019
  37. -- License: GPL version 3.0 or later
  38. -- Maintainer: koz.ross@retro-freedom.nz
  39. -- Stability: Experimental
  40. -- Portability: GHC only
  41. --
  42. -- From the [Kraft-McMillan
  43. -- inequality](https://en.wikipedia.org/wiki/Kraft%E2%80%93McMillan_inequality)
  44. -- and
  45. -- the fact that we are not able to have \'fractional\' bits, we can derive a
  46. -- fixed-length code into a bitstring for any 'Finitary' type @a@, with code
  47. -- length \(\lceil \log_{2}(\texttt{Cardinality a}) \rceil\) bits. This code is
  48. -- essentially a binary representation of the index of each inhabitant of @a@.
  49. -- On that basis, we can derive an 'VU.Unbox' instance, representing
  50. -- the entire 'VU.Vector' as an unboxed [bit
  51. -- array](https://en.wikipedia.org/wiki/Bit_array).
  52. --
  53. -- This encoding is advantageous from the point of view of space - there is no
  54. -- tighter possible packing that preserves \(\Theta(1)\) random access and also
  55. -- allows the full range of 'VU.Vector' operations. If you are concerned about
  56. -- space usage above all, this is the best choice for you.
  57. --
  58. -- Because access to individual bits is slower than whole bytes or words, this
  59. -- encoding adds some overhead. Additionally, a primary advantage of bit arrays
  60. -- (the ability to perform \'bulk\' operations on bits efficiently) is not made
  61. -- use of here. Therefore, if speed matters more than compactness, this encoding
  62. -- is suboptimal.
  63. --
  64. -- This encoding is __thread-safe__, and thus slightly slower. If you are certain
  65. -- that race conditions cannot occur for your code, you can gain a speed improvement
  66. -- by using "Data.Finitary.PackBits.Unsafe" instead.
  67. module Data.Finitary.PackBits
  68. (
  69. PackBits, pattern Packed,
  70. BulkPack, exposeVector
  71. ) where
  72. import GHC.TypeLits.Extra
  73. import Data.Proxy (Proxy(..))
  74. import Numeric.Natural (Natural)
  75. import GHC.TypeNats
  76. import CoercibleUtils (op, over, over2)
  77. import Data.Kind (Type)
  78. import Data.Hashable (Hashable(..))
  79. import Data.Vector.Instances ()
  80. import Data.Vector.Binary ()
  81. import Control.DeepSeq (NFData(..))
  82. import Data.Finitary(Finitary(..))
  83. import Data.Finite (Finite)
  84. import Control.Monad.Trans.State.Strict (evalState, get, modify, put)
  85. import Data.Semigroup (Dual(..))
  86. import qualified Data.Binary as Bin
  87. import qualified Data.Bit.ThreadSafe as BT
  88. import qualified Data.Vector.Generic as VG
  89. import qualified Data.Vector.Generic.Mutable as VGM
  90. import qualified Data.Vector.Unboxed as VU
  91. -- | An opaque wrapper around @a@, representing each value as a 'bit-packed'
  92. -- encoding.
  93. newtype PackBits (a :: Type) = PackBits (VU.Vector BT.Bit)
  94. deriving (Eq, Show)
  95. type role PackBits nominal
  96. -- | To provide (something that resembles a) data constructor for 'PackBits', we
  97. -- provide the following pattern. It can be used like any other data
  98. -- constructor:
  99. --
  100. -- > import Data.Finitary.PackBits
  101. -- >
  102. -- > anInt :: PackBits Int
  103. -- > anInt = Packed 10
  104. -- >
  105. -- > isPackedEven :: PackBits Int -> Bool
  106. -- > isPackedEven (Packed x) = even x
  107. --
  108. -- __Every__ pattern match, and data constructor call, performs a
  109. -- \(\Theta(\log_{2}(\texttt{Cardinality a}))\) encoding or decoding operation.
  110. -- Use with this in mind.
  111. pattern Packed :: forall (a :: Type) .
  112. (Finitary a, 1 <= Cardinality a) =>
  113. a -> PackBits a
  114. pattern Packed x <- (unpackBits -> x)
  115. where Packed x = packBits x
  116. instance Ord (PackBits a) where
  117. compare (PackBits v1) (PackBits v2) = getDual . VU.foldr go (Dual EQ) . VU.zipWith (,) v1 $ v2
  118. where go input order = (order <>) . Dual . uncurry compare $ input
  119. instance NFData (PackBits a) where
  120. {-# INLINE rnf #-}
  121. rnf = rnf . op PackBits
  122. instance (Finitary a, 1 <= Cardinality a) => Finitary (PackBits a) where
  123. type Cardinality (PackBits a) = Cardinality a
  124. {-# INLINE fromFinite #-}
  125. fromFinite = PackBits . intoBits
  126. {-# INLINE toFinite #-}
  127. toFinite = outOfBits . op PackBits
  128. instance (Finitary a, 1 <= Cardinality a) => Bounded (PackBits a) where
  129. {-# INLINE minBound #-}
  130. minBound = start
  131. {-# INLINE maxBound #-}
  132. maxBound = end
  133. newtype instance VU.MVector s (PackBits a) = MV_PackBits (VU.MVector s BT.Bit)
  134. instance (Finitary a, 1 <= Cardinality a) => VGM.MVector VU.MVector (PackBits a) where
  135. {-# INLINE basicLength #-}
  136. basicLength = over MV_PackBits ((`div` bitLength @a) . VGM.basicLength)
  137. {-# INLINE basicOverlaps #-}
  138. basicOverlaps = over2 MV_PackBits VGM.basicOverlaps
  139. {-# INLINE basicUnsafeSlice #-}
  140. basicUnsafeSlice i len = over MV_PackBits (VGM.basicUnsafeSlice (i * bitLength @a) (len * bitLength @a))
  141. {-# INLINE basicUnsafeNew #-}
  142. basicUnsafeNew len = fmap MV_PackBits (VGM.basicUnsafeNew (len * bitLength @a))
  143. {-# INLINE basicInitialize #-}
  144. basicInitialize = VGM.basicInitialize . op MV_PackBits
  145. {-# INLINE basicUnsafeRead #-}
  146. basicUnsafeRead (MV_PackBits v) i = fmap PackBits . VG.freeze . VGM.unsafeSlice (i * bitLength @a) (bitLength @a) $ v
  147. {-# INLINE basicUnsafeWrite #-}
  148. basicUnsafeWrite (MV_PackBits v) i (PackBits x) = let slice = VGM.unsafeSlice (i * bitLength @a) (bitLength @a) v in
  149. VG.unsafeCopy slice x
  150. newtype instance VU.Vector (PackBits a) = V_PackBits (VU.Vector BT.Bit)
  151. instance (Finitary a, 1 <= Cardinality a) => VG.Vector VU.Vector (PackBits a) where
  152. {-# INLINE basicLength #-}
  153. basicLength = over V_PackBits ((`div` bitLength @a) . VG.basicLength)
  154. {-# INLINE basicUnsafeFreeze #-}
  155. basicUnsafeFreeze = fmap V_PackBits . VG.basicUnsafeFreeze . op MV_PackBits
  156. {-# INLINE basicUnsafeThaw #-}
  157. basicUnsafeThaw = fmap MV_PackBits . VG.basicUnsafeThaw . op V_PackBits
  158. {-# INLINE basicUnsafeSlice #-}
  159. basicUnsafeSlice i len = over V_PackBits (VG.basicUnsafeSlice (i * bitLength @a) (len * bitLength @a))
  160. {-# INLINE basicUnsafeIndexM #-}
  161. basicUnsafeIndexM (V_PackBits v) i = pure . PackBits . VG.unsafeSlice (i * bitLength @a) (bitLength @a) $ v
  162. instance (Finitary a, 1 <= Cardinality a) => VU.Unbox (PackBits a)
  163. -- | This wrapper provides an efficient 'Hashable' instance (hash the entire
  164. -- underlying bit-packed vector, rather than each element individually), as well
  165. -- as a 'Bin.Binary' instance (which stores or reads the entire blob of
  166. -- bits \'in one go\').
  167. newtype BulkPack a = BulkPack { exposeVector :: VU.Vector (PackBits a) }
  168. deriving (NFData)
  169. deriving instance (Finitary a, 1 <= Cardinality a) => Eq (BulkPack a)
  170. deriving instance (Finitary a, 1 <= Cardinality a) => Ord (BulkPack a)
  171. instance Hashable (BulkPack a) where
  172. {-# INLINE hashWithSalt #-}
  173. hashWithSalt salt = hashWithSalt salt . BT.cloneToWords . op V_PackBits . op BulkPack
  174. instance Bin.Binary (BulkPack a) where
  175. {-# INLINE put #-}
  176. put = Bin.put . BT.cloneToWords . op V_PackBits . op BulkPack
  177. {-# INLINE get #-}
  178. get = BulkPack . V_PackBits . BT.castFromWords <$> Bin.get
  179. -- Helpers
  180. type BitLength a = CLog 2 (Cardinality a)
  181. {-# INLINE packBits #-}
  182. packBits :: forall (a :: Type) .
  183. (Finitary a, 1 <= Cardinality a) =>
  184. a -> PackBits a
  185. packBits = fromFinite . toFinite
  186. {-# INLINE unpackBits #-}
  187. unpackBits :: forall (a :: Type) .
  188. (Finitary a, 1 <= Cardinality a) =>
  189. PackBits a -> a
  190. unpackBits = fromFinite . toFinite
  191. {-# INLINE bitLength #-}
  192. bitLength :: forall (a :: Type) (b :: Type) .
  193. (Finitary a, 1 <= Cardinality a, Num b) =>
  194. b
  195. bitLength = fromIntegral . natVal $ (Proxy :: Proxy (BitLength a))
  196. {-# INLINE intoBits #-}
  197. intoBits :: forall (n :: Nat) .
  198. (KnownNat n, 1 <= n) =>
  199. Finite n -> VU.Vector BT.Bit
  200. intoBits = evalState (VU.replicateM (bitLength @(Finite n)) go) . fromIntegral @_ @Natural
  201. where go = do remaining <- get
  202. let (d, r) = quotRem remaining 2
  203. put d >> pure (BT.Bit . toEnum . fromIntegral $ r)
  204. {-# INLINE outOfBits #-}
  205. outOfBits :: forall (n :: Nat) .
  206. (KnownNat n) =>
  207. VU.Vector BT.Bit -> Finite n
  208. outOfBits v = evalState (VU.foldM' go 0 v) 1
  209. where go old (BT.Bit b) = do power <- get
  210. let placeValue = power * (fromIntegral . fromEnum $ b)
  211. modify (* 2)
  212. return (old + placeValue)