fpu.vhdl 109 KB


  1. -- Floating-point unit for Microwatt
  2. library ieee;
  3. use ieee.std_logic_1164.all;
  4. use ieee.numeric_std.all;
  5. library work;
  6. use work.insn_helpers.all;
  7. use work.decode_types.all;
  8. use work.crhelpers.all;
  9. use work.helpers.all;
  10. use work.common.all;
  11. entity fpu is
  12. port (
  13. clk : in std_ulogic; -- clock; state is registered on its rising edge
  14. rst : in std_ulogic; -- synchronous reset, sampled on the rising clock edge
  15. e_in : in Execute1toFPUType; -- incoming instruction: valid flag, opcode, operands and control bits
  16. e_out : out FPUToExecute1Type; -- busy and exception status returned to the issuer
  17. w_out : out FPUToWritebackType -- FPR/CR results and interrupt information
  18. );
  19. end entity fpu;
  20. architecture behaviour of fpu is
  21. type fp_number_class is (ZERO, FINITE, INFINITY, NAN); -- operand/result class; denormals are classed as FINITE by decode_dp
  22. constant EXP_BITS : natural := 13; -- width of every signed exponent field in this unit
  23. type fpu_reg_type is record -- a decoded operand, as returned by decode_dp
  24. class : fp_number_class;
  25. negative : std_ulogic; -- copy of bit 63 of the source register
  26. exponent : signed(EXP_BITS-1 downto 0); -- unbiased; -1022 for zero/denormal FP inputs, 0 for integer inputs
  27. mantissa : std_ulogic_vector(63 downto 0); -- 10.54 format: unit bit is bit 54, fraction in bits 53..2 (raw 64-bit value for integer inputs)
  28. end record;
  29. type state_t is (IDLE, -- state machine states; IDLE is the reset state and the only one in which e_in.valid may be asserted
  30. DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
  31. DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
  32. DO_FCFID, DO_FCTI,
  33. DO_FRSP, DO_FRI,
  34. DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
  35. DO_FRE, DO_FRSQRTE,
  36. DO_FSEL,
  37. FRI_1,
  38. ADD_1, ADD_SHIFT, ADD_2, ADD_3,
  39. CMP_1, CMP_2,
  40. MULT_1,
  41. FMADD_1, FMADD_2, FMADD_3,
  42. FMADD_4, FMADD_5, FMADD_6,
  43. LOOKUP,
  44. DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
  45. FRE_1,
  46. RSQRT_1,
  47. FTDIV_1,
  48. SQRT_1, SQRT_2, SQRT_3, SQRT_4,
  49. SQRT_5, SQRT_6, SQRT_7, SQRT_8,
  50. SQRT_9, SQRT_10, SQRT_11, SQRT_12,
  51. INT_SHIFT, INT_ROUND, INT_ISHIFT,
  52. INT_FINAL, INT_CHECK, INT_OFLOW,
  53. FINISH, NORMALIZE,
  54. ROUND_UFLOW, ROUND_OFLOW,
  55. ROUNDING, ROUNDING_2, ROUNDING_3,
  56. DENORM,
  57. RENORM_A, RENORM_A2,
  58. RENORM_B, RENORM_B2,
  59. RENORM_C, RENORM_C2,
  60. NAN_RESULT, EXC_RESULT);
  61. type reg_type is record -- complete registered state of the FPU (signal r, next value rin)
  62. state : state_t; -- current state; forced to IDLE on reset
  63. busy : std_ulogic; -- drives e_out.busy
  64. instr_done : std_ulogic; -- instruction finished; gates w_out.valid and the CR write
  65. do_intr : std_ulogic; -- drives w_out.interrupt and suppresses w_out.valid
  66. illegal : std_ulogic; -- selects which of the two SRR1 bits is reported with the interrupt
  67. op : insn_type_t; -- captured from e_in.op
  68. insn : std_ulogic_vector(31 downto 0); -- captured from e_in.insn
  69. nia : std_ulogic_vector(63 downto 0); -- captured from e_in.nia; reported as w_out.srr0
  70. instr_tag : instr_tag_t; -- captured from e_in.itag
  71. dest_fpr : gspr_index_t; -- captured from e_in.frt; drives w_out.write_reg
  72. fe_mode : std_ulogic; -- OR of the e_in.fe_mode bits
  73. rc : std_ulogic; -- captured from e_in.rc
  74. is_cmp : std_ulogic; -- captured from e_in.out_cr
  75. single_prec : std_ulogic; -- captured from e_in.single
  76. fpscr : std_ulogic_vector(31 downto 0); -- FPSCR image; cleared on reset
  77. a : fpu_reg_type; -- decoded e_in.fra
  78. b : fpu_reg_type; -- decoded e_in.frb
  79. c : fpu_reg_type; -- decoded e_in.frc
  80. r : std_ulogic_vector(63 downto 0); -- 10.54 format
  81. s : std_ulogic_vector(55 downto 0); -- extended fraction
  82. x : std_ulogic;
  83. p : std_ulogic_vector(63 downto 0); -- 8.56 format
  84. y : std_ulogic_vector(63 downto 0); -- 8.56 format
  85. result_sign : std_ulogic;
  86. result_class : fp_number_class;
  87. result_exp : signed(EXP_BITS-1 downto 0);
  88. shift : signed(EXP_BITS-1 downto 0);
  89. writing_back : std_ulogic; -- drives w_out.write_enable; cleared on reset
  90. int_result : std_ulogic; -- cleared when an instruction is captured
  91. cr_result : std_ulogic_vector(3 downto 0); -- replicated into all eight fields of w_out.write_cr_data
  92. cr_mask : std_ulogic_vector(7 downto 0); -- drives w_out.write_cr_mask; field 1 unless e_in.out_cr selects the insn's BF field
  93. old_exc : std_ulogic_vector(4 downto 0);
  94. update_fprf : std_ulogic;
  95. quieten_nan : std_ulogic; -- set when an instruction is captured
  96. tiny : std_ulogic;
  97. denorm : std_ulogic;
  98. round_mode : std_ulogic_vector(2 downto 0); -- '0' & FPSCR RN field at capture time
  99. is_subtract : std_ulogic;
  100. exp_cmp : std_ulogic; -- set at capture when a.exponent > b.exponent
  101. madd_cmp : std_ulogic; -- set at capture when a.exponent + c.exponent + 1 >= b.exponent
  102. add_bsmall : std_ulogic;
  103. is_multiply : std_ulogic;
  104. is_sqrt : std_ulogic; -- makes lut_access address the reciprocal square root blocks of the table
  105. first : std_ulogic;
  106. count : unsigned(1 downto 0);
  107. doing_ftdiv : std_ulogic_vector(1 downto 0); -- bit 1 lowers max_exp to 1020, bit 0 raises min_exp to -1021 (double precision only)
  108. opsel_a : std_ulogic_vector(1 downto 0);
  109. use_a : std_ulogic;
  110. use_b : std_ulogic;
  111. use_c : std_ulogic;
  112. invalid : std_ulogic;
  113. negate : std_ulogic;
  114. longmask : std_ulogic; -- initialised from e_in.single at capture
  115. end record;
  116. type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0); -- 1024 entries of 18 bits each
  117. signal r, rin : reg_type; -- r is the registered state, rin the next state loaded on each clock
  118. signal fp_result : std_ulogic_vector(63 downto 0); -- drives w_out.write_data
  119. signal opsel_b : std_ulogic_vector(1 downto 0);
  120. signal opsel_r : std_ulogic_vector(1 downto 0);
  121. signal opsel_s : std_ulogic_vector(1 downto 0);
  122. signal opsel_ainv : std_ulogic;
  123. signal opsel_mask : std_ulogic;
  124. signal opsel_binv : std_ulogic;
  125. signal in_a : std_ulogic_vector(63 downto 0);
  126. signal in_b : std_ulogic_vector(63 downto 0);
  127. signal result : std_ulogic_vector(63 downto 0);
  128. signal carry_in : std_ulogic;
  129. signal lost_bits : std_ulogic;
  130. signal r_hi_nz : std_ulogic; -- OR of r.r(55 downto 31)
  131. signal r_lo_nz : std_ulogic; -- OR of r.r(30 downto 2)
  132. signal s_nz : std_ulogic; -- OR of all bits of r.s
  133. signal misc_sel : std_ulogic_vector(3 downto 0);
  134. signal f_to_multiply : MultiplyInputType; -- input record of the fpu_multiply_0 instance
  135. signal multiply_to_f : MultiplyOutputType; -- output record of the fpu_multiply_0 instance
  136. signal msel_1 : std_ulogic_vector(1 downto 0);
  137. signal msel_2 : std_ulogic_vector(1 downto 0);
  138. signal msel_add : std_ulogic_vector(1 downto 0);
  139. signal msel_inv : std_ulogic;
  140. signal inverse_est : std_ulogic_vector(18 downto 0); -- registered lookup-table output with the implied leading '1' prepended
  141. -- opsel values: 2-bit selector encodings.  NOTE(review): the prefixes suggest AIN_* pairs with opsel_a, BIN_* with opsel_b, RES_* with opsel_r and S_* with opsel_s -- confirm against the datapath code
  142. constant AIN_R : std_ulogic_vector(1 downto 0) := "00";
  143. constant AIN_A : std_ulogic_vector(1 downto 0) := "01";
  144. constant AIN_B : std_ulogic_vector(1 downto 0) := "10";
  145. constant AIN_C : std_ulogic_vector(1 downto 0) := "11";
  146. constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
  147. constant BIN_R : std_ulogic_vector(1 downto 0) := "01";
  148. constant BIN_RND : std_ulogic_vector(1 downto 0) := "10";
  149. constant BIN_PS6 : std_ulogic_vector(1 downto 0) := "11";
  150. constant RES_SUM : std_ulogic_vector(1 downto 0) := "00";
  151. constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
  152. constant RES_MULT : std_ulogic_vector(1 downto 0) := "10";
  153. constant RES_MISC : std_ulogic_vector(1 downto 0) := "11";
  154. constant S_ZERO : std_ulogic_vector(1 downto 0) := "00";
  155. constant S_NEG : std_ulogic_vector(1 downto 0) := "01";
  156. constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
  157. constant S_MULT : std_ulogic_vector(1 downto 0) := "11";
  158. -- msel values: 2-bit selector encodings.  NOTE(review): presumably MUL1_* pairs with msel_1, MUL2_* with msel_2 and MULADD_* with msel_add -- confirm against the multiplier input code
  159. constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
  160. constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
  161. constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
  162. constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
  163. constant MUL2_C : std_ulogic_vector(1 downto 0) := "00";
  164. constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
  165. constant MUL2_P : std_ulogic_vector(1 downto 0) := "10";
  166. constant MUL2_R : std_ulogic_vector(1 downto 0) := "11";
  167. constant MULADD_ZERO : std_ulogic_vector(1 downto 0) := "00";
  168. constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
  169. constant MULADD_A : std_ulogic_vector(1 downto 0) := "10";
  170. constant MULADD_RS : std_ulogic_vector(1 downto 0) := "11";
  171. -- Inverse lookup table, indexed by the top 8 fraction bits of the operand;
  172. -- two further high-order address bits select one of four 256-entry blocks.
  173. -- The first 256 entries (block 0) are the reciprocal (1/x) lookup table,
  174. -- and the remaining 768 entries (blocks 1 to 3) are the reciprocal square
  175. -- root table.  Output range is [0.5, 1) in 0.19 format, though the top
  176. -- bit isn't stored since it is always 1.  Each output value is the inverse
  177. -- of the center of the input range for the value, i.e. entry 0 is
  178. -- 1 / (1 + 1/512), entry 1 is 1 / (1 + 3/512), etc.
  179. signal inverse_table : lookup_table := (
  180. -- 1/x lookup table
  181. -- Unit bit is assumed to be 1, so input range is [1, 2)
  182. 18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
  183. 18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
  184. 18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
  185. 18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
  186. 18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
  187. 18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
  188. 18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
  189. 18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
  190. 18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
  191. 18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
  192. 18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
  193. 18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
  194. 18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
  195. 18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
  196. 18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
  197. 18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
  198. 18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
  199. 18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
  200. 18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
  201. 18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
  202. 18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
  203. 18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
  204. 18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
  205. 18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
  206. 18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
  207. 18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
  208. 18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
  209. 18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
  210. 18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
  211. 18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
  212. 18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
  213. 18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
  214. -- 1/sqrt(x) lookup table
  215. -- Input is in the range [1, 4), i.e. two bits to the left of the
  216. -- binary point. Those 2 bits index the following 3 blocks of 256 values.
  217. -- 1.0 ... 1.9999
  218. 18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
  219. 18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
  220. 18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
  221. 18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
  222. 18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
  223. 18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
  224. 18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
  225. 18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
  226. 18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
  227. 18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
  228. 18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
  229. 18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
  230. 18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
  231. 18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
  232. 18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
  233. 18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
  234. 18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
  235. 18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
  236. 18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
  237. 18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
  238. 18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
  239. 18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
  240. 18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
  241. 18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
  242. 18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
  243. 18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
  244. 18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
  245. 18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
  246. 18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
  247. 18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
  248. 18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
  249. 18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
  250. -- 2.0 ... 2.9999
  251. 18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
  252. 18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
  253. 18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
  254. 18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
  255. 18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
  256. 18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
  257. 18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
  258. 18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
  259. 18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
  260. 18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
  261. 18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
  262. 18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
  263. 18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
  264. 18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
  265. 18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
  266. 18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
  267. 18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
  268. 18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
  269. 18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
  270. 18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
  271. 18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
  272. 18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
  273. 18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
  274. 18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
  275. 18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
  276. 18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
  277. 18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
  278. 18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
  279. 18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
  280. 18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
  281. 18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
  282. 18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
  283. -- 3.0 ... 3.9999
  284. 18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
  285. 18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
  286. 18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
  287. 18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
  288. 18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
  289. 18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
  290. 18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
  291. 18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
  292. 18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
  293. 18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
  294. 18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
  295. 18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
  296. 18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
  297. 18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
  298. 18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
  299. 18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
  300. 18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
  301. 18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
  302. 18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
  303. 18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
  304. 18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
  305. 18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
  306. 18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
  307. 18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
  308. 18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
  309. 18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
  310. 18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
  311. 18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
  312. 18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
  313. 18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
  314. 18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
  315. 18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
  316. );
  -- Bidirectional shifter with a 120-bit input and a 64-bit output.
  -- The value returned is the most significant 64 bits of inp after it
  -- has been moved left by shift places.  shift is a 7-bit two's
  -- complement count in the range -64..63; a negative count moves the
  -- data right, filling with zeros from the top.
  -- The selection is performed by three multiplexer stages controlled by
  -- shift(6 downto 5), shift(4 downto 3) and shift(2 downto 0).
  function shifter_64(inp: std_ulogic_vector(119 downto 0);
                      shift: std_ulogic_vector(6 downto 0))
      return std_ulogic_vector is
      constant ZEROS : std_ulogic_vector(63 downto 0) := (others => '0');
      variable by32 : std_ulogic_vector(94 downto 0);
      variable by8  : std_ulogic_vector(70 downto 0);
      variable by1  : std_ulogic_vector(63 downto 0);
  begin
      -- Stage 1: steps of 32; this is where the sign of the count is resolved
      case shift(6 downto 5) is
          when "00" =>      -- counts 0 .. 31
              by32 := inp(119 downto 25);
          when "01" =>      -- counts 32 .. 63
              by32 := inp(87 downto 0) & ZEROS(6 downto 0);
          when "10" =>      -- counts -64 .. -33
              by32 := ZEROS & inp(119 downto 89);
          when others =>    -- counts -32 .. -1
              by32 := ZEROS(31 downto 0) & inp(119 downto 57);
      end case;

      -- Stage 2: steps of 8
      if shift(4 downto 3) = "00" then
          by8 := by32(94 downto 24);
      elsif shift(4 downto 3) = "01" then
          by8 := by32(86 downto 16);
      elsif shift(4 downto 3) = "10" then
          by8 := by32(78 downto 8);
      else
          by8 := by32(70 downto 0);
      end if;

      -- Stage 3: single-bit steps
      case shift(2 downto 0) is
          when "000"  => by1 := by8(70 downto 7);
          when "001"  => by1 := by8(69 downto 6);
          when "010"  => by1 := by8(68 downto 5);
          when "011"  => by1 := by8(67 downto 4);
          when "100"  => by1 := by8(66 downto 3);
          when "101"  => by1 := by8(65 downto 2);
          when "110"  => by1 := by8(64 downto 1);
          when others => by1 := by8(63 downto 0);
      end case;

      return by1;
  end;
  -- Build a 64-bit mask with 0-bits on the left and 1-bits on the right
  -- that selects the bits which will be lost by a right shift.  The shift
  -- argument is the bottom 6 bits of the (negative) shift count, so
  -- bits (63 - shift) downto 0 of the mask are set: 63 gives a single
  -- 1-bit at position 0 and 0 gives an all-ones mask.
  function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
      variable mask : std_ulogic_vector(63 downto 0) := (others => '0');
  begin
      for pos in mask'range loop
          -- bit 'pos' is set when shift does not exceed its distance from the top
          if shift <= 63 - pos then
              mask(pos) := '1';
          end if;
      end loop;
      return mask;
  end;
  -- Unpack a 64-bit register image into sign, exponent, mantissa and class.
  --
  -- is_int = '0': fpr is an IEEE double.  The exponent is returned
  --   unbiased (-1022 when the exponent field is zero), and the mantissa
  --   is in 10.54 format with the unit bit (bit 54) set only when the
  --   exponent field is non-zero.  Denormals are classed as FINITE.
  -- otherwise: fpr is taken as an integer.  The mantissa is the raw
  --   value, the exponent is zero and the class is ZERO or FINITE.
  function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
      variable dec      : fpu_reg_type;
      variable exp_any  : std_ulogic;   -- at least one exponent bit set
      variable exp_all  : std_ulogic;   -- every exponent bit set
      variable frac_any : std_ulogic;   -- at least one fraction bit set
      variable summary  : std_ulogic_vector(2 downto 0);
  begin
      exp_any  := or (fpr(62 downto 52));
      exp_all  := and (fpr(62 downto 52));
      frac_any := or (fpr(51 downto 0));

      dec.negative := fpr(63);

      if is_int /= '0' then
          -- integer operand: pass the bits through unchanged
          dec.exponent := (others => '0');
          dec.mantissa := fpr;
          if (fpr(63) or exp_any or frac_any) = '1' then
              dec.class := FINITE;
          else
              dec.class := ZERO;
          end if;
      else
          -- floating-point operand
          if exp_any = '0' then
              -- zero and denormals use the minimum exponent
              dec.exponent := to_signed(-1022, EXP_BITS);
          else
              dec.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
          end if;

          dec.mantissa := (others => '0');
          dec.mantissa(54) := exp_any;                  -- implied unit bit
          dec.mantissa(53 downto 2) := fpr(51 downto 0);

          summary := exp_all & exp_any & frac_any;
          case summary is
              when "000" =>
                  dec.class := ZERO;
              when "001" | "010" | "011" =>
                  -- "001" is a denormalized number
                  dec.class := FINITE;
              when "110" =>
                  dec.class := INFINITY;
              when others =>
                  dec.class := NAN;
          end case;
      end if;

      return dec;
  end;
  -- Assemble a 64-bit double-precision image from its components.
  --   sign        goes to bit 63 for every class
  --   class       selects the encoding (ZERO leaves all other bits clear)
  --   exp         unbiased exponent; 1023 is added for normalized numbers
  --   mantissa    10.54 format; bit 54 is the unit bit, bits 53..2 the fraction
  --   single_prec when not '0', the low 29 fraction bits are left as zero
  --   quieten_nan ORed into the top fraction bit of a NaN result
  -- A FINITE value whose unit bit is clear keeps a zero exponent field,
  -- i.e. it is packed as a denormal.
  function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
                   mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
      return std_ulogic_vector is
      variable packed : std_ulogic_vector(63 downto 0) := (others => '0');
  begin
      packed(63) := sign;

      -- FINITE and NAN both carry the fraction bits of the mantissa
      if class = FINITE or class = NAN then
          packed(51 downto 29) := mantissa(53 downto 31);
          if single_prec = '0' then
              packed(28 downto 0) := mantissa(30 downto 2);
          end if;
      end if;

      case class is
          when ZERO =>
              null;
          when FINITE =>
              if mantissa(54) = '1' then
                  -- normalized number
                  packed(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
              end if;
          when INFINITY =>
              packed(62 downto 52) := (others => '1');
          when NAN =>
              packed(62 downto 52) := (others => '1');
              packed(51) := quieten_nan or mantissa(53);
      end case;

      return packed;
  end;
  -- Work out the rounding decision for a mantissa.
  -- Returns a 2-bit vector: bit 1 is the rounding increment and bit 0 is
  -- the inexact indication (any of guard, round or sticky set).
  -- For single precision, x is assumed to already include the bottom 29
  -- bits of the mantissa (usually arranged by setting set_x = 1 earlier).
  -- rn(1 downto 0) is the rounding mode: "00" nearest, "01" towards zero,
  -- "10"/"11" towards an infinity, incrementing when rn(0) equals the sign.
  -- rn(2) = '1' disables the ties-to-even rule in nearest mode, so a tie
  -- then follows the guard bit.
  function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
                       single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
                       sign: std_ulogic)
      return std_ulogic_vector is
      variable guard_bits : std_ulogic_vector(2 downto 0);  -- guard, round, sticky
      variable low_bit    : std_ulogic;                     -- lsb of the kept mantissa
      variable outcome    : std_ulogic_vector(1 downto 0);  -- increment & inexact
  begin
      if single_prec /= '0' then
          low_bit    := mantissa(31);
          guard_bits := mantissa(30 downto 29) & x;
      else
          low_bit    := mantissa(2);
          guard_bits := mantissa(1 downto 0) & x;
      end if;

      outcome(0) := or (guard_bits);
      outcome(1) := '0';

      case rn(1 downto 0) is
          when "00" =>
              -- round to nearest: follow the guard bit ...
              outcome(1) := guard_bits(2);
              if guard_bits = "100" and rn(2) = '0' then
                  -- ... except on an exact tie, which rounds to even
                  outcome(1) := low_bit;
              end if;
          when "01" =>
              -- round towards zero: never increment
              null;
          when others =>
              -- round towards +/- infinity
              if rn(0) = sign then
                  -- rounding towards greater magnitude
                  outcome(1) := outcome(0);
              end if;
      end case;

      return outcome;
  end;
  -- Compute the 5-bit result-class flags to be written into the FPSCR
  -- from the sign and class of a result.  unitbit is the unit bit of the
  -- result mantissa and only matters for FINITE values, where a clear
  -- unit bit (denormal) sets the leading flag.
  -- NOTE(review): the bit order appears to follow the Power ISA FPRF
  -- field (C, FL, FG, FE, FU) -- confirm against the FPSCR update code.
  function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
      return std_ulogic_vector is
  begin
      if class = ZERO then
          return sign & "0010";
      elsif class = FINITE then
          return (not unitbit) & sign & (not sign) & "00";
      elsif class = INFINITY then
          return '0' & sign & (not sign) & "01";
      else
          -- NAN
          return "10001";
      end if;
  end;
  506. begin
  507. fpu_multiply_0: entity work.multiply -- multiplier instance private to the FPU
  508. port map (
  509. clk => clk,
  510. m_in => f_to_multiply, -- request record, driven elsewhere in this architecture
  511. m_out => multiply_to_f -- result record returned by the multiplier
  512. );
  -- State register of the FPU: loads the next state rin into r on every
  -- rising clock edge.  While rst is asserted only the fields needed
  -- for a clean start are forced; all other fields of r keep their value.
  fpu_0: process(clk)
  begin
      if rising_edge(clk) then
          if rst = '1' then
              r.writing_back <= '0';
              r.fpscr <= (others => '0');
              r.do_intr <= '0';
              r.instr_done <= '0';
              r.busy <= '0';
              r.state <= IDLE;
          else
              -- a new instruction must only be presented while the FPU is idle
              assert r.state = IDLE or e_in.valid /= '1' severity failure;
              r <= rin;
          end if;
      end if;
  end process;
  -- Synchronous (registered) read of the inverse lookup table.
  -- The low 8 address bits are mantissa bits 53..46 of operand B.  The
  -- top 2 address bits are mantissa bits 55..54 when r.is_sqrt is set
  -- and "00" otherwise.  The stored 18-bit entry is extended with a
  -- leading '1' to form the 19-bit estimate.
  lut_access: process(clk)
      variable block_sel : std_ulogic_vector(1 downto 0);
      variable index     : std_ulogic_vector(9 downto 0);
  begin
      if rising_edge(clk) then
          block_sel := "00";
          if r.is_sqrt = '1' then
              block_sel := r.b.mantissa(55 downto 54);
          end if;
          index(9 downto 8) := block_sel;
          index(7 downto 0) := r.b.mantissa(53 downto 46);
          inverse_est <= '1' & inverse_table(to_integer(unsigned(index)));
      end if;
  end process;
  544. e_out.busy <= r.busy; -- outputs are driven directly from the registered state
  545. e_out.exception <= r.fpscr(FPSCR_FEX); -- FEX bit of the FPSCR image
  546. w_out.valid <= r.instr_done and not r.do_intr; -- completion is not reported as valid when an interrupt is raised instead
  547. w_out.instr_tag <= r.instr_tag;
  548. w_out.write_enable <= r.writing_back;
  549. w_out.write_reg <= r.dest_fpr;
  550. w_out.write_data <= fp_result;
  551. w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp); -- CR is written on completion when rc or is_cmp was captured
  552. w_out.write_cr_mask <= r.cr_mask; -- selects which CR field receives the result
  553. w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result & -- same 4-bit result replicated into all 8 CR fields
  554. r.cr_result & r.cr_result & r.cr_result & r.cr_result;
  555. w_out.interrupt <= r.do_intr;
  556. w_out.intr_vec <= 16#700#; -- interrupt vector 0x700
  557. w_out.srr0 <= r.nia; -- address of the instruction captured from e_in.nia
  558. w_out.srr1 <= (47-44 => r.illegal, 47-43 => not r.illegal, others => '0'); -- exactly one of the two bits is set; NOTE(review): presumably SRR1 bit 44 = illegal instruction and bit 43 = FP enabled exception -- confirm
  559. fpu_1: process(all)
  560. variable v : reg_type;
  561. variable adec : fpu_reg_type;
  562. variable bdec : fpu_reg_type;
  563. variable cdec : fpu_reg_type;
  564. variable fpscr_mask : std_ulogic_vector(31 downto 0);
  565. variable illegal : std_ulogic;
  566. variable j, k : integer;
  567. variable flm : std_ulogic_vector(7 downto 0);
  568. variable int_input : std_ulogic;
  569. variable mask : std_ulogic_vector(63 downto 0);
  570. variable in_a0 : std_ulogic_vector(63 downto 0);
  571. variable in_b0 : std_ulogic_vector(63 downto 0);
  572. variable misc : std_ulogic_vector(63 downto 0);
  573. variable shift_res : std_ulogic_vector(63 downto 0);
  574. variable round : std_ulogic_vector(1 downto 0);
  575. variable update_fx : std_ulogic;
  576. variable arith_done : std_ulogic;
  577. variable invalid : std_ulogic;
  578. variable zero_divide : std_ulogic;
  579. variable mant_nz : std_ulogic;
  580. variable min_exp : signed(EXP_BITS-1 downto 0);
  581. variable max_exp : signed(EXP_BITS-1 downto 0);
  582. variable bias_exp : signed(EXP_BITS-1 downto 0);
  583. variable new_exp : signed(EXP_BITS-1 downto 0);
  584. variable exp_tiny : std_ulogic;
  585. variable exp_huge : std_ulogic;
  586. variable renormalize : std_ulogic;
  587. variable clz : std_ulogic_vector(5 downto 0);
  588. variable set_x : std_ulogic;
  589. variable mshift : signed(EXP_BITS-1 downto 0);
  590. variable need_check : std_ulogic;
  591. variable msb : std_ulogic;
  592. variable is_add : std_ulogic;
  593. variable set_a : std_ulogic;
  594. variable set_b : std_ulogic;
  595. variable set_c : std_ulogic;
  596. variable set_y : std_ulogic;
  597. variable set_s : std_ulogic;
  598. variable qnan_result : std_ulogic;
  599. variable px_nz : std_ulogic;
  600. variable pcmpb_eq : std_ulogic;
  601. variable pcmpb_lt : std_ulogic;
  602. variable pshift : std_ulogic;
  603. variable renorm_sqrt : std_ulogic;
  604. variable sqrt_exp : signed(EXP_BITS-1 downto 0);
  605. variable shiftin : std_ulogic;
  606. variable mulexp : signed(EXP_BITS-1 downto 0);
  607. variable maddend : std_ulogic_vector(127 downto 0);
  608. variable sum : std_ulogic_vector(63 downto 0);
  609. variable round_inc : std_ulogic_vector(63 downto 0);
  610. begin
  611. v := r;
  612. illegal := '0';
  613. v.busy := '0';
  614. int_input := '0';
  615. -- capture incoming instruction
  616. if e_in.valid = '1' then
  617. v.insn := e_in.insn;
  618. v.nia := e_in.nia;
  619. v.op := e_in.op;
  620. v.instr_tag := e_in.itag;
  621. v.fe_mode := or (e_in.fe_mode);
  622. v.dest_fpr := e_in.frt;
  623. v.single_prec := e_in.single;
  624. v.longmask := e_in.single;
  625. v.int_result := '0';
  626. v.rc := e_in.rc;
  627. v.is_cmp := e_in.out_cr;
  628. if e_in.out_cr = '0' then
  629. v.cr_mask := num_to_fxm(1);
  630. else
  631. v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
  632. end if;
  633. int_input := '0';
  634. if e_in.op = OP_FPOP_I then
  635. int_input := '1';
  636. end if;
  637. v.quieten_nan := '1';
  638. v.tiny := '0';
  639. v.denorm := '0';
  640. v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
  641. v.is_subtract := '0';
  642. v.is_multiply := '0';
  643. v.is_sqrt := '0';
  644. v.add_bsmall := '0';
  645. v.doing_ftdiv := "00";
  646. adec := decode_dp(e_in.fra, int_input);
  647. bdec := decode_dp(e_in.frb, int_input);
  648. cdec := decode_dp(e_in.frc, int_input);
  649. v.a := adec;
  650. v.b := bdec;
  651. v.c := cdec;
  652. v.exp_cmp := '0';
  653. if adec.exponent > bdec.exponent then
  654. v.exp_cmp := '1';
  655. end if;
  656. v.madd_cmp := '0';
  657. if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
  658. v.madd_cmp := '1';
  659. end if;
  660. end if;
  661. r_hi_nz <= or (r.r(55 downto 31));
  662. r_lo_nz <= or (r.r(30 downto 2));
  663. s_nz <= or (r.s);
  664. if r.single_prec = '0' then
  665. if r.doing_ftdiv(1) = '0' then
  666. max_exp := to_signed(1023, EXP_BITS);
  667. else
  668. max_exp := to_signed(1020, EXP_BITS);
  669. end if;
  670. if r.doing_ftdiv(0) = '0' then
  671. min_exp := to_signed(-1022, EXP_BITS);
  672. else
  673. min_exp := to_signed(-1021, EXP_BITS);
  674. end if;
  675. bias_exp := to_signed(1536, EXP_BITS);
  676. else
  677. max_exp := to_signed(127, EXP_BITS);
  678. min_exp := to_signed(-126, EXP_BITS);
  679. bias_exp := to_signed(192, EXP_BITS);
  680. end if;
  681. new_exp := r.result_exp - r.shift;
  682. exp_tiny := '0';
  683. exp_huge := '0';
  684. if new_exp < min_exp then
  685. exp_tiny := '1';
  686. end if;
  687. if new_exp > max_exp then
  688. exp_huge := '1';
  689. end if;
  690. -- Compare P with zero and with B
  691. px_nz := or (r.p(57 downto 4));
  692. pcmpb_eq := '0';
  693. if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
  694. pcmpb_eq := '1';
  695. end if;
  696. pcmpb_lt := '0';
  697. if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
  698. pcmpb_lt := '1';
  699. end if;
  700. v.writing_back := '0';
  701. v.instr_done := '0';
  702. v.update_fprf := '0';
  703. v.shift := to_signed(0, EXP_BITS);
  704. v.first := '0';
  705. v.opsel_a := AIN_R;
  706. opsel_ainv <= '0';
  707. opsel_mask <= '0';
  708. opsel_b <= BIN_ZERO;
  709. opsel_binv <= '0';
  710. opsel_r <= RES_SUM;
  711. opsel_s <= S_ZERO;
  712. carry_in <= '0';
  713. misc_sel <= "0000";
  714. fpscr_mask := (others => '1');
  715. update_fx := '0';
  716. arith_done := '0';
  717. invalid := '0';
  718. zero_divide := '0';
  719. renormalize := '0';
  720. set_x := '0';
  721. qnan_result := '0';
  722. set_a := '0';
  723. set_b := '0';
  724. set_c := '0';
  725. set_s := '0';
  726. f_to_multiply.is_32bit <= '0';
  727. f_to_multiply.valid <= '0';
  728. msel_1 <= MUL1_A;
  729. msel_2 <= MUL2_C;
  730. msel_add <= MULADD_ZERO;
  731. msel_inv <= '0';
  732. set_y := '0';
  733. pshift := '0';
  734. renorm_sqrt := '0';
  735. shiftin := '0';
  736. case r.state is
  737. when IDLE =>
  738. v.use_a := '0';
  739. v.use_b := '0';
  740. v.use_c := '0';
  741. v.invalid := '0';
  742. v.negate := '0';
  743. if e_in.valid = '1' then
  744. case e_in.insn(5 downto 1) is
  745. when "00000" =>
  746. if e_in.insn(8) = '1' then
  747. if e_in.insn(6) = '0' then
  748. v.state := DO_FTDIV;
  749. else
  750. v.state := DO_FTSQRT;
  751. end if;
  752. elsif e_in.insn(7) = '1' then
  753. v.state := DO_MCRFS;
  754. else
  755. v.opsel_a := AIN_B;
  756. v.state := DO_FCMP;
  757. end if;
  758. when "00110" =>
  759. if e_in.insn(10) = '0' then
  760. if e_in.insn(8) = '0' then
  761. v.state := DO_MTFSB;
  762. else
  763. v.state := DO_MTFSFI;
  764. end if;
  765. else
  766. v.state := DO_FMRG;
  767. end if;
  768. when "00111" =>
  769. if e_in.insn(8) = '0' then
  770. v.state := DO_MFFS;
  771. else
  772. v.state := DO_MTFSF;
  773. end if;
  774. when "01000" =>
  775. v.opsel_a := AIN_B;
  776. if e_in.insn(9 downto 8) /= "11" then
  777. v.state := DO_FMR;
  778. else
  779. v.state := DO_FRI;
  780. end if;
  781. when "01100" =>
  782. v.opsel_a := AIN_B;
  783. v.state := DO_FRSP;
  784. when "01110" =>
  785. v.opsel_a := AIN_B;
  786. if int_input = '1' then
  787. -- fcfid[u][s]
  788. v.state := DO_FCFID;
  789. else
  790. v.state := DO_FCTI;
  791. end if;
  792. when "01111" =>
  793. v.round_mode := "001";
  794. v.opsel_a := AIN_B;
  795. v.state := DO_FCTI;
  796. when "10010" =>
  797. v.opsel_a := AIN_A;
  798. if v.b.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
  799. v.opsel_a := AIN_B;
  800. end if;
  801. v.state := DO_FDIV;
  802. when "10100" | "10101" =>
  803. v.opsel_a := AIN_A;
  804. v.state := DO_FADD;
  805. when "10110" =>
  806. v.is_sqrt := '1';
  807. v.opsel_a := AIN_B;
  808. v.state := DO_FSQRT;
  809. when "10111" =>
  810. v.state := DO_FSEL;
  811. when "11000" =>
  812. v.opsel_a := AIN_B;
  813. v.state := DO_FRE;
  814. when "11001" =>
  815. v.is_multiply := '1';
  816. v.opsel_a := AIN_A;
  817. if v.c.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
  818. v.opsel_a := AIN_C;
  819. end if;
  820. v.state := DO_FMUL;
  821. when "11010" =>
  822. v.is_sqrt := '1';
  823. v.opsel_a := AIN_B;
  824. v.state := DO_FRSQRTE;
  825. when "11100" | "11101" | "11110" | "11111" =>
  826. if v.a.mantissa(54) = '0' then
  827. v.opsel_a := AIN_A;
  828. elsif v.c.mantissa(54) = '0' then
  829. v.opsel_a := AIN_C;
  830. else
  831. v.opsel_a := AIN_B;
  832. end if;
  833. v.state := DO_FMADD;
  834. when others =>
  835. illegal := '1';
  836. end case;
  837. end if;
  838. v.x := '0';
  839. v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
  840. set_s := '1';
  841. when DO_MCRFS =>
  842. j := to_integer(unsigned(insn_bfa(r.insn)));
  843. for i in 0 to 7 loop
  844. if i = j then
  845. k := (7 - i) * 4;
  846. v.cr_result := r.fpscr(k + 3 downto k);
  847. fpscr_mask(k + 3 downto k) := "0000";
  848. end if;
  849. end loop;
  850. v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
  851. v.instr_done := '1';
  852. v.state := IDLE;
  when DO_FTDIV =>
      -- ftdiv: derive the two flag bits of the CR result from the operand
      -- classes and exponents.  cr_result(2) carries fg_flag and
      -- cr_result(1) carries fe_flag; the other two bits stay 0.
      v.instr_done := '1';
      v.state := IDLE;
      v.cr_result := "0000";
      -- fg_flag: A is infinity, or B is zero, infinity or denormalized.
      -- The denormal test must look at mantissa bit 54, the unit bit that
      -- the fdiv/fmul/fmadd paths test to detect a denorm operand.  Bit 53
      -- is the most significant fraction bit (the quiet bit of a NaN) and
      -- is legitimately 0 for many normalized values, so testing it here
      -- raised fg_flag for ordinary normalized divisors.
      if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
          (r.b.class = FINITE and r.b.mantissa(54) = '0') then
          v.cr_result(2) := '1';
      end if;
      -- fe_flag can be decided immediately when either operand is special
      -- or when A is finite with an exponent of -970 or less.
      if r.a.class = NAN or r.a.class = INFINITY or
          r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
          (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
          v.cr_result(1) := '1';
      else
          -- Otherwise the instruction is not finished yet: continue in
          -- FTDIV_1.  doing_ftdiv = "11" selects the tightened
          -- max_exp (1020) and min_exp (-1021) limits computed at the top
          -- of this process.
          -- NOTE(review): FTDIV_1 is outside this view; presumably it
          -- completes the exponent range checks for fe_flag -- confirm.
          v.doing_ftdiv := "11";
          v.first := '1';
          v.state := FTDIV_1;
          v.instr_done := '0';
      end if;
  when DO_FTSQRT =>
      -- ftsqrt: derive the two flag bits of the CR result from operand B.
      -- cr_result(2) carries fg_flag and cr_result(1) carries fe_flag; the
      -- other two bits stay 0.  The instruction always completes in this
      -- state.
      v.instr_done := '1';
      v.state := IDLE;
      v.cr_result := "0000";
      -- fg_flag: B is zero, infinity or denormalized.  Bit 54 is the
      -- mantissa unit bit used everywhere else in this process to detect
      -- a denorm operand; bit 53 is the top fraction bit and is 0 for many
      -- normalized values, so it must not be used for this test.
      if r.b.class = ZERO or r.b.class = INFINITY or
          (r.b.class = FINITE and r.b.mantissa(54) = '0') then
          v.cr_result(2) := '1';
      end if;
      -- fe_flag: B is NaN, infinity, zero or negative, or its exponent is
      -- -970 or less.  cr_result was cleared above, so the flag has to be
      -- driven to '1' here (assigning '0' left fe_flag permanently clear),
      -- matching the way DO_FTDIV reports its fe_flag.
      if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
          or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
          v.cr_result(1) := '1';
      end if;
  883. when DO_FCMP =>
  884. -- fcmp[uo]
  885. -- r.opsel_a = AIN_B
  886. v.instr_done := '1';
  887. v.state := IDLE;
  888. update_fx := '1';
  889. v.result_exp := r.b.exponent;
  890. if (r.a.class = NAN and r.a.mantissa(53) = '0') or
  891. (r.b.class = NAN and r.b.mantissa(53) = '0') then
  892. -- Signalling NAN
  893. v.fpscr(FPSCR_VXSNAN) := '1';
  894. if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
  895. v.fpscr(FPSCR_VXVC) := '1';
  896. end if;
  897. invalid := '1';
  898. v.cr_result := "0001"; -- unordered
  899. elsif r.a.class = NAN or r.b.class = NAN then
  900. if r.insn(6) = '1' then
  901. -- fcmpo
  902. v.fpscr(FPSCR_VXVC) := '1';
  903. invalid := '1';
  904. end if;
  905. v.cr_result := "0001"; -- unordered
  906. elsif r.a.class = ZERO and r.b.class = ZERO then
  907. v.cr_result := "0010"; -- equal
  908. elsif r.a.negative /= r.b.negative then
  909. v.cr_result := r.a.negative & r.b.negative & "00";
  910. elsif r.a.class = ZERO then
  911. -- A and B are the same sign from here down
  912. v.cr_result := not r.b.negative & r.b.negative & "00";
  913. elsif r.a.class = INFINITY then
  914. if r.b.class = INFINITY then
  915. v.cr_result := "0010";
  916. else
  917. v.cr_result := r.a.negative & not r.a.negative & "00";
  918. end if;
  919. elsif r.b.class = ZERO then
  920. -- A is finite from here down
  921. v.cr_result := r.a.negative & not r.a.negative & "00";
  922. elsif r.b.class = INFINITY then
  923. v.cr_result := not r.b.negative & r.b.negative & "00";
  924. elsif r.exp_cmp = '1' then
  925. -- A and B are both finite from here down
  926. v.cr_result := r.a.negative & not r.a.negative & "00";
  927. elsif r.a.exponent /= r.b.exponent then
  928. -- A exponent is smaller than B
  929. v.cr_result := not r.a.negative & r.a.negative & "00";
  930. else
  931. -- Prepare to subtract mantissas, put B in R
  932. v.cr_result := "0000";
  933. v.instr_done := '0';
  934. v.opsel_a := AIN_A;
  935. v.state := CMP_1;
  936. end if;
  937. v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
  938. when DO_MTFSB =>
  939. -- mtfsb{0,1}
  940. j := to_integer(unsigned(insn_bt(r.insn)));
  941. for i in 0 to 31 loop
  942. if i = j then
  943. v.fpscr(31 - i) := r.insn(6);
  944. end if;
  945. end loop;
  946. v.instr_done := '1';
  947. v.state := IDLE;
  948. when DO_MTFSFI =>
  949. -- mtfsfi
  950. j := to_integer(unsigned(insn_bf(r.insn)));
  951. if r.insn(16) = '0' then
  952. for i in 0 to 7 loop
  953. if i = j then
  954. k := (7 - i) * 4;
  955. v.fpscr(k + 3 downto k) := insn_u(r.insn);
  956. end if;
  957. end loop;
  958. end if;
  959. v.instr_done := '1';
  960. v.state := IDLE;
  961. when DO_FMRG =>
  962. -- fmrgew, fmrgow
  963. opsel_r <= RES_MISC;
  964. misc_sel <= "01" & r.insn(8) & '0';
  965. v.int_result := '1';
  966. v.writing_back := '1';
  967. v.instr_done := '1';
  968. v.state := IDLE;
  969. when DO_MFFS =>
  970. v.int_result := '1';
  971. v.writing_back := '1';
  972. opsel_r <= RES_MISC;
  973. case r.insn(20 downto 16) is
  974. when "00000" =>
  975. -- mffs
  976. when "00001" =>
  977. -- mffsce
  978. v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
  979. when "10100" | "10101" =>
  980. -- mffscdrn[i] (but we don't implement DRN)
  981. fpscr_mask := x"000000FF";
  982. when "10110" =>
  983. -- mffscrn
  984. fpscr_mask := x"000000FF";
  985. v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
  986. r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
  987. when "10111" =>
  988. -- mffscrni
  989. fpscr_mask := x"000000FF";
  990. v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
  991. when "11000" =>
  992. -- mffsl
  993. fpscr_mask := x"0007F0FF";
  994. when others =>
  995. illegal := '1';
  996. end case;
  997. v.instr_done := '1';
  998. v.state := IDLE;
  999. when DO_MTFSF =>
  1000. if r.insn(25) = '1' then
  1001. flm := x"FF";
  1002. elsif r.insn(16) = '1' then
  1003. flm := x"00";
  1004. else
  1005. flm := r.insn(24 downto 17);
  1006. end if;
  1007. for i in 0 to 7 loop
  1008. k := i * 4;
  1009. if flm(i) = '1' then
  1010. v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
  1011. end if;
  1012. end loop;
  1013. v.instr_done := '1';
  1014. v.state := IDLE;
  1015. when DO_FMR =>
  1016. -- r.opsel_a = AIN_B
  1017. v.result_class := r.b.class;
  1018. v.result_exp := r.b.exponent;
  1019. v.quieten_nan := '0';
  1020. if r.insn(9) = '1' then
  1021. v.result_sign := '0'; -- fabs
  1022. elsif r.insn(8) = '1' then
  1023. v.result_sign := '1'; -- fnabs
  1024. elsif r.insn(7) = '1' then
  1025. v.result_sign := r.b.negative; -- fmr
  1026. elsif r.insn(6) = '1' then
  1027. v.result_sign := not r.b.negative; -- fneg
  1028. else
  1029. v.result_sign := r.a.negative; -- fcpsgn
  1030. end if;
  1031. v.writing_back := '1';
  1032. v.instr_done := '1';
  1033. v.state := IDLE;
  1034. when DO_FRI => -- fri[nzpm]
  1035. -- r.opsel_a = AIN_B
  1036. v.result_class := r.b.class;
  1037. v.result_sign := r.b.negative;
  1038. v.result_exp := r.b.exponent;
  1039. v.fpscr(FPSCR_FR) := '0';
  1040. v.fpscr(FPSCR_FI) := '0';
  1041. if r.b.class = NAN and r.b.mantissa(53) = '0' then
  1042. -- Signalling NAN
  1043. v.fpscr(FPSCR_VXSNAN) := '1';
  1044. invalid := '1';
  1045. end if;
  1046. if r.b.class = FINITE then
  1047. if r.b.exponent >= to_signed(52, EXP_BITS) then
  1048. -- integer already, no rounding required
  1049. arith_done := '1';
  1050. else
  1051. v.shift := r.b.exponent - to_signed(52, EXP_BITS);
  1052. v.state := FRI_1;
  1053. v.round_mode := '1' & r.insn(7 downto 6);
  1054. end if;
  1055. else
  1056. arith_done := '1';
  1057. end if;
  1058. when DO_FRSP =>
  1059. -- r.opsel_a = AIN_B, r.shift = 0
  1060. v.result_class := r.b.class;
  1061. v.result_sign := r.b.negative;
  1062. v.result_exp := r.b.exponent;
  1063. v.fpscr(FPSCR_FR) := '0';
  1064. v.fpscr(FPSCR_FI) := '0';
  1065. if r.b.class = NAN and r.b.mantissa(53) = '0' then
  1066. -- Signalling NAN
  1067. v.fpscr(FPSCR_VXSNAN) := '1';
  1068. invalid := '1';
  1069. end if;
  1070. set_x := '1';
  1071. if r.b.class = FINITE then
  1072. if r.b.exponent < to_signed(-126, EXP_BITS) then
  1073. v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
  1074. v.state := ROUND_UFLOW;
  1075. elsif r.b.exponent > to_signed(127, EXP_BITS) then
  1076. v.state := ROUND_OFLOW;
  1077. else
  1078. v.state := ROUNDING;
  1079. end if;
  1080. else
  1081. arith_done := '1';
  1082. end if;
  1083. when DO_FCTI =>
  1084. -- instr bit 9: 1=dword 0=word
  1085. -- instr bit 8: 1=unsigned 0=signed
  1086. -- instr bit 1: 1=round to zero 0=use fpscr[RN]
  1087. -- r.opsel_a = AIN_B
  1088. v.result_class := r.b.class;
  1089. v.result_sign := r.b.negative;
  1090. v.result_exp := r.b.exponent;
  1091. v.fpscr(FPSCR_FR) := '0';
  1092. v.fpscr(FPSCR_FI) := '0';
  1093. if r.b.class = NAN and r.b.mantissa(53) = '0' then
  1094. -- Signalling NAN
  1095. v.fpscr(FPSCR_VXSNAN) := '1';
  1096. invalid := '1';
  1097. end if;
  1098. v.int_result := '1';
  1099. case r.b.class is
  1100. when ZERO =>
  1101. arith_done := '1';
  1102. when FINITE =>
  1103. if r.b.exponent >= to_signed(64, EXP_BITS) or
  1104. (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
  1105. v.state := INT_OFLOW;
  1106. elsif r.b.exponent >= to_signed(52, EXP_BITS) then
  1107. -- integer already, no rounding required,
  1108. -- shift into final position
  1109. v.shift := r.b.exponent - to_signed(54, EXP_BITS);
  1110. if r.insn(8) = '1' and r.b.negative = '1' then
  1111. v.state := INT_OFLOW;
  1112. else
  1113. v.state := INT_ISHIFT;
  1114. end if;
  1115. else
  1116. v.shift := r.b.exponent - to_signed(52, EXP_BITS);
  1117. v.state := INT_SHIFT;
  1118. end if;
  1119. when INFINITY | NAN =>
  1120. v.state := INT_OFLOW;
  1121. end case;
  1122. when DO_FCFID =>
  1123. -- r.opsel_a = AIN_B
  1124. v.result_sign := '0';
  1125. if r.insn(8) = '0' and r.b.negative = '1' then
  1126. -- fcfid[s] with negative operand, set R = -B
  1127. opsel_ainv <= '1';
  1128. carry_in <= '1';
  1129. v.result_sign := '1';
  1130. end if;
  1131. v.result_class := r.b.class;
  1132. v.result_exp := to_signed(54, EXP_BITS);
  1133. v.fpscr(FPSCR_FR) := '0';
  1134. v.fpscr(FPSCR_FI) := '0';
  1135. if r.b.class = ZERO then
  1136. arith_done := '1';
  1137. else
  1138. v.state := FINISH;
  1139. end if;
  1140. when DO_FADD =>
  1141. -- fadd[s] and fsub[s]
  1142. -- r.opsel_a = AIN_A
  1143. v.result_sign := r.a.negative;
  1144. v.result_class := r.a.class;
  1145. v.result_exp := r.a.exponent;
  1146. v.fpscr(FPSCR_FR) := '0';
  1147. v.fpscr(FPSCR_FI) := '0';
  1148. v.use_a := '1';
  1149. v.use_b := '1';
  1150. is_add := r.a.negative xor r.b.negative xor r.insn(1);
  1151. if r.a.class = FINITE and r.b.class = FINITE then
  1152. v.is_subtract := not is_add;
  1153. v.add_bsmall := r.exp_cmp;
  1154. v.opsel_a := AIN_B;
  1155. if r.exp_cmp = '0' then
  1156. v.shift := r.a.exponent - r.b.exponent;
  1157. v.result_sign := r.b.negative xnor r.insn(1);
  1158. if r.a.exponent = r.b.exponent then
  1159. v.state := ADD_2;
  1160. else
  1161. v.longmask := '0';
  1162. v.state := ADD_SHIFT;
  1163. end if;
  1164. else
  1165. v.state := ADD_1;
  1166. end if;
  1167. else
  1168. if r.a.class = NAN or r.b.class = NAN then
  1169. v.state := NAN_RESULT;
  1170. elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
  1171. -- invalid operation, construct QNaN
  1172. v.fpscr(FPSCR_VXISI) := '1';
  1173. qnan_result := '1';
  1174. arith_done := '1';
  1175. elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
  1176. -- return -0 for rounding to -infinity
  1177. v.result_sign := r.round_mode(1) and r.round_mode(0);
  1178. arith_done := '1';
  1179. elsif r.a.class = INFINITY or r.b.class = ZERO then
  1180. -- result is A
  1181. v.opsel_a := AIN_A;
  1182. v.state := EXC_RESULT;
  1183. else
  1184. -- result is +/- B
  1185. v.opsel_a := AIN_B;
  1186. v.negate := not r.insn(1);
  1187. v.state := EXC_RESULT;
  1188. end if;
  1189. end if;
  1190. when DO_FMUL =>
  1191. -- fmul[s]
  1192. -- r.opsel_a = AIN_A unless C is denorm and A isn't
  1193. v.result_sign := r.a.negative xor r.c.negative;
  1194. v.result_class := r.a.class;
  1195. v.fpscr(FPSCR_FR) := '0';
  1196. v.fpscr(FPSCR_FI) := '0';
  1197. v.use_a := '1';
  1198. v.use_c := '1';
  1199. if r.a.class = FINITE and r.c.class = FINITE then
  1200. v.result_exp := r.a.exponent + r.c.exponent;
  1201. -- Renormalize denorm operands
  1202. if r.a.mantissa(54) = '0' then
  1203. v.state := RENORM_A;
  1204. elsif r.c.mantissa(54) = '0' then
  1205. v.state := RENORM_C;
  1206. else
  1207. f_to_multiply.valid <= '1';
  1208. v.state := MULT_1;
  1209. end if;
  1210. else
  1211. if r.a.class = NAN or r.c.class = NAN then
  1212. v.state := NAN_RESULT;
  1213. elsif (r.a.class = INFINITY and r.c.class = ZERO) or
  1214. (r.a.class = ZERO and r.c.class = INFINITY) then
  1215. -- invalid operation, construct QNaN
  1216. v.fpscr(FPSCR_VXIMZ) := '1';
  1217. qnan_result := '1';
  1218. elsif r.a.class = ZERO or r.a.class = INFINITY then
  1219. -- result is +/- A
  1220. arith_done := '1';
  1221. else
  1222. -- r.c.class is ZERO or INFINITY
  1223. v.opsel_a := AIN_C;
  1224. v.negate := r.a.negative;
  1225. v.state := EXC_RESULT;
  1226. end if;
  1227. end if;
  1228. when DO_FDIV =>
  1229. -- r.opsel_a = AIN_A unless B is denorm and A isn't
  1230. v.result_class := r.a.class;
  1231. v.fpscr(FPSCR_FR) := '0';
  1232. v.fpscr(FPSCR_FI) := '0';
  1233. v.use_a := '1';
  1234. v.use_b := '1';
  1235. v.result_sign := r.a.negative xor r.b.negative;
  1236. v.result_exp := r.a.exponent - r.b.exponent;
  1237. v.count := "00";
  1238. if r.a.class = FINITE and r.b.class = FINITE then
  1239. -- Renormalize denorm operands
  1240. if r.a.mantissa(54) = '0' then
  1241. v.state := RENORM_A;
  1242. elsif r.b.mantissa(54) = '0' then
  1243. v.state := RENORM_B;
  1244. else
  1245. v.first := '1';
  1246. v.state := DIV_2;
  1247. end if;
  1248. else
  1249. if r.a.class = NAN or r.b.class = NAN then
  1250. v.state := NAN_RESULT;
  1251. elsif r.b.class = INFINITY then
  1252. if r.a.class = INFINITY then
  1253. v.fpscr(FPSCR_VXIDI) := '1';
  1254. qnan_result := '1';
  1255. else
  1256. v.result_class := ZERO;
  1257. end if;
  1258. arith_done := '1';
  1259. elsif r.b.class = ZERO then
  1260. if r.a.class = ZERO then
  1261. v.fpscr(FPSCR_VXZDZ) := '1';
  1262. qnan_result := '1';
  1263. else
  1264. if r.a.class = FINITE then
  1265. zero_divide := '1';
  1266. end if;
  1267. v.result_class := INFINITY;
  1268. end if;
  1269. arith_done := '1';
  1270. else -- r.b.class = FINITE, result_class = r.a.class
  1271. arith_done := '1';
  1272. end if;
  1273. end if;
  1274. when DO_FSEL =>
  1275. v.fpscr(FPSCR_FR) := '0';
  1276. v.fpscr(FPSCR_FI) := '0';
  1277. if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
  1278. v.opsel_a := AIN_C;
  1279. else
  1280. v.opsel_a := AIN_B;
  1281. end if;
  1282. v.quieten_nan := '0';
  1283. v.state := EXC_RESULT;
  1284. when DO_FSQRT =>
  1285. -- r.opsel_a = AIN_B
  1286. v.result_class := r.b.class;
  1287. v.result_sign := r.b.negative;
  1288. v.fpscr(FPSCR_FR) := '0';
  1289. v.fpscr(FPSCR_FI) := '0';
  1290. v.use_b := '1';
  1291. case r.b.class is
  1292. when FINITE =>
  1293. v.result_exp := r.b.exponent;
  1294. if r.b.negative = '1' then
  1295. v.fpscr(FPSCR_VXSQRT) := '1';
  1296. qnan_result := '1';
  1297. elsif r.b.mantissa(54) = '0' then
  1298. v.state := RENORM_B;
  1299. elsif r.b.exponent(0) = '0' then
  1300. v.state := SQRT_1;
  1301. else
  1302. v.shift := to_signed(1, EXP_BITS);
  1303. v.state := RENORM_B2;
  1304. end if;
  1305. when NAN =>
  1306. v.state := NAN_RESULT;
  1307. when ZERO =>
  1308. -- result is B
  1309. arith_done := '1';
  1310. when INFINITY =>
  1311. if r.b.negative = '1' then
  1312. v.fpscr(FPSCR_VXSQRT) := '1';
  1313. qnan_result := '1';
  1314. -- else result is B
  1315. end if;
  1316. arith_done := '1';
  1317. end case;
  1318. when DO_FRE =>
  1319. -- r.opsel_a = AIN_B
  1320. v.result_class := r.b.class;
  1321. v.result_sign := r.b.negative;
  1322. v.fpscr(FPSCR_FR) := '0';
  1323. v.fpscr(FPSCR_FI) := '0';
  1324. v.use_b := '1';
  1325. case r.b.class is
  1326. when FINITE =>
  1327. v.result_exp := - r.b.exponent;
  1328. if r.b.mantissa(54) = '0' then
  1329. v.state := RENORM_B;
  1330. else
  1331. v.state := FRE_1;
  1332. end if;
  1333. when NAN =>
  1334. v.state := NAN_RESULT;
  1335. when INFINITY =>
  1336. v.result_class := ZERO;
  1337. arith_done := '1';
  1338. when ZERO =>
  1339. v.result_class := INFINITY;
  1340. zero_divide := '1';
  1341. arith_done := '1';
  1342. end case;
  1343. when DO_FRSQRTE =>
  1344. -- r.opsel_a = AIN_B
  1345. v.result_class := r.b.class;
  1346. v.result_sign := r.b.negative;
  1347. v.fpscr(FPSCR_FR) := '0';
  1348. v.fpscr(FPSCR_FI) := '0';
  1349. v.use_b := '1';
  1350. v.shift := to_signed(1, EXP_BITS);
  1351. case r.b.class is
  1352. when FINITE =>
  1353. v.result_exp := r.b.exponent;
  1354. if r.b.negative = '1' then
  1355. v.fpscr(FPSCR_VXSQRT) := '1';
  1356. qnan_result := '1';
  1357. elsif r.b.mantissa(54) = '0' then
  1358. v.state := RENORM_B;
  1359. elsif r.b.exponent(0) = '0' then
  1360. v.state := RSQRT_1;
  1361. else
  1362. v.state := RENORM_B2;
  1363. end if;
  1364. when NAN =>
  1365. v.state := NAN_RESULT;
  1366. when INFINITY =>
  1367. if r.b.negative = '1' then
  1368. v.fpscr(FPSCR_VXSQRT) := '1';
  1369. qnan_result := '1';
  1370. else
  1371. v.result_class := ZERO;
  1372. end if;
  1373. arith_done := '1';
  1374. when ZERO =>
  1375. v.result_class := INFINITY;
  1376. zero_divide := '1';
  1377. arith_done := '1';
  1378. end case;
  1379. when DO_FMADD =>
  1380. -- fmadd, fmsub, fnmadd, fnmsub
  1381. -- r.opsel_a = AIN_A if A is denorm, else AIN_C if C is denorm,
  1382. -- else AIN_B
  1383. v.result_sign := r.a.negative;
  1384. v.result_class := r.a.class;
  1385. v.result_exp := r.a.exponent;
  1386. v.fpscr(FPSCR_FR) := '0';
  1387. v.fpscr(FPSCR_FI) := '0';
  1388. v.use_a := '1';
  1389. v.use_b := '1';
  1390. v.use_c := '1';
  1391. is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);
  1392. if r.a.class = FINITE and r.c.class = FINITE and
  1393. (r.b.class = FINITE or r.b.class = ZERO) then
  1394. v.is_subtract := not is_add;
  1395. mulexp := r.a.exponent + r.c.exponent;
  1396. v.result_exp := mulexp;
  1397. -- Make sure A and C are normalized
  1398. if r.a.mantissa(54) = '0' then
  1399. v.state := RENORM_A;
  1400. elsif r.c.mantissa(54) = '0' then
  1401. v.state := RENORM_C;
  1402. elsif r.b.class = ZERO then
  1403. -- no addend, degenerates to multiply
  1404. v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
  1405. f_to_multiply.valid <= '1';
  1406. v.is_multiply := '1';
  1407. v.state := MULT_1;
  1408. elsif r.madd_cmp = '0' then
  1409. -- addend is bigger, do multiply first
  1410. v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
  1411. f_to_multiply.valid <= '1';
  1412. v.state := FMADD_1;
  1413. else
  1414. -- product is bigger, shift B right and use it as the
  1415. -- addend to the multiplier
  1416. v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);
  1417. -- for subtract, multiplier does B - A * C
  1418. v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
  1419. v.result_exp := r.b.exponent;
  1420. v.state := FMADD_2;
  1421. end if;
  1422. else
  1423. if r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then
  1424. v.state := NAN_RESULT;
  1425. elsif (r.a.class = ZERO and r.c.class = INFINITY) or
  1426. (r.a.class = INFINITY and r.c.class = ZERO) then
  1427. -- invalid operation, construct QNaN
  1428. v.fpscr(FPSCR_VXIMZ) := '1';
  1429. qnan_result := '1';
  1430. elsif r.a.class = INFINITY or r.c.class = INFINITY then
  1431. if r.b.class = INFINITY and is_add = '0' then
  1432. -- invalid operation, construct QNaN
  1433. v.fpscr(FPSCR_VXISI) := '1';
  1434. qnan_result := '1';
  1435. else
  1436. -- result is infinity
  1437. v.result_class := INFINITY;
  1438. v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
  1439. arith_done := '1';
  1440. end if;
  1441. else
  1442. -- Here A is zero, C is zero, or B is infinity
  1443. -- Result is +/-B in all of those cases
  1444. v.opsel_a := AIN_B;
  1445. if r.b.class /= ZERO or is_add = '1' then
  1446. v.negate := not (r.insn(1) xor r.insn(2));
  1447. else
  1448. -- have to be careful about rule for 0 - 0 result sign
  1449. v.negate := r.b.negative xor (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);
  1450. end if;
  1451. v.state := EXC_RESULT;
  1452. end if;
  1453. end if;
  1454. when RENORM_A =>
  1455. renormalize := '1';
  1456. v.state := RENORM_A2;
  1457. if r.insn(4) = '1' then
  1458. v.opsel_a := AIN_C;
  1459. else
  1460. v.opsel_a := AIN_B;
  1461. end if;
  1462. when RENORM_A2 =>
  1463. -- r.opsel_a = AIN_C for fmul/fmadd, AIN_B for fdiv
  1464. set_a := '1';
  1465. v.result_exp := new_exp;
  1466. if r.insn(4) = '1' then
  1467. if r.c.mantissa(54) = '1' then
  1468. if r.insn(3) = '0' or r.b.class = ZERO then
  1469. v.first := '1';
  1470. v.state := MULT_1;
  1471. else
  1472. v.madd_cmp := '0';
  1473. if new_exp + 1 >= r.b.exponent then
  1474. v.madd_cmp := '1';
  1475. end if;
  1476. v.opsel_a := AIN_B;
  1477. v.state := DO_FMADD;
  1478. end if;
  1479. else
  1480. v.state := RENORM_C;
  1481. end if;
  1482. else
  1483. if r.b.mantissa(54) = '1' then
  1484. v.first := '1';
  1485. v.state := DIV_2;
  1486. else
  1487. v.state := RENORM_B;
  1488. end if;
  1489. end if;
  1490. when RENORM_B =>
  1491. renormalize := '1';
  1492. renorm_sqrt := r.is_sqrt;
  1493. v.state := RENORM_B2;
  1494. when RENORM_B2 =>
  1495. set_b := '1';
  1496. if r.is_sqrt = '0' then
  1497. v.result_exp := r.result_exp + r.shift;
  1498. else
  1499. v.result_exp := new_exp;
  1500. end if;
  1501. v.opsel_a := AIN_B;
  1502. v.state := LOOKUP;
  1503. when RENORM_C =>
  1504. renormalize := '1';
  1505. v.state := RENORM_C2;
  1506. when RENORM_C2 =>
  1507. set_c := '1';
  1508. v.result_exp := new_exp;
  1509. if r.insn(3) = '0' or r.b.class = ZERO then
  1510. v.first := '1';
  1511. v.state := MULT_1;
  1512. else
  1513. v.madd_cmp := '0';
  1514. if new_exp + 1 >= r.b.exponent then
  1515. v.madd_cmp := '1';
  1516. end if;
  1517. v.opsel_a := AIN_B;
  1518. v.state := DO_FMADD;
  1519. end if;
  1520. when ADD_1 =>
  1521. -- transferring B to R
  1522. v.shift := r.b.exponent - r.a.exponent;
  1523. v.result_exp := r.b.exponent;
  1524. v.longmask := '0';
  1525. v.state := ADD_SHIFT;
  1526. when ADD_SHIFT =>
  1527. -- r.shift = - exponent difference, r.longmask = 0
  1528. opsel_r <= RES_SHIFT;
  1529. v.x := s_nz;
  1530. set_x := '1';
  1531. v.longmask := r.single_prec;
  1532. if r.add_bsmall = '1' then
  1533. v.opsel_a := AIN_A;
  1534. else
  1535. v.opsel_a := AIN_B;
  1536. end if;
  1537. v.state := ADD_2;
  1538. when ADD_2 =>
  1539. -- r.opsel_a = AIN_A if r.add_bsmall = 1 else AIN_B
  1540. opsel_b <= BIN_R;
  1541. opsel_binv <= r.is_subtract;
  1542. carry_in <= r.is_subtract and not r.x;
  1543. v.shift := to_signed(-1, EXP_BITS);
  1544. v.state := ADD_3;
  1545. when ADD_3 =>
  1546. -- check for overflow or negative result (can't get both)
  1547. -- r.shift = -1
  1548. if r.r(63) = '1' then
  1549. -- result is opposite sign to expected
  1550. v.result_sign := not r.result_sign;
  1551. opsel_ainv <= '1';
  1552. carry_in <= '1';
  1553. v.state := FINISH;
  1554. elsif r.r(55) = '1' then
  1555. -- sum overflowed, shift right
  1556. opsel_r <= RES_SHIFT;
  1557. set_x := '1';
  1558. if exp_huge = '1' then
  1559. v.state := ROUND_OFLOW;
  1560. else
  1561. v.state := ROUNDING;
  1562. end if;
  1563. elsif r.r(54) = '1' then
  1564. set_x := '1';
  1565. v.state := ROUNDING;
  1566. elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
  1567. -- r.x must be zero at this point
  1568. v.result_class := ZERO;
  1569. if r.is_subtract = '1' then
  1570. -- set result sign depending on rounding mode
  1571. v.result_sign := r.round_mode(1) and r.round_mode(0);
  1572. end if;
  1573. arith_done := '1';
  1574. else
  1575. renormalize := '1';
  1576. v.state := NORMALIZE;
  1577. end if;
  1578. when CMP_1 =>
  1579. -- r.opsel_a = AIN_A
  1580. opsel_b <= BIN_R;
  1581. opsel_binv <= '1';
  1582. carry_in <= '1';
  1583. v.state := CMP_2;
  1584. when CMP_2 =>
  1585. if r.r(63) = '1' then
  1586. -- A is smaller in magnitude
  1587. v.cr_result := not r.a.negative & r.a.negative & "00";
  1588. elsif (r_hi_nz or r_lo_nz) = '0' then
  1589. v.cr_result := "0010";
  1590. else
  1591. v.cr_result := r.a.negative & not r.a.negative & "00";
  1592. end if;
  1593. v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
  1594. v.instr_done := '1';
  1595. v.state := IDLE;
  1596. when MULT_1 =>
  1597. f_to_multiply.valid <= r.first;
  1598. opsel_r <= RES_MULT;
  1599. if multiply_to_f.valid = '1' then
  1600. v.state := FINISH;
  1601. end if;
  -- FMADD_1 .. FMADD_6: fused multiply-add sequencing.
  -- FMADD_1 handles the case where the addend is bigger: the product is
  -- captured in R:S and the ADD_SHIFT state is used next.
  -- FMADD_2..FMADD_6 handle the case where the product may be bigger:
  -- the addend is shifted into R:S, fed to the multiplier as its addend,
  -- and the R:S:X result is negated if negative and then normalized.
  1602. when FMADD_1 =>
  1603. -- Addend is bigger here
  1604. v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
  1605. -- note v.shift is at most -2 here
  1606. v.shift := r.result_exp - r.b.exponent;
  -- capture the upper product bits in R and the lower ones in S
  1607. opsel_r <= RES_MULT;
  1608. opsel_s <= S_MULT;
  1609. set_s := '1';
  1610. f_to_multiply.valid <= r.first;
  1611. if multiply_to_f.valid = '1' then
  1612. v.longmask := '0';
  1613. v.state := ADD_SHIFT;
  1614. end if;
  1615. when FMADD_2 =>
  1616. -- Product is potentially bigger here
  1617. -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa
  -- S takes the shifter output (S_SHIFT); the extra 64 in r.shift is
  -- removed here so that FMADD_3 can shift R by the true difference
  1618. set_s := '1';
  1619. opsel_s <= S_SHIFT;
  1620. v.shift := r.shift - to_signed(64, EXP_BITS);
  1621. v.state := FMADD_3;
  1622. when FMADD_3 =>
  1623. -- r.shift = addend exp - product exp
  1624. opsel_r <= RES_SHIFT;
  1625. v.first := '1';
  1626. v.state := FMADD_4;
  1627. when FMADD_4 =>
  -- multiply with R:S as the addend; the addend is inverted for an
  -- effective subtraction (msel_inv follows r.is_subtract)
  1628. msel_add <= MULADD_RS;
  1629. f_to_multiply.valid <= r.first;
  1630. msel_inv <= r.is_subtract;
  1631. opsel_r <= RES_MULT;
  1632. opsel_s <= S_MULT;
  1633. set_s := '1';
  1634. if multiply_to_f.valid = '1' then
  1635. v.state := FMADD_5;
  1636. end if;
  1637. when FMADD_5 =>
  1638. -- negate R:S:X if negative
  1639. if r.r(63) = '1' then
  1640. v.result_sign := not r.result_sign;
  -- invert the adder's A input; the +1 of the two's complement only
  -- carries into it when both S and X are zero
  -- NOTE(review): assumes r.opsel_a selects R here -- confirm.
  1641. opsel_ainv <= '1';
  1642. carry_in <= not (s_nz or r.x);
  1643. opsel_s <= S_NEG;
  1644. set_s := '1';
  1645. end if;
  1646. v.shift := to_signed(56, EXP_BITS);
  1647. v.state := FMADD_6;
  1648. when FMADD_6 =>
  1649. -- r.shift = 56 (or 0, but only if r is now nonzero)
  1650. if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
  1651. if s_nz = '0' then
  1652. -- must be a subtraction, and r.x must be zero
  1653. v.result_class := ZERO;
  -- sign of the zero result is '1' only when both round_mode bits are set
  1654. v.result_sign := r.round_mode(1) and r.round_mode(0);
  1655. arith_done := '1';
  1656. else
  1657. -- R is all zeroes but there are non-zero bits in S
  1658. -- so shift them into R and set S to 0
  1659. opsel_r <= RES_SHIFT;
  1660. set_s := '1';
  1661. -- stay in state FMADD_6
  1662. end if;
  1663. elsif r.r(56 downto 54) = "001" then
  -- already normalized (leading one at bit 54)
  1664. v.state := FINISH;
  1665. else
  1666. renormalize := '1';
  1667. v.state := NORMALIZE;
  1668. end if;
  -- LOOKUP: one wait cycle for the inverse-estimate table, then dispatch
  -- on instruction bits:
  --   insn(4)=0, insn(3)=0 -> DIV_2
  --   insn(4)=0, insn(3)=1 -> SQRT_1
  --   insn(4)=1, insn(2)=0 -> FRE_1
  --   insn(4)=1, insn(2)=1 -> RSQRT_1
  -- v.first is set so the next state can issue its first multiply.
  1669. when LOOKUP =>
  1670. -- r.opsel_a = AIN_B
  1671. -- wait one cycle for inverse_table[B] lookup
  1672. v.first := '1';
  1673. if r.insn(4) = '0' then
  1674. if r.insn(3) = '0' then
  1675. v.state := DIV_2;
  1676. else
  1677. v.state := SQRT_1;
  1678. end if;
  1679. elsif r.insn(2) = '0' then
  1680. v.state := FRE_1;
  1681. else
  1682. v.state := RSQRT_1;
  1683. end if;
  -- DIV_2 .. DIV_6: division by iterative refinement of a reciprocal
  -- estimate followed by a remainder-based correction.
  -- DIV_2 and DIV_3 alternate; DIV_2 increments r.count each pass and
  -- DIV_3 leaves the loop for DIV_4 when r.count = 3.
  -- NOTE(review): r.count is presumably zero on entry, giving three
  -- passes -- its initialisation for this path is outside this excerpt.
  1684. when DIV_2 =>
  1685. -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
  1686. msel_1 <= MUL1_B;
  1687. msel_add <= MULADD_CONST;
  1688. msel_inv <= '1';
  1689. if r.count = 0 then
  1690. msel_2 <= MUL2_LUT;
  1691. else
  1692. msel_2 <= MUL2_P;
  1693. end if;
  -- Y latches the second multiplier operand on the first cycle only
  1694. set_y := r.first;
  1695. pshift := '1';
  1696. f_to_multiply.valid <= r.first;
  1697. if multiply_to_f.valid = '1' then
  1698. v.first := '1';
  1699. v.count := r.count + 1;
  1700. v.state := DIV_3;
  1701. end if;
  1702. when DIV_3 =>
  1703. -- compute Y = P = P * Y
  1704. msel_1 <= MUL1_Y;
  1705. msel_2 <= MUL2_P;
  1706. f_to_multiply.valid <= r.first;
  1707. pshift := '1';
  1708. if multiply_to_f.valid = '1' then
  1709. v.first := '1';
  1710. if r.count = 3 then
  1711. v.state := DIV_4;
  1712. else
  1713. v.state := DIV_2;
  1714. end if;
  1715. end if;
  1716. when DIV_4 =>
  1717. -- compute R = P = A * Y (quotient)
  1718. msel_1 <= MUL1_A;
  1719. msel_2 <= MUL2_P;
  1720. set_y := r.first;
  1721. f_to_multiply.valid <= r.first;
  1722. pshift := '1';
  1723. if multiply_to_f.valid = '1' then
  1724. opsel_r <= RES_MULT;
  1725. v.first := '1';
  1726. v.state := DIV_5;
  1727. end if;
  1728. when DIV_5 =>
  1729. -- compute P = A - B * R (remainder)
  -- note: pshift is left at its default here, unlike DIV_2..DIV_4
  1730. msel_1 <= MUL1_B;
  1731. msel_2 <= MUL2_R;
  1732. msel_add <= MULADD_A;
  1733. msel_inv <= '1';
  1734. f_to_multiply.valid <= r.first;
  1735. if multiply_to_f.valid = '1' then
  1736. v.state := DIV_6;
  1737. end if;
  1738. when DIV_6 =>
  1739. -- test if remainder is 0 or >= B
  -- NOTE(review): pcmpb_lt / pcmpb_eq are computed outside this excerpt;
  -- presumably they compare P against B -- confirm.
  1740. if pcmpb_lt = '1' then
  1741. -- quotient is correct, set X if remainder non-zero
  1742. v.x := r.p(58) or px_nz;
  1743. else
  1744. -- quotient needs to be incremented by 1
  1745. carry_in <= '1';
  1746. v.x := not pcmpb_eq;
  1747. end if;
  1748. v.state := FINISH;
  -- FRE_1: load the table estimate (misc_sel "0111" selects inverse_est)
  -- into R, then let NORMALIZE shift it by the amount set here (1).
  1749. when FRE_1 =>
  1750. opsel_r <= RES_MISC;
  1751. misc_sel <= "0111";
  1752. v.shift := to_signed(1, EXP_BITS);
  1753. v.state := NORMALIZE;
  -- FTDIV_1: sets cr_result(1) when the exponent check flags tiny or huge.
  -- The instruction completes immediately if either flag is set, if A is
  -- zero, or if this is not the first pass (r.first = '0'). Otherwise
  -- r.shift is loaded with A's exponent and doing_ftdiv is set to "10";
  -- v.state is not changed on that path.
  -- NOTE(review): presumably the state is re-entered with r.first
  -- cleared so the second pass checks A's exponent -- confirm.
  1754. when FTDIV_1 =>
  1755. v.cr_result(1) := exp_tiny or exp_huge;
  1756. if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
  1757. v.instr_done := '1';
  1758. v.state := IDLE;
  1759. else
  1760. v.shift := r.a.exponent;
  1761. v.doing_ftdiv := "10";
  1762. end if;
  -- RSQRT_1: load the table estimate (misc_sel "0111") into R and set the
  -- result exponent to minus half of B's exponent, then normalize.
  1763. when RSQRT_1 =>
  1764. opsel_r <= RES_MISC;
  1765. misc_sel <= "0111";
  -- arithmetic shift right by one: replicate the sign bit, drop bit 0
  1766. sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
  1767. v.result_exp := - sqrt_exp;
  1768. v.shift := to_signed(1, EXP_BITS);
  1769. v.state := NORMALIZE;
  -- SQRT_1 .. SQRT_12: square root.
  -- SQRT_1..SQRT_3 seed R and Y from the table estimate, SQRT_4..SQRT_7
  -- form the refinement loop (left for SQRT_8 when r.count reaches 2 in
  -- SQRT_7, i.e. after three passes since the count starts at "00"),
  -- SQRT_8..SQRT_10 apply a small correction to R, and SQRT_11/SQRT_12
  -- compute the remainder and decide on a final increment.
  1770. when SQRT_1 =>
  1771. -- put invsqr[B] in R and compute P = invsqr[B] * B
  1772. -- also transfer B (in R) to A
  1773. set_a := '1';
  1774. opsel_r <= RES_MISC;
  1775. misc_sel <= "0111";
  1776. msel_1 <= MUL1_B;
  1777. msel_2 <= MUL2_LUT;
  1778. f_to_multiply.valid <= '1';
  1779. v.shift := to_signed(-1, EXP_BITS);
  1780. v.count := "00";
  1781. v.state := SQRT_2;
  1782. when SQRT_2 =>
  1783. -- shift R right one place
  1784. -- not expecting multiplier result yet
  1785. -- r.shift = -1
  1786. opsel_r <= RES_SHIFT;
  1787. v.first := '1';
  1788. v.state := SQRT_3;
  1789. when SQRT_3 =>
  1790. -- put R into Y, wait for product from multiplier
  1791. msel_2 <= MUL2_R;
  1792. set_y := r.first;
  1793. pshift := '1';
  1794. if multiply_to_f.valid = '1' then
  1795. -- put result into R
  1796. opsel_r <= RES_MULT;
  1797. v.first := '1';
  1798. v.state := SQRT_4;
  1799. end if;
  1800. when SQRT_4 =>
  1801. -- compute 1.5 - Y * P
  1802. msel_1 <= MUL1_Y;
  1803. msel_2 <= MUL2_P;
  1804. msel_add <= MULADD_CONST;
  1805. msel_inv <= '1';
  1806. f_to_multiply.valid <= r.first;
  1807. pshift := '1';
  1808. if multiply_to_f.valid = '1' then
  1809. v.state := SQRT_5;
  1810. end if;
  1811. when SQRT_5 =>
  1812. -- compute Y = Y * P
  -- single-cycle state: the multiply is issued unconditionally and the
  -- next multiply is queued from SQRT_6 without waiting for this one
  1813. msel_1 <= MUL1_Y;
  1814. msel_2 <= MUL2_P;
  1815. f_to_multiply.valid <= '1';
  1816. v.first := '1';
  1817. v.state := SQRT_6;
  1818. when SQRT_6 =>
  1819. -- pipeline in R = R * P
  1820. msel_1 <= MUL1_R;
  1821. msel_2 <= MUL2_P;
  1822. f_to_multiply.valid <= r.first;
  1823. pshift := '1';
  1824. if multiply_to_f.valid = '1' then
  1825. v.first := '1';
  1826. v.state := SQRT_7;
  1827. end if;
  1828. when SQRT_7 =>
  1829. -- first multiply is done, put result in Y
  1830. msel_2 <= MUL2_P;
  1831. set_y := r.first;
  1832. -- wait for second multiply (should be here already)
  1833. pshift := '1';
  1834. if multiply_to_f.valid = '1' then
  1835. -- put result into R
  1836. opsel_r <= RES_MULT;
  1837. v.first := '1';
  1838. v.count := r.count + 1;
  1839. if r.count < 2 then
  1840. v.state := SQRT_4;
  1841. else
  -- (v.first was already set above; this assignment is redundant)
  1842. v.first := '1';
  1843. v.state := SQRT_8;
  1844. end if;
  1845. end if;
  1846. when SQRT_8 =>
  1847. -- compute P = A - R * R, which can be +ve or -ve
  1848. -- we arranged for B to be put into A earlier
  1849. msel_1 <= MUL1_R;
  1850. msel_2 <= MUL2_R;
  1851. msel_add <= MULADD_A;
  1852. msel_inv <= '1';
  1853. pshift := '1';
  1854. f_to_multiply.valid <= r.first;
  1855. if multiply_to_f.valid = '1' then
  1856. v.first := '1';
  1857. v.state := SQRT_9;
  1858. end if;
  1859. when SQRT_9 =>
  1860. -- compute P = P * Y
  1861. -- since Y is an estimate of 1/sqrt(B), this makes P an
  1862. -- estimate of the adjustment needed to R. Since the error
  1863. -- could be negative and we have an unsigned multiplier, the
  1864. -- upper bits can be wrong, but it turns out the lowest 8 bits
  1865. -- are correct and are all we need (given 3 iterations through
  1866. -- SQRT_4 to SQRT_7).
  1867. msel_1 <= MUL1_Y;
  1868. msel_2 <= MUL2_P;
  1869. pshift := '1';
  1870. f_to_multiply.valid <= r.first;
  1871. if multiply_to_f.valid = '1' then
  1872. v.state := SQRT_10;
  1873. end if;
  1874. when SQRT_10 =>
  1875. -- Add the bottom 8 bits of P, sign-extended,
  1876. -- divided by 4, onto R.
  1877. -- The division by 4 is because R is 10.54 format
  1878. -- whereas P is 8.56 format.
  1879. opsel_b <= BIN_PS6;
  -- result exponent is half of B's exponent (arithmetic shift right)
  1880. sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
  1881. v.result_exp := sqrt_exp;
  1882. v.shift := to_signed(1, EXP_BITS);
  1883. v.first := '1';
  1884. v.state := SQRT_11;
  1885. when SQRT_11 =>
  1886. -- compute P = A - R * R (remainder)
  1887. -- also put 2 * R + 1 into B for comparison with P
  -- r.shift = 1 and shiftin = '1' make the shifter output 2 * R + 1,
  -- which set_b latches into B on the first cycle
  1888. msel_1 <= MUL1_R;
  1889. msel_2 <= MUL2_R;
  1890. msel_add <= MULADD_A;
  1891. msel_inv <= '1';
  1892. f_to_multiply.valid <= r.first;
  1893. shiftin := '1';
  1894. set_b := r.first;
  1895. if multiply_to_f.valid = '1' then
  1896. v.state := SQRT_12;
  1897. end if;
  1898. when SQRT_12 =>
  1899. -- test if remainder is 0 or >= B = 2*R + 1
  1900. if pcmpb_lt = '1' then
  1901. -- square root is correct, set X if remainder non-zero
  1902. v.x := r.p(58) or px_nz;
  1903. else
  1904. -- square root needs to be incremented by 1
  1905. carry_in <= '1';
  1906. v.x := not pcmpb_eq;
  1907. end if;
  1908. v.state := FINISH;
  -- INT_SHIFT .. INT_OFLOW: conversion of a floating-point value to an
  -- integer. insn(9) selects the 64-bit forms and insn(8) the unsigned
  -- forms (see the case in INT_FINAL).
  -- Flow: INT_SHIFT -> INT_ROUND -> INT_FINAL [-> INT_CHECK], with
  -- INT_OFLOW producing the saturated result on a detected overflow.
  1909. when INT_SHIFT =>
  1910. -- r.shift = b.exponent - 52
  -- set_x accumulates the bits shifted out into the sticky X flag
  1911. opsel_r <= RES_SHIFT;
  1912. set_x := '1';
  1913. v.state := INT_ROUND;
  1914. v.shift := to_signed(-2, EXP_BITS);
  1915. when INT_ROUND =>
  1916. -- r.shift = -2
  1917. opsel_r <= RES_SHIFT;
  -- round holds two bits that are stored in FPSCR FR..FI
  1918. round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
  1919. v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
  1920. -- Check for negative values that don't round to 0 for fcti*u*
  1921. if r.insn(8) = '1' and r.result_sign = '1' and
  1922. (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
  1923. v.state := INT_OFLOW;
  1924. else
  1925. v.state := INT_FINAL;
  1926. end if;
  1927. when INT_ISHIFT =>
  1928. -- r.shift = b.exponent - 54;
  1929. opsel_r <= RES_SHIFT;
  1930. v.state := INT_FINAL;
  1931. when INT_FINAL =>
  1932. -- Negate if necessary, and increment for rounding if needed
  -- With inversion, a carry-in of 1 completes the two's complement;
  -- when a rounding increment is also due the two cancel, because
  -- -(R + 1) = not R.
  1933. opsel_ainv <= r.result_sign;
  1934. carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
  1935. -- Check for possible overflows
  1936. case r.insn(9 downto 8) is
  1937. when "00" => -- fctiw[z]
  1938. need_check := r.r(31) or (r.r(30) and not r.result_sign);
  1939. when "01" => -- fctiwu[z]
  1940. need_check := r.r(31);
  1941. when "10" => -- fctid[z]
  1942. need_check := r.r(63) or (r.r(62) and not r.result_sign);
  1943. when others => -- fctidu[z]
  1944. need_check := r.r(63);
  1945. end case;
  1946. if need_check = '1' then
  1947. v.state := INT_CHECK;
  1948. else
  -- inexact conversion raises XX
  1949. if r.fpscr(FPSCR_FI) = '1' then
  1950. v.fpscr(FPSCR_XX) := '1';
  1951. end if;
  1952. arith_done := '1';
  1953. end if;
  1954. when INT_CHECK =>
  -- examine the top bit of the 32- or 64-bit result after the final add
  1955. if r.insn(9) = '0' then
  1956. msb := r.r(31);
  1957. else
  1958. msb := r.r(63);
  1959. end if;
  -- misc_sel "1xxx" selects the saturated value for this conversion
  -- type and sign; it is only used when opsel_r is RES_MISC below
  1960. misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
  1961. if (r.insn(8) = '0' and msb /= r.result_sign) or
  1962. (r.insn(8) = '1' and msb /= '1') then
  1963. opsel_r <= RES_MISC;
  1964. v.fpscr(FPSCR_VXCVI) := '1';
  1965. invalid := '1';
  1966. else
  1967. if r.fpscr(FPSCR_FI) = '1' then
  1968. v.fpscr(FPSCR_XX) := '1';
  1969. end if;
  1970. end if;
  1971. arith_done := '1';
  1972. when INT_OFLOW =>
  -- definite overflow: return the saturated value and flag VXCVI
  1973. opsel_r <= RES_MISC;
  1974. misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
  -- a NaN input always takes the odd entry of the pair (the one listed
  -- as "max negative" in the result mux)
  1975. if r.b.class = NAN then
  1976. misc_sel(0) <= '1';
  1977. end if;
  1978. v.fpscr(FPSCR_VXCVI) := '1';
  1979. invalid := '1';
  1980. arith_done := '1';
  -- FRI_1: shift R by r.shift, fold the shifted-out bits into the sticky
  -- X flag (set_x), and hand over to the common ROUNDING state.
  1981. when FRI_1 =>
  1982. -- r.shift = b.exponent - 52
  1983. opsel_r <= RES_SHIFT;
  1984. set_x := '1';
  1985. v.state := ROUNDING;
  -- FINISH / NORMALIZE: common tail of the arithmetic operations.
  -- FINISH checks whether R is already normalized (leading one at bit 54
  -- with bits 63..55 clear) and otherwise requests a count-leading-zeroes
  -- and goes through NORMALIZE. Both states then choose the rounding path
  -- from the exponent range flags exp_tiny and exp_huge.
  1986. when FINISH =>
  -- for multiplies, any non-zero low product bits make the result inexact
  1987. if r.is_multiply = '1' and px_nz = '1' then
  1988. v.x := '1';
  1989. end if;
  1990. if r.r(63 downto 54) /= "0000000001" then
  1991. renormalize := '1';
  1992. v.state := NORMALIZE;
  1993. else
  1994. set_x := '1';
  1995. if exp_tiny = '1' then
  -- negative amount by which the exponent is below the minimum
  1996. v.shift := new_exp - min_exp;
  1997. v.state := ROUND_UFLOW;
  1998. elsif exp_huge = '1' then
  1999. v.state := ROUND_OFLOW;
  2000. else
  2001. v.state := ROUNDING;
  2002. end if;
  2003. end if;
  2004. when NORMALIZE =>
  2005. -- Shift so we have 9 leading zeroes (we know R is non-zero)
  2006. -- r.shift = clz(r.r) - 9
  2007. opsel_r <= RES_SHIFT;
  2008. set_x := '1';
  2009. if exp_tiny = '1' then
  2010. v.shift := new_exp - min_exp;
  2011. v.state := ROUND_UFLOW;
  2012. elsif exp_huge = '1' then
  2013. v.state := ROUND_OFLOW;
  2014. else
  2015. v.state := ROUNDING;
  2016. end if;
  -- ROUND_UFLOW / ROUND_OFLOW: exponent out of range before rounding.
  -- Each state behaves differently depending on whether the matching
  -- exception is enabled in FPSCR (UE / OE).
  2017. when ROUND_UFLOW =>
  2018. -- r.shift = - amount by which exponent underflows
  -- remember the tiny result so ROUNDING can raise UX if it is inexact
  2019. v.tiny := '1';
  2020. if r.fpscr(FPSCR_UE) = '0' then
  2021. -- disabled underflow exception case
  2022. -- have to denormalize before rounding
  2023. opsel_r <= RES_SHIFT;
  2024. set_x := '1';
  2025. v.state := ROUNDING;
  2026. else
  2027. -- enabled underflow exception case
  2028. -- if denormalized, have to normalize before rounding
  2029. v.fpscr(FPSCR_UX) := '1';
  -- the exponent is adjusted upwards by bias_exp
  2030. v.result_exp := r.result_exp + bias_exp;
  2031. if r.r(54) = '0' then
  2032. renormalize := '1';
  2033. v.state := NORMALIZE;
  2034. else
  2035. v.state := ROUNDING;
  2036. end if;
  2037. end if;
  2038. when ROUND_OFLOW =>
  2039. v.fpscr(FPSCR_OX) := '1';
  2040. if r.fpscr(FPSCR_OE) = '0' then
  2041. -- disabled overflow exception
  2042. -- result depends on rounding mode
  2043. v.fpscr(FPSCR_XX) := '1';
  2044. v.fpscr(FPSCR_FI) := '1';
  -- infinity is returned for round mode "00", or for mode "1x" when
  -- x equals the result sign; otherwise the largest finite value
  2045. if r.round_mode(1 downto 0) = "00" or
  2046. (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
  2047. v.result_class := INFINITY;
  2048. v.fpscr(FPSCR_FR) := '1';
  2049. else
  2050. v.fpscr(FPSCR_FR) := '0';
  2051. end if;
  2052. -- construct largest representable number
  -- (done in both branches above; misc_sel picks the DP or SP mantissa)
  2053. v.result_exp := max_exp;
  2054. opsel_r <= RES_MISC;
  2055. misc_sel <= "001" & r.single_prec;
  2056. arith_done := '1';
  2057. else
  2058. -- enabled overflow exception
  -- the exponent is adjusted downwards by bias_exp, then rounded
  2059. v.result_exp := r.result_exp - bias_exp;
  2060. v.state := ROUNDING;
  2061. end if;
  -- ROUNDING .. DENORM: final rounding of the mantissa in R.
  -- ROUNDING masks the bits below the target precision and, when the
  -- rounding function asks for it, adds one unit in the last place.
  -- ROUNDING_2 handles a carry out of the mantissa caused by that add.
  -- ROUNDING_3 / DENORM renormalize results whose leading one is not at
  -- bit 54, or return zero if nothing is left.
  2062. when ROUNDING =>
  2063. opsel_mask <= '1';
  -- round(1) = increment needed, round(0) = result is inexact
  2064. round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
  2065. v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
  2066. if round(1) = '1' then
  2067. -- increment the LSB for the precision
  2068. opsel_b <= BIN_RND;
  2069. v.shift := to_signed(-1, EXP_BITS);
  2070. v.state := ROUNDING_2;
  2071. else
  2072. if r.r(54) = '0' then
  2073. -- result after masking could be zero, or could be a
  2074. -- denormalized result that needs to be renormalized
  2075. renormalize := '1';
  2076. v.state := ROUNDING_3;
  2077. else
  2078. arith_done := '1';
  2079. end if;
  2080. end if;
  2081. if round(0) = '1' then
  2082. v.fpscr(FPSCR_XX) := '1';
  -- an inexact result that was also tiny raises underflow
  2083. if r.tiny = '1' then
  2084. v.fpscr(FPSCR_UX) := '1';
  2085. end if;
  2086. end if;
  2087. when ROUNDING_2 =>
  2088. -- Check for overflow during rounding
  2089. -- r.shift = -1
  2090. v.x := '0';
  2091. if r.r(55) = '1' then
  -- the increment carried into bit 55: shift by r.shift (-1) to
  -- bring the leading one back to bit 54
  2092. opsel_r <= RES_SHIFT;
  2093. if exp_huge = '1' then
  2094. v.state := ROUND_OFLOW;
  2095. else
  2096. arith_done := '1';
  2097. end if;
  2098. elsif r.r(54) = '0' then
  2099. -- Do CLZ so we can renormalize the result
  2100. renormalize := '1';
  2101. v.state := ROUNDING_3;
  2102. else
  2103. arith_done := '1';
  2104. end if;
  2105. when ROUNDING_3 =>
  2106. -- r.shift = clz(r.r) - 9
  -- for single precision only the upper part (r_hi_nz) is significant
  2107. mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
  2108. if mant_nz = '0' then
  2109. v.result_class := ZERO;
  2110. if r.is_subtract = '1' then
  2111. -- set result sign depending on rounding mode
  2112. v.result_sign := r.round_mode(1) and r.round_mode(0);
  2113. end if;
  2114. arith_done := '1';
  2115. else
  2116. -- Renormalize result after rounding
  2117. opsel_r <= RES_SHIFT;
  2118. v.denorm := exp_tiny;
  -- if the renormalized exponent falls below -1022, DENORM shifts
  -- the mantissa back by the (negative) difference computed here
  2119. v.shift := new_exp - to_signed(-1022, EXP_BITS);
  2120. if new_exp < to_signed(-1022, EXP_BITS) then
  2121. v.state := DENORM;
  2122. else
  2123. arith_done := '1';
  2124. end if;
  2125. end if;
  2126. when DENORM =>
  2127. -- r.shift = result_exp - -1022
  2128. opsel_r <= RES_SHIFT;
  2129. arith_done := '1';
  -- NAN_RESULT / EXC_RESULT: results that are copied from an operand.
  -- NAN_RESULT flags a signalling NaN (an in-use NaN operand whose
  -- mantissa bit 53 is clear) and picks the first NaN operand in A, B, C
  -- priority order. EXC_RESULT then copies sign, exponent and class from
  -- the operand selected by r.opsel_a, applying r.negate to the sign.
  2130. when NAN_RESULT =>
  2131. if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(53) = '0') or
  2132. (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(53) = '0') or
  2133. (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(53) = '0') then
  2134. -- Signalling NAN
  2135. v.fpscr(FPSCR_VXSNAN) := '1';
  2136. invalid := '1';
  2137. end if;
  2138. if r.use_a = '1' and r.a.class = NAN then
  2139. v.opsel_a := AIN_A;
  2140. elsif r.use_b = '1' and r.b.class = NAN then
  2141. v.opsel_a := AIN_B;
  2142. elsif r.use_c = '1' and r.c.class = NAN then
  2143. v.opsel_a := AIN_C;
  2144. end if;
  2145. v.state := EXC_RESULT;
  2146. when EXC_RESULT =>
  2147. -- r.opsel_a = AIN_A, AIN_B or AIN_C according to which input is the result
  -- NOTE(review): the mantissa is presumably passed into R through the
  -- adder's A input by the default data-path selects -- not set here.
  2148. case r.opsel_a is
  2149. when AIN_B =>
  2150. v.result_sign := r.b.negative xor r.negate;
  2151. v.result_exp := r.b.exponent;
  2152. v.result_class := r.b.class;
  2153. when AIN_C =>
  2154. v.result_sign := r.c.negative xor r.negate;
  2155. v.result_exp := r.c.exponent;
  2156. v.result_class := r.c.class;
  2157. when others =>
  2158. v.result_sign := r.a.negative xor r.negate;
  2159. v.result_exp := r.a.exponent;
  2160. v.result_class := r.a.class;
  2161. end case;
  2162. arith_done := '1';
  2163. end case;
  -- Common completion logic, evaluated after the state machine on every
  -- pass: records zero-divide and invalid conditions, substitutes a
  -- generated quiet NaN when requested, and finishes the instruction
  -- when a state has set arith_done.
  2164. if zero_divide = '1' then
  2165. v.fpscr(FPSCR_ZX) := '1';
  2166. end if;
  2167. if qnan_result = '1' then
  -- overrides whatever the state selected: positive default QNaN
  -- (misc_sel "0001") and immediate completion
  2168. invalid := '1';
  2169. v.result_class := NAN;
  2170. v.result_sign := '0';
  2171. misc_sel <= "0001";
  2172. opsel_r <= RES_MISC;
  2173. arith_done := '1';
  2174. end if;
  2175. if invalid = '1' then
  2176. v.invalid := '1';
  2177. end if;
  2178. if arith_done = '1' then
  2179. -- Enabled invalid exception doesn't write result or FPRF
  2180. -- Neither does enabled zero-divide exception
  2181. if (v.invalid and r.fpscr(FPSCR_VE)) = '0' and
  2182. (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
  2183. v.writing_back := '1';
  2184. v.update_fprf := '1';
  2185. end if;
  2186. v.instr_done := '1';
  2187. v.state := IDLE;
  -- allow FPSCR FX to be updated for this instruction (see the
  -- update_fx test near the end of the process)
  2188. update_fx := '1';
  2189. end if;
  2190. -- Multiplier and divide/square root data path
  -- msel_1 / msel_2 select the two multiplier operands. Mantissas and R
  -- are passed with their top two bits dropped and two zero bits
  -- appended; Y and P are passed unchanged.
  2191. case msel_1 is
  2192. when MUL1_A =>
  2193. f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
  2194. when MUL1_B =>
  2195. f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
  2196. when MUL1_Y =>
  2197. f_to_multiply.data1 <= r.y;
  2198. when others =>
  2199. f_to_multiply.data1 <= r.r(61 downto 0) & "00";
  2200. end case;
  2201. case msel_2 is
  2202. when MUL2_C =>
  2203. f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
  2204. when MUL2_LUT =>
  -- table estimate padded out to the operand width
  2205. f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
  2206. when MUL2_P =>
  2207. f_to_multiply.data2 <= r.p;
  2208. when others =>
  2209. f_to_multiply.data2 <= r.r(61 downto 0) & "00";
  2210. end case;
  -- msel_add selects the value added to the product (default: zero)
  2211. maddend := (others => '0');
  2212. case msel_add is
  2213. when MULADD_CONST =>
  2214. -- addend is 2.0 or 1.5 in 16.112 format
  2215. if r.is_sqrt = '0' then
  2216. maddend(113) := '1'; -- 2.0
  2217. else
  2218. maddend(112 downto 111) := "11"; -- 1.5
  2219. end if;
  2220. when MULADD_A =>
  2221. -- addend is A in 16.112 format
  2222. maddend(121 downto 58) := r.a.mantissa;
  2223. when MULADD_RS =>
  2224. -- addend is concatenation of R and S in 16.112 format
  2225. maddend := "000000" & r.r & r.s & "00";
  2226. when others =>
  2227. end case;
  -- msel_inv sends the inverted addend and also raises not_result
  -- NOTE(review): presumably the multiplier then inverts its output,
  -- giving addend - product overall -- multiplier is outside this file view.
  2228. if msel_inv = '1' then
  2229. f_to_multiply.addend <= not maddend;
  2230. else
  2231. f_to_multiply.addend <= maddend;
  2232. end if;
  2233. f_to_multiply.not_result <= msel_inv;
  -- Y captures the second multiplier operand when requested
  2234. if set_y = '1' then
  2235. v.y := f_to_multiply.data2;
  2236. end if;
  -- P captures a 64-bit slice of the product whenever one is valid:
  -- bits 119..56 with pshift, otherwise the low 64 bits
  2237. if multiply_to_f.valid = '1' then
  2238. if pshift = '0' then
  2239. v.p := multiply_to_f.result(63 downto 0);
  2240. else
  2241. v.p := multiply_to_f.result(119 downto 56);
  2242. end if;
  2243. end if;
  2244. -- Data path.
  2245. -- This has A and B input multiplexers, an adder, a shifter,
  2246. -- count-leading-zeroes logic, and a result mux.
  -- Build the mask used for the sticky X flag from the shift count.
  -- With r.longmask set the count is lowered by a further 29.
  -- NOTE(review): 29 equals the distance between the two rounding
  -- positions used by BIN_RND (bits 31 and 2); presumably longmask
  -- covers the single-precision case -- confirm.
  2247. if r.longmask = '1' then
  2248. mshift := r.shift + to_signed(-29, EXP_BITS);
  2249. else
  2250. mshift := r.shift;
  2251. end if;
  2252. if mshift < to_signed(-64, EXP_BITS) then
  2253. mask := (others => '1');
  2254. elsif mshift >= to_signed(0, EXP_BITS) then
  2255. mask := (others => '0');
  2256. else
  2257. mask := right_mask(unsigned(mshift(5 downto 0)));
  2258. end if;
  -- A input mux, selected by the registered r.opsel_a
  2259. case r.opsel_a is
  2260. when AIN_R =>
  2261. in_a0 := r.r;
  2262. when AIN_A =>
  2263. in_a0 := r.a.mantissa;
  2264. when AIN_B =>
  2265. in_a0 := r.b.mantissa;
  2266. when others =>
  2267. in_a0 := r.c.mantissa;
  2268. end case;
  -- sticky bit: any masked bit of the (not yet inverted) A input sets X
  2269. if (or (mask and in_a0)) = '1' and set_x = '1' then
  2270. v.x := '1';
  2271. end if;
  2272. if opsel_ainv = '1' then
  2273. in_a0 := not in_a0;
  2274. end if;
  2275. in_a <= in_a0;
  -- B input mux
  2276. case opsel_b is
  2277. when BIN_ZERO =>
  2278. in_b0 := (others => '0');
  2279. when BIN_R =>
  2280. in_b0 := r.r;
  2281. when BIN_RND =>
  -- one unit at the rounding position: bit 31 (single) or bit 2 (double)
  2282. round_inc := (31 => r.single_prec, 2 => not r.single_prec, others => '0');
  2283. in_b0 := round_inc;
  2284. when others =>
  2285. -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
  2286. in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
  2287. end case;
  2288. if opsel_binv = '1' then
  2289. in_b0 := not in_b0;
  2290. end if;
  2291. in_b <= in_b0;
  -- Shifter: operates on R concatenated with S (shiftin is ORed into the
  -- top bit of S). Shift counts outside -64..63 give an all-zero result.
  2292. if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
  2293. shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),
  2294. std_ulogic_vector(r.shift(6 downto 0)));
  2295. else
  2296. shift_res := (others => '0');
  2297. end if;
  -- Adder: A + B + carry-in
  2298. sum := std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
  -- opsel_mask clears the bits below the rounding position:
  -- bits 1..0 always, and bits 30..2 as well for single precision
  2299. if opsel_mask = '1' then
  2300. sum(1 downto 0) := "00";
  2301. if r.single_prec = '1' then
  2302. sum(30 downto 2) := (others => '0');
  2303. end if;
  2304. end if;
  -- Result mux: chooses the value loaded into R from the adder sum, the
  -- shifter output, the upper multiplier result bits (121..58), or one of
  -- the constants / special values selected by misc_sel.
  2305. case opsel_r is
  2306. when RES_SUM =>
  2307. result <= sum;
  2308. when RES_SHIFT =>
  2309. result <= shift_res;
  2310. when RES_MULT =>
  2311. result <= multiply_to_f.result(121 downto 58);
  2312. when others =>
  2313. case misc_sel is
  2314. when "0000" =>
  -- FPSCR contents (masked) in the low word
  2315. misc := x"00000000" & (r.fpscr and fpscr_mask);
  2316. when "0001" =>
  2317. -- generated QNaN mantissa
  2318. misc := x"0020000000000000";
  2319. when "0010" =>
  2320. -- mantissa of max representable DP number
  2321. misc := x"007ffffffffffffc";
  2322. when "0011" =>
  2323. -- mantissa of max representable SP number
  2324. misc := x"007fffff80000000";
  2325. when "0100" =>
  2326. -- fmrgow result
  2327. misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
  2328. when "0110" =>
  2329. -- fmrgew result
  2330. misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
  2331. when "0111" =>
  -- table estimate (inverse_est) positioned within the mantissa
  2332. misc := 10x"000" & inverse_est & 35x"000000000";
  -- "1xxx": saturated integer results, indexed by
  -- '1' & insn(9 downto 8) & sign (see INT_CHECK / INT_OFLOW)
  2333. when "1000" =>
  2334. -- max positive result for fctiw[z]
  2335. misc := x"000000007fffffff";
  2336. when "1001" =>
  2337. -- max negative result for fctiw[z]
  2338. misc := x"ffffffff80000000";
  2339. when "1010" =>
  2340. -- max positive result for fctiwu[z]
  2341. misc := x"00000000ffffffff";
  2342. when "1011" =>
  2343. -- max negative result for fctiwu[z]
  2344. misc := x"0000000000000000";
  2345. when "1100" =>
  2346. -- max positive result for fctid[z]
  2347. misc := x"7fffffffffffffff";
  2348. when "1101" =>
  2349. -- max negative result for fctid[z]
  2350. misc := x"8000000000000000";
  2351. when "1110" =>
  2352. -- max positive result for fctidu[z]
  2353. misc := x"ffffffffffffffff";
  2354. when "1111" =>
  2355. -- max negative result for fctidu[z]
  2356. misc := x"0000000000000000";
  2357. when others =>
  -- unused selector values (e.g. "0101") yield zero
  2358. misc := x"0000000000000000";
  2359. end case;
  2360. result <= misc;
  2361. end case;
  2362. v.r := result;
  -- Register updates driven by the control variables set in the state
  -- machine: the S extension register, the A/B/C operand registers, the
  -- result exponent, and the shift count for renormalization.
  2363. if set_s = '1' then
  2364. case opsel_s is
  2365. when S_NEG =>
  -- low part of the R:S:X negation: not S, plus one when X is clear
  2366. v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
  2367. when S_MULT =>
  -- product bits just below those taken by RES_MULT
  2368. v.s := multiply_to_f.result(57 downto 2);
  2369. when S_SHIFT =>
  -- upper shifter bits go to S; any discarded low bit sets sticky X
  2370. v.s := shift_res(63 downto 8);
  2371. if shift_res(7 downto 0) /= x"00" then
  2372. v.x := '1';
  2373. end if;
  2374. when others =>
  2375. v.s := (others => '0');
  2376. end case;
  2377. end if;
  -- operand registers can be reloaded from the shifter output
  2378. if set_a = '1' then
  2379. v.a.exponent := new_exp;
  2380. v.a.mantissa := shift_res;
  2381. end if;
  2382. if set_b = '1' then
  2383. v.b.exponent := new_exp;
  2384. v.b.mantissa := shift_res;
  2385. end if;
  2386. if set_c = '1' then
  2387. v.c.exponent := new_exp;
  2388. v.c.mantissa := shift_res;
  2389. end if;
  -- whenever R takes the shifter output, the exponent follows new_exp
  2390. if opsel_r = RES_SHIFT then
  2391. v.result_exp := new_exp;
  2392. end if;
  -- renormalize: next shift count = leading zeroes of R minus 9
  -- (this overrides any v.shift assigned by the state machine)
  2393. if renormalize = '1' then
  2394. clz := count_left_zeroes(r.r);
  2395. if renorm_sqrt = '1' then
  2396. -- make denormalized value end up with even exponent
  2397. clz(0) := '1';
  2398. end if;
  2399. v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
  2400. end if;
  -- Output formatting and FPSCR housekeeping, followed by the final
  -- illegal-instruction / busy handling and the register update.
  -- Integer results are taken from R directly; floating-point results
  -- are packed from the registered sign, class, exponent and mantissa.
  2401. if r.int_result = '1' then
  2402. fp_result <= r.r;
  2403. else
  2404. fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
  2405. r.single_prec, r.quieten_nan);
  2406. end if;
  -- result flags are written on the cycle after completion
  -- (r.update_fprf is the registered copy of v.update_fprf)
  2407. if r.update_fprf = '1' then
  2408. v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
  2409. r.r(54) and not r.denorm);
  2410. end if;
  -- VX is the OR of all the individual invalid-operation bits
  2411. v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
  2412. (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
  -- FEX is set when any exception bit has its enable bit set
  2413. v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
  2414. v.fpscr(FPSCR_VE downto FPSCR_XE));
  -- FX is set when an exception bit is set that was not in r.old_exc
  2415. if update_fx = '1' and
  2416. (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
  2417. v.fpscr(FPSCR_FX) := '1';
  2418. end if;
  -- record forms return FPSCR FX..OX as the CR result
  2419. if r.rc = '1' then
  2420. v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
  2421. end if;
  2422. v.illegal := illegal;
  2423. if illegal = '1' then
  -- illegal instruction: raise an interrupt and cancel everything else
  2424. v.instr_done := '0';
  2425. v.do_intr := '1';
  2426. v.writing_back := '0';
  2427. v.busy := '0';
  2428. v.state := IDLE;
  2429. else
  -- interrupt on completion if an enabled exception is pending and
  -- r.fe_mode is set
  2430. v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
  2431. if v.state /= IDLE or v.do_intr = '1' then
  2432. v.busy := '1';
  2433. end if;
  2434. end if;
  2435. rin <= v;
  2436. end process;
  2437. end architecture behaviour;