oa_hash_map.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595
  1. /*************************************************************************/
  2. /* oa_hash_map.h */
  3. /*************************************************************************/
  4. /* This file is part of: */
  5. /* GODOT ENGINE */
  6. /* https://godotengine.org */
  7. /*************************************************************************/
  8. /* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
  9. /* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
  10. /* */
  11. /* Permission is hereby granted, free of charge, to any person obtaining */
  12. /* a copy of this software and associated documentation files (the */
  13. /* "Software"), to deal in the Software without restriction, including */
  14. /* without limitation the rights to use, copy, modify, merge, publish, */
  15. /* distribute, sublicense, and/or sell copies of the Software, and to */
  16. /* permit persons to whom the Software is furnished to do so, subject to */
  17. /* the following conditions: */
  18. /* */
  19. /* The above copyright notice and this permission notice shall be */
  20. /* included in all copies or substantial portions of the Software. */
  21. /* */
  22. /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
  23. /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
  24. /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
  25. /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
  26. /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
  27. /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
  28. /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
  29. /*************************************************************************/
  30. #ifndef OA_HASH_MAP_H
  31. #define OA_HASH_MAP_H
  32. #include "hashfuncs.h"
  33. #include "math_funcs.h"
  34. #include "os/copymem.h"
  35. #include "os/memory.h"
// Comment out the following define to disable the initial local storage.
  37. #define OA_HASH_MAP_INITIAL_LOCAL_STORAGE
  38. /**
  39. * This class implements a hash map datastructure that uses open addressing with
  40. * local probing.
  41. *
  42. * It can give huge performance improvements over a chained HashMap because of
  43. * the increased data locality.
  44. *
  45. * Because of that locality property it's important to not use "large" value
  46. * types as the "TData" type. If TData values are too big it can cause more
  47. * cache misses then chaining. If larger values are needed then storing those
  48. * in a separate array and using pointers or indices to reference them is the
  49. * better solution.
  50. *
  51. * This hash map also implements real-time incremental rehashing.
  52. *
  53. */
  54. template <class TKey, class TData,
  55. uint16_t INITIAL_NUM_ELEMENTS = 64,
  56. class Hasher = HashMapHasherDefault,
  57. class Comparator = HashMapComparatorDefault<TKey> >
  58. class OAHashMap {
  59. private:
  60. #ifdef OA_HASH_MAP_INITIAL_LOCAL_STORAGE
  61. TData local_data[INITIAL_NUM_ELEMENTS];
  62. TKey local_keys[INITIAL_NUM_ELEMENTS];
  63. uint32_t local_hashes[INITIAL_NUM_ELEMENTS];
  64. uint8_t local_flags[INITIAL_NUM_ELEMENTS / 4 + (INITIAL_NUM_ELEMENTS % 4 != 0 ? 1 : 0)];
  65. #endif
  66. struct {
  67. TData *data;
  68. TKey *keys;
  69. uint32_t *hashes;
  70. // This is actually an array of bits, 4 bit pairs per octet.
  71. // | ba ba ba ba | ba ba ba ba | ....
  72. //
  73. // if a is set it means that there is an element present.
  74. // if b is set it means that an element was deleted. This is needed for
  75. // the local probing to work without relocating any succeeding and
  76. // colliding entries.
  77. uint8_t *flags;
  78. uint32_t capacity;
  79. } table, old_table;
  80. bool is_rehashing;
  81. uint32_t rehash_position;
  82. uint32_t rehash_amount;
  83. uint32_t elements;
  84. /* Methods */
  85. // returns true if the value already existed, false if it's a new entry
  86. bool _raw_set_with_hash(uint32_t p_hash, const TKey &p_key, const TData &p_data) {
  87. for (int i = 0; i < table.capacity; i++) {
  88. int pos = (p_hash + i) % table.capacity;
  89. int flags_pos = pos / 4;
  90. int flags_pos_offset = pos % 4;
  91. bool is_filled_flag = table.flags[flags_pos] & (1 << (2 * flags_pos_offset));
  92. bool is_deleted_flag = table.flags[flags_pos] & (1 << (2 * flags_pos_offset + 1));
  93. if (is_filled_flag) {
  94. if (table.hashes[pos] == p_hash && Comparator::compare(table.keys[pos], p_key)) {
  95. table.data[pos] = p_data;
  96. return true;
  97. }
  98. continue;
  99. }
  100. table.keys[pos] = p_key;
  101. table.data[pos] = p_data;
  102. table.hashes[pos] = p_hash;
  103. table.flags[flags_pos] |= (1 << (2 * flags_pos_offset));
  104. table.flags[flags_pos] &= ~(1 << (2 * flags_pos_offset + 1));
  105. return false;
  106. }
  107. return false;
  108. }
  109. public:
  110. _FORCE_INLINE_ uint32_t get_capacity() const { return table.capacity; }
  111. _FORCE_INLINE_ uint32_t get_num_elements() const { return elements; }
  112. void set(const TKey &p_key, const TData &p_data) {
  113. uint32_t hash = Hasher::hash(p_key);
  114. // We don't progress the rehashing if the table just got resized
  115. // to keep the cost of this function low.
  116. if (is_rehashing) {
  117. // rehash progress
  118. for (int i = 0; i <= rehash_amount && rehash_position < old_table.capacity; rehash_position++) {
  119. int flags_pos = rehash_position / 4;
  120. int flags_pos_offset = rehash_position % 4;
  121. bool is_filled_flag = (old_table.flags[flags_pos] & (1 << (2 * flags_pos_offset))) > 0;
  122. bool is_deleted_flag = (old_table.flags[flags_pos] & (1 << (2 * flags_pos_offset + 1))) > 0;
  123. if (is_filled_flag) {
  124. _raw_set_with_hash(old_table.hashes[rehash_position], old_table.keys[rehash_position], old_table.data[rehash_position]);
  125. old_table.keys[rehash_position].~TKey();
  126. old_table.data[rehash_position].~TData();
  127. memnew_placement(&old_table.keys[rehash_position], TKey);
  128. memnew_placement(&old_table.data[rehash_position], TData);
  129. old_table.flags[flags_pos] &= ~(1 << (2 * flags_pos_offset));
  130. old_table.flags[flags_pos] |= (1 << (2 * flags_pos_offset + 1));
  131. }
  132. }
  133. if (rehash_position >= old_table.capacity) {
  134. // wohooo, we can get rid of the old table.
  135. is_rehashing = false;
  136. #ifdef OA_HASH_MAP_INITIAL_LOCAL_STORAGE
  137. if (old_table.data == local_data) {
  138. // Everything is local, so no cleanup :P
  139. } else
  140. #endif
  141. {
  142. memdelete_arr(old_table.data);
  143. memdelete_arr(old_table.keys);
  144. memdelete_arr(old_table.hashes);
  145. memdelete_arr(old_table.flags);
  146. }
  147. }
  148. }
  149. // Table is almost full, resize and start rehashing process.
  150. if (elements >= table.capacity * 0.7) {
  151. old_table.capacity = table.capacity;
  152. old_table.data = table.data;
  153. old_table.flags = table.flags;
  154. old_table.hashes = table.hashes;
  155. old_table.keys = table.keys;
  156. table.capacity = old_table.capacity * 2;
  157. table.data = memnew_arr(TData, table.capacity);
  158. table.flags = memnew_arr(uint8_t, table.capacity / 4 + (table.capacity % 4 != 0 ? 1 : 0));
  159. table.hashes = memnew_arr(uint32_t, table.capacity);
  160. table.keys = memnew_arr(TKey, table.capacity);
  161. zeromem(table.flags, table.capacity / 4 + (table.capacity % 4 != 0 ? 1 : 0));
  162. is_rehashing = true;
  163. rehash_position = 0;
  164. rehash_amount = (elements * 2) / (table.capacity * 0.7 - old_table.capacity);
  165. }
  166. if (!_raw_set_with_hash(hash, p_key, p_data))
  167. elements++;
  168. }
  169. /**
  170. * returns true if the value was found, false otherwise.
  171. *
  172. * if r_data is not NULL then the value will be written to the object
  173. * it points to.
  174. */
  175. bool lookup(const TKey &p_key, TData *r_data) {
  176. uint32_t hash = Hasher::hash(p_key);
  177. bool check_old_table = is_rehashing;
  178. bool check_new_table = true;
  179. // search for the key and return the value associated with it
  180. //
  181. // if we're rehashing we need to check both the old and the
  182. // current table. If we find a value in the old table we still
  183. // need to continue searching in the new table as it might have
  184. // been added after
  185. TData *value = NULL;
  186. for (int i = 0; i < table.capacity; i++) {
  187. if (!check_new_table && !check_old_table) {
  188. break;
  189. }
  190. // if we're rehashing check the old table
  191. if (check_old_table && i < old_table.capacity) {
  192. int pos = (hash + i) % old_table.capacity;
  193. int flags_pos = pos / 4;
  194. int flags_pos_offset = pos % 4;
  195. bool is_filled_flag = (old_table.flags[flags_pos] & (1 << (2 * flags_pos_offset))) > 0;
  196. bool is_deleted_flag = (old_table.flags[flags_pos] & (1 << (2 * flags_pos_offset + 1))) > 0;
  197. if (is_filled_flag) {
  198. // found our entry?
  199. if (old_table.hashes[pos] == hash && Comparator::compare(old_table.keys[pos], p_key)) {
  200. value = &old_table.data[pos];
  201. check_old_table = false;
  202. }
  203. } else if (!is_deleted_flag) {
  204. // we hit an empty field here, we don't
  205. // need to further check this old table
  206. // because we know it's not in here.
  207. check_old_table = false;
  208. }
  209. }
  210. if (check_new_table) {
  211. int pos = (hash + i) % table.capacity;
  212. int flags_pos = pos / 4;
  213. int flags_pos_offset = pos % 4;
  214. bool is_filled_flag = (table.flags[flags_pos] & (1 << (2 * flags_pos_offset))) > 0;
  215. bool is_deleted_flag = (table.flags[flags_pos] & (1 << (2 * flags_pos_offset + 1))) > 0;
  216. if (is_filled_flag) {
  217. // found our entry?
  218. if (table.hashes[pos] == hash && Comparator::compare(table.keys[pos], p_key)) {
  219. if (r_data != NULL)
  220. *r_data = table.data[pos];
  221. return true;
  222. }
  223. continue;
  224. } else if (is_deleted_flag) {
  225. continue;
  226. } else if (value != NULL) {
  227. // We found a value in the old table
  228. if (r_data != NULL)
  229. *r_data = *value;
  230. return true;
  231. } else {
  232. check_new_table = false;
  233. }
  234. }
  235. }
  236. if (value != NULL) {
  237. if (r_data != NULL)
  238. *r_data = *value;
  239. return true;
  240. }
  241. return false;
  242. }
  243. _FORCE_INLINE_ bool has(const TKey &p_key) {
  244. return lookup(p_key, NULL);
  245. }
  246. void remove(const TKey &p_key) {
  247. uint32_t hash = Hasher::hash(p_key);
  248. bool check_old_table = is_rehashing;
  249. bool check_new_table = true;
  250. for (int i = 0; i < table.capacity; i++) {
  251. if (!check_new_table && !check_old_table) {
  252. return;
  253. }
  254. // if we're rehashing check the old table
  255. if (check_old_table && i < old_table.capacity) {
  256. int pos = (hash + i) % old_table.capacity;
  257. int flags_pos = pos / 4;
  258. int flags_pos_offset = pos % 4;
  259. bool is_filled_flag = (old_table.flags[flags_pos] & (1 << (2 * flags_pos_offset))) > 0;
  260. bool is_deleted_flag = (old_table.flags[flags_pos] & (1 << (2 * flags_pos_offset + 1))) > 0;
  261. if (is_filled_flag) {
  262. // found our entry?
  263. if (old_table.hashes[pos] == hash && Comparator::compare(old_table.keys[pos], p_key)) {
  264. old_table.keys[pos].~TKey();
  265. old_table.data[pos].~TData();
  266. memnew_placement(&old_table.keys[pos], TKey);
  267. memnew_placement(&old_table.data[pos], TData);
  268. old_table.flags[flags_pos] &= ~(1 << (2 * flags_pos_offset));
  269. old_table.flags[flags_pos] |= (1 << (2 * flags_pos_offset + 1));
  270. elements--;
  271. return;
  272. }
  273. } else if (!is_deleted_flag) {
  274. // we hit an empty field here, we don't
  275. // need to further check this old table
  276. // because we know it's not in here.
  277. check_old_table = false;
  278. }
  279. }
  280. if (check_new_table) {
  281. int pos = (hash + i) % table.capacity;
  282. int flags_pos = pos / 4;
  283. int flags_pos_offset = pos % 4;
  284. bool is_filled_flag = (table.flags[flags_pos] & (1 << (2 * flags_pos_offset))) > 0;
  285. bool is_deleted_flag = (table.flags[flags_pos] & (1 << (2 * flags_pos_offset + 1))) > 0;
  286. if (is_filled_flag) {
  287. // found our entry?
  288. if (table.hashes[pos] == hash && Comparator::compare(table.keys[pos], p_key)) {
  289. table.keys[pos].~TKey();
  290. table.data[pos].~TData();
  291. memnew_placement(&table.keys[pos], TKey);
  292. memnew_placement(&table.data[pos], TData);
  293. table.flags[flags_pos] &= ~(1 << (2 * flags_pos_offset));
  294. table.flags[flags_pos] |= (1 << (2 * flags_pos_offset + 1));
  295. // don't return here, this value might still be in the old table
  296. // if it was already relocated.
  297. elements--;
  298. return;
  299. }
  300. continue;
  301. } else if (is_deleted_flag) {
  302. continue;
  303. } else {
  304. check_new_table = false;
  305. }
  306. }
  307. }
  308. }
  309. struct Iterator {
  310. bool valid;
  311. uint32_t hash;
  312. const TKey *key;
  313. const TData *data;
  314. private:
  315. friend class OAHashMap;
  316. bool was_from_old_table;
  317. };
  318. Iterator iter() const {
  319. Iterator it;
  320. it.valid = false;
  321. it.was_from_old_table = false;
  322. bool check_old_table = is_rehashing;
  323. for (int i = 0; i < table.capacity; i++) {
  324. // if we're rehashing check the old table first
  325. if (check_old_table && i < old_table.capacity) {
  326. int pos = i;
  327. int flags_pos = pos / 4;
  328. int flags_pos_offset = pos % 4;
  329. bool is_filled_flag = (old_table.flags[flags_pos] & (1 << (2 * flags_pos_offset))) > 0;
  330. if (is_filled_flag) {
  331. it.valid = true;
  332. it.hash = old_table.hashes[pos];
  333. it.data = &old_table.data[pos];
  334. it.key = &old_table.keys[pos];
  335. it.was_from_old_table = true;
  336. return it;
  337. }
  338. }
  339. {
  340. int pos = i;
  341. int flags_pos = pos / 4;
  342. int flags_pos_offset = pos % 4;
  343. bool is_filled_flag = (table.flags[flags_pos] & (1 << (2 * flags_pos_offset))) > 0;
  344. if (is_filled_flag) {
  345. it.valid = true;
  346. it.hash = table.hashes[pos];
  347. it.data = &table.data[pos];
  348. it.key = &table.keys[pos];
  349. return it;
  350. }
  351. }
  352. }
  353. return it;
  354. }
  355. Iterator next_iter(const Iterator &p_iter) const {
  356. if (!p_iter.valid) {
  357. return p_iter;
  358. }
  359. Iterator it;
  360. it.valid = false;
  361. it.was_from_old_table = false;
  362. bool check_old_table = is_rehashing;
  363. // we use this to skip the first check or not
  364. bool was_from_old_table = p_iter.was_from_old_table;
  365. int prev_index = (p_iter.data - (p_iter.was_from_old_table ? old_table.data : table.data));
  366. if (!was_from_old_table) {
  367. prev_index++;
  368. }
  369. for (int i = prev_index; i < table.capacity; i++) {
  370. // if we're rehashing check the old table first
  371. if (check_old_table && i < old_table.capacity && !was_from_old_table) {
  372. int pos = i;
  373. int flags_pos = pos / 4;
  374. int flags_pos_offset = pos % 4;
  375. bool is_filled_flag = (old_table.flags[flags_pos] & (1 << (2 * flags_pos_offset))) > 0;
  376. if (is_filled_flag) {
  377. it.valid = true;
  378. it.hash = old_table.hashes[pos];
  379. it.data = &old_table.data[pos];
  380. it.key = &old_table.keys[pos];
  381. it.was_from_old_table = true;
  382. return it;
  383. }
  384. }
  385. was_from_old_table = false;
  386. {
  387. int pos = i;
  388. int flags_pos = pos / 4;
  389. int flags_pos_offset = pos % 4;
  390. bool is_filled_flag = (table.flags[flags_pos] & (1 << (2 * flags_pos_offset))) > 0;
  391. if (is_filled_flag) {
  392. it.valid = true;
  393. it.hash = table.hashes[pos];
  394. it.data = &table.data[pos];
  395. it.key = &table.keys[pos];
  396. return it;
  397. }
  398. }
  399. }
  400. return it;
  401. }
  402. OAHashMap(uint32_t p_initial_capacity = INITIAL_NUM_ELEMENTS) {
  403. #ifdef OA_HASH_MAP_INITIAL_LOCAL_STORAGE
  404. if (p_initial_capacity <= INITIAL_NUM_ELEMENTS) {
  405. table.data = local_data;
  406. table.keys = local_keys;
  407. table.hashes = local_hashes;
  408. table.flags = local_flags;
  409. zeromem(table.flags, INITIAL_NUM_ELEMENTS / 4 + (INITIAL_NUM_ELEMENTS % 4 != 0 ? 1 : 0));
  410. table.capacity = INITIAL_NUM_ELEMENTS;
  411. elements = 0;
  412. } else
  413. #endif
  414. {
  415. table.data = memnew_arr(TData, p_initial_capacity);
  416. table.keys = memnew_arr(TKey, p_initial_capacity);
  417. table.hashes = memnew_arr(uint32_t, p_initial_capacity);
  418. table.flags = memnew_arr(uint8_t, p_initial_capacity / 4 + (p_initial_capacity % 4 != 0 ? 1 : 0));
  419. zeromem(table.flags, p_initial_capacity / 4 + (p_initial_capacity % 4 != 0 ? 1 : 0));
  420. table.capacity = p_initial_capacity;
  421. elements = 0;
  422. }
  423. is_rehashing = false;
  424. rehash_position = 0;
  425. }
  426. ~OAHashMap() {
  427. #ifdef OA_HASH_MAP_INITIAL_LOCAL_STORAGE
  428. if (table.capacity <= INITIAL_NUM_ELEMENTS) {
  429. return; // Everything is local, so no cleanup :P
  430. }
  431. #endif
  432. if (is_rehashing) {
  433. #ifdef OA_HASH_MAP_INITIAL_LOCAL_STORAGE
  434. if (old_table.data == local_data) {
  435. // Everything is local, so no cleanup :P
  436. } else
  437. #endif
  438. {
  439. memdelete_arr(old_table.data);
  440. memdelete_arr(old_table.keys);
  441. memdelete_arr(old_table.hashes);
  442. memdelete_arr(old_table.flags);
  443. }
  444. }
  445. memdelete_arr(table.data);
  446. memdelete_arr(table.keys);
  447. memdelete_arr(table.hashes);
  448. memdelete_arr(table.flags);
  449. }
  450. };
  451. #endif