/*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
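
/* zstd_fast : zstd's speed-oriented single-probe match finder.
 * One hash-table lookup per position, no chaining : a hash collision
 * simply replaces the previous candidate for that slot. */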

#include "zstd_compress_internal.h"
#include "zstd_fast.h"

void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
                        void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashTable = ms->hashTable;
    U32  const hBits = cParams->hashLog;
    U32  const mls = cParams->minMatch;
    const BYTE* const base = ms->window.base;
    const BYTE* ip = base + ms->nextToUpdate;
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const U32 fastHashFillStep = 3;

    /* Always insert every fastHashFillStep position into the hash table.
     * Insert the other positions if their hash entry is empty.
     */
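    /* e.g. with fastHashFillStep==3 : positions 0,3,6,... always overwrite
     * their hash slot, while the in-between positions 1,2,4,5,... are only
     * written when the slot is still empty (and only for ZSTD_dtlm_full). */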
    for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
        U32 const current = (U32)(ip - base);
        size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
        hashTable[hash0] = current;
        if (dtlm == ZSTD_dtlm_fast) continue;
        /* Only load extra positions for ZSTD_dtlm_full */
        {   U32 p;
            for (p = 1; p < fastHashFillStep; ++p) {
                size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
                if (hashTable[hash] == 0) {  /* not yet filled */
                    hashTable[hash] = current + p;
    }   }   }   }
}

FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_fast_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls, ZSTD_dictMode_e const dictMode)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashTable = ms->hashTable;
    U32 const hlog = cParams->hashLog;
    /* support stepSize of 0 */
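    /* (x + !x) maps 0 to 1 and leaves any non-zero value unchanged */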
    U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32   prefixStartIndex = ms->window.dictLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];
    U32 offsetSaved = 0;

    const ZSTD_matchState_t* const dms = ms->dictMatchState;
    const ZSTD_compressionParameters* const dictCParams =
                                     dictMode == ZSTD_dictMatchState ?
                                     &dms->cParams : NULL;
    const U32* const dictHashTable = dictMode == ZSTD_dictMatchState ?
                                     dms->hashTable : NULL;
    const U32 dictStartIndex       = dictMode == ZSTD_dictMatchState ?
                                     dms->window.dictLimit : 0;
    const BYTE* const dictBase     = dictMode == ZSTD_dictMatchState ?
                                     dms->window.base : NULL;
    const BYTE* const dictStart    = dictMode == ZSTD_dictMatchState ?
                                     dictBase + dictStartIndex : NULL;
    const BYTE* const dictEnd      = dictMode == ZSTD_dictMatchState ?
                                     dms->window.nextSrc : NULL;
    const U32 dictIndexDelta       = dictMode == ZSTD_dictMatchState ?
                                     prefixStartIndex - (U32)(dictEnd - dictBase) :
                                     0;
    const U32 dictAndPrefixLength  = (U32)(ip - prefixStart + dictEnd - dictStart);
    const U32 dictHLog             = dictMode == ZSTD_dictMatchState ?
                                     dictCParams->hashLog : hlog;

    assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);

    /* otherwise, we would get index underflow when translating a dict index
     * into a local index */
    assert(dictMode != ZSTD_dictMatchState
        || prefixStartIndex >= (U32)(dictEnd - dictBase));

    /* init */
    ip += (dictAndPrefixLength == 0);
    if (dictMode == ZSTD_noDict) {
        U32 const maxRep = (U32)(ip - prefixStart);
        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
    }
    if (dictMode == ZSTD_dictMatchState) {
        /* dictMatchState repCode checks don't currently handle repCode == 0
         * disabling. */
        assert(offset_1 <= dictAndPrefixLength);
        assert(offset_2 <= dictAndPrefixLength);
    }

    /* Main Search Loop */
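    /* Each iteration probes the hash table exactly once. A repcode match at
     * ip+1 is tried first (cheapest and most likely on structured data);
     * otherwise the table candidate is verified, and on a miss the search
     * stride grows with the distance since the last match. */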
    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
        size_t mLength;
        size_t const h = ZSTD_hashPtr(ip, hlog, mls);
        U32 const current = (U32)(ip-base);
        U32 const matchIndex = hashTable[h];
        const BYTE* match = base + matchIndex;
        const U32 repIndex = current + 1 - offset_1;
        const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
                            && repIndex < prefixStartIndex) ?
                               dictBase + (repIndex - dictIndexDelta) :
                               base + repIndex;
        hashTable[h] = current;   /* update hash table */
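
        /* note : (prefixStartIndex-1) - repIndex underflows to a huge U32
         * whenever repIndex >= prefixStartIndex, so the single >= 3 test below
         * accepts any in-prefix repIndex and only rejects the 3 indices just
         * below the dict/prefix boundary, where a 4-byte read would straddle
         * the two segments */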
        if ( (dictMode == ZSTD_dictMatchState)
          && ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
        } else if ( dictMode == ZSTD_noDict
                 && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
            mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
        } else if ( (matchIndex <= prefixStartIndex) ) {
            if (dictMode == ZSTD_dictMatchState) {
                size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
                U32 const dictMatchIndex = dictHashTable[dictHash];
                const BYTE* dictMatch = dictBase + dictMatchIndex;
                if (dictMatchIndex <= dictStartIndex ||
                    MEM_read32(dictMatch) != MEM_read32(ip)) {
                    assert(stepSize >= 1);
                    ip += ((ip-anchor) >> kSearchStrength) + stepSize;
                    continue;
                } else {
                    /* found a dict match */
                    U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
                    mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
                    while (((ip>anchor) & (dictMatch>dictStart))
                         && (ip[-1] == dictMatch[-1])) {
                        ip--; dictMatch--; mLength++;
                    } /* catch up */
                    offset_2 = offset_1;
                    offset_1 = offset;
                    ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
                }
            } else {
                assert(stepSize >= 1);
                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
                continue;
            }
        } else if (MEM_read32(match) != MEM_read32(ip)) {
            /* it's not a match, and we're not going to check the dictionary */
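            /* skip ahead : the stride (ip-anchor) >> kSearchStrength grows as
             * the gap since the last match widens, so poorly-compressible
             * regions are scanned faster at the cost of missing some matches */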
            assert(stepSize >= 1);
            ip += ((ip-anchor) >> kSearchStrength) + stepSize;
            continue;
        } else {
            /* found a regular match */
            U32 const offset = (U32)(ip-match);
            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
            while (((ip>anchor) & (match>prefixStart))
                 && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
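            /* the catch-up walk extends the match backwards : the hash probe
             * only saw ip, so bytes just before ip may also belong to the
             * match, and they cost less coded as match than as literals */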
            offset_2 = offset_1;
            offset_1 = offset;
            ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
        }

        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Fill Table */
            assert(base+current+2 > istart);  /* check base overflow */
            hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;  /* here because current+2 could be > iend-8 */
            hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);

            /* check immediate repcode */
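            /* after a match, chain further repcode matches right away : each
             * hit is stored with offset code 0 and advances anchor, so runs of
             * repeating data are consumed without returning to the main probe */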
            if (dictMode == ZSTD_dictMatchState) {
                while (ip <= ilimit) {
                    U32 const current2 = (U32)(ip-base);
                    U32 const repIndex2 = current2 - offset_2;
                    const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
                            dictBase - dictIndexDelta + repIndex2 :
                            base + repIndex2;
                    if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
                       && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                        const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                        size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                        U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                        ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
                        hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
                        ip += repLength2;
                        anchor = ip;
                        continue;
                    }
                    break;
                }
            }

            if (dictMode == ZSTD_noDict) {
                while ( (ip <= ilimit)
                     && ( (offset_2>0)
                        & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
                    /* store sequence */
                    size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
                    U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base);
                    ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
                    ip += rLength;
                    anchor = ip;
                    continue;   /* faster when present ... (?) */
    }   }   }   }

    /* save reps for next block */
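    /* if a repcode was zeroed during init (it pointed before the window) and
     * was never replaced by a real match, restore the value saved in
     * offsetSaved so the next block still inherits a valid repcode history */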
    rep[0] = offset_1 ? offset_1 : offsetSaved;
    rep[1] = offset_2 ? offset_2 : offsetSaved;

    /* Return the last literals size */
    return iend - anchor;
}
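
/* The dispatchers below turn the runtime minMatch parameter into a
 * compile-time constant : FORCE_INLINE_TEMPLATE inlines _generic into each
 * case, so every specialization is compiled with mls (and dictMode) fixed
 * and the corresponding per-position branches folded away. */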
size_t ZSTD_compressBlock_fast(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32 const mls = cParams->minMatch;
    assert(ms->dictMatchState == NULL);
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
    case 5 :
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
    case 6 :
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
    case 7 :
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
    }
}

size_t ZSTD_compressBlock_fast_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32 const mls = cParams->minMatch;
    assert(ms->dictMatchState != NULL);
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
    case 5 :
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
    case 6 :
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
    case 7 :
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
    }
}
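
/* extDict variant : the window has wrapped, so indices below dictLimit refer
 * to an older segment addressed through dictBase rather than base. Every
 * candidate must therefore pick the right segment before being read. */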
static size_t ZSTD_compressBlock_fast_extDict_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize, U32 const mls)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashTable = ms->hashTable;
    U32 const hlog = cParams->hashLog;
    /* support stepSize of 0 */
    U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
    const BYTE* const base = ms->window.base;
    const BYTE* const dictBase = ms->window.dictBase;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32   dictStartIndex = ms->window.lowLimit;
    const BYTE* const dictStart = dictBase + dictStartIndex;
    const U32   prefixStartIndex = ms->window.dictLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const dictEnd = dictBase + prefixStartIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    U32 offset_1=rep[0], offset_2=rep[1];

    /* Search Loop */
    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
        const size_t h = ZSTD_hashPtr(ip, hlog, mls);
        const U32    matchIndex = hashTable[h];
        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
        const BYTE*  match = matchBase + matchIndex;
        const U32    current = (U32)(ip-base);
        const U32    repIndex = current + 1 - offset_1;
        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
        const BYTE* const repMatch = repBase + repIndex;
        size_t mLength;
        hashTable[h] = current;   /* update hash table */
        assert(offset_1 <= current + 1);   /* check repIndex */
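
        /* same underflow trick as the prefix variant, plus an extra
         * repIndex > dictStartIndex guard : a repcode reaching below the
         * oldest valid window position must be rejected */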
        if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
        } else {
            if ( (matchIndex < dictStartIndex) ||
                 (MEM_read32(match) != MEM_read32(ip)) ) {
                assert(stepSize >= 1);
                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
                continue;
            }
            {   const BYTE* matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
                const BYTE* lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
                U32 offset;
                mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
                while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                offset = current - matchIndex;
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
        }   }

        /* found a match : store it */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Fill Table */
            hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
            hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex))  /* intentional overflow */
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
    }   }   }

    /* save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return iend - anchor;
}

size_t ZSTD_compressBlock_fast_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32 const mls = cParams->minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
    case 5 :
        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
    case 6 :
        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
    case 7 :
        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
    }
}