///////////////////////////////////////////////////////////////////////////////
//
/// \file       fastpos.h
/// \brief      Kind of two-bit version of bit scan reverse
///
//  Authors:    Igor Pavlov
//              Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#ifndef LZMA_FASTPOS_H
#define LZMA_FASTPOS_H

// LZMA encodes match distances by storing the highest two bits using
// a six-bit value [0, 63], and then the missing lower bits.
// Dictionary size is also stored using this encoding in the .xz
// file format header.
//
// fastpos.h provides a way to quickly find out the correct six-bit
// values. The following table gives some examples of this encoding:
//
//     dist   return
//       0       0
//       1       1
//       2       2
//       3       3
//       4       4
//       5       4
//       6       5
//       7       5
//       8       6
//     ...      ...
//      11       6
//      12       7
//     ...      ...
//      15       7
//      16       8
//      17       8
//     ...      ...
//      23       8
//      24       9
//      25       9
//     ...      ...
//
//
// Provided functions or macros
// ----------------------------
//
// get_dist_slot(dist) is the basic version. get_dist_slot_2(dist)
// assumes that dist >= FULL_DISTANCES, thus the result is at least
// FULL_DISTANCES_BITS * 2. Using get_dist_slot(dist) instead of
// get_dist_slot_2(dist) would give the same result, but get_dist_slot_2(dist)
// should be a tiny bit faster due to the assumption being made.
//
//
// Size vs. speed
// --------------
//
// With some CPUs that have fast BSR (bit scan reverse) instruction, the
// size optimized version is slightly faster than the bigger table based
// approach. Such CPUs include Intel Pentium Pro, Pentium II, Pentium III
// and Core 2 (possibly others). AMD K7 seems to have slower BSR, but that
// would still have speed roughly comparable to the table version. Older
// x86 CPUs like the original Pentium have very slow BSR; on those systems
// the table version is a lot faster.
//
// On some CPUs, the table version is a lot faster when using position
// dependent code, but with position independent code the size optimized
// version is slightly faster. This occurs at least on 32-bit SPARC (no
// ASM optimizations).
//
// I'm making the table version the default, because that has good speed
// on all systems I have tried. The size optimized version is sometimes
// slightly faster, but sometimes it is a lot slower.
//
//
// Requirements
// ------------
//
// This header does not include anything itself. The including file must
// already provide uint8_t, uint32_t and UINT32_C() (<stdint.h>), assert()
// (<assert.h>), bsr32() when HAVE_SMALL is defined, and FULL_DISTANCES and
// FULL_DISTANCES_BITS if the table based get_dist_slot_2() is wanted.

#ifdef HAVE_SMALL

/// \brief      Size optimized distance slot calculation
///
/// Doubles the value returned by bsr32(dist) and adds the bit of dist that
/// sits right below that position.
///
/// \param      dist    Match distance. The shift count is i - 1, which
///                     wraps around when bsr32() returns zero, so callers
///                     must not pass values for which that happens
///                     (presumably dist < 2, assuming bsr32() returns the
///                     index of the highest set bit -- confirm against
///                     its definition).
///
/// \return     Distance slot of dist
static inline uint32_t
get_dist_slot_2(uint32_t dist)
{
	const uint32_t i = bsr32(dist);
	return (i + i) + ((dist >> (i - 1)) & 1);
}


/// \brief      Get the distance slot of any distance
///
/// Distances 0-4 are their own slot numbers; everything else goes
/// through get_dist_slot_2(). This is a function instead of a macro so
/// that the argument is evaluated exactly once, and so that it matches
/// the table based variant.
///
/// \param      dist    Match distance
///
/// \return     Distance slot of dist
static inline uint32_t
get_dist_slot(uint32_t dist)
{
	return dist <= 4 ? dist : get_dist_slot_2(dist);
}


#else

/// Number of low bits of a distance that can be looked up directly;
/// lzma_fastpos has 1 << FASTPOS_BITS entries.
#define FASTPOS_BITS 13

/// Precalculated distance slots; defined in another translation unit.
extern const uint8_t lzma_fastpos[1 << FASTPOS_BITS];


/// How many bits a distance is shifted right before the table lookup in
/// the n'th window: `extra' bits at minimum, plus FASTPOS_BITS - 1 more
/// bits for each window step.
#define fastpos_shift(extra, n) \
	((extra) + (n) * (FASTPOS_BITS - 1))

/// First distance that no longer fits in the n'th window.
#define fastpos_limit(extra, n) \
	(UINT32_C(1) << (FASTPOS_BITS + fastpos_shift(extra, n)))

/// Slot of dist using the n'th window: look up the shifted distance and
/// add two slots per bit shifted out. The whole expansion is parenthesized
/// so that it can be used as an operand of any operator, and the table
/// byte is converted to uint32_t so that the arithmetic is unsigned like
/// the return type of the functions below.
#define fastpos_result(dist, extra, n) \
	((uint32_t)(lzma_fastpos[(dist) >> fastpos_shift(extra, n)]) \
			+ 2 * fastpos_shift(extra, n))


/// \brief      Get the distance slot of any distance (table based)
///
/// \param      dist    Match distance
///
/// \return     Distance slot of dist
static inline uint32_t
get_dist_slot(uint32_t dist)
{
	// If it is small enough, we can pick the result directly from
	// the precalculated table.
	if (dist < fastpos_limit(0, 0))
		return lzma_fastpos[dist];

	// Otherwise drop FASTPOS_BITS - 1 low bits per step until
	// the remaining value fits in the table.
	if (dist < fastpos_limit(0, 1))
		return fastpos_result(dist, 0, 1);

	return fastpos_result(dist, 0, 2);
}


#ifdef FULL_DISTANCES_BITS
/// \brief      Get the distance slot of a distance >= FULL_DISTANCES
///
/// Same idea as get_dist_slot(), but every window starts with
/// FULL_DISTANCES_BITS - 1 bits already shifted out, so there is no
/// separate direct-lookup case.
///
/// \param      dist    Match distance; must be at least FULL_DISTANCES
///                     (checked with assert()).
///
/// \return     Distance slot of dist
static inline uint32_t
get_dist_slot_2(uint32_t dist)
{
	assert(dist >= FULL_DISTANCES);

	if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 0))
		return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 0);

	if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 1))
		return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 1);

	return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 2);
}
#endif // FULL_DISTANCES_BITS

#endif // HAVE_SMALL

#endif // LZMA_FASTPOS_H