]> pd.if.org Git - zpackage/blobdiff - lzma/lzma/lzma_encoder_optimum_normal.c
integrate lzma
[zpackage] / lzma / lzma / lzma_encoder_optimum_normal.c
diff --git a/lzma/lzma/lzma_encoder_optimum_normal.c b/lzma/lzma/lzma_encoder_optimum_normal.c
new file mode 100644 (file)
index 0000000..a360579
--- /dev/null
@@ -0,0 +1,854 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       lzma_encoder_optimum_normal.c
+//
+//  Author:     Igor Pavlov
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lzma_encoder_private.h"
+#include "fastpos.h"
+#include "memcmplen.h"
+
+
+////////////
+// Prices //
+////////////
+
+static uint32_t
+get_literal_price(const lzma_coder *const coder, const uint32_t pos,
+               const uint32_t prev_byte, const bool match_mode,
+               uint32_t match_byte, uint32_t symbol)
+{
+       const probability *const subcoder = literal_subcoder(coder->literal,
+                       coder->literal_context_bits, coder->literal_pos_mask,
+                       pos, prev_byte);
+
+       uint32_t price = 0;
+
+       if (!match_mode) {
+               price = rc_bittree_price(subcoder, 8, symbol);
+       } else {
+               uint32_t offset = 0x100;
+               symbol += UINT32_C(1) << 8;
+
+               do {
+                       match_byte <<= 1;
+
+                       const uint32_t match_bit = match_byte & offset;
+                       const uint32_t subcoder_index
+                                       = offset + match_bit + (symbol >> 8);
+                       const uint32_t bit = (symbol >> 7) & 1;
+                       price += rc_bit_price(subcoder[subcoder_index], bit);
+
+                       symbol <<= 1;
+                       offset &= ~(match_byte ^ symbol);
+
+               } while (symbol < (UINT32_C(1) << 16));
+       }
+
+       return price;
+}
+
+
+static inline uint32_t
+get_len_price(const lzma_length_encoder *const lencoder,
+               const uint32_t len, const uint32_t pos_state)
+{
+       // NOTE: Unlike the other price tables, length prices are updated
+       // in lzma_encoder.c
+       return lencoder->prices[pos_state][len - MATCH_LEN_MIN];
+}
+
+
+static inline uint32_t
+get_short_rep_price(const lzma_coder *const coder,
+               const lzma_lzma_state state, const uint32_t pos_state)
+{
+       return rc_bit_0_price(coder->is_rep0[state])
+               + rc_bit_0_price(coder->is_rep0_long[state][pos_state]);
+}
+
+
+static inline uint32_t
+get_pure_rep_price(const lzma_coder *const coder, const uint32_t rep_index,
+               const lzma_lzma_state state, uint32_t pos_state)
+{
+       uint32_t price;
+
+       if (rep_index == 0) {
+               price = rc_bit_0_price(coder->is_rep0[state]);
+               price += rc_bit_1_price(coder->is_rep0_long[state][pos_state]);
+       } else {
+               price = rc_bit_1_price(coder->is_rep0[state]);
+
+               if (rep_index == 1) {
+                       price += rc_bit_0_price(coder->is_rep1[state]);
+               } else {
+                       price += rc_bit_1_price(coder->is_rep1[state]);
+                       price += rc_bit_price(coder->is_rep2[state],
+                                       rep_index - 2);
+               }
+       }
+
+       return price;
+}
+
+
+static inline uint32_t
+get_rep_price(const lzma_coder *const coder, const uint32_t rep_index,
+               const uint32_t len, const lzma_lzma_state state,
+               const uint32_t pos_state)
+{
+       return get_len_price(&coder->rep_len_encoder, len, pos_state)
+               + get_pure_rep_price(coder, rep_index, state, pos_state);
+}
+
+
+static inline uint32_t
+get_dist_len_price(const lzma_coder *const coder, const uint32_t dist,
+               const uint32_t len, const uint32_t pos_state)
+{
+       const uint32_t dist_state = get_dist_state(len);
+       uint32_t price;
+
+       if (dist < FULL_DISTANCES) {
+               price = coder->dist_prices[dist_state][dist];
+       } else {
+               const uint32_t dist_slot = get_dist_slot_2(dist);
+               price = coder->dist_slot_prices[dist_state][dist_slot]
+                               + coder->align_prices[dist & ALIGN_MASK];
+       }
+
+       price += get_len_price(&coder->match_len_encoder, len, pos_state);
+
+       return price;
+}
+
+
+static void
+fill_dist_prices(lzma_coder *coder)
+{
+       for (uint32_t dist_state = 0; dist_state < DIST_STATES; ++dist_state) {
+
+               uint32_t *const dist_slot_prices
+                               = coder->dist_slot_prices[dist_state];
+
+               // Price to encode the dist_slot.
+               for (uint32_t dist_slot = 0;
+                               dist_slot < coder->dist_table_size; ++dist_slot)
+                       dist_slot_prices[dist_slot] = rc_bittree_price(
+                                       coder->dist_slot[dist_state],
+                                       DIST_SLOT_BITS, dist_slot);
+
+               // For matches with distance >= FULL_DISTANCES, add the price
+               // of the direct bits part of the match distance. (Align bits
+               // are handled by fill_align_prices()).
+               for (uint32_t dist_slot = DIST_MODEL_END;
+                               dist_slot < coder->dist_table_size;
+                               ++dist_slot)
+                       dist_slot_prices[dist_slot] += rc_direct_price(
+                                       ((dist_slot >> 1) - 1) - ALIGN_BITS);
+
+               // Distances in the range [0, 3] are fully encoded with
+               // dist_slot, so they are used for coder->dist_prices
+               // as is.
+               for (uint32_t i = 0; i < DIST_MODEL_START; ++i)
+                       coder->dist_prices[dist_state][i]
+                                       = dist_slot_prices[i];
+       }
+
+       // Distances in the range [4, 127] depend on dist_slot and
+       // dist_special. We do this in a loop separate from the above
+       // loop to avoid redundant calls to get_dist_slot().
+       for (uint32_t i = DIST_MODEL_START; i < FULL_DISTANCES; ++i) {
+               const uint32_t dist_slot = get_dist_slot(i);
+               const uint32_t footer_bits = ((dist_slot >> 1) - 1);
+               const uint32_t base = (2 | (dist_slot & 1)) << footer_bits;
+               const uint32_t price = rc_bittree_reverse_price(
+                               coder->dist_special + base - dist_slot - 1,
+                               footer_bits, i - base);
+
+               for (uint32_t dist_state = 0; dist_state < DIST_STATES;
+                               ++dist_state)
+                       coder->dist_prices[dist_state][i]
+                                       = price + coder->dist_slot_prices[
+                                               dist_state][dist_slot];
+       }
+
+       coder->match_price_count = 0;
+       return;
+}
+
+
+static void
+fill_align_prices(lzma_coder *coder)
+{
+       for (uint32_t i = 0; i < ALIGN_SIZE; ++i)
+               coder->align_prices[i] = rc_bittree_reverse_price(
+                               coder->dist_align, ALIGN_BITS, i);
+
+       coder->align_price_count = 0;
+       return;
+}
+
+
+/////////////
+// Optimal //
+/////////////
+
+static inline void
+make_literal(lzma_optimal *optimal)
+{
+       optimal->back_prev = UINT32_MAX;
+       optimal->prev_1_is_literal = false;
+}
+
+
+static inline void
+make_short_rep(lzma_optimal *optimal)
+{
+       optimal->back_prev = 0;
+       optimal->prev_1_is_literal = false;
+}
+
+
+#define is_short_rep(optimal) \
+       ((optimal).back_prev == 0)
+
+
+static void
+backward(lzma_coder *restrict coder, uint32_t *restrict len_res,
+               uint32_t *restrict back_res, uint32_t cur)
+{
+       coder->opts_end_index = cur;
+
+       uint32_t pos_mem = coder->opts[cur].pos_prev;
+       uint32_t back_mem = coder->opts[cur].back_prev;
+
+       do {
+               if (coder->opts[cur].prev_1_is_literal) {
+                       make_literal(&coder->opts[pos_mem]);
+                       coder->opts[pos_mem].pos_prev = pos_mem - 1;
+
+                       if (coder->opts[cur].prev_2) {
+                               coder->opts[pos_mem - 1].prev_1_is_literal
+                                               = false;
+                               coder->opts[pos_mem - 1].pos_prev
+                                               = coder->opts[cur].pos_prev_2;
+                               coder->opts[pos_mem - 1].back_prev
+                                               = coder->opts[cur].back_prev_2;
+                       }
+               }
+
+               const uint32_t pos_prev = pos_mem;
+               const uint32_t back_cur = back_mem;
+
+               back_mem = coder->opts[pos_prev].back_prev;
+               pos_mem = coder->opts[pos_prev].pos_prev;
+
+               coder->opts[pos_prev].back_prev = back_cur;
+               coder->opts[pos_prev].pos_prev = cur;
+               cur = pos_prev;
+
+       } while (cur != 0);
+
+       coder->opts_current_index = coder->opts[0].pos_prev;
+       *len_res = coder->opts[0].pos_prev;
+       *back_res = coder->opts[0].back_prev;
+
+       return;
+}
+
+
+//////////
+// Main //
+//////////
+
+static inline uint32_t
+helper1(lzma_coder *restrict coder, lzma_mf *restrict mf,
+               uint32_t *restrict back_res, uint32_t *restrict len_res,
+               uint32_t position)
+{
+       const uint32_t nice_len = mf->nice_len;
+
+       uint32_t len_main;
+       uint32_t matches_count;
+
+       if (mf->read_ahead == 0) {
+               len_main = mf_find(mf, &matches_count, coder->matches);
+       } else {
+               assert(mf->read_ahead == 1);
+               len_main = coder->longest_match_length;
+               matches_count = coder->matches_count;
+       }
+
+       const uint32_t buf_avail = my_min(mf_avail(mf) + 1, MATCH_LEN_MAX);
+       if (buf_avail < 2) {
+               *back_res = UINT32_MAX;
+               *len_res = 1;
+               return UINT32_MAX;
+       }
+
+       const uint8_t *const buf = mf_ptr(mf) - 1;
+
+       uint32_t rep_lens[REPS];
+       uint32_t rep_max_index = 0;
+
+       for (uint32_t i = 0; i < REPS; ++i) {
+               const uint8_t *const buf_back = buf - coder->reps[i] - 1;
+
+               if (not_equal_16(buf, buf_back)) {
+                       rep_lens[i] = 0;
+                       continue;
+               }
+
+               rep_lens[i] = lzma_memcmplen(buf, buf_back, 2, buf_avail);
+
+               if (rep_lens[i] > rep_lens[rep_max_index])
+                       rep_max_index = i;
+       }
+
+       if (rep_lens[rep_max_index] >= nice_len) {
+               *back_res = rep_max_index;
+               *len_res = rep_lens[rep_max_index];
+               mf_skip(mf, *len_res - 1);
+               return UINT32_MAX;
+       }
+
+
+       if (len_main >= nice_len) {
+               *back_res = coder->matches[matches_count - 1].dist + REPS;
+               *len_res = len_main;
+               mf_skip(mf, len_main - 1);
+               return UINT32_MAX;
+       }
+
+       const uint8_t current_byte = *buf;
+       const uint8_t match_byte = *(buf - coder->reps[0] - 1);
+
+       if (len_main < 2 && current_byte != match_byte
+                       && rep_lens[rep_max_index] < 2) {
+               *back_res = UINT32_MAX;
+               *len_res = 1;
+               return UINT32_MAX;
+       }
+
+       coder->opts[0].state = coder->state;
+
+       const uint32_t pos_state = position & coder->pos_mask;
+
+       coder->opts[1].price = rc_bit_0_price(
+                               coder->is_match[coder->state][pos_state])
+                       + get_literal_price(coder, position, buf[-1],
+                               !is_literal_state(coder->state),
+                               match_byte, current_byte);
+
+       make_literal(&coder->opts[1]);
+
+       const uint32_t match_price = rc_bit_1_price(
+                       coder->is_match[coder->state][pos_state]);
+       const uint32_t rep_match_price = match_price
+                       + rc_bit_1_price(coder->is_rep[coder->state]);
+
+       if (match_byte == current_byte) {
+               const uint32_t short_rep_price = rep_match_price
+                               + get_short_rep_price(
+                                       coder, coder->state, pos_state);
+
+               if (short_rep_price < coder->opts[1].price) {
+                       coder->opts[1].price = short_rep_price;
+                       make_short_rep(&coder->opts[1]);
+               }
+       }
+
+       const uint32_t len_end = my_max(len_main, rep_lens[rep_max_index]);
+
+       if (len_end < 2) {
+               *back_res = coder->opts[1].back_prev;
+               *len_res = 1;
+               return UINT32_MAX;
+       }
+
+       coder->opts[1].pos_prev = 0;
+
+       for (uint32_t i = 0; i < REPS; ++i)
+               coder->opts[0].backs[i] = coder->reps[i];
+
+       uint32_t len = len_end;
+       do {
+               coder->opts[len].price = RC_INFINITY_PRICE;
+       } while (--len >= 2);
+
+
+       for (uint32_t i = 0; i < REPS; ++i) {
+               uint32_t rep_len = rep_lens[i];
+               if (rep_len < 2)
+                       continue;
+
+               const uint32_t price = rep_match_price + get_pure_rep_price(
+                               coder, i, coder->state, pos_state);
+
+               do {
+                       const uint32_t cur_and_len_price = price
+                                       + get_len_price(
+                                               &coder->rep_len_encoder,
+                                               rep_len, pos_state);
+
+                       if (cur_and_len_price < coder->opts[rep_len].price) {
+                               coder->opts[rep_len].price = cur_and_len_price;
+                               coder->opts[rep_len].pos_prev = 0;
+                               coder->opts[rep_len].back_prev = i;
+                               coder->opts[rep_len].prev_1_is_literal = false;
+                       }
+               } while (--rep_len >= 2);
+       }
+
+
+       const uint32_t normal_match_price = match_price
+                       + rc_bit_0_price(coder->is_rep[coder->state]);
+
+       len = rep_lens[0] >= 2 ? rep_lens[0] + 1 : 2;
+       if (len <= len_main) {
+               uint32_t i = 0;
+               while (len > coder->matches[i].len)
+                       ++i;
+
+               for(; ; ++len) {
+                       const uint32_t dist = coder->matches[i].dist;
+                       const uint32_t cur_and_len_price = normal_match_price
+                                       + get_dist_len_price(coder,
+                                               dist, len, pos_state);
+
+                       if (cur_and_len_price < coder->opts[len].price) {
+                               coder->opts[len].price = cur_and_len_price;
+                               coder->opts[len].pos_prev = 0;
+                               coder->opts[len].back_prev = dist + REPS;
+                               coder->opts[len].prev_1_is_literal = false;
+                       }
+
+                       if (len == coder->matches[i].len)
+                               if (++i == matches_count)
+                                       break;
+               }
+       }
+
+       return len_end;
+}
+
+
+static inline uint32_t
+helper2(lzma_coder *coder, uint32_t *reps, const uint8_t *buf,
+               uint32_t len_end, uint32_t position, const uint32_t cur,
+               const uint32_t nice_len, const uint32_t buf_avail_full)
+{
+       uint32_t matches_count = coder->matches_count;
+       uint32_t new_len = coder->longest_match_length;
+       uint32_t pos_prev = coder->opts[cur].pos_prev;
+       lzma_lzma_state state;
+
+       if (coder->opts[cur].prev_1_is_literal) {
+               --pos_prev;
+
+               if (coder->opts[cur].prev_2) {
+                       state = coder->opts[coder->opts[cur].pos_prev_2].state;
+
+                       if (coder->opts[cur].back_prev_2 < REPS)
+                               update_long_rep(state);
+                       else
+                               update_match(state);
+
+               } else {
+                       state = coder->opts[pos_prev].state;
+               }
+
+               update_literal(state);
+
+       } else {
+               state = coder->opts[pos_prev].state;
+       }
+
+       if (pos_prev == cur - 1) {
+               if (is_short_rep(coder->opts[cur]))
+                       update_short_rep(state);
+               else
+                       update_literal(state);
+       } else {
+               uint32_t pos;
+               if (coder->opts[cur].prev_1_is_literal
+                               && coder->opts[cur].prev_2) {
+                       pos_prev = coder->opts[cur].pos_prev_2;
+                       pos = coder->opts[cur].back_prev_2;
+                       update_long_rep(state);
+               } else {
+                       pos = coder->opts[cur].back_prev;
+                       if (pos < REPS)
+                               update_long_rep(state);
+                       else
+                               update_match(state);
+               }
+
+               if (pos < REPS) {
+                       reps[0] = coder->opts[pos_prev].backs[pos];
+
+                       uint32_t i;
+                       for (i = 1; i <= pos; ++i)
+                               reps[i] = coder->opts[pos_prev].backs[i - 1];
+
+                       for (; i < REPS; ++i)
+                               reps[i] = coder->opts[pos_prev].backs[i];
+
+               } else {
+                       reps[0] = pos - REPS;
+
+                       for (uint32_t i = 1; i < REPS; ++i)
+                               reps[i] = coder->opts[pos_prev].backs[i - 1];
+               }
+       }
+
+       coder->opts[cur].state = state;
+
+       for (uint32_t i = 0; i < REPS; ++i)
+               coder->opts[cur].backs[i] = reps[i];
+
+       const uint32_t cur_price = coder->opts[cur].price;
+
+       const uint8_t current_byte = *buf;
+       const uint8_t match_byte = *(buf - reps[0] - 1);
+
+       const uint32_t pos_state = position & coder->pos_mask;
+
+       const uint32_t cur_and_1_price = cur_price
+                       + rc_bit_0_price(coder->is_match[state][pos_state])
+                       + get_literal_price(coder, position, buf[-1],
+                       !is_literal_state(state), match_byte, current_byte);
+
+       bool next_is_literal = false;
+
+       if (cur_and_1_price < coder->opts[cur + 1].price) {
+               coder->opts[cur + 1].price = cur_and_1_price;
+               coder->opts[cur + 1].pos_prev = cur;
+               make_literal(&coder->opts[cur + 1]);
+               next_is_literal = true;
+       }
+
+       const uint32_t match_price = cur_price
+                       + rc_bit_1_price(coder->is_match[state][pos_state]);
+       const uint32_t rep_match_price = match_price
+                       + rc_bit_1_price(coder->is_rep[state]);
+
+       if (match_byte == current_byte
+                       && !(coder->opts[cur + 1].pos_prev < cur
+                               && coder->opts[cur + 1].back_prev == 0)) {
+
+               const uint32_t short_rep_price = rep_match_price
+                               + get_short_rep_price(coder, state, pos_state);
+
+               if (short_rep_price <= coder->opts[cur + 1].price) {
+                       coder->opts[cur + 1].price = short_rep_price;
+                       coder->opts[cur + 1].pos_prev = cur;
+                       make_short_rep(&coder->opts[cur + 1]);
+                       next_is_literal = true;
+               }
+       }
+
+       if (buf_avail_full < 2)
+               return len_end;
+
+       const uint32_t buf_avail = my_min(buf_avail_full, nice_len);
+
+       if (!next_is_literal && match_byte != current_byte) { // speed optimization
+               // try literal + rep0
+               const uint8_t *const buf_back = buf - reps[0] - 1;
+               const uint32_t limit = my_min(buf_avail_full, nice_len + 1);
+
+               const uint32_t len_test = lzma_memcmplen(buf, buf_back, 1, limit) - 1;
+
+               if (len_test >= 2) {
+                       lzma_lzma_state state_2 = state;
+                       update_literal(state_2);
+
+                       const uint32_t pos_state_next = (position + 1) & coder->pos_mask;
+                       const uint32_t next_rep_match_price = cur_and_1_price
+                                       + rc_bit_1_price(coder->is_match[state_2][pos_state_next])
+                                       + rc_bit_1_price(coder->is_rep[state_2]);
+
+                       //for (; len_test >= 2; --len_test) {
+                       const uint32_t offset = cur + 1 + len_test;
+
+                       while (len_end < offset)
+                               coder->opts[++len_end].price = RC_INFINITY_PRICE;
+
+                       const uint32_t cur_and_len_price = next_rep_match_price
+                                       + get_rep_price(coder, 0, len_test,
+                                               state_2, pos_state_next);
+
+                       if (cur_and_len_price < coder->opts[offset].price) {
+                               coder->opts[offset].price = cur_and_len_price;
+                               coder->opts[offset].pos_prev = cur + 1;
+                               coder->opts[offset].back_prev = 0;
+                               coder->opts[offset].prev_1_is_literal = true;
+                               coder->opts[offset].prev_2 = false;
+                       }
+                       //}
+               }
+       }
+
+
+       uint32_t start_len = 2; // speed optimization
+
+       for (uint32_t rep_index = 0; rep_index < REPS; ++rep_index) {
+               const uint8_t *const buf_back = buf - reps[rep_index] - 1;
+               if (not_equal_16(buf, buf_back))
+                       continue;
+
+               uint32_t len_test = lzma_memcmplen(buf, buf_back, 2, buf_avail);
+
+               while (len_end < cur + len_test)
+                       coder->opts[++len_end].price = RC_INFINITY_PRICE;
+
+               const uint32_t len_test_temp = len_test;
+               const uint32_t price = rep_match_price + get_pure_rep_price(
+                               coder, rep_index, state, pos_state);
+
+               do {
+                       const uint32_t cur_and_len_price = price
+                                       + get_len_price(&coder->rep_len_encoder,
+                                                       len_test, pos_state);
+
+                       if (cur_and_len_price < coder->opts[cur + len_test].price) {
+                               coder->opts[cur + len_test].price = cur_and_len_price;
+                               coder->opts[cur + len_test].pos_prev = cur;
+                               coder->opts[cur + len_test].back_prev = rep_index;
+                               coder->opts[cur + len_test].prev_1_is_literal = false;
+                       }
+               } while (--len_test >= 2);
+
+               len_test = len_test_temp;
+
+               if (rep_index == 0)
+                       start_len = len_test + 1;
+
+
+               uint32_t len_test_2 = len_test + 1;
+               const uint32_t limit = my_min(buf_avail_full,
+                               len_test_2 + nice_len);
+               for (; len_test_2 < limit
+                               && buf[len_test_2] == buf_back[len_test_2];
+                               ++len_test_2) ;
+
+               len_test_2 -= len_test + 1;
+
+               if (len_test_2 >= 2) {
+                       lzma_lzma_state state_2 = state;
+                       update_long_rep(state_2);
+
+                       uint32_t pos_state_next = (position + len_test) & coder->pos_mask;
+
+                       const uint32_t cur_and_len_literal_price = price
+                                       + get_len_price(&coder->rep_len_encoder,
+                                               len_test, pos_state)
+                                       + rc_bit_0_price(coder->is_match[state_2][pos_state_next])
+                                       + get_literal_price(coder, position + len_test,
+                                               buf[len_test - 1], true,
+                                               buf_back[len_test], buf[len_test]);
+
+                       update_literal(state_2);
+
+                       pos_state_next = (position + len_test + 1) & coder->pos_mask;
+
+                       const uint32_t next_rep_match_price = cur_and_len_literal_price
+                                       + rc_bit_1_price(coder->is_match[state_2][pos_state_next])
+                                       + rc_bit_1_price(coder->is_rep[state_2]);
+
+                       //for(; len_test_2 >= 2; len_test_2--) {
+                       const uint32_t offset = cur + len_test + 1 + len_test_2;
+
+                       while (len_end < offset)
+                               coder->opts[++len_end].price = RC_INFINITY_PRICE;
+
+                       const uint32_t cur_and_len_price = next_rep_match_price
+                                       + get_rep_price(coder, 0, len_test_2,
+                                               state_2, pos_state_next);
+
+                       if (cur_and_len_price < coder->opts[offset].price) {
+                               coder->opts[offset].price = cur_and_len_price;
+                               coder->opts[offset].pos_prev = cur + len_test + 1;
+                               coder->opts[offset].back_prev = 0;
+                               coder->opts[offset].prev_1_is_literal = true;
+                               coder->opts[offset].prev_2 = true;
+                               coder->opts[offset].pos_prev_2 = cur;
+                               coder->opts[offset].back_prev_2 = rep_index;
+                       }
+                       //}
+               }
+       }
+
+
+       //for (uint32_t len_test = 2; len_test <= new_len; ++len_test)
+       if (new_len > buf_avail) {
+               new_len = buf_avail;
+
+               matches_count = 0;
+               while (new_len > coder->matches[matches_count].len)
+                       ++matches_count;
+
+               coder->matches[matches_count++].len = new_len;
+       }
+
+
+       if (new_len >= start_len) {
+               const uint32_t normal_match_price = match_price
+                               + rc_bit_0_price(coder->is_rep[state]);
+
+               while (len_end < cur + new_len)
+                       coder->opts[++len_end].price = RC_INFINITY_PRICE;
+
+               uint32_t i = 0;
+               while (start_len > coder->matches[i].len)
+                       ++i;
+
+               for (uint32_t len_test = start_len; ; ++len_test) {
+                       const uint32_t cur_back = coder->matches[i].dist;
+                       uint32_t cur_and_len_price = normal_match_price
+                                       + get_dist_len_price(coder,
+                                               cur_back, len_test, pos_state);
+
+                       if (cur_and_len_price < coder->opts[cur + len_test].price) {
+                               coder->opts[cur + len_test].price = cur_and_len_price;
+                               coder->opts[cur + len_test].pos_prev = cur;
+                               coder->opts[cur + len_test].back_prev
+                                               = cur_back + REPS;
+                               coder->opts[cur + len_test].prev_1_is_literal = false;
+                       }
+
+                       if (len_test == coder->matches[i].len) {
+                               // Try Match + Literal + Rep0
+                               const uint8_t *const buf_back = buf - cur_back - 1;
+                               uint32_t len_test_2 = len_test + 1;
+                               const uint32_t limit = my_min(buf_avail_full,
+                                               len_test_2 + nice_len);
+
+                               for (; len_test_2 < limit &&
+                                               buf[len_test_2] == buf_back[len_test_2];
+                                               ++len_test_2) ;
+
+                               len_test_2 -= len_test + 1;
+
+                               if (len_test_2 >= 2) {
+                                       lzma_lzma_state state_2 = state;
+                                       update_match(state_2);
+                                       uint32_t pos_state_next
+                                                       = (position + len_test) & coder->pos_mask;
+
+                                       const uint32_t cur_and_len_literal_price = cur_and_len_price
+                                                       + rc_bit_0_price(
+                                                               coder->is_match[state_2][pos_state_next])
+                                                       + get_literal_price(coder,
+                                                               position + len_test,
+                                                               buf[len_test - 1],
+                                                               true,
+                                                               buf_back[len_test],
+                                                               buf[len_test]);
+
+                                       update_literal(state_2);
+                                       pos_state_next = (pos_state_next + 1) & coder->pos_mask;
+
+                                       const uint32_t next_rep_match_price
+                                                       = cur_and_len_literal_price
+                                                       + rc_bit_1_price(
+                                                               coder->is_match[state_2][pos_state_next])
+                                                       + rc_bit_1_price(coder->is_rep[state_2]);
+
+                                       // for(; len_test_2 >= 2; --len_test_2) {
+                                       const uint32_t offset = cur + len_test + 1 + len_test_2;
+
+                                       while (len_end < offset)
+                                               coder->opts[++len_end].price = RC_INFINITY_PRICE;
+
+                                       cur_and_len_price = next_rep_match_price
+                                                       + get_rep_price(coder, 0, len_test_2,
+                                                               state_2, pos_state_next);
+
+                                       if (cur_and_len_price < coder->opts[offset].price) {
+                                               coder->opts[offset].price = cur_and_len_price;
+                                               coder->opts[offset].pos_prev = cur + len_test + 1;
+                                               coder->opts[offset].back_prev = 0;
+                                               coder->opts[offset].prev_1_is_literal = true;
+                                               coder->opts[offset].prev_2 = true;
+                                               coder->opts[offset].pos_prev_2 = cur;
+                                               coder->opts[offset].back_prev_2
+                                                               = cur_back + REPS;
+                                       }
+                                       //}
+                               }
+
+                               if (++i == matches_count)
+                                       break;
+                       }
+               }
+       }
+
+       return len_end;
+}
+
+
+extern void
+lzma_lzma_optimum_normal(lzma_coder *restrict coder, lzma_mf *restrict mf,
+               uint32_t *restrict back_res, uint32_t *restrict len_res,
+               uint32_t position)
+{
+       // If we have symbols pending, return the next pending symbol.
+       if (coder->opts_end_index != coder->opts_current_index) {
+               assert(mf->read_ahead > 0);
+               *len_res = coder->opts[coder->opts_current_index].pos_prev
+                               - coder->opts_current_index;
+               *back_res = coder->opts[coder->opts_current_index].back_prev;
+               coder->opts_current_index = coder->opts[
+                               coder->opts_current_index].pos_prev;
+               return;
+       }
+
+       // Update the price tables. In LZMA SDK <= 4.60 (and possibly later)
+       // this was done in both initialization function and in the main loop.
+       // In liblzma they were moved into this single place.
+       if (mf->read_ahead == 0) {
+               if (coder->match_price_count >= (1 << 7))
+                       fill_dist_prices(coder);
+
+               if (coder->align_price_count >= ALIGN_SIZE)
+                       fill_align_prices(coder);
+       }
+
+       // TODO: This needs quite a bit of cleaning still. But splitting
+       // the original function into two pieces makes it at least a little
+       // more readable, since those two parts don't share many variables.
+
+       uint32_t len_end = helper1(coder, mf, back_res, len_res, position);
+       if (len_end == UINT32_MAX)
+               return;
+
+       uint32_t reps[REPS];
+       memcpy(reps, coder->reps, sizeof(reps));
+
+       uint32_t cur;
+       for (cur = 1; cur < len_end; ++cur) {
+               assert(cur < OPTS);
+
+               coder->longest_match_length = mf_find(
+                               mf, &coder->matches_count, coder->matches);
+
+               if (coder->longest_match_length >= mf->nice_len)
+                       break;
+
+               len_end = helper2(coder, reps, mf_ptr(mf) - 1, len_end,
+                               position + cur, cur, mf->nice_len,
+                               my_min(mf_avail(mf) + 1, OPTS - 1 - cur));
+       }
+
+       backward(coder, len_res, back_res, cur);
+       return;
+}