summaryrefslogtreecommitdiffstats
path: root/chromium/third_party/brotli/src/brotli/enc/encode.cc
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/third_party/brotli/src/brotli/enc/encode.cc')
-rw-r--r--chromium/third_party/brotli/src/brotli/enc/encode.cc1056
1 files changed, 1056 insertions, 0 deletions
diff --git a/chromium/third_party/brotli/src/brotli/enc/encode.cc b/chromium/third_party/brotli/src/brotli/enc/encode.cc
new file mode 100644
index 00000000000..6e18845d5bb
--- /dev/null
+++ b/chromium/third_party/brotli/src/brotli/enc/encode.cc
@@ -0,0 +1,1056 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Implementation of Brotli compressor.
+
+#include "./encode.h"
+
+#include <algorithm>
+#include <limits>
+
+#include "./backward_references.h"
+#include "./bit_cost.h"
+#include "./block_splitter.h"
+#include "./cluster.h"
+#include "./context.h"
+#include "./transform.h"
+#include "./entropy_encode.h"
+#include "./fast_log.h"
+#include "./hash.h"
+#include "./histogram.h"
+#include "./literal_cost.h"
+#include "./prefix.h"
+#include "./write_bits.h"
+
+namespace brotli {
+
+static const int kWindowBits = 22;
+// To make decoding faster, we allow the decoder to write 16 bytes ahead in
+// its ringbuffer, therefore the encoder has to decrease max distance by this
+// amount.
+static const int kDecoderRingBufferWriteAheadSlack = 16;
+static const int kMaxBackwardDistance =
+ (1 << kWindowBits) - kDecoderRingBufferWriteAheadSlack;
+
+static const int kMetaBlockSizeBits = 21;
+static const int kRingBufferBits = 23;
+static const int kRingBufferMask = (1 << kRingBufferBits) - 1;
+
+template<int kSize>
+double Entropy(const std::vector<Histogram<kSize> >& histograms) {
+ double retval = 0;
+ for (int i = 0; i < histograms.size(); ++i) {
+ retval += histograms[i].EntropyBitCost();
+ }
+ return retval;
+}
+
+template<int kSize>
+double TotalBitCost(const std::vector<Histogram<kSize> >& histograms) {
+ double retval = 0;
+ for (int i = 0; i < histograms.size(); ++i) {
+ retval += PopulationCost(histograms[i]);
+ }
+ return retval;
+}
+
+void EncodeVarLenUint8(int n, int* storage_ix, uint8_t* storage) {
+ if (n == 0) {
+ WriteBits(1, 0, storage_ix, storage);
+ } else {
+ WriteBits(1, 1, storage_ix, storage);
+ int nbits = Log2Floor(n);
+ WriteBits(3, nbits, storage_ix, storage);
+ if (nbits > 0) {
+ WriteBits(nbits, n - (1 << nbits), storage_ix, storage);
+ }
+ }
+}
+
+int ParseAsUTF8(int* symbol, const uint8_t* input, int size) {
+ // ASCII
+ if ((input[0] & 0x80) == 0) {
+ *symbol = input[0];
+ if (*symbol > 0) {
+ return 1;
+ }
+ }
+ // 2-byte UTF8
+ if (size > 1 &&
+ (input[0] & 0xe0) == 0xc0 &&
+ (input[1] & 0xc0) == 0x80) {
+ *symbol = (((input[0] & 0x1f) << 6) |
+ (input[1] & 0x3f));
+ if (*symbol > 0x7f) {
+ return 2;
+ }
+ }
+ // 3-byte UFT8
+ if (size > 2 &&
+ (input[0] & 0xf0) == 0xe0 &&
+ (input[1] & 0xc0) == 0x80 &&
+ (input[2] & 0xc0) == 0x80) {
+ *symbol = (((input[0] & 0x0f) << 12) |
+ ((input[1] & 0x3f) << 6) |
+ (input[2] & 0x3f));
+ if (*symbol > 0x7ff) {
+ return 3;
+ }
+ }
+ // 4-byte UFT8
+ if (size > 3 &&
+ (input[0] & 0xf8) == 0xf0 &&
+ (input[1] & 0xc0) == 0x80 &&
+ (input[2] & 0xc0) == 0x80 &&
+ (input[3] & 0xc0) == 0x80) {
+ *symbol = (((input[0] & 0x07) << 18) |
+ ((input[1] & 0x3f) << 12) |
+ ((input[2] & 0x3f) << 6) |
+ (input[3] & 0x3f));
+ if (*symbol > 0xffff && *symbol <= 0x10ffff) {
+ return 4;
+ }
+ }
+ // Not UTF8, emit a special symbol above the UTF8-code space
+ *symbol = 0x110000 | input[0];
+ return 1;
+}
+
+// Returns true if at least min_fraction of the data is UTF8-encoded.
+bool IsMostlyUTF8(const uint8_t* data, size_t length, double min_fraction) {
+ size_t size_utf8 = 0;
+ size_t pos = 0;
+ while (pos < length) {
+ int symbol;
+ int bytes_read = ParseAsUTF8(&symbol, data + pos, length - pos);
+ pos += bytes_read;
+ if (symbol < 0x110000) size_utf8 += bytes_read;
+ }
+ return size_utf8 > min_fraction * length;
+}
+
+void EncodeMetaBlockLength(size_t meta_block_size,
+ bool is_last,
+ bool is_uncompressed,
+ int* storage_ix, uint8_t* storage) {
+ WriteBits(1, is_last, storage_ix, storage);
+ if (is_last) {
+ if (meta_block_size == 0) {
+ WriteBits(1, 1, storage_ix, storage);
+ return;
+ }
+ WriteBits(1, 0, storage_ix, storage);
+ }
+ --meta_block_size;
+ int num_bits = Log2Floor(meta_block_size) + 1;
+ if (num_bits < 16) {
+ num_bits = 16;
+ }
+ WriteBits(2, (num_bits - 13) >> 2, storage_ix, storage);
+ while (num_bits > 0) {
+ WriteBits(4, meta_block_size & 0xf, storage_ix, storage);
+ meta_block_size >>= 4;
+ num_bits -= 4;
+ }
+ if (!is_last) {
+ WriteBits(1, is_uncompressed, storage_ix, storage);
+ }
+}
+
+void StoreHuffmanTreeOfHuffmanTreeToBitMask(
+ const uint8_t* code_length_bitdepth,
+ int* storage_ix, uint8_t* storage) {
+ static const uint8_t kStorageOrder[kCodeLengthCodes] = {
+ 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ };
+ // Throw away trailing zeros:
+ int codes_to_store = kCodeLengthCodes;
+ for (; codes_to_store > 0; --codes_to_store) {
+ if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
+ break;
+ }
+ }
+ int num_codes = 0;
+ for (int i = 0; i < codes_to_store; ++i) {
+ if (code_length_bitdepth[kStorageOrder[i]] != 0) {
+ ++num_codes;
+ }
+ }
+ if (num_codes == 1) {
+ codes_to_store = kCodeLengthCodes;
+ }
+ int skip_some = 0; // skips none.
+ if (code_length_bitdepth[kStorageOrder[0]] == 0 &&
+ code_length_bitdepth[kStorageOrder[1]] == 0) {
+ skip_some = 2; // skips two.
+ if (code_length_bitdepth[kStorageOrder[2]] == 0) {
+ skip_some = 3; // skips three.
+ }
+ }
+ WriteBits(2, skip_some, storage_ix, storage);
+ for (int i = skip_some; i < codes_to_store; ++i) {
+ uint8_t len[] = { 2, 4, 3, 2, 2, 4 };
+ uint8_t bits[] = { 0, 7, 3, 2, 1, 15 };
+ int v = code_length_bitdepth[kStorageOrder[i]];
+ WriteBits(len[v], bits[v], storage_ix, storage);
+ }
+}
+
+void StoreHuffmanTreeToBitMask(
+ const uint8_t* huffman_tree,
+ const uint8_t* huffman_tree_extra_bits,
+ const int huffman_tree_size,
+ const EntropyCode<kCodeLengthCodes>& entropy,
+ int* storage_ix, uint8_t* storage) {
+ for (int i = 0; i < huffman_tree_size; ++i) {
+ const int ix = huffman_tree[i];
+ const int extra_bits = huffman_tree_extra_bits[i];
+ if (entropy.count_ > 1) {
+ WriteBits(entropy.depth_[ix], entropy.bits_[ix], storage_ix, storage);
+ }
+ switch (ix) {
+ case 16:
+ WriteBits(2, extra_bits, storage_ix, storage);
+ break;
+ case 17:
+ WriteBits(3, extra_bits, storage_ix, storage);
+ break;
+ }
+ }
+}
+
+template<int kSize>
+void StoreHuffmanCodeSimple(
+ const EntropyCode<kSize>& code, int alphabet_size,
+ int max_bits, int* storage_ix, uint8_t* storage) {
+ const uint8_t *depth = &code.depth_[0];
+ int symbols[4];
+ // Quadratic sort.
+ int k, j;
+ for (k = 0; k < code.count_; ++k) {
+ symbols[k] = code.symbols_[k];
+ }
+ for (k = 0; k < code.count_; ++k) {
+ for (j = k + 1; j < code.count_; ++j) {
+ if (depth[symbols[j]] < depth[symbols[k]]) {
+ int t = symbols[k];
+ symbols[k] = symbols[j];
+ symbols[j] = t;
+ }
+ }
+ }
+ // Small tree marker to encode 1-4 symbols.
+ WriteBits(2, 1, storage_ix, storage);
+ WriteBits(2, code.count_ - 1, storage_ix, storage);
+ for (int i = 0; i < code.count_; ++i) {
+ WriteBits(max_bits, symbols[i], storage_ix, storage);
+ }
+ if (code.count_ == 4) {
+ if (depth[symbols[0]] == 2 &&
+ depth[symbols[1]] == 2 &&
+ depth[symbols[2]] == 2 &&
+ depth[symbols[3]] == 2) {
+ WriteBits(1, 0, storage_ix, storage);
+ } else {
+ WriteBits(1, 1, storage_ix, storage);
+ }
+ }
+}
+
+template<int kSize>
+void StoreHuffmanCodeComplex(
+ const EntropyCode<kSize>& code, int alphabet_size,
+ int* storage_ix, uint8_t* storage) {
+ const uint8_t *depth = &code.depth_[0];
+ uint8_t huffman_tree[kSize];
+ uint8_t huffman_tree_extra_bits[kSize];
+ int huffman_tree_size = 0;
+ WriteHuffmanTree(depth,
+ alphabet_size,
+ &huffman_tree[0],
+ &huffman_tree_extra_bits[0],
+ &huffman_tree_size);
+ Histogram<kCodeLengthCodes> huffman_tree_histogram;
+ memset(huffman_tree_histogram.data_, 0, sizeof(huffman_tree_histogram.data_));
+ for (int i = 0; i < huffman_tree_size; ++i) {
+ huffman_tree_histogram.Add(huffman_tree[i]);
+ }
+ EntropyCode<kCodeLengthCodes> huffman_tree_entropy;
+ BuildEntropyCode(huffman_tree_histogram, 5, kCodeLengthCodes,
+ &huffman_tree_entropy);
+ StoreHuffmanTreeOfHuffmanTreeToBitMask(
+ &huffman_tree_entropy.depth_[0], storage_ix, storage);
+ StoreHuffmanTreeToBitMask(&huffman_tree[0], &huffman_tree_extra_bits[0],
+ huffman_tree_size, huffman_tree_entropy,
+ storage_ix, storage);
+}
+
+template<int kSize>
+void BuildAndStoreEntropyCode(const Histogram<kSize>& histogram,
+ const int tree_limit,
+ const int alphabet_size,
+ EntropyCode<kSize>* code,
+ int* storage_ix, uint8_t* storage) {
+ memset(code->depth_, 0, sizeof(code->depth_));
+ memset(code->bits_, 0, sizeof(code->bits_));
+ memset(code->symbols_, 0, sizeof(code->symbols_));
+ code->count_ = 0;
+
+ int max_bits_counter = alphabet_size - 1;
+ int max_bits = 0;
+ while (max_bits_counter) {
+ max_bits_counter >>= 1;
+ ++max_bits;
+ }
+
+ for (size_t i = 0; i < alphabet_size; i++) {
+ if (histogram.data_[i] > 0) {
+ if (code->count_ < 4) code->symbols_[code->count_] = i;
+ ++code->count_;
+ }
+ }
+
+ if (code->count_ <= 1) {
+ WriteBits(2, 1, storage_ix, storage);
+ WriteBits(2, 0, storage_ix, storage);
+ WriteBits(max_bits, code->symbols_[0], storage_ix, storage);
+ return;
+ }
+
+ if (alphabet_size >= 50 && code->count_ >= 16) {
+ std::vector<int> counts(alphabet_size);
+ memcpy(&counts[0], histogram.data_, sizeof(counts[0]) * alphabet_size);
+ OptimizeHuffmanCountsForRle(alphabet_size, &counts[0]);
+ CreateHuffmanTree(&counts[0], alphabet_size, tree_limit, code->depth_);
+ } else {
+ CreateHuffmanTree(histogram.data_, alphabet_size, tree_limit, code->depth_);
+ }
+ ConvertBitDepthsToSymbols(code->depth_, alphabet_size, code->bits_);
+
+ if (code->count_ <= 4) {
+ StoreHuffmanCodeSimple(*code, alphabet_size, max_bits, storage_ix, storage);
+ } else {
+ StoreHuffmanCodeComplex(*code, alphabet_size, storage_ix, storage);
+ }
+}
+
+template<int kSize>
+void BuildAndStoreEntropyCodes(
+ const std::vector<Histogram<kSize> >& histograms,
+ int alphabet_size,
+ std::vector<EntropyCode<kSize> >* entropy_codes,
+ int* storage_ix, uint8_t* storage) {
+ entropy_codes->resize(histograms.size());
+ for (int i = 0; i < histograms.size(); ++i) {
+ BuildAndStoreEntropyCode(histograms[i], 15, alphabet_size,
+ &(*entropy_codes)[i],
+ storage_ix, storage);
+ }
+}
+
+void EncodeCommand(const Command& cmd,
+ const EntropyCodeCommand& entropy,
+ int* storage_ix, uint8_t* storage) {
+ int code = cmd.command_prefix_;
+ WriteBits(entropy.depth_[code], entropy.bits_[code], storage_ix, storage);
+ if (code >= 128) {
+ code -= 128;
+ }
+ int insert_extra_bits = InsertLengthExtraBits(code);
+ uint64_t insert_extra_bits_val =
+ cmd.insert_length_ - InsertLengthOffset(code);
+ int copy_extra_bits = CopyLengthExtraBits(code);
+ uint64_t copy_extra_bits_val = cmd.copy_length_code_ - CopyLengthOffset(code);
+ if (insert_extra_bits > 0) {
+ WriteBits(insert_extra_bits, insert_extra_bits_val, storage_ix, storage);
+ }
+ if (copy_extra_bits > 0) {
+ WriteBits(copy_extra_bits, copy_extra_bits_val, storage_ix, storage);
+ }
+}
+
+void EncodeCopyDistance(const Command& cmd, const EntropyCodeDistance& entropy,
+ int* storage_ix, uint8_t* storage) {
+ int code = cmd.distance_prefix_;
+ int extra_bits = cmd.distance_extra_bits_;
+ uint64_t extra_bits_val = cmd.distance_extra_bits_value_;
+ WriteBits(entropy.depth_[code], entropy.bits_[code], storage_ix, storage);
+ if (extra_bits > 0) {
+ WriteBits(extra_bits, extra_bits_val, storage_ix, storage);
+ }
+}
+
+void ComputeDistanceShortCodes(std::vector<Command>* cmds,
+ size_t pos,
+ const size_t max_backward,
+ int* dist_ringbuffer,
+ size_t* ringbuffer_idx) {
+ static const int kIndexOffset[16] = {
+ 3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2
+ };
+ static const int kValueOffset[16] = {
+ 0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
+ };
+ for (int i = 0; i < cmds->size(); ++i) {
+ pos += (*cmds)[i].insert_length_;
+ size_t max_distance = std::min(pos, max_backward);
+ int cur_dist = (*cmds)[i].copy_distance_;
+ int dist_code = cur_dist + 16;
+ if (cur_dist <= max_distance) {
+ if (cur_dist == 0) break;
+ int limits[16] = { 0, 0, 0, 0,
+ 6, 6, 11, 11,
+ 11, 11, 11, 11,
+ 12, 12, 12, 12 };
+ for (int k = 0; k < 16; ++k) {
+ // Only accept more popular choices.
+ if (cur_dist < limits[k]) {
+ // Typically unpopular ranges, don't replace a short distance
+ // with them.
+ continue;
+ }
+ int comp = (dist_ringbuffer[(*ringbuffer_idx + kIndexOffset[k]) & 3] +
+ kValueOffset[k]);
+ if (cur_dist == comp) {
+ dist_code = k + 1;
+ break;
+ }
+ }
+ if (dist_code > 1) {
+ dist_ringbuffer[*ringbuffer_idx & 3] = cur_dist;
+ ++(*ringbuffer_idx);
+ }
+ pos += (*cmds)[i].copy_length_;
+ } else {
+ int word_idx = cur_dist - max_distance - 1;
+ const std::string word =
+ GetTransformedDictionaryWord((*cmds)[i].copy_length_code_, word_idx);
+ pos += word.size();
+ }
+ (*cmds)[i].distance_code_ = dist_code;
+ }
+}
+
+void ComputeCommandPrefixes(std::vector<Command>* cmds,
+ int num_direct_distance_codes,
+ int distance_postfix_bits) {
+ for (int i = 0; i < cmds->size(); ++i) {
+ Command* cmd = &(*cmds)[i];
+ cmd->command_prefix_ = CommandPrefix(cmd->insert_length_,
+ cmd->copy_length_code_);
+ if (cmd->copy_length_code_ > 0) {
+ PrefixEncodeCopyDistance(cmd->distance_code_,
+ num_direct_distance_codes,
+ distance_postfix_bits,
+ &cmd->distance_prefix_,
+ &cmd->distance_extra_bits_,
+ &cmd->distance_extra_bits_value_);
+ }
+ if (cmd->command_prefix_ < 128 && cmd->distance_prefix_ == 0) {
+ cmd->distance_prefix_ = 0xffff;
+ } else {
+ cmd->command_prefix_ += 128;
+ }
+ }
+}
+
+int IndexOf(const std::vector<int>& v, int value) {
+ for (int i = 0; i < v.size(); ++i) {
+ if (v[i] == value) return i;
+ }
+ return -1;
+}
+
+void MoveToFront(std::vector<int>* v, int index) {
+ int value = (*v)[index];
+ for (int i = index; i > 0; --i) {
+ (*v)[i] = (*v)[i - 1];
+ }
+ (*v)[0] = value;
+}
+
+std::vector<int> MoveToFrontTransform(const std::vector<int>& v) {
+ if (v.empty()) return v;
+ std::vector<int> mtf(*max_element(v.begin(), v.end()) + 1);
+ for (int i = 0; i < mtf.size(); ++i) mtf[i] = i;
+ std::vector<int> result(v.size());
+ for (int i = 0; i < v.size(); ++i) {
+ int index = IndexOf(mtf, v[i]);
+ result[i] = index;
+ MoveToFront(&mtf, index);
+ }
+ return result;
+}
+
+// Finds runs of zeros in v_in and replaces them with a prefix code of the run
+// length plus extra bits in *v_out and *extra_bits. Non-zero values in v_in are
+// shifted by *max_length_prefix. Will not create prefix codes bigger than the
+// initial value of *max_run_length_prefix. The prefix code of run length L is
+// simply Log2Floor(L) and the number of extra bits is the same as the prefix
+// code.
+void RunLengthCodeZeros(const std::vector<int>& v_in,
+ int* max_run_length_prefix,
+ std::vector<int>* v_out,
+ std::vector<int>* extra_bits) {
+ int max_reps = 0;
+ for (int i = 0; i < v_in.size();) {
+ for (; i < v_in.size() && v_in[i] != 0; ++i) ;
+ int reps = 0;
+ for (; i < v_in.size() && v_in[i] == 0; ++i) {
+ ++reps;
+ }
+ max_reps = std::max(reps, max_reps);
+ }
+ int max_prefix = max_reps > 0 ? Log2Floor(max_reps) : 0;
+ *max_run_length_prefix = std::min(max_prefix, *max_run_length_prefix);
+ for (int i = 0; i < v_in.size();) {
+ if (v_in[i] != 0) {
+ v_out->push_back(v_in[i] + *max_run_length_prefix);
+ extra_bits->push_back(0);
+ ++i;
+ } else {
+ int reps = 1;
+ for (uint32_t k = i + 1; k < v_in.size() && v_in[k] == 0; ++k) {
+ ++reps;
+ }
+ i += reps;
+ while (reps) {
+ if (reps < (2 << *max_run_length_prefix)) {
+ int run_length_prefix = Log2Floor(reps);
+ v_out->push_back(run_length_prefix);
+ extra_bits->push_back(reps - (1 << run_length_prefix));
+ break;
+ } else {
+ v_out->push_back(*max_run_length_prefix);
+ extra_bits->push_back((1 << *max_run_length_prefix) - 1);
+ reps -= (2 << *max_run_length_prefix) - 1;
+ }
+ }
+ }
+ }
+}
+
+// Returns a maximum zero-run-length-prefix value such that run-length coding
+// zeros in v with this maximum prefix value and then encoding the resulting
+// histogram and entropy-coding v produces the least amount of bits.
+int BestMaxZeroRunLengthPrefix(const std::vector<int>& v) {
+ int min_cost = std::numeric_limits<int>::max();
+ int best_max_prefix = 0;
+ for (int max_prefix = 0; max_prefix <= 16; ++max_prefix) {
+ std::vector<int> rle_symbols;
+ std::vector<int> extra_bits;
+ int max_run_length_prefix = max_prefix;
+ RunLengthCodeZeros(v, &max_run_length_prefix, &rle_symbols, &extra_bits);
+ if (max_run_length_prefix < max_prefix) break;
+ HistogramContextMap histogram;
+ for (int i = 0; i < rle_symbols.size(); ++i) {
+ histogram.Add(rle_symbols[i]);
+ }
+ int bit_cost = PopulationCost(histogram);
+ if (max_prefix > 0) {
+ bit_cost += 4;
+ }
+ for (int i = 1; i <= max_prefix; ++i) {
+ bit_cost += histogram.data_[i] * i; // extra bits
+ }
+ if (bit_cost < min_cost) {
+ min_cost = bit_cost;
+ best_max_prefix = max_prefix;
+ }
+ }
+ return best_max_prefix;
+}
+
+void EncodeContextMap(const std::vector<int>& context_map,
+ int num_clusters,
+ int* storage_ix, uint8_t* storage) {
+ EncodeVarLenUint8(num_clusters - 1, storage_ix, storage);
+
+ if (num_clusters == 1) {
+ return;
+ }
+
+ std::vector<int> transformed_symbols = MoveToFrontTransform(context_map);
+ std::vector<int> rle_symbols;
+ std::vector<int> extra_bits;
+ int max_run_length_prefix = BestMaxZeroRunLengthPrefix(transformed_symbols);
+ RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix,
+ &rle_symbols, &extra_bits);
+ HistogramContextMap symbol_histogram;
+ for (int i = 0; i < rle_symbols.size(); ++i) {
+ symbol_histogram.Add(rle_symbols[i]);
+ }
+ bool use_rle = max_run_length_prefix > 0;
+ WriteBits(1, use_rle, storage_ix, storage);
+ if (use_rle) {
+ WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
+ }
+ EntropyCodeContextMap symbol_code;
+ BuildAndStoreEntropyCode(symbol_histogram, 15,
+ num_clusters + max_run_length_prefix,
+ &symbol_code,
+ storage_ix, storage);
+ for (int i = 0; i < rle_symbols.size(); ++i) {
+ WriteBits(symbol_code.depth_[rle_symbols[i]],
+ symbol_code.bits_[rle_symbols[i]],
+ storage_ix, storage);
+ if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) {
+ WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
+ }
+ }
+ WriteBits(1, 1, storage_ix, storage); // use move-to-front
+}
+
+struct BlockSplitCode {
+ EntropyCodeBlockType block_type_code;
+ EntropyCodeBlockLength block_len_code;
+};
+
+void EncodeBlockLength(const EntropyCodeBlockLength& entropy,
+ int length,
+ int* storage_ix, uint8_t* storage) {
+ int len_code = BlockLengthPrefix(length);
+ int extra_bits = BlockLengthExtraBits(len_code);
+ int extra_bits_value = length - BlockLengthOffset(len_code);
+ WriteBits(entropy.depth_[len_code], entropy.bits_[len_code],
+ storage_ix, storage);
+ if (extra_bits > 0) {
+ WriteBits(extra_bits, extra_bits_value, storage_ix, storage);
+ }
+}
+
+void ComputeBlockTypeShortCodes(BlockSplit* split) {
+ if (split->num_types_ <= 1) {
+ split->num_types_ = 1;
+ return;
+ }
+ int ringbuffer[2] = { 0, 1 };
+ size_t index = 0;
+ for (int i = 0; i < split->types_.size(); ++i) {
+ int type = split->types_[i];
+ int type_code;
+ if (type == ringbuffer[index & 1]) {
+ type_code = 0;
+ } else if (type == ringbuffer[(index - 1) & 1] + 1) {
+ type_code = 1;
+ } else {
+ type_code = type + 2;
+ }
+ ringbuffer[index & 1] = type;
+ ++index;
+ split->type_codes_.push_back(type_code);
+ }
+}
+
+void BuildAndEncodeBlockSplitCode(const BlockSplit& split,
+ BlockSplitCode* code,
+ int* storage_ix, uint8_t* storage) {
+ EncodeVarLenUint8(split.num_types_ - 1, storage_ix, storage);
+
+ if (split.num_types_ == 1) {
+ return;
+ }
+
+ HistogramBlockType type_histo;
+ for (int i = 1; i < split.type_codes_.size(); ++i) {
+ type_histo.Add(split.type_codes_[i]);
+ }
+ HistogramBlockLength length_histo;
+ for (int i = 0; i < split.lengths_.size(); ++i) {
+ length_histo.Add(BlockLengthPrefix(split.lengths_[i]));
+ }
+ BuildAndStoreEntropyCode(type_histo, 15, split.num_types_ + 2,
+ &code->block_type_code,
+ storage_ix, storage);
+ BuildAndStoreEntropyCode(length_histo, 15, kNumBlockLenPrefixes,
+ &code->block_len_code,
+ storage_ix, storage);
+ EncodeBlockLength(code->block_len_code, split.lengths_[0],
+ storage_ix, storage);
+}
+
+void MoveAndEncode(const BlockSplitCode& code,
+ BlockSplitIterator* it,
+ int* storage_ix, uint8_t* storage) {
+ if (it->length_ == 0) {
+ ++it->idx_;
+ it->type_ = it->split_.types_[it->idx_];
+ it->length_ = it->split_.lengths_[it->idx_];
+ int type_code = it->split_.type_codes_[it->idx_];
+ WriteBits(code.block_type_code.depth_[type_code],
+ code.block_type_code.bits_[type_code],
+ storage_ix, storage);
+ EncodeBlockLength(code.block_len_code, it->length_, storage_ix, storage);
+ }
+ --it->length_;
+}
+
+struct EncodingParams {
+ int num_direct_distance_codes;
+ int distance_postfix_bits;
+ int literal_context_mode;
+};
+
+struct MetaBlock {
+ std::vector<Command> cmds;
+ EncodingParams params;
+ BlockSplit literal_split;
+ BlockSplit command_split;
+ BlockSplit distance_split;
+ std::vector<int> literal_context_modes;
+ std::vector<int> literal_context_map;
+ std::vector<int> distance_context_map;
+ std::vector<HistogramLiteral> literal_histograms;
+ std::vector<HistogramCommand> command_histograms;
+ std::vector<HistogramDistance> distance_histograms;
+};
+
+void BuildMetaBlock(const EncodingParams& params,
+ const std::vector<Command>& cmds,
+ const uint8_t* ringbuffer,
+ const size_t pos,
+ const size_t mask,
+ MetaBlock* mb) {
+ mb->cmds = cmds;
+ mb->params = params;
+ if (cmds.empty()) {
+ return;
+ }
+ ComputeCommandPrefixes(&mb->cmds,
+ mb->params.num_direct_distance_codes,
+ mb->params.distance_postfix_bits);
+ SplitBlock(mb->cmds,
+ &ringbuffer[pos & mask],
+ &mb->literal_split,
+ &mb->command_split,
+ &mb->distance_split);
+ ComputeBlockTypeShortCodes(&mb->literal_split);
+ ComputeBlockTypeShortCodes(&mb->command_split);
+ ComputeBlockTypeShortCodes(&mb->distance_split);
+
+ mb->literal_context_modes.resize(mb->literal_split.num_types_,
+ mb->params.literal_context_mode);
+
+
+ int num_literal_contexts =
+ mb->literal_split.num_types_ << kLiteralContextBits;
+ int num_distance_contexts =
+ mb->distance_split.num_types_ << kDistanceContextBits;
+ std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
+ mb->command_histograms.resize(mb->command_split.num_types_);
+ std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
+ BuildHistograms(mb->cmds,
+ mb->literal_split,
+ mb->command_split,
+ mb->distance_split,
+ ringbuffer,
+ pos,
+ mask,
+ mb->literal_context_modes,
+ &literal_histograms,
+ &mb->command_histograms,
+ &distance_histograms);
+
+ // Histogram ids need to fit in one byte.
+ static const int kMaxNumberOfHistograms = 256;
+
+ mb->literal_histograms = literal_histograms;
+ ClusterHistograms(literal_histograms,
+ 1 << kLiteralContextBits,
+ mb->literal_split.num_types_,
+ kMaxNumberOfHistograms,
+ &mb->literal_histograms,
+ &mb->literal_context_map);
+
+ mb->distance_histograms = distance_histograms;
+ ClusterHistograms(distance_histograms,
+ 1 << kDistanceContextBits,
+ mb->distance_split.num_types_,
+ kMaxNumberOfHistograms,
+ &mb->distance_histograms,
+ &mb->distance_context_map);
+}
+
+size_t MetaBlockLength(const std::vector<Command>& cmds) {
+ size_t length = 0;
+ for (int i = 0; i < cmds.size(); ++i) {
+ const Command& cmd = cmds[i];
+ length += cmd.insert_length_ + cmd.copy_length_;
+ }
+ return length;
+}
+
+void StoreMetaBlock(const MetaBlock& mb,
+ const bool is_last,
+ const uint8_t* ringbuffer,
+ const size_t mask,
+ size_t* pos,
+ int* storage_ix, uint8_t* storage) {
+ size_t length = MetaBlockLength(mb.cmds);
+ const size_t end_pos = *pos + length;
+ EncodeMetaBlockLength(length, is_last, false, storage_ix, storage);
+
+ if (length == 0) {
+ return;
+ }
+ BlockSplitCode literal_split_code;
+ BlockSplitCode command_split_code;
+ BlockSplitCode distance_split_code;
+ BuildAndEncodeBlockSplitCode(mb.literal_split, &literal_split_code,
+ storage_ix, storage);
+ BuildAndEncodeBlockSplitCode(mb.command_split, &command_split_code,
+ storage_ix, storage);
+ BuildAndEncodeBlockSplitCode(mb.distance_split, &distance_split_code,
+ storage_ix, storage);
+ WriteBits(2, mb.params.distance_postfix_bits, storage_ix, storage);
+ WriteBits(4,
+ mb.params.num_direct_distance_codes >>
+ mb.params.distance_postfix_bits,
+ storage_ix, storage);
+ int num_distance_codes =
+ kNumDistanceShortCodes + mb.params.num_direct_distance_codes +
+ (48 << mb.params.distance_postfix_bits);
+ for (int i = 0; i < mb.literal_split.num_types_; ++i) {
+ WriteBits(2, mb.literal_context_modes[i], storage_ix, storage);
+ }
+ EncodeContextMap(mb.literal_context_map, mb.literal_histograms.size(),
+ storage_ix, storage);
+ EncodeContextMap(mb.distance_context_map, mb.distance_histograms.size(),
+ storage_ix, storage);
+ std::vector<EntropyCodeLiteral> literal_codes;
+ std::vector<EntropyCodeCommand> command_codes;
+ std::vector<EntropyCodeDistance> distance_codes;
+ BuildAndStoreEntropyCodes(mb.literal_histograms, 256, &literal_codes,
+ storage_ix, storage);
+ BuildAndStoreEntropyCodes(mb.command_histograms, kNumCommandPrefixes,
+ &command_codes, storage_ix, storage);
+ BuildAndStoreEntropyCodes(mb.distance_histograms, num_distance_codes,
+ &distance_codes, storage_ix, storage);
+ BlockSplitIterator literal_it(mb.literal_split);
+ BlockSplitIterator command_it(mb.command_split);
+ BlockSplitIterator distance_it(mb.distance_split);
+ for (int i = 0; i < mb.cmds.size(); ++i) {
+ const Command& cmd = mb.cmds[i];
+ MoveAndEncode(command_split_code, &command_it, storage_ix, storage);
+ EncodeCommand(cmd, command_codes[command_it.type_], storage_ix, storage);
+ for (int j = 0; j < cmd.insert_length_; ++j) {
+ MoveAndEncode(literal_split_code, &literal_it, storage_ix, storage);
+ int histogram_idx = literal_it.type_;
+ uint8_t prev_byte = *pos > 0 ? ringbuffer[(*pos - 1) & mask] : 0;
+ uint8_t prev_byte2 = *pos > 1 ? ringbuffer[(*pos - 2) & mask] : 0;
+ int context = ((literal_it.type_ << kLiteralContextBits) +
+ Context(prev_byte, prev_byte2,
+ mb.literal_context_modes[literal_it.type_]));
+ histogram_idx = mb.literal_context_map[context];
+ int literal = ringbuffer[*pos & mask];
+ WriteBits(literal_codes[histogram_idx].depth_[literal],
+ literal_codes[histogram_idx].bits_[literal],
+ storage_ix, storage);
+ ++(*pos);
+ }
+ if (*pos < end_pos && cmd.distance_prefix_ != 0xffff) {
+ MoveAndEncode(distance_split_code, &distance_it, storage_ix, storage);
+ int context = (distance_it.type_ << 2) +
+ ((cmd.copy_length_code_ > 4) ? 3 : cmd.copy_length_code_ - 2);
+ int histogram_index = mb.distance_context_map[context];
+ size_t max_distance = std::min(*pos, (size_t)kMaxBackwardDistance);
+ EncodeCopyDistance(cmd, distance_codes[histogram_index],
+ storage_ix, storage);
+ }
+ *pos += cmd.copy_length_;
+ }
+}
+
+BrotliCompressor::BrotliCompressor(BrotliParams params)
+ : params_(params),
+ window_bits_(kWindowBits),
+ hashers_(new Hashers()),
+ dist_ringbuffer_idx_(0),
+ input_pos_(0),
+ ringbuffer_(kRingBufferBits, kMetaBlockSizeBits),
+ literal_cost_(1 << kRingBufferBits),
+ storage_ix_(0),
+ storage_(new uint8_t[2 << kMetaBlockSizeBits]) {
+ dist_ringbuffer_[0] = 16;
+ dist_ringbuffer_[1] = 15;
+ dist_ringbuffer_[2] = 11;
+ dist_ringbuffer_[3] = 4;
+ storage_[0] = 0;
+ switch (params.mode) {
+ case BrotliParams::MODE_TEXT: hash_type_ = Hashers::HASH_15_8_4; break;
+ case BrotliParams::MODE_FONT: hash_type_ = Hashers::HASH_15_8_2; break;
+ default: break;
+ }
+ hashers_->Init(hash_type_);
+ if (params.mode == BrotliParams::MODE_TEXT) {
+ StoreDictionaryWordHashes();
+ }
+}
+
+BrotliCompressor::~BrotliCompressor() {
+ delete[] storage_;
+}
+
+StaticDictionary *BrotliCompressor::static_dictionary_ = NULL;
+
+void BrotliCompressor::StoreDictionaryWordHashes() {
+ const int num_transforms = kNumTransforms;
+ if (static_dictionary_ == NULL) {
+ static_dictionary_ = new StaticDictionary;
+ for (int t = num_transforms - 1; t >= 0; --t) {
+ for (int i = kMaxDictionaryWordLength;
+ i >= kMinDictionaryWordLength; --i) {
+ const int num_words = 1 << kBrotliDictionarySizeBitsByLength[i];
+ for (int j = num_words - 1; j >= 0; --j) {
+ int word_id = t * num_words + j;
+ std::string word = GetTransformedDictionaryWord(i, word_id);
+ if (word.size() >= 4) {
+ static_dictionary_->Insert(word, i, word_id);
+ }
+ }
+ }
+ }
+ }
+ hashers_->SetStaticDictionary(static_dictionary_);
+}
+
+void BrotliCompressor::WriteStreamHeader() {
+ // Encode window size.
+ if (window_bits_ == 16) {
+ WriteBits(1, 0, &storage_ix_, storage_);
+ } else {
+ WriteBits(1, 1, &storage_ix_, storage_);
+ WriteBits(3, window_bits_ - 17, &storage_ix_, storage_);
+ }
+}
+
+void BrotliCompressor::WriteMetaBlock(const size_t input_size,
+ const uint8_t* input_buffer,
+ const bool is_last,
+ size_t* encoded_size,
+ uint8_t* encoded_buffer) {
+ static const double kMinUTF8Ratio = 0.75;
+ bool utf8_mode = false;
+ std::vector<Command> commands;
+ if (input_size > 0) {
+ ringbuffer_.Write(input_buffer, input_size);
+ utf8_mode = IsMostlyUTF8(
+ &ringbuffer_.start()[input_pos_ & kRingBufferMask],
+ input_size, kMinUTF8Ratio);
+ if (utf8_mode) {
+ EstimateBitCostsForLiteralsUTF8(input_pos_, input_size,
+ kRingBufferMask, kRingBufferMask,
+ ringbuffer_.start(), &literal_cost_[0]);
+ } else {
+ EstimateBitCostsForLiterals(input_pos_, input_size,
+ kRingBufferMask, kRingBufferMask,
+ ringbuffer_.start(), &literal_cost_[0]);
+ }
+ CreateBackwardReferences(
+ input_size, input_pos_,
+ ringbuffer_.start(),
+ &literal_cost_[0],
+ kRingBufferMask, kMaxBackwardDistance,
+ hashers_.get(),
+ hash_type_,
+ &commands);
+ ComputeDistanceShortCodes(&commands, input_pos_, kMaxBackwardDistance,
+ dist_ringbuffer_,
+ &dist_ringbuffer_idx_);
+ }
+ EncodingParams params;
+ params.num_direct_distance_codes =
+ params_.mode == BrotliParams::MODE_FONT ? 12 : 0;
+ params.distance_postfix_bits =
+ params_.mode == BrotliParams::MODE_FONT ? 1 : 0;
+ params.literal_context_mode = CONTEXT_SIGNED;
+ const int storage_ix0 = storage_ix_;
+ MetaBlock mb;
+ BuildMetaBlock(params, commands, ringbuffer_.start(), input_pos_,
+ kRingBufferMask, &mb);
+ StoreMetaBlock(mb, is_last, ringbuffer_.start(), kRingBufferMask,
+ &input_pos_, &storage_ix_, storage_);
+ size_t output_size = is_last ? ((storage_ix_ + 7) >> 3) : (storage_ix_ >> 3);
+ output_size -= (storage_ix0 >> 3);
+ if (input_size + 4 < output_size) {
+ storage_ix_ = storage_ix0;
+ storage_[storage_ix_ >> 3] &= (1 << (storage_ix_ & 7)) - 1;
+ EncodeMetaBlockLength(input_size, false, true, &storage_ix_, storage_);
+ size_t hdr_size = (storage_ix_ + 7) >> 3;
+ memcpy(encoded_buffer, storage_, hdr_size);
+ memcpy(encoded_buffer + hdr_size, input_buffer, input_size);
+ *encoded_size = hdr_size + input_size;
+ if (is_last) {
+ encoded_buffer[*encoded_size] = 0x3; // ISLAST, ISEMPTY
+ ++(*encoded_size);
+ }
+ storage_ix_ = 0;
+ storage_[0] = 0;
+ } else {
+ memcpy(encoded_buffer, storage_, output_size);
+ *encoded_size = output_size;
+ if (is_last) {
+ storage_ix_ = 0;
+ storage_[0] = 0;
+ } else {
+ storage_ix_ -= output_size << 3;
+ storage_[storage_ix_ >> 3] = storage_[output_size];
+ }
+ }
+}
+
+void BrotliCompressor::FinishStream(
+ size_t* encoded_size, uint8_t* encoded_buffer) {
+ WriteMetaBlock(0, NULL, true, encoded_size, encoded_buffer);
+}
+
+
+int BrotliCompressBuffer(BrotliParams params,
+ size_t input_size,
+ const uint8_t* input_buffer,
+ size_t* encoded_size,
+ uint8_t* encoded_buffer) {
+ if (input_size == 0) {
+ encoded_buffer[0] = 6;
+ *encoded_size = 1;
+ return 1;
+ }
+
+ BrotliCompressor compressor(params);
+ compressor.WriteStreamHeader();
+
+ const int max_block_size = 1 << kMetaBlockSizeBits;
+ size_t max_output_size = *encoded_size;
+ const uint8_t* input_end = input_buffer + input_size;
+ *encoded_size = 0;
+
+ while (input_buffer < input_end) {
+ int block_size = max_block_size;
+ bool is_last = false;
+ if (block_size >= input_end - input_buffer) {
+ block_size = input_end - input_buffer;
+ is_last = true;
+ }
+ size_t output_size = max_output_size;
+ compressor.WriteMetaBlock(block_size, input_buffer, is_last,
+ &output_size, &encoded_buffer[*encoded_size]);
+ input_buffer += block_size;
+ *encoded_size += output_size;
+ max_output_size -= output_size;
+ }
+
+ return 1;
+}
+
+} // namespace brotli