| package brotli |
| |
| /* Copyright 2013 Google Inc. All Rights Reserved. |
| |
| Distributed under MIT license. |
| See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
| */ |
| |
| /* Functions to estimate the bit cost of Huffman trees. */ |
| func shannonEntropy(population []uint32, size uint, total *uint) float64 { |
| var sum uint = 0 |
| var retval float64 = 0 |
| var population_end []uint32 = population[size:] |
| var p uint |
| for -cap(population) < -cap(population_end) { |
| p = uint(population[0]) |
| population = population[1:] |
| sum += p |
| retval -= float64(p) * fastLog2(p) |
| } |
| |
| if sum != 0 { |
| retval += float64(sum) * fastLog2(sum) |
| } |
| *total = sum |
| return retval |
| } |
| |
| func bitsEntropy(population []uint32, size uint) float64 { |
| var sum uint |
| var retval float64 = shannonEntropy(population, size, &sum) |
| if retval < float64(sum) { |
| /* At least one bit per literal is needed. */ |
| retval = float64(sum) |
| } |
| |
| return retval |
| } |
| |
| const kOneSymbolHistogramCost float64 = 12 |
| const kTwoSymbolHistogramCost float64 = 20 |
| const kThreeSymbolHistogramCost float64 = 28 |
| const kFourSymbolHistogramCost float64 = 37 |
| |
| func populationCostLiteral(histogram *histogramLiteral) float64 { |
| var data_size uint = histogramDataSizeLiteral() |
| var count int = 0 |
| var s [5]uint |
| var bits float64 = 0.0 |
| var i uint |
| if histogram.total_count_ == 0 { |
| return kOneSymbolHistogramCost |
| } |
| |
| for i = 0; i < data_size; i++ { |
| if histogram.data_[i] > 0 { |
| s[count] = i |
| count++ |
| if count > 4 { |
| break |
| } |
| } |
| } |
| |
| if count == 1 { |
| return kOneSymbolHistogramCost |
| } |
| |
| if count == 2 { |
| return kTwoSymbolHistogramCost + float64(histogram.total_count_) |
| } |
| |
| if count == 3 { |
| var histo0 uint32 = histogram.data_[s[0]] |
| var histo1 uint32 = histogram.data_[s[1]] |
| var histo2 uint32 = histogram.data_[s[2]] |
| var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2)) |
| return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax) |
| } |
| |
| if count == 4 { |
| var histo [4]uint32 |
| var h23 uint32 |
| var histomax uint32 |
| for i = 0; i < 4; i++ { |
| histo[i] = histogram.data_[s[i]] |
| } |
| |
| /* Sort */ |
| for i = 0; i < 4; i++ { |
| var j uint |
| for j = i + 1; j < 4; j++ { |
| if histo[j] > histo[i] { |
| var tmp uint32 = histo[j] |
| histo[j] = histo[i] |
| histo[i] = tmp |
| } |
| } |
| } |
| |
| h23 = histo[2] + histo[3] |
| histomax = brotli_max_uint32_t(h23, histo[0]) |
| return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax) |
| } |
| { |
| var max_depth uint = 1 |
| var depth_histo = [codeLengthCodes]uint32{0} |
| /* In this loop we compute the entropy of the histogram and simultaneously |
| build a simplified histogram of the code length codes where we use the |
| zero repeat code 17, but we don't use the non-zero repeat code 16. */ |
| |
| var log2total float64 = fastLog2(histogram.total_count_) |
| for i = 0; i < data_size; { |
| if histogram.data_[i] > 0 { |
| var log2p float64 = log2total - fastLog2(uint(histogram.data_[i])) |
| /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) = |
| = log2(total_count) - log2(count(symbol)) */ |
| |
| var depth uint = uint(log2p + 0.5) |
| /* Approximate the bit depth by round(-log2(P(symbol))) */ |
| bits += float64(histogram.data_[i]) * log2p |
| |
| if depth > 15 { |
| depth = 15 |
| } |
| |
| if depth > max_depth { |
| max_depth = depth |
| } |
| |
| depth_histo[depth]++ |
| i++ |
| } else { |
| var reps uint32 = 1 |
| /* Compute the run length of zeros and add the appropriate number of 0 |
| and 17 code length codes to the code length code histogram. */ |
| |
| var k uint |
| for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ { |
| reps++ |
| } |
| |
| i += uint(reps) |
| if i == data_size { |
| /* Don't add any cost for the last zero run, since these are encoded |
| only implicitly. */ |
| break |
| } |
| |
| if reps < 3 { |
| depth_histo[0] += reps |
| } else { |
| reps -= 2 |
| for reps > 0 { |
| depth_histo[repeatZeroCodeLength]++ |
| |
| /* Add the 3 extra bits for the 17 code length code. */ |
| bits += 3 |
| |
| reps >>= 3 |
| } |
| } |
| } |
| } |
| |
| /* Add the estimated encoding cost of the code length code histogram. */ |
| bits += float64(18 + 2*max_depth) |
| |
| /* Add the entropy of the code length code histogram. */ |
| bits += bitsEntropy(depth_histo[:], codeLengthCodes) |
| } |
| |
| return bits |
| } |
| |
| func populationCostCommand(histogram *histogramCommand) float64 { |
| var data_size uint = histogramDataSizeCommand() |
| var count int = 0 |
| var s [5]uint |
| var bits float64 = 0.0 |
| var i uint |
| if histogram.total_count_ == 0 { |
| return kOneSymbolHistogramCost |
| } |
| |
| for i = 0; i < data_size; i++ { |
| if histogram.data_[i] > 0 { |
| s[count] = i |
| count++ |
| if count > 4 { |
| break |
| } |
| } |
| } |
| |
| if count == 1 { |
| return kOneSymbolHistogramCost |
| } |
| |
| if count == 2 { |
| return kTwoSymbolHistogramCost + float64(histogram.total_count_) |
| } |
| |
| if count == 3 { |
| var histo0 uint32 = histogram.data_[s[0]] |
| var histo1 uint32 = histogram.data_[s[1]] |
| var histo2 uint32 = histogram.data_[s[2]] |
| var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2)) |
| return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax) |
| } |
| |
| if count == 4 { |
| var histo [4]uint32 |
| var h23 uint32 |
| var histomax uint32 |
| for i = 0; i < 4; i++ { |
| histo[i] = histogram.data_[s[i]] |
| } |
| |
| /* Sort */ |
| for i = 0; i < 4; i++ { |
| var j uint |
| for j = i + 1; j < 4; j++ { |
| if histo[j] > histo[i] { |
| var tmp uint32 = histo[j] |
| histo[j] = histo[i] |
| histo[i] = tmp |
| } |
| } |
| } |
| |
| h23 = histo[2] + histo[3] |
| histomax = brotli_max_uint32_t(h23, histo[0]) |
| return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax) |
| } |
| { |
| var max_depth uint = 1 |
| var depth_histo = [codeLengthCodes]uint32{0} |
| /* In this loop we compute the entropy of the histogram and simultaneously |
| build a simplified histogram of the code length codes where we use the |
| zero repeat code 17, but we don't use the non-zero repeat code 16. */ |
| |
| var log2total float64 = fastLog2(histogram.total_count_) |
| for i = 0; i < data_size; { |
| if histogram.data_[i] > 0 { |
| var log2p float64 = log2total - fastLog2(uint(histogram.data_[i])) |
| /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) = |
| = log2(total_count) - log2(count(symbol)) */ |
| |
| var depth uint = uint(log2p + 0.5) |
| /* Approximate the bit depth by round(-log2(P(symbol))) */ |
| bits += float64(histogram.data_[i]) * log2p |
| |
| if depth > 15 { |
| depth = 15 |
| } |
| |
| if depth > max_depth { |
| max_depth = depth |
| } |
| |
| depth_histo[depth]++ |
| i++ |
| } else { |
| var reps uint32 = 1 |
| /* Compute the run length of zeros and add the appropriate number of 0 |
| and 17 code length codes to the code length code histogram. */ |
| |
| var k uint |
| for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ { |
| reps++ |
| } |
| |
| i += uint(reps) |
| if i == data_size { |
| /* Don't add any cost for the last zero run, since these are encoded |
| only implicitly. */ |
| break |
| } |
| |
| if reps < 3 { |
| depth_histo[0] += reps |
| } else { |
| reps -= 2 |
| for reps > 0 { |
| depth_histo[repeatZeroCodeLength]++ |
| |
| /* Add the 3 extra bits for the 17 code length code. */ |
| bits += 3 |
| |
| reps >>= 3 |
| } |
| } |
| } |
| } |
| |
| /* Add the estimated encoding cost of the code length code histogram. */ |
| bits += float64(18 + 2*max_depth) |
| |
| /* Add the entropy of the code length code histogram. */ |
| bits += bitsEntropy(depth_histo[:], codeLengthCodes) |
| } |
| |
| return bits |
| } |
| |
| func populationCostDistance(histogram *histogramDistance) float64 { |
| var data_size uint = histogramDataSizeDistance() |
| var count int = 0 |
| var s [5]uint |
| var bits float64 = 0.0 |
| var i uint |
| if histogram.total_count_ == 0 { |
| return kOneSymbolHistogramCost |
| } |
| |
| for i = 0; i < data_size; i++ { |
| if histogram.data_[i] > 0 { |
| s[count] = i |
| count++ |
| if count > 4 { |
| break |
| } |
| } |
| } |
| |
| if count == 1 { |
| return kOneSymbolHistogramCost |
| } |
| |
| if count == 2 { |
| return kTwoSymbolHistogramCost + float64(histogram.total_count_) |
| } |
| |
| if count == 3 { |
| var histo0 uint32 = histogram.data_[s[0]] |
| var histo1 uint32 = histogram.data_[s[1]] |
| var histo2 uint32 = histogram.data_[s[2]] |
| var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2)) |
| return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax) |
| } |
| |
| if count == 4 { |
| var histo [4]uint32 |
| var h23 uint32 |
| var histomax uint32 |
| for i = 0; i < 4; i++ { |
| histo[i] = histogram.data_[s[i]] |
| } |
| |
| /* Sort */ |
| for i = 0; i < 4; i++ { |
| var j uint |
| for j = i + 1; j < 4; j++ { |
| if histo[j] > histo[i] { |
| var tmp uint32 = histo[j] |
| histo[j] = histo[i] |
| histo[i] = tmp |
| } |
| } |
| } |
| |
| h23 = histo[2] + histo[3] |
| histomax = brotli_max_uint32_t(h23, histo[0]) |
| return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax) |
| } |
| { |
| var max_depth uint = 1 |
| var depth_histo = [codeLengthCodes]uint32{0} |
| /* In this loop we compute the entropy of the histogram and simultaneously |
| build a simplified histogram of the code length codes where we use the |
| zero repeat code 17, but we don't use the non-zero repeat code 16. */ |
| |
| var log2total float64 = fastLog2(histogram.total_count_) |
| for i = 0; i < data_size; { |
| if histogram.data_[i] > 0 { |
| var log2p float64 = log2total - fastLog2(uint(histogram.data_[i])) |
| /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) = |
| = log2(total_count) - log2(count(symbol)) */ |
| |
| var depth uint = uint(log2p + 0.5) |
| /* Approximate the bit depth by round(-log2(P(symbol))) */ |
| bits += float64(histogram.data_[i]) * log2p |
| |
| if depth > 15 { |
| depth = 15 |
| } |
| |
| if depth > max_depth { |
| max_depth = depth |
| } |
| |
| depth_histo[depth]++ |
| i++ |
| } else { |
| var reps uint32 = 1 |
| /* Compute the run length of zeros and add the appropriate number of 0 |
| and 17 code length codes to the code length code histogram. */ |
| |
| var k uint |
| for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ { |
| reps++ |
| } |
| |
| i += uint(reps) |
| if i == data_size { |
| /* Don't add any cost for the last zero run, since these are encoded |
| only implicitly. */ |
| break |
| } |
| |
| if reps < 3 { |
| depth_histo[0] += reps |
| } else { |
| reps -= 2 |
| for reps > 0 { |
| depth_histo[repeatZeroCodeLength]++ |
| |
| /* Add the 3 extra bits for the 17 code length code. */ |
| bits += 3 |
| |
| reps >>= 3 |
| } |
| } |
| } |
| } |
| |
| /* Add the estimated encoding cost of the code length code histogram. */ |
| bits += float64(18 + 2*max_depth) |
| |
| /* Add the entropy of the code length code histogram. */ |
| bits += bitsEntropy(depth_histo[:], codeLengthCodes) |
| } |
| |
| return bits |
| } |