| // Copyright 2014-2021 Ulrich Kunitz. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package lzma |
| |
| import ( |
| "errors" |
| "unicode" |
| ) |
| |
| // node represents a node in the binary tree. |
| type node struct { |
| // x is the search value |
| x uint32 |
| // p parent node |
| p uint32 |
| // l left child |
| l uint32 |
| // r right child |
| r uint32 |
| } |
| |
| // wordLen is the number of bytes represented by the v field of a node. |
| const wordLen = 4 |
| |
| // binTree supports the identification of the next operation based on a |
| // binary tree. |
| // |
| // Nodes will be identified by their index into the ring buffer. |
| type binTree struct { |
| dict *encoderDict |
| // ring buffer of nodes |
| node []node |
| // absolute offset of the entry for the next node. Position 4 |
| // byte larger. |
| hoff int64 |
| // front position in the node ring buffer |
| front uint32 |
| // index of the root node |
| root uint32 |
| // current x value |
| x uint32 |
| // preallocated array |
| data []byte |
| } |
| |
| // null represents the nonexistent index. We can't use zero because it |
| // would always exist or we would need to decrease the index for each |
| // reference. |
| const null uint32 = 1<<32 - 1 |
| |
| // newBinTree initializes the binTree structure. The capacity defines |
| // the size of the buffer and defines the maximum distance for which |
| // matches will be found. |
| func newBinTree(capacity int) (t *binTree, err error) { |
| if capacity < 1 { |
| return nil, errors.New( |
| "newBinTree: capacity must be larger than zero") |
| } |
| if int64(capacity) >= int64(null) { |
| return nil, errors.New( |
| "newBinTree: capacity must less 2^{32}-1") |
| } |
| t = &binTree{ |
| node: make([]node, capacity), |
| hoff: -int64(wordLen), |
| root: null, |
| data: make([]byte, maxMatchLen), |
| } |
| return t, nil |
| } |
| |
| func (t *binTree) SetDict(d *encoderDict) { t.dict = d } |
| |
| // WriteByte writes a single byte into the binary tree. |
| func (t *binTree) WriteByte(c byte) error { |
| t.x = (t.x << 8) | uint32(c) |
| t.hoff++ |
| if t.hoff < 0 { |
| return nil |
| } |
| v := t.front |
| if int64(v) < t.hoff { |
| // We are overwriting old nodes stored in the tree. |
| t.remove(v) |
| } |
| t.node[v].x = t.x |
| t.add(v) |
| t.front++ |
| if int64(t.front) >= int64(len(t.node)) { |
| t.front = 0 |
| } |
| return nil |
| } |
| |
| // Writes writes a sequence of bytes into the binTree structure. |
| func (t *binTree) Write(p []byte) (n int, err error) { |
| for _, c := range p { |
| t.WriteByte(c) |
| } |
| return len(p), nil |
| } |
| |
| // add puts the node v into the tree. The node must not be part of the |
| // tree before. |
| func (t *binTree) add(v uint32) { |
| vn := &t.node[v] |
| // Set left and right to null indices. |
| vn.l, vn.r = null, null |
| // If the binary tree is empty make v the root. |
| if t.root == null { |
| t.root = v |
| vn.p = null |
| return |
| } |
| x := vn.x |
| p := t.root |
| // Search for the right leave link and add the new node. |
| for { |
| pn := &t.node[p] |
| if x <= pn.x { |
| if pn.l == null { |
| pn.l = v |
| vn.p = p |
| return |
| } |
| p = pn.l |
| } else { |
| if pn.r == null { |
| pn.r = v |
| vn.p = p |
| return |
| } |
| p = pn.r |
| } |
| } |
| } |
| |
| // parent returns the parent node index of v and the pointer to v value |
| // in the parent. |
| func (t *binTree) parent(v uint32) (p uint32, ptr *uint32) { |
| if t.root == v { |
| return null, &t.root |
| } |
| p = t.node[v].p |
| if t.node[p].l == v { |
| ptr = &t.node[p].l |
| } else { |
| ptr = &t.node[p].r |
| } |
| return |
| } |
| |
| // Remove node v. |
| func (t *binTree) remove(v uint32) { |
| vn := &t.node[v] |
| p, ptr := t.parent(v) |
| l, r := vn.l, vn.r |
| if l == null { |
| // Move the right child up. |
| *ptr = r |
| if r != null { |
| t.node[r].p = p |
| } |
| return |
| } |
| if r == null { |
| // Move the left child up. |
| *ptr = l |
| t.node[l].p = p |
| return |
| } |
| |
| // Search the in-order predecessor u. |
| un := &t.node[l] |
| ur := un.r |
| if ur == null { |
| // In order predecessor is l. Move it up. |
| un.r = r |
| t.node[r].p = l |
| un.p = p |
| *ptr = l |
| return |
| } |
| var u uint32 |
| for { |
| // Look for the max value in the tree where l is root. |
| u = ur |
| ur = t.node[u].r |
| if ur == null { |
| break |
| } |
| } |
| // replace u with ul |
| un = &t.node[u] |
| ul := un.l |
| up := un.p |
| t.node[up].r = ul |
| if ul != null { |
| t.node[ul].p = up |
| } |
| |
| // replace v by u |
| un.l, un.r = l, r |
| t.node[l].p = u |
| t.node[r].p = u |
| *ptr = u |
| un.p = p |
| } |
| |
| // search looks for the node that have the value x or for the nodes that |
| // brace it. The node highest in the tree with the value x will be |
| // returned. All other nodes with the same value live in left subtree of |
| // the returned node. |
| func (t *binTree) search(v uint32, x uint32) (a, b uint32) { |
| a, b = null, null |
| if v == null { |
| return |
| } |
| for { |
| vn := &t.node[v] |
| if x <= vn.x { |
| if x == vn.x { |
| return v, v |
| } |
| b = v |
| if vn.l == null { |
| return |
| } |
| v = vn.l |
| } else { |
| a = v |
| if vn.r == null { |
| return |
| } |
| v = vn.r |
| } |
| } |
| } |
| |
| // max returns the node with maximum value in the subtree with v as |
| // root. |
| func (t *binTree) max(v uint32) uint32 { |
| if v == null { |
| return null |
| } |
| for { |
| r := t.node[v].r |
| if r == null { |
| return v |
| } |
| v = r |
| } |
| } |
| |
| // min returns the node with the minimum value in the subtree with v as |
| // root. |
| func (t *binTree) min(v uint32) uint32 { |
| if v == null { |
| return null |
| } |
| for { |
| l := t.node[v].l |
| if l == null { |
| return v |
| } |
| v = l |
| } |
| } |
| |
| // pred returns the in-order predecessor of node v. |
| func (t *binTree) pred(v uint32) uint32 { |
| if v == null { |
| return null |
| } |
| u := t.max(t.node[v].l) |
| if u != null { |
| return u |
| } |
| for { |
| p := t.node[v].p |
| if p == null { |
| return null |
| } |
| if t.node[p].r == v { |
| return p |
| } |
| v = p |
| } |
| } |
| |
| // succ returns the in-order successor of node v. |
| func (t *binTree) succ(v uint32) uint32 { |
| if v == null { |
| return null |
| } |
| u := t.min(t.node[v].r) |
| if u != null { |
| return u |
| } |
| for { |
| p := t.node[v].p |
| if p == null { |
| return null |
| } |
| if t.node[p].l == v { |
| return p |
| } |
| v = p |
| } |
| } |
| |
| // xval converts the first four bytes of a into an 32-bit unsigned |
| // integer in big-endian order. |
| func xval(a []byte) uint32 { |
| var x uint32 |
| switch len(a) { |
| default: |
| x |= uint32(a[3]) |
| fallthrough |
| case 3: |
| x |= uint32(a[2]) << 8 |
| fallthrough |
| case 2: |
| x |= uint32(a[1]) << 16 |
| fallthrough |
| case 1: |
| x |= uint32(a[0]) << 24 |
| case 0: |
| } |
| return x |
| } |
| |
| // dumpX converts value x into a four-letter string. |
| func dumpX(x uint32) string { |
| a := make([]byte, 4) |
| for i := 0; i < 4; i++ { |
| c := byte(x >> uint((3-i)*8)) |
| if unicode.IsGraphic(rune(c)) { |
| a[i] = c |
| } else { |
| a[i] = '.' |
| } |
| } |
| return string(a) |
| } |
| |
| /* |
| // dumpNode writes a representation of the node v into the io.Writer. |
| func (t *binTree) dumpNode(w io.Writer, v uint32, indent int) { |
| if v == null { |
| return |
| } |
| |
| vn := &t.node[v] |
| |
| t.dumpNode(w, vn.r, indent+2) |
| |
| for i := 0; i < indent; i++ { |
| fmt.Fprint(w, " ") |
| } |
| if vn.p == null { |
| fmt.Fprintf(w, "node %d %q parent null\n", v, dumpX(vn.x)) |
| } else { |
| fmt.Fprintf(w, "node %d %q parent %d\n", v, dumpX(vn.x), vn.p) |
| } |
| |
| t.dumpNode(w, vn.l, indent+2) |
| } |
| |
| // dump prints a representation of the binary tree into the writer. |
| func (t *binTree) dump(w io.Writer) error { |
| bw := bufio.NewWriter(w) |
| t.dumpNode(bw, t.root, 0) |
| return bw.Flush() |
| } |
| */ |
| |
| func (t *binTree) distance(v uint32) int { |
| dist := int(t.front) - int(v) |
| if dist <= 0 { |
| dist += len(t.node) |
| } |
| return dist |
| } |
| |
| type matchParams struct { |
| rep [4]uint32 |
| // length when match will be accepted |
| nAccept int |
| // nodes to check |
| check int |
| // finish if length get shorter |
| stopShorter bool |
| } |
| |
| func (t *binTree) match(m match, distIter func() (int, bool), p matchParams, |
| ) (r match, checked int, accepted bool) { |
| buf := &t.dict.buf |
| for { |
| if checked >= p.check { |
| return m, checked, true |
| } |
| dist, ok := distIter() |
| if !ok { |
| return m, checked, false |
| } |
| checked++ |
| if m.n > 0 { |
| i := buf.rear - dist + m.n - 1 |
| if i < 0 { |
| i += len(buf.data) |
| } else if i >= len(buf.data) { |
| i -= len(buf.data) |
| } |
| if buf.data[i] != t.data[m.n-1] { |
| if p.stopShorter { |
| return m, checked, false |
| } |
| continue |
| } |
| } |
| n := buf.matchLen(dist, t.data) |
| switch n { |
| case 0: |
| if p.stopShorter { |
| return m, checked, false |
| } |
| continue |
| case 1: |
| if uint32(dist-minDistance) != p.rep[0] { |
| continue |
| } |
| } |
| if n < m.n || (n == m.n && int64(dist) >= m.distance) { |
| continue |
| } |
| m = match{int64(dist), n} |
| if n >= p.nAccept { |
| return m, checked, true |
| } |
| } |
| } |
| |
| func (t *binTree) NextOp(rep [4]uint32) operation { |
| // retrieve maxMatchLen data |
| n, _ := t.dict.buf.Peek(t.data[:maxMatchLen]) |
| if n == 0 { |
| panic("no data in buffer") |
| } |
| t.data = t.data[:n] |
| |
| var ( |
| m match |
| x, u, v uint32 |
| iterPred, iterSucc func() (int, bool) |
| ) |
| p := matchParams{ |
| rep: rep, |
| nAccept: maxMatchLen, |
| check: 32, |
| } |
| i := 4 |
| iterSmall := func() (dist int, ok bool) { |
| i-- |
| if i <= 0 { |
| return 0, false |
| } |
| return i, true |
| } |
| m, checked, accepted := t.match(m, iterSmall, p) |
| if accepted { |
| goto end |
| } |
| p.check -= checked |
| x = xval(t.data) |
| u, v = t.search(t.root, x) |
| if u == v && len(t.data) == 4 { |
| iter := func() (dist int, ok bool) { |
| if u == null { |
| return 0, false |
| } |
| dist = t.distance(u) |
| u, v = t.search(t.node[u].l, x) |
| if u != v { |
| u = null |
| } |
| return dist, true |
| } |
| m, _, _ = t.match(m, iter, p) |
| goto end |
| } |
| p.stopShorter = true |
| iterSucc = func() (dist int, ok bool) { |
| if v == null { |
| return 0, false |
| } |
| dist = t.distance(v) |
| v = t.succ(v) |
| return dist, true |
| } |
| m, checked, accepted = t.match(m, iterSucc, p) |
| if accepted { |
| goto end |
| } |
| p.check -= checked |
| iterPred = func() (dist int, ok bool) { |
| if u == null { |
| return 0, false |
| } |
| dist = t.distance(u) |
| u = t.pred(u) |
| return dist, true |
| } |
| m, _, _ = t.match(m, iterPred, p) |
| end: |
| if m.n == 0 { |
| return lit{t.data[0]} |
| } |
| return m |
| } |