blob: 1b727ffa1b087179d79387493f6f2dd2bfd6a1c6 [file] [log] [blame]
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine
// +build gc
// +build !noasm
#include "textflag.h"
// The asm code generally follows the pure Go code in encode_other.go, except
// where marked with a "!!!".
// ----------------------------------------------------------------------------
// func emitCopy(dst []byte, offset, length int) int
//
// All local variables fit into registers. The register allocation:
// - BX offset
// - CX length
// - SI &dst[0]
// - DI &dst[i]
TEXT ·emitCopy(SB), NOSPLIT, $0-48
MOVQ dst_base+0(FP), DI
MOVQ DI, SI
MOVQ offset+24(FP), BX
MOVQ length+32(FP), CX
loop0:
// for length >= 68 { etc }
CMPL CX, $68
JLT step1
// Emit a length 64 copy, encoded as 3 bytes.
MOVB $0xfe, 0(DI)
MOVW BX, 1(DI)
ADDQ $3, DI
SUBL $64, CX
JMP loop0
step1:
// if length > 64 { etc }
CMPL CX, $64
JLE step2
// Emit a length 60 copy, encoded as 3 bytes.
MOVB $0xee, 0(DI)
MOVW BX, 1(DI)
ADDQ $3, DI
SUBL $60, CX
step2:
// if length >= 12 || offset >= 2048 { goto step3 }
CMPL CX, $12
JGE step3
CMPL BX, $2048
JGE step3
// Emit the remaining copy, encoded as 2 bytes.
MOVB BX, 1(DI)
SHRL $8, BX
SHLB $5, BX
SUBB $4, CX
SHLB $2, CX
ORB CX, BX
ORB $1, BX
MOVB BX, 0(DI)
ADDQ $2, DI
// Return the number of bytes written.
SUBQ SI, DI
MOVQ DI, ret+40(FP)
RET
step3:
// Emit the remaining copy, encoded as 3 bytes.
SUBL $1, CX
SHLB $2, CX
ORB $2, CX
MOVB CX, 0(DI)
MOVW BX, 1(DI)
ADDQ $3, DI
// Return the number of bytes written.
SUBQ SI, DI
MOVQ DI, ret+40(FP)
RET
// ----------------------------------------------------------------------------
// func extendMatch(src []byte, i, j int) int
//
// All local variables fit into registers. The register allocation:
// - CX &src[0]
// - DX &src[len(src)]
// - SI &src[i]
// - DI &src[j]
// - R9 &src[len(src) - 8]
TEXT ·extendMatch(SB), NOSPLIT, $0-48
MOVQ src_base+0(FP), CX
MOVQ src_len+8(FP), DX
MOVQ i+24(FP), SI
MOVQ j+32(FP), DI
ADDQ CX, DX
ADDQ CX, SI
ADDQ CX, DI
MOVQ DX, R9
SUBQ $8, R9
cmp8:
// As long as we are 8 or more bytes before the end of src, we can load and
// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
CMPQ DI, R9
JA cmp1
MOVQ (SI), AX
MOVQ (DI), BX
CMPQ AX, BX
JNE bsf
ADDQ $8, SI
ADDQ $8, DI
JMP cmp8
bsf:
// If those 8 bytes were not equal, XOR the two 8 byte values, and return
// the index of the first byte that differs. The BSF instruction finds the
// least significant 1 bit, the amd64 architecture is little-endian, and
// the shift by 3 converts a bit index to a byte index.
XORQ AX, BX
BSFQ BX, BX
SHRQ $3, BX
ADDQ BX, DI
// Convert from &src[ret] to ret.
SUBQ CX, DI
MOVQ DI, ret+40(FP)
RET
cmp1:
// In src's tail, compare 1 byte at a time.
CMPQ DI, DX
JAE end
MOVB (SI), AX
MOVB (DI), BX
CMPB AX, BX
JNE end
ADDQ $1, SI
ADDQ $1, DI
JMP cmp1
end:
// Convert from &src[ret] to ret.
SUBQ CX, DI
MOVQ DI, ret+40(FP)
RET