encode_amd64.s - third_party/golang/snappy - Git at Google

 // Copyright 2016 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 // +build !appengine
 // +build gc
 // +build !noasm

 #include "textflag.h"

 // The asm code generally follows the pure Go code in encode_other.go, except
 // where marked with a "!!!".

 // ----------------------------------------------------------------------------

 // func emitLiteral(dst, lit []byte) int
 //
 // All local variables fit into registers. The register allocation:
 //	- AX	return value
 //	- BX	n
 //	- CX	len(lit)
 //	- SI	&lit[0]
 //	- DI	&dst[i]
 //
 // The 24 bytes of stack space is to call runtime·memmove.
 TEXT ·emitLiteral(SB), NOSPLIT, $24-56
 	MOVQ dst_base+0(FP), DI
 	MOVQ lit_base+24(FP), SI
 	MOVQ lit_len+32(FP), CX
 	MOVQ CX, AX
 	MOVL CX, BX
 	SUBL $1, BX

 	CMPL BX, $60
 	JLT  oneByte
 	CMPL BX, $256
 	JLT  twoBytes

 threeBytes:
 	MOVB $0xf4, 0(DI)
 	MOVW BX, 1(DI)
 	ADDQ $3, DI
 	ADDQ $3, AX
 	JMP  end

 twoBytes:
 	MOVB $0xf0, 0(DI)
 	MOVB BX, 1(DI)
 	ADDQ $2, DI
 	ADDQ $2, AX
 	JMP  end

 oneByte:
 	SHLB $2, BX
 	MOVB BX, 0(DI)
 	ADDQ $1, DI
 	ADDQ $1, AX

 end:
 	MOVQ AX, ret+48(FP)

 	// copy(dst[i:], lit)
 	//
 	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
 	// DI, SI and CX as arguments.
 	MOVQ DI, 0(SP)
 	MOVQ SI, 8(SP)
 	MOVQ CX, 16(SP)
 	CALL runtime·memmove(SB)
 	RET

 // ----------------------------------------------------------------------------

 // func emitCopy(dst []byte, offset, length int) int
 //
 // All local variables fit into registers. The register allocation:
 //	- BX	offset
 //	- CX	length
 //	- SI	&dst[0]
 //	- DI	&dst[i]
 TEXT ·emitCopy(SB), NOSPLIT, $0-48
 	MOVQ dst_base+0(FP), DI
 	MOVQ DI, SI
 	MOVQ offset+24(FP), BX
 	MOVQ length+32(FP), CX

 loop0:
 	// for length >= 68 { etc }
 	CMPL CX, $68
 	JLT  step1

 	// Emit a length 64 copy, encoded as 3 bytes.
 	MOVB $0xfe, 0(DI)
 	MOVW BX, 1(DI)
 	ADDQ $3, DI
 	SUBL $64, CX
 	JMP  loop0

 step1:
 	// if length > 64 { etc }
 	CMPL CX, $64
 	JLE  step2

 	// Emit a length 60 copy, encoded as 3 bytes.
 	MOVB $0xee, 0(DI)
 	MOVW BX, 1(DI)
 	ADDQ $3, DI
 	SUBL $60, CX

 step2:
 	// if length >= 12 || offset >= 2048 { goto step3 }
 	CMPL CX, $12
 	JGE  step3
 	CMPL BX, $2048
 	JGE  step3

 	// Emit the remaining copy, encoded as 2 bytes.
 	MOVB BX, 1(DI)
 	SHRL $8, BX
 	SHLB $5, BX
 	SUBB $4, CX
 	SHLB $2, CX
 	ORB  CX, BX
 	ORB  $1, BX
 	MOVB BX, 0(DI)
 	ADDQ $2, DI

 	// Return the number of bytes written.
 	SUBQ SI, DI
 	MOVQ DI, ret+40(FP)
 	RET

 step3:
 	// Emit the remaining copy, encoded as 3 bytes.
 	SUBL $1, CX
 	SHLB $2, CX
 	ORB  $2, CX
 	MOVB CX, 0(DI)
 	MOVW BX, 1(DI)
 	ADDQ $3, DI

 	// Return the number of bytes written.
 	SUBQ SI, DI
 	MOVQ DI, ret+40(FP)
 	RET

 // ----------------------------------------------------------------------------

 // func extendMatch(src []byte, i, j int) int
 //
 // All local variables fit into registers. The register allocation:
 //	- CX	&src[0]
 //	- DX	&src[len(src)]
 //	- SI	&src[i]
 //	- DI	&src[j]
 //	- R9	&src[len(src) - 8]
 TEXT ·extendMatch(SB), NOSPLIT, $0-48
 	MOVQ src_base+0(FP), CX
 	MOVQ src_len+8(FP), DX
 	MOVQ i+24(FP), SI
 	MOVQ j+32(FP), DI
 	ADDQ CX, DX
 	ADDQ CX, SI
 	ADDQ CX, DI
 	MOVQ DX, R9
 	SUBQ $8, R9

 cmp8:
 	// As long as we are 8 or more bytes before the end of src, we can load and
 	// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
 	CMPQ DI, R9
 	JA   cmp1
 	MOVQ (SI), AX
 	MOVQ (DI), BX
 	CMPQ AX, BX
 	JNE  bsf
 	ADDQ $8, SI
 	ADDQ $8, DI
 	JMP  cmp8

 bsf:
 	// If those 8 bytes were not equal, XOR the two 8 byte values, and return
 	// the index of the first byte that differs. The BSF instruction finds the
 	// least significant 1 bit, the amd64 architecture is little-endian, and
 	// the shift by 3 converts a bit index to a byte index.
 	XORQ AX, BX
 	BSFQ BX, BX
 	SHRQ $3, BX
 	ADDQ BX, DI

 	// Convert from &src[ret] to ret.
 	SUBQ CX, DI
 	MOVQ DI, ret+40(FP)
 	RET

 cmp1:
 	// In src's tail, compare 1 byte at a time.
 	CMPQ DI, DX
 	JAE  end
 	MOVB (SI), AX
 	MOVB (DI), BX
 	CMPB AX, BX
 	JNE  end
 	ADDQ $1, SI
 	ADDQ $1, DI
 	JMP  cmp1

 end:
 	// Convert from &src[ret] to ret.
 	SUBQ CX, DI
 	MOVQ DI, ret+40(FP)
 	RET
	// Copyright 2016 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	// +build !appengine
	// +build gc
	// +build !noasm

	#include "textflag.h"

	// The asm code generally follows the pure Go code in encode_other.go, except
	// where marked with a "!!!".

	// ----------------------------------------------------------------------------

	// func emitLiteral(dst, lit []byte) int
	//
	// All local variables fit into registers. The register allocation:
	// - AX return value
	// - BX n
	// - CX len(lit)
	// - SI &lit[0]
	// - DI &dst[i]
	//
	// The 24 bytes of stack space is to call runtime·memmove.
	TEXT ·emitLiteral(SB), NOSPLIT, $24-56
	MOVQ dst_base+0(FP), DI
	MOVQ lit_base+24(FP), SI
	MOVQ lit_len+32(FP), CX
	MOVQ CX, AX
	MOVL CX, BX
	SUBL $1, BX

	CMPL BX, $60
	JLT oneByte
	CMPL BX, $256
	JLT twoBytes

	threeBytes:
	MOVB $0xf4, 0(DI)
	MOVW BX, 1(DI)
	ADDQ $3, DI
	ADDQ $3, AX
	JMP end

	twoBytes:
	MOVB $0xf0, 0(DI)
	MOVB BX, 1(DI)
	ADDQ $2, DI
	ADDQ $2, AX
	JMP end

	oneByte:
	SHLB $2, BX
	MOVB BX, 0(DI)
	ADDQ $1, DI
	ADDQ $1, AX

	end:
	MOVQ AX, ret+48(FP)

	// copy(dst[i:], lit)
	//
	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
	// DI, SI and CX as arguments.
	MOVQ DI, 0(SP)
	MOVQ SI, 8(SP)
	MOVQ CX, 16(SP)
	CALL runtime·memmove(SB)
	RET

	// ----------------------------------------------------------------------------

	// func emitCopy(dst []byte, offset, length int) int
	//
	// All local variables fit into registers. The register allocation:
	// - BX offset
	// - CX length
	// - SI &dst[0]
	// - DI &dst[i]
	TEXT ·emitCopy(SB), NOSPLIT, $0-48
	MOVQ dst_base+0(FP), DI
	MOVQ DI, SI
	MOVQ offset+24(FP), BX
	MOVQ length+32(FP), CX

	loop0:
	// for length >= 68 { etc }
	CMPL CX, $68
	JLT step1

	// Emit a length 64 copy, encoded as 3 bytes.
	MOVB $0xfe, 0(DI)
	MOVW BX, 1(DI)
	ADDQ $3, DI
	SUBL $64, CX
	JMP loop0

	step1:
	// if length > 64 { etc }
	CMPL CX, $64
	JLE step2

	// Emit a length 60 copy, encoded as 3 bytes.
	MOVB $0xee, 0(DI)
	MOVW BX, 1(DI)
	ADDQ $3, DI
	SUBL $60, CX

	step2:
	// if length >= 12 \|\| offset >= 2048 { goto step3 }
	CMPL CX, $12
	JGE step3
	CMPL BX, $2048
	JGE step3

	// Emit the remaining copy, encoded as 2 bytes.
	MOVB BX, 1(DI)
	SHRL $8, BX
	SHLB $5, BX
	SUBB $4, CX
	SHLB $2, CX
	ORB CX, BX
	ORB $1, BX
	MOVB BX, 0(DI)
	ADDQ $2, DI

	// Return the number of bytes written.
	SUBQ SI, DI
	MOVQ DI, ret+40(FP)
	RET

	step3:
	// Emit the remaining copy, encoded as 3 bytes.
	SUBL $1, CX
	SHLB $2, CX
	ORB $2, CX
	MOVB CX, 0(DI)
	MOVW BX, 1(DI)
	ADDQ $3, DI

	// Return the number of bytes written.
	SUBQ SI, DI
	MOVQ DI, ret+40(FP)
	RET

	// ----------------------------------------------------------------------------

	// func extendMatch(src []byte, i, j int) int
	//
	// All local variables fit into registers. The register allocation:
	// - CX &src[0]
	// - DX &src[len(src)]
	// - SI &src[i]
	// - DI &src[j]
	// - R9 &src[len(src) - 8]
	TEXT ·extendMatch(SB), NOSPLIT, $0-48
	MOVQ src_base+0(FP), CX
	MOVQ src_len+8(FP), DX
	MOVQ i+24(FP), SI
	MOVQ j+32(FP), DI
	ADDQ CX, DX
	ADDQ CX, SI
	ADDQ CX, DI
	MOVQ DX, R9
	SUBQ $8, R9

	cmp8:
	// As long as we are 8 or more bytes before the end of src, we can load and
	// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
	CMPQ DI, R9
	JA cmp1
	MOVQ (SI), AX
	MOVQ (DI), BX
	CMPQ AX, BX
	JNE bsf
	ADDQ $8, SI
	ADDQ $8, DI
	JMP cmp8

	bsf:
	// If those 8 bytes were not equal, XOR the two 8 byte values, and return
	// the index of the first byte that differs. The BSF instruction finds the
	// least significant 1 bit, the amd64 architecture is little-endian, and
	// the shift by 3 converts a bit index to a byte index.
	XORQ AX, BX
	BSFQ BX, BX
	SHRQ $3, BX
	ADDQ BX, DI

	// Convert from &src[ret] to ret.
	SUBQ CX, DI
	MOVQ DI, ret+40(FP)
	RET

	cmp1:
	// In src's tail, compare 1 byte at a time.
	CMPQ DI, DX
	JAE end
	MOVB (SI), AX
	MOVB (DI), BX
	CMPB AX, BX
	JNE end
	ADDQ $1, SI
	ADDQ $1, DI
	JMP cmp1

	end:
	// Convert from &src[ret] to ret.
	SUBQ CX, DI
	MOVQ DI, ret+40(FP)
	RET