blob: b87196c997c6f8ace27cfdbe6b4483bdde54fdce [file] [log] [blame] [edit]
// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package spanner
import (
"bytes"
"encoding/binary"
"fmt"
"math"
)
// ssformat provides encoding utilities for Sortable String Format (ssformat) keys.
// This encoding scheme converts database values into byte sequences that preserve
// lexicographic ordering when compared byte-by-byte.
//
// The encoding supports both ascending (increasing) and descending (decreasing)
// sort orders for all data types.
const (
// isKey is the bit set in the header byte to indicate this is a key encoding.
ssIsKey = 0x80
// Unsigned integers (variable length 1-9 bytes)
ssTypeUint1 = 0
ssTypeDecreasingUint1 = 40
// Signed integers (variable length 1-8 bytes)
ssTypeNegInt1 = 16
ssTypePosInt1 = 17
ssTypeDecreasingNegInt1 = 48
ssTypeDecreasingPosInt1 = 49
// Strings
ssTypeString = 25
ssTypeDecreasingString = 57
// Nullable markers
ssTypeNullOrderedFirst = 27
ssTypeNullableNotNullNullOrderedFirst = 28
ssTypeNullableNotNullNullOrderedLast = 59
ssTypeNullOrderedLast = 60
// Doubles (variable length 1-8 bytes, encoded as transformed int64)
ssTypeNegDouble1 = 73
ssTypePosDouble1 = 74
ssTypeDecreasingNegDouble1 = 89
ssTypeDecreasingPosDouble1 = 90
// Escape characters for string/bytes encoding
ssAscendingZeroEscape byte = 0xF0
ssAscendingFFEscape byte = 0x10
ssSep byte = 0x78
// Tag encoding constants
ssObjectExistenceTag = 0x7E
ssMaxFieldTag = 0xFFFF
// Offset to make negative timestamp seconds sort correctly
ssTimestampSecondsOffset = 1 << 63
)
// makePrefixSuccessor returns the smallest possible key that is larger than
// the input key and that does not have the input key as a prefix.
//
// This is done by setting the least significant bit of the last byte.
// Returns nil if the input is empty or nil.
func makePrefixSuccessor(key []byte) []byte {
if len(key) == 0 {
return nil
}
result := make([]byte, len(key))
copy(result, key)
result[len(result)-1] |= 1
return result
}
// appendCompositeTag appends a composite tag for a table identifier to the buffer.
// Tags must be in the range [1, 65535] and not equal to 0x7E (object existence tag).
//
// Tag encoding uses variable length:
// - Tags 1-15: 1 byte
// - Tags 16-4095: 2 bytes
// - Tags 4096-65535: 3 bytes
func appendCompositeTag(buf []byte, tag int) ([]byte, error) {
if tag == ssObjectExistenceTag || tag <= 0 || tag > ssMaxFieldTag {
return buf, fmt.Errorf("ssformat: invalid tag value %d (must be 1-%d, excluding %d)",
tag, ssMaxFieldTag, ssObjectExistenceTag)
}
if tag < 16 {
// Short tag: encode as (tag << 1)
return append(buf, byte(tag<<1)), nil
}
// Long tag: reserve one bit for the prefix successor bit.
// Header format: 0 NN TTTTT where NN is num_extra_bytes (01=1, 10=2)
// and TTTTT are the high bits of the shifted tag.
shiftedTag := tag << 1
if shiftedTag < (1 << 13) { // Original tag < 4096
// 2-byte encoding: num_extra_bytes=1, header byte is 001xxxxx (0x20 | tag_bits)
return append(buf,
byte((1<<5)|(shiftedTag>>8)),
byte(shiftedTag&0xFF),
), nil
}
// 3-byte encoding: num_extra_bytes=2, header byte is 010xxxxx (0x40 | tag_bits)
return append(buf,
byte((2<<5)|(shiftedTag>>16)),
byte((shiftedTag>>8)&0xFF),
byte(shiftedTag&0xFF),
), nil
}
// appendNullOrderedFirst appends a NULL marker that sorts before all non-NULL values.
func appendNullOrderedFirst(buf []byte) []byte {
return append(buf, byte(ssIsKey|ssTypeNullOrderedFirst), 0)
}
// appendNullOrderedLast appends a NULL marker that sorts after all non-NULL values.
func appendNullOrderedLast(buf []byte) []byte {
return append(buf, byte(ssIsKey|ssTypeNullOrderedLast), 0)
}
// appendNotNullMarkerNullOrderedFirst appends a non-NULL marker when using NULLS FIRST ordering.
func appendNotNullMarkerNullOrderedFirst(buf []byte) []byte {
return append(buf, byte(ssIsKey|ssTypeNullableNotNullNullOrderedFirst))
}
// appendNotNullMarkerNullOrderedLast appends a non-NULL marker when using NULLS LAST ordering.
func appendNotNullMarkerNullOrderedLast(buf []byte) []byte {
return append(buf, byte(ssIsKey|ssTypeNullableNotNullNullOrderedLast))
}
// appendUint64Increasing appends an unsigned 64-bit integer value in ascending sort order.
func appendUint64Increasing(buf []byte, val uint64) []byte {
var payload [9]byte // Max 9 bytes for value payload
payloadLen := 0
tempVal := val
payload[8-payloadLen] = byte((tempVal & 0x7F) << 1) // LSB is prefix-successor bit (0)
tempVal >>= 7
payloadLen++
for tempVal > 0 {
payload[8-payloadLen] = byte(tempVal & 0xFF)
tempVal >>= 8
payloadLen++
}
buf = append(buf, byte(ssIsKey|(ssTypeUint1+payloadLen-1)))
return append(buf, payload[9-payloadLen:]...)
}
// appendUint64Decreasing appends an unsigned 64-bit integer value in descending sort order.
func appendUint64Decreasing(buf []byte, val uint64) []byte {
var payload [9]byte
payloadLen := 0
tempVal := val
payload[8-payloadLen] = byte((^(tempVal & 0x7F) & 0x7F) << 1)
tempVal >>= 7
payloadLen++
for tempVal > 0 {
payload[8-payloadLen] = byte(^(tempVal & 0xFF))
tempVal >>= 8
payloadLen++
}
buf = append(buf, byte(ssIsKey|(ssTypeDecreasingUint1-payloadLen+1)))
return append(buf, payload[9-payloadLen:]...)
}
// appendIntInternal is the internal implementation for signed integer and double encoding.
func appendIntInternal(buf []byte, val int64, decreasing, isDouble bool) []byte {
if decreasing {
val = ^val
}
var payload [8]byte // Max 8 bytes for payload
payloadLen := 0
tempVal := val
payload[7-payloadLen] = byte((tempVal & 0x7F) << 1)
tempVal >>= 7
payloadLen++
// For positive numbers, loop until all bits are 0s.
// For negative numbers, loop until all bits are 1s (sign extension).
loopEndVal := int64(0)
if tempVal < 0 {
loopEndVal = -1
}
for tempVal != loopEndVal {
payload[7-payloadLen] = byte(tempVal & 0xFF)
tempVal >>= 8
payloadLen++
}
var typeVal int
if val >= 0 {
switch {
case !decreasing && !isDouble:
typeVal = ssTypePosInt1 + payloadLen - 1
case !decreasing && isDouble:
typeVal = ssTypePosDouble1 + payloadLen - 1
case decreasing && !isDouble:
typeVal = ssTypeDecreasingPosInt1 + payloadLen - 1
default: // decreasing && isDouble
typeVal = ssTypeDecreasingPosDouble1 + payloadLen - 1
}
} else {
switch {
case !decreasing && !isDouble:
typeVal = ssTypeNegInt1 - payloadLen + 1
case !decreasing && isDouble:
typeVal = ssTypeNegDouble1 - payloadLen + 1
case decreasing && !isDouble:
typeVal = ssTypeDecreasingNegInt1 - payloadLen + 1
default: // decreasing && isDouble
typeVal = ssTypeDecreasingNegDouble1 - payloadLen + 1
}
}
buf = append(buf, byte(ssIsKey|typeVal))
return append(buf, payload[8-payloadLen:]...)
}
// appendIntIncreasing appends a signed integer value in ascending sort order.
func appendIntIncreasing(buf []byte, value int64) []byte {
return appendIntInternal(buf, value, false, false)
}
// appendIntDecreasing appends a signed integer value in descending sort order.
func appendIntDecreasing(buf []byte, value int64) []byte {
return appendIntInternal(buf, value, true, false)
}
// appendDoubleInternal is the internal implementation for double encoding.
// The double is transformed to maintain lexicographic ordering:
// negative doubles (bit 63 = 1) are transformed via (MinInt64 - enc).
func appendDoubleInternal(buf []byte, value float64, decreasing bool) []byte {
enc := int64(math.Float64bits(value))
if enc < 0 {
enc = math.MinInt64 - enc
}
return appendIntInternal(buf, enc, decreasing, true)
}
// appendDoubleIncreasing appends a double value in ascending sort order.
func appendDoubleIncreasing(buf []byte, value float64) []byte {
return appendDoubleInternal(buf, value, false)
}
// appendDoubleDecreasing appends a double value in descending sort order.
func appendDoubleDecreasing(buf []byte, value float64) []byte {
return appendDoubleInternal(buf, value, true)
}
// appendByteSequence is the internal implementation for string and bytes encoding.
func appendByteSequence(buf, data []byte, decreasing bool) []byte {
if decreasing {
buf = append(buf, byte(ssIsKey|ssTypeDecreasingString))
} else {
buf = append(buf, byte(ssIsKey|ssTypeString))
}
for _, b := range data {
currentByte := b
if decreasing {
currentByte = ^b
}
switch currentByte {
case 0x00:
// Escape sequence for 0x00: write 0x00 followed by 0xF0
buf = append(buf, 0x00, ssAscendingZeroEscape)
case 0xFF:
// Escape sequence for 0xFF: write 0xFF followed by 0x10
buf = append(buf, 0xFF, ssAscendingFFEscape)
default:
buf = append(buf, currentByte)
}
}
// Terminator
if decreasing {
buf = append(buf, 0xFF, ssSep)
} else {
buf = append(buf, 0x00, ssSep)
}
return buf
}
// appendStringIncreasing appends a UTF-8 string value in ascending sort order.
func appendStringIncreasing(buf []byte, value string) []byte {
return appendByteSequence(buf, []byte(value), false)
}
// appendStringDecreasing appends a UTF-8 string value in descending sort order.
func appendStringDecreasing(buf []byte, value string) []byte {
return appendByteSequence(buf, []byte(value), true)
}
// appendBytesIncreasing appends a byte slice value in ascending sort order.
func appendBytesIncreasing(buf []byte, value []byte) []byte {
return appendByteSequence(buf, value, false)
}
// appendBytesDecreasing appends a byte slice value in descending sort order.
func appendBytesDecreasing(buf []byte, value []byte) []byte {
return appendByteSequence(buf, value, true)
}
// encodeTimestamp encodes a timestamp as 12 bytes: 8 bytes for seconds since epoch
// (with offset to handle negative), 4 bytes for nanoseconds.
func encodeTimestamp(seconds int64, nanos int32) []byte {
offsetSeconds := uint64(seconds) + ssTimestampSecondsOffset
buf := make([]byte, 12)
binary.BigEndian.PutUint64(buf[0:8], offsetSeconds)
binary.BigEndian.PutUint32(buf[8:12], uint32(nanos))
return buf
}
// encodeUUID encodes a UUID (128-bit) as 16 bytes in big-endian order.
func encodeUUID(high, low int64) []byte {
buf := make([]byte, 16)
binary.BigEndian.PutUint64(buf[0:8], uint64(high))
binary.BigEndian.PutUint64(buf[8:16], uint64(low))
return buf
}
// targetRange represents a key range with start and limit boundaries for routing.
type targetRange struct {
start []byte
limit []byte
approximate bool
}
// newTargetRange creates a new targetRange with the given boundaries.
func newTargetRange(start, limit []byte, approximate bool) *targetRange {
return &targetRange{
start: start,
limit: limit,
approximate: approximate,
}
}
// isPoint returns true if this range represents a single key (point lookup).
func (r *targetRange) isPoint() bool {
return len(r.limit) == 0
}
// mergeFrom merges another targetRange into this one. The resulting range
// will be the union of the two ranges, taking the minimum start key and
// maximum limit key.
func (r *targetRange) mergeFrom(other *targetRange) {
if bytes.Compare(other.start, r.start) < 0 {
r.start = other.start
}
if other.isPoint() {
if bytes.Compare(other.start, r.limit) >= 0 {
r.limit = makePrefixSuccessor(other.start)
}
} else if bytes.Compare(other.limit, r.limit) > 0 {
r.limit = other.limit
}
r.approximate = r.approximate || other.approximate
}