blob: 46b3450b095ba8cbcec9ad10e7317bda9254a915 [file] [log] [blame]
/* CFBurstTrie.c
Copyright (c) 2008-2016, Apple Inc. and the Swift project authors
Portions Copyright (c) 2014-2016 Apple Inc. and the Swift project authors
Licensed under Apache License v2.0 with Runtime Library Exception
See http://swift.org/LICENSE.txt for license information
See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
Responsibility: Jennifer Moore
*/
#include "CFInternal.h"
#include "CFBurstTrie.h"
#include <CoreFoundation/CFByteOrder.h>
#include "CFNumber.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <limits.h>
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
#include <unistd.h>
#include <sys/param.h>
#include <sys/mman.h>
#endif
#include <errno.h>
#include <assert.h>
#if DEPLOYMENT_TARGET_WINDOWS
#define open _NS_open
#define statinfo _stat
#define stat(x,y) _NS_stat(x,y)
#define __builtin_memcmp(x, y, z) memcmp(x, y, z)
#define __builtin_popcountll(x) popcountll(x)
#define bzero(dst, size) ZeroMemory(dst, size)
#define S_IWUSR 0
#define S_IRUSR 0
static int pwrite(int fd, const void *buf, size_t nbyte, off_t offset) {
// Get where we are
long pos = _tell(fd);
// Move to new offset
_lseek(fd, offset, SEEK_SET);
// Write data
int res = _write(fd, buf, nbyte);
// Return to previous offset
_lseek(fd, pos, SEEK_SET);
return res;
}
#else
#define statinfo stat
#endif
#if 0
#pragma mark Types and Utilities
#endif
#define MAX_STRING_ALLOCATION_SIZE 342
#define MAX_STRING_SIZE 1024
#define MAX_KEY_LENGTH MAX_STRING_SIZE * 4
#define CHARACTER_SET_SIZE 256
#define MAX_LIST_SIZE 256 // 64
#define MAX_BITMAP_SIZE 200
#define MAX_BUFFER_SIZE (4096<<2)
#define NextTrie_GetPtr(p) (p & ((~(uintptr_t)0)-3))
#define NextTrie_GetKind(p) (p & 3)
#define NextTrie_SetKind(p, kind) (p |= (3&kind))
#define DiskNextTrie_GetPtr(map,offset) (((uintptr_t)map) + (uintptr_t)(offset & ((~(uintptr_t)0)-3)))
#define DiskNextTrie_GetKind(p) (p & 3)
#define DiskNextTrie_SetKind(p, kind) (p |= (3&kind))
// Use this macro to avoid forgetting to check the pointer before assigning value to it.
#define SetPayload(pointer, value) do { if (pointer) *pointer = value; } while (0)
enum { Nothing = 0, TrieKind = 1, ListKind = 2, CompactTrieKind = 3 };
typedef enum { FailedInsert = 0, NewTerm = 1, ExistingTerm = 2 } CFBTInsertCode;
#pragma pack (1)
typedef uintptr_t NextTrie;
typedef struct _TrieLevel {
NextTrie slots[CHARACTER_SET_SIZE];
uint32_t weight;
uint32_t payload;
} TrieLevel;
typedef TrieLevel *TrieLevelRef;
typedef struct _MapTrieLevel {
uint32_t slots[CHARACTER_SET_SIZE];
uint32_t payload;
} MapTrieLevel;
typedef MapTrieLevel *MapTrieLevelRef;
typedef struct _CompactMapTrieLevel {
uint64_t bitmap[CHARACTER_SET_SIZE / 64];
uint32_t payload;
uint32_t slots[];
} CompactMapTrieLevel;
typedef CompactMapTrieLevel *CompactMapTrieLevelRef;
typedef struct _ListNode {
struct _ListNode *next;
uint32_t weight;
uint32_t payload;
uint16_t length;
UInt8 string[];
}* ListNodeRef;
typedef struct _Page {
uint32_t length;
char data[];
} Page;
typedef struct _PageEntryPacked {
uint8_t pfxLen;
uint16_t strlen;
uint32_t payload;
UInt8 string[];
} PageEntryPacked;
typedef struct _PageEntry {
uint16_t strlen;
uint32_t payload;
UInt8 string[];
} PageEntry;
typedef struct _TrieHeader {
uint32_t signature;
uint32_t rootOffset;
uint32_t count;
uint32_t size;
uint32_t flags;
uint64_t reserved[16];
} TrieHeader;
typedef struct _TrieCursor {
uint64_t signature;
uint64_t counter;
NextTrie next;
uint32_t keylen;
uint32_t prefixlen;
const uint8_t *prefix;
uint8_t key[MAX_KEY_LENGTH];
} TrieCursor;
typedef struct _MapCursor {
uint64_t signature;
TrieHeader *header;
uint32_t next;
uint32_t prefixlen;
uint32_t keylen;
const uint8_t *prefix;
uint8_t key[MAX_STRING_SIZE*4];
} MapCursor;
typedef struct _CompactMapCursor {
uint32_t next;
uint32_t entryOffsetInPage;
uint32_t offsetInEntry;
uint32_t payload;
// On a page, the first entry could has 0 strlen. So we need this variable to tell us whether
// the cursor is merely pointing at the beginning of the page, or the first entry.
// For example, if the trie contains "ab" and "abc", where "a" is stored on an array level,
// while "b" and "bc" are stored on a page level. If we creat a cursor for string "a", this cursor
// will point at the beginning of the page, but not at any particular key. The both entryOffsetInPage and
// offsetInEntry fields of the cursor are set to 0 in this case. Now if we add "a" to the
// trie. the page level will actually contains three entries. The first entry corresponds to string "a".
// That entry has 0 strlen value. If we creat a cursor for string "a" again, this cursor will
// point at the first entry on the page. But the entryOffsetInPage and offsetInEntry fields are still
// set to 0s. So we need an additional variable to make distinction between these two situations.
BOOL isOnPage;
} CompactMapCursor;
typedef struct _CompactMapCursor *MapCursorRef;
enum {
_kCFBurstTrieCursorTrieType = 0,
_kCFBurstTrieCursorMapType
};
typedef struct _CFBurstTrieCursor {
CompactMapCursor mapCursor;
CFIndex cursorType;
CFBurstTrieRef trie;
} _CFBurstTrieCursor;
// ** Legacy
typedef struct _DiskTrieLevel {
uint32_t slots[CHARACTER_SET_SIZE];
uint32_t weight;
uint32_t payload;
} DiskTrieLevel;
typedef DiskTrieLevel *DiskTrieLevelRef;
typedef struct _CompactDiskTrieLevel {
uint64_t bitmap[CHARACTER_SET_SIZE / 64]; // CHARACTER_SET_SIZE / 64bits per word
uint32_t weight;
uint32_t payload;
uint32_t slots[];
} CompactDiskTrieLevel;
typedef CompactDiskTrieLevel *CompactDiskTrieLevelRef;
typedef struct _StringPage {
uint32_t length;
char data[];
} StringPage;
typedef struct _StringPageEntryPacked {
uint8_t pfxLen;
uint16_t strlen; // make uint8_t if possible
uint32_t payload;
char string[];
} StringPageEntryPacked;
typedef struct _StringPageEntry {
uint16_t strlen; // make uint8_t if possible
uint32_t payload;
char string[];
} StringPageEntry;
typedef struct _fileHeader {
uint32_t signature;
uint32_t rootOffset;
uint32_t count;
uint32_t size;
uint32_t flags;
} fileHeader;
// **
#pragma pack()
struct _CFBurstTrie {
union {
TrieLevel root;
DiskTrieLevel diskRoot;
MapTrieLevel maproot;
};
char *mapBase;
uint32_t mapSize;
uint32_t mapOffset;
uint32_t cflags;
uint32_t count;
uint32_t containerSize;
int retain;
#if DEPLOYMENT_TARGET_WINDOWS
HANDLE mapHandle;
HANDLE mappedFileHandle;
#endif
};
#if 0
#pragma mark -
#pragma mark Forward declarations
#endif
typedef struct _TraverseContext {
void *context;
void (*callback)(void*, const UInt8*, uint32_t, uint32_t);
} TraverseContext;
static bool foundKey(void *context, const uint8_t *key, uint32_t payload, bool exact)
{
if (context != NULL) {
TraverseContext *ctx = (TraverseContext *)context;
if (ctx->context && ctx->callback) {
ctx->callback(ctx->context, key, 1, payload);
}
}
return false;
}
void CFBurstTrieTraverseWithCursor(CFBurstTrieRef trie, const uint8_t *prefix, uint32_t prefixLen, void **cursor, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool));
static CFBTInsertCode addCFBurstTrieLevel(CFBurstTrieRef trie, TrieLevelRef root, const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload);
static void findCFBurstTrieLevel(CFBurstTrieRef trie, TrieCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool));
static void findCFBurstTrieMappedLevel(CFBurstTrieRef trie, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool));
static void traverseCFBurstTrieLevel(CFBurstTrieRef trie, TrieLevelRef root, TrieCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool));
static void traverseCFBurstTrieMappedLevel(CFBurstTrieRef trie, MapTrieLevelRef root, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool));
static void traverseCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie, CompactMapTrieLevelRef root, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool));
static void traverseCFBurstTrieWithCursor(CFBurstTrieRef trie, const uint8_t *prefix, uint32_t prefixLen, void **cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool));
static size_t serializeCFBurstTrie(CFBurstTrieRef trie, size_t start_offset, int fd);
static Boolean burstTrieMappedFind(DiskTrieLevelRef trie, char *map, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix);
static Boolean burstTrieMappedPageFind(StringPage *page, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix);
static Boolean burstTrieCompactTrieMappedFind(CompactDiskTrieLevelRef trie, char *map, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix);
static void destroyCFBurstTrie(CFBurstTrieRef trie);
static void finalizeCFBurstTrie(TrieLevelRef trie);
static void finalizeCFBurstTrieList(ListNodeRef node);
static int nodeWeightCompare(const void *a, const void *b);
static int nodeStringCompare(const void *a, const void *b);
static bool foundKey(void *context, const uint8_t *key, uint32_t payload, bool exact);
static bool containsKey(void *context, const uint8_t *key, uint32_t payload, bool exact);
static CFIndex burstTrieConvertCharactersToUTF8(UniChar *chars, CFIndex numChars, UInt8 *buffer);
static Boolean advanceMapCursor(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length);
static Boolean getMapCursorPayload(CFBurstTrieRef trie, const CompactMapCursor *cursor, uint32_t *payload);
static void copyMapCursor(const CompactMapCursor *source, CompactMapCursor* destination);
static Boolean areMapCursorsEqual(const CompactMapCursor *lhs, const CompactMapCursor *rhs);
static void traverseFromMapCursor(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback);
static Boolean getMapCursorPayloadFromPackedPageEntry(PageEntryPacked *entry, const CompactMapCursor *cursor, uint32_t *payload);
static Boolean getMapCursorPayloadFromPageEntry(PageEntry *entry, const CompactMapCursor *cursor, uint32_t *payload);
CFBurstTrieRef CFBurstTrieCreateWithOptions(CFDictionaryRef options) {
CFBurstTrieRef trie = NULL;
trie = (CFBurstTrieRef) calloc(1, sizeof(struct _CFBurstTrie));
trie->containerSize = MAX_LIST_SIZE;
CFNumberRef valueAsCFNumber;
if (CFDictionaryGetValueIfPresent(options, kCFBurstTrieCreationOptionNameContainerSize, (const void **)&valueAsCFNumber)) {
int value;
CFNumberGetValue(valueAsCFNumber, kCFNumberIntType, &value);
trie->containerSize = value > 2 && value < 4096 ? value : MAX_LIST_SIZE;
}
trie->retain = 1;
return trie;
}
CFBurstTrieRef CFBurstTrieCreate() {
CFBurstTrieRef trie = NULL;
int listSize = MAX_LIST_SIZE;
CFNumberRef value = CFNumberCreate(kCFAllocatorDefault, kCFNumberIntType, &listSize);
CFMutableDictionaryRef options = CFDictionaryCreateMutable(kCFAllocatorDefault, 1, NULL, NULL);
CFDictionarySetValue(options, kCFBurstTrieCreationOptionNameContainerSize, value);
trie = CFBurstTrieCreateWithOptions(options);
CFRelease(value);
CFRelease(options);
return trie;
}
CFBurstTrieRef CFBurstTrieCreateFromFile(CFStringRef path) {
struct statinfo sb;
char filename[PATH_MAX];
int fd;
/* Check valid path name */
if (!CFStringGetCString(path, filename, PATH_MAX, kCFStringEncodingUTF8)) return NULL;
/* Check if file exists */
if (stat(filename, &sb) != 0) return NULL;
/* Check if file can be opened */
if ((fd=open(filename, CF_OPENFLGS|O_RDONLY)) < 0) return NULL;
#if DEPLOYMENT_TARGET_WINDOWS
HANDLE mappedFileHandle = (HANDLE)_get_osfhandle(fd);
if (!mappedFileHandle) return NULL;
HANDLE mapHandle = CreateFileMapping(mappedFileHandle, NULL, PAGE_READONLY, 0, 0, NULL);
if (!mapHandle) return NULL;
char *map = (char *)MapViewOfFile(mapHandle, FILE_MAP_READ, 0, 0, sb.st_size);
if (!map) return NULL;
#else
char *map = mmap(0, sb.st_size, PROT_READ, MAP_FILE|MAP_SHARED, fd, 0);
#endif
CFBurstTrieRef trie = NULL;
TrieHeader *header = (TrieHeader *)map;
if (((uint32_t*)map)[0] == 0xbabeface) {
trie = (CFBurstTrieRef) calloc(1, sizeof(struct _CFBurstTrie));
trie->mapBase = map;
trie->mapSize = CFSwapInt32LittleToHost(sb.st_size);
trie->mapOffset = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->rootOffset);
trie->cflags = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->flags);
trie->count = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->count);
trie->retain = 1;
#if DEPLOYMENT_TARGET_WINDOWS
trie->mappedFileHandle = mappedFileHandle;
trie->mapHandle = mapHandle;
#else
// On Windows, the file being mapped must stay open as long as the map exists. Don't close it early. Other platforms close it here.
close(fd);
#endif
} else if (header->signature == 0xcafebabe || header->signature == 0x0ddba11) {
trie = (CFBurstTrieRef) calloc(1, sizeof(struct _CFBurstTrie));
trie->mapBase = map;
trie->mapSize = CFSwapInt32LittleToHost(sb.st_size);
trie->cflags = CFSwapInt32LittleToHost(header->flags);
trie->count = CFSwapInt32LittleToHost(header->count);
trie->retain = 1;
#if DEPLOYMENT_TARGET_WINDOWS
trie->mappedFileHandle = mappedFileHandle;
trie->mapHandle = mapHandle;
#else
// On Windows, the file being mapped must stay open as long as the map exists. Don't close it early. Other platforms close it here.
close(fd);
#endif
} else {
close(fd);
}
return trie;
}
CFBurstTrieRef CFBurstTrieCreateFromMapBytes(char *mapBase) {
CFBurstTrieRef trie = NULL;
TrieHeader *header = (TrieHeader *)mapBase;
if (mapBase && ((uint32_t*)mapBase)[0] == 0xbabeface) {
trie = (CFBurstTrieRef) malloc(sizeof(struct _CFBurstTrie));
trie->mapBase = mapBase;
trie->mapSize = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->size);
trie->mapOffset = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->rootOffset);
trie->cflags = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->flags);
trie->count = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->count);
trie->retain = 1;
} else if (mapBase && (header->signature == 0xcafebabe || header->signature == 0x0ddba11)) {
trie = (CFBurstTrieRef) malloc(sizeof(struct _CFBurstTrie));
trie->mapBase = mapBase;
trie->mapSize = CFSwapInt32LittleToHost(header->size);
trie->cflags = CFSwapInt32LittleToHost(header->flags);
trie->count = CFSwapInt32LittleToHost(header->count);
trie->retain = 1;
}
return trie;
}
Boolean CFBurstTrieInsert(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, CFIndex payload) {
return CFBurstTrieAddWithWeight(trie, term, termRange, 1, (uint32_t)payload);
}
Boolean CFBurstTrieAdd(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, uint32_t payload) {
return CFBurstTrieAddWithWeight(trie, term, termRange, 1, payload);
}
Boolean CFBurstTrieInsertCharacters(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, CFIndex payload) {
return CFBurstTrieAddCharactersWithWeight(trie, chars, numChars, 1, (uint32_t)payload);
}
Boolean CFBurstTrieAddCharacters(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, uint32_t payload) {
return CFBurstTrieAddCharactersWithWeight(trie, chars, numChars, 1, payload);
}
Boolean CFBurstTrieInsertUTF8String(CFBurstTrieRef trie, UInt8 *chars, CFIndex numChars, CFIndex payload) {
return CFBurstTrieAddUTF8StringWithWeight(trie, chars, numChars, 1, (uint32_t)payload);
}
Boolean CFBurstTrieAddUTF8String(CFBurstTrieRef trie, UInt8 *chars, CFIndex numChars, uint32_t payload) {
return CFBurstTrieAddUTF8StringWithWeight(trie, chars, numChars, 1, payload);
}
Boolean CFBurstTrieInsertWithWeight(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, CFIndex weight, CFIndex payload) {
return CFBurstTrieAddWithWeight(trie, term, termRange, weight, (uint32_t)payload);
}
Boolean CFBurstTrieAddWithWeight(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, uint32_t weight, uint32_t payload) {
Boolean success = false;
CFIndex size = MAX_STRING_ALLOCATION_SIZE;
CFIndex bytesize = termRange.length * 4; //** 4-byte max character size
if (!trie->mapBase && termRange.length < MAX_STRING_SIZE && payload > 0) {
CFIndex length;
UInt8 buffer[MAX_STRING_ALLOCATION_SIZE + 1];
UInt8 *key = buffer;
if (bytesize >= size) {
size = bytesize;
key = (UInt8 *) malloc(sizeof(UInt8) * size + 1);
}
CFStringGetBytes(term, termRange, kCFStringEncodingUTF8, (UInt8)'-', (Boolean)0, key, size, &length);
key[length] = 0;
success = CFBurstTrieAddUTF8StringWithWeight(trie, key, length, weight, payload);
if (buffer != key) free(key);
}
return success;
}
Boolean CFBurstTrieInsertCharactersWithWeight(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, CFIndex weight, CFIndex payload) {
return CFBurstTrieAddCharactersWithWeight(trie, chars, numChars, weight, (uint32_t)payload);
}
Boolean CFBurstTrieAddCharactersWithWeight(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, uint32_t weight, uint32_t payload) {
Boolean success = false;
CFIndex size = MAX_STRING_ALLOCATION_SIZE;
CFIndex bytesize = numChars * 4; //** 4-byte max character size
if (!trie->mapBase && numChars < MAX_STRING_SIZE && payload > 0) {
CFIndex length;
UInt8 buffer[MAX_STRING_ALLOCATION_SIZE + 1];
UInt8 *key = buffer;
if (bytesize >= size) {
size = bytesize;
key = (UInt8 *) malloc(sizeof(UInt8) * size + 1);
}
length = burstTrieConvertCharactersToUTF8(chars, numChars, key);
key[length] = 0;
success = CFBurstTrieAddUTF8StringWithWeight(trie, key, length, weight, payload);
if (buffer != key) free(key);
}
return success;
}
Boolean CFBurstTrieInsertUTF8StringWithWeight(CFBurstTrieRef trie, UInt8 *chars, CFIndex numChars, CFIndex weight, CFIndex payload) {
return CFBurstTrieAddUTF8StringWithWeight(trie, chars, numChars, weight, (uint32_t)payload);
}
Boolean CFBurstTrieAddUTF8StringWithWeight(CFBurstTrieRef trie, UInt8 *chars, CFIndex numChars, uint32_t weight, uint32_t payload) {
CFBTInsertCode code = FailedInsert;
if (!trie->mapBase && numChars < MAX_STRING_SIZE*4 && payload > 0) {
code = addCFBurstTrieLevel(trie, &trie->root, chars, numChars, weight, payload);
if (code == NewTerm) trie->count++;
}
return code > FailedInsert;
}
Boolean CFBurstTrieFind(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, CFIndex *payload) {
uint32_t p;
if (CFBurstTrieContains(trie, term, termRange, &p)) {
SetPayload(payload, p);
return true;
}
return false;
}
Boolean CFBurstTrieContains(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, uint32_t *payload) {
Boolean success = false;
CFIndex size = MAX_STRING_ALLOCATION_SIZE;
CFIndex bytesize = termRange.length * 4; //** 4-byte max character size
if (termRange.length < MAX_STRING_SIZE) {
CFIndex length;
UInt8 buffer[MAX_STRING_ALLOCATION_SIZE+1];
UInt8 *key = buffer;
if (bytesize >= size) {
size = bytesize;
key = (UInt8 *) malloc(sizeof(UInt8) * size + 1);
}
CFStringGetBytes(term, termRange, kCFStringEncodingUTF8, (UInt8)'-', (Boolean)0, key, size, &length);
key[length] = 0;
success = CFBurstTrieContainsUTF8String(trie, key, length, payload);
if (buffer != key) free(key);
}
return success;
}
Boolean CFBurstTrieFindCharacters(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, CFIndex *payload) {
uint32_t p;
if (CFBurstTrieContainsCharacters(trie, chars, numChars, &p)) {
SetPayload(payload, p);
return true;
}
return false;
}
Boolean CFBurstTrieContainsCharacters(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, uint32_t *payload) {
Boolean success = false;
CFIndex size = MAX_STRING_ALLOCATION_SIZE;
CFIndex bytesize = numChars * 4; //** 4-byte max character size
if (numChars < MAX_STRING_SIZE) {
CFIndex length;
UInt8 buffer[MAX_STRING_ALLOCATION_SIZE + 1];
UInt8 *key = buffer;
if (bytesize >= size) {
size = bytesize;
key = (UInt8 *) malloc(sizeof(UInt8) * size + 1);
}
length = burstTrieConvertCharactersToUTF8(chars, numChars, key);
key[length] = 0;
success = CFBurstTrieContainsUTF8String(trie, key, length, payload);
if (buffer != key) free(key);
}
return success;
}
Boolean CFBurstTrieFindUTF8String(CFBurstTrieRef trie, UInt8 *key, CFIndex length, CFIndex *payload) {
uint32_t p;
if (CFBurstTrieContainsUTF8String(trie, key, length, &p)) {
SetPayload(payload, p);
return true;
}
return false;
}
Boolean CFBurstTrieContainsUTF8String(CFBurstTrieRef trie, UInt8 *key, CFIndex length, uint32_t *payload) {
Boolean success = false;
if (length < MAX_STRING_SIZE) {
if (trie->mapBase && ((fileHeader *)trie->mapBase)->signature == 0xbabeface) {
bool prefix = (trie->cflags & kCFBurstTriePrefixCompression);
success = burstTrieMappedFind((DiskTrieLevelRef)(trie->mapBase+CFSwapInt32LittleToHost((((uint32_t*)trie->mapBase)[1]))), trie->mapBase, key, length, payload, prefix);
} else if (trie->mapBase && trie->cflags & (kCFBurstTriePrefixCompression | kCFBurstTrieSortByKey)) {
_CFBurstTrieCursor cursor;
if (!CFBurstTrieSetCursorForBytes(trie, &cursor, key, length))
return FALSE;
return CFBurstTrieCursorGetPayload(&cursor, payload);
} else {
uint32_t found = 0;
void *cursor = 0;
traverseCFBurstTrieWithCursor(trie, key, length, &cursor, true, &found, containsKey);
if (found) SetPayload(payload, found);
success = found > 0;
}
}
return success;
}
Boolean CFBurstTrieSerialize(CFBurstTrieRef trie, CFStringRef path, CFBurstTrieOpts opts) {
Boolean success = false;
if (trie->mapBase) {
return success;
} else {
int fd;
char filename[PATH_MAX];
/* Check valid path name */
if (!CFStringGetCString(path, filename, PATH_MAX, kCFStringEncodingUTF8)) return success;
/* Check if file can be opened */
if ((fd=open(filename, CF_OPENFLGS|O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR)) < 0) return success;
if (CFBurstTrieSerializeWithFileDescriptor(trie, fd, opts)) success = true;
close(fd);
}
return success;
}
Boolean CFBurstTrieSerializeWithFileDescriptor(CFBurstTrieRef trie, int fd, CFBurstTrieOpts opts) {
Boolean success = false;
if (!trie->mapBase && fd >= 0) {
off_t start_offset = lseek(fd, 0, SEEK_END);
trie->cflags = opts;
trie->mapSize = serializeCFBurstTrie(trie, start_offset, fd);
#if DEPLOYMENT_TARGET_WINDOWS
HANDLE mappedFileHandle = (HANDLE)_get_osfhandle(fd);
// We need to make sure we have our own handle to keep this file open as long as the mmap lasts
DuplicateHandle(GetCurrentProcess(), mappedFileHandle, GetCurrentProcess(), &mappedFileHandle, 0, 0, DUPLICATE_SAME_ACCESS);
HANDLE mapHandle = CreateFileMapping(mappedFileHandle, NULL, PAGE_READONLY, 0, 0, NULL);
if (!mapHandle) return NULL;
char *map = (char *)MapViewOfFile(mapHandle, FILE_MAP_READ, 0, start_offset, trie->mapSize);
if (!map) return NULL;
trie->mapBase = map;
trie->mapHandle = mapHandle;
trie->mappedFileHandle = mappedFileHandle;
#else
trie->mapBase = mmap(0, trie->mapSize, PROT_READ, MAP_FILE|MAP_SHARED, fd, start_offset);
#endif
success = true;
}
return success;
}
void CFBurstTrieTraverse(CFBurstTrieRef trie, void *ctx, void (*callback)(void*, const UInt8*, uint32_t, uint32_t)) {
TrieHeader *header = (TrieHeader *)trie->mapBase;
if (!trie->mapBase || (header->signature == 0xcafebabe || header->signature == 0x0ddba11)) {
void *cursor = 0;
TraverseContext context;
context.context = ctx;
context.callback = callback;
traverseCFBurstTrieWithCursor(trie, (const uint8_t *)"", 0, &cursor, false, &context, foundKey);
}
}
void CFBurstTrieTraverseWithCursor(CFBurstTrieRef trie, const uint8_t *prefix, uint32_t prefixLen, void **cursor, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool))
{
traverseCFBurstTrieWithCursor(trie, prefix, prefixLen, cursor, false, ctx, callback);
}
void CFBurstTriePrint(CFBurstTrieRef trie) {
}
CFIndex CFBurstTrieGetCount(CFBurstTrieRef trie) {
return trie->count;
}
CFBurstTrieRef CFBurstTrieRetain(CFBurstTrieRef trie) {
trie->retain++;
return trie;
}
void CFBurstTrieRelease(CFBurstTrieRef trie) {
trie->retain--;
if (trie->retain == 0) destroyCFBurstTrie(trie);
return;
}
Boolean CFBurstTrieSetCursorForBytes(CFBurstTrieRef trie, CFBurstTrieCursorRef cursor, const UInt8* bytes, CFIndex length)
{
if (!trie->mapBase || !(trie->cflags & (kCFBurstTriePrefixCompression | kCFBurstTrieSortByKey))) {
//fprintf(stderr, "CFBurstTrieCreateCursorForBytes() only support file based trie in prefix compression format.\n");
return FALSE;
}
TrieHeader *header = (TrieHeader*)trie->mapBase;
if (length < 0 || !trie)
return FALSE;
cursor->trie = trie;
if (trie->mapBase) {
cursor->cursorType = _kCFBurstTrieCursorMapType;
cursor->mapCursor.next = header->rootOffset;
cursor->mapCursor.isOnPage = FALSE;
cursor->mapCursor.entryOffsetInPage = 0;
cursor->mapCursor.offsetInEntry = 0;
cursor->mapCursor.payload = 0;
} else
assert(false);
if (!bytes || length == 0)
return TRUE;
return CFBurstTrieCursorAdvanceForBytes(cursor, bytes, length);
}
CFBurstTrieCursorRef CFBurstTrieCreateCursorForBytes(CFBurstTrieRef trie, const UInt8* bytes, CFIndex length)
{
CFBurstTrieCursorRef cursor = (CFBurstTrieCursorRef)calloc(sizeof(_CFBurstTrieCursor), 1);
if (!CFBurstTrieSetCursorForBytes(trie, cursor, bytes, length)) {
CFBurstTrieCursorRelease(cursor);
return NULL;
}
return cursor;
}
CFBurstTrieCursorRef CFBurstTrieCursorCreateByCopy(CFBurstTrieCursorRef cursor)
{
if (!cursor)
return NULL;
CFBurstTrieCursorRef newCursor = (CFBurstTrieCursorRef)calloc(sizeof(_CFBurstTrieCursor), 1);
switch (cursor->cursorType) {
case _kCFBurstTrieCursorMapType:
copyMapCursor(&cursor->mapCursor, &newCursor->mapCursor);
break;
case _kCFBurstTrieCursorTrieType:
assert(false);
break;
}
newCursor->cursorType = cursor->cursorType;
newCursor->trie = cursor->trie;
return newCursor;
}
Boolean CFBurstTrieCursorIsEqual(CFBurstTrieCursorRef lhs, CFBurstTrieCursorRef rhs)
{
if (lhs->trie != rhs->trie || lhs->cursorType != rhs->cursorType)
return FALSE;
if (lhs->cursorType == _kCFBurstTrieCursorMapType)
return areMapCursorsEqual(&lhs->mapCursor, &rhs->mapCursor);
else
return FALSE;
}
Boolean CFBurstTrieCursorAdvanceForBytes(CFBurstTrieCursorRef cursor, const UInt8* bytes, CFIndex length)
{
switch (cursor->cursorType) {
case _kCFBurstTrieCursorMapType: {
CompactMapCursor tempCursor;
copyMapCursor(&cursor->mapCursor, &tempCursor);
if (advanceMapCursor(cursor->trie, (CompactMapCursor*)&cursor->mapCursor, bytes, length))
return TRUE;
else {
copyMapCursor(&tempCursor, &cursor->mapCursor);
return FALSE;
}
}
case _kCFBurstTrieCursorTrieType:
return FALSE;
}
return FALSE;
}
Boolean CFBurstTrieCursorGetPayload(CFBurstTrieCursorRef cursor, uint32_t *payload)
{
switch (cursor->cursorType) {
case _kCFBurstTrieCursorMapType:
return getMapCursorPayload(cursor->trie, (CompactMapCursor*)&cursor->mapCursor, payload);
case _kCFBurstTrieCursorTrieType:
return FALSE;
}
return FALSE;
}
void CFBurstTrieCursorRelease(CFBurstTrieCursorRef cursor)
{
if (!cursor)
return;
free(cursor);
}
void CFBurstTrieTraverseFromCursor(CFBurstTrieCursorRef cursor, void *ctx, CFBurstTrieTraversalCallback callback)
{
if (!cursor)
return;
UInt8 *bytes = (UInt8*)calloc(1, MAX_KEY_LENGTH);
uint32_t capacity = MAX_KEY_LENGTH;
uint32_t length = 0;
Boolean stop = FALSE;
switch (cursor->cursorType) {
case _kCFBurstTrieCursorMapType: {
CompactMapCursor tempCursor;
copyMapCursor(&cursor->mapCursor, &tempCursor);
traverseFromMapCursor(cursor->trie, &tempCursor, bytes, capacity,length, &stop, ctx, callback);
break;
}
case _kCFBurstTrieCursorTrieType:
break;
}
free(bytes);
}
#if 0
#pragma mark -
#pragma mark Insertion
#endif
static ListNodeRef makeCFBurstTrieListNode(const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload) {
ListNodeRef node = (ListNodeRef) calloc(1, sizeof(*node) + keylen + 1);
memcpy(node->string, key, keylen);
node->string[keylen] = 0;
node->next = 0;
node->length = keylen;
node->weight = weight;
node->payload = payload;
return node;
}
static void addCFBurstTrieBurstLevel(CFBurstTrieRef trie, TrieLevelRef root, const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload) {
if (keylen) {
NextTrie next = root->slots[*key];
ListNodeRef newNode = makeCFBurstTrieListNode(key+1, keylen-1, weight, payload);
newNode->weight = weight;
newNode->next = (ListNodeRef) NextTrie_GetPtr(next);
next = (uintptr_t) newNode;
NextTrie_SetKind(next, ListKind);
root->slots[*key] = next;
} else {
// ** Handle payload.
root->weight = weight;
root->payload = payload;
}
}
static TrieLevelRef burstCFBurstTrieLevel(CFBurstTrieRef trie, ListNodeRef list, uint32_t listCount) {
TrieLevelRef newLevel = (TrieLevelRef) calloc(1, sizeof(struct _TrieLevel));
while (list) {
addCFBurstTrieBurstLevel(trie, newLevel, list->string, list->length, list->weight, list->payload);
ListNodeRef temp = list;
list = list->next;
free(temp);
}
return newLevel;
}
static CFBTInsertCode addCFBurstTrieListNode(CFBurstTrieRef trie, ListNodeRef list, const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload, uint32_t *listCount)
{
CFBTInsertCode code = FailedInsert;
uint32_t count = 1;
ListNodeRef last = list;
while (list) {
if (list->length == keylen && memcmp(key, list->string, keylen) == 0) {
list->weight += weight;
list->payload = payload;
code = ExistingTerm;
break;
} else {
count++;
last = list;
list = list->next;
}
}
if (!list) {
last->next = makeCFBurstTrieListNode(key, keylen, weight, payload);
code = NewTerm;
}
*listCount = count;
return code;
}
static CFBTInsertCode addCFBurstTrieLevel(CFBurstTrieRef trie, TrieLevelRef root, const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload)
{
CFBTInsertCode code = FailedInsert;
if (keylen) {
NextTrie next = root->slots[*key];
if (NextTrie_GetKind(next) == TrieKind) {
TrieLevelRef nextLevel = (TrieLevelRef) NextTrie_GetPtr(next);
code = addCFBurstTrieLevel(trie, nextLevel, key+1, keylen-1, weight, payload);
} else {
if (NextTrie_GetKind(next) == ListKind) {
uint32_t listCount;
ListNodeRef listNode = (ListNodeRef) NextTrie_GetPtr(next);
code = addCFBurstTrieListNode(trie, listNode, key+1, keylen-1, weight, payload, &listCount);
if (listCount > trie->containerSize) {
next = (uintptr_t) burstCFBurstTrieLevel(trie, listNode, listCount);
NextTrie_SetKind(next, TrieKind);
}
} else {
// ** Make a new list node
next = (uintptr_t) makeCFBurstTrieListNode(key+1, keylen-1, weight, payload);
NextTrie_SetKind(next, ListKind);
code = NewTerm;
}
root->slots[*key] = next;
}
} else {
// ** Handle payload
if (!root->weight) code = NewTerm;
else code = ExistingTerm;
root->weight += weight;
root->payload = payload;
}
return code;
}
#if 0
#pragma mark -
#pragma mark Searching
#endif
static void findCFBurstTrieList(CFBurstTrieRef trie, TrieCursor *cursor, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool))
{
ListNodeRef list = (ListNodeRef)NextTrie_GetPtr(cursor->next);
int len = cursor->prefixlen-cursor->keylen;
len = len <= 0 ? 0 : len;
while (list) {
int lencompare = list->length-len;
if (list->length >= len &&
(len == 0 || memcmp(list->string, cursor->prefix+cursor->keylen, len) == 0)) {
memcpy(cursor->key+cursor->keylen, list->string, list->length);
cursor->key[cursor->keylen+list->length] = 0;
cursor->next = (NextTrie)list;
if (list->payload && callback(ctx, cursor->key, list->payload, lencompare==0)) return;
}
list = list->next;
}
}
static void findCFBurstTrieMappedPage(CFBurstTrieRef trie, MapCursor *cursor, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool))
{
Page *page = (Page *)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
uint32_t end = page->length;
uint32_t cur = 0;
int len = cursor->prefixlen-cursor->keylen;
len = len <= 0 ? 0 : len;
if (trie->cflags & kCFBurstTriePrefixCompression) {
uint8_t pfx[CHARACTER_SET_SIZE];
PageEntryPacked *lastEntry = 0;
while (cur < end) {
PageEntryPacked *entry = (PageEntryPacked *)&page->data[cur];
int lencompare = (entry->strlen+entry->pfxLen)-len;
if (lastEntry && entry->pfxLen>lastEntry->pfxLen) memcpy(pfx+lastEntry->pfxLen, lastEntry->string, entry->pfxLen-lastEntry->pfxLen);
if (lencompare >= 0 &&
(len == 0 || (__builtin_memcmp(pfx, cursor->prefix+cursor->keylen, entry->pfxLen) == 0 &&
__builtin_memcmp(entry->string, cursor->prefix+cursor->keylen+entry->pfxLen, cursor->prefixlen-cursor->keylen-entry->pfxLen) == 0))) {
memcpy(cursor->key+cursor->keylen, pfx, entry->pfxLen);
memcpy(cursor->key+cursor->keylen+entry->pfxLen, entry->string, entry->strlen);
cursor->key[cursor->keylen+entry->pfxLen+entry->strlen] = 0;
if (entry->payload && callback(ctx, (const uint8_t *)cursor->key, entry->payload, lencompare==0)) return;
}
lastEntry = entry;
cur += sizeof(*entry) + entry->strlen;
}
} else {
while (cur < end) {
PageEntry *entry = (PageEntry *)&page->data[cur];
int lencompare = entry->strlen-len;
if (lencompare >= 0 && __builtin_memcmp(entry->string, cursor->prefix+cursor->keylen, len) == 0) {
memcpy(cursor->key+cursor->keylen, entry->string, entry->strlen);
cursor->key[cursor->keylen+entry->strlen] = 0;
if (entry->payload && callback(ctx, (const uint8_t *)cursor->key, entry->payload, lencompare==0)) return;
}
cur += sizeof(*entry) + entry->strlen;
}
}
}
static void findCFBurstTrieLevel(CFBurstTrieRef trie, TrieCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool))
{
if (cursor->keylen < cursor->prefixlen) {
cursor->next = ((TrieLevelRef)NextTrie_GetPtr(cursor->next))->slots[cursor->prefix[cursor->keylen]];
cursor->key[cursor->keylen++] = cursor->prefix[cursor->keylen];
if (NextTrie_GetKind(cursor->next) == TrieKind) {
findCFBurstTrieLevel(trie, cursor, exactmatch, ctx, callback);
} else if (NextTrie_GetKind(cursor->next) == ListKind) {
findCFBurstTrieList(trie, cursor, ctx, callback);
}
} else {
TrieLevelRef root = (TrieLevelRef)NextTrie_GetPtr(cursor->next);
if (root->payload && callback(ctx, cursor->key, root->payload, cursor->prefixlen==cursor->keylen)) return;
if (cursor->keylen == cursor->prefixlen && exactmatch) return;
traverseCFBurstTrieLevel(trie, root, cursor, exactmatch, ctx, callback);
}
}
static void findCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool))
{
CompactMapTrieLevelRef root = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
if (cursor->keylen < cursor->prefixlen) {
uint8_t mykey = *(cursor->prefix+cursor->keylen);
cursor->key[cursor->keylen++] = *(cursor->prefix+cursor->keylen);
uint8_t slot = mykey / 64;
uint8_t bit = mykey % 64;
uint32_t item = 0;
uint64_t bword = root->bitmap[slot];
if (bword & (1ull << bit)) {
// ** Count all the set bits up to this bit
for (int i=0; i < slot; i++) item += __builtin_popcountll(root->bitmap[i]);
item += __builtin_popcountll(bword & ((1ull << bit)-1));
uint32_t offset = root->slots[item];
cursor->next = offset;
if (DiskNextTrie_GetKind(offset) == TrieKind) {
findCFBurstTrieMappedLevel(trie, cursor, exactmatch, ctx, callback);
} else if (DiskNextTrie_GetKind(offset) == CompactTrieKind) {
findCFBurstTrieCompactMappedLevel(trie, cursor, exactmatch, ctx, callback);
} else if (DiskNextTrie_GetKind(offset) == ListKind) {
findCFBurstTrieMappedPage(trie, cursor, ctx, callback);
}
}
} else {
if(root->payload && callback(ctx, cursor->key, root->payload, cursor->prefixlen==cursor->keylen)) return;
if (cursor->keylen == cursor->prefixlen && exactmatch) return;
traverseCFBurstTrieCompactMappedLevel(trie, root, cursor, exactmatch, ctx, callback);
}
}
static void findCFBurstTrieMappedLevel(CFBurstTrieRef trie, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool))
{
MapTrieLevelRef root = (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
if (cursor->keylen < cursor->prefixlen) {
uint8_t slot = *(cursor->prefix+cursor->keylen);
cursor->next = root->slots[slot];
cursor->key[cursor->keylen++] = slot;
if (DiskNextTrie_GetKind(cursor->next) == TrieKind) {
findCFBurstTrieMappedLevel(trie, cursor, exactmatch, ctx, callback);
} else if (DiskNextTrie_GetKind(cursor->next) == CompactTrieKind) {
findCFBurstTrieCompactMappedLevel(trie, cursor, exactmatch, ctx, callback);
} else if (DiskNextTrie_GetKind(cursor->next) == ListKind) {
findCFBurstTrieMappedPage(trie, cursor, ctx, callback);
}
} else {
if (root->payload && callback(ctx, cursor->key, root->payload, cursor->prefixlen==cursor->keylen)) return;
if (cursor->keylen == cursor->prefixlen && exactmatch) return;
traverseCFBurstTrieMappedLevel(trie, root, cursor, exactmatch, ctx, callback);
}
}
static void traverseCFBurstTrieLevel(CFBurstTrieRef trie, TrieLevelRef root, TrieCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool))
{
cursor->key[cursor->keylen] = 0;
uint32_t len = cursor->keylen;
for (int i=0; i < CHARACTER_SET_SIZE; i++) {
NextTrie next = root->slots[i];
cursor->keylen = len;
cursor->key[cursor->keylen++] = i;
if (NextTrie_GetKind(next) == TrieKind) {
TrieLevelRef level = (TrieLevelRef)NextTrie_GetPtr(next);
if (level->payload && callback(ctx, cursor->key, level->payload, cursor->prefixlen==cursor->keylen)) return;
if (cursor->keylen == cursor->prefixlen && exactmatch) return;
traverseCFBurstTrieLevel(trie, level, cursor, exactmatch, ctx, callback);
} else if (NextTrie_GetKind(next) == ListKind) {
cursor->next = next;
cursor->key[cursor->keylen] = 0;
findCFBurstTrieList(trie, cursor, ctx, callback);
}
}
}
static void traverseCFBurstTrieMappedLevel(CFBurstTrieRef trie, MapTrieLevelRef root, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool))
{
cursor->key[cursor->keylen] = 0;
uint32_t len = cursor->keylen;
for (int i=0; i < CHARACTER_SET_SIZE; i++) {
uint32_t offset = (uint32_t)root->slots[i];
cursor->keylen = len;
cursor->key[cursor->keylen++] = i;
if (DiskNextTrie_GetKind(offset) == TrieKind) {
MapTrieLevelRef level = (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, offset);
if (level->payload && callback(ctx, cursor->key, level->payload, cursor->prefixlen==cursor->keylen)) return;
if (cursor->keylen == cursor->prefixlen && exactmatch) return;
traverseCFBurstTrieMappedLevel(trie, level, cursor, exactmatch, ctx, callback);
} else if (DiskNextTrie_GetKind(offset) == CompactTrieKind) {
CompactMapTrieLevelRef level = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, offset);
if (level->payload && callback(ctx, cursor->key, level->payload, cursor->prefixlen==cursor->keylen)) return;
if (cursor->keylen == cursor->prefixlen && exactmatch) return;
traverseCFBurstTrieCompactMappedLevel(trie, level, cursor, exactmatch, ctx, callback);
} else if (DiskNextTrie_GetKind(offset) == ListKind) {
cursor->next = offset;
cursor->key[cursor->keylen] = 0;
findCFBurstTrieMappedPage(trie, cursor, ctx, callback);
}
}
}
static void traverseCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie, CompactMapTrieLevelRef root, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool))
{
cursor->key[cursor->keylen] = 0;
uint32_t len = cursor->keylen;
for (uint32_t c=0; c < CHARACTER_SET_SIZE; c++) {
//** This could be optimized to remember what the last slot/item was and not count bits over and over.
uint8_t mykey = c;
uint32_t slot = mykey / 64;
uint32_t bit = mykey % 64;
uint32_t item = 0;
uint64_t bword = root->bitmap[slot];
cursor->keylen = len;
if (bword & (1ull << bit)) {
// ** Count all the set bits up to this bit
for (int i=0; i < slot; i++) item += __builtin_popcountll(root->bitmap[i]);
item += __builtin_popcountll(bword & ((1ull << bit)-1));
uint32_t offset = root->slots[item];
cursor->key[cursor->keylen++] = mykey;
if(DiskNextTrie_GetKind(offset) == CompactTrieKind) {
CompactMapTrieLevelRef level = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, offset);
if (level->payload && callback(ctx, cursor->key, level->payload, cursor->prefixlen==cursor->keylen)) return;
if (cursor->keylen == cursor->prefixlen && exactmatch) return;
traverseCFBurstTrieCompactMappedLevel(trie, level, cursor, exactmatch, ctx, callback);
} else if(DiskNextTrie_GetKind(offset) == TrieKind) {
traverseCFBurstTrieMappedLevel(trie, (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, offset), cursor, exactmatch, ctx, callback);
} else if (DiskNextTrie_GetKind(offset) == ListKind) {
cursor->next = offset;
cursor->key[cursor->keylen] = 0;
findCFBurstTrieMappedPage(trie, cursor, ctx, callback);
}
}
}
}
static void traverseCFBurstTrieWithCursor(CFBurstTrieRef trie, const uint8_t *prefix, uint32_t prefixLen, void **cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)) {
if (trie->mapBase) {
if (trie->cflags & kCFBurstTriePrefixCompression) {
fprintf(stderr, "Please use CFBurstTrieCursorRef API for file based trie.\n");
return;
} else {
TrieHeader *header = (TrieHeader *)trie->mapBase;
MapCursor csr;
csr.next = header->rootOffset;
csr.prefix = prefix;
csr.prefixlen = prefixLen;
csr.key[0] = 0;
csr.keylen = 0;
findCFBurstTrieMappedLevel(trie, &csr, exactmatch, ctx, callback);
}
} else {
TrieCursor csr;
csr.next = ((uintptr_t)&trie->root)|TrieKind;
csr.prefix = prefix;
csr.prefixlen = prefixLen;
csr.key[0] = 0;
csr.keylen = 0;
findCFBurstTrieLevel(trie, &csr, exactmatch, ctx, callback);
}
}
CF_INLINE uint32_t getPackedPageEntrySize(PageEntryPacked *entry)
{
return sizeof(PageEntryPacked) + entry->strlen;
}
CF_INLINE uint32_t getPageEntrySize(PageEntry *entry)
{
return sizeof(PageEntry) + entry->strlen;
}
/*
static void _printPageEntry(PageEntryPacked *entry) {
printf("entry 0x%p:\n", entry);
printf("pfxLen = %d, strLen = %d\n", entry->pfxLen, entry->strlen);
if (entry->strlen > 0) {
printf("string = ");
for (int i = 0; i < entry->strlen; ++i)
printf("%d ", entry->string[i]);
printf("\n");
}
printf("\n");
}
*/
static Boolean advanceCursorOnMappedPageForByte(Page *page, CompactMapCursor *cursor, UInt8 byte) {
PageEntryPacked *entry;
Boolean found = FALSE;
uint32_t minPrefixLength = 0;
if (cursor->isOnPage) {
entry = (PageEntryPacked *)&page->data[cursor->entryOffsetInPage];
//_printPageEntry(entry);
BOOL shouldContinue = TRUE;
if (!(cursor->entryOffsetInPage == 0 && entry->strlen == 0)) {
cursor->offsetInEntry++;
if (entry->string[cursor->offsetInEntry] == byte)
found = TRUE;
else if (entry->string[cursor->offsetInEntry] > byte)
shouldContinue = FALSE;
else {
minPrefixLength = entry->pfxLen + cursor->offsetInEntry;
cursor->entryOffsetInPage += getPackedPageEntrySize(entry);
}
}
if (found) {
cursor->isOnPage = TRUE;
return TRUE;
} else if (!shouldContinue)
return FALSE;
} else {
cursor->entryOffsetInPage = 0;
}
uint32_t pageSize = page->length - sizeof(Page);
while (cursor->entryOffsetInPage < pageSize) {
entry = (PageEntryPacked *)&page->data[cursor->entryOffsetInPage];
//_printPageEntry(entry);
if (minPrefixLength > entry->pfxLen)
break;
else if (minPrefixLength < entry->pfxLen)
cursor->entryOffsetInPage += getPackedPageEntrySize(entry);
else {
if (entry->strlen == 0)
cursor->entryOffsetInPage += getPackedPageEntrySize(entry);
else {
if (entry->string[0] > byte)
// Entries are sorted alphabetically
break;
else if (entry->string[0] < byte)
cursor->entryOffsetInPage += getPackedPageEntrySize(entry);
else {
cursor->offsetInEntry = 0;
found = TRUE;
break;
}
}
}
}
if (found)
cursor->isOnPage = TRUE;
return found;
}
static Boolean advanceCursorMappedPageWithPrefixCompression(Page *page, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length)
{
if (length == 0) {
PageEntryPacked *entry = (PageEntryPacked*)&page->data[0];
if (!cursor->isOnPage) {
cursor->entryOffsetInPage = 0;
cursor->offsetInEntry = 0;
cursor->isOnPage = entry->pfxLen == 0 && entry->strlen == 0;
}
getMapCursorPayloadFromPackedPageEntry(entry, cursor, &cursor->payload);
return TRUE;
}
for (CFIndex i = 0; i < length; ++i) {
if (!advanceCursorOnMappedPageForByte(page, cursor, bytes[i])) return FALSE;
}
PageEntryPacked *entry = (PageEntryPacked*)&page->data[cursor->entryOffsetInPage];
getMapCursorPayloadFromPackedPageEntry(entry, cursor, &cursor->payload);
return TRUE;
}
static Boolean advanceCursorMappedPageSortedByKey(Page *page, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length)
{
if (length == 0) {
PageEntry*entry = (PageEntry*)&page->data[0];
if (!cursor->isOnPage) {
cursor->entryOffsetInPage = 0;
cursor->offsetInEntry = 0;
cursor->isOnPage = entry->strlen == 0;
}
getMapCursorPayloadFromPageEntry(entry, cursor, &cursor->payload);
return TRUE;
}
PageEntry *entry;
uint32_t pageSize = page->length - sizeof(Page);
const UInt8 * prefix = NULL;
uint32_t prefixLength = 0;
if (cursor->isOnPage) {
entry = (PageEntry*)&page->data[cursor->entryOffsetInPage];
prefix = entry->string;
prefixLength = cursor->offsetInEntry + 1;
}
while (cursor->entryOffsetInPage < pageSize) {
PageEntry *entry = (PageEntry*)&page->data[cursor->entryOffsetInPage];
if (entry->strlen == 0) {
cursor->entryOffsetInPage += getPageEntrySize(entry);
continue;
}
if (entry->strlen <= prefixLength)
return FALSE;
else {
int cmpResult = 0;
if (prefixLength > 0)
cmpResult = __builtin_memcmp(entry->string, prefix, prefixLength);
if (cmpResult > 0)
return FALSE;
else if (cmpResult == 0) {
if (entry->strlen < prefixLength + length) {
int cmpResult2 = __builtin_memcmp(entry->string + prefixLength, bytes, entry->strlen - prefixLength);
if (cmpResult2 > 0)
return FALSE;
else
cursor->entryOffsetInPage += getPageEntrySize(entry);
} else {
int cmpResult2 = __builtin_memcmp(entry->string + prefixLength, bytes, length);
if (cmpResult2 > 0)
return FALSE;
else if (cmpResult2 == 0) {
cursor->isOnPage = TRUE;
cursor->offsetInEntry = prefixLength + length - 1;
getMapCursorPayloadFromPageEntry(entry, cursor, &cursor->payload);
return TRUE;
} else
cursor->entryOffsetInPage += getPageEntrySize(entry);
}
} else
cursor->entryOffsetInPage += getPageEntrySize(entry);
}
}
return FALSE;
}
static Boolean advanceCursorMappedPage(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length)
{
if (!bytes || length < 0)
return FALSE;
Page *page = (Page *)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
uint32_t pageSize = page->length - sizeof(Page);
if (pageSize == 0)
return FALSE;
if (trie->cflags & kCFBurstTriePrefixCompression)
return advanceCursorMappedPageWithPrefixCompression(page, cursor, bytes, length);
else if (trie->cflags & kCFBurstTrieSortByKey)
return advanceCursorMappedPageSortedByKey(page, cursor, bytes, length);
else
return FALSE;
}
static Boolean advanceCursorCompactMappedLevel(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length)
{
if (!bytes || length < 0)
return FALSE;
CompactMapTrieLevelRef root = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
if (length == 0) {
cursor->payload = root->payload;
return TRUE;
}
uint8_t slot = bytes[0] / 64;
uint8_t bit = bytes[0] % 64;
uint32_t item = 0;
uint64_t bword = root->bitmap[slot];
if (bword & (1ull << bit)) {
// Count all the set bits up to this bit
for (int i = 0; i < slot; ++i)
item += __builtin_popcountll(root->bitmap[i]);
item += __builtin_popcountll(bword & ((1ull << bit)-1));
cursor->next = root->slots[item];
return advanceMapCursor(trie, cursor, bytes + 1, length - 1);
} else {
return FALSE;
}
}
static Boolean advanceCursorMappedLevel(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length)
{
if (!bytes || length < 0)
return FALSE;
MapTrieLevelRef root = (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
if (length == 0) {
cursor->payload = root->payload;
return TRUE;
}
cursor->next = root->slots[bytes[0]];
return advanceMapCursor(trie, cursor, bytes + 1, length - 1);
}
static Boolean advanceMapCursor(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length)
{
bool result = FALSE;
switch (DiskNextTrie_GetKind(cursor->next)) {
case TrieKind:
result = advanceCursorMappedLevel(trie, cursor, bytes, length);
break;
case CompactTrieKind:
result = advanceCursorCompactMappedLevel(trie, cursor, bytes, length);
break;
case ListKind:
result = advanceCursorMappedPage(trie, cursor, bytes, length);
break;
case Nothing: {
TrieHeader *header = (TrieHeader*)trie->mapBase;
if (cursor->next == header->rootOffset)
result = advanceCursorMappedLevel(trie, cursor, bytes, length);
}
}
return result;
}
static void traverseFromMapCursorMappedLevel(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback)
{
MapTrieLevelRef root = (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
if (root->payload) {
callback(ctx, bytes, length, root->payload, stop);
if (*stop)
return;
}
if (length >= capacity)
return;
for (int i = 0; i < CHARACTER_SET_SIZE; ++i) {
bytes[length] = i;
cursor->next = (uint32_t)root->slots[i];
cursor->isOnPage = FALSE;
cursor->entryOffsetInPage = 0;
cursor->offsetInEntry = 0;
cursor->payload = 0;
int addByte = i ? 1 : 0;
traverseFromMapCursor(trie, cursor, bytes, capacity - addByte, length + addByte, stop, ctx, callback);
if (*stop)
return;
}
}
static void traverseFromMapCursorCompactMappedLevel(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback)
{
CompactMapTrieLevelRef root = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
if (root->payload) {
callback(ctx, bytes, length, root->payload, stop);
if (*stop)
return;
}
if (length >= capacity)
return;
for (int c = 0; c < CHARACTER_SET_SIZE; ++c) {
bytes[length] = c;
//This could be optimized to remember what the last slot/item was and not count bits over and over.
uint8_t slot = c / 64;
uint8_t bit = c % 64;
uint32_t item = 0;
uint64_t bword = root->bitmap[slot];
if (bword & (1ull << bit)) {
// Count all the set bits up to this bit
for (int i = 0; i < slot; ++i)
item += __builtin_popcountll(root->bitmap[i]);
item += __builtin_popcountll(bword & ((1ull << bit)-1));
cursor->next = root->slots[item];
cursor->isOnPage = FALSE;
cursor->entryOffsetInPage = 0;
cursor->offsetInEntry = 0;
cursor->payload = 0;
traverseFromMapCursor(trie, cursor, bytes, capacity - 1, length + 1, stop, ctx, callback);
if (*stop)
return;
}
}
}
static void traverseFromMapCursorMappedPageWithPrefixCompression(Page *page, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback)
{
uint32_t pageSize = page->length - sizeof(Page);
uint32_t offset = cursor->entryOffsetInPage;
uint32_t minPrefixLength = 0;
if (cursor->isOnPage) {
PageEntryPacked *entry = (PageEntryPacked*)&page->data[offset];
int32_t remainingLength = entry->strlen - cursor->offsetInEntry - 1;
if (remainingLength >= 0 && remainingLength <= capacity) {
memcpy(bytes + length, entry->string + cursor->offsetInEntry + 1, remainingLength);
callback(ctx, bytes, length + remainingLength, entry->payload, stop);
if (*stop)
return;
}
minPrefixLength = entry->pfxLen + cursor->offsetInEntry;
offset += getPackedPageEntrySize(entry);
}
PageEntryPacked *previousEntry = NULL;
while (offset < pageSize) {
PageEntryPacked *entry = (PageEntryPacked*)&page->data[offset];
if (minPrefixLength > entry->pfxLen)
break;
else if (entry->payload && entry->strlen <= capacity) {
if (previousEntry) {
int32_t len = previousEntry->strlen + previousEntry->pfxLen - entry->pfxLen;
if (len <= length) {
length -= previousEntry->strlen + previousEntry->pfxLen - entry->pfxLen;
}
}
memcpy(bytes + length, entry->string, entry->strlen);
callback(ctx, bytes, length + entry->strlen, entry->payload, stop);
length += entry->strlen;
if (*stop)
return;
}
previousEntry = entry;
offset += getPackedPageEntrySize(entry);
}
}
static void traverseFromMapCursorMappedPageSortedByKey(Page *page, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback)
{
uint32_t pageSize = page->length - sizeof(Page);
uint32_t offset = cursor->entryOffsetInPage;
uint32_t prefixLength = 0;
const UInt8 *prefix = NULL;
if (cursor->isOnPage) {
PageEntry *entry = (PageEntry*)&page->data[offset];
int32_t remainingLength = entry->strlen - cursor->offsetInEntry - 1;
if (remainingLength >= 0 && remainingLength <= capacity) {
memcpy(bytes + length, entry->string + cursor->offsetInEntry, remainingLength);
callback(ctx, bytes, length + remainingLength, entry->payload, stop);
if (*stop)
return;
}
prefixLength = cursor->offsetInEntry + 1;
prefix = entry->string;
offset += getPageEntrySize(entry);
}
while (offset < pageSize) {
PageEntry *entry = (PageEntry*)&page->data[offset];
if (entry->strlen < prefixLength)
break;
else {
int cmpResult = __builtin_memcmp(entry->string, prefix, prefixLength);
if (cmpResult > 0)
break;
else if (entry->payload && entry->strlen <= capacity) {
if (entry->strlen > 0)
memcpy(bytes + length, entry->string + prefixLength, entry->strlen - prefixLength);
callback(ctx, bytes, length + entry->strlen - prefixLength, entry->payload, stop);
if (*stop)
return;
}
offset += getPageEntrySize(entry);
}
}
}
static void traverseFromMapCursorMappedPage(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback)
{
Page *page = (Page *)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
if (trie->cflags & kCFBurstTriePrefixCompression)
traverseFromMapCursorMappedPageWithPrefixCompression(page, cursor, bytes, capacity, length, stop, ctx, callback);
else if (trie->cflags & kCFBurstTrieSortByKey)
traverseFromMapCursorMappedPageSortedByKey(page, cursor, bytes, capacity, length, stop, ctx, callback);
}
void traverseFromMapCursor(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback)
{
switch (DiskNextTrie_GetKind(cursor->next)) {
case TrieKind:
traverseFromMapCursorMappedLevel(trie, cursor, bytes, capacity, length, stop, ctx, callback);
break;
case CompactTrieKind:
traverseFromMapCursorCompactMappedLevel(trie, cursor, bytes, capacity, length, stop, ctx, callback);
break;
case ListKind:
traverseFromMapCursorMappedPage(trie, cursor, bytes, capacity, length, stop, ctx, callback);
break;
case Nothing: {
TrieHeader *header = (TrieHeader*)trie->mapBase;
if (cursor->next == header->rootOffset) {
traverseFromMapCursorMappedLevel(trie, cursor, bytes, capacity, length, stop, ctx, callback);
}
break;
}
}
}
void copyMapCursor(const CompactMapCursor *source, CompactMapCursor* destination)
{
destination->next = source->next;
destination->entryOffsetInPage = source->entryOffsetInPage;
destination->offsetInEntry = source->offsetInEntry;
destination->isOnPage = source->isOnPage;
destination->payload = source->payload;
}
Boolean areMapCursorsEqual(const CompactMapCursor *lhs, const CompactMapCursor *rhs)
{
return lhs->entryOffsetInPage == rhs->entryOffsetInPage && lhs->isOnPage == rhs->isOnPage && lhs->next == rhs->next && lhs->offsetInEntry == rhs->offsetInEntry;
}
static Boolean getMapCursorPayloadFromPackedPageEntry(PageEntryPacked *entry, const CompactMapCursor *cursor, uint32_t *payload)
{
if (payload)
*payload = 0;
if (cursor->entryOffsetInPage == 0 && cursor->offsetInEntry == 0 && entry->strlen == 0) {
if (payload)
*payload = entry->payload;
return TRUE;
} else if (cursor->offsetInEntry != entry->strlen - 1)
return FALSE;
else {
if (payload)
*payload = entry->payload;
return TRUE;
}
}
static Boolean getMapCursorPayloadFromPageEntry(PageEntry *entry, const CompactMapCursor *cursor, uint32_t *payload)
{
if (payload)
*payload = 0;
if (cursor->entryOffsetInPage == 0 && cursor->offsetInEntry == 0 && entry->strlen == 0) {
if (payload)
*payload = entry->payload;
return TRUE;
} else if (cursor->offsetInEntry != entry->strlen - 1)
return FALSE;
else {
if (payload)
*payload = entry->payload;
return TRUE;
}
}
Boolean getMapCursorPayload(CFBurstTrieRef trie, const CompactMapCursor *cursor, uint32_t *payload)
{
if (!cursor)
return FALSE;
if (cursor->payload) {
if (payload)
*payload = cursor->payload;
return TRUE;
}
return FALSE;
}
// Legacy
static Boolean burstTrieMappedFind(DiskTrieLevelRef trie, char *map, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix) {
Boolean success = false;
if (length) {
uint32_t offset = CFSwapInt32LittleToHost((uint32_t)trie->slots[*key]);
if(DiskNextTrie_GetKind(offset) == TrieKind) {
return burstTrieMappedFind((DiskTrieLevelRef)DiskNextTrie_GetPtr(map,offset), map, key+1, length-1, payload, prefix);
} else if(DiskNextTrie_GetKind(offset) == CompactTrieKind) {
return burstTrieCompactTrieMappedFind((CompactDiskTrieLevelRef)DiskNextTrie_GetPtr(map, offset), map, key+1, length-1, payload, prefix);
} else {
if(DiskNextTrie_GetKind(offset) == ListKind) {
return burstTrieMappedPageFind((StringPage *)DiskNextTrie_GetPtr(map, offset), key+1, length-1, payload, prefix);
} else {
return success;
}
}
} else {
if (trie->weight) {
SetPayload(payload, CFSwapInt32LittleToHost(trie->payload));
success = true;
}
}
return success;
}
static Boolean burstTrieCompactTrieMappedFind(CompactDiskTrieLevelRef trie, char *map, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix) {
Boolean success = false;
if (length) {
uint32_t mykey = *key;
uint32_t slot = mykey / 64;
uint32_t bit = mykey % 64;
uint32_t item = 0;
uint64_t bword = CFSwapInt64LittleToHost(trie->bitmap[slot]);
if (bword & (1ull << bit)) {
/* Count all the set bits up to this bit */
for (int i=0; i < slot; i++) {
item += __builtin_popcountll(trie->bitmap[i]);
}
item += __builtin_popcountll(bword & ((1ull << bit)-1));
uint32_t offset = CFSwapInt32LittleToHost((uint32_t)trie->slots[item]);
if(DiskNextTrie_GetKind(offset) == TrieKind) {
return burstTrieMappedFind((DiskTrieLevelRef)DiskNextTrie_GetPtr(map, offset), map, key+1, length-1, payload, prefix);
} else if(DiskNextTrie_GetKind(offset) == CompactTrieKind) {
return burstTrieCompactTrieMappedFind((CompactDiskTrieLevelRef)DiskNextTrie_GetPtr(map, offset), map, key+1, length-1, payload, prefix);
}
else {
if(DiskNextTrie_GetKind(offset) == ListKind) {
return burstTrieMappedPageFind((StringPage *)DiskNextTrie_GetPtr(map, offset), key+1, length-1, payload, prefix);
} else {
return success;
}
}
}
} else {
if (trie->weight) {
SetPayload(payload, CFSwapInt32LittleToHost(trie->payload));
success = true;
}
}
return success;
}
static Boolean burstTrieMappedPageFind(StringPage *page, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix) {
Boolean success = false;
uint32_t end = CFSwapInt32LittleToHost(page->length);
uint32_t cur = 0;
if (prefix) {
uint8_t pfx[256];
while (cur < end) {
StringPageEntryPacked *entry = (StringPageEntryPacked *)&page->data[cur];
uint16_t strlen = entry->pfxLen+CFSwapInt16LittleToHost(entry->strlen);
if (strlen == length && __builtin_memcmp(pfx, key, entry->pfxLen) == 0 && __builtin_memcmp(entry->string, key+entry->pfxLen, length-entry->pfxLen) == 0) {
SetPayload(payload, CFSwapInt32LittleToHost(entry->payload));
success = true;
return success;
} else {
memcpy(pfx+entry->pfxLen, entry->string, MIN(255-entry->pfxLen, length-entry->pfxLen));
cur += sizeof(*entry) + strlen - entry->pfxLen;
}
}
} else {
while (cur < end) {
StringPageEntry *entry = (StringPageEntry *)&page->data[cur];
uint16_t strlen = CFSwapInt16LittleToHost(entry->strlen);
if (strlen == length && __builtin_memcmp(entry->string, key, length) == 0) {
SetPayload(payload, CFSwapInt32LittleToHost(entry->payload));
success = true;
return success;
} else {
cur += sizeof(*entry) + strlen;
}
}
}
return success;
}
#if 0
#pragma mark -
#pragma mark Serialization
#endif
static bool serializeCFBurstTrieLevels(CFBurstTrieRef trie, TrieLevelRef root, uint32_t *offset, off_t start_offset, bool dispose, bool isroot, int fd)
{
bool dense = true;
int count = 0;
for (int i=0; i < CHARACTER_SET_SIZE; i++) if (root->slots[i]) count++;
uint32_t this_offset = *offset;
if ((trie->cflags & kCFBurstTrieBitmapCompression) && count < MAX_BITMAP_SIZE && !isroot) {
size_t size = sizeof(CompactMapTrieLevel) + sizeof(uint32_t) * count;
int offsetSlot = 0;
CompactMapTrieLevel *maptrie = (CompactMapTrieLevel *)alloca(size);
bzero(maptrie, size);
*offset += size;
for (int i=0; i < CHARACTER_SET_SIZE; i++) {
NextTrie next = root->slots[i];
if (next) {
uint32_t slot = i / 64;
uint32_t bit = i % 64;
maptrie->bitmap[slot] |= 1ull<<bit;
if (NextTrie_GetKind(next) == TrieKind) {
TrieLevelRef nextLevel = (TrieLevelRef)NextTrie_GetPtr(next);
uint32_t childOffset = *offset;
if (serializeCFBurstTrieLevels(trie, nextLevel, offset, start_offset, true, false, fd)) {
maptrie->slots[offsetSlot] = (TrieKind|childOffset);
} else {
maptrie->slots[offsetSlot] = (CompactTrieKind|childOffset);
}
} else {
maptrie->slots[offsetSlot] = next;
}
offsetSlot++;
}
}
maptrie->payload = root->payload;
int bitcount = 0;
for (int i=0; i < 4; i++) bitcount += __builtin_popcountll(maptrie->bitmap[i]);
assert(bitcount == count);
pwrite(fd, maptrie, size, this_offset+start_offset);
dense = false;
} else {
MapTrieLevel maptrie;
*offset += sizeof(maptrie);
for (int i=0; i < CHARACTER_SET_SIZE; i++) {
NextTrie next = root->slots[i];
if (NextTrie_GetKind(next) == TrieKind) {
TrieLevelRef nextLevel = (TrieLevelRef)NextTrie_GetPtr(next);
uint32_t childOffset = *offset;
if (serializeCFBurstTrieLevels(trie, nextLevel, offset, start_offset, true, false, fd)) {
maptrie.slots[i] = (TrieKind|childOffset);
} else {
maptrie.slots[i] = (CompactTrieKind|childOffset);
}
} else {
maptrie.slots[i] = next;
}
}
maptrie.payload = root->payload;
pwrite(fd, &maptrie, sizeof(maptrie), this_offset+start_offset);
}
if (dispose) free(root);
return dense;
}
static void serializeCFBurstTrieList(CFBurstTrieRef trie, ListNodeRef listNode, int fd)
{
uint32_t listCount;
size_t size = trie->containerSize;
// ** Temp list of nodes to sort
ListNodeRef *nodes = (ListNodeRef *)malloc(sizeof(ListNodeRef) * size);
for (listCount = 0; listNode; listCount++) {
if (listCount >= size) {
size *= 2;
nodes = (ListNodeRef *)realloc(nodes, sizeof(ListNodeRef) * size);
}
nodes[listCount] = listNode;
listNode = listNode->next;
}
char _buffer[MAX_BUFFER_SIZE];
size_t bufferSize = (sizeof(Page) + size * (sizeof(PageEntryPacked) + MAX_STRING_SIZE));
char *buffer = bufferSize < MAX_BUFFER_SIZE ? _buffer : (char *) malloc(bufferSize);
Page *page = (Page *)buffer;
uint32_t current = 0;
if (trie->cflags & kCFBurstTriePrefixCompression) {
qsort(nodes, listCount, sizeof(ListNodeRef), nodeStringCompare);
ListNodeRef last = 0;
for (int i=0; i < listCount; i++) {
listNode = nodes[i];
uint8_t pfxLen = 0;
if (last) {
for ( ;
pfxLen < CHARACTER_SET_SIZE-1 &&
pfxLen < listNode->length &&
pfxLen < last->length &&
listNode->string[pfxLen] == last->string[pfxLen];
pfxLen++);
}
PageEntryPacked *entry = (PageEntryPacked *)(&page->data[current]);
entry->strlen = listNode->length - pfxLen;
entry->payload = listNode->payload;
entry->pfxLen = pfxLen;
memcpy(entry->string, listNode->string+pfxLen, listNode->length-pfxLen);
current += listNode->length - pfxLen + sizeof(PageEntryPacked);
last = listNode;
}
size_t len = (sizeof(PageEntryPacked) + current + 3) & ~3;
page->length = current;
write(fd, page, len);
} else {
if (trie->cflags & kCFBurstTrieSortByKey)
qsort(nodes, listCount, sizeof(ListNodeRef), nodeStringCompare);
else
qsort(nodes, listCount, sizeof(ListNodeRef), nodeWeightCompare);
for (int i=0; i < listCount; i++) {
listNode = nodes[i];
PageEntry *entry = (PageEntry *)(&page->data[current]);
entry->strlen = listNode->length;
entry->payload = listNode->payload;
memcpy(entry->string, listNode->string, listNode->length);
current += listNode->length + sizeof(PageEntry);
}
size_t len = (sizeof(Page) + current + 3) & ~3;
page->length = current;
write(fd, page, len);
}
free(nodes);
if (buffer != _buffer) free(buffer);
}
static void serializeCFBurstTrieLists(CFBurstTrieRef trie, TrieLevelRef root, off_t start_offset, int fd)
{
for (int i=0; i < CHARACTER_SET_SIZE; i++) {
NextTrie next = root->slots[i];
uint32_t offset;
if (NextTrie_GetKind(next) == TrieKind) {
TrieLevelRef nextLevel = (TrieLevelRef)NextTrie_GetPtr(next);
serializeCFBurstTrieLists(trie, nextLevel, start_offset, fd);
} else {
if (NextTrie_GetKind(next) == ListKind) {
ListNodeRef listNode = (ListNodeRef)NextTrie_GetPtr(next);
offset = lseek(fd, 0, SEEK_CUR) - start_offset;
serializeCFBurstTrieList(trie, listNode, fd);
finalizeCFBurstTrieList(listNode);
//assert((offset & 3)==0);
root->slots[i] = (offset|ListKind);
}
}
}
}
static size_t serializeCFBurstTrie(CFBurstTrieRef trie, size_t start_offset, int fd)
{
TrieHeader header;
header.signature = 0x0ddba11;
header.rootOffset = 0;
header.count = trie->count;
header.size = 0;
header.flags = trie->cflags;
header.reserved[0] = 0;
uint32_t offset;
lseek(fd, start_offset, SEEK_SET);
size_t header_size = sizeof(header);
write(fd, &header, header_size);
serializeCFBurstTrieLists(trie, &trie->root, start_offset, fd);
offset = lseek(fd, 0, SEEK_CUR) - start_offset;
size_t off = offsetof(TrieHeader, rootOffset);
size_t offsize = sizeof(offset);
pwrite(fd, &offset, offsize, off+start_offset);
serializeCFBurstTrieLevels(trie, &trie->root, &offset, start_offset, false, true, fd);
size_t off2 = offsetof(TrieHeader, size);
offsize = sizeof(offset);
pwrite(fd, &offset, offsize, off2+start_offset);
offset = lseek(fd, 0, SEEK_END);
return (size_t)(offset-start_offset);
}
#if 0
#pragma mark -
#pragma mark Release
#endif
static void destroyCFBurstTrie(CFBurstTrieRef trie) {
if (trie->mapBase) {
#if DEPLOYMENT_TARGET_WINDOWS
UnmapViewOfFile(trie->mapBase);
CloseHandle(trie->mapHandle);
CloseHandle(trie->mappedFileHandle);
#else
munmap(trie->mapBase, trie->mapSize);
#endif
} else {
finalizeCFBurstTrie(&trie->root);
}
free(trie);
return;
}
static void finalizeCFBurstTrie(TrieLevelRef trie) {
for (int i=0; i < CHARACTER_SET_SIZE; i++) {
if (NextTrie_GetKind(trie->slots[i]) == TrieKind) {
finalizeCFBurstTrie((TrieLevelRef)NextTrie_GetPtr(trie->slots[i]));
free((void *)NextTrie_GetPtr(trie->slots[i]));
} else if (NextTrie_GetKind(trie->slots[i]) == ListKind) {
finalizeCFBurstTrieList((ListNodeRef)NextTrie_GetPtr(trie->slots[i]));
}
}
}
static void finalizeCFBurstTrieList(ListNodeRef node) {
do {
ListNodeRef next = node->next;
free(node);
node = next;
} while(node);
}
#if 0
#pragma mark -
#pragma mark Helpers
#endif
static int nodeWeightCompare(const void *a, const void *b) {
ListNodeRef nodeA = *(ListNodeRef *)a;
ListNodeRef nodeB = *(ListNodeRef *)b;
return (nodeB->weight - nodeA->weight);
}
static int nodeStringCompare(const void *a, const void *b) {
ListNodeRef nodeA = *(ListNodeRef *)a;
ListNodeRef nodeB = *(ListNodeRef *)b;
int result = memcmp((char *)nodeA->string, (char *)nodeB->string, MIN(nodeA->length, nodeB->length));
if (result == 0) result = nodeA->length-nodeB->length;
return result;
}
static bool containsKey(void *context, const uint8_t *key, uint32_t payload, bool exact)
{
uint32_t *ctx = (uint32_t *)context;
if (exact) *ctx = payload;
return exact;
}
static CFIndex burstTrieConvertCharactersToUTF8(UniChar *chars, CFIndex numChars, UInt8 *buffer) {
uint32_t i, j;
for (i = j = 0; i < numChars; i++) {
UniChar c = chars[i];
if (CFStringIsSurrogateHighCharacter(c) && i + 1 < numChars && CFStringIsSurrogateLowCharacter(chars[i + 1])) {
UTF32Char lc = CFStringGetLongCharacterForSurrogatePair(c, chars[i + 1]);
buffer[j++] = 0xf0 + (lc >> 18);
buffer[j++] = 0x80 + ((lc & 0x3ffff) >> 12);
buffer[j++] = 0x80 + ((lc & 0xfff) >> 6);
buffer[j++] = 0x80 + (lc & 0x3f);
i++;
} else {
if (c < 0x80) {
buffer[j++] = c;
} else if (c < 0x800) {
buffer[j++] = 0xc0 + (c >> 6);
buffer[j++] = 0x80 + (c & 0x3f);
} else {
buffer[j++] = 0xe0 + (c >> 12);
buffer[j++] = 0x80 + ((c & 0xfff) >> 6);
buffer[j++] = 0x80 + (c & 0x3f);
}
}
}
buffer[j] = 0;
return j;
}