blob: d7156ed3dada0c10da4aacc5545d10c066d01a0f [file] [log] [blame]
/*
* dict.c: dictionary of reusable strings, just used to avoid allocation
* and freeing operations.
*
* Copyright (C) 2003-2012 Daniel Veillard.
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS AND
* CONTRIBUTORS ACCEPT NO RESPONSIBILITY IN ANY CONCEIVABLE MANNER.
*
* Author: daniel@veillard.com
*/
#define IN_LIBXML
#include "libxml.h"
#include <limits.h>
#include <string.h>
#include <time.h>
#include "private/dict.h"
#include "private/threads.h"
#include <libxml/parser.h>
#include <libxml/dict.h>
#include <libxml/xmlmemory.h>
#include <libxml/xmlstring.h>
#ifndef SIZE_MAX
#define SIZE_MAX ((size_t) -1)
#endif
#define MAX_FILL_NUM 7
#define MAX_FILL_DENOM 8
#define MIN_HASH_SIZE 8
#define MAX_HASH_SIZE (1u << 31)
typedef struct _xmlDictStrings xmlDictStrings;
typedef xmlDictStrings *xmlDictStringsPtr;
struct _xmlDictStrings {
xmlDictStringsPtr next;
xmlChar *free;
xmlChar *end;
size_t size;
size_t nbStrings;
xmlChar array[1];
};
typedef xmlHashedString xmlDictEntry;
/*
* The entire dictionary
*/
struct _xmlDict {
int ref_counter;
xmlDictEntry *table;
size_t size;
unsigned int nbElems;
xmlDictStringsPtr strings;
struct _xmlDict *subdict;
/* used for randomization */
unsigned seed;
/* used to impose a limit on size */
size_t limit;
};
/*
* A mutex for modifying the reference counter for shared
* dictionaries.
*/
static xmlMutex xmlDictMutex;
/**
* xmlInitializeDict:
*
* DEPRECATED: Alias for xmlInitParser.
*
* Returns 0.
*/
int
xmlInitializeDict(void) {
xmlInitParser();
return(0);
}
/**
* xmlInitDictInternal:
*
* Initialize mutex.
*/
void
xmlInitDictInternal(void) {
xmlInitMutex(&xmlDictMutex);
}
/**
* xmlDictCleanup:
*
* DEPRECATED: This function is a no-op. Call xmlCleanupParser
* to free global state but see the warnings there. xmlCleanupParser
* should be only called once at program exit. In most cases, you don't
* have call cleanup functions at all.
*/
void
xmlDictCleanup(void) {
}
/**
* xmlCleanupDictInternal:
*
* Free the dictionary mutex.
*/
void
xmlCleanupDictInternal(void) {
xmlCleanupMutex(&xmlDictMutex);
}
/*
* xmlDictAddString:
* @dict: the dictionary
* @name: the name of the userdata
* @len: the length of the name
*
* Add the string to the array[s]
*
* Returns the pointer of the local string, or NULL in case of error.
*/
static const xmlChar *
xmlDictAddString(xmlDictPtr dict, const xmlChar *name, unsigned int namelen) {
xmlDictStringsPtr pool;
const xmlChar *ret;
size_t size = 0; /* + sizeof(_xmlDictStrings) == 1024 */
size_t limit = 0;
pool = dict->strings;
while (pool != NULL) {
if ((size_t)(pool->end - pool->free) > namelen)
goto found_pool;
if (pool->size > size) size = pool->size;
limit += pool->size;
pool = pool->next;
}
/*
* Not found, need to allocate
*/
if (pool == NULL) {
if ((dict->limit > 0) && (limit > dict->limit)) {
return(NULL);
}
if (size == 0) {
size = 1000;
} else {
if (size < (SIZE_MAX - sizeof(xmlDictStrings)) / 4)
size *= 4; /* exponential growth */
else
size = SIZE_MAX - sizeof(xmlDictStrings);
}
if (size / 4 < namelen) {
if ((size_t) namelen + 0 < (SIZE_MAX - sizeof(xmlDictStrings)) / 4)
size = 4 * (size_t) namelen; /* just in case ! */
else
return(NULL);
}
pool = (xmlDictStringsPtr) xmlMalloc(sizeof(xmlDictStrings) + size);
if (pool == NULL)
return(NULL);
pool->size = size;
pool->nbStrings = 0;
pool->free = &pool->array[0];
pool->end = &pool->array[size];
pool->next = dict->strings;
dict->strings = pool;
}
found_pool:
ret = pool->free;
memcpy(pool->free, name, namelen);
pool->free += namelen;
*(pool->free++) = 0;
pool->nbStrings++;
return(ret);
}
/*
* xmlDictAddQString:
* @dict: the dictionary
* @prefix: the prefix of the userdata
* @plen: the prefix length
* @name: the name of the userdata
* @len: the length of the name
*
* Add the QName to the array[s]
*
* Returns the pointer of the local string, or NULL in case of error.
*/
static const xmlChar *
xmlDictAddQString(xmlDictPtr dict, const xmlChar *prefix, unsigned int plen,
const xmlChar *name, unsigned int namelen)
{
xmlDictStringsPtr pool;
const xmlChar *ret;
size_t size = 0; /* + sizeof(_xmlDictStrings) == 1024 */
size_t limit = 0;
pool = dict->strings;
while (pool != NULL) {
if ((size_t)(pool->end - pool->free) > namelen + plen + 1)
goto found_pool;
if (pool->size > size) size = pool->size;
limit += pool->size;
pool = pool->next;
}
/*
* Not found, need to allocate
*/
if (pool == NULL) {
if ((dict->limit > 0) && (limit > dict->limit)) {
return(NULL);
}
if (size == 0) size = 1000;
else size *= 4; /* exponential growth */
if (size < 4 * (namelen + plen + 1))
size = 4 * (namelen + plen + 1); /* just in case ! */
pool = (xmlDictStringsPtr) xmlMalloc(sizeof(xmlDictStrings) + size);
if (pool == NULL)
return(NULL);
pool->size = size;
pool->nbStrings = 0;
pool->free = &pool->array[0];
pool->end = &pool->array[size];
pool->next = dict->strings;
dict->strings = pool;
}
found_pool:
ret = pool->free;
memcpy(pool->free, prefix, plen);
pool->free += plen;
*(pool->free++) = ':';
memcpy(pool->free, name, namelen);
pool->free += namelen;
*(pool->free++) = 0;
pool->nbStrings++;
return(ret);
}
/**
* xmlDictCreate:
*
* Create a new dictionary
*
* Returns the newly created dictionary, or NULL if an error occurred.
*/
xmlDictPtr
xmlDictCreate(void) {
xmlDictPtr dict;
xmlInitParser();
dict = xmlMalloc(sizeof(xmlDict));
if (dict == NULL)
return(NULL);
dict->ref_counter = 1;
dict->limit = 0;
dict->size = 0;
dict->nbElems = 0;
dict->table = NULL;
dict->strings = NULL;
dict->subdict = NULL;
dict->seed = xmlRandom();
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
dict->seed = 0;
#endif
return(dict);
}
/**
* xmlDictCreateSub:
* @sub: an existing dictionary
*
* Create a new dictionary, inheriting strings from the read-only
* dictionary @sub. On lookup, strings are first searched in the
* new dictionary, then in @sub, and if not found are created in the
* new dictionary.
*
* Returns the newly created dictionary, or NULL if an error occurred.
*/
xmlDictPtr
xmlDictCreateSub(xmlDictPtr sub) {
xmlDictPtr dict = xmlDictCreate();
if ((dict != NULL) && (sub != NULL)) {
dict->seed = sub->seed;
dict->subdict = sub;
xmlDictReference(dict->subdict);
}
return(dict);
}
/**
* xmlDictReference:
* @dict: the dictionary
*
* Increment the reference counter of a dictionary
*
* Returns 0 in case of success and -1 in case of error
*/
int
xmlDictReference(xmlDictPtr dict) {
if (dict == NULL) return -1;
xmlMutexLock(&xmlDictMutex);
dict->ref_counter++;
xmlMutexUnlock(&xmlDictMutex);
return(0);
}
/**
* xmlDictFree:
* @dict: the dictionary
*
* Free the hash @dict and its contents. The userdata is
* deallocated with @f if provided.
*/
void
xmlDictFree(xmlDictPtr dict) {
xmlDictStringsPtr pool, nextp;
if (dict == NULL)
return;
/* decrement the counter, it may be shared by a parser and docs */
xmlMutexLock(&xmlDictMutex);
dict->ref_counter--;
if (dict->ref_counter > 0) {
xmlMutexUnlock(&xmlDictMutex);
return;
}
xmlMutexUnlock(&xmlDictMutex);
if (dict->subdict != NULL) {
xmlDictFree(dict->subdict);
}
if (dict->table) {
xmlFree(dict->table);
}
pool = dict->strings;
while (pool != NULL) {
nextp = pool->next;
xmlFree(pool);
pool = nextp;
}
xmlFree(dict);
}
/**
* xmlDictOwns:
* @dict: the dictionary
* @str: the string
*
* check if a string is owned by the dictionary
*
* Returns 1 if true, 0 if false and -1 in case of error
* -1 in case of error
*/
int
xmlDictOwns(xmlDictPtr dict, const xmlChar *str) {
xmlDictStringsPtr pool;
if ((dict == NULL) || (str == NULL))
return(-1);
pool = dict->strings;
while (pool != NULL) {
if ((str >= &pool->array[0]) && (str <= pool->free))
return(1);
pool = pool->next;
}
if (dict->subdict)
return(xmlDictOwns(dict->subdict, str));
return(0);
}
/**
* xmlDictSize:
* @dict: the dictionary
*
* Query the number of elements installed in the hash @dict.
*
* Returns the number of elements in the dictionary or
* -1 in case of error
*/
int
xmlDictSize(xmlDictPtr dict) {
if (dict == NULL)
return(-1);
if (dict->subdict)
return(dict->nbElems + dict->subdict->nbElems);
return(dict->nbElems);
}
/**
* xmlDictSetLimit:
* @dict: the dictionary
* @limit: the limit in bytes
*
* Set a size limit for the dictionary
* Added in 2.9.0
*
* Returns the previous limit of the dictionary or 0
*/
size_t
xmlDictSetLimit(xmlDictPtr dict, size_t limit) {
size_t ret;
if (dict == NULL)
return(0);
ret = dict->limit;
dict->limit = limit;
return(ret);
}
/**
* xmlDictGetUsage:
* @dict: the dictionary
*
* Get how much memory is used by a dictionary for strings
* Added in 2.9.0
*
* Returns the amount of strings allocated
*/
size_t
xmlDictGetUsage(xmlDictPtr dict) {
xmlDictStringsPtr pool;
size_t limit = 0;
if (dict == NULL)
return(0);
pool = dict->strings;
while (pool != NULL) {
limit += pool->size;
pool = pool->next;
}
return(limit);
}
/*****************************************************************
*
* The code below was rewritten and is additionally licensed under
* the main license in file 'Copyright'.
*
*****************************************************************/
ATTRIBUTE_NO_SANITIZE_INTEGER
static unsigned
xmlDictHashName(unsigned seed, const xmlChar* data, size_t maxLen,
size_t *plen) {
unsigned h1, h2;
size_t i;
HASH_INIT(h1, h2, seed);
for (i = 0; i < maxLen && data[i]; i++) {
HASH_UPDATE(h1, h2, data[i]);
}
HASH_FINISH(h1, h2);
*plen = i;
return(h2 | MAX_HASH_SIZE);
}
ATTRIBUTE_NO_SANITIZE_INTEGER
static unsigned
xmlDictHashQName(unsigned seed, const xmlChar *prefix, const xmlChar *name,
size_t *pplen, size_t *plen) {
unsigned h1, h2;
size_t i;
HASH_INIT(h1, h2, seed);
for (i = 0; prefix[i] != 0; i++) {
HASH_UPDATE(h1, h2, prefix[i]);
}
*pplen = i;
HASH_UPDATE(h1, h2, ':');
for (i = 0; name[i] != 0; i++) {
HASH_UPDATE(h1, h2, name[i]);
}
*plen = i;
HASH_FINISH(h1, h2);
/*
* Always set the upper bit of hash values since 0 means an unoccupied
* bucket.
*/
return(h2 | MAX_HASH_SIZE);
}
unsigned
xmlDictComputeHash(const xmlDict *dict, const xmlChar *string) {
size_t len;
return(xmlDictHashName(dict->seed, string, SIZE_MAX, &len));
}
#define HASH_ROL31(x,n) ((x) << (n) | ((x) & 0x7FFFFFFF) >> (31 - (n)))
ATTRIBUTE_NO_SANITIZE_INTEGER
unsigned
xmlDictCombineHash(unsigned v1, unsigned v2) {
/*
* The upper bit of hash values is always set, so we have to operate on
* 31-bit hashes here.
*/
v1 ^= v2;
v1 += HASH_ROL31(v2, 5);
return((v1 & 0xFFFFFFFF) | 0x80000000);
}
/**
* xmlDictFindEntry:
* @dict: dict
* @prefix: optional QName prefix
* @name: string
* @len: length of string
* @hashValue: valid hash value of string
* @pfound: result of search
*
* Try to find a matching hash table entry. If an entry was found, set
* @found to 1 and return the entry. Otherwise, set @found to 0 and return
* the location where a new entry should be inserted.
*/
ATTRIBUTE_NO_SANITIZE_INTEGER
static xmlDictEntry *
xmlDictFindEntry(const xmlDict *dict, const xmlChar *prefix,
const xmlChar *name, int len, unsigned hashValue,
int *pfound) {
xmlDictEntry *entry;
unsigned mask, pos, displ;
int found = 0;
mask = dict->size - 1;
pos = hashValue & mask;
entry = &dict->table[pos];
if (entry->hashValue != 0) {
/*
* Robin hood hashing: abort if the displacement of the entry
* is smaller than the displacement of the key we look for.
* This also stops at the correct position when inserting.
*/
displ = 0;
do {
if (entry->hashValue == hashValue) {
if (prefix == NULL) {
/*
* name is not necessarily null-terminated.
*/
if ((strncmp((const char *) entry->name,
(const char *) name, len) == 0) &&
(entry->name[len] == 0)) {
found = 1;
break;
}
} else {
if (xmlStrQEqual(prefix, name, entry->name)) {
found = 1;
break;
}
}
}
displ++;
pos++;
entry++;
if ((pos & mask) == 0)
entry = dict->table;
} while ((entry->hashValue != 0) &&
(((pos - entry->hashValue) & mask) >= displ));
}
*pfound = found;
return(entry);
}
/**
* xmlDictGrow:
* @dict: dictionary
* @size: new size of the dictionary
*
* Resize the dictionary hash table.
*
* Returns 0 in case of success, -1 if a memory allocation failed.
*/
static int
xmlDictGrow(xmlDictPtr dict, unsigned size) {
const xmlDictEntry *oldentry, *oldend, *end;
xmlDictEntry *table;
unsigned oldsize, i;
/* Add 0 to avoid spurious -Wtype-limits warning on 64-bit GCC */
if ((size_t) size + 0 > SIZE_MAX / sizeof(table[0]))
return(-1);
table = xmlMalloc(size * sizeof(table[0]));
if (table == NULL)
return(-1);
memset(table, 0, size * sizeof(table[0]));
oldsize = dict->size;
if (oldsize == 0)
goto done;
oldend = &dict->table[oldsize];
end = &table[size];
/*
* Robin Hood sorting order is maintained if we
*
* - compute dict indices with modulo
* - resize by an integer factor
* - start to copy from the beginning of a probe sequence
*/
oldentry = dict->table;
while (oldentry->hashValue != 0) {
if (++oldentry >= oldend)
oldentry = dict->table;
}
for (i = 0; i < oldsize; i++) {
if (oldentry->hashValue != 0) {
xmlDictEntry *entry = &table[oldentry->hashValue & (size - 1)];
while (entry->hashValue != 0) {
if (++entry >= end)
entry = table;
}
*entry = *oldentry;
}
if (++oldentry >= oldend)
oldentry = dict->table;
}
xmlFree(dict->table);
done:
dict->table = table;
dict->size = size;
return(0);
}
/**
* xmlDictLookupInternal:
* @dict: dict
* @prefix: optional QName prefix
* @name: string
* @maybeLen: length of string or -1 if unknown
* @update: whether the string should be added
*
* Internal lookup and update function.
*/
ATTRIBUTE_NO_SANITIZE_INTEGER
static const xmlDictEntry *
xmlDictLookupInternal(xmlDictPtr dict, const xmlChar *prefix,
const xmlChar *name, int maybeLen, int update) {
xmlDictEntry *entry = NULL;
const xmlChar *ret;
unsigned hashValue;
size_t maxLen, len, plen, klen;
int found = 0;
if ((dict == NULL) || (name == NULL))
return(NULL);
maxLen = (maybeLen < 0) ? SIZE_MAX : (size_t) maybeLen;
if (prefix == NULL) {
hashValue = xmlDictHashName(dict->seed, name, maxLen, &len);
if (len > INT_MAX / 2)
return(NULL);
klen = len;
} else {
hashValue = xmlDictHashQName(dict->seed, prefix, name, &plen, &len);
if ((len > INT_MAX / 2) || (plen >= INT_MAX / 2 - len))
return(NULL);
klen = plen + 1 + len;
}
if ((dict->limit > 0) && (klen >= dict->limit))
return(NULL);
/*
* Check for an existing entry
*/
if (dict->size > 0)
entry = xmlDictFindEntry(dict, prefix, name, klen, hashValue, &found);
if (found)
return(entry);
if ((dict->subdict != NULL) && (dict->subdict->size > 0)) {
xmlDictEntry *subEntry;
unsigned subHashValue;
if (prefix == NULL)
subHashValue = xmlDictHashName(dict->subdict->seed, name, len,
&len);
else
subHashValue = xmlDictHashQName(dict->subdict->seed, prefix, name,
&plen, &len);
subEntry = xmlDictFindEntry(dict->subdict, prefix, name, klen,
subHashValue, &found);
if (found)
return(subEntry);
}
if (!update)
return(NULL);
/*
* Grow the hash table if needed
*/
if (dict->nbElems + 1 > dict->size / MAX_FILL_DENOM * MAX_FILL_NUM) {
unsigned newSize, mask, displ, pos;
if (dict->size == 0) {
newSize = MIN_HASH_SIZE;
} else {
if (dict->size >= MAX_HASH_SIZE)
return(NULL);
newSize = dict->size * 2;
}
if (xmlDictGrow(dict, newSize) != 0)
return(NULL);
/*
* Find new entry
*/
mask = dict->size - 1;
displ = 0;
pos = hashValue & mask;
entry = &dict->table[pos];
while ((entry->hashValue != 0) &&
((pos - entry->hashValue) & mask) >= displ) {
displ++;
pos++;
entry++;
if ((pos & mask) == 0)
entry = dict->table;
}
}
if (prefix == NULL)
ret = xmlDictAddString(dict, name, len);
else
ret = xmlDictAddQString(dict, prefix, plen, name, len);
if (ret == NULL)
return(NULL);
/*
* Shift the remainder of the probe sequence to the right
*/
if (entry->hashValue != 0) {
const xmlDictEntry *end = &dict->table[dict->size];
const xmlDictEntry *cur = entry;
do {
cur++;
if (cur >= end)
cur = dict->table;
} while (cur->hashValue != 0);
if (cur < entry) {
/*
* If we traversed the end of the buffer, handle the part
* at the start of the buffer.
*/
memmove(&dict->table[1], dict->table,
(char *) cur - (char *) dict->table);
cur = end - 1;
dict->table[0] = *cur;
}
memmove(&entry[1], entry, (char *) cur - (char *) entry);
}
/*
* Populate entry
*/
entry->hashValue = hashValue;
entry->name = ret;
dict->nbElems++;
return(entry);
}
/**
* xmlDictLookup:
* @dict: dictionary
* @name: string key
* @len: length of the key, if -1 it is recomputed
*
* Lookup a string and add it to the dictionary if it wasn't found.
*
* Returns the interned copy of the string or NULL if a memory allocation
* failed.
*/
const xmlChar *
xmlDictLookup(xmlDictPtr dict, const xmlChar *name, int len) {
const xmlDictEntry *entry;
entry = xmlDictLookupInternal(dict, NULL, name, len, 1);
if (entry == NULL)
return(NULL);
return(entry->name);
}
/**
* xmlDictLookupHashed:
* @dict: dictionary
* @name: string key
* @len: length of the key, if -1 it is recomputed
*
* Lookup a dictionary entry and add the string to the dictionary if
* it wasn't found.
*
* Returns the dictionary entry.
*/
xmlHashedString
xmlDictLookupHashed(xmlDictPtr dict, const xmlChar *name, int len) {
const xmlDictEntry *entry;
xmlHashedString ret;
entry = xmlDictLookupInternal(dict, NULL, name, len, 1);
if (entry == NULL) {
ret.name = NULL;
ret.hashValue = 0;
} else {
ret = *entry;
}
return(ret);
}
/**
* xmlDictExists:
* @dict: the dictionary
* @name: the name of the userdata
* @len: the length of the name, if -1 it is recomputed
*
* Check if a string exists in the dictionary.
*
* Returns the internal copy of the name or NULL if not found.
*/
const xmlChar *
xmlDictExists(xmlDictPtr dict, const xmlChar *name, int len) {
const xmlDictEntry *entry;
entry = xmlDictLookupInternal(dict, NULL, name, len, 0);
if (entry == NULL)
return(NULL);
return(entry->name);
}
/**
* xmlDictQLookup:
* @dict: the dictionary
* @prefix: the prefix
* @name: the name
*
* Lookup the QName @prefix:@name and add it to the dictionary if
* it wasn't found.
*
* Returns the interned copy of the string or NULL if a memory allocation
* failed.
*/
const xmlChar *
xmlDictQLookup(xmlDictPtr dict, const xmlChar *prefix, const xmlChar *name) {
const xmlDictEntry *entry;
entry = xmlDictLookupInternal(dict, prefix, name, -1, 1);
if (entry == NULL)
return(NULL);
return(entry->name);
}
/*
* Pseudo-random generator
*/
static xmlMutex xmlRngMutex;
static unsigned globalRngState[2];
#ifdef XML_THREAD_LOCAL
static XML_THREAD_LOCAL int localRngInitialized = 0;
static XML_THREAD_LOCAL unsigned localRngState[2];
#endif
ATTRIBUTE_NO_SANITIZE_INTEGER
void
xmlInitRandom(void) {
int var;
xmlInitMutex(&xmlRngMutex);
/* TODO: Get seed values from system PRNG */
globalRngState[0] = (unsigned) time(NULL) ^
HASH_ROL((unsigned) (size_t) &xmlInitRandom, 8);
globalRngState[1] = HASH_ROL((unsigned) (size_t) &xmlRngMutex, 16) ^
HASH_ROL((unsigned) (size_t) &var, 24);
}
void
xmlCleanupRandom(void) {
xmlCleanupMutex(&xmlRngMutex);
}
ATTRIBUTE_NO_SANITIZE_INTEGER
static unsigned
xoroshiro64ss(unsigned *s) {
unsigned s0 = s[0];
unsigned s1 = s[1];
unsigned result = HASH_ROL(s0 * 0x9E3779BB, 5) * 5;
s1 ^= s0;
s[0] = HASH_ROL(s0, 26) ^ s1 ^ (s1 << 9);
s[1] = HASH_ROL(s1, 13);
return(result & 0xFFFFFFFF);
}
unsigned
xmlRandom(void) {
#ifdef XML_THREAD_LOCAL
if (!localRngInitialized) {
xmlMutexLock(&xmlRngMutex);
localRngState[0] = xoroshiro64ss(globalRngState);
localRngState[1] = xoroshiro64ss(globalRngState);
localRngInitialized = 1;
xmlMutexUnlock(&xmlRngMutex);
}
return(xoroshiro64ss(localRngState));
#else
unsigned ret;
xmlMutexLock(&xmlRngMutex);
ret = xoroshiro64ss(globalRngState);
xmlMutexUnlock(&xmlRngMutex);
return(ret);
#endif
}