blob: dd0f6a616929e98ab04686d5b2f01616a4e6e661 [file] [log] [blame]
diff --git a/source/common/Makefile.in b/source/common/Makefile.in
index 79e371b0..d21f5d06 100644
--- a/source/common/Makefile.in
+++ b/source/common/Makefile.in
@@ -88,8 +88,9 @@ ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \
ucnv_ext.o ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o ucnv_set.o ucnv_ct.o \
resource.o uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \
ucurr.o \
-localebuilder.o \
+localebuilder.o localeprioritylist.o \
messagepattern.o ucat.o locmap.o uloc.o locid.o locutil.o locavailable.o locdispnames.o locdspnm.o loclikely.o locresdata.o \
+lsr.o loclikelysubtags.o locdistance.o localematcher.o \
bytestream.o stringpiece.o bytesinkutil.o \
stringtriebuilder.o bytestriebuilder.o \
bytestrie.o bytestrieiterator.o \
diff --git a/source/common/charstr.cpp b/source/common/charstr.cpp
index 852cc539..dda29dac 100644
--- a/source/common/charstr.cpp
+++ b/source/common/charstr.cpp
@@ -35,6 +35,17 @@ CharString& CharString::operator=(CharString&& src) U_NOEXCEPT {
return *this;
}
+char *CharString::cloneData(UErrorCode &errorCode) const {
+    // Nothing is allocated when the caller is already in an error state.
+    if (U_FAILURE(errorCode)) { return nullptr; }
+    // Copy len chars plus the terminating NUL.
+    char *copy = static_cast<char *>(uprv_malloc(len + 1));
+    if (copy != nullptr) {
+        uprv_memcpy(copy, buffer.getAlias(), len + 1);
+        return copy;
+    }
+    errorCode = U_MEMORY_ALLOCATION_ERROR;
+    return nullptr;
+}
+
CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) {
if(U_SUCCESS(errorCode) && this!=&s && ensureCapacity(s.len+1, 0, errorCode)) {
len=s.len;
@@ -52,6 +63,18 @@ int32_t CharString::lastIndexOf(char c) const {
return -1;
}
+bool CharString::contains(StringPiece s) const {
+    // An empty search string is reported as "not contained".
+    if (s.empty()) { return false; }
+    const int32_t needleLength = s.length();
+    const char *haystack = buffer.getAlias();
+    const char *needle = s.data();
+    // Try every start offset at which the whole needle still fits.
+    for (int32_t start = 0; start + needleLength <= len; ++start) {
+        if (uprv_memcmp(haystack + start, needle, needleLength) == 0) {
+            return true;
+        }
+    }
+    return false;
+}
+
CharString &CharString::truncate(int32_t newLength) {
if(newLength<0) {
newLength=0;
diff --git a/source/common/charstr.h b/source/common/charstr.h
index 1a97e019..23b950ed 100644
--- a/source/common/charstr.h
+++ b/source/common/charstr.h
@@ -82,10 +82,24 @@ public:
const char *data() const { return buffer.getAlias(); }
char *data() { return buffer.getAlias(); }
+ /**
+ * Allocates length()+1 chars and copies the NUL-terminated data().
+ * The caller must uprv_free() the result.
+ */
+ char *cloneData(UErrorCode &errorCode) const;
+
+    /** @return true if this string and other have the same length and the same bytes */
+    bool operator==(StringPiece other) const {
+        if (len != other.length()) { return false; }
+        // Two empty strings are equal without looking at the data pointers.
+        return len == 0 || uprv_memcmp(data(), other.data(), len) == 0;
+    }
+    /** @return the negation of operator==(other) */
+    bool operator!=(StringPiece other) const {
+        const bool same = operator==(other);
+        return !same;
+    }
/** @return last index of c, or -1 if c is not in this string */
int32_t lastIndexOf(char c) const;
+ bool contains(StringPiece s) const;
+
CharString &clear() { len=0; buffer[0]=0; return *this; }
CharString &truncate(int32_t newLength);
diff --git a/source/common/localebuilder.cpp b/source/common/localebuilder.cpp
index fe931fcf..837b92f1 100644
--- a/source/common/localebuilder.cpp
+++ b/source/common/localebuilder.cpp
@@ -157,13 +157,18 @@ _isKeywordValue(const char* key, const char* value, int32_t value_len)
}
static void
-_copyExtensions(const Locale& from, Locale* to, bool validate, UErrorCode& errorCode)
+_copyExtensions(const Locale& from, icu::StringEnumeration *keywords,
+ Locale& to, bool validate, UErrorCode& errorCode)
{
if (U_FAILURE(errorCode)) { return; }
- LocalPointer<icu::StringEnumeration> iter(from.createKeywords(errorCode));
- if (U_FAILURE(errorCode) || iter.isNull()) { return; }
+ LocalPointer<icu::StringEnumeration> ownedKeywords;
+ if (keywords == nullptr) {
+ ownedKeywords.adoptInstead(from.createKeywords(errorCode));
+ if (U_FAILURE(errorCode) || ownedKeywords.isNull()) { return; }
+ keywords = ownedKeywords.getAlias();
+ }
const char* key;
- while ((key = iter->next(nullptr, errorCode)) != nullptr) {
+ while ((key = keywords->next(nullptr, errorCode)) != nullptr) {
CharString value;
CharStringByteSink sink(&value);
from.getKeywordValue(key, sink, errorCode);
@@ -176,34 +181,34 @@ _copyExtensions(const Locale& from, Locale* to, bool validate, UErrorCode& error
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
- to->setKeywordValue(key, value.data(), errorCode);
+ to.setKeywordValue(key, value.data(), errorCode);
if (U_FAILURE(errorCode)) { return; }
}
}
void static
-_clearUAttributesAndKeyType(Locale* locale, UErrorCode& errorCode)
+_clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode)
{
// Clear Unicode attributes
- locale->setKeywordValue(kAttributeKey, "", errorCode);
+ locale.setKeywordValue(kAttributeKey, "", errorCode);
// Clear all Unicode keyword values
- LocalPointer<icu::StringEnumeration> iter(locale->createUnicodeKeywords(errorCode));
+ LocalPointer<icu::StringEnumeration> iter(locale.createUnicodeKeywords(errorCode));
if (U_FAILURE(errorCode) || iter.isNull()) { return; }
const char* key;
while ((key = iter->next(nullptr, errorCode)) != nullptr) {
- locale->setUnicodeKeywordValue(key, nullptr, errorCode);
+ locale.setUnicodeKeywordValue(key, nullptr, errorCode);
}
}
static void
-_setUnicodeExtensions(Locale* locale, const CharString& value, UErrorCode& errorCode)
+_setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode)
{
// Add the unicode extensions to extensions_
CharString locale_str("und-u-", errorCode);
locale_str.append(value, errorCode);
_copyExtensions(
- Locale::forLanguageTag(locale_str.data(), errorCode),
+ Locale::forLanguageTag(locale_str.data(), errorCode), nullptr,
locale, false, errorCode);
}
@@ -235,10 +240,10 @@ LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
status_);
return *this;
}
- _clearUAttributesAndKeyType(extensions_, status_);
+ _clearUAttributesAndKeyType(*extensions_, status_);
if (U_FAILURE(status_)) { return *this; }
if (!value.empty()) {
- _setUnicodeExtensions(extensions_, value_str, status_);
+ _setUnicodeExtensions(*extensions_, value_str, status_);
}
return *this;
}
@@ -401,6 +406,24 @@ Locale makeBogusLocale() {
return bogus;
}
+/**
+ * Copies the keywords of src into this builder's extensions_ locale,
+ * creating extensions_ on demand.
+ * Does nothing if errorCode already indicates a failure or src has no keywords.
+ *
+ * @param src       the locale whose keywords are copied
+ * @param errorCode in/out error code; set on any failure, including allocation failure
+ */
+void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode)
+{
+    if (U_FAILURE(errorCode)) { return; }
+    LocalPointer<icu::StringEnumeration> keywords(src.createKeywords(errorCode));
+    if (U_FAILURE(errorCode) || keywords.isNull() || keywords->count(errorCode) == 0) {
+        // Error, or no extensions to copy.
+        return;
+    }
+    if (extensions_ == nullptr) {
+        extensions_ = new Locale();
+        if (extensions_ == nullptr) {
+            // Keep the builder's own status, and also report through errorCode
+            // so that this failure is visible to the caller like every other
+            // failure path in this function.
+            status_ = U_MEMORY_ALLOCATION_ERROR;
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+    }
+    _copyExtensions(src, keywords.getAlias(), *extensions_, false, errorCode);
+}
+
Locale LocaleBuilder::build(UErrorCode& errorCode)
{
if (U_FAILURE(errorCode)) {
@@ -425,7 +448,7 @@ Locale LocaleBuilder::build(UErrorCode& errorCode)
}
Locale product(locale_str.data());
if (extensions_ != nullptr) {
- _copyExtensions(*extensions_, &product, true, errorCode);
+ _copyExtensions(*extensions_, nullptr, product, true, errorCode);
}
if (U_FAILURE(errorCode)) {
return makeBogusLocale();
diff --git a/source/common/localematcher.cpp b/source/common/localematcher.cpp
new file mode 100644
index 00000000..d975fe75
--- /dev/null
+++ b/source/common/localematcher.cpp
@@ -0,0 +1,720 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// localematcher.cpp
+// created: 2019may08 Markus W. Scherer
+
+#ifndef __LOCMATCHER_H__
+#define __LOCMATCHER_H__
+
+#include "unicode/utypes.h"
+#include "unicode/localebuilder.h"
+#include "unicode/localematcher.h"
+#include "unicode/locid.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+#include "cstring.h"
+#include "localeprioritylist.h"
+#include "loclikelysubtags.h"
+#include "locdistance.h"
+#include "lsr.h"
+#include "uassert.h"
+#include "uhash.h"
+#include "uvector.h"
+
+#define UND_LSR LSR("und", "", "")
+
+/**
+ * Indicator for the lifetime of desired-locale objects passed into the LocaleMatcher.
+ * In this file it is consumed by LocaleLsrIterator, which either copies the
+ * remembered locale (temporary) or keeps a pointer to it (stored).
+ *
+ * @draft ICU 65
+ */
+enum ULocMatchLifetime {
+ /**
+ * Locale objects are temporary.
+ * The matcher will make a copy of a locale that will be used beyond one function call.
+ *
+ * @draft ICU 65
+ */
+ ULOCMATCH_TEMPORARY_LOCALES,
+ /**
+ * Locale objects are stored at least as long as the matcher is used.
+ * The matcher will keep only a pointer to a locale that will be used beyond one function call,
+ * avoiding a copy.
+ *
+ * @draft ICU 65
+ */
+ ULOCMATCH_STORED_LOCALES // TODO: permanent? cached? clone?
+};
+#ifndef U_IN_DOXYGEN
+typedef enum ULocMatchLifetime ULocMatchLifetime;
+#endif
+
+U_NAMESPACE_BEGIN
+
+// Move constructor: takes over all fields of src.
+LocaleMatcher::Result::Result(LocaleMatcher::Result &&src) U_NOEXCEPT :
+        desiredLocale(src.desiredLocale),
+        supportedLocale(src.supportedLocale),
+        desiredIndex(src.desiredIndex),
+        supportedIndex(src.supportedIndex),
+        desiredIsOwned(src.desiredIsOwned) {
+    // A borrowed desired locale may remain visible through src as well.
+    if (!desiredIsOwned) { return; }
+    // Ownership moved to this object; detach src so that its destructor
+    // does not delete the same Locale.
+    src.desiredIsOwned = FALSE;
+    src.desiredIndex = -1;
+    src.desiredLocale = nullptr;
+}
+
+LocaleMatcher::Result::~Result() {
+    // Only a Result that owns its desired locale frees it.
+    if (!desiredIsOwned) { return; }
+    delete desiredLocale;
+}
+
+/**
+ * Move assignment: releases what this Result owns, then takes over src's fields.
+ * If src owned its desired locale, src is left without one.
+ */
+LocaleMatcher::Result &LocaleMatcher::Result::operator=(LocaleMatcher::Result &&src) U_NOEXCEPT {
+    // Self-move must be a no-op: running the destructor first would delete
+    // the owned desired locale of the very object we then read from.
+    if (this == &src) { return *this; }
+    this->~Result();
+
+    desiredLocale = src.desiredLocale;
+    supportedLocale = src.supportedLocale;
+    desiredIndex = src.desiredIndex;
+    supportedIndex = src.supportedIndex;
+    desiredIsOwned = src.desiredIsOwned;
+
+    if (desiredIsOwned) {
+        // Ownership moved here; src must not delete the locale again.
+        src.desiredLocale = nullptr;
+        src.desiredIndex = -1;
+        src.desiredIsOwned = FALSE;
+    }
+    return *this;
+}
+
+// Builds a locale from the supported locale, overlaid with the region,
+// variants and extensions of the best desired locale.
+// Returns the root locale on failure or when there is no supported locale.
+Locale LocaleMatcher::Result::makeResolvedLocale(UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return Locale::getRoot(); }
+    if (supportedLocale == nullptr) { return Locale::getRoot(); }
+    const Locale *bestDesired = getDesiredLocale();
+    // Nothing to merge: no desired locale, or it equals the supported one.
+    if (bestDesired == nullptr) { return *supportedLocale; }
+    if (*supportedLocale == *bestDesired) { return *supportedLocale; }
+
+    LocaleBuilder builder;
+    builder.setLocale(*supportedLocale);
+
+    // Region: taken from bestDesired when it has one.
+    const char *desiredRegion = bestDesired->getCountry();
+    if (desiredRegion[0] != 0) {
+        builder.setRegion(desiredRegion);
+    }
+
+    // Variants: taken from bestDesired when it has any.
+    // This overrides any supportedLocale variants.
+    // For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster).
+    const char *desiredVariants = bestDesired->getVariant();
+    if (desiredVariants[0] != 0) {
+        builder.setVariant(desiredVariants);
+    }
+
+    // Extensions: taken from bestDesired when it has any.
+    // C++ note: The following note, copied from Java, may not be true,
+    // as long as C++ copies by legacy ICU keyword, not by extension singleton.
+    // Note that this will override any supportedLocale extensions.
+    // For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
+    // (replacing calendar).
+    builder.copyExtensionsFrom(*bestDesired, errorCode);
+    return builder.build(errorCode);
+}
+
+// Move constructor: takes over src's settings and heap objects.
+LocaleMatcher::Builder::Builder(LocaleMatcher::Builder &&src) U_NOEXCEPT :
+        errorCode_(src.errorCode_),
+        supportedLocales_(src.supportedLocales_),
+        thresholdDistance_(src.thresholdDistance_),
+        demotion_(src.demotion_),
+        defaultLocale_(src.defaultLocale_),
+        favor_(src.favor_) {
+    // This builder now holds the heap objects; src must not delete them again.
+    src.defaultLocale_ = nullptr;
+    src.supportedLocales_ = nullptr;
+}
+
+// Frees the default-locale copy and the supported-locales vector, if any.
+LocaleMatcher::Builder::~Builder() {
+    delete defaultLocale_;
+    delete supportedLocales_;
+}
+
+/**
+ * Move assignment: frees this builder's heap objects, then takes over src's
+ * settings and heap objects. src is left without supported or default locales.
+ */
+LocaleMatcher::Builder &LocaleMatcher::Builder::operator=(LocaleMatcher::Builder &&src) U_NOEXCEPT {
+    // Self-move must be a no-op: running the destructor first would delete
+    // the very objects we are about to take over.
+    if (this == &src) { return *this; }
+    this->~Builder();
+
+    errorCode_ = src.errorCode_;
+    supportedLocales_ = src.supportedLocales_;
+    thresholdDistance_ = src.thresholdDistance_;
+    demotion_ = src.demotion_;
+    defaultLocale_ = src.defaultLocale_;
+    favor_ = src.favor_;
+
+    // Ownership moved here; src must not delete these again.
+    src.supportedLocales_ = nullptr;
+    src.defaultLocale_ = nullptr;
+    return *this;
+}
+
+// Empties the supported-locales vector if it exists; the vector itself is kept.
+void LocaleMatcher::Builder::clearSupportedLocales() {
+    if (supportedLocales_ == nullptr) { return; }
+    supportedLocales_->removeAllElements();
+}
+
+// Creates the supported-locales vector on first use.
+// Returns false if the builder is, or ends up, in an error state.
+bool LocaleMatcher::Builder::ensureSupportedLocaleVector() {
+    if (U_FAILURE(errorCode_)) { return false; }
+    if (supportedLocales_ == nullptr) {
+        supportedLocales_ = new UVector(uprv_deleteUObject, nullptr, errorCode_);
+        // The UVector constructor reports its own failures via errorCode_.
+        if (U_FAILURE(errorCode_)) { return false; }
+        if (supportedLocales_ == nullptr) {
+            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+            return false;
+        }
+    }
+    return true;
+}
+
+// Replaces the supported locales with those parsed from a list string.
+LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListString(
+        StringPiece locales) {
+    LocalePriorityList list(locales, errorCode_);
+    if (U_FAILURE(errorCode_)) { return *this; }
+    clearSupportedLocales();
+    if (!ensureSupportedLocaleVector()) { return *this; }
+    const int32_t count = list.getLengthIncludingRemoved();
+    // Stop at the first failure; errorCode_ is known to be a success code here.
+    for (int32_t i = 0; i < count && U_SUCCESS(errorCode_); ++i) {
+        Locale *orphan = list.orphanLocaleAt(i);
+        if (orphan != nullptr) {
+            supportedLocales_->addElement(orphan, errorCode_);
+            if (U_FAILURE(errorCode_)) {
+                // The locale was not added, so it is freed here.
+                delete orphan;
+            }
+        }
+    }
+    return *this;
+}
+
+// Replaces the supported locales with clones of the iterator's locales.
+LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocales(Locale::Iterator &locales) {
+    if (U_FAILURE(errorCode_)) { return *this; }
+    clearSupportedLocales();
+    if (!ensureSupportedLocaleVector()) { return *this; }
+    // Stop consuming the iterator at the first failure.
+    while (U_SUCCESS(errorCode_) && locales.hasNext()) {
+        Locale *copy = locales.next().clone();
+        if (copy == nullptr) {
+            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+        } else {
+            supportedLocales_->addElement(copy, errorCode_);
+            if (U_FAILURE(errorCode_)) {
+                // The clone was not added, so it is freed here.
+                delete copy;
+            }
+        }
+    }
+    return *this;
+}
+
+// Appends a clone of locale to the supported locales.
+LocaleMatcher::Builder &LocaleMatcher::Builder::addSupportedLocale(const Locale &locale) {
+    // Also covers the case that the builder is already in an error state.
+    if (!ensureSupportedLocaleVector()) { return *this; }
+    Locale *copy = locale.clone();
+    if (copy != nullptr) {
+        supportedLocales_->addElement(copy, errorCode_);
+        if (U_FAILURE(errorCode_)) {
+            // The clone was not added, so it is freed here.
+            delete copy;
+        }
+    } else {
+        errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return *this;
+}
+
+// Stores a clone of defaultLocale, or clears the default when passed nullptr.
+LocaleMatcher::Builder &LocaleMatcher::Builder::setDefaultLocale(const Locale *defaultLocale) {
+    if (U_FAILURE(errorCode_)) { return *this; }
+    if (defaultLocale == nullptr) {
+        delete defaultLocale_;
+        defaultLocale_ = nullptr;
+        return *this;
+    }
+    // Clone before deleting the old value, so a failed clone leaves it intact.
+    Locale *copy = defaultLocale->clone();
+    if (copy == nullptr) {
+        errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+        return *this;
+    }
+    delete defaultLocale_;
+    defaultLocale_ = copy;
+    return *this;
+}
+
+LocaleMatcher::Builder &LocaleMatcher::Builder::setFavorSubtag(ULocMatchFavorSubtag subtag) {
+ if (U_FAILURE(errorCode_)) { return *this; }
+ favor_ = subtag;
+ return *this;
+}
+
+LocaleMatcher::Builder &LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULocMatchDemotion demotion) {
+ if (U_FAILURE(errorCode_)) { return *this; }
+ demotion_ = demotion;
+ return *this;
+}
+
+#if 0
+/**
+ * <i>Internal only!</i>
+ *
+ * @param thresholdDistance the thresholdDistance to set, with -1 = default
+ * @return this Builder object
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+@Deprecated
+LocaleMatcher::Builder &LocaleMatcher::Builder::internalSetThresholdDistance(int32_t thresholdDistance) {
+ if (U_FAILURE(errorCode_)) { return *this; }
+ if (thresholdDistance > 100) {
+ thresholdDistance = 100;
+ }
+ thresholdDistance_ = thresholdDistance;
+ return *this;
+}
+#endif
+
+// Returns TRUE if outErrorCode indicates a failure on return.
+// An existing failure in outErrorCode is never overwritten.
+UBool LocaleMatcher::Builder::copyErrorTo(UErrorCode &outErrorCode) const {
+    if (U_FAILURE(outErrorCode)) { return TRUE; }
+    if (U_FAILURE(errorCode_)) {
+        outErrorCode = errorCode_;
+        return TRUE;
+    }
+    return FALSE;
+}
+
+// Creates a LocaleMatcher from the current builder state.
+LocaleMatcher LocaleMatcher::Builder::build(UErrorCode &errorCode) const {
+    // Surface a pending builder error unless the caller already has a failure.
+    if (U_FAILURE(errorCode_) && U_SUCCESS(errorCode)) {
+        errorCode = errorCode_;
+    }
+    return LocaleMatcher(*this, errorCode);
+}
+
+namespace {
+
+// Returns the maximized LSR for locale, or the "und" LSR on failure,
+// for a bogus locale, and for a locale with an empty name.
+LSR getMaximalLsrOrUnd(const XLikelySubtags &likelySubtags, const Locale &locale,
+                       UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return UND_LSR; }
+    if (locale.isBogus()) { return UND_LSR; }
+    if (*locale.getName() == 0 /* "und" */) { return UND_LSR; }
+    return likelySubtags.makeMaximizedLsrFrom(locale, errorCode);
+}
+
+// UHashtable key hasher: returns the hash code stored in the LSR.
+int32_t hashLSR(const UHashTok token) {
+    return static_cast<const LSR *>(token.pointer)->hashCode;
+}
+
+// UHashtable key comparator: compares the two LSRs by value.
+UBool compareLSRs(const UHashTok t1, const UHashTok t2) {
+    const LSR &left = *static_cast<const LSR *>(t1.pointer);
+    const LSR &right = *static_cast<const LSR *>(t2.pointer);
+    return left == right;
+}
+
+// Maps lsr to i unless lsr already has an entry.
+// Returns true only if a new entry was stored successfully.
+// i must be positive because 0 is what uhash_geti() reports for "not found".
+bool putIfAbsent(UHashtable *lsrToIndex, const LSR &lsr, int32_t i, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return false; }
+    U_ASSERT(i > 0);
+    if (uhash_geti(lsrToIndex, &lsr) != 0) {
+        // Already present: keep the existing mapping.
+        return false;
+    }
+    uhash_puti(lsrToIndex, const_cast<LSR *>(&lsr), i, &errorCode);
+    return U_SUCCESS(errorCode);
+}
+
+}  // namespace
+
+/**
+ * Builds a matcher from the builder's settings.
+ * Clones the builder's supported locales, computes their maximized LSRs,
+ * and sets up the LSR-to-index hash table plus the ordered LSR/index arrays
+ * (default locale first, then paradigm locales, then the rest).
+ *
+ * On failure, errorCode is set and the object is left in a state that
+ * the destructor can clean up.
+ *
+ * @param builder   source of supported locales, default locale and options
+ * @param errorCode in/out error code
+ */
+LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
+        likelySubtags(*XLikelySubtags::getSingleton(errorCode)),
+        localeDistance(*LocaleDistance::getSingleton(errorCode)),
+        thresholdDistance(builder.thresholdDistance_),
+        demotionPerDesiredLocale(0),
+        favorSubtag(builder.favor_),
+        supportedLocales(nullptr), lsrs(nullptr), supportedLocalesLength(0),
+        supportedLsrToIndex(nullptr),
+        supportedLSRs(nullptr), supportedIndexes(nullptr), supportedLSRsLength(0),
+        ownedDefaultLocale(nullptr), defaultLocale(nullptr), defaultLocaleIndex(-1) {
+    if (U_FAILURE(errorCode)) { return; }
+    if (thresholdDistance < 0) {
+        thresholdDistance = localeDistance.getDefaultScriptDistance();
+    }
+    supportedLocalesLength = builder.supportedLocales_ != nullptr ?
+        builder.supportedLocales_->size() : 0;
+    const Locale *def = builder.defaultLocale_;
+    int32_t idef = -1;
+    if (supportedLocalesLength > 0) {
+        // Store the supported locales in input order,
+        // so that when different types are used (e.g., language tag strings)
+        // we can return those by parallel index.
+        supportedLocales = static_cast<const Locale **>(
+            uprv_malloc(supportedLocalesLength * sizeof(const Locale *)));
+        // Supported LSRs in input order.
+        // In C++, we store these permanently to simplify ownership management
+        // in the hash tables. Duplicate LSRs (if any) are unused overhead.
+        lsrs = new LSR[supportedLocalesLength];
+        if (supportedLocales == nullptr || lsrs == nullptr) {
+            // The destructor walks supportedLocales[0..supportedLocalesLength).
+            // At this point that array is either missing or still uninitialized,
+            // so make sure the destructor does not touch any element.
+            supportedLocalesLength = 0;
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        // If the constructor fails partway, we need null pointers for destructibility.
+        uprv_memset(supportedLocales, 0, supportedLocalesLength * sizeof(const Locale *));
+        // Also find the first supported locale whose LSR is
+        // the same as that for the default locale.
+        LSR builderDefaultLSR;
+        const LSR *defLSR = nullptr;
+        if (def != nullptr) {
+            builderDefaultLSR = getMaximalLsrOrUnd(likelySubtags, *def, errorCode);
+            if (U_FAILURE(errorCode)) { return; }
+            defLSR = &builderDefaultLSR;
+        }
+        for (int32_t i = 0; i < supportedLocalesLength; ++i) {
+            const Locale &locale = *static_cast<Locale *>(builder.supportedLocales_->elementAt(i));
+            supportedLocales[i] = locale.clone();
+            if (supportedLocales[i] == nullptr) {
+                errorCode = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+            const Locale &supportedLocale = *supportedLocales[i];
+            LSR &lsr = lsrs[i] = getMaximalLsrOrUnd(likelySubtags, supportedLocale, errorCode);
+            lsr.setHashCode();
+            if (U_FAILURE(errorCode)) { return; }
+            if (idef < 0 && defLSR != nullptr && lsr == *defLSR) {
+                idef = i;
+                defLSR = &lsr;  // owned pointer to put into supportedLsrToIndex
+                if (*def == supportedLocale) {
+                    def = &supportedLocale;  // owned pointer to keep
+                }
+            }
+        }
+
+        // We need an unordered map from LSR to first supported locale with that LSR,
+        // and an ordered list of (LSR, supported index).
+        // We insert the supported locales in the following order:
+        // 1. Default locale, if it is supported.
+        // 2. Priority locales (aka "paradigm locales") in builder order.
+        // 3. Remaining locales in builder order.
+        // In Java, we use a LinkedHashMap for both map & ordered lists.
+        // In C++, we use separate structures.
+        // We over-allocate arrays of LSRs and indexes for simplicity.
+        // We reserve slots at the array starts for the default and paradigm locales,
+        // plus enough for all supported locales.
+        // If there are few paradigm locales and few duplicate supported LSRs,
+        // then the amount of wasted space is small.
+        supportedLsrToIndex = uhash_openSize(hashLSR, compareLSRs, uhash_compareLong,
+                                             supportedLocalesLength, &errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+        int32_t paradigmLimit = 1 + localeDistance.getParadigmLSRsLength();
+        int32_t suppLSRsCapacity = paradigmLimit + supportedLocalesLength;
+        supportedLSRs = static_cast<const LSR **>(
+            uprv_malloc(suppLSRsCapacity * sizeof(const LSR *)));
+        supportedIndexes = static_cast<int32_t *>(
+            uprv_malloc(suppLSRsCapacity * sizeof(int32_t)));
+        if (supportedLSRs == nullptr || supportedIndexes == nullptr) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        int32_t paradigmIndex = 0;
+        int32_t otherIndex = paradigmLimit;
+        if (idef >= 0) {
+            uhash_puti(supportedLsrToIndex, const_cast<LSR *>(defLSR), idef + 1, &errorCode);
+            supportedLSRs[0] = defLSR;
+            supportedIndexes[0] = idef;
+            paradigmIndex = 1;
+        }
+        for (int32_t i = 0; i < supportedLocalesLength; ++i) {
+            if (i == idef) { continue; }
+            const Locale &locale = *supportedLocales[i];
+            const LSR &lsr = lsrs[i];
+            if (defLSR == nullptr) {
+                U_ASSERT(i == 0);
+                def = &locale;
+                defLSR = &lsr;
+                idef = 0;
+                uhash_puti(supportedLsrToIndex, const_cast<LSR *>(&lsr), 0 + 1, &errorCode);
+                supportedLSRs[0] = &lsr;
+                supportedIndexes[0] = 0;
+                paradigmIndex = 1;
+            } else if (idef >= 0 && lsr == *defLSR) {
+                // lsr == *defLSR means that this supported locale is
+                // a duplicate of the default locale.
+                // Either an explicit default locale is supported, and we added it before the loop,
+                // or there is no explicit default locale, and this is
+                // a duplicate of the first supported locale.
+                // In both cases, idef >= 0 now, so otherwise we can skip the comparison.
+                // For a duplicate, putIfAbsent() is a no-op, so nothing to do.
+            } else {
+                if (putIfAbsent(supportedLsrToIndex, lsr, i + 1, errorCode)) {
+                    if (localeDistance.isParadigmLSR(lsr)) {
+                        supportedLSRs[paradigmIndex] = &lsr;
+                        supportedIndexes[paradigmIndex++] = i;
+                    } else {
+                        supportedLSRs[otherIndex] = &lsr;
+                        supportedIndexes[otherIndex++] = i;
+                    }
+                }
+            }
+            if (U_FAILURE(errorCode)) { return; }
+        }
+        // Squeeze out unused array slots.
+        if (paradigmIndex < paradigmLimit && paradigmLimit < otherIndex) {
+            uprv_memmove(supportedLSRs + paradigmIndex, supportedLSRs + paradigmLimit,
+                         (otherIndex - paradigmLimit) * sizeof(const LSR *));
+            uprv_memmove(supportedIndexes + paradigmIndex, supportedIndexes + paradigmLimit,
+                         (otherIndex - paradigmLimit) * sizeof(int32_t));
+        }
+        supportedLSRsLength = otherIndex - (paradigmLimit - paradigmIndex);
+    }
+
+    // Keep a private copy of the default locale unless it is
+    // one of the cloned supported locales.
+    if (def != nullptr && (idef < 0 || def != supportedLocales[idef])) {
+        ownedDefaultLocale = def->clone();
+        if (ownedDefaultLocale == nullptr) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        def = ownedDefaultLocale;
+    }
+    defaultLocale = def;
+    defaultLocaleIndex = idef;
+
+    if (builder.demotion_ == ULOCMATCH_DEMOTION_REGION) {
+        demotionPerDesiredLocale = localeDistance.getDefaultDemotionPerDesiredLocale();
+    }
+}
+
+// Move constructor: takes over all of src's arrays, hash table and locales.
+LocaleMatcher::LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT :
+        likelySubtags(src.likelySubtags),
+        localeDistance(src.localeDistance),
+        thresholdDistance(src.thresholdDistance),
+        demotionPerDesiredLocale(src.demotionPerDesiredLocale),
+        favorSubtag(src.favorSubtag),
+        supportedLocales(src.supportedLocales), lsrs(src.lsrs),
+        supportedLocalesLength(src.supportedLocalesLength),
+        supportedLsrToIndex(src.supportedLsrToIndex),
+        supportedLSRs(src.supportedLSRs),
+        supportedIndexes(src.supportedIndexes),
+        supportedLSRsLength(src.supportedLSRsLength),
+        ownedDefaultLocale(src.ownedDefaultLocale), defaultLocale(src.defaultLocale),
+        defaultLocaleIndex(src.defaultLocaleIndex) {
+    // Leave src empty so that its destructor releases nothing.
+    // Lengths and indexes:
+    src.supportedLocalesLength = 0;
+    src.supportedLSRsLength = 0;
+    src.defaultLocaleIndex = -1;
+    // Pointers:
+    src.supportedLocales = nullptr;
+    src.lsrs = nullptr;
+    src.supportedLsrToIndex = nullptr;
+    src.supportedLSRs = nullptr;
+    src.supportedIndexes = nullptr;
+    src.ownedDefaultLocale = nullptr;
+    src.defaultLocale = nullptr;
+}
+
+// Releases the cloned supported locales and all lookup structures.
+LocaleMatcher::~LocaleMatcher() {
+    int32_t i = 0;
+    while (i < supportedLocalesLength) {
+        delete supportedLocales[i];
+        ++i;
+    }
+    uprv_free(supportedLocales);
+    delete[] lsrs;
+    uhash_close(supportedLsrToIndex);
+    uprv_free(supportedLSRs);
+    uprv_free(supportedIndexes);
+    delete ownedDefaultLocale;
+}
+
+/**
+ * Move assignment: releases this matcher's resources, then takes over src's.
+ * src is left empty. The reference members likelySubtags and localeDistance
+ * are not reassigned here.
+ */
+LocaleMatcher &LocaleMatcher::operator=(LocaleMatcher &&src) U_NOEXCEPT {
+    // Self-move must be a no-op: running the destructor first would free
+    // the very arrays and locales we are about to take over.
+    if (this == &src) { return *this; }
+    this->~LocaleMatcher();
+
+    thresholdDistance = src.thresholdDistance;
+    demotionPerDesiredLocale = src.demotionPerDesiredLocale;
+    favorSubtag = src.favorSubtag;
+    supportedLocales = src.supportedLocales;
+    lsrs = src.lsrs;
+    supportedLocalesLength = src.supportedLocalesLength;
+    supportedLsrToIndex = src.supportedLsrToIndex;
+    supportedLSRs = src.supportedLSRs;
+    supportedIndexes = src.supportedIndexes;
+    supportedLSRsLength = src.supportedLSRsLength;
+    ownedDefaultLocale = src.ownedDefaultLocale;
+    defaultLocale = src.defaultLocale;
+    defaultLocaleIndex = src.defaultLocaleIndex;
+
+    // Leave src empty so that its destructor releases nothing.
+    src.supportedLocales = nullptr;
+    src.lsrs = nullptr;
+    src.supportedLocalesLength = 0;
+    src.supportedLsrToIndex = nullptr;
+    src.supportedLSRs = nullptr;
+    src.supportedIndexes = nullptr;
+    src.supportedLSRsLength = 0;
+    src.ownedDefaultLocale = nullptr;
+    src.defaultLocale = nullptr;
+    src.defaultLocaleIndex = -1;
+    return *this;
+}
+
+/**
+ * Adapts a Locale::Iterator so that each step yields the maximized LSR of
+ * the next locale, and remembers the locale that was current when
+ * rememberCurrent() was last called.
+ *
+ * With ULOCMATCH_TEMPORARY_LOCALES the remembered locale is a heap copy that
+ * this object deletes unless it is taken away via orphanRemembered().
+ * With ULOCMATCH_STORED_LOCALES only a pointer to the iterator's locale is kept.
+ */
+class LocaleLsrIterator {
+public:
+    LocaleLsrIterator(const XLikelySubtags &likelySubtags, Locale::Iterator &locales,
+                      ULocMatchLifetime lifetime) :
+            likelySubtags(likelySubtags), locales(locales), lifetime(lifetime) {}
+
+    // This class may own `remembered` through a raw pointer;
+    // a copy would lead to a double delete, so copying is disabled.
+    LocaleLsrIterator(const LocaleLsrIterator &other) = delete;
+    LocaleLsrIterator &operator=(const LocaleLsrIterator &other) = delete;
+
+    ~LocaleLsrIterator() {
+        if (lifetime == ULOCMATCH_TEMPORARY_LOCALES) {
+            delete remembered;
+        }
+    }
+
+    bool hasNext() const {
+        return locales.hasNext();
+    }
+
+    /** Advances to the next locale and returns its maximized LSR (or "und"). */
+    LSR next(UErrorCode &errorCode) {
+        current = &locales.next();
+        return getMaximalLsrOrUnd(likelySubtags, *current, errorCode);
+    }
+
+    /** Records the current locale and its index as the best desired one so far. */
+    void rememberCurrent(int32_t desiredIndex, UErrorCode &errorCode) {
+        if (U_FAILURE(errorCode)) { return; }
+        bestDesiredIndex = desiredIndex;
+        if (lifetime == ULOCMATCH_STORED_LOCALES) {
+            remembered = current;
+        } else {
+            // ULOCMATCH_TEMPORARY_LOCALES
+            delete remembered;
+            remembered = new Locale(*current);
+            if (remembered == nullptr) {
+                errorCode = U_MEMORY_ALLOCATION_ERROR;
+            }
+        }
+    }
+
+    /** Hands the remembered locale to the caller; this object forgets it. */
+    const Locale *orphanRemembered() {
+        const Locale *rem = remembered;
+        remembered = nullptr;
+        return rem;
+    }
+
+    int32_t getBestDesiredIndex() const {
+        return bestDesiredIndex;
+    }
+
+private:
+    const XLikelySubtags &likelySubtags;
+    Locale::Iterator &locales;
+    ULocMatchLifetime lifetime;
+    const Locale *current = nullptr, *remembered = nullptr;
+    int32_t bestDesiredIndex = -1;
+};
+
+// Returns the best supported locale for one desired locale,
+// the default locale if nothing matches, or nullptr if errorCode was already a failure.
+const Locale *LocaleMatcher::getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return nullptr; }
+    int32_t suppIndex = getBestSuppIndex(
+        getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
+        nullptr, errorCode);
+    if (U_FAILURE(errorCode) || suppIndex < 0) {
+        return defaultLocale;
+    }
+    return supportedLocales[suppIndex];
+}
+
+// Returns the best supported locale for a sequence of desired locales,
+// the default locale if the sequence is empty or nothing matches,
+// or nullptr if errorCode was already a failure.
+const Locale *LocaleMatcher::getBestMatch(Locale::Iterator &desiredLocales,
+                                          UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return nullptr; }
+    if (desiredLocales.hasNext()) {
+        LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
+        int32_t suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
+        if (U_SUCCESS(errorCode) && suppIndex >= 0) {
+            return supportedLocales[suppIndex];
+        }
+    }
+    return defaultLocale;
+}
+
+// Parses the list string into a priority list and matches against its locales.
+// Failures are reported through errorCode by the parser and by getBestMatch().
+const Locale *LocaleMatcher::getBestMatchForListString(
+        StringPiece desiredLocaleList, UErrorCode &errorCode) const {
+    LocalePriorityList priorityList(desiredLocaleList, errorCode);
+    LocalePriorityList::Iterator desired = priorityList.iterator();
+    return getBestMatch(desired, errorCode);
+}
+
+// Like getBestMatch(const Locale &, ...), but returns a Result with indexes.
+// On failure or no match, the Result carries the default locale and no desired locale.
+LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
+        const Locale &desiredLocale, UErrorCode &errorCode) const {
+    if (U_SUCCESS(errorCode)) {
+        int32_t suppIndex = getBestSuppIndex(
+            getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
+            nullptr, errorCode);
+        if (U_SUCCESS(errorCode) && suppIndex >= 0) {
+            // The caller's locale is only borrowed, hence not owned.
+            return Result(&desiredLocale, supportedLocales[suppIndex], 0, suppIndex, FALSE);
+        }
+    }
+    return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE);
+}
+
+// Like getBestMatch(Locale::Iterator &, ...), but returns a Result with indexes.
+// On failure, empty input or no match, the Result carries the default locale
+// and no desired locale.
+LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
+        Locale::Iterator &desiredLocales, UErrorCode &errorCode) const {
+    if (U_SUCCESS(errorCode) && desiredLocales.hasNext()) {
+        LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
+        int32_t suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
+        if (U_SUCCESS(errorCode) && suppIndex >= 0) {
+            // The remembered locale is handed over to the Result, which owns it.
+            return Result(lsrIter.orphanRemembered(), supportedLocales[suppIndex],
+                          lsrIter.getBestDesiredIndex(), suppIndex, TRUE);
+        }
+    }
+    return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE);
+}
+
+/**
+ * Finds the index of the best supported locale for the desired LSR(s).
+ *
+ * @param desiredLSR    the maximized LSR of the first desired locale
+ * @param remainingIter iterator over further desired locales, or nullptr if
+ *                      there is only the one; it is told which desired locale
+ *                      produced the best match so far
+ * @param errorCode     in/out error code
+ * @return index into supportedLocales, or -1 on failure or if nothing is
+ *         within the threshold distance
+ */
+int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter,
+                                        UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return -1; }
+    int32_t desiredIndex = 0;
+    int32_t bestSupportedLsrIndex = -1;
+    for (int32_t bestDistance = thresholdDistance;;) {
+        // Quick check for exact maximized LSR.
+        // Returns suppIndex+1 where 0 means not found.
+        if (supportedLsrToIndex != nullptr) {
+            desiredLSR.setHashCode();
+            int32_t index = uhash_geti(supportedLsrToIndex, &desiredLSR);
+            if (index != 0) {
+                if (remainingIter != nullptr) {
+                    remainingIter->rememberCurrent(desiredIndex, errorCode);
+                    // Same failure handling as for a distance-based match below.
+                    if (U_FAILURE(errorCode)) { return -1; }
+                }
+                return index - 1;
+            }
+        }
+        int32_t bestIndexAndDistance = localeDistance.getBestIndexAndDistance(
+            desiredLSR, supportedLSRs, supportedLSRsLength, bestDistance, favorSubtag);
+        if (bestIndexAndDistance >= 0) {
+            // A non-negative result has the distance in the low 8 bits
+            // and the supported-LSR index above them.
+            bestDistance = bestIndexAndDistance & 0xff;
+            if (remainingIter != nullptr) {
+                remainingIter->rememberCurrent(desiredIndex, errorCode);
+                if (U_FAILURE(errorCode)) { return -1; }
+            }
+            bestSupportedLsrIndex = bestIndexAndDistance >> 8;
+        }
+        // Each further desired locale has to beat a smaller distance.
+        if ((bestDistance -= demotionPerDesiredLocale) <= 0) {
+            break;
+        }
+        if (remainingIter == nullptr || !remainingIter->hasNext()) {
+            break;
+        }
+        desiredLSR = remainingIter->next(errorCode);
+        if (U_FAILURE(errorCode)) { return -1; }
+        ++desiredIndex;
+    }
+    if (bestSupportedLsrIndex < 0) {
+        // no good match
+        return -1;
+    }
+    return supportedIndexes[bestSupportedLsrIndex];
+}
+
+// Returns the inverse of the distance: That is, 1-distance(desired, supported),
+// with the distance taken from the low 8 bits of getBestIndexAndDistance()
+// and scaled by 1/100. Returns 0 if the supported LSR cannot be computed.
+double LocaleMatcher::internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const {
+    LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
+    if (U_FAILURE(errorCode)) { return 0; }
+    // getBestIndexAndDistance() takes an array of LSR pointers; pass one element.
+    const LSR *pSuppLSR = &suppLSR;
+    int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
+        getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
+        &pSuppLSR, 1,
+        thresholdDistance, favorSubtag);
+    int32_t distance = indexAndDistance & 0xff;
+    return (100 - distance) / 100.0;
+}
+
+U_NAMESPACE_END
+
+#endif // __LOCMATCHER_H__
diff --git a/source/common/localeprioritylist.cpp b/source/common/localeprioritylist.cpp
new file mode 100644
index 00000000..06442fb4
--- /dev/null
+++ b/source/common/localeprioritylist.cpp
@@ -0,0 +1,239 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// localeprioritylist.cpp
+// created: 2019jul11 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "unicode/localpointer.h"
+#include "unicode/locid.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "localeprioritylist.h"
+#include "uarrsort.h"
+#include "uassert.h"
+#include "uhash.h"
+
+U_NAMESPACE_BEGIN
+
+namespace {
+
+/** UHashtable key hash function: the key pointer is a const Locale *. */
+int32_t hashLocale(const UHashTok token) {
+    const Locale &key = *static_cast<const Locale *>(token.pointer);
+    return key.hashCode();
+}
+
+/** UHashtable key comparator: both key pointers are const Locale *. */
+UBool compareLocales(const UHashTok t1, const UHashTok t2) {
+    const Locale &first = *static_cast<const Locale *>(t1.pointer);
+    const Locale &second = *static_cast<const Locale *>(t2.pointer);
+    return first == second;
+}
+
+constexpr int32_t WEIGHT_ONE = 1000;
+
+/** One list entry: a locale pointer, its weight, and its insertion index. */
+struct LocaleAndWeight {
+    Locale *locale;
+    int32_t weight;  // 0..1000 = 0.0..1.0
+    int32_t index;  // force stable sort
+
+    /**
+     * Three-way comparison for sorting:
+     * higher weights sort first; equal weights keep ascending index order.
+     */
+    int32_t compare(const LocaleAndWeight &other) const {
+        if (weight != other.weight) {
+            return other.weight - weight;  // descending: other-this
+        }
+        return index - other.index;
+    }
+};
+
+/** uprv_sortArray() comparator over LocaleAndWeight items; the context is unused. */
+int32_t U_CALLCONV
+compareLocaleAndWeight(const void * /*context*/, const void *left, const void *right) {
+    const LocaleAndWeight &lhs = *static_cast<const LocaleAndWeight *>(left);
+    const LocaleAndWeight &rhs = *static_cast<const LocaleAndWeight *>(right);
+    return lhs.compare(rhs);
+}
+
+/** Returns the first position in [p, limit) that is not a space character, or limit. */
+const char *skipSpaces(const char *p, const char *limit) {
+    for (; p < limit; ++p) {
+        if (*p != ' ') { break; }
+    }
+    return p;
+}
+
+/**
+ * Returns the number of chars from p up to (not including) the next
+ * accept-language delimiter (space, comma, semicolon) or limit.
+ * Other validation is left up to the Locale constructor.
+ */
+int32_t findTagLength(const char *p, const char *limit) {
+    const char *end = p;
+    while (end < limit && *end != ' ' && *end != ',' && *end != ';') {
+        ++end;
+    }
+    return static_cast<int32_t>(end - p);
+}
+
+/**
+ * Parses a qvalue ("0", "1", or either followed by '.' and fraction digits)
+ * and returns it as a weight in thousandths.
+ * Leading spaces are skipped. p is advanced to just after the parsed substring.
+ * The first three fraction digits are used, the fourth one rounds,
+ * and further digits are consumed but ignored.
+ * Returns a negative value if parsing fails or the value is greater than 1.0.
+ */
+int32_t parseWeight(const char *&p, const char *limit) {
+    p = skipSpaces(p, limit);
+    if (p == limit) { return -1; }
+    char c = *p;
+    if (c != '0' && c != '1') { return -1; }
+    int32_t weight = (c - '0') * 1000;
+    ++p;
+    if (p == limit || *p != '.') { return weight; }
+    int32_t multiplier = 100;
+    for (++p; p != limit; ++p) {
+        c = *p;
+        if (c < '0' || '9' < c) { break; }
+        int32_t digit = c - '0';
+        if (multiplier > 0) {
+            weight += digit * multiplier;
+            multiplier /= 10;
+        } else if (multiplier == 0) {
+            // fourth fraction digit: round up
+            if (digit >= 5) { ++weight; }
+            multiplier = -1;
+        }  // else ignore further fraction digits
+    }
+    if (weight > WEIGHT_ONE) { return -1; }  // bad if > 1.0
+    return weight;
+}
+
+} // namespace
+
+/**
+ * Nothing but a wrapper over a MaybeStackArray of LocaleAndWeight.
+ *
+ * This wrapper exists (and is not in an anonymous namespace)
+ * so that we can forward-declare it in the header file and
+ * don't have to expose the MaybeStackArray specialization and
+ * the LocaleAndWeight to code (like the test) that #includes localeprioritylist.h.
+ * Also, otherwise we would have to do a platform-specific
+ * template export declaration of some kind for the MaybeStackArray specialization
+ * to be properly exported from the common DLL.
+ */
+struct LocaleAndWeightArray : public UMemory {
+ // The list items; LocalePriorityList::add() resizes this when it is full.
+ MaybeStackArray<LocaleAndWeight, 20> array;
+};
+
+/**
+ * Parses an accept-language style string into a list of locales with weights,
+ * then sorts the list by descending weight if any weight is below 1.0.
+ *
+ * Sets U_ILLEGAL_ARGUMENT_ERROR for syntax errors (empty tag, bad ";q=" suffix,
+ * trailing junk) and for a tag that the Locale constructor rejects as bogus.
+ * Sets U_MEMORY_ALLOCATION_ERROR if the list storage cannot be allocated.
+ * Parsing stops at the first error.
+ */
+LocalePriorityList::LocalePriorityList(StringPiece s, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return; }
+    list = new LocaleAndWeightArray();
+    if (list == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    const char *p = s.data();
+    const char *limit = p + s.length();
+    while ((p = skipSpaces(p, limit)) != limit) {
+        if (*p == ',') {  // empty range field
+            ++p;
+            continue;
+        }
+        int32_t tagLength = findTagLength(p, limit);
+        if (tagLength == 0) {
+            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+        // Copy the tag so that the Locale constructor gets a NUL-terminated string.
+        CharString tag(p, tagLength, errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+        Locale locale = Locale(tag.data());
+        if (locale.isBogus()) {
+            // The tag comes from the caller's string, so a bogus result is
+            // reported as bad input rather than as an out-of-memory condition.
+            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+        int32_t weight = WEIGHT_ONE;
+        if ((p = skipSpaces(p + tagLength, limit)) != limit && *p == ';') {
+            // Expect: ";" "q" "=" qvalue, with optional spaces in between.
+            if ((p = skipSpaces(p + 1, limit)) == limit || *p != 'q' ||
+                    (p = skipSpaces(p + 1, limit)) == limit || *p != '=' ||
+                    (++p, (weight = parseWeight(p, limit)) < 0)) {
+                errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+                return;
+            }
+            p = skipSpaces(p, limit);
+        }
+        if (p != limit && *p != ',') {  // trailing junk
+            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+        // add() returns false only after setting a failure code; stop parsing then.
+        if (!add(locale, weight, errorCode)) { return; }
+        if (p == limit) { break; }
+        ++p;
+    }
+    sort(errorCode);
+}
+
+/** Deletes the Locale objects still held by the list, the list itself, and the map. */
+LocalePriorityList::~LocalePriorityList() {
+    if (list != nullptr) {
+        LocaleAndWeight *items = list->array.getAlias();
+        LocaleAndWeight *itemsLimit = items + listLength;
+        for (LocaleAndWeight *item = items; item < itemsLimit; ++item) {
+            // Removed or orphaned slots hold nullptr, which is fine to delete.
+            delete item->locale;
+        }
+        delete list;
+    }
+    uhash_close(map);
+}
+
+/** Returns the locale pointer stored in slot i; it is nullptr for a removed or orphaned slot. */
+const Locale *LocalePriorityList::localeAt(int32_t i) const {
+    const LocaleAndWeight &item = list->array[i];
+    return item.locale;
+}
+
+/**
+ * Hands the locale pointer in slot i over to the caller and clears the slot.
+ * Returns nullptr if there is no list.
+ */
+Locale *LocalePriorityList::orphanLocaleAt(int32_t i) {
+    Locale *orphan = nullptr;
+    if (list != nullptr) {
+        Locale *&slot = list->array[i].locale;
+        orphan = slot;
+        slot = nullptr;
+    }
+    return orphan;
+}
+
+/**
+ * Appends the locale with the given weight (in thousandths) to the end of the list.
+ * If an equal locale is already in the list, then its old slot is cleared
+ * (and counted in numRemoved) and its Locale object is reused for the new item.
+ * A weight <= 0 (q=0) adds nothing but still removes an existing duplicate.
+ * Returns false only when errorCode indicates a failure.
+ */
+bool LocalePriorityList::add(const Locale &locale, int32_t weight, UErrorCode &errorCode) {
+ if (U_FAILURE(errorCode)) { return false; }
+ // The map is created lazily, when the first item with a positive weight is added.
+ if (map == nullptr) {
+ if (weight <= 0) { return true; } // do not add q=0
+ map = uhash_open(hashLocale, compareLocales, uhash_compareLong, &errorCode);
+ if (U_FAILURE(errorCode)) { return false; }
+ }
+ LocalPointer<Locale> clone;
+ // Map values are list index + 1, so that 0 means "not found".
+ int32_t index = uhash_geti(map, &locale);
+ if (index != 0) {
+ // Duplicate: Remove the old item and append it anew.
+ LocaleAndWeight &lw = list->array[index - 1];
+ clone.adoptInstead(lw.locale);
+ lw.locale = nullptr;
+ lw.weight = 0;
+ ++numRemoved;
+ }
+ if (weight <= 0) { // do not add q=0
+ if (index != 0) {
+ // Not strictly necessary but cleaner.
+ uhash_removei(map, &locale);
+ }
+ // If a duplicate was removed, then clone owns its Locale and deletes it on return.
+ return true;
+ }
+ if (clone.isNull()) {
+ clone.adoptInstead(locale.clone());
+ // A clone that turned bogus although the source is not is treated as out of memory.
+ if (clone.isNull() || (clone->isBogus() && !locale.isBogus())) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return false;
+ }
+ }
+ // Grow the array when it is full.
+ if (listLength == list->array.getCapacity()) {
+ int32_t newCapacity = listLength < 50 ? 100 : 4 * listLength;
+ if (list->array.resize(newCapacity, listLength) == nullptr) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return false;
+ }
+ }
+ // The map key is the Locale object that the list item points to.
+ uhash_puti(map, clone.getAlias(), listLength + 1, &errorCode);
+ if (U_FAILURE(errorCode)) { return false; }
+ LocaleAndWeight &lw = list->array[listLength];
+ lw.locale = clone.orphan();
+ lw.weight = weight;
+ lw.index = listLength++;
+ // Remember that sort() has work to do.
+ if (weight < WEIGHT_ONE) { hasWeights = true; }
+ U_ASSERT(uhash_count(map) == getLength());
+ return true;
+}
+
+/**
+ * Sorts the list by descending weights, but only if there is a mix of weights
+ * and more than one item. The comparator uses the item index as a tie-breaker,
+ * which keeps items with equal weights in their original order.
+ */
+void LocalePriorityList::sort(UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return; }
+    if (!hasWeights || getLength() <= 1) { return; }
+    LocaleAndWeight *items = list->array.getAlias();
+    uprv_sortArray(items, listLength, sizeof(LocaleAndWeight),
+                   compareLocaleAndWeight, nullptr, FALSE, &errorCode);
+}
+
+U_NAMESPACE_END
diff --git a/source/common/localeprioritylist.h b/source/common/localeprioritylist.h
new file mode 100644
index 00000000..80ca38a7
--- /dev/null
+++ b/source/common/localeprioritylist.h
@@ -0,0 +1,115 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// localeprioritylist.h
+// created: 2019jul11 Markus W. Scherer
+
+#ifndef __LOCALEPRIORITYLIST_H__
+#define __LOCALEPRIORITYLIST_H__
+
+#include "unicode/utypes.h"
+#include "unicode/locid.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+
+struct UHashtable;
+
+U_NAMESPACE_BEGIN
+
+struct LocaleAndWeightArray;
+
+/**
+ * Parses a list of locales from an accept-language string.
+ * We are a bit more lenient than the spec:
+ * We accept extra whitespace in more places, empty range fields,
+ * and any number of qvalue fraction digits.
+ *
+ * https://tools.ietf.org/html/rfc2616#section-14.4
+ * 14.4 Accept-Language
+ *
+ * Accept-Language = "Accept-Language" ":"
+ * 1#( language-range [ ";" "q" "=" qvalue ] )
+ * language-range = ( ( 1*8ALPHA *( "-" 1*8ALPHA ) ) | "*" )
+ *
+ * Each language-range MAY be given an associated quality value which
+ * represents an estimate of the user's preference for the languages
+ * specified by that range. The quality value defaults to "q=1". For
+ * example,
+ *
+ * Accept-Language: da, en-gb;q=0.8, en;q=0.7
+ *
+ * https://tools.ietf.org/html/rfc2616#section-3.9
+ * 3.9 Quality Values
+ *
+ * HTTP content negotiation (section 12) uses short "floating point"
+ * numbers to indicate the relative importance ("weight") of various
+ * negotiable parameters. A weight is normalized to a real number in
+ * the range 0 through 1, where 0 is the minimum and 1 the maximum
+ * value. If a parameter has a quality value of 0, then content with
+ * this parameter is `not acceptable' for the client. HTTP/1.1
+ * applications MUST NOT generate more than three digits after the
+ * decimal point. User configuration of these values SHOULD also be
+ * limited in this fashion.
+ *
+ * qvalue = ( "0" [ "." 0*3DIGIT ] )
+ * | ( "1" [ "." 0*3("0") ] )
+ */
+class U_COMMON_API LocalePriorityList : public UMemory {
+public:
+ /**
+ * Iterates over the locales in the list, skipping slots whose locale pointer is nullptr.
+ * The number of locales to return is fixed at construction time from list.getLength().
+ */
+ class Iterator : public Locale::Iterator {
+ public:
+ UBool hasNext() const override { return count < length; }
+
+ // Must only be called while hasNext() is true:
+ // The loop does not check for the end of the list.
+ const Locale &next() override {
+ for(;;) {
+ const Locale *locale = list.localeAt(index++);
+ if (locale != nullptr) {
+ ++count;
+ return *locale;
+ }
+ }
+ }
+
+ private:
+ friend class LocalePriorityList;
+
+ Iterator(const LocalePriorityList &list) : list(list), length(list.getLength()) {}
+
+ const LocalePriorityList &list;
+ // Next list slot to look at, including removed slots.
+ int32_t index = 0;
+ // Number of locales returned so far.
+ int32_t count = 0;
+ const int32_t length;
+ };
+
+ // Parses the accept-language string s; sets errorCode on failure.
+ LocalePriorityList(StringPiece s, UErrorCode &errorCode);
+
+ ~LocalePriorityList();
+
+ // Number of list slots minus the number of removed ones.
+ int32_t getLength() const { return listLength - numRemoved; }
+
+ // Number of list slots, valid index range for localeAt() and orphanLocaleAt().
+ int32_t getLengthIncludingRemoved() const { return listLength; }
+
+ Iterator iterator() const { return Iterator(*this); }
+
+ // Returns the locale in slot i, or nullptr if that slot was removed or orphaned.
+ const Locale *localeAt(int32_t i) const;
+
+ // Returns the locale in slot i and sets the slot to nullptr.
+ Locale *orphanLocaleAt(int32_t i);
+
+private:
+ LocalePriorityList(const LocalePriorityList &) = delete;
+ LocalePriorityList &operator=(const LocalePriorityList &) = delete;
+
+ bool add(const Locale &locale, int32_t weight, UErrorCode &errorCode);
+
+ void sort(UErrorCode &errorCode);
+
+ LocaleAndWeightArray *list = nullptr;
+ int32_t listLength = 0;
+ int32_t numRemoved = 0;
+ bool hasWeights = false; // other than 1.0
+ // Maps each Locale in the list to its list index + 1.
+ UHashtable *map = nullptr;
+};
+
+U_NAMESPACE_END
+
+#endif // __LOCALEPRIORITYLIST_H__
diff --git a/source/common/locdistance.cpp b/source/common/locdistance.cpp
new file mode 100644
index 00000000..800d0eac
--- /dev/null
+++ b/source/common/locdistance.cpp
@@ -0,0 +1,364 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// locdistance.cpp
+// created: 2019may08 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/localematcher.h"
+#include "unicode/locid.h"
+#include "unicode/uobject.h"
+#include "unicode/ures.h"
+#include "cstring.h"
+#include "locdistance.h"
+#include "loclikelysubtags.h"
+#include "uassert.h"
+#include "ucln_cmn.h"
+#include "uinvchar.h"
+#include "umutex.h"
+
+U_NAMESPACE_BEGIN
+
+namespace {
+
+/**
+ * Bit flag used on the last character of a subtag in the trie.
+ * Must be set consistently by the builder and the lookup code.
+ */
+constexpr int32_t END_OF_SUBTAG = 0x80;
+/** Distance value bit flag, set by the builder. */
+constexpr int32_t DISTANCE_SKIP_SCRIPT = 0x80;
+/** Distance value bit flag, set by trieNext(). */
+constexpr int32_t DISTANCE_IS_FINAL = 0x100;
+constexpr int32_t DISTANCE_IS_FINAL_OR_SKIP_SCRIPT = DISTANCE_IS_FINAL | DISTANCE_SKIP_SCRIPT;
+
+constexpr int32_t ABOVE_THRESHOLD = 100;
+
+// Indexes into array of distances.
+enum {
+ IX_DEF_LANG_DISTANCE,
+ IX_DEF_SCRIPT_DISTANCE,
+ IX_DEF_REGION_DISTANCE,
+ IX_MIN_REGION_DISTANCE,
+ IX_LIMIT
+};
+
+LocaleDistance *gLocaleDistance = nullptr;
+UInitOnce gInitOnce = U_INITONCE_INITIALIZER;
+
+/** Library cleanup callback: deletes the singleton and re-arms its init-once flag. */
+UBool U_CALLCONV cleanup() {
+    LocaleDistance *old = gLocaleDistance;
+    gLocaleDistance = nullptr;
+    delete old;
+    gInitOnce.reset();
+    return TRUE;
+}
+
+} // namespace
+
+/**
+ * Creates the LocaleDistance singleton from the distance data
+ * that the XLikelySubtags singleton provides, and registers the cleanup function.
+ * Sets U_MISSING_RESOURCE_ERROR if required data is missing,
+ * or U_MEMORY_ALLOCATION_ERROR if the object cannot be allocated.
+ */
+void U_CALLCONV LocaleDistance::initLocaleDistance(UErrorCode &errorCode) {
+    // This function is invoked only via umtx_initOnce().
+    U_ASSERT(gLocaleDistance == nullptr);
+    // Keep the pointer and check for failure before dereferencing it:
+    // Binding a reference to *getSingleton() first would dereference
+    // a null pointer when the likely-subtags data fails to load.
+    const XLikelySubtags *likely = XLikelySubtags::getSingleton(errorCode);
+    if (U_FAILURE(errorCode)) { return; }
+    const LocaleDistanceData &data = likely->getDistanceData();
+    if (data.distanceTrieBytes == nullptr ||
+            data.regionToPartitions == nullptr || data.partitions == nullptr ||
+            // ok if no paradigms
+            data.distances == nullptr) {
+        errorCode = U_MISSING_RESOURCE_ERROR;
+        return;
+    }
+    gLocaleDistance = new LocaleDistance(data);
+    if (gLocaleDistance == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    ucln_common_registerCleanup(UCLN_COMMON_LOCALE_DISTANCE, cleanup);
+}
+
+/**
+ * Returns the singleton, initializing it on first use.
+ * Returns nullptr if errorCode already indicates a failure on entry.
+ */
+const LocaleDistance *LocaleDistance::getSingleton(UErrorCode &errorCode) {
+    if (U_SUCCESS(errorCode)) {
+        umtx_initOnce(gInitOnce, &LocaleDistance::initLocaleDistance, errorCode);
+        return gLocaleDistance;
+    }
+    return nullptr;
+}
+
+/**
+ * Sets up the trie, the region partition tables, the paradigm LSRs and
+ * the default distances from the given data (the data pointers are stored, not copied),
+ * then computes the default demotion per desired locale.
+ */
+LocaleDistance::LocaleDistance(const LocaleDistanceData &data) :
+ trie(data.distanceTrieBytes),
+ regionToPartitionsIndex(data.regionToPartitions), partitionArrays(data.partitions),
+ paradigmLSRs(data.paradigms), paradigmLSRsLength(data.paradigmsLength),
+ defaultLanguageDistance(data.distances[IX_DEF_LANG_DISTANCE]),
+ defaultScriptDistance(data.distances[IX_DEF_SCRIPT_DISTANCE]),
+ defaultRegionDistance(data.distances[IX_DEF_REGION_DISTANCE]),
+ minRegionDistance(data.distances[IX_MIN_REGION_DISTANCE]) {
+ // For the default demotion value, use the
+ // default region distance between unrelated Englishes.
+ // Thus, unless demotion is turned off,
+ // a mere region difference for one desired locale
+ // is as good as a perfect match for the next following desired locale.
+ // As of CLDR 36, we have <languageMatch desired="en_*_*" supported="en_*_*" distance="5"/>.
+ LSR en("en", "Latn", "US");
+ LSR enGB("en", "Latn", "GB");
+ const LSR *p_enGB = &enGB;
+ // The lookup uses a threshold of 50; the distance is in bits 7..0 of the result.
+ defaultDemotionPerDesiredLocale = getBestIndexAndDistance(en, &p_enGB, 1,
+ 50, ULOCMATCH_FAVOR_LANGUAGE) & 0xff;
+}
+
+/**
+ * Finds the supported LSR with the lowest distance from the desired LSR,
+ * considering only distances below the threshold.
+ * The distance is the sum of the language, script and region distances.
+ *
+ * Returns (index << 8) | distance for the best supported LSR;
+ * returns immediately with index << 8 when a distance of 0 is found.
+ * If no supported LSR is below the threshold,
+ * returns 0xffffff00 | ABOVE_THRESHOLD, which is negative as an int32_t.
+ */
+int32_t LocaleDistance::getBestIndexAndDistance(
+ const LSR &desired,
+ const LSR **supportedLSRs, int32_t supportedLSRsLength,
+ int32_t threshold, ULocMatchFavorSubtag favorSubtag) const {
+ BytesTrie iter(trie);
+ // Look up the desired language only once for all supported LSRs.
+ // Its "distance" is either a match point value of 0, or a non-match negative value.
+ // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
+ int32_t desLangDistance = trieNext(iter, desired.language, false);
+ // Save the trie state after the desired language only if it will be needed again.
+ uint64_t desLangState = desLangDistance >= 0 && supportedLSRsLength > 1 ? iter.getState64() : 0;
+ // Index of the supported LSR with the lowest distance.
+ int32_t bestIndex = -1;
+ for (int32_t slIndex = 0; slIndex < supportedLSRsLength; ++slIndex) {
+ const LSR &supported = *supportedLSRs[slIndex];
+ // star: true when the language pair has no rule of its own and <*, *> defaults apply.
+ bool star = false;
+ int32_t distance = desLangDistance;
+ if (distance >= 0) {
+ U_ASSERT((distance & DISTANCE_IS_FINAL) == 0);
+ if (slIndex != 0) {
+ iter.resetToState64(desLangState);
+ }
+ distance = trieNext(iter, supported.language, true);
+ }
+ // Note: The data builder verifies that there are no rules with "any" (*) language and
+ // real (non *) script or region subtags.
+ // This means that if the lookup for either language fails we can use
+ // the default distances without further lookups.
+ int32_t flags;
+ if (distance >= 0) {
+ // Separate the flag bits from the distance value.
+ flags = distance & DISTANCE_IS_FINAL_OR_SKIP_SCRIPT;
+ distance &= ~DISTANCE_IS_FINAL_OR_SKIP_SCRIPT;
+ } else { // <*, *>
+ if (uprv_strcmp(desired.language, supported.language) == 0) {
+ distance = 0;
+ } else {
+ distance = defaultLanguageDistance;
+ }
+ flags = 0;
+ star = true;
+ }
+ U_ASSERT(0 <= distance && distance <= 100);
+ // We implement "favor subtag" by reducing the language subtag distance
+ // (unscientifically reducing it to a quarter of the normal value),
+ // so that the script distance is relatively more important.
+ // For example, given a default language distance of 80, we reduce it to 20,
+ // which is below the default threshold of 50, which is the default script distance.
+ if (favorSubtag == ULOCMATCH_FAVOR_SCRIPT) {
+ distance >>= 2;
+ }
+ if (distance >= threshold) {
+ continue;
+ }
+
+ int32_t scriptDistance;
+ // With any flag set (final value or skip-script), or without a language rule,
+ // there is no script lookup in the trie: use 0 or the default script distance.
+ if (star || flags != 0) {
+ if (uprv_strcmp(desired.script, supported.script) == 0) {
+ scriptDistance = 0;
+ } else {
+ scriptDistance = defaultScriptDistance;
+ }
+ } else {
+ scriptDistance = getDesSuppScriptDistance(iter, iter.getState64(),
+ desired.script, supported.script);
+ flags = scriptDistance & DISTANCE_IS_FINAL;
+ scriptDistance &= ~DISTANCE_IS_FINAL;
+ }
+ distance += scriptDistance;
+ if (distance >= threshold) {
+ continue;
+ }
+
+ if (uprv_strcmp(desired.region, supported.region) == 0) {
+ // regionDistance = 0
+ } else if (star || (flags & DISTANCE_IS_FINAL) != 0) {
+ distance += defaultRegionDistance;
+ } else {
+ int32_t remainingThreshold = threshold - distance;
+ // Skip the region lookup if even the minimum region distance is too much.
+ if (minRegionDistance >= remainingThreshold) {
+ continue;
+ }
+
+ // From here on we know the regions are not equal.
+ // Map each region to zero or more partitions. (zero = one non-matching string)
+ // (Each array of single-character partition strings is encoded as one string.)
+ // If either side has more than one, then we find the maximum distance.
+ // This could be optimized by adding some more structure, but probably not worth it.
+ distance += getRegionPartitionsDistance(
+ iter, iter.getState64(),
+ partitionsForRegion(desired),
+ partitionsForRegion(supported),
+ remainingThreshold);
+ }
+ if (distance < threshold) {
+ if (distance == 0) {
+ return slIndex << 8;
+ }
+ // New best match: lower the threshold so that only better matches replace it.
+ bestIndex = slIndex;
+ threshold = distance;
+ }
+ }
+ return bestIndex >= 0 ? (bestIndex << 8) | threshold : 0xffffff00 | ABOVE_THRESHOLD;
+}
+
+/**
+ * Looks up the distance for the (desired, supported) script pair,
+ * continuing from the current trie state.
+ * If there is no specific value, then resets the trie to startState and uses
+ * the <*, *> entry: 0 for equal scripts, otherwise the value of that entry.
+ * The result may have a DISTANCE_IS_FINAL bit set.
+ */
+int32_t LocaleDistance::getDesSuppScriptDistance(
+        BytesTrie &iter, uint64_t startState, const char *desired, const char *supported) {
+    // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
+    int32_t distance = trieNext(iter, desired, false);
+    if (distance >= 0) {
+        distance = trieNext(iter, supported, true);
+        if (distance >= 0) {
+            return distance;
+        }
+    }
+    // No specific script pair: fall back to <*, *>.
+    UStringTrieResult result = iter.resetToState64(startState).next(u'*');
+    U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
+    const bool sameScript = uprv_strcmp(desired, supported) == 0;
+    distance = sameScript ? 0 : iter.getValue();
+    U_ASSERT(distance >= 0);
+    if (result == USTRINGTRIE_FINAL_VALUE) {
+        distance |= DISTANCE_IS_FINAL;
+    }
+    return distance;
+}
+
+/**
+ * Computes the region distance from the partitions of the desired and supported regions.
+ * Each partitions argument is a NUL-terminated string of single-character partitions.
+ * With one partition on each side this is a single trie lookup (or the fallback value).
+ * Otherwise returns the maximum distance over the looked-up pairs,
+ * or returns early with the first distance that is at or above the threshold.
+ * The trie iterator must be at startState on entry.
+ */
+int32_t LocaleDistance::getRegionPartitionsDistance(
+ BytesTrie &iter, uint64_t startState,
+ const char *desiredPartitions, const char *supportedPartitions, int32_t threshold) {
+ char desired = *desiredPartitions++;
+ char supported = *supportedPartitions++;
+ U_ASSERT(desired != 0 && supported != 0);
+ // See if we have single desired/supported partitions, from NUL-terminated
+ // partition strings without explicit length.
+ bool suppLengthGt1 = *supportedPartitions != 0; // gt1: more than 1 character
+ // equivalent to: if (desLength == 1 && suppLength == 1)
+ if (*desiredPartitions == 0 && !suppLengthGt1) {
+ // Fastpath for single desired/supported partitions.
+ UStringTrieResult result = iter.next(uprv_invCharToAscii(desired) | END_OF_SUBTAG);
+ if (USTRINGTRIE_HAS_NEXT(result)) {
+ result = iter.next(uprv_invCharToAscii(supported) | END_OF_SUBTAG);
+ if (USTRINGTRIE_HAS_VALUE(result)) {
+ return iter.getValue();
+ }
+ }
+ return getFallbackRegionDistance(iter, startState);
+ }
+
+ const char *supportedStart = supportedPartitions - 1; // for restart of inner loop
+ int32_t regionDistance = 0;
+ // Fall back to * only once, not for each pair of partition strings.
+ bool star = false;
+ // Outer loop: one iteration per desired partition.
+ for (;;) {
+ // Look up each desired-partition string only once,
+ // not for each (desired, supported) pair.
+ UStringTrieResult result = iter.next(uprv_invCharToAscii(desired) | END_OF_SUBTAG);
+ if (USTRINGTRIE_HAS_NEXT(result)) {
+ // Save the state after the desired partition only if there are more supported ones.
+ uint64_t desState = suppLengthGt1 ? iter.getState64() : 0;
+ // Inner loop: one iteration per supported partition.
+ for (;;) {
+ result = iter.next(uprv_invCharToAscii(supported) | END_OF_SUBTAG);
+ int32_t d;
+ if (USTRINGTRIE_HAS_VALUE(result)) {
+ d = iter.getValue();
+ } else if (star) {
+ // The fallback distance has already been taken into account.
+ d = 0;
+ } else {
+ d = getFallbackRegionDistance(iter, startState);
+ star = true;
+ }
+ if (d >= threshold) {
+ return d;
+ } else if (regionDistance < d) {
+ regionDistance = d;
+ }
+ if ((supported = *supportedPartitions++) != 0) {
+ iter.resetToState64(desState);
+ } else {
+ break;
+ }
+ }
+ } else if (!star) {
+ // No entry for this desired partition: use the fallback distance once.
+ int32_t d = getFallbackRegionDistance(iter, startState);
+ if (d >= threshold) {
+ return d;
+ } else if (regionDistance < d) {
+ regionDistance = d;
+ }
+ star = true;
+ }
+ if ((desired = *desiredPartitions++) != 0) {
+ // Restart from the initial trie state and the first supported partition.
+ iter.resetToState64(startState);
+ supportedPartitions = supportedStart;
+ supported = *supportedPartitions++;
+ } else {
+ break;
+ }
+ }
+ return regionDistance;
+}
+
+/**
+ * Resets the trie iterator to startState and returns the value of the '*' entry there,
+ * which is the <*, *> fallback region distance.
+ */
+int32_t LocaleDistance::getFallbackRegionDistance(BytesTrie &iter, uint64_t startState) {
+ // The lookup result is only stored in debug builds, where the assertion below checks it.
+#if U_DEBUG
+ UStringTrieResult result =
+#endif
+ iter.resetToState64(startState).next(u'*'); // <*, *>
+ U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
+ int32_t distance = iter.getValue();
+ U_ASSERT(distance >= 0);
+ return distance;
+}
+
+/**
+ * Advances the trie iterator over the subtag s,
+ * with END_OF_SUBTAG set on the last character.
+ *
+ * If wantValue: returns the trie value at the end of the subtag,
+ * with DISTANCE_IS_FINAL set if it is a final value, or -1 if there is no value.
+ * If !wantValue: returns 0 if the trie can continue after the subtag, otherwise -1.
+ * Also returns -1 for an empty subtag or if the subtag is not in the trie.
+ */
+int32_t LocaleDistance::trieNext(BytesTrie &iter, const char *s, bool wantValue) {
+    uint8_t c = *s;
+    if (c == 0) {
+        return -1;  // no empty subtags in the distance data
+    }
+    // All but the last character of this subtag.
+    uint8_t next;
+    while ((next = *++s) != 0) {
+        // EBCDIC: If the character is not an invariant character,
+        // then c is now 0 and will simply not match anything, which is harmless.
+        c = uprv_invCharToAscii(c);
+        if (!USTRINGTRIE_HAS_NEXT(iter.next(c))) {
+            return -1;
+        }
+        c = next;
+    }
+    // last character of this subtag
+    c = uprv_invCharToAscii(c);
+    UStringTrieResult result = iter.next(c | END_OF_SUBTAG);
+    if (wantValue) {
+        if (!USTRINGTRIE_HAS_VALUE(result)) {
+            return -1;
+        }
+        int32_t value = iter.getValue();
+        if (result == USTRINGTRIE_FINAL_VALUE) {
+            value |= DISTANCE_IS_FINAL;
+        }
+        return value;
+    }
+    return USTRINGTRIE_HAS_NEXT(result) ? 0 : -1;
+}
+
+/** Returns true if lsr is equal to one of the paradigm LSRs. */
+UBool LocaleDistance::isParadigmLSR(const LSR &lsr) const {
+    // Linear search for a very short list (length 6 as of 2019).
+    // If there are many paradigm LSRs we should use a hash set.
+    U_ASSERT(paradigmLSRsLength <= 15);
+    int32_t i = 0;
+    while (i < paradigmLSRsLength) {
+        const LSR &candidate = paradigmLSRs[i++];
+        if (lsr == candidate) {
+            return true;
+        }
+    }
+    return false;
+}
+
+U_NAMESPACE_END
diff --git a/source/common/locdistance.h b/source/common/locdistance.h
new file mode 100644
index 00000000..7439f51c
--- /dev/null
+++ b/source/common/locdistance.h
@@ -0,0 +1,109 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// locdistance.h
+// created: 2019may08 Markus W. Scherer
+
+#ifndef __LOCDISTANCE_H__
+#define __LOCDISTANCE_H__
+
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/localematcher.h"
+#include "unicode/locid.h"
+#include "unicode/uobject.h"
+#include "lsr.h"
+
+U_NAMESPACE_BEGIN
+
+struct LocaleDistanceData;
+
+/**
+ * Computes distances between LSRs (language, script, region) for LocaleMatcher,
+ * using offline-built data: a distance trie, region partitions and default distances.
+ * There is one shared instance, see getSingleton().
+ */
+class LocaleDistance final : public UMemory {
+public:
+ // Returns the shared instance, or nullptr if errorCode indicates a failure on entry.
+ static const LocaleDistance *getSingleton(UErrorCode &errorCode);
+
+ /**
+ * Finds the supported LSR with the smallest distance from the desired one.
+ * Equivalent LSR subtags must be normalized into a canonical form.
+ *
+ * <p>Returns the index of the lowest-distance supported LSR in bits 31..8
+ * (negative if none has a distance below the threshold),
+ * and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
+ */
+ int32_t getBestIndexAndDistance(const LSR &desired,
+ const LSR **supportedLSRs, int32_t supportedLSRsLength,
+ int32_t threshold, ULocMatchFavorSubtag favorSubtag) const;
+
+ int32_t getParadigmLSRsLength() const { return paradigmLSRsLength; }
+
+ // Returns true if lsr is equal to one of the paradigm LSRs.
+ UBool isParadigmLSR(const LSR &lsr) const;
+
+ int32_t getDefaultScriptDistance() const {
+ return defaultScriptDistance;
+ }
+
+ // The constructor computes this value as the distance between en-Latn-US and en-Latn-GB.
+ int32_t getDefaultDemotionPerDesiredLocale() const {
+ return defaultDemotionPerDesiredLocale;
+ }
+
+private:
+ LocaleDistance(const LocaleDistanceData &data);
+ LocaleDistance(const LocaleDistance &other) = delete;
+ LocaleDistance &operator=(const LocaleDistance &other) = delete;
+
+ // Creates the singleton; invoked only via umtx_initOnce().
+ static void initLocaleDistance(UErrorCode &errorCode);
+
+ // Script pair lookup with fallback to <*, *>; may set DISTANCE_IS_FINAL in the result.
+ static int32_t getDesSuppScriptDistance(BytesTrie &iter, uint64_t startState,
+ const char *desired, const char *supported);
+
+ // Maximum distance over pairs of region partitions, with early exit at the threshold.
+ static int32_t getRegionPartitionsDistance(
+ BytesTrie &iter, uint64_t startState,
+ const char *desiredPartitions, const char *supportedPartitions,
+ int32_t threshold);
+
+ // Value of the '*' entry at startState.
+ static int32_t getFallbackRegionDistance(BytesTrie &iter, uint64_t startState);
+
+ // Advances iter over the subtag s; returns a negative value if it is not in the trie.
+ static int32_t trieNext(BytesTrie &iter, const char *s, bool wantValue);
+
+ const char *partitionsForRegion(const LSR &lsr) const {
+ // ill-formed region -> one non-matching string
+ int32_t pIndex = regionToPartitionsIndex[lsr.regionIndex];
+ return partitionArrays[pIndex];
+ }
+
+ int32_t getDefaultRegionDistance() const {
+ return defaultRegionDistance;
+ }
+
+ // The trie maps each dlang+slang+dscript+sscript+dregion+sregion
+ // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
+ // There is also a trie value for each subsequence of whole subtags.
+ // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"".
+ BytesTrie trie;
+
+ /**
+ * Maps each region to zero or more single-character partitions.
+ */
+ const uint8_t *regionToPartitionsIndex;
+ const char **partitionArrays;
+
+ /**
+ * Used to get the paradigm region for a cluster, if there is one.
+ */
+ const LSR *paradigmLSRs;
+ int32_t paradigmLSRsLength;
+
+ // Default and minimum distances, read from the data's distances array.
+ int32_t defaultLanguageDistance;
+ int32_t defaultScriptDistance;
+ int32_t defaultRegionDistance;
+ int32_t minRegionDistance;
+ int32_t defaultDemotionPerDesiredLocale;
+};
+
+U_NAMESPACE_END
+
+#endif // __LOCDISTANCE_H__
diff --git a/source/common/locid.cpp b/source/common/locid.cpp
index caffdb8b..93f3d3cb 100644
--- a/source/common/locid.cpp
+++ b/source/common/locid.cpp
@@ -1399,5 +1399,7 @@ Locale::getBaseName() const {
return baseName;
}
+Locale::Iterator::~Iterator() = default;
+
//eof
U_NAMESPACE_END
diff --git a/source/common/loclikelysubtags.cpp b/source/common/loclikelysubtags.cpp
new file mode 100644
index 00000000..d7f5e124
--- /dev/null
+++ b/source/common/loclikelysubtags.cpp
@@ -0,0 +1,638 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// loclikelysubtags.cpp
+// created: 2019may08 Markus W. Scherer
+
+#include <utility>
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/localpointer.h"
+#include "unicode/locid.h"
+#include "unicode/uobject.h"
+#include "unicode/ures.h"
+#include "charstr.h"
+#include "cstring.h"
+#include "loclikelysubtags.h"
+#include "lsr.h"
+#include "uassert.h"
+#include "ucln_cmn.h"
+#include "uhash.h"
+#include "uinvchar.h"
+#include "umutex.h"
+#include "uresdata.h"
+#include "uresimp.h"
+
+U_NAMESPACE_BEGIN
+
+namespace {
+
+constexpr char PSEUDO_ACCENTS_PREFIX = '\''; // LSR prefix for region -XA or variant -PSACCENT
+constexpr char PSEUDO_BIDI_PREFIX = '+'; // LSR prefix for region -XB or variant -PSBIDI
+constexpr char PSEUDO_CRACKED_PREFIX = ','; // LSR prefix for region -XC or variant -PSCRACK
+
+/**
+ * Collects NUL-terminated strings in one CharString without storing duplicates.
+ * Incoming strings are looked up in a hash table keyed by their UTF-16 buffers;
+ * new ones are converted to invariant characters and appended.
+ */
+class UniqueCharStrings {
+public:
+    UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) {
+        uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode);
+        if (U_SUCCESS(errorCode)) {
+            strings = new CharString();
+            if (strings == nullptr) {
+                errorCode = U_MEMORY_ALLOCATION_ERROR;
+            }
+        }
+    }
+    ~UniqueCharStrings() {
+        uhash_close(&map);
+        delete strings;
+    }
+
+    /** Hands the CharString that holds all of the strings over to the caller. */
+    CharString *orphanCharStrings() {
+        CharString *orphan = strings;
+        strings = nullptr;
+        return orphan;
+    }
+
+    /**
+     * Adds a string unless it has been added before, and returns its unique number.
+     * Returns 0 if errorCode already indicates a failure or if this object is frozen
+     * (in the latter case errorCode is set to U_NO_WRITE_PERMISSION).
+     */
+    int32_t add(const UnicodeString &s, UErrorCode &errorCode) {
+        if (U_FAILURE(errorCode)) { return 0; }
+        if (isFrozen) {
+            errorCode = U_NO_WRITE_PERMISSION;
+            return 0;
+        }
+        // The hash key is the string's buffer, which points into the resource bundle.
+        const char16_t *key = s.getBuffer();
+        const int32_t known = uhash_geti(&map, key);
+        if (known != 0) { return known; }  // duplicate: reuse its number
+        // Terminate the previous string with an explicit NUL.
+        // (The CharString itself adds one implicit NUL after the last string.)
+        strings->append(0, errorCode);
+        // The unique number is the offset of the new string in the CharString.
+        const int32_t offset = strings->length();
+        strings->appendInvariantChars(s, errorCode);
+        uhash_puti(&map, const_cast<char16_t *>(key), offset, &errorCode);
+        return offset;
+    }
+
+    /** Disallows further add() calls and enables get(). */
+    void freeze() { isFrozen = true; }
+
+    /**
+     * Returns the string for a unique number that add() returned.
+     * Returns nullptr if this object is not frozen or if i is not positive.
+     */
+    const char *get(int32_t i) const {
+        U_ASSERT(isFrozen);
+        if (!isFrozen || i <= 0) { return nullptr; }
+        return strings->data() + i;
+    }
+
+private:
+    UHashtable map;
+    CharString *strings;
+    bool isFrozen = false;
+};
+
+} // namespace
+
+LocaleDistanceData::LocaleDistanceData(LocaleDistanceData &&data) :  // Move constructor: copies every field from data.
+ distanceTrieBytes(data.distanceTrieBytes),
+ regionToPartitions(data.regionToPartitions),
+ partitions(data.partitions),
+ paradigms(data.paradigms), paradigmsLength(data.paradigmsLength),
+ distances(data.distances) {
+ data.partitions = nullptr;  // The destructor frees partitions and deletes paradigms,
+ data.paradigms = nullptr;  // so the moved-from object must not keep pointing to them.
+}
+
+LocaleDistanceData::~LocaleDistanceData() {  // Releases the two arrays that XLikelySubtagsData::load() allocates.
+ uprv_free(partitions);  // allocated there with uprv_malloc()
+ delete[] paradigms;  // allocated there with new LSR[]
+}
+
+// TODO(ICU-20777): Rename to just LikelySubtagsData.
+struct XLikelySubtagsData {  // Collects the "langInfo" bundle data which the XLikelySubtags constructor then takes over.
+ UResourceBundle *langInfoBundle = nullptr;
+ UniqueCharStrings strings;
+ CharStringMap languageAliases;
+ CharStringMap regionAliases;
+ const uint8_t *trieBytes = nullptr;
+ LSR *lsrs = nullptr;
+ int32_t lsrsLength = 0;
+
+ LocaleDistanceData distanceData;
+
+ XLikelySubtagsData(UErrorCode &errorCode) : strings(errorCode) {}
+
+ ~XLikelySubtagsData() {  // Releases what is still owned here; the XLikelySubtags constructor nulls what it takes over.
+ ures_close(langInfoBundle);
+ delete[] lsrs;
+ }
+
+ void load(UErrorCode &errorCode) {  // Reads the "likely" table and, if present, the "match" table; returns early with errorCode set on failure.
+ langInfoBundle = ures_openDirect(nullptr, "langInfo", &errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+ StackUResourceBundle stackTempBundle;
+ ResourceDataValue value;
+ ures_getValueWithFallback(langInfoBundle, "likely", stackTempBundle.getAlias(),
+ value, errorCode);
+ ResourceTable likelyTable = value.getTable(errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+
+ // Read all strings in the resource bundle and convert them to invariant char *.
+ LocalMemory<int32_t> languageIndexes, regionIndexes, lsrSubtagIndexes;
+ int32_t languagesLength = 0, regionsLength = 0, lsrSubtagsLength = 0;
+ if (!readStrings(likelyTable, "languageAliases", value,
+ languageIndexes, languagesLength, errorCode) ||
+ !readStrings(likelyTable, "regionAliases", value,
+ regionIndexes, regionsLength, errorCode) ||
+ !readStrings(likelyTable, "lsrs", value,
+ lsrSubtagIndexes,lsrSubtagsLength, errorCode)) {
+ return;
+ }
+ if ((languagesLength & 1) != 0 ||  // aliases are consumed below as (alias, canonical) pairs
+ (regionsLength & 1) != 0 ||
+ (lsrSubtagsLength % 3) != 0) {  // lsrs are consumed below as (language, script, region) triples
+ errorCode = U_INVALID_FORMAT_ERROR;
+ return;
+ }
+ if (lsrSubtagsLength == 0) {
+ errorCode = U_MISSING_RESOURCE_ERROR;
+ return;
+ }
+
+ if (!likelyTable.findValue("trie", value)) {
+ errorCode = U_MISSING_RESOURCE_ERROR;
+ return;
+ }
+ int32_t length;
+ trieBytes = value.getBinary(length, errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+
+ // Also read distance/matcher data if available,
+ // to open & keep only one resource bundle pointer
+ // and to use one single UniqueCharStrings.
+ UErrorCode matchErrorCode = U_ZERO_ERROR;  // separate code: a missing "match" table is tolerated below
+ ures_getValueWithFallback(langInfoBundle, "match", stackTempBundle.getAlias(),
+ value, matchErrorCode);
+ LocalMemory<int32_t> partitionIndexes, paradigmSubtagIndexes;
+ int32_t partitionsLength = 0, paradigmSubtagsLength = 0;
+ if (U_SUCCESS(matchErrorCode)) {
+ ResourceTable matchTable = value.getTable(errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+
+ if (matchTable.findValue("trie", value)) {
+ distanceData.distanceTrieBytes = value.getBinary(length, errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+ }
+
+ if (matchTable.findValue("regionToPartitions", value)) {
+ distanceData.regionToPartitions = value.getBinary(length, errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+ if (length < LSR::REGION_INDEX_LIMIT) {  // must have an entry for every possible LSR region index
+ errorCode = U_INVALID_FORMAT_ERROR;
+ return;
+ }
+ }
+
+ if (!readStrings(matchTable, "partitions", value,
+ partitionIndexes, partitionsLength, errorCode) ||
+ !readStrings(matchTable, "paradigms", value,
+ paradigmSubtagIndexes, paradigmSubtagsLength, errorCode)) {
+ return;
+ }
+ if ((paradigmSubtagsLength % 3) != 0) {  // paradigms are consumed below as LSR triples
+ errorCode = U_INVALID_FORMAT_ERROR;
+ return;
+ }
+
+ if (matchTable.findValue("distances", value)) {
+ distanceData.distances = value.getIntVector(length, errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+ if (length < 4) { // LocaleDistance IX_LIMIT
+ errorCode = U_INVALID_FORMAT_ERROR;
+ return;
+ }
+ }
+ } else if (matchErrorCode == U_MISSING_RESOURCE_ERROR) {
+ // ok for likely subtags
+ } else { // error other than missing resource
+ errorCode = matchErrorCode;
+ return;
+ }
+
+ // Fetch & store invariant-character versions of strings
+ // only after we have collected and de-duplicated all of them.
+ strings.freeze();  // strings.get() returns pointers only once frozen
+
+ languageAliases = CharStringMap(languagesLength / 2, errorCode);
+ for (int32_t i = 0; i < languagesLength; i += 2) {
+ languageAliases.put(strings.get(languageIndexes[i]),
+ strings.get(languageIndexes[i + 1]), errorCode);
+ }
+
+ regionAliases = CharStringMap(regionsLength / 2, errorCode);
+ for (int32_t i = 0; i < regionsLength; i += 2) {
+ regionAliases.put(strings.get(regionIndexes[i]),
+ strings.get(regionIndexes[i + 1]), errorCode);
+ }
+ if (U_FAILURE(errorCode)) { return; }  // covers both alias maps
+
+ lsrsLength = lsrSubtagsLength / 3;
+ lsrs = new LSR[lsrsLength];
+ if (lsrs == nullptr) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ for (int32_t i = 0, j = 0; i < lsrSubtagsLength; i += 3, ++j) {  // i walks the subtag indexes, j the LSRs
+ lsrs[j] = LSR(strings.get(lsrSubtagIndexes[i]),
+ strings.get(lsrSubtagIndexes[i + 1]),
+ strings.get(lsrSubtagIndexes[i + 2]));
+ }
+
+ if (partitionsLength > 0) {
+ distanceData.partitions = static_cast<const char **>(
+ uprv_malloc(partitionsLength * sizeof(const char *)));  // released with uprv_free() in ~LocaleDistanceData()
+ if (distanceData.partitions == nullptr) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ for (int32_t i = 0; i < partitionsLength; ++i) {
+ distanceData.partitions[i] = strings.get(partitionIndexes[i]);
+ }
+ }
+
+ if (paradigmSubtagsLength > 0) {
+ distanceData.paradigmsLength = paradigmSubtagsLength / 3;
+ LSR *paradigms = new LSR[distanceData.paradigmsLength];  // released with delete[] in ~LocaleDistanceData()
+ if (paradigms == nullptr) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ for (int32_t i = 0, j = 0; i < paradigmSubtagsLength; i += 3, ++j) {
+ paradigms[j] = LSR(strings.get(paradigmSubtagIndexes[i]),
+ strings.get(paradigmSubtagIndexes[i + 1]),
+ strings.get(paradigmSubtagIndexes[i + 2]));
+ }
+ distanceData.paradigms = paradigms;
+ }
+ }
+
+private:
+ bool readStrings(const ResourceTable &table, const char *key, ResourceValue &value,
+ LocalMemory<int32_t> &indexes, int32_t &length, UErrorCode &errorCode) {  // Adds the string array at key to strings; returns false on error, true also if the key is absent.
+ if (table.findValue(key, value)) {
+ ResourceArray stringArray = value.getArray(errorCode);
+ if (U_FAILURE(errorCode)) { return false; }
+ length = stringArray.getSize();
+ if (length == 0) { return true; }
+ int32_t *rawIndexes = indexes.allocateInsteadAndCopy(length);
+ if (rawIndexes == nullptr) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return false;
+ }
+ for (int i = 0; i < length; ++i) {
+ stringArray.getValue(i, value); // returns TRUE because i < length
+ rawIndexes[i] = strings.add(value.getUnicodeString(errorCode), errorCode);  // store the string's unique number
+ if (U_FAILURE(errorCode)) { return false; }
+ }
+ }
+ return true;  // length is left unchanged if the key was not found
+ }
+};
+
+namespace {
+
+XLikelySubtags *gLikelySubtags = nullptr;  // singleton, created in XLikelySubtags::initLikelySubtags()
+UInitOnce gInitOnce = U_INITONCE_INITIALIZER;  // passed to umtx_initOnce() in getSingleton()
+
+UBool U_CALLCONV cleanup() {  // Registered via ucln_common_registerCleanup(); deletes the singleton.
+ delete gLikelySubtags;
+ gLikelySubtags = nullptr;
+ gInitOnce.reset();  // presumably so that the singleton can be created again later
+ return TRUE;
+}
+
+} // namespace
+
+/**
+ * Loads the data and creates the singleton.
+ * Invoked only via umtx_initOnce().
+ * On success, registers the cleanup function for the singleton.
+ */
+void U_CALLCONV XLikelySubtags::initLikelySubtags(UErrorCode &errorCode) {
+    U_ASSERT(gLikelySubtags == nullptr);
+    XLikelySubtagsData data(errorCode);
+    data.load(errorCode);
+    if (U_SUCCESS(errorCode)) {
+        gLikelySubtags = new XLikelySubtags(data);
+        if (gLikelySubtags != nullptr) {
+            ucln_common_registerCleanup(UCLN_COMMON_LIKELY_SUBTAGS, cleanup);
+        } else {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+}
+
+/**
+ * Returns the singleton, initializing it on first use.
+ * Returns nullptr if errorCode indicates a failure on entry.
+ */
+const XLikelySubtags *XLikelySubtags::getSingleton(UErrorCode &errorCode) {
+    if (U_SUCCESS(errorCode)) {
+        umtx_initOnce(gInitOnce, &XLikelySubtags::initLikelySubtags, errorCode);
+        return gLikelySubtags;
+    }
+    return nullptr;
+}
+
+/**
+ * Takes over the data collected in an XLikelySubtagsData object
+ * and caches trie states that maximize() uses as shortcuts.
+ *
+ * @param data Loaded data. Its bundle pointer, strings, alias maps, lsrs array and
+ *             distance data are moved into or taken over by this object.
+ */
+XLikelySubtags::XLikelySubtags(XLikelySubtagsData &data) :
+        langInfoBundle(data.langInfoBundle),
+        strings(data.strings.orphanCharStrings()),
+        languageAliases(std::move(data.languageAliases)),
+        regionAliases(std::move(data.regionAliases)),
+        trie(data.trieBytes),
+        lsrs(data.lsrs),
+#if U_DEBUG
+        lsrsLength(data.lsrsLength),
+#endif
+        distanceData(std::move(data.distanceData)) {
+    // This object now closes the bundle and deletes the LSR array.
+    data.langInfoBundle = nullptr;
+    data.lsrs = nullptr;
+
+    // Cache the result of looking up language="und" encoded as "*", and "und-Zzzz" ("**").
+    UStringTrieResult result = trie.next(u'*');
+    U_ASSERT(USTRINGTRIE_HAS_NEXT(result));
+    trieUndState = trie.getState64();
+    result = trie.next(u'*');
+    U_ASSERT(USTRINGTRIE_HAS_NEXT(result));
+    trieUndZzzzState = trie.getState64();
+    result = trie.next(u'*');
+    U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
+    defaultLsrIndex = trie.getValue();
+    trie.reset();
+
+    // Cache the trie state after each possible first language letter.
+    // Every slot is written: the array has no other initializer, and
+    // maximize() treats 0 as "no cached state for this letter".
+    for (char16_t c = u'a'; c <= u'z'; ++c) {
+        result = trie.next(c);
+        trieFirstLetterStates[c - u'a'] =
+            result == USTRINGTRIE_NO_VALUE ? trie.getState64() : 0;
+        trie.reset();
+    }
+}
+
+XLikelySubtags::~XLikelySubtags() {  // Releases what the constructor took over from XLikelySubtagsData.
+ ures_close(langInfoBundle);
+ delete strings;
+ delete[] lsrs;
+}
+
+/**
+ * Returns the maximized LSR for a Locale.
+ * A locale whose name starts with "@x=" (private use language tag) is not maximized:
+ * its whole name becomes the language, with empty script and region.
+ */
+LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const {
+    const char *name = locale.getName();
+    // name.startsWith("@x=")
+    const bool isPrivateUse = uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=';
+    if (!isPrivateUse) {
+        return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
+                                locale.getVariant(), errorCode);
+    }
+    // Private use language tag x-subtag-subtag...
+    return LSR(name, "", "");
+}
+
+namespace {
+
+/** Returns the value that the aliases map has for alias, or alias itself if there is none. */
+const char *getCanonical(const CharStringMap &aliases, const char *alias) {
+    if (const char *mapped = aliases.get(alias)) {
+        return mapped;
+    }
+    return alias;
+}
+
+} // namespace
+
+/**
+ * Returns the maximized LSR for the given subtags.
+ *
+ * Pseudolocales like en-XA, ar-XB, fr-PSCRACK are not maximized. They should match
+ * only themselves, not other locales with what looks like the same language and
+ * script subtags, so their language and script get a distinguishing prefix.
+ * All other input has its language and region aliases replaced and is then maximized.
+ */
+LSR XLikelySubtags::makeMaximizedLsr(const char *language, const char *script, const char *region,
+                                     const char *variant, UErrorCode &errorCode) const {
+    // Pseudolocale by region: exactly two characters, the first one 'X'.
+    if (region[0] == 'X' && region[1] != 0 && region[2] == 0) {
+        const char second = region[1];
+        if (second == 'A') {
+            return LSR(PSEUDO_ACCENTS_PREFIX, language, script, region, errorCode);
+        }
+        if (second == 'B') {
+            return LSR(PSEUDO_BIDI_PREFIX, language, script, region, errorCode);
+        }
+        if (second == 'C') {
+            return LSR(PSEUDO_CRACKED_PREFIX, language, script, region, errorCode);
+        }
+        // anything else: normal locale
+    }
+
+    // Pseudolocale by variant. An empty region is replaced with the matching X? region.
+    if (variant[0] == 'P' && variant[1] == 'S') {
+        if (uprv_strcmp(variant, "PSACCENT") == 0) {
+            const char *pseudoRegion = *region == 0 ? "XA" : region;
+            return LSR(PSEUDO_ACCENTS_PREFIX, language, script, pseudoRegion, errorCode);
+        }
+        if (uprv_strcmp(variant, "PSBIDI") == 0) {
+            const char *pseudoRegion = *region == 0 ? "XB" : region;
+            return LSR(PSEUDO_BIDI_PREFIX, language, script, pseudoRegion, errorCode);
+        }
+        if (uprv_strcmp(variant, "PSCRACK") == 0) {
+            const char *pseudoRegion = *region == 0 ? "XC" : region;
+            return LSR(PSEUDO_CRACKED_PREFIX, language, script, pseudoRegion, errorCode);
+        }
+        // anything else: normal locale
+    }
+
+    // (We have no script mappings.)
+    const char *canonicalLanguage = getCanonical(languageAliases, language);
+    const char *canonicalRegion = getCanonical(regionAliases, region);
+    return maximize(canonicalLanguage, script, canonicalRegion);
+}
+
+LSR XLikelySubtags::maximize(const char *language, const char *script, const char *region) const {  // Fills in empty subtags from the trie lookup result in lsrs.
+ if (uprv_strcmp(language, "und") == 0) {  // Treat "und", "Zzzz" and "ZZ" like empty subtags.
+ language = "";
+ }
+ if (uprv_strcmp(script, "Zzzz") == 0) {
+ script = "";
+ }
+ if (uprv_strcmp(region, "ZZ") == 0) {
+ region = "";
+ }
+ if (*script != 0 && *region != 0 && *language != 0) {
+ return LSR(language, script, region); // already maximized
+ }
+
+ uint32_t retainOldMask = 0;  // bits 4/2/1: keep the input language/script/region instead of the lookup result's
+ BytesTrie iter(trie);
+ uint64_t state;
+ int32_t value;
+ // Small optimization: Array lookup for first language letter.
+ int32_t c0;
+ if (0 <= (c0 = uprv_lowerOrdinal(language[0])) && c0 <= 25 &&
+ language[1] != 0 && // language.length() >= 2
+ (state = trieFirstLetterStates[c0]) != 0) {  // 0: no cached state for this letter
+ value = trieNext(iter.resetToState64(state), language, 1);  // continue from the second letter
+ } else {
+ value = trieNext(iter, language, 0);
+ }
+ if (value >= 0) {  // the language (or "*" for an empty one) is in the trie
+ if (*language != 0) {
+ retainOldMask |= 4;
+ }
+ state = iter.getState64();
+ } else {  // language not found: keep it and continue as for "und"
+ retainOldMask |= 4;
+ iter.resetToState64(trieUndState); // "und" ("*")
+ state = 0;  // marks that the "und" fallback is in use
+ }
+
+ if (value > 0) {
+ // Intermediate or final value from just language.
+ if (value == SKIP_SCRIPT) {
+ value = 0;  // no result yet: the region is looked up next
+ }
+ if (*script != 0) {
+ retainOldMask |= 2;
+ }
+ } else {
+ value = trieNext(iter, script, 0);
+ if (value >= 0) {
+ if (*script != 0) {
+ retainOldMask |= 2;
+ }
+ state = iter.getState64();
+ } else {  // script not found after this language: keep it and continue with "*" for the script
+ retainOldMask |= 2;
+ if (state == 0) {
+ iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**")
+ } else {
+ iter.resetToState64(state);  // back to the state after the language
+ value = trieNext(iter, "", 0);
+ U_ASSERT(value >= 0);
+ state = iter.getState64();
+ }
+ }
+ }
+
+ if (value > 0) {
+ // Final value from just language or language+script.
+ if (*region != 0) {
+ retainOldMask |= 1;
+ }
+ } else {
+ value = trieNext(iter, region, 0);
+ if (value >= 0) {
+ if (*region != 0) {
+ retainOldMask |= 1;
+ }
+ } else {  // region not found: keep it and use the result for "*" as the region
+ retainOldMask |= 1;
+ if (state == 0) {
+ value = defaultLsrIndex;  // cached result of "***"
+ } else {
+ iter.resetToState64(state);
+ value = trieNext(iter, "", 0);
+ U_ASSERT(value > 0);
+ }
+ }
+ }
+ U_ASSERT(value < lsrsLength);
+ const LSR &result = lsrs[value];  // value is an index into lsrs
+
+ if (*language == 0) {
+ language = "und";
+ }
+
+ if (retainOldMask == 0) {
+ // Quickly return a copy of the lookup-result LSR
+ // without new allocation of the subtags.
+ return LSR(result.language, result.script, result.region);
+ }
+ if ((retainOldMask & 4) == 0) {
+ language = result.language;
+ }
+ if ((retainOldMask & 2) == 0) {
+ script = result.script;
+ }
+ if ((retainOldMask & 1) == 0) {
+ region = result.region;
+ }
+ return LSR(language, script, region);
+}
+
+int32_t XLikelySubtags::trieNext(BytesTrie &iter, const char *s, int32_t i) {  // Advances iter by the subtag s starting at index i; returns -1 (no match), 0 (no value) or the trie value.
+ UStringTrieResult result;
+ uint8_t c;
+ if ((c = s[i]) == 0) {
+ result = iter.next(u'*');  // an empty subtag is looked up as '*'
+ } else {
+ for (;;) {
+ c = uprv_invCharToAscii(c);
+ // EBCDIC: If s[i] is not an invariant character,
+ // then c is now 0 and will simply not match anything, which is harmless.
+ uint8_t next = s[++i];
+ if (next != 0) {
+ if (!USTRINGTRIE_HAS_NEXT(iter.next(c))) {
+ return -1;
+ }
+ } else {
+ // last character of this subtag
+ result = iter.next(c | 0x80);  // the last character is looked up with bit 7 set
+ break;
+ }
+ c = next;
+ }
+ }
+ switch (result) {
+ case USTRINGTRIE_NO_MATCH: return -1;
+ case USTRINGTRIE_NO_VALUE: return 0;
+ case USTRINGTRIE_INTERMEDIATE_VALUE:
+ U_ASSERT(iter.getValue() == SKIP_SCRIPT);
+ return SKIP_SCRIPT;
+ case USTRINGTRIE_FINAL_VALUE: return iter.getValue();
+ default: return -1;
+ }
+}
+
+// TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
+// in loclikely.cpp to this new code, including activating this
+// minimizeSubtags() function. The LocaleMatcher does not minimize.
+#if 0  // Disabled: the body still uses Java-style constructs (new LSR(...), boolean, .equals(), ULocale.Minimize).
+LSR XLikelySubtags::minimizeSubtags(const char *languageIn, const char *scriptIn,
+ const char *regionIn, ULocale.Minimize fieldToFavor,
+ UErrorCode &errorCode) const {
+ LSR result = maximize(languageIn, scriptIn, regionIn);
+
+ // We could try just a series of checks, like:
+ // LSR result2 = addLikelySubtags(languageIn, "", "");
+ // if result.equals(result2) return result2;
+ // However, we can optimize 2 of the cases:
+ // (languageIn, "", "")
+ // (languageIn, "", regionIn)
+
+ // value00 = lookup(result.language, "", "")
+ BytesTrie iter = new BytesTrie(trie);
+ int value = trieNext(iter, result.language, 0);
+ U_ASSERT(value >= 0);
+ if (value == 0) {
+ value = trieNext(iter, "", 0);
+ U_ASSERT(value >= 0);
+ if (value == 0) {
+ value = trieNext(iter, "", 0);
+ }
+ }
+ U_ASSERT(value > 0);
+ LSR value00 = lsrs[value];
+ boolean favorRegionOk = false;
+ if (result.script.equals(value00.script)) { //script is default
+ if (result.region.equals(value00.region)) {
+ return new LSR(result.language, "", "");
+ } else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) {
+ return new LSR(result.language, "", result.region);
+ } else {
+ favorRegionOk = true;
+ }
+ }
+
+ // The last case is not as easy to optimize.
+ // Maybe do later, but for now use the straightforward code.
+ LSR result2 = maximize(languageIn, scriptIn, "");
+ if (result2.equals(result)) {
+ return new LSR(result.language, result.script, "");
+ } else if (favorRegionOk) {
+ return new LSR(result.language, "", result.region);
+ }
+ return result;
+}
+#endif
+
+U_NAMESPACE_END
diff --git a/source/common/loclikelysubtags.h b/source/common/loclikelysubtags.h
new file mode 100644
index 00000000..8c8a08ac
--- /dev/null
+++ b/source/common/loclikelysubtags.h
@@ -0,0 +1,143 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// loclikelysubtags.h
+// created: 2019may08 Markus W. Scherer
+
+#ifndef __LOCLIKELYSUBTAGS_H__
+#define __LOCLIKELYSUBTAGS_H__
+
+#include <utility>
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/locid.h"
+#include "unicode/uobject.h"
+#include "unicode/ures.h"
+#include "lsr.h"
+#include "uhash.h"
+
+U_NAMESPACE_BEGIN
+
+struct XLikelySubtagsData;
+
+/**
+ * Map of const char * keys & values.
+ * Stores pointers as is: Does not own/copy/adopt/release strings.
+ * Owns only the underlying hash table, which the destructor closes.
+ */
+class CharStringMap final : public UMemory {
+public:
+    /** Constructs an unusable non-map. */
+    CharStringMap() : map(nullptr) {}
+    /**
+     * Opens a hash table with the given initial size.
+     * @param size Initial size, passed to uhash_openSize().
+     * @param errorCode Standard ICU error code, passed to uhash_openSize().
+     */
+    CharStringMap(int32_t size, UErrorCode &errorCode) {
+        map = uhash_openSize(uhash_hashChars, uhash_compareChars, uhash_compareChars,
+                             size, &errorCode);
+    }
+    /** Takes over the hash table of other, which is left without one. */
+    CharStringMap(CharStringMap &&other) U_NOEXCEPT : map(other.map) {
+        other.map = nullptr;
+    }
+    CharStringMap(const CharStringMap &other) = delete;
+    ~CharStringMap() {
+        uhash_close(map);
+    }
+
+    /**
+     * Takes over the hash table of other, which is left without one.
+     * Closes the table that this object owned before, so that it is not leaked.
+     * Self-assignment leaves the object unchanged.
+     */
+    CharStringMap &operator=(CharStringMap &&other) U_NOEXCEPT {
+        if (this != &other) {
+            // Same call as in the destructor, which also runs with map==nullptr.
+            uhash_close(map);
+            map = other.map;
+            other.map = nullptr;
+        }
+        return *this;
+    }
+    CharStringMap &operator=(const CharStringMap &other) = delete;
+
+    /** Returns the value stored for the key, as returned by uhash_get(). */
+    const char *get(const char *key) const { return static_cast<const char *>(uhash_get(map, key)); }
+    /** Stores the key and value pointers; neither string is copied. */
+    void put(const char *key, const char *value, UErrorCode &errorCode) {
+        uhash_put(map, const_cast<char *>(key), const_cast<char *>(value), &errorCode);
+    }
+
+private:
+    UHashtable *map;
+};
+
+struct LocaleDistanceData {  // Distance/matcher data filled in by XLikelySubtagsData::load() and exposed via XLikelySubtags::getDistanceData().
+ LocaleDistanceData() = default;
+ LocaleDistanceData(LocaleDistanceData &&data);
+ ~LocaleDistanceData();
+
+ const uint8_t *distanceTrieBytes = nullptr;  // binary "trie" item of the "match" table
+ const uint8_t *regionToPartitions = nullptr;  // binary "regionToPartitions" item of the "match" table
+ const char **partitions = nullptr;  // array freed with uprv_free() in the destructor
+ const LSR *paradigms = nullptr;  // array deleted with delete[] in the destructor
+ int32_t paradigmsLength = 0;
+ const int32_t *distances = nullptr;  // int vector "distances" item of the "match" table
+
+private:
+ LocaleDistanceData &operator=(const LocaleDistanceData &) = delete;
+};
+
+// TODO(ICU-20777): Rename to just LikelySubtags.
+class XLikelySubtags final : public UMemory {  // Likely-subtags lookup (maximization) backed by the "langInfo" resource bundle.
+public:
+ ~XLikelySubtags();
+
+ static constexpr int32_t SKIP_SCRIPT = 1;  // intermediate trie value returned by trieNext()
+
+ // VisibleForTesting
+ static const XLikelySubtags *getSingleton(UErrorCode &errorCode);
+
+ // VisibleForTesting
+ LSR makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const;
+
+ // TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
+ // in loclikely.cpp to this new code, including activating this
+ // minimizeSubtags() function. The LocaleMatcher does not minimize.
+#if 0
+ LSR minimizeSubtags(const char *languageIn, const char *scriptIn, const char *regionIn,
+ ULocale.Minimize fieldToFavor, UErrorCode &errorCode) const;
+#endif
+
+ // visible for LocaleDistance
+ const LocaleDistanceData &getDistanceData() const { return distanceData; }
+
+private:
+ XLikelySubtags(XLikelySubtagsData &data);  // takes over the contents of data
+ XLikelySubtags(const XLikelySubtags &other) = delete;
+ XLikelySubtags &operator=(const XLikelySubtags &other) = delete;
+
+ static void initLikelySubtags(UErrorCode &errorCode);  // creates the singleton; called via umtx_initOnce()
+
+ LSR makeMaximizedLsr(const char *language, const char *script, const char *region,
+ const char *variant, UErrorCode &errorCode) const;
+
+ /**
+ * Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
+ */
+ LSR maximize(const char *language, const char *script, const char *region) const;
+
+ static int32_t trieNext(BytesTrie &iter, const char *s, int32_t i);  // returns -1 for no match, 0 for no value, else the trie value
+
+ UResourceBundle *langInfoBundle;  // closed in the destructor
+ // We could store the strings by value, except that if there were few enough strings,
+ // moving the contents could copy it to a different array,
+ // invalidating the pointers stored in the maps.
+ CharString *strings;  // deleted in the destructor
+ CharStringMap languageAliases;
+ CharStringMap regionAliases;
+
+ // The trie maps each lang+script+region (encoded in ASCII) to an index into lsrs.
+ // There is also a trie value for each intermediate lang and lang+script.
+ // '*' is used instead of "und", "Zzzz"/"" and "ZZ"/"".
+ BytesTrie trie;
+ uint64_t trieUndState;  // trie state after "*"
+ uint64_t trieUndZzzzState;  // trie state after "**"
+ int32_t defaultLsrIndex;  // trie value for "***"
+ uint64_t trieFirstLetterStates[26];  // indexed by first language letter a..z; maximize() treats 0 as "no cached state"
+ const LSR *lsrs;  // deleted with delete[] in the destructor
+#if U_DEBUG
+ int32_t lsrsLength;  // only used in a U_ASSERT in maximize()
+#endif
+
+ // distance/matcher data: see comment in XLikelySubtagsData::load()
+ LocaleDistanceData distanceData;
+};
+
+U_NAMESPACE_END
+
+#endif // __LOCLIKELYSUBTAGS_H__
diff --git a/source/common/lsr.cpp b/source/common/lsr.cpp
new file mode 100644
index 00000000..0c28eeda
--- /dev/null
+++ b/source/common/lsr.cpp
@@ -0,0 +1,101 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// lsr.cpp
+// created: 2019may08 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "lsr.h"
+#include "uinvchar.h"
+#include "ustr_imp.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Constructs an LSR whose language and script are copies of lang and scr,
+ * each with the prefix character prepended. Both copies live in one owned allocation.
+ * The region pointer is aliased.
+ * On failure, language and script remain nullptr.
+ */
+LSR::LSR(char prefix, const char *lang, const char *scr, const char *r, UErrorCode &errorCode) :
+        language(nullptr), script(nullptr), region(r),
+        regionIndex(indexForRegion(region)) {
+    if (U_FAILURE(errorCode)) { return; }
+    // Build prefix+lang, NUL, prefix+scr in one buffer.
+    CharString buffer;
+    buffer.append(prefix, errorCode);
+    buffer.append(lang, errorCode);
+    buffer.append('\0', errorCode);
+    const int32_t scriptStart = buffer.length();
+    buffer.append(prefix, errorCode);
+    buffer.append(scr, errorCode);
+    owned = buffer.cloneData(errorCode);
+    if (U_FAILURE(errorCode)) { return; }
+    language = owned;
+    script = owned + scriptStart;
+}
+
+LSR::LSR(LSR &&other) U_NOEXCEPT :  // Move constructor: copies all fields and takes over the owned memory, if any.
+ language(other.language), script(other.script), region(other.region), owned(other.owned),
+ regionIndex(other.regionIndex), hashCode(other.hashCode) {
+ if (owned != nullptr) {  // other's language and script point into the memory that is now ours
+ other.language = other.script = "";
+ other.owned = nullptr;  // so that other's destructor does not free it
+ other.hashCode = 0;
+ }
+}
+
+void LSR::deleteOwned() {  // Out-of-line part of the destructor: frees the owned string memory.
+ uprv_free(owned);
+}
+
+/**
+ * Move assignment: releases the string memory that this object owns,
+ * then copies all fields from other and takes over its owned memory, if any.
+ * Self-assignment leaves the object unchanged.
+ */
+LSR &LSR::operator=(LSR &&other) U_NOEXCEPT {
+    if (this == &other) { return *this; }
+    // Free the old strings directly rather than via an explicit destructor call:
+    // the destructor would end this object's lifetime before the assignments below.
+    if (owned != nullptr) {
+        deleteOwned();
+    }
+    language = other.language;
+    script = other.script;
+    region = other.region;
+    regionIndex = other.regionIndex;
+    owned = other.owned;
+    hashCode = other.hashCode;
+    if (owned != nullptr) {
+        // other's language and script point into the memory that is now ours.
+        other.language = other.script = "";
+        other.owned = nullptr;
+        other.hashCode = 0;
+    }
+    return *this;
+}
+
+/**
+ * Two LSRs are equal if their language and script strings are equal
+ * and their regions are the same.
+ */
+UBool LSR::operator==(const LSR &other) const {
+    if (uprv_strcmp(language, other.language) != 0) { return false; }
+    if (uprv_strcmp(script, other.script) != 0) { return false; }
+    if (regionIndex != other.regionIndex) { return false; }
+    // Equal positive indexes mean equal well-formed regions.
+    // Compare regions if both are ill-formed (and their indexes are 0).
+    return regionIndex > 0 || uprv_strcmp(region, other.region) == 0;
+}
+
+/**
+ * Returns an index for a well-formed region code: 1..1000 for three digits ("419"),
+ * 1001 and up for two letters A-Z ("DE").
+ * Returns 0 for any other string.
+ */
+int32_t LSR::indexForRegion(const char *region) {
+    const int32_t first = region[0];
+    const int32_t d0 = first - '0';
+    if (0 <= d0 && d0 <= 9) {
+        // Digits: exactly three of them.
+        const int32_t d1 = region[1] - '0';
+        if (d1 < 0 || 9 < d1) { return 0; }
+        const int32_t d2 = region[2] - '0';
+        if (d2 < 0 || 9 < d2 || region[3] != 0) { return 0; }
+        return (10 * d0 + d1) * 10 + d2 + 1;
+    }
+    // Letters: exactly two of them.
+    const int32_t l0 = uprv_upperOrdinal(first);
+    if (l0 < 0 || 25 < l0) { return 0; }
+    const int32_t l1 = uprv_upperOrdinal(region[1]);
+    if (l1 < 0 || 25 < l1 || region[2] != 0) { return 0; }
+    return 26 * l0 + l1 + 1001;
+}
+
+/**
+ * Computes the hash code from the language, the script and the region index,
+ * unless it has already been set to a non-zero value.
+ * Returns this object.
+ */
+LSR &LSR::setHashCode() {
+    if (hashCode != 0) { return *this; }
+    const auto languageHash =
+        ustr_hashCharsN(language, static_cast<int32_t>(uprv_strlen(language)));
+    const auto scriptHash =
+        ustr_hashCharsN(script, static_cast<int32_t>(uprv_strlen(script)));
+    hashCode = (languageHash * 37 + scriptHash) * 37 + regionIndex;
+    return *this;
+}
+
+U_NAMESPACE_END
diff --git a/source/common/lsr.h b/source/common/lsr.h
new file mode 100644
index 00000000..db6cf938
--- /dev/null
+++ b/source/common/lsr.h
@@ -0,0 +1,72 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// lsr.h
+// created: 2019may08 Markus W. Scherer
+
+#ifndef __LSR_H__
+#define __LSR_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "cstring.h"
+
+U_NAMESPACE_BEGIN
+
+struct LSR final : public UMemory {  // A language/script/region triple of C strings, with optional owned storage for language and script.
+ static constexpr int32_t REGION_INDEX_LIMIT = 1001 + 26 * 26;  // one more than the largest value of indexForRegion()
+
+ const char *language;
+ const char *script;
+ const char *region;
+ char *owned = nullptr;  // if not nullptr: memory for language and script, freed by the destructor
+ /** Index for region, 0 if ill-formed. @see indexForRegion */
+ int32_t regionIndex = 0;
+ /** Only set for LSRs that will be used in a hash table. */
+ int32_t hashCode = 0;
+
+ LSR() : language("und"), script(""), region("") {}
+
+ /** Constructor which aliases all subtag pointers. */
+ LSR(const char *lang, const char *scr, const char *r) :
+ language(lang), script(scr), region(r),
+ regionIndex(indexForRegion(region)) {}
+ /**
+ * Constructor which prepends the prefix to the language and script,
+ * copies those into owned memory, and aliases the region.
+ */
+ LSR(char prefix, const char *lang, const char *scr, const char *r, UErrorCode &errorCode);
+ LSR(LSR &&other) U_NOEXCEPT;
+ LSR(const LSR &other) = delete;
+ inline ~LSR() {
+ // Pure inline code for almost all instances.
+ if (owned != nullptr) {
+ deleteOwned();
+ }
+ }
+
+ LSR &operator=(LSR &&other) U_NOEXCEPT;
+ LSR &operator=(const LSR &other) = delete;
+
+ /**
+ * Returns a positive index (>0) for a well-formed region code.
+ * Do not rely on a particular region->index mapping; it may change.
+ * Returns 0 for ill-formed strings.
+ */
+ static int32_t indexForRegion(const char *region);
+
+ UBool operator==(const LSR &other) const;
+
+ inline UBool operator!=(const LSR &other) const {
+ return !operator==(other);
+ }
+
+ LSR &setHashCode();  // computes hashCode if it is still 0; returns *this
+
+private:
+ void deleteOwned();  // frees owned; called by the destructor
+};
+
+U_NAMESPACE_END
+
+#endif // __LSR_H__
diff --git a/source/common/resource.h b/source/common/resource.h
index ee93d41a..5199b858 100644
--- a/source/common/resource.h
+++ b/source/common/resource.h
@@ -94,13 +94,20 @@ public:
*/
int32_t getSize() const { return length; }
/**
- * @param i Array item index.
+ * @param i Table item index.
* @param key Output-only, receives the key of the i'th item.
* @param value Output-only, receives the value of the i'th item.
* @return TRUE if i is non-negative and less than getSize().
*/
UBool getKeyAndValue(int32_t i, const char *&key, ResourceValue &value) const;
+ /**
+ * @param key Key string to find in the table.
+ * @param value Output-only, receives the value of the item with that key.
+ * @return TRUE if the table contains the key.
+ */
+ UBool findValue(const char *key, ResourceValue &value) const;
+
private:
const uint16_t *keys16;
const int32_t *keys32;
diff --git a/source/common/ucln_cmn.h b/source/common/ucln_cmn.h
index 0ca911b4..c4b22ca4 100644
--- a/source/common/ucln_cmn.h
+++ b/source/common/ucln_cmn.h
@@ -41,6 +41,8 @@ typedef enum ECleanupCommonType {
UCLN_COMMON_LOCALE_KEY_TYPE,
UCLN_COMMON_LOCALE,
UCLN_COMMON_LOCALE_AVAILABLE,
+ UCLN_COMMON_LIKELY_SUBTAGS,
+ UCLN_COMMON_LOCALE_DISTANCE,
UCLN_COMMON_ULOC,
UCLN_COMMON_CURRENCY,
UCLN_COMMON_LOADED_NORMALIZER2,
diff --git a/source/common/uinvchar.cpp b/source/common/uinvchar.cpp
index 8ce2350d..6e5fb48c 100644
--- a/source/common/uinvchar.cpp
+++ b/source/common/uinvchar.cpp
@@ -445,6 +445,13 @@ uprv_copyEbcdic(const UDataSwapper *ds,
return length;
}
+/* Returns true if c is one of the byte values listed below; never true for NUL. */
+U_CFUNC UBool
+uprv_isEbcdicAtSign(char c) {
+    /* NUL-terminated so that the list can be searched as a C string. */
+    static const uint8_t ebcdicAtSigns[] = {
+        0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
+    if (c == 0) {
+        /* strchr() would find the terminator. */
+        return false;
+    }
+    return uprv_strchr((const char *)ebcdicAtSigns, c) != nullptr;
+}
+
/* compare invariant strings; variant characters compare less than others and unlike each other */
U_CFUNC int32_t
uprv_compareInvAscii(const UDataSwapper *ds,
@@ -561,6 +568,11 @@ uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) {
}
}
+U_CAPI char U_EXPORT2
+uprv_ebcdicToAscii(char c) {  /* Table lookup in asciiFromEbcdic, indexed by the byte value of c. */
+ return (char)asciiFromEbcdic[(uint8_t)c];
+}
+
U_CAPI char U_EXPORT2
uprv_ebcdicToLowercaseAscii(char c) {
return (char)lowercaseAsciiFromEbcdic[(uint8_t)c];
diff --git a/source/common/uinvchar.h b/source/common/uinvchar.h
index 56dddfa8..a43cfcd9 100644
--- a/source/common/uinvchar.h
+++ b/source/common/uinvchar.h
@@ -68,6 +68,75 @@ uprv_isInvariantUString(const UChar *s, int32_t length);
# error Unknown charset family!
#endif
+#ifdef __cplusplus
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Validating variant of U_UPPER_ORDINAL(x).
+ * Maps the uppercase letters A..Z to 0..25;
+ * any other input yields a value outside 0..25.
+ */
+inline int32_t uprv_upperOrdinal(int32_t c) {
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+    return c - 'A';
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+    // EBCDIC: the 26 letters are not contiguous. They form the three runs
+    // A-I (9 letters), J-R (9) and S-Z (8).
+    // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
+    int32_t ordinal;
+    if (c > 'R') {
+        // Codes between R and S are rejected; S-Z --> 18..25
+        ordinal = (c < 'S') ? -1 : c - 'S' + 18;
+    } else if (c > 'I') {
+        // Codes between I and J are rejected; J-R --> 9..17
+        ordinal = (c < 'J') ? -1 : c - 'J' + 9;
+    } else {
+        ordinal = c - 'A';  // A-I --> 0..8 (negative below 'A')
+    }
+    return ordinal;
+#else
+# error Unknown charset family!
+#endif
+}
+
+/**
+ * Lowercase, validating counterpart of U_UPPER_ORDINAL(x).
+ * Maps the lowercase letters a..z to 0..25;
+ * any other input yields a value outside 0..25.
+ */
+inline int32_t uprv_lowerOrdinal(int32_t c) {
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+    return c - 'a';
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+    // EBCDIC: the 26 letters are not contiguous. They form the three runs
+    // a-i (9 letters), j-r (9) and s-z (8).
+    // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
+    int32_t ordinal;
+    if (c > 'r') {
+        // Codes between r and s are rejected; s-z --> 18..25
+        ordinal = (c < 's') ? -1 : c - 's' + 18;
+    } else if (c > 'i') {
+        // Codes between i and j are rejected; j-r --> 9..17
+        ordinal = (c < 'j') ? -1 : c - 'j' + 9;
+    } else {
+        ordinal = c - 'a';  // a-i --> 0..8 (negative below 'a')
+    }
+    return ordinal;
+#else
+# error Unknown charset family!
+#endif
+}
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Returns true if c == '@' is possible.
+ * The @ sign is variant, and the @ sign used on one
+ * EBCDIC machine won't be compiled the same way on other EBCDIC based machines.
+ * @internal
+ */
+U_CFUNC UBool
+uprv_isEbcdicAtSign(char c);
+
+/**
+ * \def uprv_isAtSign
+ * Returns true if c == '@' is possible.
+ * For ASCII, checks for exactly '@'. For EBCDIC, calls uprv_isEbcdicAtSign().
+ * @internal
+ */
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define uprv_isAtSign(c) ((c)=='@')
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+# define uprv_isAtSign(c) uprv_isEbcdicAtSign(c)
+#else
+# error Unknown charset family!
+#endif
+
/**
* Compare two EBCDIC invariant-character strings in ASCII order.
* @internal
@@ -88,6 +157,26 @@ uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2);
# error Unknown charset family!
#endif
+/**
+ * Converts an EBCDIC invariant character to ASCII.
+ * @internal
+ */
+U_INTERNAL char U_EXPORT2
+uprv_ebcdicToAscii(char c);
+
+/**
+ * \def uprv_invCharToAscii
+ * Converts an invariant character to ASCII.
+ * @internal
+ */
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define uprv_invCharToAscii(c) (c)
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+# define uprv_invCharToAscii(c) uprv_ebcdicToAscii(c)
+#else
+# error Unknown charset family!
+#endif
+
/**
* Converts an EBCDIC invariant character to lowercase ASCII.
* @internal
diff --git a/source/common/unicode/localebuilder.h b/source/common/unicode/localebuilder.h
index 960e5980..a91a0b51 100644
--- a/source/common/unicode/localebuilder.h
+++ b/source/common/unicode/localebuilder.h
@@ -4,6 +4,7 @@
#define __LOCALEBUILDER_H__
#include "unicode/locid.h"
+#include "unicode/localematcher.h"
#include "unicode/stringpiece.h"
#include "unicode/uobject.h"
#include "unicode/utypes.h"
@@ -277,6 +278,10 @@ public:
Locale build(UErrorCode& status);
private:
+ friend class LocaleMatcher::Result;
+
+ void copyExtensionsFrom(const Locale& src, UErrorCode& errorCode);
+
UErrorCode status_;
char language_[9];
char script_[5];
diff --git a/source/common/unicode/localematcher.h b/source/common/unicode/localematcher.h
new file mode 100644
index 00000000..701123f7
--- /dev/null
+++ b/source/common/unicode/localematcher.h
@@ -0,0 +1,605 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+
+// localematcher.h
+// created: 2019may08 Markus W. Scherer
+
+#ifndef __LOCALEMATCHER_H__
+#define __LOCALEMATCHER_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/locid.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: Locale matcher: User's desired locales vs. application's supported locales.
+ */
+
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Builder option for whether the language subtag or the script subtag is most important.
+ *
+ * @see LocaleMatcher::Builder#setFavorSubtag(ULocMatchFavorSubtag)
+ * @draft ICU 65
+ */
+enum ULocMatchFavorSubtag {
+ /**
+ * Language differences are most important, then script differences, then region differences.
+ * (This is the default behavior.)
+ *
+ * @draft ICU 65
+ */
+ ULOCMATCH_FAVOR_LANGUAGE,
+ /**
+ * Makes script differences matter relatively more than language differences.
+ *
+ * @draft ICU 65
+ */
+ ULOCMATCH_FAVOR_SCRIPT
+};
+#ifndef U_IN_DOXYGEN
+typedef enum ULocMatchFavorSubtag ULocMatchFavorSubtag;
+#endif
+
+/**
+ * Builder option for whether all desired locales are treated equally or
+ * earlier ones are preferred.
+ *
+ * @see LocaleMatcher::Builder#setDemotionPerDesiredLocale(ULocMatchDemotion)
+ * @draft ICU 65
+ */
+enum ULocMatchDemotion {
+ /**
+ * All desired locales are treated equally.
+ *
+ * @draft ICU 65
+ */
+ ULOCMATCH_DEMOTION_NONE,
+ /**
+ * Earlier desired locales are preferred.
+ *
+ * <p>From each desired locale to the next,
+ * the distance to any supported locale is increased by an additional amount
+ * which is at least as large as most region mismatches.
+ * A later desired locale has to have a better match with some supported locale
+ * due to more than merely having the same region subtag.
+ *
+ * <p>For example: <code>Supported={en, sv} desired=[en-GB, sv]</code>
+ * yields <code>Result(en-GB, en)</code> because
+ * with the demotion of sv its perfect match is no better than
+ * the region distance between the earlier desired locale en-GB and en=en-US.
+ *
+ * <p>Notes:
+ * <ul>
+ * <li>In some cases, language and/or script differences can be as small as
+ * the typical region difference. (Example: sr-Latn vs. sr-Cyrl)
+ * <li>It is possible for certain region differences to be larger than usual,
+ * and larger than the demotion.
+ * (As of CLDR 35 there is no such case, but
+ * this is possible in future versions of the data.)
+ * </ul>
+ *
+ * @draft ICU 65
+ */
+ ULOCMATCH_DEMOTION_REGION
+};
+#ifndef U_IN_DOXYGEN
+typedef enum ULocMatchDemotion ULocMatchDemotion;
+#endif
+
+struct UHashtable;
+
+U_NAMESPACE_BEGIN
+
+struct LSR;
+
+class LocaleDistance;
+class LocaleLsrIterator;
+class UVector;
+class XLikelySubtags;
+
+/**
+ * Immutable class that picks the best match between a user's desired locales and
+ * an application's supported locales.
+ * Movable but not copyable.
+ *
+ * <p>Example:
+ * <pre>
+ * UErrorCode errorCode = U_ZERO_ERROR;
+ * LocaleMatcher matcher = LocaleMatcher::Builder().setSupportedLocalesFromListString("fr, en-GB, en").build(errorCode);
+ * const Locale *bestSupported = matcher.getBestMatch(Locale::getUS(), errorCode); // "en"
+ * </pre>
+ *
+ * <p>A matcher takes into account when languages are close to one another,
+ * such as Danish and Norwegian,
+ * and when regional variants are close, like en-GB and en-AU as opposed to en-US.
+ *
+ * <p>If there are multiple supported locales with the same (language, script, region)
+ * likely subtags, then the current implementation returns the first of those locales.
+ * It ignores variant subtags (except for pseudolocale variants) and extensions.
+ * This may change in future versions.
+ *
+ * <p>For example, the current implementation does not distinguish between
+ * de, de-DE, de-Latn, de-1901, de-u-co-phonebk.
+ *
+ * <p>If you prefer one equivalent locale over another, then provide only the preferred one,
+ * or place it earlier in the list of supported locales.
+ *
+ * <p>Otherwise, the order of supported locales may have no effect on the best-match results.
+ * The current implementation compares each desired locale with supported locales
+ * in the following order:
+ * 1. Default locale, if supported;
+ * 2. CLDR "paradigm locales" like en-GB and es-419;
+ * 3. other supported locales.
+ * This may change in future versions.
+ *
+ * <p>Often a product will just need one matcher instance, built with the languages
+ * that it supports. However, it may want multiple instances with different
+ * default languages based on additional information, such as the domain.
+ *
+ * <p>This class is not intended for public subclassing.
+ *
+ * @draft ICU 65
+ */
+class U_COMMON_API LocaleMatcher : public UMemory {
+public:
+ /**
+ * Data for the best-matching pair of a desired and a supported locale.
+ * Movable but not copyable.
+ *
+ * @draft ICU 65
+ */
+ class U_COMMON_API Result : public UMemory {
+ public:
+ /**
+ * Move constructor; might modify the source.
+ * This object will have the same contents that the source object had.
+ *
+ * @param src Result to move contents from.
+ * @draft ICU 65
+ */
+ Result(Result &&src) U_NOEXCEPT;
+
+ /**
+ * Destructor.
+ *
+ * @draft ICU 65
+ */
+ ~Result();
+
+ /**
+ * Move assignment; might modify the source.
+ * This object will have the same contents that the source object had.
+ *
+ * @param src Result to move contents from.
+ * @draft ICU 65
+ */
+ Result &operator=(Result &&src) U_NOEXCEPT;
+
+ /**
+ * Returns the best-matching desired locale.
+ * nullptr if the list of desired locales is empty or if none matched well enough.
+ *
+ * @return the best-matching desired locale, or nullptr.
+ * @draft ICU 65
+ */
+ inline const Locale *getDesiredLocale() const { return desiredLocale; }
+
+ /**
+ * Returns the best-matching supported locale.
+ * If none matched well enough, this is the default locale.
+ * The default locale is nullptr if the list of supported locales is empty and
+ * no explicit default locale is set.
+ *
+ * @return the best-matching supported locale, or nullptr.
+ * @draft ICU 65
+ */
+ inline const Locale *getSupportedLocale() const { return supportedLocale; }
+
+ /**
+ * Returns the index of the best-matching desired locale in the input iteration order.
+ * -1 if the list of desired locales is empty or if none matched well enough.
+ *
+ * @return the index of the best-matching desired locale, or -1.
+ * @draft ICU 65
+ */
+ inline int32_t getDesiredIndex() const { return desiredIndex; }
+
+ /**
+ * Returns the index of the best-matching supported locale in the
+ * builder’s input order (“set” locales plus “added” locales).
+ * If the matcher was built from a locale list string, then the iteration order is that
+ * of a LocalePriorityList built from the same string.
+ * -1 if the list of supported locales is empty or if none matched well enough.
+ *
+ * @return the index of the best-matching supported locale, or -1.
+ * @draft ICU 65
+ */
+ inline int32_t getSupportedIndex() const { return supportedIndex; }
+
+ /**
+ * Takes the best-matching supported locale and adds relevant fields of the
+ * best-matching desired locale, such as the -t- and -u- extensions.
+ * May replace some fields of the supported locale.
+ * The result is the locale that should be used for date and number formatting, collation, etc.
+ * Returns the root locale if getSupportedLocale() returns nullptr.
+ *
+ * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn
+ *
+ * @return a locale combining the best-matching desired and supported locales.
+ * @draft ICU 65
+ */
+ Locale makeResolvedLocale(UErrorCode &errorCode) const;
+
+ private:
+ Result(const Locale *desired, const Locale *supported,
+ int32_t desIndex, int32_t suppIndex, UBool owned) :
+ desiredLocale(desired), supportedLocale(supported),
+ desiredIndex(desIndex), supportedIndex(suppIndex),
+ desiredIsOwned(owned) {}
+
+ Result(const Result &other) = delete;
+ Result &operator=(const Result &other) = delete;
+
+ const Locale *desiredLocale;
+ const Locale *supportedLocale;
+ int32_t desiredIndex;
+ int32_t supportedIndex;
+ UBool desiredIsOwned;
+
+ friend class LocaleMatcher;
+ };
+
+ /**
+ * LocaleMatcher builder.
+ * Movable but not copyable.
+ *
+ * @see LocaleMatcher#builder()
+ * @draft ICU 65
+ */
+ class U_COMMON_API Builder : public UMemory {
+ public:
+ /**
+ * Constructs a builder used in chaining parameters for building a LocaleMatcher.
+ *
+ * @return a new Builder object
+ * @draft ICU 65
+ */
+ Builder() {}
+
+ /**
+ * Move constructor; might modify the source.
+ * This builder will have the same contents that the source builder had.
+ *
+ * @param src Builder to move contents from.
+ * @draft ICU 65
+ */
+ Builder(Builder &&src) U_NOEXCEPT;
+
+ /**
+ * Destructor.
+ *
+ * @draft ICU 65
+ */
+ ~Builder();
+
+ /**
+ * Move assignment; might modify the source.
+ * This builder will have the same contents that the source builder had.
+ *
+ * @param src Builder to move contents from.
+ * @draft ICU 65
+ */
+ Builder &operator=(Builder &&src) U_NOEXCEPT;
+
+ /**
+ * Parses an Accept-Language string
+ * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),
+ * such as "af, en, fr;q=0.9", and sets the supported locales accordingly.
+ * Allows whitespace in more places but does not allow "*".
+ * Clears any previously set/added supported locales first.
+ *
+ * @param locales the Accept-Language string of locales to set
+ * @return this Builder object
+ * @draft ICU 65
+ */
+ Builder &setSupportedLocalesFromListString(StringPiece locales);
+
+ /**
+ * Copies the supported locales, preserving iteration order.
+ * Clears any previously set/added supported locales first.
+ * Duplicates are allowed, and are not removed.
+ *
+ * @param locales the list of locales
+ * @return this Builder object
+ * @draft ICU 65
+ */
+ Builder &setSupportedLocales(Locale::Iterator &locales);
+
+ /**
+ * Copies the supported locales from the begin/end range, preserving iteration order.
+ * Clears any previously set/added supported locales first.
+ * Duplicates are allowed, and are not removed.
+ *
+ * Each of the iterator parameter values must be an
+ * input iterator whose value is convertible to const Locale &.
+ *
+ * @param begin Start of range.
+ * @param end Exclusive end of range.
+ * @return this Builder object
+ * @draft ICU 65
+ */
+ template<typename Iter>
+ Builder &setSupportedLocales(Iter begin, Iter end) {
+     // A builder that already holds an error ignores the new range.
+     if (!U_FAILURE(errorCode_)) {
+         clearSupportedLocales();
+         for (; begin != end; ++begin) {
+             addSupportedLocale(*begin);
+         }
+     }
+     return *this;
+ }
+
+ /**
+ * Copies the supported locales from the begin/end range, preserving iteration order.
+ * Calls the converter to convert each *begin to a Locale or const Locale &.
+ * Clears any previously set/added supported locales first.
+ * Duplicates are allowed, and are not removed.
+ *
+ * Each of the iterator parameter values must be an
+ * input iterator whose value the converter converts to a Locale or const Locale &.
+ *
+ * @param begin Start of range.
+ * @param end Exclusive end of range.
+ * @param converter Converter from *begin to const Locale & or compatible.
+ * @return this Builder object
+ * @draft ICU 65
+ */
+ template<typename Iter, typename Conv>
+ Builder &setSupportedLocalesViaConverter(Iter begin, Iter end, Conv converter) {
+     // A builder that already holds an error ignores the new range.
+     if (!U_FAILURE(errorCode_)) {
+         clearSupportedLocales();
+         for (; begin != end; ++begin) {
+             // The converter result is passed straight on and not kept here.
+             addSupportedLocale(converter(*begin));
+         }
+     }
+     return *this;
+ }
+
+ /**
+ * Adds another supported locale.
+ * Duplicates are allowed, and are not removed.
+ *
+ * @param locale another locale
+ * @return this Builder object
+ * @draft ICU 65
+ */
+ Builder &addSupportedLocale(const Locale &locale);
+
+ /**
+ * Sets the default locale; if nullptr, or if it is not set explicitly,
+ * then the first supported locale is used as the default locale.
+ *
+ * @param defaultLocale the default locale (will be copied)
+ * @return this Builder object
+ * @draft ICU 65
+ */
+ Builder &setDefaultLocale(const Locale *defaultLocale);
+
+ /**
+ * If ULOCMATCH_FAVOR_SCRIPT, then the language differences are smaller than script
+ * differences.
+ * This is used in situations (such as maps) where
+ * it is better to fall back to the same script than a similar language.
+ *
+ * @param subtag the subtag to favor
+ * @return this Builder object
+ * @draft ICU 65
+ */
+ Builder &setFavorSubtag(ULocMatchFavorSubtag subtag);
+
+ /**
+ * Option for whether all desired locales are treated equally or
+ * earlier ones are preferred (this is the default).
+ *
+ * @param demotion the demotion per desired locale to set.
+ * @return this Builder object
+ * @draft ICU 65
+ */
+ Builder &setDemotionPerDesiredLocale(ULocMatchDemotion demotion);
+
+ /**
+ * Sets the UErrorCode if an error occurred while setting parameters.
+ * Preserves older error codes in the outErrorCode.
+ *
+ * @param outErrorCode Set to an error code if it does not contain one already
+ * and an error occurred while setting parameters.
+ * Otherwise unchanged.
+ * @return TRUE if U_FAILURE(outErrorCode)
+ * @draft ICU 65
+ */
+ UBool copyErrorTo(UErrorCode &outErrorCode) const;
+
+ /**
+ * Builds and returns a new locale matcher.
+ * This builder can continue to be used.
+ *
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return new LocaleMatcher.
+ * @draft ICU 65
+ */
+ LocaleMatcher build(UErrorCode &errorCode) const;
+
+ private:
+ friend class LocaleMatcher;
+
+ Builder(const Builder &other) = delete;
+ Builder &operator=(const Builder &other) = delete;
+
+ void clearSupportedLocales();
+ bool ensureSupportedLocaleVector();
+
+ UErrorCode errorCode_ = U_ZERO_ERROR;
+ UVector *supportedLocales_ = nullptr;
+ int32_t thresholdDistance_ = -1;
+ ULocMatchDemotion demotion_ = ULOCMATCH_DEMOTION_REGION;
+ Locale *defaultLocale_ = nullptr;
+ ULocMatchFavorSubtag favor_ = ULOCMATCH_FAVOR_LANGUAGE;
+ };
+
+ // FYI No public LocaleMatcher constructors in C++; use the Builder.
+
+ /**
+ * Move constructor; might modify the source.
+ * This matcher will have the same settings that the source matcher had.
+ * @param src source matcher
+ * @draft ICU 65
+ */
+ LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT;
+
+ /**
+ * Destructor.
+ * @draft ICU 65
+ */
+ ~LocaleMatcher();
+
+ /**
+ * Move assignment operator; might modify the source.
+ * This matcher will have the same settings that the source matcher had.
+ * The behavior is undefined if *this and src are the same object.
+ * @param src source matcher
+ * @return *this
+ * @draft ICU 65
+ */
+ LocaleMatcher &operator=(LocaleMatcher &&src) U_NOEXCEPT;
+
+ /**
+ * Returns the supported locale which best matches the desired locale.
+ *
+ * @param desiredLocale Typically a user's language.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return the best-matching supported locale.
+ * @draft ICU 65
+ */
+ const Locale *getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const;
+
+ /**
+ * Returns the supported locale which best matches one of the desired locales.
+ *
+ * @param desiredLocales Typically a user's languages, in order of preference (descending).
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return the best-matching supported locale.
+ * @draft ICU 65
+ */
+ const Locale *getBestMatch(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;
+
+ /**
+ * Parses an Accept-Language string
+ * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),
+ * such as "af, en, fr;q=0.9",
+ * and returns the supported locale which best matches one of the desired locales.
+ * Allows whitespace in more places but does not allow "*".
+ *
+ * @param desiredLocaleList Typically a user's languages, as an Accept-Language string.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return the best-matching supported locale.
+ * @draft ICU 65
+ */
+ const Locale *getBestMatchForListString(StringPiece desiredLocaleList, UErrorCode &errorCode) const;
+
+ /**
+ * Returns the best match between the desired locale and the supported locales.
+ * If the result's desired locale is not nullptr, then it is the address of the input locale.
+ * It has not been cloned.
+ *
+ * @param desiredLocale Typically a user's language.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return the best-matching pair of the desired and a supported locale.
+ * @draft ICU 65
+ */
+ Result getBestMatchResult(const Locale &desiredLocale, UErrorCode &errorCode) const;
+
+ /**
+ * Returns the best match between the desired and supported locales.
+ * If the result's desired locale is not nullptr, then it is a clone of
+ * the best-matching desired locale. The Result object owns the clone.
+ *
+ * @param desiredLocales Typically a user's languages, in order of preference (descending).
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return the best-matching pair of a desired and a supported locale.
+ * @draft ICU 65
+ */
+ Result getBestMatchResult(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * Returns a fraction between 0 and 1, where 1 means that the languages are a
+ * perfect match, and 0 means that they are completely different.
+ *
+ * <p>This is mostly an implementation detail, and the precise values may change over time.
+ * The implementation may use either the maximized forms or the other ones, or both.
+ * The implementation may or may not rely on the forms to be consistent with each other.
+ *
+ * <p>Callers should construct and use a matcher rather than match pairs of locales directly.
+ *
+ * @param desired Desired locale.
+ * @param supported Supported locale.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return value between 0 and 1, inclusive.
+ * @internal (has a known user)
+ */
+ double internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const;
+#endif // U_HIDE_INTERNAL_API
+
+private:
+ LocaleMatcher(const Builder &builder, UErrorCode &errorCode);
+ LocaleMatcher(const LocaleMatcher &other) = delete;
+ LocaleMatcher &operator=(const LocaleMatcher &other) = delete;
+
+ int32_t getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, UErrorCode &errorCode) const;
+
+ const XLikelySubtags &likelySubtags;
+ const LocaleDistance &localeDistance;
+ int32_t thresholdDistance;
+ int32_t demotionPerDesiredLocale;
+ ULocMatchFavorSubtag favorSubtag;
+
+ // These are in input order.
+ const Locale ** supportedLocales;
+ LSR *lsrs;
+ int32_t supportedLocalesLength;
+ // These are in preference order: 1. Default locale 2. paradigm locales 3. others.
+ UHashtable *supportedLsrToIndex; // Map<LSR, Integer> stores index+1 because 0 is "not found"
+ // Array versions of the supportedLsrToIndex keys and values.
+ // The distance lookup loops over the supportedLSRs and returns the index of the best match.
+ const LSR **supportedLSRs;
+ int32_t *supportedIndexes;
+ int32_t supportedLSRsLength;
+ Locale *ownedDefaultLocale;
+ const Locale *defaultLocale;
+ int32_t defaultLocaleIndex;
+};
+
+U_NAMESPACE_END
+
+#endif // U_HIDE_DRAFT_API
+#endif // U_SHOW_CPLUSPLUS_API
+#endif // __LOCALEMATCHER_H__
diff --git a/source/common/unicode/locid.h b/source/common/unicode/locid.h
index 7e410e53..0c9aecb6 100644
--- a/source/common/unicode/locid.h
+++ b/source/common/unicode/locid.h
@@ -1008,6 +1008,104 @@ public:
*/
virtual UClassID getDynamicClassID() const;
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * A Locale iterator interface similar to a Java Iterator<Locale>.
+ *
+ * Typical use:
+ * <pre>
+ * while (iter.hasNext()) {
+ *     const Locale &locale = iter.next();
+ *     ...
+ * }
+ * </pre>
+ * @draft ICU 65
+ */
+ class U_COMMON_API Iterator /* not : public UObject because this is an interface/mixin class */ {
+ public:
+ /** @draft ICU 65 */
+ virtual ~Iterator();
+
+ /**
+ * @return TRUE if next() can be called again.
+ * @draft ICU 65
+ */
+ virtual UBool hasNext() const = 0;
+
+ /**
+ * Call only after hasNext() has returned TRUE.
+ * @return the next locale.
+ * @draft ICU 65
+ */
+ virtual const Locale &next() = 0;
+ };
+
+ /**
+ * A generic Locale iterator implementation over Locale input iterators.
+ * Stores copies of the begin and end iterators; it does not copy the Locale objects.
+ * @draft ICU 65
+ */
+ template<typename Iter>
+ class RangeIterator : public Iterator, public UMemory {
+ public:
+ /**
+ * Constructs an iterator from a begin/end range.
+ * Each of the iterator parameter values must be an
+ * input iterator whose value is convertible to const Locale &.
+ *
+ * @param begin Start of range.
+ * @param end Exclusive end of range.
+ * @draft ICU 65
+ */
+ RangeIterator(Iter begin, Iter end) : it_(begin), end_(end) {}
+
+ /**
+ * @return TRUE if next() can be called again.
+ * @draft ICU 65
+ */
+ UBool hasNext() const override { return it_ != end_; }
+
+ /**
+ * Dereferences the current position and then advances it.
+ * There is no check against the end of the range here,
+ * so call this only after hasNext() has returned TRUE.
+ * The result refers to what the underlying iterator yields; no Locale copy is made.
+ *
+ * @return the next locale.
+ * @draft ICU 65
+ */
+ const Locale &next() override { return *it_++; }
+
+ private:
+ // Current position; advanced by next().
+ Iter it_;
+ // Exclusive end of the range.
+ const Iter end_;
+ };
+
+ /**
+ * A generic Locale iterator implementation over Locale input iterators.
+ * Calls the converter to convert each *begin to a const Locale &.
+ * Stores copies of the begin and end iterators and of the converter.
+ * @draft ICU 65
+ */
+ template<typename Iter, typename Conv>
+ class ConvertingIterator : public Iterator, public UMemory {
+ public:
+ /**
+ * Constructs an iterator from a begin/end range.
+ * Each of the iterator parameter values must be an
+ * input iterator whose value the converter converts to const Locale &.
+ *
+ * @param begin Start of range.
+ * @param end Exclusive end of range.
+ * @param converter Converter from *begin to const Locale & or compatible.
+ * @draft ICU 65
+ */
+ ConvertingIterator(Iter begin, Iter end, Conv converter) :
+ it_(begin), end_(end), converter_(converter) {}
+
+ /**
+ * @return TRUE if next() can be called again.
+ * @draft ICU 65
+ */
+ UBool hasNext() const override { return it_ != end_; }
+
+ /**
+ * Converts the element at the current position and then advances the position.
+ * There is no check against the end of the range here,
+ * so call this only after hasNext() has returned TRUE.
+ *
+ * The converter result is returned by reference without being copied.
+ * A converter that returns a Locale by value would therefore make this
+ * function return a reference to a destroyed temporary; the converter
+ * needs to return a reference to a Locale that outlives the call.
+ *
+ * @return the next locale.
+ * @draft ICU 65
+ */
+ const Locale &next() override { return converter_(*it_++); }
+
+ private:
+ // Current position; advanced by next().
+ Iter it_;
+ // Exclusive end of the range.
+ const Iter end_;
+ // Applied to each dereferenced element in next().
+ Conv converter_;
+ };
+#endif // U_HIDE_DRAFT_API
+
protected: /* only protected for testing purposes. DO NOT USE. */
#ifndef U_HIDE_INTERNAL_API
/**
diff --git a/source/common/uresbund.cpp b/source/common/uresbund.cpp
index 585c0e5f..3224fb37 100644
--- a/source/common/uresbund.cpp
+++ b/source/common/uresbund.cpp
@@ -38,6 +38,7 @@
#include "umutex.h"
#include "putilimp.h"
#include "uassert.h"
+#include "uresdata.h"
using namespace icu;
@@ -1963,7 +1964,7 @@ void getAllItemsWithFallback(
// When the sink sees the no-fallback/no-inheritance marker,
// then it would remove the parent's item.
// We would deserialize parent values even though they are overridden in a child bundle.
- value.pResData = &bundle->fResData;
+ value.setData(&bundle->fResData);
UResourceDataEntry *parentEntry = bundle->fData->fParent;
UBool hasParent = parentEntry != NULL && U_SUCCESS(parentEntry->fBogus);
value.setResource(bundle->fRes, ResourceTracer(bundle));
@@ -2011,31 +2012,60 @@ void getAllItemsWithFallback(
} // namespace
+// Requires a ResourceDataValue fill-in, so that we need not cast from a ResourceValue.
+// Unfortunately, the caller must know which subclass to make and pass in.
+// Alternatively, we could make it as polymorphic as in Java by
+// returning a ResourceValue pointer (possibly wrapped into a LocalPointer)
+// that the caller then owns.
+//
+// Also requires a UResourceBundle fill-in, so that the value's ResourceTracer
+// can point to a non-local bundle.
+// Without tracing, the child bundle could be a function-local object.
+U_CAPI void U_EXPORT2
+ures_getValueWithFallback(const UResourceBundle *bundle, const char *path,
+                          UResourceBundle *tempFillIn,
+                          ResourceDataValue &value, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return; }
+    if (path == nullptr) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    // An empty path selects the bundle itself. Otherwise the path is looked up
+    // with fallback, using tempFillIn as the fill-in for the resulting bundle.
+    const UResourceBundle *rb = bundle;
+    if (*path != 0) {
+        rb = ures_getByKeyWithFallback(bundle, path, tempFillIn, &errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+    }
+    // Point the value at the selected bundle's data and resource item.
+    value.setData(&rb->fResData);
+    value.setResource(rb->fRes, ResourceTracer(rb));
+}
+
U_CAPI void U_EXPORT2
ures_getAllItemsWithFallback(const UResourceBundle *bundle, const char *path,
icu::ResourceSink &sink, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return; }
- if (path == NULL) {
+ if (path == nullptr) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
- UResourceBundle stackBundle;
- ures_initStackObject(&stackBundle);
+ StackUResourceBundle stackBundle;
const UResourceBundle *rb;
if (*path == 0) {
// empty path
rb = bundle;
} else {
- rb = ures_getByKeyWithFallback(bundle, path, &stackBundle, &errorCode);
+ rb = ures_getByKeyWithFallback(bundle, path, stackBundle.getAlias(), &errorCode);
if (U_FAILURE(errorCode)) {
- ures_close(&stackBundle);
return;
}
}
// Get all table items with fallback.
ResourceDataValue value;
getAllItemsWithFallback(rb, value, sink, errorCode);
- ures_close(&stackBundle);
}
U_CAPI UResourceBundle* U_EXPORT2 ures_getByKey(const UResourceBundle *resB, const char* inKey, UResourceBundle *fillIn, UErrorCode *status) {
diff --git a/source/common/uresdata.cpp b/source/common/uresdata.cpp
index ce04142f..b3c2e2e2 100644
--- a/source/common/uresdata.cpp
+++ b/source/common/uresdata.cpp
@@ -509,7 +509,7 @@ const UChar *ResourceDataValue::getString(int32_t &length, UErrorCode &errorCode
if(U_FAILURE(errorCode)) {
return NULL;
}
- const UChar *s = res_getString(fTraceInfo, pResData, res, &length);
+ const UChar *s = res_getString(fTraceInfo, &getData(), res, &length);
if(s == NULL) {
errorCode = U_RESOURCE_TYPE_MISMATCH;
}
@@ -520,7 +520,7 @@ const UChar *ResourceDataValue::getAliasString(int32_t &length, UErrorCode &erro
if(U_FAILURE(errorCode)) {
return NULL;
}
- const UChar *s = res_getAlias(pResData, res, &length);
+ const UChar *s = res_getAlias(&getData(), res, &length);
if(s == NULL) {
errorCode = U_RESOURCE_TYPE_MISMATCH;
}
@@ -551,7 +551,7 @@ const int32_t *ResourceDataValue::getIntVector(int32_t &length, UErrorCode &erro
if(U_FAILURE(errorCode)) {
return NULL;
}
- const int32_t *iv = res_getIntVector(fTraceInfo, pResData, res, &length);
+ const int32_t *iv = res_getIntVector(fTraceInfo, &getData(), res, &length);
if(iv == NULL) {
errorCode = U_RESOURCE_TYPE_MISMATCH;
}
@@ -562,7 +562,7 @@ const uint8_t *ResourceDataValue::getBinary(int32_t &length, UErrorCode &errorCo
if(U_FAILURE(errorCode)) {
return NULL;
}
- const uint8_t *b = res_getBinary(fTraceInfo, pResData, res, &length);
+ const uint8_t *b = res_getBinary(fTraceInfo, &getData(), res, &length);
if(b == NULL) {
errorCode = U_RESOURCE_TYPE_MISMATCH;
}
@@ -580,12 +580,12 @@ ResourceArray ResourceDataValue::getArray(UErrorCode &errorCode) const {
switch(RES_GET_TYPE(res)) {
case URES_ARRAY:
if (offset!=0) { // empty if offset==0
- items32 = (const Resource *)pResData->pRoot+offset;
+ items32 = (const Resource *)getData().pRoot+offset;
length = *items32++;
}
break;
case URES_ARRAY16:
- items16 = pResData->p16BitUnits+offset;
+ items16 = getData().p16BitUnits+offset;
length = *items16++;
break;
default:
@@ -608,19 +608,19 @@ ResourceTable ResourceDataValue::getTable(UErrorCode &errorCode) const {
switch(RES_GET_TYPE(res)) {
case URES_TABLE:
if (offset != 0) { // empty if offset==0
- keys16 = (const uint16_t *)(pResData->pRoot+offset);
+ keys16 = (const uint16_t *)(getData().pRoot+offset);
length = *keys16++;
items32 = (const Resource *)(keys16+length+(~length&1));
}
break;
case URES_TABLE16:
- keys16 = pResData->p16BitUnits+offset;
+ keys16 = getData().p16BitUnits+offset;
length = *keys16++;
items16 = keys16 + length;
break;
case URES_TABLE32:
if (offset != 0) { // empty if offset==0
- keys32 = pResData->pRoot+offset;
+ keys32 = getData().pRoot+offset;
length = *keys32++;
items32 = (const Resource *)keys32 + length;
}
@@ -633,18 +633,18 @@ ResourceTable ResourceDataValue::getTable(UErrorCode &errorCode) const {
}
UBool ResourceDataValue::isNoInheritanceMarker() const {
- return ::isNoInheritanceMarker(pResData, res);
+ return ::isNoInheritanceMarker(&getData(), res);  // delegate to the global-scope helper; getData() returns a reference, so pass its address where the old pResData pointer was used
}
int32_t ResourceDataValue::getStringArray(UnicodeString *dest, int32_t capacity,
UErrorCode &errorCode) const {
- return ::getStringArray(pResData, getArray(errorCode), dest, capacity, errorCode);
+ return ::getStringArray(&getData(), getArray(errorCode), dest, capacity, errorCode);  // forward to the global-scope getStringArray with this value's resource data and its array view from getArray()
}
int32_t ResourceDataValue::getStringArrayOrStringAsArray(UnicodeString *dest, int32_t capacity,
UErrorCode &errorCode) const {
if(URES_IS_ARRAY(res)) {
- return ::getStringArray(pResData, getArray(errorCode), dest, capacity, errorCode);
+ return ::getStringArray(&getData(), getArray(errorCode), dest, capacity, errorCode);
}
if(U_FAILURE(errorCode)) {
return 0;
@@ -658,7 +658,7 @@ int32_t ResourceDataValue::getStringArrayOrStringAsArray(UnicodeString *dest, in
return 1;
}
int32_t sLength;
- const UChar *s = res_getString(fTraceInfo, pResData, res, &sLength);
+ const UChar *s = res_getString(fTraceInfo, &getData(), res, &sLength);
if(s != NULL) {
dest[0].setTo(TRUE, s, sLength);
return 1;
@@ -673,7 +673,7 @@ UnicodeString ResourceDataValue::getStringOrFirstOfArray(UErrorCode &errorCode)
return us;
}
int32_t sLength;
- const UChar *s = res_getString(fTraceInfo, pResData, res, &sLength);
+ const UChar *s = res_getString(fTraceInfo, &getData(), res, &sLength);
if(s != NULL) {
us.setTo(TRUE, s, sLength);
return us;
@@ -684,7 +684,7 @@ UnicodeString ResourceDataValue::getStringOrFirstOfArray(UErrorCode &errorCode)
}
if(array.getSize() > 0) {
// Tracing is already performed above (unimportant for trace that this is an array)
- s = res_getStringNoTrace(pResData, array.internalGetResource(pResData, 0), &sLength);
+ s = res_getStringNoTrace(&getData(), array.internalGetResource(&getData(), 0), &sLength);
if(s != NULL) {
us.setTo(TRUE, s, sLength);
return us;
@@ -821,14 +821,14 @@ UBool icu::ResourceTable::getKeyAndValue(int32_t i,
const char *&key, icu::ResourceValue &value) const {
if(0 <= i && i < length) {
icu::ResourceDataValue &rdValue = static_cast<icu::ResourceDataValue &>(value);
- if (keys16 != NULL) {
- key = RES_GET_KEY16(rdValue.pResData, keys16[i]);
+ if (keys16 != nullptr) {
+ key = RES_GET_KEY16(&rdValue.getData(), keys16[i]);
} else {
- key = RES_GET_KEY32(rdValue.pResData, keys32[i]);
+ key = RES_GET_KEY32(&rdValue.getData(), keys32[i]);
}
Resource res;
- if (items16 != NULL) {
- res = makeResourceFrom16(rdValue.pResData, items16[i]);
+ if (items16 != nullptr) {
+ res = makeResourceFrom16(&rdValue.getData(), items16[i]);
} else {
res = items32[i];
}
@@ -842,6 +842,29 @@ UBool icu::ResourceTable::getKeyAndValue(int32_t i,
return FALSE;
}
+UBool icu::ResourceTable::findValue(const char *key, ResourceValue &value) const {  // Looks up `key` in this table; on a hit stores the item in `value` and returns TRUE, otherwise returns FALSE.
+ icu::ResourceDataValue &rdValue = static_cast<icu::ResourceDataValue &>(value);  // unchecked downcast: `value` must really be a ResourceDataValue
+ const char *realKey = nullptr;  // out-parameter for the lookup helpers; not read again in this function
+ int32_t i;
+ if (keys16 != nullptr) {  // table has 16-bit key offsets
+ i = _res_findTableItem(&rdValue.getData(), keys16, length, key, &realKey);
+ } else {  // otherwise search via the 32-bit key offsets
+ i = _res_findTable32Item(&rdValue.getData(), keys32, length, key, &realKey);
+ }
+ if (i >= 0) {  // a negative index is treated as "key not found"
+ Resource res;
+ if (items16 != nullptr) {
+ res = makeResourceFrom16(&rdValue.getData(), items16[i]);  // 16-bit item is converted to a Resource first
+ } else {
+ res = items32[i];  // 32-bit item is used as the Resource directly
+ }
+ // Same note about lifetime as in getKeyAndValue().
+ rdValue.setResource(res, ResourceTracer(fTraceInfo, key));  // tracer is built from the caller-supplied key, not realKey
+ return TRUE;
+ }
+ return FALSE;
+}
+
U_CAPI Resource U_EXPORT2
res_getArrayItem(const ResourceData *pResData, Resource array, int32_t indexR) {
uint32_t offset=RES_GET_OFFSET(array);
@@ -887,7 +910,7 @@ UBool icu::ResourceArray::getValue(int32_t i, icu::ResourceValue &value) const {
// alive for the duration that fields are being read from it
// (including nested fields).
rdValue.setResource(
- internalGetResource(rdValue.pResData, i),
+ internalGetResource(&rdValue.getData(), i),
ResourceTracer(fTraceInfo, i));
return TRUE;
}
diff --git a/source/common/uresdata.h b/source/common/uresdata.h
index 51647409..d1b67bab 100644
--- a/source/common/uresdata.h
+++ b/source/common/uresdata.h
@@ -511,13 +511,12 @@ inline uint32_t res_getUInt(const ResourceTracer& traceInfo, Resource res) {
class ResourceDataValue : public ResourceValue {
public:
ResourceDataValue() :
- pResData(NULL),
res(static_cast<Resource>(URES_NONE)),
fTraceInfo() {}
virtual ~ResourceDataValue();
void setData(const ResourceData *data) {
- pResData = data;
+ resData = *data;
}
void setResource(Resource r, ResourceTracer&& traceInfo) {
@@ -525,6 +524,7 @@ public:
fTraceInfo = traceInfo;
}
+ const ResourceData &getData() const { return resData; }
virtual UResType getType() const;
virtual const UChar *getString(int32_t &length, UErrorCode &errorCode) const;
virtual const UChar *getAliasString(int32_t &length, UErrorCode &errorCode) const;
@@ -541,9 +541,10 @@ public:
UErrorCode &errorCode) const;
virtual UnicodeString getStringOrFirstOfArray(UErrorCode &errorCode) const;
- const ResourceData *pResData;
-
private:
+ // TODO(ICU-20769): If UResourceBundle.fResData becomes a pointer,
+ // then remove this value field again and just store a pResData pointer.
+ ResourceData resData;
Resource res;
ResourceTracer fTraceInfo;
};
diff --git a/source/common/uresimp.h b/source/common/uresimp.h
index 51db6c52..f453ddc0 100644
--- a/source/common/uresimp.h
+++ b/source/common/uresimp.h
@@ -67,6 +67,9 @@ struct UResourceBundle {
char *fVersion;
UResourceDataEntry *fTopLevelData; /* for getting the valid locale */
char *fResPath; /* full path to the resource: "zh_TW/CollationElements/Sequence" */
+ // TODO(ICU-20769): Try to change the by-value fResData into a pointer,
+ // with the struct in only one place for each bundle.
+ // Also replace class ResourceDataValue.resData with a pResData pointer again.
ResourceData fResData;
char fResBuf[RES_BUFSIZE];
int32_t fResPathLen;
@@ -281,6 +284,11 @@ ures_getStringByKeyWithFallback(const UResourceBundle *resB,
#ifdef __cplusplus
+U_CAPI void U_EXPORT2
+ures_getValueWithFallback(const UResourceBundle *bundle, const char *path,
+ UResourceBundle *tempFillIn,
+ icu::ResourceDataValue &value, UErrorCode &errorCode);
+
U_CAPI void U_EXPORT2
ures_getAllItemsWithFallback(const UResourceBundle *bundle, const char *path,
icu::ResourceSink &sink, UErrorCode &errorCode);