| //===----------------------------------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include <__utility/no_destroy.h> |
| #include <algorithm> |
| #include <clocale> |
| #include <codecvt> |
| #include <cstddef> |
| #include <cstdio> |
| #include <cstdlib> |
| #include <cstring> |
| #include <locale> |
| #include <new> |
| #include <string> |
| #include <type_traits> |
| #include <typeinfo> |
| #include <utility> |
| #include <vector> |
| |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| # include <cwctype> |
| #endif |
| |
| #if defined(_AIX) |
| # include <sys/localedef.h> // for __lc_ctype_ptr |
| #endif |
| |
| #if defined(_LIBCPP_MSVCRT) |
| # define _CTYPE_DISABLE_MACROS |
| #endif |
| |
| #if __has_include("<langinfo.h>") |
| # include <langinfo.h> |
| #endif |
| |
| #include "include/atomic_support.h" |
| #include "include/sso_allocator.h" |
| |
| // On Linux, wint_t and wchar_t have different signed-ness, and this causes |
| // lots of noise in the build log, but no bugs that I know of. |
| _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wsign-conversion") |
| |
| _LIBCPP_PUSH_MACROS |
| #include <__undef_macros> |
| |
| _LIBCPP_BEGIN_NAMESPACE_STD |
| |
// Owning wrapper around the locale_t returned by newlocale(LC_ALL_MASK, nm, 0).
// A non-null handle is passed to freelocale() when the wrapper is destroyed.
// Copying is disabled: two copies would both try to free the same handle.
struct __libcpp_unique_locale {
  // nm: locale name forwarded unchanged to newlocale().
  __libcpp_unique_locale(const char* nm) : __loc_(newlocale(LC_ALL_MASK, nm, 0)) {}

  ~__libcpp_unique_locale() {
    if (__loc_)
      freelocale(__loc_);
  }

  // True when newlocale() produced a non-null handle.
  explicit operator bool() const { return __loc_; }

  // Access to the stored handle; ownership stays with this object.
  locale_t& get() { return __loc_; }

  locale_t __loc_;

private:
  // Deleted (instead of declared and left undefined) so that any copy attempt,
  // including one made from inside a member, is rejected at compile time.
  __libcpp_unique_locale(__libcpp_unique_locale const&)            = delete;
  __libcpp_unique_locale& operator=(__libcpp_unique_locale const&) = delete;
};
| |
#ifdef __cloc_defined
// Returns a locale_t for the "C" locale. The handle is created with
// newlocale() when the function-local static is first initialized and the same
// value is returned on every later call. This function never frees it.
locale_t __cloc() {
  // Historical note: concurrent first calls were considered a possible race
  // whose worst outcome is a small, non-fatal resource leak.
  static locale_t c_locale = newlocale(LC_ALL_MASK, "C", 0);
  return c_locale;
}
#endif // __cloc_defined
| |
| namespace { |
| |
| struct releaser { |
| void operator()(locale::facet* p) { p->__release_shared(); } |
| }; |
| |
// Placement-constructs a T from `args` inside a static, T-aligned byte buffer
// and returns a reference to the new object. The buffer belongs to the
// particular make<T, Args...> instantiation; every call constructs into that
// same buffer, and this function never runs a destructor for the object.
template <class T, class... Args>
T& make(Args... args) {
  alignas(T) static std::byte storage[sizeof(T)];
  return *::new (&storage) T(args...);
}
| |
// Element count of a built-in array; the count is taken from the array type,
// the array contents are never inspected.
template <typename T, size_t N>
inline constexpr size_t countof(const T (&)[N]) {
  return N;
}
| |
// Element count of the half-open pointer range [begin, end), computed as the
// pointer difference converted to size_t.
template <typename T>
inline constexpr size_t countof(const T* const begin, const T* const end) {
  const auto distance = end - begin;
  return static_cast<size_t>(distance);
}
| |
// Chooses the name for a locale built from `other` with categories `c` taken
// from `one`:
//   - "*" if either input name is "*";
//   - `other` if no category is replaced (c == locale::none) or both names
//     are equal;
//   - "*" in every remaining case.
string build_name(const string& other, const string& one, locale::category c) {
  const bool both_named = (other != "*") && (one != "*");
  if (both_named) {
    if (c == locale::none || other == one)
      return other;
  }

  // FIXME: Handle the more complicated cases, such as when the locale has
  // different names for different categories.
  return "*";
}
| |
| } // namespace |
| |
| const locale::category locale::none; |
| const locale::category locale::collate; |
| const locale::category locale::ctype; |
| const locale::category locale::monetary; |
| const locale::category locale::numeric; |
| const locale::category locale::time; |
| const locale::category locale::messages; |
| const locale::category locale::all; |
| |
| class _LIBCPP_HIDDEN locale::__imp : public facet { |
| enum { N = 30 }; |
| vector<facet*, __sso_allocator<facet*, N> > facets_; |
| string name_; |
| |
| public: |
| explicit __imp(size_t refs = 0); |
| explicit __imp(const string& name, size_t refs = 0); |
| __imp(const __imp&); |
| __imp(const __imp&, const string&, locale::category c); |
| __imp(const __imp& other, const __imp& one, locale::category c); |
| __imp(const __imp&, facet* f, long id); |
| ~__imp(); |
| |
| const string& name() const { return name_; } |
| bool has_facet(long id) const { return static_cast<size_t>(id) < facets_.size() && facets_[static_cast<size_t>(id)]; } |
| const locale::facet* use_facet(long id) const; |
| |
| void acquire(); |
| void release(); |
| static __no_destroy<__imp> classic_locale_imp_; |
| |
| private: |
| void install(facet* f, long id); |
| template <class F> |
| void install(F* f) { |
| install(f, f->id.__get()); |
| } |
| template <class F> |
| void install_from(const __imp& other); |
| }; |
| |
| locale::__imp::__imp(size_t refs) : facet(refs), facets_(N), name_("C") { |
| facets_.clear(); |
| install(&make<std::collate<char> >(1u)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(&make<std::collate<wchar_t> >(1u)); |
| #endif |
| install(&make<std::ctype<char> >(nullptr, false, 1u)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(&make<std::ctype<wchar_t> >(1u)); |
| #endif |
| install(&make<codecvt<char, char, mbstate_t> >(1u)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(&make<codecvt<wchar_t, char, mbstate_t> >(1u)); |
| #endif |
| _LIBCPP_SUPPRESS_DEPRECATED_PUSH |
| install(&make<codecvt<char16_t, char, mbstate_t> >(1u)); |
| install(&make<codecvt<char32_t, char, mbstate_t> >(1u)); |
| _LIBCPP_SUPPRESS_DEPRECATED_POP |
| #ifndef _LIBCPP_HAS_NO_CHAR8_T |
| install(&make<codecvt<char16_t, char8_t, mbstate_t> >(1u)); |
| install(&make<codecvt<char32_t, char8_t, mbstate_t> >(1u)); |
| #endif |
| install(&make<numpunct<char> >(1u)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(&make<numpunct<wchar_t> >(1u)); |
| #endif |
| install(&make<num_get<char> >(1u)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(&make<num_get<wchar_t> >(1u)); |
| #endif |
| install(&make<num_put<char> >(1u)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(&make<num_put<wchar_t> >(1u)); |
| #endif |
| install(&make<moneypunct<char, false> >(1u)); |
| install(&make<moneypunct<char, true> >(1u)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(&make<moneypunct<wchar_t, false> >(1u)); |
| install(&make<moneypunct<wchar_t, true> >(1u)); |
| #endif |
| install(&make<money_get<char> >(1u)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(&make<money_get<wchar_t> >(1u)); |
| #endif |
| install(&make<money_put<char> >(1u)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(&make<money_put<wchar_t> >(1u)); |
| #endif |
| install(&make<time_get<char> >(1u)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(&make<time_get<wchar_t> >(1u)); |
| #endif |
| install(&make<time_put<char> >(1u)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(&make<time_put<wchar_t> >(1u)); |
| #endif |
| install(&make<std::messages<char> >(1u)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(&make<std::messages<wchar_t> >(1u)); |
| #endif |
| } |
| |
| locale::__imp::__imp(const string& name, size_t refs) : facet(refs), facets_(N), name_(name) { |
| #ifndef _LIBCPP_HAS_NO_EXCEPTIONS |
| try { |
| #endif // _LIBCPP_HAS_NO_EXCEPTIONS |
| facets_ = locale::classic().__locale_->facets_; |
| for (unsigned i = 0; i < facets_.size(); ++i) |
| if (facets_[i]) |
| facets_[i]->__add_shared(); |
| install(new collate_byname<char>(name_)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(new collate_byname<wchar_t>(name_)); |
| #endif |
| install(new ctype_byname<char>(name_)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(new ctype_byname<wchar_t>(name_)); |
| #endif |
| install(new codecvt_byname<char, char, mbstate_t>(name_)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(new codecvt_byname<wchar_t, char, mbstate_t>(name_)); |
| #endif |
| _LIBCPP_SUPPRESS_DEPRECATED_PUSH |
| install(new codecvt_byname<char16_t, char, mbstate_t>(name_)); |
| install(new codecvt_byname<char32_t, char, mbstate_t>(name_)); |
| _LIBCPP_SUPPRESS_DEPRECATED_POP |
| #ifndef _LIBCPP_HAS_NO_CHAR8_T |
| install(new codecvt_byname<char16_t, char8_t, mbstate_t>(name_)); |
| install(new codecvt_byname<char32_t, char8_t, mbstate_t>(name_)); |
| #endif |
| install(new numpunct_byname<char>(name_)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(new numpunct_byname<wchar_t>(name_)); |
| #endif |
| install(new moneypunct_byname<char, false>(name_)); |
| install(new moneypunct_byname<char, true>(name_)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(new moneypunct_byname<wchar_t, false>(name_)); |
| install(new moneypunct_byname<wchar_t, true>(name_)); |
| #endif |
| install(new time_get_byname<char>(name_)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(new time_get_byname<wchar_t>(name_)); |
| #endif |
| install(new time_put_byname<char>(name_)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(new time_put_byname<wchar_t>(name_)); |
| #endif |
| install(new messages_byname<char>(name_)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(new messages_byname<wchar_t>(name_)); |
| #endif |
| #ifndef _LIBCPP_HAS_NO_EXCEPTIONS |
| } catch (...) { |
| for (unsigned i = 0; i < facets_.size(); ++i) |
| if (facets_[i]) |
| facets_[i]->__release_shared(); |
| throw; |
| } |
| #endif // _LIBCPP_HAS_NO_EXCEPTIONS |
| } |
| |
| locale::__imp::__imp(const __imp& other) : facets_(max<size_t>(N, other.facets_.size())), name_(other.name_) { |
| facets_ = other.facets_; |
| for (unsigned i = 0; i < facets_.size(); ++i) |
| if (facets_[i]) |
| facets_[i]->__add_shared(); |
| } |
| |
| locale::__imp::__imp(const __imp& other, const string& name, locale::category c) |
| : facets_(N), name_(build_name(other.name_, name, c)) { |
| facets_ = other.facets_; |
| for (unsigned i = 0; i < facets_.size(); ++i) |
| if (facets_[i]) |
| facets_[i]->__add_shared(); |
| #ifndef _LIBCPP_HAS_NO_EXCEPTIONS |
| try { |
| #endif // _LIBCPP_HAS_NO_EXCEPTIONS |
| if (c & locale::collate) { |
| install(new collate_byname<char>(name)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(new collate_byname<wchar_t>(name)); |
| #endif |
| } |
| if (c & locale::ctype) { |
| install(new ctype_byname<char>(name)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(new ctype_byname<wchar_t>(name)); |
| #endif |
| install(new codecvt_byname<char, char, mbstate_t>(name)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(new codecvt_byname<wchar_t, char, mbstate_t>(name)); |
| #endif |
| _LIBCPP_SUPPRESS_DEPRECATED_PUSH |
| install(new codecvt_byname<char16_t, char, mbstate_t>(name)); |
| install(new codecvt_byname<char32_t, char, mbstate_t>(name)); |
| _LIBCPP_SUPPRESS_DEPRECATED_POP |
| #ifndef _LIBCPP_HAS_NO_CHAR8_T |
| install(new codecvt_byname<char16_t, char8_t, mbstate_t>(name)); |
| install(new codecvt_byname<char32_t, char8_t, mbstate_t>(name)); |
| #endif |
| } |
| if (c & locale::monetary) { |
| install(new moneypunct_byname<char, false>(name)); |
| install(new moneypunct_byname<char, true>(name)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(new moneypunct_byname<wchar_t, false>(name)); |
| install(new moneypunct_byname<wchar_t, true>(name)); |
| #endif |
| } |
| if (c & locale::numeric) { |
| install(new numpunct_byname<char>(name)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(new numpunct_byname<wchar_t>(name)); |
| #endif |
| } |
| if (c & locale::time) { |
| install(new time_get_byname<char>(name)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(new time_get_byname<wchar_t>(name)); |
| #endif |
| install(new time_put_byname<char>(name)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(new time_put_byname<wchar_t>(name)); |
| #endif |
| } |
| if (c & locale::messages) { |
| install(new messages_byname<char>(name)); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install(new messages_byname<wchar_t>(name)); |
| #endif |
| } |
| #ifndef _LIBCPP_HAS_NO_EXCEPTIONS |
| } catch (...) { |
| for (unsigned i = 0; i < facets_.size(); ++i) |
| if (facets_[i]) |
| facets_[i]->__release_shared(); |
| throw; |
| } |
| #endif // _LIBCPP_HAS_NO_EXCEPTIONS |
| } |
| |
| template <class F> |
| inline void locale::__imp::install_from(const locale::__imp& one) { |
| long id = F::id.__get(); |
| install(const_cast<F*>(static_cast<const F*>(one.use_facet(id))), id); |
| } |
| |
| locale::__imp::__imp(const __imp& other, const __imp& one, locale::category c) |
| : facets_(N), name_(build_name(other.name_, one.name_, c)) { |
| facets_ = other.facets_; |
| for (unsigned i = 0; i < facets_.size(); ++i) |
| if (facets_[i]) |
| facets_[i]->__add_shared(); |
| #ifndef _LIBCPP_HAS_NO_EXCEPTIONS |
| try { |
| #endif // _LIBCPP_HAS_NO_EXCEPTIONS |
| if (c & locale::collate) { |
| install_from<std::collate<char> >(one); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install_from<std::collate<wchar_t> >(one); |
| #endif |
| } |
| if (c & locale::ctype) { |
| install_from<std::ctype<char> >(one); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install_from<std::ctype<wchar_t> >(one); |
| #endif |
| install_from<std::codecvt<char, char, mbstate_t> >(one); |
| _LIBCPP_SUPPRESS_DEPRECATED_PUSH |
| install_from<std::codecvt<char16_t, char, mbstate_t> >(one); |
| install_from<std::codecvt<char32_t, char, mbstate_t> >(one); |
| _LIBCPP_SUPPRESS_DEPRECATED_POP |
| #ifndef _LIBCPP_HAS_NO_CHAR8_T |
| install_from<std::codecvt<char16_t, char8_t, mbstate_t> >(one); |
| install_from<std::codecvt<char32_t, char8_t, mbstate_t> >(one); |
| #endif |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install_from<std::codecvt<wchar_t, char, mbstate_t> >(one); |
| #endif |
| } |
| if (c & locale::monetary) { |
| install_from<moneypunct<char, false> >(one); |
| install_from<moneypunct<char, true> >(one); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install_from<moneypunct<wchar_t, false> >(one); |
| install_from<moneypunct<wchar_t, true> >(one); |
| #endif |
| install_from<money_get<char> >(one); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install_from<money_get<wchar_t> >(one); |
| #endif |
| install_from<money_put<char> >(one); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install_from<money_put<wchar_t> >(one); |
| #endif |
| } |
| if (c & locale::numeric) { |
| install_from<numpunct<char> >(one); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install_from<numpunct<wchar_t> >(one); |
| #endif |
| install_from<num_get<char> >(one); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install_from<num_get<wchar_t> >(one); |
| #endif |
| install_from<num_put<char> >(one); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install_from<num_put<wchar_t> >(one); |
| #endif |
| } |
| if (c & locale::time) { |
| install_from<time_get<char> >(one); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install_from<time_get<wchar_t> >(one); |
| #endif |
| install_from<time_put<char> >(one); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install_from<time_put<wchar_t> >(one); |
| #endif |
| } |
| if (c & locale::messages) { |
| install_from<std::messages<char> >(one); |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| install_from<std::messages<wchar_t> >(one); |
| #endif |
| } |
| #ifndef _LIBCPP_HAS_NO_EXCEPTIONS |
| } catch (...) { |
| for (unsigned i = 0; i < facets_.size(); ++i) |
| if (facets_[i]) |
| facets_[i]->__release_shared(); |
| throw; |
| } |
| #endif // _LIBCPP_HAS_NO_EXCEPTIONS |
| } |
| |
| locale::__imp::__imp(const __imp& other, facet* f, long id) |
| : facets_(max<size_t>(N, other.facets_.size() + 1)), name_("*") { |
| f->__add_shared(); |
| unique_ptr<facet, releaser> hold(f); |
| facets_ = other.facets_; |
| for (unsigned i = 0; i < other.facets_.size(); ++i) |
| if (facets_[i]) |
| facets_[i]->__add_shared(); |
| install(hold.get(), id); |
| } |
| |
| locale::__imp::~__imp() { |
| for (unsigned i = 0; i < facets_.size(); ++i) |
| if (facets_[i]) |
| facets_[i]->__release_shared(); |
| } |
| |
| void locale::__imp::install(facet* f, long id) { |
| f->__add_shared(); |
| unique_ptr<facet, releaser> hold(f); |
| if (static_cast<size_t>(id) >= facets_.size()) |
| facets_.resize(static_cast<size_t>(id + 1)); |
| if (facets_[static_cast<size_t>(id)]) |
| facets_[static_cast<size_t>(id)]->__release_shared(); |
| facets_[static_cast<size_t>(id)] = hold.release(); |
| } |
| |
| const locale::facet* locale::__imp::use_facet(long id) const { |
| if (!has_facet(id)) |
| __throw_bad_cast(); |
| return facets_[static_cast<size_t>(id)]; |
| } |
| |
| // locale |
| |
| // We don't do reference counting on the classic locale. |
| // It's never destroyed anyway, but atomic reference counting may be very |
| // expensive in parallel applications. The classic locale is used by default |
| // in all streams. Note: if a new global locale is installed, then we lose |
| // the benefit of no reference counting. |
| constinit __no_destroy<locale::__imp> |
| locale::__imp::classic_locale_imp_(__uninitialized_tag{}); // initialized below in classic() |
| |
| const locale& locale::classic() { |
| static const __no_destroy<locale> classic_locale(__private_constructor_tag{}, [] { |
| // executed exactly once on first initialization of `classic_locale` |
| locale::__imp::classic_locale_imp_.__emplace(1u); |
| return &locale::__imp::classic_locale_imp_.__get(); |
| }()); |
| return classic_locale.__get(); |
| } |
| |
| locale& locale::__global() { |
| static __no_destroy<locale> g(locale::classic()); |
| return g.__get(); |
| } |
| |
| void locale::__imp::acquire() { |
| if (this != &locale::__imp::classic_locale_imp_.__get()) |
| __add_shared(); |
| } |
| |
| void locale::__imp::release() { |
| if (this != &locale::__imp::classic_locale_imp_.__get()) |
| __release_shared(); |
| } |
| |
| locale::locale() noexcept : __locale_(__global().__locale_) { __locale_->acquire(); } |
| |
| locale::locale(const locale& l) noexcept : __locale_(l.__locale_) { __locale_->acquire(); } |
| |
| locale::~locale() { __locale_->release(); } |
| |
| const locale& locale::operator=(const locale& other) noexcept { |
| other.__locale_->acquire(); |
| __locale_->release(); |
| __locale_ = other.__locale_; |
| return *this; |
| } |
| |
| locale::locale(const char* name) |
| : __locale_(name ? new __imp(name) : (__throw_runtime_error("locale constructed with null"), nullptr)) { |
| __locale_->acquire(); |
| } |
| |
| locale::locale(const string& name) : __locale_(new __imp(name)) { __locale_->acquire(); } |
| |
| locale::locale(const locale& other, const char* name, category c) |
| : __locale_(name ? new __imp(*other.__locale_, name, c) |
| : (__throw_runtime_error("locale constructed with null"), nullptr)) { |
| __locale_->acquire(); |
| } |
| |
| locale::locale(const locale& other, const string& name, category c) : __locale_(new __imp(*other.__locale_, name, c)) { |
| __locale_->acquire(); |
| } |
| |
| locale::locale(const locale& other, const locale& one, category c) |
| : __locale_(new __imp(*other.__locale_, *one.__locale_, c)) { |
| __locale_->acquire(); |
| } |
| |
| string locale::name() const { return __locale_->name(); } |
| |
| void locale::__install_ctor(const locale& other, facet* f, long facet_id) { |
| if (f) |
| __locale_ = new __imp(*other.__locale_, f, facet_id); |
| else |
| __locale_ = other.__locale_; |
| __locale_->acquire(); |
| } |
| |
| locale locale::global(const locale& loc) { |
| locale& g = __global(); |
| locale r = g; |
| g = loc; |
| if (g.name() != "*") |
| setlocale(LC_ALL, g.name().c_str()); |
| return r; |
| } |
| |
| bool locale::has_facet(id& x) const { return __locale_->has_facet(x.__get()); } |
| |
| const locale::facet* locale::use_facet(id& x) const { return __locale_->use_facet(x.__get()); } |
| |
| bool locale::operator==(const locale& y) const { |
| return (__locale_ == y.__locale_) || (__locale_->name() != "*" && __locale_->name() == y.__locale_->name()); |
| } |
| |
| // locale::facet |
| |
| locale::facet::~facet() {} |
| |
| void locale::facet::__on_zero_shared() noexcept { delete this; } |
| |
| // locale::id |
| |
| constinit int32_t locale::id::__next_id = 0; |
| |
| long locale::id::__get() { |
| call_once(__flag_, [&] { __id_ = __libcpp_atomic_add(&__next_id, 1); }); |
| return __id_ - 1; |
| } |
| |
| // template <> class collate_byname<char> |
| |
| collate_byname<char>::collate_byname(const char* n, size_t refs) |
| : collate<char>(refs), __l_(newlocale(LC_ALL_MASK, n, 0)) { |
| if (__l_ == 0) |
| __throw_runtime_error( |
| ("collate_byname<char>::collate_byname" |
| " failed to construct for " + |
| string(n)) |
| .c_str()); |
| } |
| |
| collate_byname<char>::collate_byname(const string& name, size_t refs) |
| : collate<char>(refs), __l_(newlocale(LC_ALL_MASK, name.c_str(), 0)) { |
| if (__l_ == 0) |
| __throw_runtime_error( |
| ("collate_byname<char>::collate_byname" |
| " failed to construct for " + |
| name) |
| .c_str()); |
| } |
| |
| collate_byname<char>::~collate_byname() { freelocale(__l_); } |
| |
| int collate_byname<char>::do_compare( |
| const char_type* __lo1, const char_type* __hi1, const char_type* __lo2, const char_type* __hi2) const { |
| string_type lhs(__lo1, __hi1); |
| string_type rhs(__lo2, __hi2); |
| int r = strcoll_l(lhs.c_str(), rhs.c_str(), __l_); |
| if (r < 0) |
| return -1; |
| if (r > 0) |
| return 1; |
| return r; |
| } |
| |
| collate_byname<char>::string_type collate_byname<char>::do_transform(const char_type* lo, const char_type* hi) const { |
| const string_type in(lo, hi); |
| string_type out(strxfrm_l(0, in.c_str(), 0, __l_), char()); |
| strxfrm_l(const_cast<char*>(out.c_str()), in.c_str(), out.size() + 1, __l_); |
| return out; |
| } |
| |
| // template <> class collate_byname<wchar_t> |
| |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| collate_byname<wchar_t>::collate_byname(const char* n, size_t refs) |
| : collate<wchar_t>(refs), __l_(newlocale(LC_ALL_MASK, n, 0)) { |
| if (__l_ == 0) |
| __throw_runtime_error( |
| ("collate_byname<wchar_t>::collate_byname(size_t refs)" |
| " failed to construct for " + |
| string(n)) |
| .c_str()); |
| } |
| |
| collate_byname<wchar_t>::collate_byname(const string& name, size_t refs) |
| : collate<wchar_t>(refs), __l_(newlocale(LC_ALL_MASK, name.c_str(), 0)) { |
| if (__l_ == 0) |
| __throw_runtime_error( |
| ("collate_byname<wchar_t>::collate_byname(size_t refs)" |
| " failed to construct for " + |
| name) |
| .c_str()); |
| } |
| |
| collate_byname<wchar_t>::~collate_byname() { freelocale(__l_); } |
| |
| int collate_byname<wchar_t>::do_compare( |
| const char_type* __lo1, const char_type* __hi1, const char_type* __lo2, const char_type* __hi2) const { |
| string_type lhs(__lo1, __hi1); |
| string_type rhs(__lo2, __hi2); |
| int r = wcscoll_l(lhs.c_str(), rhs.c_str(), __l_); |
| if (r < 0) |
| return -1; |
| if (r > 0) |
| return 1; |
| return r; |
| } |
| |
| collate_byname<wchar_t>::string_type |
| collate_byname<wchar_t>::do_transform(const char_type* lo, const char_type* hi) const { |
| const string_type in(lo, hi); |
| string_type out(wcsxfrm_l(0, in.c_str(), 0, __l_), wchar_t()); |
| wcsxfrm_l(const_cast<wchar_t*>(out.c_str()), in.c_str(), out.size() + 1, __l_); |
| return out; |
| } |
| #endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| |
| const ctype_base::mask ctype_base::space; |
| const ctype_base::mask ctype_base::print; |
| const ctype_base::mask ctype_base::cntrl; |
| const ctype_base::mask ctype_base::upper; |
| const ctype_base::mask ctype_base::lower; |
| const ctype_base::mask ctype_base::alpha; |
| const ctype_base::mask ctype_base::digit; |
| const ctype_base::mask ctype_base::punct; |
| const ctype_base::mask ctype_base::xdigit; |
| const ctype_base::mask ctype_base::blank; |
| const ctype_base::mask ctype_base::alnum; |
| const ctype_base::mask ctype_base::graph; |
| |
| // template <> class ctype<wchar_t>; |
| |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| constinit locale::id ctype<wchar_t>::id; |
| |
| ctype<wchar_t>::~ctype() {} |
| |
| bool ctype<wchar_t>::do_is(mask m, char_type c) const { |
| return isascii(c) ? (ctype<char>::classic_table()[c] & m) != 0 : false; |
| } |
| |
| const wchar_t* ctype<wchar_t>::do_is(const char_type* low, const char_type* high, mask* vec) const { |
| for (; low != high; ++low, ++vec) |
| *vec = static_cast<mask>(isascii(*low) ? ctype<char>::classic_table()[*low] : 0); |
| return low; |
| } |
| |
| const wchar_t* ctype<wchar_t>::do_scan_is(mask m, const char_type* low, const char_type* high) const { |
| for (; low != high; ++low) |
| if (isascii(*low) && (ctype<char>::classic_table()[*low] & m)) |
| break; |
| return low; |
| } |
| |
| const wchar_t* ctype<wchar_t>::do_scan_not(mask m, const char_type* low, const char_type* high) const { |
| for (; low != high; ++low) |
| if (!(isascii(*low) && (ctype<char>::classic_table()[*low] & m))) |
| break; |
| return low; |
| } |
| |
| wchar_t ctype<wchar_t>::do_toupper(char_type c) const { |
| # ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE |
| return isascii(c) ? _DefaultRuneLocale.__mapupper[c] : c; |
| # elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) || defined(__MVS__) |
| return isascii(c) ? ctype<char>::__classic_upper_table()[c] : c; |
| # else |
| return (isascii(c) && iswlower_l(c, _LIBCPP_GET_C_LOCALE)) ? c - L'a' + L'A' : c; |
| # endif |
| } |
| |
| const wchar_t* ctype<wchar_t>::do_toupper(char_type* low, const char_type* high) const { |
| for (; low != high; ++low) |
| # ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE |
| *low = isascii(*low) ? _DefaultRuneLocale.__mapupper[*low] : *low; |
| # elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) || defined(__MVS__) |
| *low = isascii(*low) ? ctype<char>::__classic_upper_table()[*low] : *low; |
| # else |
| *low = (isascii(*low) && islower_l(*low, _LIBCPP_GET_C_LOCALE)) ? (*low - L'a' + L'A') : *low; |
| # endif |
| return low; |
| } |
| |
| wchar_t ctype<wchar_t>::do_tolower(char_type c) const { |
| # ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE |
| return isascii(c) ? _DefaultRuneLocale.__maplower[c] : c; |
| # elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) || defined(__MVS__) |
| return isascii(c) ? ctype<char>::__classic_lower_table()[c] : c; |
| # else |
| return (isascii(c) && isupper_l(c, _LIBCPP_GET_C_LOCALE)) ? c - L'A' + 'a' : c; |
| # endif |
| } |
| |
| const wchar_t* ctype<wchar_t>::do_tolower(char_type* low, const char_type* high) const { |
| for (; low != high; ++low) |
| # ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE |
| *low = isascii(*low) ? _DefaultRuneLocale.__maplower[*low] : *low; |
| # elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) || defined(__MVS__) |
| *low = isascii(*low) ? ctype<char>::__classic_lower_table()[*low] : *low; |
| # else |
| *low = (isascii(*low) && isupper_l(*low, _LIBCPP_GET_C_LOCALE)) ? *low - L'A' + L'a' : *low; |
| # endif |
| return low; |
| } |
| |
| wchar_t ctype<wchar_t>::do_widen(char c) const { return c; } |
| |
| const char* ctype<wchar_t>::do_widen(const char* low, const char* high, char_type* dest) const { |
| for (; low != high; ++low, ++dest) |
| *dest = *low; |
| return low; |
| } |
| |
| char ctype<wchar_t>::do_narrow(char_type c, char dfault) const { |
| if (isascii(c)) |
| return static_cast<char>(c); |
| return dfault; |
| } |
| |
| const wchar_t* ctype<wchar_t>::do_narrow(const char_type* low, const char_type* high, char dfault, char* dest) const { |
| for (; low != high; ++low, ++dest) |
| if (isascii(*low)) |
| *dest = static_cast<char>(*low); |
| else |
| *dest = dfault; |
| return low; |
| } |
| #endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| |
| // template <> class ctype<char>; |
| |
| constinit locale::id ctype<char>::id; |
| |
| const size_t ctype<char>::table_size; |
| |
| ctype<char>::ctype(const mask* tab, bool del, size_t refs) : locale::facet(refs), __tab_(tab), __del_(del) { |
| if (__tab_ == 0) |
| __tab_ = classic_table(); |
| } |
| |
| ctype<char>::~ctype() { |
| if (__tab_ && __del_) |
| delete[] __tab_; |
| } |
| |
| char ctype<char>::do_toupper(char_type c) const { |
| #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE |
| return isascii(c) ? static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(c)]) : c; |
| #elif defined(__NetBSD__) |
| return static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]); |
| #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__) |
| return isascii(c) ? static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]) : c; |
| #else |
| return (isascii(c) && islower_l(c, _LIBCPP_GET_C_LOCALE)) ? c - 'a' + 'A' : c; |
| #endif |
| } |
| |
| const char* ctype<char>::do_toupper(char_type* low, const char_type* high) const { |
| for (; low != high; ++low) |
| #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE |
| *low = isascii(*low) ? static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(*low)]) : *low; |
| #elif defined(__NetBSD__) |
| *low = static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(*low)]); |
| #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__) |
| *low = isascii(*low) ? static_cast<char>(__classic_upper_table()[static_cast<size_t>(*low)]) : *low; |
| #else |
| *low = (isascii(*low) && islower_l(*low, _LIBCPP_GET_C_LOCALE)) ? *low - 'a' + 'A' : *low; |
| #endif |
| return low; |
| } |
| |
| char ctype<char>::do_tolower(char_type c) const { |
| #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE |
| return isascii(c) ? static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(c)]) : c; |
| #elif defined(__NetBSD__) |
| return static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(c)]); |
| #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__) |
| return isascii(c) ? static_cast<char>(__classic_lower_table()[static_cast<size_t>(c)]) : c; |
| #else |
| return (isascii(c) && isupper_l(c, _LIBCPP_GET_C_LOCALE)) ? c - 'A' + 'a' : c; |
| #endif |
| } |
| |
| const char* ctype<char>::do_tolower(char_type* low, const char_type* high) const { |
| for (; low != high; ++low) |
| #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE |
| *low = isascii(*low) ? static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(*low)]) : *low; |
| #elif defined(__NetBSD__) |
| *low = static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(*low)]); |
| #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__) |
| *low = isascii(*low) ? static_cast<char>(__classic_lower_table()[static_cast<size_t>(*low)]) : *low; |
| #else |
| *low = (isascii(*low) && isupper_l(*low, _LIBCPP_GET_C_LOCALE)) ? *low - 'A' + 'a' : *low; |
| #endif |
| return low; |
| } |
| |
| char ctype<char>::do_widen(char c) const { return c; } |
| |
| const char* ctype<char>::do_widen(const char* low, const char* high, char_type* dest) const { |
| for (; low != high; ++low, ++dest) |
| *dest = *low; |
| return low; |
| } |
| |
| char ctype<char>::do_narrow(char_type c, char dfault) const { |
| if (isascii(c)) |
| return static_cast<char>(c); |
| return dfault; |
| } |
| |
| const char* ctype<char>::do_narrow(const char_type* low, const char_type* high, char dfault, char* dest) const { |
| for (; low != high; ++low, ++dest) |
| if (isascii(*low)) |
| *dest = *low; |
| else |
| *dest = dfault; |
| return low; |
| } |
| |
#if defined(__EMSCRIPTEN__)
// C-linkage declarations of the character-table accessors, made visible here
// for Emscripten builds only.
extern "C" const unsigned short** __ctype_b_loc();
extern "C" const int** __ctype_tolower_loc();
extern "C" const int** __ctype_toupper_loc();
#endif
| |
| #ifdef _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE |
// Returns a pointer to a table built into the library (used when
// _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE is defined) instead of a table borrowed
// from the host C library.
//
// Layout: the first 128 initializers (two per row) give the classification
// mask for each 7-bit ASCII code in ascending order; the remaining 128
// initializers (eight rows of sixteen) are 0, i.e. no classification bits.
// NOTE(review): this assumes table_size is 256 -- confirm against the header.
| const ctype<char>::mask* ctype<char>::classic_table() noexcept { |
| // clang-format off |
| static constexpr const ctype<char>::mask builtin_table[table_size] = { |
| cntrl, cntrl, |
| cntrl, cntrl, |
| cntrl, cntrl, |
| cntrl, cntrl, |
| cntrl, cntrl | space | blank, |
| cntrl | space, cntrl | space, |
| cntrl | space, cntrl | space, |
| cntrl, cntrl, |
| cntrl, cntrl, |
| cntrl, cntrl, |
| cntrl, cntrl, |
| cntrl, cntrl, |
| cntrl, cntrl, |
| cntrl, cntrl, |
| cntrl, cntrl, |
| cntrl, cntrl, |
| space | blank | print, punct | print, |
| punct | print, punct | print, |
| punct | print, punct | print, |
| punct | print, punct | print, |
| punct | print, punct | print, |
| punct | print, punct | print, |
| punct | print, punct | print, |
| punct | print, punct | print, |
| digit | print | xdigit, digit | print | xdigit, |
| digit | print | xdigit, digit | print | xdigit, |
| digit | print | xdigit, digit | print | xdigit, |
| digit | print | xdigit, digit | print | xdigit, |
| digit | print | xdigit, digit | print | xdigit, |
| punct | print, punct | print, |
| punct | print, punct | print, |
| punct | print, punct | print, |
| punct | print, upper | xdigit | print | alpha, |
| upper | xdigit | print | alpha, upper | xdigit | print | alpha, |
| upper | xdigit | print | alpha, upper | xdigit | print | alpha, |
| upper | xdigit | print | alpha, upper | print | alpha, |
| upper | print | alpha, upper | print | alpha, |
| upper | print | alpha, upper | print | alpha, |
| upper | print | alpha, upper | print | alpha, |
| upper | print | alpha, upper | print | alpha, |
| upper | print | alpha, upper | print | alpha, |
| upper | print | alpha, upper | print | alpha, |
| upper | print | alpha, upper | print | alpha, |
| upper | print | alpha, upper | print | alpha, |
| upper | print | alpha, upper | print | alpha, |
| upper | print | alpha, punct | print, |
| punct | print, punct | print, |
| punct | print, punct | print, |
| punct | print, lower | xdigit | print | alpha, |
| lower | xdigit | print | alpha, lower | xdigit | print | alpha, |
| lower | xdigit | print | alpha, lower | xdigit | print | alpha, |
| lower | xdigit | print | alpha, lower | print | alpha, |
| lower | print | alpha, lower | print | alpha, |
| lower | print | alpha, lower | print | alpha, |
| lower | print | alpha, lower | print | alpha, |
| lower | print | alpha, lower | print | alpha, |
| lower | print | alpha, lower | print | alpha, |
| lower | print | alpha, lower | print | alpha, |
| lower | print | alpha, lower | print | alpha, |
| lower | print | alpha, lower | print | alpha, |
| lower | print | alpha, lower | print | alpha, |
| lower | print | alpha, punct | print, |
| punct | print, punct | print, |
| punct | print, cntrl, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
| }; |
| // clang-format on |
| return builtin_table; |
| } |
| #else |
| const ctype<char>::mask* ctype<char>::classic_table() noexcept { |
| # if defined(__APPLE__) || defined(__FreeBSD__) |
| return _DefaultRuneLocale.__runetype; |
| # elif defined(__NetBSD__) |
| return _C_ctype_tab_ + 1; |
| # elif defined(__GLIBC__) |
| return _LIBCPP_GET_C_LOCALE->__ctype_b; |
| # elif defined(_LIBCPP_MSVCRT) || defined(__MINGW32__) |
| return __pctype_func(); |
| # elif defined(__EMSCRIPTEN__) |
| return *__ctype_b_loc(); |
| # elif defined(_NEWLIB_VERSION) |
| // Newlib has a 257-entry table in ctype_.c, where (char)0 starts at [1]. |
| return _ctype_ + 1; |
| # elif defined(_AIX) |
| return (const unsigned int*)__lc_ctype_ptr->obj->mask; |
| # elif defined(__MVS__) |
| # if defined(__NATIVE_ASCII_F) |
| return const_cast<const ctype<char>::mask*>(__OBJ_DATA(__lc_ctype_a)->mask); |
| # else |
| return const_cast<const ctype<char>::mask*>(__ctypec); |
| # endif |
| # else |
| // Platform not supported: abort so the person doing the port knows what to |
| // fix |
| # warning ctype<char>::classic_table() is not implemented |
| printf("ctype<char>::classic_table() is not implemented\n"); |
| abort(); |
| return NULL; |
| # endif |
| } |
| #endif |
| |
| #if defined(__GLIBC__) |
| const int* ctype<char>::__classic_lower_table() noexcept { return _LIBCPP_GET_C_LOCALE->__ctype_tolower; } |
| |
| const int* ctype<char>::__classic_upper_table() noexcept { return _LIBCPP_GET_C_LOCALE->__ctype_toupper; } |
| #elif defined(__NetBSD__) |
| const short* ctype<char>::__classic_lower_table() noexcept { return _C_tolower_tab_ + 1; } |
| |
| const short* ctype<char>::__classic_upper_table() noexcept { return _C_toupper_tab_ + 1; } |
| |
| #elif defined(__EMSCRIPTEN__) |
| const int* ctype<char>::__classic_lower_table() noexcept { return *__ctype_tolower_loc(); } |
| |
| const int* ctype<char>::__classic_upper_table() noexcept { return *__ctype_toupper_loc(); } |
| #elif defined(__MVS__) |
| const unsigned short* ctype<char>::__classic_lower_table() _NOEXCEPT { |
| # if defined(__NATIVE_ASCII_F) |
| return const_cast<const unsigned short*>(__OBJ_DATA(__lc_ctype_a)->lower); |
| # else |
| return const_cast<const unsigned short*>(__ctype + __TOLOWER_INDEX); |
| # endif |
| } |
| const unsigned short* ctype<char>::__classic_upper_table() _NOEXCEPT { |
| # if defined(__NATIVE_ASCII_F) |
| return const_cast<const unsigned short*>(__OBJ_DATA(__lc_ctype_a)->upper); |
| # else |
| return const_cast<const unsigned short*>(__ctype + __TOUPPER_INDEX); |
| # endif |
| } |
| #endif // __GLIBC__ || __NETBSD__ || __EMSCRIPTEN__ || __MVS__ |
| |
| // template <> class ctype_byname<char> |
| |
| ctype_byname<char>::ctype_byname(const char* name, size_t refs) |
| : ctype<char>(0, false, refs), __l_(newlocale(LC_ALL_MASK, name, 0)) { |
| if (__l_ == 0) |
| __throw_runtime_error( |
| ("ctype_byname<char>::ctype_byname" |
| " failed to construct for " + |
| string(name)) |
| .c_str()); |
| } |
| |
| ctype_byname<char>::ctype_byname(const string& name, size_t refs) |
| : ctype<char>(0, false, refs), __l_(newlocale(LC_ALL_MASK, name.c_str(), 0)) { |
| if (__l_ == 0) |
| __throw_runtime_error( |
| ("ctype_byname<char>::ctype_byname" |
| " failed to construct for " + |
| name) |
| .c_str()); |
| } |
| |
| ctype_byname<char>::~ctype_byname() { freelocale(__l_); } |
| |
| char ctype_byname<char>::do_toupper(char_type c) const { |
| return static_cast<char>(toupper_l(static_cast<unsigned char>(c), __l_)); |
| } |
| |
| const char* ctype_byname<char>::do_toupper(char_type* low, const char_type* high) const { |
| for (; low != high; ++low) |
| *low = static_cast<char>(toupper_l(static_cast<unsigned char>(*low), __l_)); |
| return low; |
| } |
| |
| char ctype_byname<char>::do_tolower(char_type c) const { |
| return static_cast<char>(tolower_l(static_cast<unsigned char>(c), __l_)); |
| } |
| |
| const char* ctype_byname<char>::do_tolower(char_type* low, const char_type* high) const { |
| for (; low != high; ++low) |
| *low = static_cast<char>(tolower_l(static_cast<unsigned char>(*low), __l_)); |
| return low; |
| } |
| |
| // template <> class ctype_byname<wchar_t> |
| |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| ctype_byname<wchar_t>::ctype_byname(const char* name, size_t refs) |
| : ctype<wchar_t>(refs), __l_(newlocale(LC_ALL_MASK, name, 0)) { |
| if (__l_ == 0) |
| __throw_runtime_error( |
| ("ctype_byname<wchar_t>::ctype_byname" |
| " failed to construct for " + |
| string(name)) |
| .c_str()); |
| } |
| |
| ctype_byname<wchar_t>::ctype_byname(const string& name, size_t refs) |
| : ctype<wchar_t>(refs), __l_(newlocale(LC_ALL_MASK, name.c_str(), 0)) { |
| if (__l_ == 0) |
| __throw_runtime_error( |
| ("ctype_byname<wchar_t>::ctype_byname" |
| " failed to construct for " + |
| name) |
| .c_str()); |
| } |
| |
| ctype_byname<wchar_t>::~ctype_byname() { freelocale(__l_); } |
| |
| bool ctype_byname<wchar_t>::do_is(mask m, char_type c) const { |
| # ifdef _LIBCPP_WCTYPE_IS_MASK |
| return static_cast<bool>(iswctype_l(c, m, __l_)); |
| # else |
| bool result = false; |
| wint_t ch = static_cast<wint_t>(c); |
| if ((m & space) == space) |
| result |= (iswspace_l(ch, __l_) != 0); |
| if ((m & print) == print) |
| result |= (iswprint_l(ch, __l_) != 0); |
| if ((m & cntrl) == cntrl) |
| result |= (iswcntrl_l(ch, __l_) != 0); |
| if ((m & upper) == upper) |
| result |= (iswupper_l(ch, __l_) != 0); |
| if ((m & lower) == lower) |
| result |= (iswlower_l(ch, __l_) != 0); |
| if ((m & alpha) == alpha) |
| result |= (iswalpha_l(ch, __l_) != 0); |
| if ((m & digit) == digit) |
| result |= (iswdigit_l(ch, __l_) != 0); |
| if ((m & punct) == punct) |
| result |= (iswpunct_l(ch, __l_) != 0); |
| if ((m & xdigit) == xdigit) |
| result |= (iswxdigit_l(ch, __l_) != 0); |
| if ((m & blank) == blank) |
| result |= (iswblank_l(ch, __l_) != 0); |
| return result; |
| # endif |
| } |
| |
| const wchar_t* ctype_byname<wchar_t>::do_is(const char_type* low, const char_type* high, mask* vec) const { |
| for (; low != high; ++low, ++vec) { |
| if (isascii(*low)) |
| *vec = static_cast<mask>(ctype<char>::classic_table()[*low]); |
| else { |
| *vec = 0; |
| wint_t ch = static_cast<wint_t>(*low); |
| if (iswspace_l(ch, __l_)) |
| *vec |= space; |
| # ifndef _LIBCPP_CTYPE_MASK_IS_COMPOSITE_PRINT |
| if (iswprint_l(ch, __l_)) |
| *vec |= print; |
| # endif |
| if (iswcntrl_l(ch, __l_)) |
| *vec |= cntrl; |
| if (iswupper_l(ch, __l_)) |
| *vec |= upper; |
| if (iswlower_l(ch, __l_)) |
| *vec |= lower; |
| # ifndef _LIBCPP_CTYPE_MASK_IS_COMPOSITE_ALPHA |
| if (iswalpha_l(ch, __l_)) |
| *vec |= alpha; |
| # endif |
| if (iswdigit_l(ch, __l_)) |
| *vec |= digit; |
| if (iswpunct_l(ch, __l_)) |
| *vec |= punct; |
| # ifndef _LIBCPP_CTYPE_MASK_IS_COMPOSITE_XDIGIT |
| if (iswxdigit_l(ch, __l_)) |
| *vec |= xdigit; |
| # endif |
| if (iswblank_l(ch, __l_)) |
| *vec |= blank; |
| } |
| } |
| return low; |
| } |
| |
| const wchar_t* ctype_byname<wchar_t>::do_scan_is(mask m, const char_type* low, const char_type* high) const { |
| for (; low != high; ++low) { |
| # ifdef _LIBCPP_WCTYPE_IS_MASK |
| if (iswctype_l(*low, m, __l_)) |
| break; |
| # else |
| wint_t ch = static_cast<wint_t>(*low); |
| if ((m & space) == space && iswspace_l(ch, __l_)) |
| break; |
| if ((m & print) == print && iswprint_l(ch, __l_)) |
| break; |
| if ((m & cntrl) == cntrl && iswcntrl_l(ch, __l_)) |
| break; |
| if ((m & upper) == upper && iswupper_l(ch, __l_)) |
| break; |
| if ((m & lower) == lower && iswlower_l(ch, __l_)) |
| break; |
| if ((m & alpha) == alpha && iswalpha_l(ch, __l_)) |
| break; |
| if ((m & digit) == digit && iswdigit_l(ch, __l_)) |
| break; |
| if ((m & punct) == punct && iswpunct_l(ch, __l_)) |
| break; |
| if ((m & xdigit) == xdigit && iswxdigit_l(ch, __l_)) |
| break; |
| if ((m & blank) == blank && iswblank_l(ch, __l_)) |
| break; |
| # endif |
| } |
| return low; |
| } |
| |
| const wchar_t* ctype_byname<wchar_t>::do_scan_not(mask m, const char_type* low, const char_type* high) const { |
| for (; low != high; ++low) { |
| # ifdef _LIBCPP_WCTYPE_IS_MASK |
| if (!iswctype_l(*low, m, __l_)) |
| break; |
| # else |
| wint_t ch = static_cast<wint_t>(*low); |
| if ((m & space) == space && iswspace_l(ch, __l_)) |
| continue; |
| if ((m & print) == print && iswprint_l(ch, __l_)) |
| continue; |
| if ((m & cntrl) == cntrl && iswcntrl_l(ch, __l_)) |
| continue; |
| if ((m & upper) == upper && iswupper_l(ch, __l_)) |
| continue; |
| if ((m & lower) == lower && iswlower_l(ch, __l_)) |
| continue; |
| if ((m & alpha) == alpha && iswalpha_l(ch, __l_)) |
| continue; |
| if ((m & digit) == digit && iswdigit_l(ch, __l_)) |
| continue; |
| if ((m & punct) == punct && iswpunct_l(ch, __l_)) |
| continue; |
| if ((m & xdigit) == xdigit && iswxdigit_l(ch, __l_)) |
| continue; |
| if ((m & blank) == blank && iswblank_l(ch, __l_)) |
| continue; |
| break; |
| # endif |
| } |
| return low; |
| } |
| |
| wchar_t ctype_byname<wchar_t>::do_toupper(char_type c) const { return towupper_l(c, __l_); } |
| |
| const wchar_t* ctype_byname<wchar_t>::do_toupper(char_type* low, const char_type* high) const { |
| for (; low != high; ++low) |
| *low = towupper_l(*low, __l_); |
| return low; |
| } |
| |
| wchar_t ctype_byname<wchar_t>::do_tolower(char_type c) const { return towlower_l(c, __l_); } |
| |
| const wchar_t* ctype_byname<wchar_t>::do_tolower(char_type* low, const char_type* high) const { |
| for (; low != high; ++low) |
| *low = towlower_l(*low, __l_); |
| return low; |
| } |
| |
| wchar_t ctype_byname<wchar_t>::do_widen(char c) const { return __libcpp_btowc_l(c, __l_); } |
| |
| const char* ctype_byname<wchar_t>::do_widen(const char* low, const char* high, char_type* dest) const { |
| for (; low != high; ++low, ++dest) |
| *dest = __libcpp_btowc_l(*low, __l_); |
| return low; |
| } |
| |
| char ctype_byname<wchar_t>::do_narrow(char_type c, char dfault) const { |
| int r = __libcpp_wctob_l(c, __l_); |
| return (r != EOF) ? static_cast<char>(r) : dfault; |
| } |
| |
| const wchar_t* |
| ctype_byname<wchar_t>::do_narrow(const char_type* low, const char_type* high, char dfault, char* dest) const { |
| for (; low != high; ++low, ++dest) { |
| int r = __libcpp_wctob_l(*low, __l_); |
| *dest = (r != EOF) ? static_cast<char>(r) : dfault; |
| } |
| return low; |
| } |
| #endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| |
| // template <> class codecvt<char, char, mbstate_t> |
| |
// Definition of the facet's static locale::id member; constinit makes the
// compiler reject this definition unless it is constant-initialized.
| constinit locale::id codecvt<char, char, mbstate_t>::id; |
| |
| codecvt<char, char, mbstate_t>::~codecvt() {} |
| |
| codecvt<char, char, mbstate_t>::result codecvt<char, char, mbstate_t>::do_out( |
| state_type&, |
| const intern_type* frm, |
| const intern_type*, |
| const intern_type*& frm_nxt, |
| extern_type* to, |
| extern_type*, |
| extern_type*& to_nxt) const { |
| frm_nxt = frm; |
| to_nxt = to; |
| return noconv; |
| } |
| |
| codecvt<char, char, mbstate_t>::result codecvt<char, char, mbstate_t>::do_in( |
| state_type&, |
| const extern_type* frm, |
| const extern_type*, |
| const extern_type*& frm_nxt, |
| intern_type* to, |
| intern_type*, |
| intern_type*& to_nxt) const { |
| frm_nxt = frm; |
| to_nxt = to; |
| return noconv; |
| } |
| |
| codecvt<char, char, mbstate_t>::result |
| codecvt<char, char, mbstate_t>::do_unshift(state_type&, extern_type* to, extern_type*, extern_type*& to_nxt) const { |
| to_nxt = to; |
| return noconv; |
| } |
| |
| int codecvt<char, char, mbstate_t>::do_encoding() const noexcept { return 1; } |
| |
| bool codecvt<char, char, mbstate_t>::do_always_noconv() const noexcept { return true; } |
| |
| int codecvt<char, char, mbstate_t>::do_length( |
| state_type&, const extern_type* frm, const extern_type* end, size_t mx) const { |
| return static_cast<int>(min<size_t>(mx, static_cast<size_t>(end - frm))); |
| } |
| |
| int codecvt<char, char, mbstate_t>::do_max_length() const noexcept { return 1; } |
| |
| // template <> class codecvt<wchar_t, char, mbstate_t> |
| |
| #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS |
// Definition of the facet's static locale::id member; constinit makes the
// compiler reject this definition unless it is constant-initialized.
| constinit locale::id codecvt<wchar_t, char, mbstate_t>::id; |
| |
| codecvt<wchar_t, char, mbstate_t>::codecvt(size_t refs) : locale::facet(refs), __l_(_LIBCPP_GET_C_LOCALE) {} |
| |
| codecvt<wchar_t, char, mbstate_t>::codecvt(const char* nm, size_t refs) |
| : locale::facet(refs), __l_(newlocale(LC_ALL_MASK, nm, 0)) { |
| if (__l_ == 0) |
| __throw_runtime_error( |
| ("codecvt_byname<wchar_t, char, mbstate_t>::codecvt_byname" |
| " failed to construct for " + |
| string(nm)) |
| .c_str()); |
| } |
| |
| codecvt<wchar_t, char, mbstate_t>::~codecvt() { |
| if (__l_ != _LIBCPP_GET_C_LOCALE) |
| freelocale(__l_); |
| } |
| |
// Converts the wide characters in [frm, frm_end) to a multibyte sequence in
// [to, to_end) using this facet's locale.
//
// The input is handled as a series of runs separated by embedded null wide
// characters: __libcpp_wcsnrtombs_l is only ever given the characters up to
// `fend` (the next null, or frm_end), and each embedded null is then converted
// on its own with __libcpp_wcrtomb_l.
//
// On return frm_nxt / to_nxt mark how far the input was consumed and the
// output was filled. Returns error when a character cannot be converted,
// partial when the output has no room left or input remains, ok when all of
// the input was converted.
| codecvt<wchar_t, char, mbstate_t>::result codecvt<wchar_t, char, mbstate_t>::do_out( |
| state_type& st, |
| const intern_type* frm, |
| const intern_type* frm_end, |
| const intern_type*& frm_nxt, |
| extern_type* to, |
| extern_type* to_end, |
| extern_type*& to_nxt) const { |
| // look for first internal null in frm |
| const intern_type* fend = frm; |
| for (; fend != frm_end; ++fend) |
| if (*fend == 0) |
| break; |
| // loop over all null-terminated sequences in frm |
| to_nxt = to; |
| for (frm_nxt = frm; frm != frm_end && to != to_end; frm = frm_nxt, to = to_nxt) { |
| // save state in case it is needed to recover to_nxt on error |
| mbstate_t save_state = st; |
| size_t n = __libcpp_wcsnrtombs_l( |
| to, &frm_nxt, static_cast<size_t>(fend - frm), static_cast<size_t>(to_end - to), &st, __l_); |
| if (n == size_t(-1)) { |
// The bulk call failed without saying how many bytes it produced, so the
// run is replayed one character at a time from the saved state to find
// the output position that corresponds to frm_nxt.
| // need to recover to_nxt |
| for (to_nxt = to; frm != frm_nxt; ++frm) { |
| n = __libcpp_wcrtomb_l(to_nxt, *frm, &save_state, __l_); |
| if (n == size_t(-1)) |
| break; |
| to_nxt += n; |
| } |
| frm_nxt = frm; |
| return error; |
| } |
| if (n == 0) |
| return partial; |
| to_nxt += n; |
| if (to_nxt == to_end) |
| break; |
| if (fend != frm_end) // set up next null terminated sequence |
| { |
| // Try to write the terminating null |
| extern_type tmp[MB_LEN_MAX]; |
| n = __libcpp_wcrtomb_l(tmp, intern_type(), &st, __l_); |
| if (n == size_t(-1)) // on error |
| return error; |
| if (n > static_cast<size_t>(to_end - to_nxt)) // is there room? |
| return partial; |
| for (extern_type* p = tmp; n; --n) // write it |
| *to_nxt++ = *p++; |
| ++frm_nxt; |
| // look for next null in frm |
| for (fend = frm_nxt; fend != frm_end; ++fend) |
| if (*fend == 0) |
| break; |
| } |
| } |
| return frm_nxt == frm_end ? ok : partial; |
| } |
| |
// Converts the multibyte sequence in [frm, frm_end) to wide characters in
// [to, to_end) using this facet's locale.
//
// The input is handled as a series of runs separated by embedded null bytes:
// __libcpp_mbsnrtowcs_l is only ever given the bytes up to `fend` (the next
// null, or frm_end), and each embedded null is then converted on its own with
// __libcpp_mbrtowc_l.
//
// On return frm_nxt / to_nxt mark how far the input was consumed and the
// output was filled.
| codecvt<wchar_t, char, mbstate_t>::result codecvt<wchar_t, char, mbstate_t>::do_in( |
| state_type& st, |
| const extern_type* frm, |
| const extern_type* frm_end, |
| const extern_type*& frm_nxt, |
| intern_type* to, |
| intern_type* to_end, |
| intern_type*& to_nxt) const { |
| // look for first internal null in frm |
| const extern_type* fend = frm; |
| for (; fend != frm_end; ++fend) |
| if (*fend == 0) |
| break; |
| // loop over all null-terminated sequences in frm |
| to_nxt = to; |
| for (frm_nxt = frm; frm != frm_end && to != to_end; frm = frm_nxt, to = to_nxt) { |
| // save state in case it is needed to recover to_nxt on error |
| mbstate_t save_state = st; |
| size_t n = __libcpp_mbsnrtowcs_l( |
| to, &frm_nxt, static_cast<size_t>(fend - frm), static_cast<size_t>(to_end - to), &st, __l_); |
| if (n == size_t(-1)) { |
// The bulk call failed without saying how many wide characters it
// produced, so the run is replayed one character at a time from the
// saved state; the replay classifies the failure as error (invalid
// sequence) or partial (incomplete sequence).
| // need to recover to_nxt |
| for (to_nxt = to; frm != frm_nxt; ++to_nxt) { |
| n = __libcpp_mbrtowc_l(to_nxt, frm, static_cast<size_t>(fend - frm), &save_state, __l_); |
| switch (n) { |
| case 0: |
| ++frm; |
| break; |
| case size_t(-1): |
| frm_nxt = frm; |
| return error; |
| case size_t(-2): |
| frm_nxt = frm; |
| return partial; |
| default: |
| frm += n; |
| break; |
| } |
| } |
| frm_nxt = frm; |
| return frm_nxt == frm_end ? ok : partial; |
| } |
// NOTE(review): this check cannot fire -- every path through the
// n == size_t(-1) block above ends in a return.
| if (n == size_t(-1)) |
| return error; |
| to_nxt += n; |
| if (to_nxt == to_end) |
| break; |
| if (fend != frm_end) // set up next null terminated sequence |
| { |
| // Try to write the terminating null |
| n = __libcpp_mbrtowc_l(to_nxt, frm_nxt, 1, &st, __l_); |
| if (n != 0) // on error |
| return error; |
| ++to_nxt; |
| ++frm_nxt; |
| // look for next null in frm |
| for (fend = frm_nxt; fend != frm_end; ++fend) |
| if (*fend == 0) |
| break; |
| } |
| } |
| return frm_nxt == frm_end ? ok : partial; |
| } |
| |
| codecvt<wchar_t, char, mbstate_t>::result codecvt<wchar_t, char, mbstate_t>::do_unshift( |
| state_type& st, extern_type* to, extern_type* to_end, extern_type*& to_nxt) const { |
| to_nxt = to; |
| extern_type tmp[MB_LEN_MAX]; |
| size_t n = __libcpp_wcrtomb_l(tmp, intern_type(), &st, __l_); |
| if (n == size_t(-1) || n == 0) // on error |
| return error; |
| --n; |
| if (n > static_cast<size_t>(to_end - to_nxt)) // is there room? |
| return partial; |
| for (extern_type* p = tmp; n; --n) // write it |
| *to_nxt++ = *p++; |
| return ok; |
| } |
| |
| int codecvt<wchar_t, char, mbstate_t>::do_encoding() const noexcept { |
| if (__libcpp_mbtowc_l(nullptr, nullptr, MB_LEN_MAX, __l_) != 0) |
| return -1; |
| |
| // stateless encoding |
| if (__l_ == 0 || __libcpp_mb_cur_max_l(__l_) == 1) // there are no known constant length encodings |
| return 1; // which take more than 1 char to form a wchar_t |
| return 0; |
| } |
| |
| bool codecvt<wchar_t, char, mbstate_t>::do_always_noconv() const noexcept { return false; } |
| |
| int codecvt<wchar_t, char, mbstate_t>::do_length( |
| state_type& st, const extern_type* frm, const extern_type* frm_end, size_t mx) const { |
| int nbytes = 0; |
| for (size_t nwchar_t = 0; nwchar_t < mx && frm != frm_end; ++nwchar_t) { |
| size_t n = __libcpp_mbrlen_l(frm, static_cast<size_t>(frm_end - frm), &st, __l_); |
| switch (n) { |
| case 0: |
| ++nbytes; |
| ++frm; |
| break; |
| case size_t(-1): |
| case size_t(-2): |
| return nbytes; |
| default: |
| nbytes += n; |
| frm += n; |
| break; |
| } |
| } |
| return nbytes; |
| } |
| |
| int codecvt<wchar_t, char, mbstate_t>::do_max_length() const noexcept { |
| return __l_ == 0 ? 1 : static_cast<int>(__libcpp_mb_cur_max_l(__l_)); |
| } |
| #endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS |
| |
| // Valid UTF ranges |
| // UTF-32 UTF-16 UTF-8 # of code points |
| // first second first second third fourth |
| // 000000 - 00007F 0000 - 007F 00 - 7F 128 |
| // 000080 - 0007FF 0080 - 07FF C2 - DF, 80 - BF 1920 |
| // 000800 - 000FFF 0800 - 0FFF E0 - E0, A0 - BF, 80 - BF 2048 |
| // 001000 - 00CFFF 1000 - CFFF E1 - EC, 80 - BF, 80 - BF 49152 |
| // 00D000 - 00D7FF D000 - D7FF ED - ED, 80 - 9F, 80 - BF 2048 |
| // 00D800 - 00DFFF invalid |
| // 00E000 - 00FFFF E000 - FFFF EE - EF, 80 - BF, 80 - BF 8192 |
| // 010000 - 03FFFF D800 - D8BF, DC00 - DFFF F0 - F0, 90 - BF, 80 - BF, 80 - BF 196608 |
| // 040000 - 0FFFFF D8C0 - DBBF, DC00 - DFFF F1 - F3, 80 - BF, 80 - BF, 80 - BF 786432 |
| // 100000 - 10FFFF DBC0 - DBFF, DC00 - DFFF F4 - F4, 80 - 8F, 80 - BF, 80 - BF 65536 |
| |
| _LIBCPP_SUPPRESS_DEPRECATED_PUSH |
// Transcodes the UTF-16 code units in [frm, frm_end) to UTF-8 in [to, to_end).
//
// frm_nxt / to_nxt are left just past the last code point that was completely
// converted. Returns
//   error   - a code unit exceeds Maxcode, a high surrogate is not followed by
//             a low surrogate, or a low surrogate appears on its own;
//   partial - the output has no room for the next code point, or the input
//             ends after a high surrogate;
//   ok      - all input was converted.
// With generate_header set in mode, a UTF-8 BOM (EF BB BF) is written first.
static codecvt_base::result utf16_to_utf8(
    const uint16_t* frm,
    const uint16_t* frm_end,
    const uint16_t*& frm_nxt,
    uint8_t* to,
    uint8_t* to_end,
    uint8_t*& to_nxt,
    unsigned long Maxcode = 0x10FFFF,
    codecvt_mode mode     = codecvt_mode(0)) {
  frm_nxt = frm;
  to_nxt  = to;
  if (mode & generate_header) {
    if (to_end - to_nxt < 3)
      return codecvt_base::partial;
    to_nxt[0] = static_cast<uint8_t>(0xEF);
    to_nxt[1] = static_cast<uint8_t>(0xBB);
    to_nxt[2] = static_cast<uint8_t>(0xBF);
    to_nxt += 3;
  }
  while (frm_nxt < frm_end) {
    const uint16_t unit = *frm_nxt;
    if (unit > Maxcode)
      return codecvt_base::error;
    const auto room = to_end - to_nxt;
    if (unit < 0x0080) {
      // One byte: 0xxxxxxx
      if (room < 1)
        return codecvt_base::partial;
      *to_nxt++ = static_cast<uint8_t>(unit);
    } else if (unit < 0x0800) {
      // Two bytes: 110xxxxx 10xxxxxx
      if (room < 2)
        return codecvt_base::partial;
      *to_nxt++ = static_cast<uint8_t>(0xC0 | (unit >> 6));
      *to_nxt++ = static_cast<uint8_t>(0x80 | (unit & 0x03F));
    } else if (unit >= 0xD800 && unit < 0xDC00) {
      // High surrogate: must be paired with a low surrogate; yields four bytes.
      if (frm_end - frm_nxt < 2)
        return codecvt_base::partial;
      const uint16_t trail = frm_nxt[1];
      if ((trail & 0xFC00) != 0xDC00)
        return codecvt_base::error;
      if (room < 4)
        return codecvt_base::partial;
      if (((((unit & 0x03C0UL) >> 6) + 1) << 16) + ((unit & 0x003FUL) << 10) + (trail & 0x03FF) > Maxcode)
        return codecvt_base::error;
      ++frm_nxt; // the trail unit is consumed together with the lead
      const uint8_t plane = static_cast<uint8_t>(((unit & 0x03C0) >> 6) + 1);
      *to_nxt++           = static_cast<uint8_t>(0xF0 | (plane >> 2));
      *to_nxt++           = static_cast<uint8_t>(0x80 | ((plane & 0x03) << 4) | ((unit & 0x003C) >> 2));
      *to_nxt++           = static_cast<uint8_t>(0x80 | ((unit & 0x0003) << 4) | ((trail & 0x03C0) >> 6));
      *to_nxt++           = static_cast<uint8_t>(0x80 | (trail & 0x003F));
    } else if (unit >= 0xDC00 && unit < 0xE000) {
      // Low surrogate without a preceding high surrogate.
      return codecvt_base::error;
    } else {
      // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx (0x0800-0xD7FF and 0xE000-0xFFFF)
      if (room < 3)
        return codecvt_base::partial;
      *to_nxt++ = static_cast<uint8_t>(0xE0 | (unit >> 12));
      *to_nxt++ = static_cast<uint8_t>(0x80 | ((unit & 0x0FC0) >> 6));
      *to_nxt++ = static_cast<uint8_t>(0x80 | (unit & 0x003F));
    }
    ++frm_nxt;
  }
  return codecvt_base::ok;
}
| |
// Transcodes UTF-16 code units stored one per 32-bit element in
// [frm, frm_end) to UTF-8 in [to, to_end).
//
// Every element must hold a single UTF-16 code unit (<= 0xFFFF). Elements with
// higher bits set are rejected with `error` rather than being truncated to 16
// bits, which would silently convert them as a different character.
//
// frm_nxt / to_nxt are left just past the last code point that was completely
// converted. Returns
//   error   - an element is not a 16-bit code unit, a code unit exceeds
//             Maxcode, a high surrogate is not followed by a low surrogate,
//             or a low surrogate appears on its own;
//   partial - the output has no room for the next code point, or the input
//             ends after a high surrogate;
//   ok      - all input was converted.
// With generate_header set in mode, a UTF-8 BOM (EF BB BF) is written first.
static codecvt_base::result utf16_to_utf8(
    const uint32_t* frm,
    const uint32_t* frm_end,
    const uint32_t*& frm_nxt,
    uint8_t* to,
    uint8_t* to_end,
    uint8_t*& to_nxt,
    unsigned long Maxcode = 0x10FFFF,
    codecvt_mode mode     = codecvt_mode(0)) {
  frm_nxt = frm;
  to_nxt  = to;
  if (mode & generate_header) {
    if (to_end - to_nxt < 3)
      return codecvt_base::partial;
    *to_nxt++ = static_cast<uint8_t>(0xEF);
    *to_nxt++ = static_cast<uint8_t>(0xBB);
    *to_nxt++ = static_cast<uint8_t>(0xBF);
  }
  for (; frm_nxt < frm_end; ++frm_nxt) {
    // Reject anything that is not a 16-bit code unit before narrowing.
    if (*frm_nxt > 0xFFFF)
      return codecvt_base::error;
    const uint16_t wc1 = static_cast<uint16_t>(*frm_nxt);
    if (wc1 > Maxcode)
      return codecvt_base::error;
    if (wc1 < 0x0080) {
      // One byte: 0xxxxxxx
      if (to_end - to_nxt < 1)
        return codecvt_base::partial;
      *to_nxt++ = static_cast<uint8_t>(wc1);
    } else if (wc1 < 0x0800) {
      // Two bytes: 110xxxxx 10xxxxxx
      if (to_end - to_nxt < 2)
        return codecvt_base::partial;
      *to_nxt++ = static_cast<uint8_t>(0xC0 | (wc1 >> 6));
      *to_nxt++ = static_cast<uint8_t>(0x80 | (wc1 & 0x03F));
    } else if (wc1 < 0xD800) {
      // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
      if (to_end - to_nxt < 3)
        return codecvt_base::partial;
      *to_nxt++ = static_cast<uint8_t>(0xE0 | (wc1 >> 12));
      *to_nxt++ = static_cast<uint8_t>(0x80 | ((wc1 & 0x0FC0) >> 6));
      *to_nxt++ = static_cast<uint8_t>(0x80 | (wc1 & 0x003F));
    } else if (wc1 < 0xDC00) {
      // High surrogate: must be paired with a low surrogate; yields four bytes.
      if (frm_end - frm_nxt < 2)
        return codecvt_base::partial;
      // The trail element must also be a genuine 16-bit code unit.
      if (frm_nxt[1] > 0xFFFF)
        return codecvt_base::error;
      const uint16_t wc2 = static_cast<uint16_t>(frm_nxt[1]);
      if ((wc2 & 0xFC00) != 0xDC00)
        return codecvt_base::error;
      if (to_end - to_nxt < 4)
        return codecvt_base::partial;
      if (((((wc1 & 0x03C0UL) >> 6) + 1) << 16) + ((wc1 & 0x003FUL) << 10) + (wc2 & 0x03FF) > Maxcode)
        return codecvt_base::error;
      ++frm_nxt; // the trail unit is consumed together with the lead
      const uint8_t z = static_cast<uint8_t>(((wc1 & 0x03C0) >> 6) + 1);
      *to_nxt++       = static_cast<uint8_t>(0xF0 | (z >> 2));
      *to_nxt++       = static_cast<uint8_t>(0x80 | ((z & 0x03) << 4) | ((wc1 & 0x003C) >> 2));
      *to_nxt++       = static_cast<uint8_t>(0x80 | ((wc1 & 0x0003) << 4) | ((wc2 & 0x03C0) >> 6));
      *to_nxt++       = static_cast<uint8_t>(0x80 | (wc2 & 0x003F));
    } else if (wc1 < 0xE000) {
      // Low surrogate without a preceding high surrogate.
      return codecvt_base::error;
    } else {
      // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
      if (to_end - to_nxt < 3)
        return codecvt_base::partial;
      *to_nxt++ = static_cast<uint8_t>(0xE0 | (wc1 >> 12));
      *to_nxt++ = static_cast<uint8_t>(0x80 | ((wc1 & 0x0FC0) >> 6));
      *to_nxt++ = static_cast<uint8_t>(0x80 | (wc1 & 0x003F));
    }
  }
  return codecvt_base::ok;
}
| |
// Transcodes the UTF-8 bytes in [frm, frm_end) to UTF-16 code units in
// [to, to_end).
//
// frm_nxt / to_nxt are left just past the last code point that was completely
// converted. Returns
//   error   - an invalid lead or continuation byte (including overlong forms
//             and encoded surrogates) or a code point above Maxcode;
//   partial - the input ends in the middle of a sequence, the output has no
//             room for a surrogate pair, or input is left over when the
//             output is full;
//   ok      - all input was converted.
// With consume_header set in mode, a leading UTF-8 BOM (EF BB BF) is skipped.
static codecvt_base::result utf8_to_utf16(
    const uint8_t* frm,
    const uint8_t* frm_end,
    const uint8_t*& frm_nxt,
    uint16_t* to,
    uint16_t* to_end,
    uint16_t*& to_nxt,
    unsigned long Maxcode = 0x10FFFF,
    codecvt_mode mode     = codecvt_mode(0)) {
  frm_nxt = frm;
  to_nxt  = to;
  if ((mode & consume_header) && frm_end - frm_nxt >= 3 && frm_nxt[0] == 0xEF && frm_nxt[1] == 0xBB &&
      frm_nxt[2] == 0xBF)
    frm_nxt += 3;

  while (frm_nxt < frm_end && to_nxt < to_end) {
    const uint8_t lead = *frm_nxt;
    const auto avail   = frm_end - frm_nxt;
    if (lead > Maxcode)
      return codecvt_base::error;
    if (lead < 0x80) {
      // Single byte (ASCII).
      *to_nxt = static_cast<uint16_t>(lead);
      ++frm_nxt;
    } else if (lead < 0xC2) {
      // Stray continuation byte or overlong two-byte lead.
      return codecvt_base::error;
    } else if (lead < 0xE0) {
      // Two-byte sequence.
      if (avail < 2)
        return codecvt_base::partial;
      const uint8_t b1 = frm_nxt[1];
      if ((b1 & 0xC0) != 0x80)
        return codecvt_base::error;
      const uint16_t cp = static_cast<uint16_t>(((lead & 0x1F) << 6) | (b1 & 0x3F));
      if (cp > Maxcode)
        return codecvt_base::error;
      *to_nxt = cp;
      frm_nxt += 2;
    } else if (lead < 0xF0) {
      // Three-byte sequence. E0 only accepts A0-BF as second byte (no overlong
      // forms) and ED only accepts 80-9F (no surrogates).
      if (avail < 2)
        return codecvt_base::partial;
      const uint8_t b1 = frm_nxt[1];
      bool second_ok;
      if (lead == 0xE0)
        second_ok = (b1 & 0xE0) == 0xA0;
      else if (lead == 0xED)
        second_ok = (b1 & 0xE0) == 0x80;
      else
        second_ok = (b1 & 0xC0) == 0x80;
      if (!second_ok)
        return codecvt_base::error;
      if (avail < 3)
        return codecvt_base::partial;
      const uint8_t b2 = frm_nxt[2];
      if ((b2 & 0xC0) != 0x80)
        return codecvt_base::error;
      const uint16_t cp = static_cast<uint16_t>(((lead & 0x0F) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F));
      if (cp > Maxcode)
        return codecvt_base::error;
      *to_nxt = cp;
      frm_nxt += 3;
    } else if (lead < 0xF5) {
      // Four-byte sequence, emitted as a surrogate pair. F0 only accepts 90-BF
      // as second byte (no overlong forms) and F4 only accepts 80-8F (nothing
      // above 0x10FFFF).
      if (avail < 2)
        return codecvt_base::partial;
      const uint8_t b1 = frm_nxt[1];
      bool second_ok;
      if (lead == 0xF0)
        second_ok = 0x90 <= b1 && b1 <= 0xBF;
      else if (lead == 0xF4)
        second_ok = (b1 & 0xF0) == 0x80;
      else
        second_ok = (b1 & 0xC0) == 0x80;
      if (!second_ok)
        return codecvt_base::error;
      if (avail < 3)
        return codecvt_base::partial;
      const uint8_t b2 = frm_nxt[2];
      if ((b2 & 0xC0) != 0x80)
        return codecvt_base::error;
      if (avail < 4)
        return codecvt_base::partial;
      const uint8_t b3 = frm_nxt[3];
      if ((b3 & 0xC0) != 0x80)
        return codecvt_base::error;
      if (to_end - to_nxt < 2)
        return codecvt_base::partial;
      if ((((lead & 7UL) << 18) + ((b1 & 0x3FUL) << 12) + ((b2 & 0x3FUL) << 6) + (b3 & 0x3F)) > Maxcode)
        return codecvt_base::error;
      const int plane_minus_one = (((lead & 0x07) << 2) | ((b1 & 0x30) >> 4)) - 1;
      *to_nxt++ =
          static_cast<uint16_t>(0xD800 | (plane_minus_one << 6) | ((b1 & 0x0F) << 2) | ((b2 & 0x30) >> 4));
      *to_nxt = static_cast<uint16_t>(0xDC00 | ((b2 & 0x0F) << 6) | (b3 & 0x3F));
      frm_nxt += 4;
    } else {
      return codecvt_base::error;
    }
    ++to_nxt;
  }
  return frm_nxt < frm_end ? codecvt_base::partial : codecvt_base::ok;
}
| |
// Transcodes the UTF-8 bytes in [frm, frm_end) to UTF-16 code units, each
// stored in its own 32-bit element of [to, to_end). Code points above 0xFFFF
// are written as a surrogate pair occupying two elements.
//
// frm_nxt / to_nxt are left just past the last code point that was completely
// converted. Returns
//   error   - an invalid lead or continuation byte (including overlong forms
//             and encoded surrogates) or a code point above Maxcode;
//   partial - the input ends in the middle of a sequence, the output has no
//             room for a surrogate pair, or input is left over when the
//             output is full;
//   ok      - all input was converted.
// With consume_header set in mode, a leading UTF-8 BOM (EF BB BF) is skipped.
static codecvt_base::result utf8_to_utf16(
    const uint8_t* frm,
    const uint8_t* frm_end,
    const uint8_t*& frm_nxt,
    uint32_t* to,
    uint32_t* to_end,
    uint32_t*& to_nxt,
    unsigned long Maxcode = 0x10FFFF,
    codecvt_mode mode     = codecvt_mode(0)) {
  frm_nxt = frm;
  to_nxt  = to;
  if ((mode & consume_header) && frm_end - frm_nxt >= 3 && frm_nxt[0] == 0xEF && frm_nxt[1] == 0xBB &&
      frm_nxt[2] == 0xBF)
    frm_nxt += 3;

  while (frm_nxt < frm_end && to_nxt < to_end) {
    const uint8_t lead = *frm_nxt;
    const auto avail   = frm_end - frm_nxt;
    if (lead > Maxcode)
      return codecvt_base::error;
    if (lead < 0x80) {
      // Single byte (ASCII).
      *to_nxt = static_cast<uint32_t>(lead);
      ++frm_nxt;
    } else if (lead < 0xC2) {
      // Stray continuation byte or overlong two-byte lead.
      return codecvt_base::error;
    } else if (lead < 0xE0) {
      // Two-byte sequence.
      if (avail < 2)
        return codecvt_base::partial;
      const uint8_t b1 = frm_nxt[1];
      if ((b1 & 0xC0) != 0x80)
        return codecvt_base::error;
      const uint16_t cp = static_cast<uint16_t>(((lead & 0x1F) << 6) | (b1 & 0x3F));
      if (cp > Maxcode)
        return codecvt_base::error;
      *to_nxt = static_cast<uint32_t>(cp);
      frm_nxt += 2;
    } else if (lead < 0xF0) {
      // Three-byte sequence. E0 only accepts A0-BF as second byte (no overlong
      // forms) and ED only accepts 80-9F (no surrogates).
      if (avail < 2)
        return codecvt_base::partial;
      const uint8_t b1 = frm_nxt[1];
      bool second_ok;
      if (lead == 0xE0)
        second_ok = (b1 & 0xE0) == 0xA0;
      else if (lead == 0xED)
        second_ok = (b1 & 0xE0) == 0x80;
      else
        second_ok = (b1 & 0xC0) == 0x80;
      if (!second_ok)
        return codecvt_base::error;
      if (avail < 3)
        return codecvt_base::partial;
      const uint8_t b2 = frm_nxt[2];
      if ((b2 & 0xC0) != 0x80)
        return codecvt_base::error;
      const uint16_t cp = static_cast<uint16_t>(((lead & 0x0F) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F));
      if (cp > Maxcode)
        return codecvt_base::error;
      *to_nxt = static_cast<uint32_t>(cp);
      frm_nxt += 3;
    } else if (lead < 0xF5) {
      // Four-byte sequence, emitted as a surrogate pair. F0 only accepts 90-BF
      // as second byte (no overlong forms) and F4 only accepts 80-8F (nothing
      // above 0x10FFFF).
      if (avail < 2)
        return codecvt_base::partial;
      const uint8_t b1 = frm_nxt[1];
      bool second_ok;
      if (lead == 0xF0)
        second_ok = 0x90 <= b1 && b1 <= 0xBF;
      else if (lead == 0xF4)
        second_ok = (b1 & 0xF0) == 0x80;
      else
        second_ok = (b1 & 0xC0) == 0x80;
      if (!second_ok)
        return codecvt_base::error;
      if (avail < 3)
        return codecvt_base::partial;
      const uint8_t b2 = frm_nxt[2];
      if ((b2 & 0xC0) != 0x80)
        return codecvt_base::error;
      if (avail < 4)
        return codecvt_base::partial;
      const uint8_t b3 = frm_nxt[3];
      if ((b3 & 0xC0) != 0x80)
        return codecvt_base::error;
      if (to_end - to_nxt < 2)
        return codecvt_base::partial;
      if ((((lead & 7UL) << 18) + ((b1 & 0x3FUL) << 12) + ((b2 & 0x3FUL) << 6) + (b3 & 0x3F)) > Maxcode)
        return codecvt_base::error;
      const int plane_minus_one = (((lead & 0x07) << 2) | ((b1 & 0x30) >> 4)) - 1;
      *to_nxt++ =
          static_cast<uint32_t>(0xD800 | (plane_minus_one << 6) | ((b1 & 0x0F) << 2) | ((b2 & 0x30) >> 4));
      *to_nxt = static_cast<uint32_t>(0xDC00 | ((b2 & 0x0F) << 6) | (b3 & 0x3F));
      frm_nxt += 4;
    } else {
      return codecvt_base::error;
    }
    ++to_nxt;
  }
  return frm_nxt < frm_end ? codecvt_base::partial : codecvt_base::ok;
}
| |
// Measures how many bytes at the start of [frm, frm_end) consist of accepted
// UTF-8 sequences that account for at most mx UTF-16 code units.
//
// Scanning stops at the first lead byte greater than Maxcode, the first
// malformed or truncated sequence, the first code point greater than Maxcode,
// or once mx code units have been counted. A four-byte sequence counts as two
// code units and is only accepted when both still fit within mx. When mode has
// consume_header set, a leading EF BB BF is skipped and its three bytes are
// part of the returned count.
//
// Returns the distance from frm to the stopping position, converted to int.
static int utf8_to_utf16_length(
    const uint8_t* frm,
    const uint8_t* frm_end,
    size_t mx,
    unsigned long Maxcode = 0x10FFFF,
    codecvt_mode mode     = codecvt_mode(0)) {
  const uint8_t* cur = frm;
  if ((mode & consume_header) && frm_end - cur >= 3 && cur[0] == 0xEF && cur[1] == 0xBB && cur[2] == 0xBF)
    cur += 3;

  size_t units = 0;
  while (cur < frm_end && units < mx) {
    const uint8_t lead = *cur;
    if (lead > Maxcode)
      break;

    if (lead < 0x80) {
      // Single-byte sequence.
      cur += 1;
      units += 1;
      continue;
    }

    // 0x80..0xC1 and 0xF5..0xFF are not accepted as lead bytes.
    if (lead < 0xC2 || lead > 0xF4)
      break;

    if (lead < 0xE0) {
      // Two-byte sequence: the second byte must lie in 80..BF.
      if (frm_end - cur < 2)
        break;
      const uint8_t b2 = cur[1];
      if (b2 < 0x80 || b2 > 0xBF)
        break;
      const unsigned long cp = ((lead & 0x1Fu) << 6) | (b2 & 0x3Fu);
      if (cp > Maxcode)
        break;
      cur += 2;
      units += 1;
      continue;
    }

    if (lead < 0xF0) {
      // Three-byte sequence: E0 needs a second byte in A0..BF, ED one in
      // 80..9F, and every other lead accepts 80..BF.
      if (frm_end - cur < 3)
        break;
      const uint8_t b2 = cur[1];
      const uint8_t b3 = cur[2];
      const int b2_lo  = lead == 0xE0 ? 0xA0 : 0x80;
      const int b2_hi  = lead == 0xED ? 0x9F : 0xBF;
      if (b2 < b2_lo || b2 > b2_hi)
        break;
      if (b3 < 0x80 || b3 > 0xBF)
        break;
      const unsigned long cp = ((lead & 0x0Fu) << 12) | ((b2 & 0x3Fu) << 6) | (b3 & 0x3Fu);
      if (cp > Maxcode)
        break;
      cur += 3;
      units += 1;
      continue;
    }

    // Four-byte sequence (lead F0..F4): counts as two UTF-16 code units, so
    // both must still fit. F0 needs a second byte in 90..BF, F4 one in 80..8F.
    if (frm_end - cur < 4 || mx - units < 2)
      break;
    const uint8_t b2 = cur[1];
    const uint8_t b3 = cur[2];
    const uint8_t b4 = cur[3];
    const int b2_lo  = lead == 0xF0 ? 0x90 : 0x80;
    const int b2_hi  = lead == 0xF4 ? 0x8F : 0xBF;
    if (b2 < b2_lo || b2 > b2_hi)
      break;
    if (b3 < 0x80 || b3 > 0xBF || b4 < 0x80 || b4 > 0xBF)
      break;
    const unsigned long cp = ((lead & 0x07UL) << 18) | ((b2 & 0x3FUL) << 12) | ((b3 & 0x3FUL) << 6) | (b4 & 0x3FUL);
    if (cp > Maxcode)
      break;
    cur += 4;
    units += 2;
  }
  return static_cast<int>(cur - frm);
}
| |
// Encodes the 32-bit code points in [frm, frm_end) as UTF-8 into [to, to_end).
//
// frm_nxt and to_nxt are reset to frm and to, then advanced past every code
// point that was written out completely. When mode has generate_header set,
// the bytes EF BB BF are written first.
//
// Returns:
//   error   - the next code point is in D800..DFFF or greater than Maxcode
//   partial - the output range cannot hold the header or the next encoding
//   ok      - every input code point was encoded
static codecvt_base::result ucs4_to_utf8(
    const uint32_t* frm,
    const uint32_t* frm_end,
    const uint32_t*& frm_nxt,
    uint8_t* to,
    uint8_t* to_end,
    uint8_t*& to_nxt,
    unsigned long Maxcode = 0x10FFFF,
    codecvt_mode mode     = codecvt_mode(0)) {
  frm_nxt = frm;
  to_nxt  = to;

  if (mode & generate_header) {
    if (to_end - to_nxt < 3)
      return codecvt_base::partial;
    to_nxt[0] = static_cast<uint8_t>(0xEF);
    to_nxt[1] = static_cast<uint8_t>(0xBB);
    to_nxt[2] = static_cast<uint8_t>(0xBF);
    to_nxt += 3;
  }

  while (frm_nxt < frm_end) {
    const uint32_t cp        = *frm_nxt;
    const bool is_surrogate = 0xD800 <= cp && cp <= 0xDFFF;
    if (is_surrogate || cp > Maxcode)
      return codecvt_base::error;

    // Number of bytes in the encoding; nothing is written unless all fit.
    const int len = cp < 0x000080 ? 1 : cp < 0x000800 ? 2 : cp < 0x010000 ? 3 : 4;
    if (to_end - to_nxt < len)
      return codecvt_base::partial;

    // Lead byte: length prefix combined with the bits above the continuation
    // bytes (the whole value for a single-byte encoding).
    const uint32_t prefix = len == 1 ? 0x00u : len == 2 ? 0xC0u : len == 3 ? 0xE0u : 0xF0u;
    *to_nxt++             = static_cast<uint8_t>(prefix | (cp >> (6 * (len - 1))));

    // Continuation bytes, most significant six-bit group first.
    for (int shift = 6 * (len - 2); shift >= 0; shift -= 6)
      *to_nxt++ = static_cast<uint8_t>(0x80 | ((cp >> shift) & 0x3F));

    ++frm_nxt;
  }
  return codecvt_base::ok;
}
| |
// Decodes the UTF-8 bytes in [frm, frm_end) into 32-bit code points stored in
// [to, to_end).
//
// frm_nxt and to_nxt are reset to frm and to, then advanced past every
// sequence that was decoded and stored. When mode has consume_header set, a
// leading EF BB BF is skipped without producing output.
//
// Bytes of a sequence are examined in order, so a truncated sequence is
// reported as partial only if every byte present so far is acceptable.
//
// Returns:
//   error   - rejected lead byte, out-of-range trailing byte, or a decoded
//             value greater than Maxcode
//   partial - the input ends inside a sequence, or input remains after the
//             output range is full
//   ok      - all input was consumed
static codecvt_base::result utf8_to_ucs4(
    const uint8_t* frm,
    const uint8_t* frm_end,
    const uint8_t*& frm_nxt,
    uint32_t* to,
    uint32_t* to_end,
    uint32_t*& to_nxt,
    unsigned long Maxcode = 0x10FFFF,
    codecvt_mode mode     = codecvt_mode(0)) {
  frm_nxt = frm;
  to_nxt  = to;
  if ((mode & consume_header) && frm_end - frm_nxt >= 3 && frm_nxt[0] == 0xEF && frm_nxt[1] == 0xBB &&
      frm_nxt[2] == 0xBF)
    frm_nxt += 3;

  while (frm_nxt < frm_end && to_nxt < to_end) {
    const uint8_t lead = *frm_nxt;

    // 0x80..0xC1 and 0xF5..0xFF are not accepted as lead bytes.
    if (lead >= 0x80 && (lead < 0xC2 || lead > 0xF4))
      return codecvt_base::error;

    // Sequence length and the payload bits carried by the lead byte.
    int len     = 1;
    uint32_t cp = lead;
    if (lead >= 0xF0) {
      len = 4;
      cp  = lead & 0x07u;
    } else if (lead >= 0xE0) {
      len = 3;
      cp  = lead & 0x0Fu;
    } else if (lead >= 0xC2) {
      len = 2;
      cp  = lead & 0x1Fu;
    }

    // Accepted range of the second byte; four lead bytes narrow it from the
    // general 80..BF.
    int second_lo = 0x80;
    int second_hi = 0xBF;
    switch (lead) {
    case 0xE0:
      second_lo = 0xA0;
      break;
    case 0xED:
      second_hi = 0x9F;
      break;
    case 0xF0:
      second_lo = 0x90;
      break;
    case 0xF4:
      second_hi = 0x8F;
      break;
    default:
      break;
    }

    // For each trailing byte: first make sure it is present, then validate it.
    for (int i = 1; i < len; ++i) {
      if (frm_end - frm_nxt <= i)
        return codecvt_base::partial;
      const uint8_t trail = frm_nxt[i];
      const int lo        = i == 1 ? second_lo : 0x80;
      const int hi        = i == 1 ? second_hi : 0xBF;
      if (trail < lo || trail > hi)
        return codecvt_base::error;
      cp = (cp << 6) | (trail & 0x3Fu);
    }

    if (cp > Maxcode)
      return codecvt_base::error;
    *to_nxt++ = cp;
    frm_nxt += len;
  }
  return frm_nxt < frm_end ? codecvt_base::partial : codecvt_base::ok;
}
| |
// Measures how many bytes at the start of [frm, frm_end) consist of accepted
// UTF-8 sequences that account for at most mx code points.
//
// Scanning stops at the first malformed or truncated sequence, the first
// decoded value greater than Maxcode, or once mx code points have been
// counted. When mode has consume_header set, a leading EF BB BF is skipped and
// its three bytes are part of the returned count.
//
// Returns the distance from frm to the stopping position, converted to int.
static int utf8_to_ucs4_length(
    const uint8_t* frm,
    const uint8_t* frm_end,
    size_t mx,
    unsigned long Maxcode = 0x10FFFF,
    codecvt_mode mode     = codecvt_mode(0)) {
  const uint8_t* cur = frm;
  if ((mode & consume_header) && frm_end - cur >= 3 && cur[0] == 0xEF && cur[1] == 0xBB && cur[2] == 0xBF)
    cur += 3;

  size_t count = 0;
  while (cur < frm_end && count < mx) {
    const uint8_t lead = *cur;

    if (lead < 0x80) {
      // Single-byte sequence.
      if (lead > Maxcode)
        break;
      cur += 1;
      count += 1;
      continue;
    }

    // 0x80..0xC1 and 0xF5..0xFF are not accepted as lead bytes.
    if (lead < 0xC2 || lead > 0xF4)
      break;

    if (lead < 0xE0) {
      // Two-byte sequence: the second byte must lie in 80..BF.
      if (frm_end - cur < 2)
        break;
      const uint8_t b2 = cur[1];
      if (b2 < 0x80 || b2 > 0xBF)
        break;
      const unsigned long cp = ((lead & 0x1Fu) << 6) | (b2 & 0x3Fu);
      if (cp > Maxcode)
        break;
      cur += 2;
      count += 1;
      continue;
    }

    if (lead < 0xF0) {
      // Three-byte sequence: E0 needs a second byte in A0..BF, ED one in
      // 80..9F, and every other lead accepts 80..BF.
      if (frm_end - cur < 3)
        break;
      const uint8_t b2 = cur[1];
      const uint8_t b3 = cur[2];
      const int b2_lo  = lead == 0xE0 ? 0xA0 : 0x80;
      const int b2_hi  = lead == 0xED ? 0x9F : 0xBF;
      if (b2 < b2_lo || b2 > b2_hi)
        break;
      if (b3 < 0x80 || b3 > 0xBF)
        break;
      const unsigned long cp = ((lead & 0x0Fu) << 12) | ((b2 & 0x3Fu) << 6) | (b3 & 0x3Fu);
      if (cp > Maxcode)
        break;
      cur += 3;
      count += 1;
      continue;
    }

    // Four-byte sequence (lead F0..F4): F0 needs a second byte in 90..BF and
    // F4 one in 80..8F.
    if (frm_end - cur < 4)
      break;
    const uint8_t b2 = cur[1];
    const uint8_t b3 = cur[2];
    const uint8_t b4 = cur[3];
    const int b2_lo  = lead == 0xF0 ? 0x90 : 0x80;
    const int b2_hi  = lead == 0xF4 ? 0x8F : 0xBF;
    if (b2 < b2_lo || b2 > b2_hi)
      break;
    if (b3 < 0x80 || b3 > 0xBF || b4 < 0x80 || b4 > 0xBF)
      break;
    const unsigned long cp = ((lead & 0x07u) << 18) | ((b2 & 0x3Fu) << 12) | ((b3 & 0x3Fu) << 6) | (b4 & 0x3Fu);
    if (cp > Maxcode)
      break;
    cur += 4;
    count += 1;
  }
  return static_cast<int>(cur - frm);
}
| |
// Encodes the 16-bit values in [frm, frm_end) as UTF-8 into [to, to_end).
//
// frm_nxt and to_nxt are reset to frm and to, then advanced past every value
// that was written out completely. When mode has generate_header set, the
// bytes EF BB BF are written first.
//
// Returns:
//   error   - the next value is in D800..DFFF or greater than Maxcode
//   partial - the output range cannot hold the header or the next encoding
//   ok      - every input value was encoded
static codecvt_base::result ucs2_to_utf8(
    const uint16_t* frm,
    const uint16_t* frm_end,
    const uint16_t*& frm_nxt,
    uint8_t* to,
    uint8_t* to_end,
    uint8_t*& to_nxt,
    unsigned long Maxcode = 0x10FFFF,
    codecvt_mode mode     = codecvt_mode(0)) {
  frm_nxt = frm;
  to_nxt  = to;

  if (mode & generate_header) {
    if (to_end - to_nxt < 3)
      return codecvt_base::partial;
    to_nxt[0] = static_cast<uint8_t>(0xEF);
    to_nxt[1] = static_cast<uint8_t>(0xBB);
    to_nxt[2] = static_cast<uint8_t>(0xBF);
    to_nxt += 3;
  }

  while (frm_nxt < frm_end) {
    const uint16_t cp       = *frm_nxt;
    const bool is_surrogate = 0xD800 <= cp && cp <= 0xDFFF;
    if (is_surrogate || cp > Maxcode)
      return codecvt_base::error;

    // Number of bytes in the encoding; nothing is written unless all fit.
    const int len = cp < 0x0080 ? 1 : cp < 0x0800 ? 2 : 3;
    if (to_end - to_nxt < len)
      return codecvt_base::partial;

    // Lead byte: length prefix combined with the bits above the continuation
    // bytes (the whole value for a single-byte encoding).
    const int prefix = len == 1 ? 0x00 : len == 2 ? 0xC0 : 0xE0;
    *to_nxt++        = static_cast<uint8_t>(prefix | (cp >> (6 * (len - 1))));

    // Continuation bytes, most significant six-bit group first.
    for (int shift = 6 * (len - 2); shift >= 0; shift -= 6)
      *to_nxt++ = static_cast<uint8_t>(0x80 | ((cp >> shift) & 0x3F));

    ++frm_nxt;
  }
  return codecvt_base::ok;
}
| |
// Decodes the UTF-8 bytes in [frm, frm_end) into 16-bit values stored in
// [to, to_end). Only sequences of one to three bytes are accepted; any lead
// byte of 0xF0 or above is an error.
//
// frm_nxt and to_nxt are reset to frm and to, then advanced past every
// sequence that was decoded and stored. When mode has consume_header set, a
// leading EF BB BF is skipped without producing output.
//
// Bytes of a sequence are examined in order, so a truncated sequence is
// reported as partial only if every byte present so far is acceptable.
//
// Returns:
//   error   - rejected lead byte, out-of-range trailing byte, or a decoded
//             value greater than Maxcode
//   partial - the input ends inside a sequence, or input remains after the
//             output range is full
//   ok      - all input was consumed
static codecvt_base::result utf8_to_ucs2(
    const uint8_t* frm,
    const uint8_t* frm_end,
    const uint8_t*& frm_nxt,
    uint16_t* to,
    uint16_t* to_end,
    uint16_t*& to_nxt,
    unsigned long Maxcode = 0x10FFFF,
    codecvt_mode mode     = codecvt_mode(0)) {
  frm_nxt = frm;
  to_nxt  = to;
  if ((mode & consume_header) && frm_end - frm_nxt >= 3 && frm_nxt[0] == 0xEF && frm_nxt[1] == 0xBB &&
      frm_nxt[2] == 0xBF)
    frm_nxt += 3;

  while (frm_nxt < frm_end && to_nxt < to_end) {
    const uint8_t lead = *frm_nxt;

    // 0x80..0xC1 and everything from 0xF0 up are not accepted as lead bytes.
    if (lead >= 0x80 && (lead < 0xC2 || lead >= 0xF0))
      return codecvt_base::error;

    // Sequence length and the payload bits carried by the lead byte.
    int len     = 1;
    uint32_t cp = lead;
    if (lead >= 0xE0) {
      len = 3;
      cp  = lead & 0x0Fu;
    } else if (lead >= 0xC2) {
      len = 2;
      cp  = lead & 0x1Fu;
    }

    // Accepted range of the second byte; E0 and ED narrow it from the general
    // 80..BF.
    const int second_lo = lead == 0xE0 ? 0xA0 : 0x80;
    const int second_hi = lead == 0xED ? 0x9F : 0xBF;

    // For each trailing byte: first make sure it is present, then validate it.
    for (int i = 1; i < len; ++i) {
      if (frm_end - frm_nxt <= i)
        return codecvt_base::partial;
      const uint8_t trail = frm_nxt[i];
      const int lo        = i == 1 ? second_lo : 0x80;
      const int hi        = i == 1 ? second_hi : 0xBF;
      if (trail < lo || trail > hi)
        return codecvt_base::error;
      cp = (cp << 6) | (trail & 0x3Fu);
    }

    if (cp > Maxcode)
      return codecvt_base::error;
    // At most 16 payload bits were gathered, so the value fits in uint16_t.
    *to_nxt++ = static_cast<uint16_t>(cp);
    frm_nxt += len;
  }
  return frm_nxt < frm_end ? codecvt_base::partial : codecvt_base::ok;
}
| |
// Measures how many bytes at the start of [frm, frm_end) consist of accepted
// UTF-8 sequences of one to three bytes that account for at most mx decoded
// values.
//
// Scanning stops at the first malformed or truncated sequence, the first lead
// byte of 0xF0 or above, the first decoded value greater than Maxcode, or once
// mx values have been counted. When mode has consume_header set, a leading
// EF BB BF is skipped and its three bytes are part of the returned count.
//
// Returns the distance from frm to the stopping position, converted to int.
static int utf8_to_ucs2_length(
    const uint8_t* frm,
    const uint8_t* frm_end,
    size_t mx,
    unsigned long Maxcode = 0x10FFFF,
    codecvt_mode mode     = codecvt_mode(0)) {
  const uint8_t* cur = frm;
  if ((mode & consume_header) && frm_end - cur >= 3 && cur[0] == 0xEF && cur[1] == 0xBB && cur[2] == 0xBF)
    cur += 3;

  size_t count = 0;
  while (cur < frm_end && count < mx) {
    const uint8_t lead = *cur;

    if (lead < 0x80) {
      // Single-byte sequence.
      if (lead > Maxcode)
        break;
      cur += 1;
      count += 1;
      continue;
    }

    // 0x80..0xC1 and everything from 0xF0 up are not accepted as lead bytes.
    if (lead < 0xC2 || lead >= 0xF0)
      break;

    if (lead < 0xE0) {
      // Two-byte sequence: the second byte must lie in 80..BF.
      if (frm_end - cur < 2)
        break;
      const uint8_t b2 = cur[1];
      if (b2 < 0x80 || b2 > 0xBF)
        break;
      const unsigned long cp = ((lead & 0x1Fu) << 6) | (b2 & 0x3Fu);
      if (cp > Maxcode)
        break;
      cur += 2;
      count += 1;
      continue;
    }

    // Three-byte sequence: E0 needs a second byte in A0..BF, ED one in 80..9F,
    // and every other lead accepts 80..BF.
    if (frm_end - cur < 3)
      break;
    const uint8_t b2 = cur[1];
    const uint8_t b3 = cur[2];
    const int b2_lo  = lead == 0xE0 ? 0xA0 : 0x80;
    const int b2_hi  = lead == 0xED ? 0x9F : 0xBF;
    if (b2 < b2_lo || b2 > b2_hi)
      break;
    if (b3 < 0x80 || b3 > 0xBF)
      break;
    const unsigned long cp = ((lead & 0x0Fu) << 12) | ((b2 & 0x3Fu) << 6) | (b3 & 0x3Fu);
    if (cp > Maxcode)
      break;
    cur += 3;
    count += 1;
  }
  return static_cast<int>(cur - frm);
}
| |
// Encodes the 32-bit code points in [frm, frm_end) as UTF-16 code units stored
// high byte first in the byte range [to, to_end).
//
// frm_nxt and to_nxt are reset to frm and to, then advanced past every code
// point that was written out completely. When mode has generate_header set,
// the bytes FE FF are written first. Values below 0x10000 occupy two bytes;
// larger values are written as two 16-bit units (four bytes).
//
// Returns:
//   error   - the next code point is in D800..DFFF or greater than Maxcode
//   partial - the output range cannot hold the header or the next encoding
//   ok      - every input code point was encoded
static codecvt_base::result ucs4_to_utf16be(
    const uint32_t* frm,
    const uint32_t* frm_end,
    const uint32_t*& frm_nxt,
    uint8_t* to,
    uint8_t* to_end,
    uint8_t*& to_nxt,
    unsigned long Maxcode = 0x10FFFF,
    codecvt_mode mode     = codecvt_mode(0)) {
  frm_nxt = frm;
  to_nxt  = to;

  if (mode & generate_header) {
    if (to_end - to_nxt < 2)
      return codecvt_base::partial;
    to_nxt[0] = static_cast<uint8_t>(0xFE);
    to_nxt[1] = static_cast<uint8_t>(0xFF);
    to_nxt += 2;
  }

  while (frm_nxt < frm_end) {
    const uint32_t cp       = *frm_nxt;
    const bool is_surrogate = 0xD800 <= cp && cp <= 0xDFFF;
    if (is_surrogate || cp > Maxcode)
      return codecvt_base::error;

    if (cp < 0x010000) {
      // One 16-bit unit.
      if (to_end - to_nxt < 2)
        return codecvt_base::partial;
      to_nxt[0] = static_cast<uint8_t>(cp >> 8);
      to_nxt[1] = static_cast<uint8_t>(cp);
      to_nxt += 2;
    } else {
      // Two 16-bit units: the first carries (bits 16..20 minus one) and bits
      // 10..15, the second carries the low ten bits.
      if (to_end - to_nxt < 4)
        return codecvt_base::partial;
      const uint32_t upper = (cp >> 16) & 0x1F;
      const uint16_t high  = static_cast<uint16_t>(0xD800 | ((upper - 1) << 6) | ((cp >> 10) & 0x3F));
      const uint16_t low   = static_cast<uint16_t>(0xDC00 | (cp & 0x03FF));
      to_nxt[0]            = static_cast<uint8_t>(high >> 8);
      to_nxt[1]            = static_cast<uint8_t>(high);
      to_nxt[2]            = static_cast<uint8_t>(low >> 8);
      to_nxt[3]            = static_cast<uint8_t>(low);
      to_nxt += 4;
    }
    ++frm_nxt;
  }
  return codecvt_base::ok;
}
| |
| static codecvt_base::result utf16be_to_ucs4( |
| const uint8_t* frm, |
| const uint8_t* frm_end, |
| const uint8_t*& frm_nxt, |
| uint32_t* to, |
| uint32_t* to_end, |
| uint32_t*& to_nxt, |
| unsigned long Maxcode = 0x10FFFF, |
| codecvt_mode mode = codecvt_mode(0)) { |
| frm_nxt = frm; |
| to_nxt = to; |
| if (mode & consume_header) { |
| if (frm_end - frm_nxt >= 2 && frm_nxt[0] == 0xFE && frm_nxt[1] |