blob: 896f3181ac6da290dc1b86b59f63d1167dd73eb0 [file] [log] [blame]
//===--- UnicodeNormalization.cpp - Unicode Normalization Helpers ---------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// Functions that use ICU to do unicode normalization and collation.
//
//===----------------------------------------------------------------------===//
#include "../SwiftShims/UnicodeShims.h"
#include <stdint.h>
#if defined(__APPLE__)
// Declare a few external functions to avoid a dependency on ICU headers.
extern "C" {
// Types
typedef struct UBreakIterator UBreakIterator;
typedef struct UText UText;
typedef struct UBreakIterator UNormalizer2;
typedef enum UBreakIteratorType {} UBreakIteratorType;
typedef enum UErrorCode {} UErrorCode;
typedef enum UCharNameChoice {} UCharNameChoice;
typedef uint16_t UChar;
typedef int32_t UChar32;
typedef int8_t UBool;
typedef swift::__swift_stdlib_UProperty UProperty;
#define U_MAX_VERSION_LENGTH 4
typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
// Grapheme breaking APIs
void ubrk_close(UBreakIterator *);
UBreakIterator *ubrk_open(UBreakIteratorType, const char *, const UChar *,
int32_t, UErrorCode *);
int32_t ubrk_preceding(UBreakIterator *, int32_t);
int32_t ubrk_following(UBreakIterator *, int32_t);
void ubrk_setText(UBreakIterator *, const UChar *, int32_t, UErrorCode *);
void ubrk_setUText(UBreakIterator *, UText *, UErrorCode *);
UText *utext_openUTF8(UText *, const char *, int64_t, UErrorCode *);
UText *utext_openUChars(UText *, const UChar *, int64_t, UErrorCode *);
// Comparison, normalization, and character property APIs
int32_t unorm2_spanQuickCheckYes(const UNormalizer2 *, const UChar *, int32_t,
UErrorCode *);
int32_t unorm2_normalize(const UNormalizer2 *, const UChar *, int32_t, UChar *,
int32_t, UErrorCode *);
const UNormalizer2 *unorm2_getNFCInstance(UErrorCode *);
UBool unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c);
UBool u_hasBinaryProperty(UChar32, UProperty);
void u_charAge(UChar32, UVersionInfo);
int32_t u_getIntPropertyValue(UChar32, UProperty);
int32_t u_charName(UChar32, UCharNameChoice, char *, int32_t, UErrorCode *);
int32_t u_strToLower(UChar *, int32_t, const UChar *, int32_t, const char *,
UErrorCode *);
int32_t u_strToTitle(UChar *, int32_t, const UChar *, int32_t,
UBreakIterator *, const char *, UErrorCode *);
int32_t u_strToUpper(UChar *, int32_t, const UChar *, int32_t, const char *,
UErrorCode *);
double u_getNumericValue(UChar32);
}
#else
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdocumentation"
#include <unicode/ustring.h>
#include <unicode/ucol.h>
#include <unicode/ucoleitr.h>
#include <unicode/uiter.h>
#include <unicode/ubrk.h>
#include <unicode/uchar.h>
#include <unicode/ustring.h>
#include <unicode/uvernum.h>
#include <unicode/uversion.h>
#pragma clang diagnostic pop
#endif
#if !defined(__APPLE__)
#include "swift/Basic/Lazy.h"
#include "swift/Runtime/Config.h"
#include "swift/Runtime/Debug.h"
#include <algorithm>
#include <mutex>
#include <assert.h>
/// Convert the unicode string to uppercase. This function will return the
/// required buffer length as a result. If this length does not match the
/// 'DestinationCapacity' this function must be called again with a buffer of
/// the required length to get an uppercase version of the string.
int32_t
swift::_swift_stdlib_unicode_strToUpper(uint16_t *Destination,
int32_t DestinationCapacity,
const uint16_t *Source,
int32_t SourceLength) {
UErrorCode ErrorCode = U_ZERO_ERROR;
uint32_t OutputLength = u_strToUpper(reinterpret_cast<UChar *>(Destination),
DestinationCapacity,
reinterpret_cast<const UChar *>(Source),
SourceLength,
"", &ErrorCode);
if (U_FAILURE(ErrorCode) && ErrorCode != U_BUFFER_OVERFLOW_ERROR) {
swift::crash("u_strToUpper: Unexpected error uppercasing unicode string.");
}
return OutputLength;
}
/// Convert the unicode string to lowercase. This function will return the
/// required buffer length as a result. If this length does not match the
/// 'DestinationCapacity' this function must be called again with a buffer of
/// the required length to get a lowercase version of the string.
int32_t
swift::_swift_stdlib_unicode_strToLower(uint16_t *Destination,
int32_t DestinationCapacity,
const uint16_t *Source,
int32_t SourceLength) {
UErrorCode ErrorCode = U_ZERO_ERROR;
uint32_t OutputLength = u_strToLower(reinterpret_cast<UChar *>(Destination),
DestinationCapacity,
reinterpret_cast<const UChar *>(Source),
SourceLength,
"", &ErrorCode);
if (U_FAILURE(ErrorCode) && ErrorCode != U_BUFFER_OVERFLOW_ERROR) {
swift::crash("u_strToLower: Unexpected error lowercasing unicode string.");
}
return OutputLength;
}
#endif
namespace {
template <typename T, typename U> T *ptr_cast(U *p) {
return static_cast<T *>(static_cast<void *>(p));
}
template <typename T, typename U> const T *ptr_cast(const U *p) {
return static_cast<const T *>(static_cast<const void *>(p));
}
}
void swift::__swift_stdlib_ubrk_close(
swift::__swift_stdlib_UBreakIterator *bi) {
ubrk_close(ptr_cast<UBreakIterator>(bi));
}
swift::__swift_stdlib_UBreakIterator *swift::__swift_stdlib_ubrk_open(
swift::__swift_stdlib_UBreakIteratorType type, const char *locale,
const __swift_stdlib_UChar *text, int32_t textLength,
__swift_stdlib_UErrorCode *status) {
return ptr_cast<swift::__swift_stdlib_UBreakIterator>(
ubrk_open(static_cast<UBreakIteratorType>(type), locale,
reinterpret_cast<const UChar *>(text), textLength,
ptr_cast<UErrorCode>(status)));
}
int32_t
swift::__swift_stdlib_ubrk_preceding(swift::__swift_stdlib_UBreakIterator *bi,
int32_t offset) {
return ubrk_preceding(ptr_cast<UBreakIterator>(bi), offset);
}
int32_t
swift::__swift_stdlib_ubrk_following(swift::__swift_stdlib_UBreakIterator *bi,
int32_t offset) {
return ubrk_following(ptr_cast<UBreakIterator>(bi), offset);
}
void swift::__swift_stdlib_ubrk_setText(
swift::__swift_stdlib_UBreakIterator *bi, const __swift_stdlib_UChar *text,
__swift_int32_t textLength, __swift_stdlib_UErrorCode *status) {
return ubrk_setText(ptr_cast<UBreakIterator>(bi), ptr_cast<UChar>(text),
textLength, ptr_cast<UErrorCode>(status));
}
void swift::__swift_stdlib_ubrk_setUText(
swift::__swift_stdlib_UBreakIterator *bi, __swift_stdlib_UText *text,
__swift_stdlib_UErrorCode *status) {
return ubrk_setUText(ptr_cast<UBreakIterator>(bi), ptr_cast<UText>(text),
ptr_cast<UErrorCode>(status));
}
SWIFT_RUNTIME_STDLIB_API swift::__swift_stdlib_UText *
swift::__swift_stdlib_utext_openUTF8(__swift_stdlib_UText *ut,
const char *s, int64_t len,
__swift_stdlib_UErrorCode *status) {
return ptr_cast<__swift_stdlib_UText>(
utext_openUTF8(ptr_cast<UText>(ut), s, len,
ptr_cast<UErrorCode>(status)));
}
SWIFT_RUNTIME_STDLIB_API swift::__swift_stdlib_UText *
swift::__swift_stdlib_utext_openUChars(__swift_stdlib_UText *ut,
const __swift_stdlib_UChar *s,
int64_t len,
__swift_stdlib_UErrorCode *status) {
return ptr_cast<__swift_stdlib_UText>(
utext_openUChars(ptr_cast<UText>(ut), ptr_cast<UChar>(s), len,
ptr_cast<UErrorCode>(status)));
}
swift::__swift_stdlib_UBool swift::__swift_stdlib_unorm2_hasBoundaryBefore(
const __swift_stdlib_UNormalizer2 *ptr, __swift_stdlib_UChar32 char32) {
return unorm2_hasBoundaryBefore(ptr_cast<UNormalizer2>(ptr), char32);
}
const swift::__swift_stdlib_UNormalizer2 *
swift::__swift_stdlib_unorm2_getNFCInstance(__swift_stdlib_UErrorCode *err) {
return ptr_cast<__swift_stdlib_UNormalizer2>(
unorm2_getNFCInstance(ptr_cast<UErrorCode>(err)));
}
int32_t swift::__swift_stdlib_unorm2_normalize(
const __swift_stdlib_UNormalizer2 *norm, const __swift_stdlib_UChar *src,
__swift_int32_t len, __swift_stdlib_UChar *dst, __swift_int32_t capacity,
__swift_stdlib_UErrorCode *err) {
// TODO remove this compatibility when we require ICU >= 59 on Linux
#if defined(__APPLE__) || U_ICU_VERSION_MAJOR_NUM >= 59
return unorm2_normalize(ptr_cast<UNormalizer2>(norm), src, len, dst, capacity,
ptr_cast<UErrorCode>(err));
#else
return unorm2_normalize(ptr_cast<UNormalizer2>(norm),
reinterpret_cast<const UChar *>(src), len,
reinterpret_cast<UChar *>(dst), capacity,
ptr_cast<UErrorCode>(err));
#endif
}
__swift_int32_t swift::__swift_stdlib_unorm2_spanQuickCheckYes(
const __swift_stdlib_UNormalizer2 *norm, const __swift_stdlib_UChar *ptr,
__swift_int32_t len, __swift_stdlib_UErrorCode *err) {
return unorm2_spanQuickCheckYes(ptr_cast<UNormalizer2>(norm),
ptr_cast<UChar>(ptr), len,
ptr_cast<UErrorCode>(err));
}
swift::__swift_stdlib_UBool
swift::__swift_stdlib_u_hasBinaryProperty(__swift_stdlib_UChar32 c,
__swift_stdlib_UProperty p) {
return u_hasBinaryProperty(c, static_cast<UProperty>(p));
}
void
swift::__swift_stdlib_u_charAge(__swift_stdlib_UChar32 c,
__swift_stdlib_UVersionInfo versionInfo) {
return u_charAge(c, versionInfo);
}
__swift_int32_t
swift::__swift_stdlib_u_getIntPropertyValue(__swift_stdlib_UChar32 c,
__swift_stdlib_UProperty p) {
return u_getIntPropertyValue(c, static_cast<UProperty>(p));
}
__swift_int32_t swift::__swift_stdlib_u_charName(
__swift_stdlib_UChar32 code, __swift_stdlib_UCharNameChoice nameChoice,
char *buffer, __swift_int32_t bufferLength,
__swift_stdlib_UErrorCode *pErrorCode) {
return u_charName(code, static_cast<UCharNameChoice>(nameChoice),
buffer, bufferLength,
ptr_cast<UErrorCode>(pErrorCode));
}
__swift_int32_t swift::__swift_stdlib_u_strToLower(
__swift_stdlib_UChar *dest, __swift_int32_t destCapacity,
const __swift_stdlib_UChar *src, __swift_int32_t srcLength,
const char *locale, __swift_stdlib_UErrorCode *pErrorCode) {
return u_strToLower(ptr_cast<UChar>(dest), destCapacity,
ptr_cast<UChar>(src), srcLength,
locale, ptr_cast<UErrorCode>(pErrorCode));
}
__swift_int32_t swift::__swift_stdlib_u_strToTitle(
__swift_stdlib_UChar *dest, __swift_int32_t destCapacity,
const __swift_stdlib_UChar *src, __swift_int32_t srcLength,
__swift_stdlib_UBreakIterator *titleIter, const char *locale,
__swift_stdlib_UErrorCode *pErrorCode) {
return u_strToTitle(ptr_cast<UChar>(dest), destCapacity,
ptr_cast<UChar>(src), srcLength,
ptr_cast<UBreakIterator>(titleIter), locale,
ptr_cast<UErrorCode>(pErrorCode));
}
__swift_int32_t swift::__swift_stdlib_u_strToUpper(
__swift_stdlib_UChar *dest, __swift_int32_t destCapacity,
const __swift_stdlib_UChar *src, __swift_int32_t srcLength,
const char *locale, __swift_stdlib_UErrorCode *pErrorCode) {
return u_strToUpper(ptr_cast<UChar>(dest), destCapacity,
ptr_cast<UChar>(src), srcLength,
locale, ptr_cast<UErrorCode>(pErrorCode));
}
double swift::__swift_stdlib_u_getNumericValue(__swift_stdlib_UChar32 c) {
return u_getNumericValue(c);
}
// Force an autolink with ICU
#if defined(__MACH__)
asm(".linker_option \"-licucore\"\n");
#endif // defined(__MACH__)