test/Prototypes/Unicode.swift - third_party/swift - Git at Google

 // RUN: rm -f %t && %target-build-swift -I %S/icu -licucore %s -o %t
 // RUN: %target-run %t
 // REQUIRES: executable_test

 import StdlibUnittest
 import ICU

 //===----------------------------------------------------------------------===//
 //===--- Missing stdlib niceties ------------------------------------------===//
 //===----------------------------------------------------------------------===//

 //===--- UnicodeScalar / Character affordances ----------------------------===//
 extension UnicodeScalar {
   init<E: EncodedScalarProtocol>(_ e: E) {
     // FIXME:use init(_unchecked:) when in stdlib
     self = UnicodeScalar(e.utf32[0])!
   }
 }
 extension Character {
   init<S: Sequence>(_ s: S) where S.Iterator.Element == UnicodeScalar {
     // FIXME: Horribly inefficient, but the stuff to make it fast is private.
     // FIXME: Also, constructing "👩‍❤️‍👩" is foiled by precondition checks
     var r = ""
     for scalar in s {
       r += String(scalar)
     }
     self = Character(r)
   }
 }

 //===--- Comparison between UnsafePointer<T> and UnsafeMutablePointer<T> ---===//
 extension UnsafePointer {
   static func == (
     self_: UnsafePointer, mut: UnsafeMutablePointer<Pointee>) -> Bool {
     return self_ == UnsafePointer(mut)
   }
   static func == (
     mut: UnsafeMutablePointer<Pointee>, self_: UnsafePointer) -> Bool {
     return self_ == UnsafePointer(mut)
   }
   static func != (
     self_: UnsafePointer, mut: UnsafeMutablePointer<Pointee>) -> Bool {
     return self_ == UnsafePointer(mut)
   }
   static func != (
     mut: UnsafeMutablePointer<Pointee>, self_: UnsafePointer) -> Bool {
     return self_ == UnsafePointer(mut)
   }
   // FIXME: add <, >, <=, >=
 }

 //===--- Integer coercion operator ----------------------------------------===//
 // You may think it's icky, but it sure cleans up a lot of ugly numericCast()
 // invocations in this file.
 postfix operator ^

 extension _SignedInteger {
   static postfix func ^ <U : _SignedInteger>(_ x: Self) -> U {
     return numericCast(x)
   }
   static postfix func ^ <U : UnsignedInteger>(_ x: Self) -> U {
     return numericCast(x)
   }
 }
 extension UnsignedInteger {
   static postfix func ^ <U : UnsignedInteger>(_ x: Self) -> U {
     return numericCast(x)
   }
   static postfix func ^ <U : SignedInteger>(_ x: Self) -> U {
     return numericCast(x)
   }
 }

 //===--- Missing clamping of Comparable to a Range ------------------------===//
 extension Comparable {
   func clamped(to r: ClosedRange<Self>) -> Self {
     return self < r.lowerBound ? r.lowerBound
          : self > r.upperBound ? r.upperBound
          : self
   }
 }

 //===--- Index affordances ------------------------------------------------===//
 extension Collection {
   func index<I: SignedInteger>(atOffset offset: I) -> Index {
     return index(startIndex, offsetBy: offset^)
   }
   func offset(of i: Index) -> IndexDistance {
     return distance(from: startIndex, to: i)
   }
 }

 //===--- someCollection[...] notation -------------------------------------===//
 // Thanks to Joe Groff for the technique
 enum UnboundedRange_ {
   static postfix func ... (_: UnboundedRange_) -> () { fatalError("uncallable") }
 }
 typealias UnboundedRange = (UnboundedRange_)->()

 extension Collection {
   subscript(_: UnboundedRange) -> SubSequence {
     return self[startIndex..<endIndex]
   }
 }

 extension MutableCollection {
   subscript(_: UnboundedRange) -> SubSequence {
     get {
       return self[startIndex...]
     }
     set {
       self[startIndex...] = newValue
     }
   }
 }

 //===--- Algorithms -------------------------------------------------------===//
 extension MutableCollection {
   /// Copies elements from `source` into `self`, starting at the beginning of
   /// each.
   ///
   /// - Returns:
   ///
   ///   - `limit`: the first index in `self` that was not copied into, or
   ///     `endIndex` if all elements were assigned.
   ///
   ///   - `remainder`: the subsequence of source that didn't fit into `self`,
   ///     or `source[endIndex...]` if all elements fit.
   @discardableResult
   mutating func copy<Source: Collection>(from source: Source)
   -> (limit: Index, remainder: Source.SubSequence)
   where Source.SubSequence : Collection,
   Source.SubSequence.Iterator.Element == Iterator.Element,
   Source.SubSequence == Source.SubSequence.SubSequence {
     // This method should be optimizable for segmented collections
     var r = source[...]
     var i: Index = startIndex
     while i != endIndex {
       guard let e = r.popFirst()
       else { return (limit: i, remainder: r) }
       self[i] = e
       i = index(after: i)
     }
     return (limit: endIndex, remainder: r)
   }
 }

 //===--- One-sided ranges -------------------------------------------------===//
 prefix operator ..<
 struct IncompleteRangeUpTo<T: Comparable> {
   init(_ upperBound: T) { self.upperBound = upperBound }
   let upperBound: T
 }
 extension Comparable {
   static prefix func ..<(x: Self) -> IncompleteRangeUpTo<Self> {
     return IncompleteRangeUpTo(x)
   }
 }
 extension Collection {
   subscript(r: IncompleteRangeUpTo<Index>) -> SubSequence {
     return self[self.startIndex..<r.upperBound]
   }
 }
 extension MutableCollection {
   subscript(r: IncompleteRangeUpTo<Index>) -> SubSequence {
     get {
       return self[self.startIndex..<r.upperBound]
     }
     set {
       self[self.startIndex..<r.upperBound] = newValue
     }
   }
 }

 prefix operator ...
 struct IncompleteRangeThrough<T: Comparable> {
   init(_ upperBound: T) { self.upperBound = upperBound }
   let upperBound: T
 }
 extension Comparable {
   static prefix func ...(x: Self) -> IncompleteRangeThrough<Self> {
     return IncompleteRangeThrough(x)
   }
 }
 extension Collection {
   subscript(r: IncompleteRangeThrough<Index>) -> SubSequence {
     return self[self.startIndex...r.upperBound]
   }
 }
 extension MutableCollection {
   subscript(r: IncompleteRangeThrough<Index>) -> SubSequence {
     get {
       return self[self.startIndex...r.upperBound]
     }
     set {
       self[self.startIndex...r.upperBound] = newValue
     }
   }
 }

 postfix operator ...
 struct IncompleteRangeFrom<T: Comparable> {
   init(_ lowerBound: T) { self.lowerBound = lowerBound }
   let lowerBound: T
 }
 extension Comparable {
   static postfix func ...(x: Self) -> IncompleteRangeFrom<Self> {
     return IncompleteRangeFrom(x)
   }
 }
 extension Collection {
   subscript(r: IncompleteRangeFrom<Index>) -> SubSequence {
     return self[r.lowerBound..<self.endIndex]
   }
 }
 extension MutableCollection {
   subscript(r: IncompleteRangeFrom<Index>) -> SubSequence {
     get {
       return self[r.lowerBound..<self.endIndex]
     }
     set {
       self[r.lowerBound..<self.endIndex] = newValue
     }
   }
 }

 //===----------------------------------------------------------------------===//
 //===--- Logging ----------------------------------------------------------===//
 //===----------------------------------------------------------------------===//
 var logging = false
 func debugLog(_ arg: String/*@autoclosure ()->Any*/) {
   guard logging == true else { return }
   print(arg/*()*/)
 }
 func debugLog(_ arg0: String/*@autoclosure ()->Any*/, _ arg1: String/*@autoclosure ()->Any*/) {
   guard logging else { return }
   print(arg0/*()*/, arg1/*()*/)
 }
 //===----------------------------------------------------------------------===//
 //===----------------------------------------------------------------------===//
 //===----------------------------------------------------------------------===//

 /// A collection of `CodeUnit`s to be interpreted by some `Encoding`
 struct UnicodeStorage<
   CodeUnits : RandomAccessCollection,
   Encoding : UnicodeEncoding
 >
 where Encoding.EncodedScalar.Iterator.Element == CodeUnits.Iterator.Element,
   CodeUnits.SubSequence : RandomAccessCollection,
   CodeUnits.SubSequence.Index == CodeUnits.Index,
   CodeUnits.SubSequence.SubSequence == CodeUnits.SubSequence,
   CodeUnits.SubSequence.Iterator.Element == CodeUnits.Iterator.Element {

   init(_ codeUnits: CodeUnits, _: Encoding.Type = Encoding.self) {
     self.codeUnits = codeUnits
   }

   let codeUnits: CodeUnits
 }


 /// A lazy collection of `Encoding.EncodedScalar` that results
 /// from parsing an instance of codeUnits using that `Encoding`.
 extension UnicodeStorage {

   public struct EncodedScalars {
     let codeUnits: CodeUnits

     init(_ codeUnits: CodeUnits, _: Encoding.Type = Encoding.self) {
       self.codeUnits = codeUnits
     }
   }
 }

 extension UnicodeStorage.EncodedScalars {
   // Because parsing produces a buffer and a new index, to avoid
   // repeatedly decoding the same data, this index stores that buffer
   // and the next index.  This would obviously be more complicated if
   // the buffer contained more than a single scalar (and it probably
   // should).
   public struct Index : Comparable {
     let base: CodeUnits.Index
     // FIXME: We might get a much better memory footprint if we used a
     // UInt8 to store the distance between base and next, rather than
     // storing next explicitly.  CodeUnits will be random-access in
     // practice.
     let next: CodeUnits.Index
     // FIXME: there should be an invalid inhabitant we can use in
     // EncodedScalar so as not to waste a separate bool here.
     let scalar: Encoding.EncodedScalar?

     public static func < (lhs: Index, rhs: Index) -> Bool {
       return lhs.base < rhs.base
     }
     public static func == (lhs: Index, rhs: Index) -> Bool {
       return lhs.base == rhs.base
     }
   }
 }

 /// Collection Conformance
 extension UnicodeStorage.EncodedScalars : BidirectionalCollection {
   public var startIndex: Index {
     if _slowPath(codeUnits.isEmpty) { return endIndex }
     let s = codeUnits.startIndex
     return index(after: Index(base: s, next: s, scalar: nil))
   }

   public var endIndex: Index {
     let s = codeUnits.endIndex
     return Index(base: s, next: s, scalar: nil)
   }

   public subscript(i: Index) -> Encoding.EncodedScalar {
     if let r = i.scalar {
       return r
     }
     return index(after:
       Index(base: i.base, next: i.base, scalar: nil)).scalar!
   }

   public func index(after i: Index) -> Index {
     var remainder = codeUnits[i.next...]
     while true {
       switch Encoding.parse1Forward(remainder, knownCount: 0) {
       case .valid(let scalar, let nextIndex):
         return Index(base:i.next, next: nextIndex, scalar: scalar)
       case .error(let nextIndex):
         // FIXME: don't go through UnicodeScalar once this is in the stdlib
         if let replacement = Encoding.encode(
           UTF32.EncodedScalar(UnicodeScalar(0xFFFD)!)) {
           return Index(
             base:i.next, next: nextIndex,
             scalar: replacement)
         }
         remainder = remainder.dropFirst()
       case .emptyInput:
         return endIndex
       }
     }
   }

   public func index(before i: Index) -> Index {
     var remainder = codeUnits[..<i.base]
     while true {
       switch Encoding.parse1Reverse(remainder, knownCount: 0) {
       case .valid(let scalar, let priorIndex):
         return Index(base: priorIndex, next: i.base, scalar: scalar)
       case .error(let priorIndex):
         // FIXME: don't go through UnicodeScalar once this is in the stdlib
         if let replacement = Encoding.encode(
           UTF32.EncodedScalar(UnicodeScalar(0xFFFD)!)) {
           return Index(
             base: priorIndex, next: i.base,
             scalar: replacement)
         }
         remainder = remainder.dropLast()
       case .emptyInput:
         fatalError("Indexing past start of code units")
       }
     }
   }
 }

 /// Given `CodeUnits` representing text that has been encoded with
 /// `FromEncoding`, provides a collection of `ToEncoding.CodeUnit`s
 /// representing the same text.
 struct TranscodedView<
   CodeUnits : RandomAccessCollection,
   FromEncoding : UnicodeEncoding,
   ToEncoding : UnicodeEncoding
 >
 where FromEncoding.EncodedScalar.Iterator.Element == CodeUnits.Iterator.Element,
   CodeUnits.SubSequence : RandomAccessCollection,
   CodeUnits.SubSequence.Index == CodeUnits.Index,
   CodeUnits.SubSequence.SubSequence == CodeUnits.SubSequence,
   CodeUnits.SubSequence.Iterator.Element == CodeUnits.Iterator.Element
 {
   // We could just be a generic typealias as this type, but it turns
   // out to be impossible, or nearly so, to write the init() below.
   // Instead, we wrap an instance of Base.
   typealias Base = FlattenBidirectionalCollection<
     LazyMapBidirectionalCollection<
       UnicodeStorage<CodeUnits, FromEncoding>.EncodedScalars,
       ToEncoding.EncodedScalar
     >
   >
   let base: Base
 }

 extension TranscodedView : BidirectionalCollection {
   typealias SubSequence = BidirectionalSlice<TranscodedView>

   public var startIndex : Base.Index {
     return base.startIndex
   }
   public var endIndex : Base.Index {
     return base.endIndex
   }
   public subscript(i: Base.Index) -> Base.Iterator.Element {
     return base[i]
   }
   public init(_ codeUnits: CodeUnits,
     from src: FromEncoding.Type = FromEncoding.self,
     to dst: ToEncoding.Type = ToEncoding.self
   ) {
     base = Base(UnicodeStorage<CodeUnits, FromEncoding>.EncodedScalars(codeUnits, src).lazy.map {
         dst.encode($0)!
       })
   }
   public func index(after i: Base.Index) -> Base.Index {
     return base.index(after: i)
   }
   public func index(before i: Base.Index) -> Base.Index {
     return base.index(before: i)
   }
 }

 protocol Unicode {
   associatedtype Encoding: UnicodeEncoding
   associatedtype CodeUnits: RandomAccessCollection
   /* where CodeUnits.Iterator.Element == Encoding.CodeUnit */
   var codeUnits: CodeUnits {get}

   associatedtype ValidUTF8View : BidirectionalCollection
   // where ValidUTF8View.Iterator.Element == UTF8.CodeUnit */
   // = TranscodedView<CodeUnits, Encoding, UTF8>
   var utf8: ValidUTF8View {get}

   associatedtype ValidUTF16View : BidirectionalCollection
   // where ValidUTF16View.Iterator.Element == UTF16.CodeUnit
   // = TranscodedView<CodeUnits, Encoding, UTF16>
   var utf16: ValidUTF16View {get}

   associatedtype ValidUTF32View : BidirectionalCollection
   // where ValidUTF32View.Iterator.Element == UTF32.CodeUnit
   // = TranscodedView<CodeUnits, Encoding, UTF32>
   var utf32: ValidUTF32View {get}

   associatedtype ExtendedASCII : BidirectionalCollection // FIXME: Can this be Random Access?
   /* where ExtendedASCII.Iterator.Element == UInt32 */
   var extendedASCII: ExtendedASCII {get}

   associatedtype Characters : BidirectionalCollection
   /* where Characters.Iterator.Element == Character */
   var characters: Characters { get }

   func isASCII(scan: Bool/* = true */) -> Bool
   func isLatin1(scan: Bool/* = true */) -> Bool
   func isNormalizedNFC(scan: Bool/* = true*/) -> Bool
   func isNormalizedNFD(scan: Bool/* = true*/) -> Bool
   func isInFastCOrDForm(scan: Bool/* = true*/) -> Bool
 }

 extension UText {
   /// Invokes body, passing this UText's buffer area as a parameter
   mutating func withBuffer<R>(
     _ body: (UnsafeMutableBufferPointer<UChar>)->R
   ) -> R {
     // Currently we are using the p, q, and r fields to get 12 UWords of
     // contiguous storage on 64-bit machines and 6 on 32-bit.  It's not much.
     return withUnsafeMutablePointer(to: &p) { bufferStart in
       let rawBufferStart = UnsafeRawPointer(bufferStart)
       let capacity = withUnsafeMutablePointer(to: &privP) {
         bufferLimit in
         (
           UnsafeRawPointer(bufferLimit).assumingMemoryBound(to: Int8.self)
           - rawBufferStart.assumingMemoryBound(to: Int8.self)
         ) / MemoryLayout<UChar>.stride
       }
       let start = rawBufferStart.bindMemory(to: UChar.self, capacity: capacity)
       let mutableStart = UnsafeMutablePointer(mutating: start)
       let buffer = UnsafeMutableBufferPointer(start: mutableStart, count: capacity)
       return body(buffer)
     }
   }

   mutating func validate() {
     let base = self.withBuffer { $0.baseAddress! }
     assert(chunkContents! == base, "UText moved!")
   }

   mutating func setup() {
     chunkContents = self.withBuffer { UnsafePointer($0.baseAddress!) }
     pExtra = withUnsafeMutablePointer(to: &self) {
       UnsafeMutableRawPointer($0 + 1)
     }
   }
 }

 fileprivate protocol _UTextable {
   func _nativeLength(_ uText: inout UText) -> Int64
   func _access(_ u: inout UText, _ nativeIndex: Int64, _ forward: Bool) -> Bool

   func _clone(
     _ dst: UnsafeMutablePointer<UText>?, _ u: UnsafePointer<UText>,
     _ deep: Bool, _ status: UnsafeMutablePointer<UErrorCode>?
   ) -> UnsafeMutablePointer<UText>

   func _extract(
     _ u: inout UText,
     _ nativeStart: Int64, _ nativeLimit: Int64,
     _ destination: UnsafeMutableBufferPointer<UChar>,
     _ error: UnsafeMutablePointer<UErrorCode>?
   ) -> Int32

   func _mapOffsetToNative(_ u: UnsafePointer<UText>) -> Int64
   func _mapNativeIndexToUTF16(_ u: UnsafePointer<UText>, _ nativeIndex: Int64) -> Int32
 }

 extension UnicodeStorage : _UTextable {
   fileprivate func _nativeLength(_ uText: inout UText) -> Int64 {
     uText.validate()
     return codeUnits.count^
   }

   fileprivate func _parsedSlice(
     _ offset: Int64,
     _ slice: (CodeUnits.Index) -> CodeUnits.SubSequence
   ) -> UnicodeStorage<CodeUnits.SubSequence,Encoding>.EncodedScalars.SubSequence {
     return UnicodeStorage<CodeUnits.SubSequence, Encoding>(
       slice(codeUnits.index(atOffset: offset)), Encoding.self
     ).scalars.dropFirst(0)
   }

   fileprivate func _parsedSuffix(
     fromOffset offset: Int64
   ) -> UnicodeStorage<CodeUnits.SubSequence,Encoding>.EncodedScalars.SubSequence {
     return _parsedSlice(offset, codeUnits.suffix(from:))
   }

   fileprivate func _clone(
     _ dst: UnsafeMutablePointer<UText>?, _ src: UnsafePointer<UText>,
     _ deep: Bool, _ status: UnsafeMutablePointer<UErrorCode>?
   ) ->  UnsafeMutablePointer<UText> {
     UnsafeMutablePointer(mutating: src).pointee.validate()
     debugLog("_clone with dst = \(String(describing: dst))")
     debugLog("src: \(src.pointee)")
     let r = dst
       ?? UnsafeMutablePointer.allocate(capacity: MemoryLayout<UText>.size)
     r.pointee = src.pointee
     r.pointee.setup()
     r.pointee.validate()
     debugLog("clone result: \(r.pointee)")
     return r
   }

   fileprivate func _access(
     _ u: inout UText, _ nativeTargetIndex: Int64, _ forward: Bool
   ) -> Bool {

     debugLog("_access(u: \(u), nativeTargetIndex: \(nativeTargetIndex), forward: \(forward))")
     u.validate()
     u.chunkOffset = 0

     let inBoundsTarget = nativeTargetIndex - (forward ? 0 : 1)
     if (u.chunkNativeStart..<u.chunkNativeLimit).contains(inBoundsTarget) {

       var parsedChunk = _parsedSuffix(fromOffset: u.chunkNativeStart)

       var nativeOffset = u.chunkNativeStart
       while nativeOffset < nativeTargetIndex,
       let scalar = parsedChunk.popFirst() {
         nativeOffset += scalar.count^
         u.chunkOffset += scalar.utf16.count^
       }
       return true
     }
     debugLog("_access: filling buffer")

     guard (0...codeUnits.count^).contains(nativeTargetIndex)
     else { return false }

     u.chunkLength = 0
     u.chunkNativeStart = nativeTargetIndex
     u.chunkNativeLimit = nativeTargetIndex

     u.withBuffer { buffer in
       if forward {
         let chunkSource = _parsedSuffix(fromOffset: nativeTargetIndex)

         for (i, scalar) in zip(chunkSource.indices, chunkSource) {
           let newChunkLength = u.chunkLength + scalar.utf16.count^
           // don't overfill the buffer
           if newChunkLength > buffer.count^ { break }
           for unit in scalar.utf16 {
             debugLog("# unit: \(String(unit, radix: 16))")
             buffer[u.chunkLength^] = unit
             u.chunkLength += 1
           }
           u.chunkNativeLimit = codeUnits.offset(of: i.next)^
         }
       }
       else {
         let chunkSource
           = _parsedSlice(nativeTargetIndex, codeUnits.prefix(upTo:))

         // FIXME: must call reversed twice below because zip won't return a
         // BidirectionalCollection... which might be hard!
         for (i, scalar) in zip(
           chunkSource.indices.reversed(), chunkSource.reversed()) {

           let newChunkLength = u.chunkLength + scalar.utf16.count^
           // don't overfill the buffer
           if newChunkLength > buffer.count^ { break }
           for unit in scalar.utf16.reversed() {
             buffer[u.chunkLength^] = unit
             u.chunkLength += 1
           }
           u.chunkNativeStart = codeUnits.offset(of: i.base)^
           u.chunkOffset = u.chunkLength
         }
         var b = buffer // copy due to https://bugs.swift.org/browse/SR-3782
         b[..<buffer.index(atOffset: u.chunkLength)].reverse()
       }
     }
     debugLog("_access filled buffer, u = \(u)")
     return true
   }

   fileprivate func _extract(
     _ u: inout UText,
     _ nativeStart: Int64, _ nativeLimit: Int64,
     _ destination: UnsafeMutableBufferPointer<UChar>,
     _ error: UnsafeMutablePointer<UErrorCode>?
   ) -> Int32 {
     debugLog("_extract: \(u)")
     u.validate()

     let s = nativeStart.clamped(to: 0...codeUnits.count^)
     let l = nativeLimit.clamped(to: 0...codeUnits.count^)
     u.chunkNativeStart = l
     u.chunkNativeLimit = l
     u.chunkLength = 0

     if s < l { // anything to extract?
       let base = codeUnits[
         codeUnits.index(atOffset: s)..<codeUnits.index(atOffset: l)
       ]
       let source = TranscodedView(base, from: Encoding.self, to: UTF16.self)
       var d = destination // copy due to https://bugs.swift.org/browse/SR-3782
       let (limit, remainder) = d.copy(from: source)

       // Add null termination if it fits
       if limit < d.endIndex { d[limit] = 0 }

       // If no overflow, we're done
       if remainder.isEmpty { return Int32(destination.offset(of: limit)) }

       // Report the error and account for the overflow length in the return value
       error!.pointee = U_BUFFER_OVERFLOW_ERROR
       return Int32(destination.offset(of: limit) + remainder.count)
     }
     return 0
   }

   fileprivate func _mapOffsetToNative(_ u: UnsafePointer<UText>) -> Int64 {
     UnsafeMutablePointer(mutating: u).pointee.validate()

     if u.pointee.chunkOffset == 0 { return u.pointee.chunkNativeStart }

     let chunkSource = _parsedSuffix(fromOffset: u.pointee.chunkNativeStart)
     var chunkOffset = 0

     for i in chunkSource.indices {
       chunkOffset += chunkSource[i].utf16.count
       if chunkOffset == u.pointee.chunkOffset^ {
         return codeUnits.offset(of: i.next)^
       }
     }
     fatalError("supposed to be unreachable")
   }

   fileprivate func _mapNativeIndexToUTF16(_ u: UnsafePointer<UText>, _ nativeIndex: Int64) -> Int32 {
     debugLog("_mapNativeIndexToUTF16: \(u)")
     UnsafeMutablePointer(mutating: u).pointee.validate()

     if u.pointee.chunkNativeStart == nativeIndex { return 0 }

     let nativeChunk = codeUnits[
       codeUnits.index(atOffset: u.pointee.chunkNativeStart)
       ..<
       codeUnits.index(atOffset: nativeIndex)]

     return TranscodedView(
       nativeChunk, from: Encoding.self, to: UTF16.self
     ).count^
   }

   public func withUText<R>(_ body: (UnsafeMutablePointer<UText>)->R) -> R {

     var copy: _UTextable = self

     return withUnsafePointer(to: &copy) { pSelf in

       var vtable = UTextFuncs(
         tableSize: Int32(MemoryLayout<UTextFuncs>.stride),
         reserved1: 0, reserved2: 0, reserved3: 0,
         clone: { dst, u, deep, err in
           debugLog("clone(\(dst!), \(u!), \(deep), \(String(describing: err)))")
           let _self = u!.pointee.context.assumingMemoryBound(
             to: _UTextable.self).pointee
           return _self._clone(dst, u!, deep != 0, err)
         },

         nativeLength: { u in
           debugLog("nativeLength(\(u!))")
           let _self = u!.pointee.context.assumingMemoryBound(
             to: _UTextable.self).pointee
           let r = _self._nativeLength(&u!.pointee)
           debugLog("# nativeLength: \(r)")
           return r
         },

         access: { u, nativeIndex, forward in
           debugLog("access(\(u!), \(nativeIndex), \(forward))")
           let _self = u!.pointee.context.assumingMemoryBound(
             to: _UTextable.self).pointee
           return _self._access(&u!.pointee, nativeIndex, forward != 0)
             ? 1 : 0
         },

         extract: { u, nativeStart, nativeLimit, dest, destCapacity, status in
           debugLog("extract(\(u!), \(nativeStart), \(nativeLimit), \(dest!), \(destCapacity), \(String(describing: status)))")
           let _self = u!.pointee.context.assumingMemoryBound(
             to: _UTextable.self).pointee

           let destination = UnsafeMutableBufferPointer(
             start: dest, count: destCapacity^)

           return _self._extract(
             &u!.pointee, nativeStart, nativeLimit, destination, status)
         },

         replace: nil,
         copy: nil,

         mapOffsetToNative: { u in
           debugLog("mapOffsetToNative(\(u!.pointee.chunkOffset))")
           let _self = u!.pointee.context.assumingMemoryBound(
             to: _UTextable.self).pointee
           let r = _self._mapOffsetToNative(u!)
           debugLog("# mapOffsetToNative: \(r)")
           return r
         },

         mapNativeIndexToUTF16: { u, nativeIndex in
           debugLog("mapNativeIndexToUTF16(nativeIndex: \(nativeIndex), u: \(u!.pointee))")
           let _self = u!.pointee.context.assumingMemoryBound(
             to: _UTextable.self).pointee
           let r = _self._mapNativeIndexToUTF16(u!, nativeIndex)
           debugLog("# mapNativeIndexToUTF16: \(r)")
           return r
         },
         close: nil,
         spare1: nil, spare2: nil, spare3: nil)

       var u = UText(
         magic: UInt32(UTEXT_MAGIC),
         flags: 0,
         providerProperties: 0,
         sizeOfStruct: Int32(MemoryLayout<UText>.size),
         chunkNativeLimit: 0,
         extraSize: 0,
         nativeIndexingLimit: 0,
         chunkNativeStart: 0,
         chunkOffset: 0,
         chunkLength: 0,
         chunkContents: nil,
         pFuncs: &vtable,
         pExtra: nil,
         context: UnsafeRawPointer(pSelf),
         p: nil, q: nil, r: nil,
         privP: nil,
         a: 0, b: 0, c: 0,
         privA: 0, privB: 0, privC: 0)
       u.setup()
       u.validate()
       return body(&u)
     }
   }
 }

 extension UnicodeStorage {
   var scalars: EncodedScalars {
     return EncodedScalars(codeUnits, Encoding.self)
   }
 }

 extension UErrorCode {
   var isFailure: Bool { return rawValue > U_ZERO_ERROR.rawValue }
   var isWarning: Bool { return rawValue < U_ZERO_ERROR.rawValue }
   var isSuccess: Bool { return rawValue <= U_ZERO_ERROR.rawValue }
 }
 typealias UBreakIterator = OpaquePointer

 struct CharacterView<
   CodeUnits : RandomAccessCollection,
   Encoding : UnicodeEncoding
 >
 where Encoding.EncodedScalar.Iterator.Element == CodeUnits.Iterator.Element,
   CodeUnits.SubSequence : RandomAccessCollection,
   CodeUnits.SubSequence.Index == CodeUnits.Index,
   CodeUnits.SubSequence.SubSequence == CodeUnits.SubSequence,
   CodeUnits.SubSequence.Iterator.Element == CodeUnits.Iterator.Element {

   init(_ codeUnits: CodeUnits, _: Encoding.Type = Encoding.self) {
     self.storage = UnicodeStorage(codeUnits)
   }

   fileprivate let storage: UnicodeStorage<CodeUnits, Encoding>
 }

 extension CharacterView : BidirectionalCollection {
   typealias Index = CodeUnits.Index
   typealias SubSequence = BidirectionalSlice<CharacterView>

   public var startIndex: Index { return storage.codeUnits.startIndex }
   public var endIndex: Index { return storage.codeUnits.endIndex }

   fileprivate func _withUBreakIterator<R>(
     at i: Index, _ body: (UBreakIterator)->R
   ) -> R {
     var err = U_ZERO_ERROR;

     debugLog("ubrk_open")
     let bi = ubrk_open(
       /*type:*/ UBRK_CHARACTER, /*locale:*/ nil,
       /*text:*/ nil, /*textLength:*/ 0, /*status:*/ &err)
     precondition(err.isSuccess, "unexpected ubrk_open failure, \(err)")
     defer { ubrk_close(bi) }

     return storage.withUText { u in
       //let access = u.pointee.pFuncs.pointee.access(u, storage.codeUnits.offset(of: i)^, 1)
       //debugLog("access result:", access)
       debugLog("ubrk_setUText")
       ubrk_setUText(bi, u, &err)
       precondition(err.isSuccess, "unexpected ubrk_setUText failure: \(err)")
       return body(bi!)
     }
   }

   subscript(i: Index) -> Character {
     debugLog("subscript: i=\(i)")
     let j = index(after: i)
     debugLog("subscript: j=\(j)")
     let scalars = UnicodeStorage(storage.codeUnits[i..<j], Encoding.self).scalars
     debugLog("scalars: \(Array(scalars))")
     return Character(scalars.lazy.map { UnicodeScalar($0) })
   }

   func index(after i: Index) -> Index {
     // FIXME: there is always a grapheme break between two scalars that are both
     // < U+0300.  Use that to optimize.  Can we make a stronger statement, that
     // there's always a break before any scalar < U+0300?
     debugLog("index(after: \(i))")
     let nextOffset = _withUBreakIterator(at: i) {
       ubrk_following($0, storage.codeUnits.offset(of: i)^)
     }
     debugLog("  index(after: \(i)): \(nextOffset)")
     return storage.codeUnits.index(atOffset: nextOffset)
   }

   func index(before i: Index) -> Index {
     // FIXME: there is always a grapheme break between two scalars that are both
     // < U+0300.  Use that to optimize.  Can we make a stronger statement, that
     // there's always a break before any scalar < U+0300?
     debugLog("index(before: \(i))")
     let previousOffset = _withUBreakIterator(at: i) {
       ubrk_preceding($0, storage.codeUnits.offset(of: i)^)
     }
     debugLog("  -> \(previousOffset)")
     return storage.codeUnits.index(atOffset: previousOffset)
   }
 }

 struct Latin1String<Base : RandomAccessCollection> : Unicode
 where Base.Iterator.Element == UInt8, Base.Index == Base.SubSequence.Index,
 Base.SubSequence.SubSequence == Base.SubSequence,
 Base.SubSequence : RandomAccessCollection,
 Base.Iterator.Element == UInt8,
 Base.SubSequence.Iterator.Element == Base.Iterator.Element {
   typealias Encoding = Latin1
   typealias CodeUnits = Base
   let codeUnits: CodeUnits
   let _isASCII: Bool?

   init(_ codeUnits: CodeUnits, isASCII: Bool? = nil) {
     self.codeUnits = codeUnits
     self._isASCII = isASCII
   }

   typealias ValidUTF8View = TranscodedView<CodeUnits, Encoding, UTF8>
   var utf8: ValidUTF8View { return ValidUTF8View(codeUnits) }

   typealias ValidUTF16View = TranscodedView<CodeUnits, Encoding, UTF16>
   var utf16: ValidUTF16View { return ValidUTF16View(codeUnits) }

   typealias ValidUTF32View = TranscodedView<CodeUnits, Encoding, UTF32>
   var utf32: ValidUTF32View { return ValidUTF32View(codeUnits) }

   typealias ExtendedASCII = LazyMapRandomAccessCollection<CodeUnits, UInt32>
   var extendedASCII: ExtendedASCII {
     return codeUnits.lazy.map { UInt32($0) }
   }

   typealias Characters = LazyMapRandomAccessCollection<CodeUnits, Character>
   var characters: Characters {
     return codeUnits.lazy.map {
       Character(UnicodeScalar(UInt32($0))!)
     }
   }

   func isASCII(scan: Bool = true) -> Bool {
     if let result = _isASCII { return result }
     return scan && !codeUnits.contains { $0 > 0x7f }
   }
   func isLatin1(scan: Bool = true) -> Bool {
     return true
   }
   func isNormalizedNFC(scan: Bool = true) -> Bool {
     return true
   }
   func isNormalizedNFD(scan: Bool = true) -> Bool {
     return true
   }
   func isInFastCOrDForm(scan: Bool = true) -> Bool {
     return true
   }
 }

 var t = TestSuite("t")
 t.test("basic") {
   let s = "abcdefghijklmnopqrstuvwxyz\n"
   + "🇸🇸🇬🇱🇱🇸🇩🇯🇺🇸\n"
   + "Σὲ 👥🥓γνωρίζω ἀπὸ τὴν κόψη χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά!\n"
   + "Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι,\n"
   + "გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო\n"
   + "Зарегистрируйтесь сейчас на Десятую Международную Конференцию по\n"
   + "  ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช  พระปกเกศกองบู๊กู้ขึ้นใหม่\n"
   + "ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ"
   let s32 = s.unicodeScalars.lazy.map { $0.value }
   let s16 = s.utf16
   let s8 = Array(s.utf8)
   let s16to32 = TranscodedView(s16, from: UTF16.self, to: UTF32.self)
   let s16to8 = TranscodedView(s16, from: UTF16.self, to: UTF8.self)
   let s8to16 = TranscodedView(s8, from: UTF8.self, to: UTF16.self)
   let s8Vto16 = TranscodedView(s8, from: ValidUTF8.self, to: UTF16.self)
   expectTrue(s32.elementsEqual(s16to32))
   expectTrue(s8.elementsEqual(s16to8))
   expectTrue(s16.elementsEqual(s8to16))
   expectTrue(s16.elementsEqual(s8Vto16))

   expectTrue(s32.reversed().elementsEqual(s16to32.reversed()))
   expectTrue(s8.reversed().elementsEqual(s16to8.reversed()))
   expectTrue(s16.reversed().elementsEqual(s8to16.reversed()))
   expectTrue(s16.reversed().elementsEqual(s8Vto16.reversed()))

   do {
     // We happen to know that alphabet is non-ASCII, but we're not going to say
     // anything about that.
     let alphabet = Latin1String(s8.prefix(27))
     expectTrue(alphabet.isASCII())
     expectFalse(alphabet.isASCII(scan: false))

     // We know that if you interpret s8 as Latin1, it has a lot of non-ASCII
     let nonASCII = Latin1String(s8)
     expectFalse(nonASCII.isASCII(scan: true))
     expectFalse(nonASCII.isASCII(scan: false))
   }

   do {
     let alphabet = Latin1String(s8.prefix(27), isASCII: true)
     let nonASCII = Latin1String(s8, isASCII: false)
     expectTrue(alphabet.isASCII())
     expectTrue(alphabet.isASCII(scan: false))
     expectFalse(nonASCII.isASCII(scan: true))
     expectFalse(nonASCII.isASCII(scan: false))
   }
 }

 t.test("CharacterView") {
   // FIXME: precondition checks in Character prevent us from trying this last
   // one.
   let s = "🇸🇸🇬🇱abc🇱🇸🇩🇯🇺🇸\nΣὲ 👥🥓γ͙᷏̃̂᷀νω" // + "👩‍❤️‍👩"
   let a: [Character] = [
     "🇸🇸", "🇬🇱", "a", "b", "c", "🇱🇸", "🇩🇯", "🇺🇸", "\n",
     "Σ", "ὲ", " ", "👥", "🥓", "γ͙᷏̃̂᷀", "ν", "ω"
   ] // + "👩‍❤️‍👩"

   let v8 = CharacterView(Array(s.utf8), UTF8.self)
   expectEqual(a, Array(v8))
   for (n, (c, e)) in zip(v8, a).enumerated() {
     debugLog("###### \(n): \(c) =?= \(e)")
     expectEqual(e, c)
   }
   let v16 = CharacterView(Array(s.utf16), UTF16.self)
   expectEqual(a, Array(v16))

   logging = true
   for (n, (c, e)) in zip(v8.reversed(), a.reversed()).enumerated() {
     debugLog("###### \(n): \(c) =?= \(e)")
     expectEqual(e, c)
   }
   for (n, (c, e)) in zip(v16.reversed(), a.reversed()).enumerated() {
     debugLog("###### \(n): \(c) =?= \(e)")
     expectEqual(e, c)
   }

   // This one demonstrates that we get grapheme breaking of regional indicators
   // (RI) right, while Swift 3 string does not.
   expectFalse(a.elementsEqual(s.characters))
 }
 runAllTests()