blob: 5d84f80ec371d5984e22a4f7da7592f898de4d56 [file] [log] [blame]
//===--- WordCount.swift --------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
import TestsUtils
//
// Mini benchmark counting words in a longer string.
// Measures performance of
// - Iterating over the Characters in a String
// - Extracting short substrings as Strings
// - Set<Character> lookup performance
// - Set<String> init from sequence of short Strings, with duplicates
// - Uniquing initializer for Dictionary with short string keys
//
/// The benchmarks defined in this file, as `BenchmarkInfo` records.
///
/// There are three pairs (word splitting, unique-word counting, histogram
/// building), each with an ASCII and a "UTF16" variant. Every entry uses the
/// same set-up closure, which calls `buildWorkload()`.
public let WordCount = [
// Splitting the raw text into words.
BenchmarkInfo(
name: "WordSplitASCII",
runFunction: run_WordSplitASCII,
tags: [.validation, .api, .String, .algorithm],
setUpFunction: { buildWorkload() }
),
BenchmarkInfo(
name: "WordSplitUTF16",
runFunction: run_WordSplitUTF16,
tags: [.validation, .api, .String, .algorithm],
setUpFunction: { buildWorkload() }
),
// Building a Set from the pre-split words.
BenchmarkInfo(
name: "WordCountUniqueASCII",
runFunction: run_WordCountUniqueASCII,
tags: [.validation, .api, .String, .Dictionary, .algorithm],
setUpFunction: { buildWorkload() }
),
BenchmarkInfo(
name: "WordCountUniqueUTF16",
runFunction: run_WordCountUniqueUTF16,
tags: [.validation, .api, .String, .Dictionary, .algorithm],
setUpFunction: { buildWorkload() }
),
// Building a sorted word-frequency histogram from the pre-split words.
BenchmarkInfo(
name: "WordCountHistogramASCII",
runFunction: run_WordCountHistogramASCII,
tags: [.validation, .api, .String, .Dictionary, .algorithm],
setUpFunction: { buildWorkload() }
),
BenchmarkInfo(
name: "WordCountHistogramUTF16",
runFunction: run_WordCountHistogramUTF16,
tags: [.validation, .api, .String, .Dictionary, .algorithm],
setUpFunction: { buildWorkload() }
),
]
/// Input text for the ASCII benchmark variants.
///
/// The literal's exact contents matter: the run functions in this file check
/// that it splits into 280 words, 168 of them distinct, with "and" occurring
/// 15 times.
let asciiText = """
**Welcome to Swift!**
Swift is a high-performance system programming language. It has a clean and
modern syntax, offers seamless access to existing C and Objective-C code and
frameworks, and is memory safe by default.
Although inspired by Objective-C and many other languages, Swift is not itself a
C-derived language. As a complete and independent language, Swift packages core
features like flow control, data structures, and functions, with high-level
constructs like objects, protocols, closures, and generics. Swift embraces
modules, eliminating the need for headers and the code duplication they entail.
To learn more about the programming language, visit swift.org.
## Contributing to Swift
Contributions to Swift are welcomed and encouraged! Please see the
Contributing to Swift guide.
To be a truly great community, Swift.org needs to welcome developers from all
walks of life, with different backgrounds, and with a wide range of
experience. A diverse and friendly community will have more great ideas, more
unique perspectives, and produce more great code. We will work diligently to
make the Swift community welcoming to everyone.
To give clarity of what is expected of our members, Swift has adopted the code
of conduct defined by the Contributor Covenant. This document is used across
many open source communities, and we think it articulates our values well. For
more, see the Code of Conduct.
## Getting Started
These instructions give the most direct path to a working Swift development
environment. To build from source you will need 2 GB of disk space for the
source code and over 20 GB of disk space for the build artifacts. A clean build
can take multiple hours, but incremental builds will finish much faster.
"""
/// Input text for the "UTF16" benchmark variants.
///
/// Same prose as `asciiText`, but with emoji and symbol characters in the
/// headings, accented letters (ö, é) in some words, and an explicit combining
/// ring (`\u{30A}`) inside "like". The run functions expect the same totals as
/// for the ASCII text: 280 words, 168 of them distinct, "and" occurring 15
/// times.
let utf16Text = """
✨🌟 Welcome tö Swift! ⭐️✨
Swift is a high-performance system programming language. It has a clean and
modern syntax, offers seamless access tö existing C and Objective-C code and
frameworks, and is memory safe by default.
Although inspired by Objective-C and many othér languages, Swift is not itself a
C-derived language. As a complete and independent language, Swift packages core
features li\u{30A}ke flow control, data structures, and functions, with
high-level constructs li\u{30A}ke objects, protöcols, closures, and
generics. Swift embraces modules, eliminating thé need for headers and thé code
duplication théy entail.
Tö learn more about thé programming language, visit swift.org.
☞ Contributing tö Swift
Contributions tö Swift are welcomed and encouraged! Please see thé
Contributing tö Swift guide.
Tö be a truly great community, Swift.org needs tö welcome developers from all
walks of life, with different backgrounds, and with a wide range of
experience. A diverse and friendly community will have more great ideas, more
unique perspectives, and produce more great code. We will work diligently tö
make thé Swift community welcoming tö everyone.
Tö give clarity of what is expected of our members, Swift has adopted thé code
of conduct defined by thé Contributör Covenant. This document is used across
many open source communities, and we think it articulates our values well. For
more, see thé Code of Conduct.
☞ Getting Started
Thése instructions give thé most direct path tö a working Swift development
environment. Tö build from source you will need 2 GB of disk space for thé
source code and over 20 GB of disk space for thé build artifacts. A clean build
can take multiple hours, but incremental builds will finish much faster.
"""
/// Set-up hook used by every entry in `WordCount`.
///
/// Reads each global the run functions depend on and passes it to
/// `blackHole`. Swift globals are initialized lazily on first access, so these
/// reads force the character set and both word arrays to be built here —
/// presumably so that one-time cost stays out of the measured runs.
@inline(never)
func buildWorkload() {
// Touched first; the two word arrays below also depend on it via `Words`.
blackHole(someAlphanumerics)
blackHole(asciiWords)
blackHole(utf16Words)
}
/// A deliberately small stand-in for "all Unicode alphanumerics".
///
/// The set contains:
/// - the 52 ASCII letters on their own,
/// - every ASCII letter followed by exactly one combining mark taken from
///   U+0300 through U+030F, and
/// - the ten ASCII digits.
let someAlphanumerics: Set<Character> = {
  let letters = Set(
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".unicodeScalars)
  // Every scalar in this range is valid, so the force-unwrap cannot fail.
  let combiningMarks: Set<Unicode.Scalar> =
    Set((0x300..<0x310).map { Unicode.Scalar($0)! })

  // Start from the digits, then add each letter bare and with each mark.
  var members = Set<Character>("0123456789")
  for letter in letters {
    members.insert(Character(letter))
    for mark in combiningMarks {
      // Letter + combining mark forms a single grapheme cluster, so the
      // two-scalar string converts to exactly one Character.
      var scalars = String.UnicodeScalarView()
      scalars.append(letter)
      scalars.append(mark)
      members.insert(Character(String(scalars)))
    }
  }
  return members
}()
extension Character {
/// Whether this character is a member of `someAlphanumerics`.
///
/// This is membership in that limited set only, not a general Unicode
/// property test.
var isAlphanumeric: Bool {
return someAlphanumerics.contains(self)
}
}
/// A sequence of the words in a string, where a word is a maximal run of
/// characters for which `Character.isAlphanumeric` is true.
///
/// The type acts as its own iterator: it stores the text plus the index at
/// which the next scan resumes.
struct Words: IteratorProtocol, Sequence {
  public typealias Iterator = Words

  /// The text being split.
  let text: String
  /// Position from which the next call to `next()` starts scanning.
  var nextIndex: String.Index

  /// Creates a word sequence positioned at the start of `text`.
  init(_ text: String) {
    self.text = text
    self.nextIndex = text.startIndex
  }

  /// Moves `nextIndex` forward for as long as it is not at the end of the
  /// text and the character under it has the given alphanumeric-ness.
  private mutating func advance(whileAlphanumeric expected: Bool) {
    while nextIndex != text.endIndex,
      text[nextIndex].isAlphanumeric == expected {
      text.formIndex(after: &nextIndex)
    }
  }

  /// Returns the next word, or `nil` once no alphanumeric characters remain.
  mutating func next() -> String? {
    // Skip separators preceding the word.
    advance(whileAlphanumeric: false)
    let wordStart = nextIndex
    // Consume the word itself.
    advance(whileAlphanumeric: true)
    // An empty span means the scan reached the end without finding a word.
    if wordStart == nextIndex {
      return nil
    }
    return String(text[wordStart..<nextIndex])
  }
}
/// Benchmark body: splits `asciiText` into words, `10 * N` times.
///
/// Each pass creates a fresh `Words` over the text, collects it into an array
/// and checks that 280 words were produced.
///
/// - Parameter N: Iteration scale supplied by the caller. Must be at least 1;
///   the closed range `1...10*N` cannot be formed otherwise.
@inline(never)
public func run_WordSplitASCII(_ N: Int) {
  let passes = 10 * N
  for _ in 1...passes {
    let source = identity(asciiText)
    let splitWords = Array(Words(source))
    CheckResults(splitWords.count == 280)
    blackHole(splitWords)
  }
}
/// Benchmark body: splits `utf16Text` into words, `10 * N` times.
///
/// Each pass creates a fresh `Words` over the text, collects it into an array
/// and checks that 280 words were produced.
///
/// - Parameter N: Iteration scale supplied by the caller. Must be at least 1;
///   the closed range `1...10*N` cannot be formed otherwise.
@inline(never)
public func run_WordSplitUTF16(_ N: Int) {
  let passes = 10 * N
  for _ in 1...passes {
    let source = identity(utf16Text)
    let splitWords = Array(Words(source))
    CheckResults(splitWords.count == 280)
    blackHole(splitWords)
  }
}
// The two sample texts, split into words once. These arrays are the inputs to
// the WordCountUnique* and WordCountHistogram* benchmarks, and
// `buildWorkload()` reads them during set-up.
let asciiWords = Array(Words(asciiText))
let utf16Words = Array(Words(utf16Text))
/// Benchmark body: builds a `Set` from `asciiWords`, `100 * N` times.
///
/// Each pass checks that the set holds 168 distinct words.
///
/// - Parameter N: Iteration scale supplied by the caller. Must be at least 1;
///   the closed range `1...100*N` cannot be formed otherwise.
@inline(never)
public func run_WordCountUniqueASCII(_ N: Int) {
  let passes = 100 * N
  for _ in 1...passes {
    let input = identity(asciiWords)
    let distinctWords = Set(input)
    CheckResults(distinctWords.count == 168)
    blackHole(distinctWords)
  }
}
/// Benchmark body: builds a `Set` from `utf16Words`, `100 * N` times.
///
/// Each pass checks that the set holds 168 distinct words.
///
/// - Parameter N: Iteration scale supplied by the caller. Must be at least 1;
///   the closed range `1...100*N` cannot be formed otherwise.
@inline(never)
public func run_WordCountUniqueUTF16(_ N: Int) {
  let passes = 100 * N
  for _ in 1...passes {
    let input = identity(utf16Words)
    let distinctWords = Set(input)
    CheckResults(distinctWords.count == 168)
    blackHole(distinctWords)
  }
}
/// Counts how often each word occurs and ranks the results.
///
/// - Parameter words: The words to count. Words are compared exactly as
///   given, so differently cased spellings are counted separately.
/// - Returns: One `(word, count)` pair per distinct word, ordered by
///   decreasing count. Words with equal counts are ordered ascending by
///   `String` comparison.
@inline(never)
func histogram<S: Sequence>(for words: S) -> [(String, Int)]
  where S.Element == String {
  // Pair every word with 1 and let the uniquing initializer sum the ones
  // whenever a word repeats.
  let tally = Dictionary<String, Int>(
    words.lazy.map { word in (word, 1) },
    uniquingKeysWith: +)
  return tally.sorted { lhs, rhs in
    // Primary key: higher count first.
    if lhs.value != rhs.value {
      return lhs.value > rhs.value
    }
    // Tie-break: smaller word first.
    return lhs.key < rhs.key
  }
}
/// Benchmark body: builds the word histogram of `asciiWords`, `100 * N` times.
///
/// Each pass checks that there are 168 distinct words and that the top-ranked
/// entry is "and" with 15 occurrences.
///
/// - Parameter N: Iteration scale supplied by the caller. Must be at least 1;
///   the closed range `1...100*N` cannot be formed otherwise.
@inline(never)
public func run_WordCountHistogramASCII(_ N: Int) {
  let passes = 100 * N
  for _ in 1...passes {
    let input = identity(asciiWords)
    let ranked = histogram(for: input)
    CheckResults(ranked.count == 168)
    let mostFrequent = ranked[0]
    CheckResults(mostFrequent == ("and", 15))
    blackHole(ranked)
  }
}
/// Benchmark body: builds the word histogram of `utf16Words`, `100 * N` times.
///
/// Each pass checks that there are 168 distinct words and that the top-ranked
/// entry is "and" with 15 occurrences.
///
/// - Parameter N: Iteration scale supplied by the caller. Must be at least 1;
///   the closed range `1...100*N` cannot be formed otherwise.
@inline(never)
public func run_WordCountHistogramUTF16(_ N: Int) {
  let passes = 100 * N
  for _ in 1...passes {
    let input = identity(utf16Words)
    let ranked = histogram(for: input)
    CheckResults(ranked.count == 168)
    let mostFrequent = ranked[0]
    CheckResults(mostFrequent == ("and", 15))
    blackHole(ranked)
  }
}