blob: e723134cfe47fc56b99941737914f5c77e700512 [file] [log] [blame]
#![cfg_attr(test, deny(missing_docs))]
#![cfg_attr(test, deny(warnings))]
#![doc(html_root_url = "https://docs.rs/unicase/2.2.0")]
#![cfg_attr(feature = "nightly", feature(test))]
//! # UniCase
//!
//! UniCase provices a way of specifying strings that are case-insensitive.
//!
//! UniCase supports full [Unicode case
//! folding](https://www.w3.org/International/wiki/Case_folding). It can also
//! utilize faster ASCII case comparisons, if both strings are ASCII.
//!
//! Using the `UniCase::new()` constructor will check the string to see if it
//! is all ASCII. When a `UniCase` is compared against another, if both are
//! ASCII, it will use the faster comparison.
//!
//! There also exists the `Ascii` type in this crate, which will always assume
//! to use the ASCII case comparisons, if the encoding is already known.
//!
//! ## Example
//!
//! ```rust
//! use unicase::UniCase;
//!
//! let a = UniCase::new("Maße");
//! let b = UniCase::new("MASSE");
//! let c = UniCase::new("mase");
//!
//! assert_eq!(a, b);
//! assert!(b != c);
//! ```
//!
//! ## Ascii
//!
//! ```rust
//! use unicase::Ascii;
//!
//! let a = Ascii::new("foobar");
//! let b = Ascii::new("FoObAr");
//!
//! assert_eq!(a, b);
//! ```
#[cfg(feature = "nightly")]
extern crate test;
#[cfg(__unicase__iter_cmp)]
use std::cmp::Ordering;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::ops::{Deref, DerefMut};
use std::str::FromStr;
use self::unicode::Unicode;
mod ascii;
mod unicode;
/// Case Insensitive wrapper of strings.
#[derive(Clone, Copy)]
pub struct UniCase<S>(Encoding<S>);
/// Case Insensitive wrapper of Ascii strings.
#[derive(Clone, Copy, Debug)]
pub struct Ascii<S>(S);
/// Compare two string-like types for case-less equality, using unicode folding.
///
/// Equivalent to `UniCase::new(left) == UniCase::new(right)`.
///
/// Note: This will perform a scan for ASCII characters before doing the
/// the comparison. See `UniCase` for more information.
#[inline]
pub fn eq<S: AsRef<str> + ?Sized>(left: &S, right: &S) -> bool {
UniCase::new(left) == UniCase::new(right)
}
/// Compare two string-like types for case-less equality, ignoring ASCII case.
///
/// Equivalent to `Ascii::new(left) == Ascii::new(right)`.
#[inline]
pub fn eq_ascii<S: AsRef<str> + ?Sized>(left: &S, right: &S) -> bool {
Ascii(left) == Ascii(right)
}
#[derive(Clone, Copy, Debug)]
enum Encoding<S> {
Ascii(Ascii<S>),
Unicode(Unicode<S>),
}
macro_rules! inner {
(mut $e:expr) => ({
match &mut $e {
&mut Encoding::Ascii(ref mut s) => &mut s.0,
&mut Encoding::Unicode(ref mut s) => &mut s.0,
}
});
($e:expr) => ({
match &$e {
&Encoding::Ascii(ref s) => &s.0,
&Encoding::Unicode(ref s) => &s.0,
}
});
}
impl<S: AsRef<str>> UniCase<S> {
/// Creates a new `UniCase`.
///
/// Note: This scans the text to determine if it is all ASCII or not.
pub fn new(s: S) -> UniCase<S> {
#[allow(deprecated, unused)]
use std::ascii::AsciiExt;
if s.as_ref().is_ascii() {
UniCase(Encoding::Ascii(Ascii(s)))
} else {
UniCase(Encoding::Unicode(Unicode(s)))
}
}
/// Creates a new `UniCase`, skipping the ASCII check.
pub fn unicode(s: S) -> UniCase<S> {
UniCase(Encoding::Unicode(Unicode(s)))
}
}
impl<S> Deref for UniCase<S> {
type Target = S;
#[inline]
fn deref<'a>(&'a self) -> &'a S {
inner!(self.0)
}
}
impl<S> DerefMut for UniCase<S> {
#[inline]
fn deref_mut<'a>(&'a mut self) -> &'a mut S {
inner!(mut self.0)
}
}
impl<S: AsRef<str>> AsRef<str> for UniCase<S> {
#[inline]
fn as_ref(&self) -> &str {
inner!(self.0).as_ref()
}
}
impl<S: fmt::Debug> fmt::Debug for UniCase<S> {
#[inline]
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(inner!(self.0), fmt)
}
}
impl<S: fmt::Display> fmt::Display for UniCase<S> {
#[inline]
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(inner!(self.0), fmt)
}
}
impl<S1: AsRef<str>, S2: AsRef<str>> PartialEq<UniCase<S2>> for UniCase<S1> {
#[inline]
fn eq(&self, other: &UniCase<S2>) -> bool {
match (&self.0, &other.0) {
(&Encoding::Ascii(ref x), &Encoding::Ascii(ref y)) => x == y,
(&Encoding::Unicode(ref x), &Encoding::Unicode(ref y)) => x == y,
(&Encoding::Ascii(ref x), &Encoding::Unicode(ref y)) => &Unicode(x.as_ref()) == y,
(&Encoding::Unicode(ref x), &Encoding::Ascii(ref y)) => x == &Unicode(y.as_ref()),
}
}
}
impl<S: AsRef<str>> Eq for UniCase<S> {}
impl<S: AsRef<str>> Hash for UniCase<S> {
#[inline]
fn hash<H: Hasher>(&self, hasher: &mut H) {
match self.0 {
Encoding::Ascii(ref s) => s.hash(hasher),
Encoding::Unicode(ref s) => s.hash(hasher)
}
}
}
macro_rules! from_impl {
($from:ty => $to:ty; $by:ident) => (
impl<'a> From<$from> for UniCase<$to> {
fn from(s: $from) -> Self {
UniCase::unicode(s.$by())
}
}
);
($from:ty => $to:ty) => ( from_impl!($from => $to; into); )
}
macro_rules! into_impl {
($to:ty) => (
impl<'a> Into<$to> for UniCase<$to> {
fn into(self) -> $to {
match self.0 {
Encoding::Ascii(Ascii(s)) => s,
Encoding::Unicode(Unicode(s)) => s,
}
}
}
);
}
from_impl!(&'a str => &'a str);
from_impl!(&'a str => String);
from_impl!(&'a String => &'a str; as_ref);
from_impl!(String => String);
into_impl!(&'a str);
into_impl!(String);
#[cfg(__unicase__iter_cmp)]
impl<T: AsRef<str>> PartialOrd for UniCase<T> {
#[inline]
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
#[cfg(__unicase__iter_cmp)]
impl<T: AsRef<str>> Ord for UniCase<T> {
#[inline]
fn cmp(&self, other: &Self) -> Ordering {
match (&self.0, &other.0) {
(&Encoding::Ascii(ref x), &Encoding::Ascii(ref y)) => x.cmp(y),
(&Encoding::Unicode(ref x), &Encoding::Unicode(ref y)) => x.cmp(y),
(&Encoding::Ascii(ref x), &Encoding::Unicode(ref y)) => Unicode(x.as_ref()).cmp(&Unicode(y.0.as_ref())),
(&Encoding::Unicode(ref x), &Encoding::Ascii(ref y)) => Unicode(x.0.as_ref()).cmp(&Unicode(y.as_ref())),
}
}
}
impl<S: FromStr + AsRef<str>> FromStr for UniCase<S> {
type Err = <S as FromStr>::Err;
fn from_str(s: &str) -> Result<UniCase<S>, Self::Err> {
s.parse().map(UniCase::new)
}
}
#[cfg(test)]
mod tests {
use super::UniCase;
use std::hash::{Hash, Hasher};
#[cfg(not(__unicase__default_hasher))]
use std::hash::SipHasher as DefaultHasher;
#[cfg(__unicase__default_hasher)]
use std::collections::hash_map::DefaultHasher;
fn hash<T: Hash>(t: &T) -> u64 {
let mut s = DefaultHasher::new();
t.hash(&mut s);
s.finish()
}
#[test]
fn test_copy_for_refs() {
fn foo<T>(_: UniCase<T>) {}
let a = UniCase::new("foobar");
foo(a);
foo(a);
}
#[test]
fn test_eq_ascii() {
let a = UniCase::new("foobar");
let b = UniCase::new("FOOBAR");
assert_eq!(a, b);
assert_eq!(hash(&a), hash(&b));
}
#[test]
fn test_eq_unicode() {
let a = UniCase::new("στιγμας");
let b = UniCase::new("στιγμασ");
assert_eq!(a, b);
assert_eq!(hash(&a), hash(&b));
}
#[cfg(feature = "nightly")]
#[bench]
fn bench_unicase_ascii(b: &mut ::test::Bencher) {
b.bytes = b"foobar".len() as u64;
let x = UniCase::new("foobar");
let y = UniCase::new("FOOBAR");
b.iter(|| assert_eq!(x, y));
}
#[cfg(feature = "nightly")]
static SUBJECT: &'static [u8] = b"ffoo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz oo bar baz quux herp derp";
#[cfg(feature = "nightly")]
#[inline(never)]
fn is_ascii(bytes: &[u8]) -> bool {
#[allow(unused)]
use std::ascii::AsciiExt;
bytes.is_ascii()
}
#[cfg(feature = "nightly")]
#[bench]
fn bench_is_ascii(b: &mut ::test::Bencher) {
b.iter(|| assert!(is_ascii(SUBJECT)));
}
#[cfg(feature = "nightly")]
#[bench]
fn bench_is_utf8(b: &mut ::test::Bencher) {
b.iter(|| assert!(::std::str::from_utf8(SUBJECT).is_ok()));
}
#[cfg(__unicase__iter_cmp)]
#[test]
fn test_case_cmp() {
assert!(UniCase::new("a") < UniCase::new("B"));
assert!(UniCase::new("A") < UniCase::new("b"));
assert!(UniCase::new("aa") > UniCase::new("a"));
assert!(UniCase::new("a") < UniCase::new("aa"));
assert!(UniCase::new("a") < UniCase::new("AA"));
}
#[test]
fn test_from_impls() {
let view: &'static str = "foobar";
let _: UniCase<&'static str> = view.into();
let _: UniCase<&str> = view.into();
let _: UniCase<String> = view.into();
let owned: String = view.to_owned();
let _: UniCase<&str> = (&owned).into();
let _: UniCase<String> = owned.into();
}
#[test]
fn test_into_impls() {
let view: UniCase<&'static str> = UniCase::new("foobar");
let _: &'static str = view.into();
let _: &str = view.into();
let owned: UniCase<String> = "foobar".into();
let _: String = owned.clone().into();
let _: &str = owned.as_ref();
}
}