blob: a1d2c2f067448df3aeea3a857c89406d0e575342 [file] [log] [blame]
#![cfg_attr(not(feature = "std"), no_std)]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
extern crate alloc;
use alloc::{borrow::Cow, boxed::Box, string::String, sync::Arc};
use core::{
borrow::Borrow,
cmp::{self, Ordering},
convert::Infallible,
fmt, hash, iter, mem, ops,
str::FromStr,
};
/// A `SmolStr` is a string type that has the following properties:
///
/// * `size_of::<SmolStr>() == 24` (therefore `== size_of::<String>()` on 64 bit platforms)
/// * `Clone` is `O(1)`
/// * Strings are stack-allocated if they are:
///     * Up to 23 bytes long
///     * Longer than 23 bytes, but substrings of `WS` (see below). Such strings consist
///       solely of consecutive newlines, followed by consecutive spaces
/// * If a string does not satisfy the aforementioned conditions, it is heap-allocated
/// * Additionally, a `SmolStr` can be explicitly created from a `&'static str` without allocation
///
/// Unlike `String`, however, `SmolStr` is immutable. The primary use case for
/// `SmolStr` is a good enough default storage for tokens of typical programming
/// languages. Strings consisting of a series of newlines, followed by a series of
/// spaces are a typical pattern in computer programs because of indentation.
/// Note that a specialized interner might be a better solution for some use cases.
///
/// `WS`: A string of 32 newlines followed by 128 spaces.
pub struct SmolStr(Repr);
impl SmolStr {
/// Constructs an inline variant of `SmolStr`.
///
/// This never allocates.
///
/// # Panics
///
/// Panics if `text.len() > 23`.
#[inline]
pub const fn new_inline(text: &str) -> SmolStr {
assert!(text.len() <= INLINE_CAP); // avoids bounds checks in loop
let text = text.as_bytes();
let mut buf = [0; INLINE_CAP];
let mut i = 0;
while i < text.len() {
buf[i] = text[i];
i += 1
}
SmolStr(Repr::Inline {
// SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize`
// as we asserted it.
len: unsafe { InlineSize::transmute_from_u8(text.len() as u8) },
buf,
})
}
/// Constructs a `SmolStr` from a statically allocated string.
///
/// This never allocates.
#[inline(always)]
pub const fn new_static(text: &'static str) -> SmolStr {
// NOTE: this never uses the inline storage; if a canonical
// representation is needed, we could check for `len() < INLINE_CAP`
// and call `new_inline`, but this would mean an extra branch.
SmolStr(Repr::Static(text))
}
/// Constructs a `SmolStr` from a `str`, heap-allocating if necessary.
#[inline(always)]
pub fn new(text: impl AsRef<str>) -> SmolStr {
SmolStr(Repr::new(text.as_ref()))
}
/// Returns a `&str` slice of this `SmolStr`.
#[inline(always)]
pub fn as_str(&self) -> &str {
self.0.as_str()
}
/// Returns the length of `self` in bytes.
#[inline(always)]
pub fn len(&self) -> usize {
self.0.len()
}
/// Returns `true` if `self` has a length of zero bytes.
#[inline(always)]
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
/// Returns `true` if `self` is heap-allocated.
#[inline(always)]
pub const fn is_heap_allocated(&self) -> bool {
matches!(self.0, Repr::Heap(..))
}
}
impl Clone for SmolStr {
    /// Clones the string; heap-backed values only clone the inner representation,
    /// everything else is duplicated bitwise.
    #[inline]
    fn clone(&self) -> Self {
        if self.is_heap_allocated() {
            return Self(self.0.clone());
        }
        // SAFETY: We verified that the payload of `Repr` is a POD
        unsafe { core::ptr::read(self as *const SmolStr) }
    }
}
impl Default for SmolStr {
    /// Returns the empty string, stored inline.
    #[inline(always)]
    fn default() -> SmolStr {
        let len = InlineSize::_V0;
        let buf = [0; INLINE_CAP];
        SmolStr(Repr::Inline { len, buf })
    }
}
impl ops::Deref for SmolStr {
type Target = str;
#[inline(always)]
fn deref(&self) -> &str {
self.as_str()
}
}
// region: PartialEq implementations
// Marker impl; the comparison itself is provided by `PartialEq<SmolStr> for SmolStr`.
impl Eq for SmolStr {}
impl PartialEq<SmolStr> for SmolStr {
    /// Compares two `SmolStr`s, trying the cheap representation-level check first.
    fn eq(&self, other: &SmolStr) -> bool {
        if self.0.ptr_eq(&other.0) {
            return true;
        }
        self.as_str() == other.as_str()
    }
}
impl PartialEq<str> for SmolStr {
    /// Compares the contents with a string slice.
    #[inline(always)]
    fn eq(&self, other: &str) -> bool {
        let contents: &str = self.as_str();
        contents == other
    }
}
impl PartialEq<SmolStr> for str {
    /// Compares a string slice with the contents of a `SmolStr`.
    #[inline(always)]
    fn eq(&self, other: &SmolStr) -> bool {
        other.as_str() == self
    }
}
impl<'a> PartialEq<&'a str> for SmolStr {
    /// Compares the contents with a borrowed string slice.
    #[inline(always)]
    fn eq(&self, other: &&'a str) -> bool {
        let rhs: &str = other;
        self.as_str() == rhs
    }
}
impl PartialEq<SmolStr> for &str {
    /// Compares a borrowed string slice with the contents of a `SmolStr`.
    #[inline(always)]
    fn eq(&self, other: &SmolStr) -> bool {
        let lhs: &str = self;
        other.as_str() == lhs
    }
}
impl PartialEq<String> for SmolStr {
#[inline(always)]
fn eq(&self, other: &String) -> bool {
self.as_str() == other
}
}
impl PartialEq<SmolStr> for String {
    /// Compares a `String` with the contents of a `SmolStr`.
    #[inline(always)]
    fn eq(&self, other: &SmolStr) -> bool {
        other.as_str() == self.as_str()
    }
}
impl<'a> PartialEq<&'a String> for SmolStr {
#[inline(always)]
fn eq(&self, other: &&'a String) -> bool {
self == *other
}
}
impl PartialEq<SmolStr> for &String {
    /// Compares a borrowed `String` with the contents of a `SmolStr`.
    #[inline(always)]
    fn eq(&self, other: &SmolStr) -> bool {
        let lhs: &String = self;
        other.as_str() == lhs.as_str()
    }
}
// endregion: PartialEq implementations
impl Ord for SmolStr {
    /// Orders two `SmolStr`s exactly like their string contents.
    fn cmp(&self, other: &SmolStr) -> Ordering {
        let (lhs, rhs) = (self.as_str(), other.as_str());
        Ord::cmp(lhs, rhs)
    }
}
impl PartialOrd for SmolStr {
    /// Always returns `Some`, delegating to the total order defined by `Ord`.
    fn partial_cmp(&self, other: &SmolStr) -> Option<Ordering> {
        let ordering = Ord::cmp(self, other);
        Some(ordering)
    }
}
impl hash::Hash for SmolStr {
    /// Feeds the string contents to `hasher`, exactly as hashing the `&str` would.
    fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
        hash::Hash::hash(self.as_str(), hasher);
    }
}
impl fmt::Debug for SmolStr {
    /// Formats the contents with the `Debug` implementation of `str`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <str as fmt::Debug>::fmt(self.as_str(), f)
    }
}
impl fmt::Display for SmolStr {
    /// Formats the contents with the `Display` implementation of `str`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <str as fmt::Display>::fmt(self.as_str(), f)
    }
}
impl iter::FromIterator<char> for SmolStr {
    /// Collects the characters yielded by `iter` into a `SmolStr`.
    fn from_iter<I: iter::IntoIterator<Item = char>>(iter: I) -> SmolStr {
        let chars = iter.into_iter();
        from_char_iter(chars)
    }
}
#[inline]
fn from_char_iter(iter: impl Iterator<Item = char>) -> SmolStr {
from_buf_and_chars([0; _], 0, iter)
}
/// Builds a `SmolStr` from the first `buf_len` bytes of `buf` followed by the
/// characters produced by `iter`.
///
/// The result is stored inline when everything fits into `INLINE_CAP` bytes and on
/// the heap otherwise.
///
/// # Panics
///
/// Panics if the existing buffer contents are not valid UTF-8 when they have to be
/// moved to the heap.
fn from_buf_and_chars(
    mut buf: [u8; INLINE_CAP],
    buf_len: usize,
    mut iter: impl Iterator<Item = char>,
) -> SmolStr {
    // The lower bound of the size hint already tells us the inline buffer is too
    // small: collect straight into a `String`.
    let min_size = iter.size_hint().0 + buf_len;
    if min_size > INLINE_CAP {
        let prefix = core::str::from_utf8(&buf[..buf_len]).unwrap();
        let collected: String = prefix.chars().chain(iter).collect();
        return if collected.len() <= INLINE_CAP {
            // size hint lied
            SmolStr::new_inline(&collected)
        } else {
            SmolStr(Repr::Heap(collected.into_boxed_str().into()))
        };
    }

    let mut filled = buf_len;
    while let Some(ch) = iter.next() {
        let width = ch.len_utf8();
        if width + filled <= INLINE_CAP {
            ch.encode_utf8(&mut buf[filled..]);
            filled += width;
            continue;
        }

        // The inline buffer overflowed: move what we have to the heap and drain
        // the rest of the iterator into it.
        let (min_remaining, _) = iter.size_hint();
        let mut spilled = String::with_capacity(width + filled + min_remaining);
        spilled.push_str(core::str::from_utf8(&buf[..filled]).unwrap());
        spilled.push(ch);
        spilled.extend(iter);
        return SmolStr(Repr::Heap(spilled.into_boxed_str().into()));
    }

    // SAFETY: `filled` is less than or equal to the maximum value of `InlineSize`,
    // as we otherwise return early.
    let len = unsafe { InlineSize::transmute_from_u8(filled as u8) };
    SmolStr(Repr::Inline { len, buf })
}
/// Concatenates the string pieces produced by `iter` into a `SmolStr`.
///
/// Pieces are copied into an inline buffer for as long as they fit; the first piece
/// that would overflow it moves everything, including the rest of the iterator, to
/// a heap-allocated string.
fn build_from_str_iter<T>(mut iter: impl Iterator<Item = T>) -> SmolStr
where
    T: AsRef<str>,
    String: iter::Extend<T>,
{
    let mut inline = [0u8; INLINE_CAP];
    let mut used = 0;
    while let Some(item) = iter.next() {
        let piece = item.as_ref();
        let total = piece.len() + used;
        if total > INLINE_CAP {
            let mut spilled = String::with_capacity(total);
            spilled.push_str(core::str::from_utf8(&inline[..used]).unwrap());
            spilled.push_str(piece);
            spilled.extend(iter);
            return SmolStr(Repr::Heap(spilled.into_boxed_str().into()));
        }
        inline[used..total].copy_from_slice(piece.as_bytes());
        used = total;
    }

    // SAFETY: `used` is less than or equal to the maximum value of `InlineSize`,
    // as we otherwise return early.
    let len = unsafe { InlineSize::transmute_from_u8(used as u8) };
    SmolStr(Repr::Inline { len, buf: inline })
}
impl iter::FromIterator<String> for SmolStr {
    /// Concatenates the owned strings yielded by `iter`.
    fn from_iter<I: iter::IntoIterator<Item = String>>(iter: I) -> SmolStr {
        let pieces = iter.into_iter();
        build_from_str_iter(pieces)
    }
}
impl<'a> iter::FromIterator<&'a String> for SmolStr {
fn from_iter<I: iter::IntoIterator<Item = &'a String>>(iter: I) -> SmolStr {
SmolStr::from_iter(iter.into_iter().map(|x| x.as_str()))
}
}
impl<'a> iter::FromIterator<&'a str> for SmolStr {
    /// Concatenates the string slices yielded by `iter`.
    fn from_iter<I: iter::IntoIterator<Item = &'a str>>(iter: I) -> SmolStr {
        let pieces = iter.into_iter();
        build_from_str_iter(pieces)
    }
}
impl AsRef<str> for SmolStr {
#[inline(always)]
fn as_ref(&self) -> &str {
self.as_str()
}
}
impl AsRef<[u8]> for SmolStr {
    /// Views the contents as raw UTF-8 bytes.
    #[inline(always)]
    fn as_ref(&self) -> &[u8] {
        let text = self.as_str();
        text.as_bytes()
    }
}
#[cfg(feature = "std")]
impl AsRef<std::ffi::OsStr> for SmolStr {
    /// Views the contents as an `OsStr`.
    #[inline(always)]
    fn as_ref(&self) -> &std::ffi::OsStr {
        std::ffi::OsStr::new(self.as_str())
    }
}
#[cfg(feature = "std")]
impl AsRef<std::path::Path> for SmolStr {
    /// Views the contents as a `Path`.
    #[inline(always)]
    fn as_ref(&self) -> &std::path::Path {
        std::path::Path::new(self.as_str())
    }
}
impl From<&str> for SmolStr {
#[inline]
fn from(s: &str) -> SmolStr {
SmolStr::new(s)
}
}
impl From<&mut str> for SmolStr {
#[inline]
fn from(s: &mut str) -> SmolStr {
SmolStr::new(s)
}
}
impl From<&String> for SmolStr {
#[inline]
fn from(s: &String) -> SmolStr {
SmolStr::new(s)
}
}
impl From<String> for SmolStr {
#[inline(always)]
fn from(text: String) -> Self {
Self::new(text)
}
}
impl From<Box<str>> for SmolStr {
#[inline]
fn from(s: Box<str>) -> SmolStr {
SmolStr::new(s)
}
}
impl From<Arc<str>> for SmolStr {
    /// Converts an `Arc<str>` into a `SmolStr`.
    ///
    /// The `Arc` is kept as the heap storage unless the string qualifies for one of
    /// the allocation-free representations.
    #[inline]
    fn from(s: Arc<str>) -> SmolStr {
        match Repr::new_on_stack(&*s) {
            Some(on_stack) => Self(on_stack),
            None => Self(Repr::Heap(s)),
        }
    }
}
impl<'a> From<Cow<'a, str>> for SmolStr {
#[inline]
fn from(s: Cow<'a, str>) -> SmolStr {
SmolStr::new(s)
}
}
impl From<SmolStr> for Arc<str> {
    /// Converts into an `Arc<str>`, reusing the existing `Arc` of heap-backed strings
    /// and copying the contents otherwise.
    #[inline(always)]
    fn from(text: SmolStr) -> Self {
        match text.0 {
            Repr::Heap(shared) => shared,
            Repr::Inline { .. } | Repr::Static(_) => Arc::from(text.as_str()),
        }
    }
}
impl From<SmolStr> for String {
    /// Copies the contents into a freshly allocated `String`.
    #[inline(always)]
    fn from(text: SmolStr) -> Self {
        String::from(text.as_str())
    }
}
impl Borrow<str> for SmolStr {
#[inline(always)]
fn borrow(&self) -> &str {
self.as_str()
}
}
impl FromStr for SmolStr {
    type Err = Infallible;

    /// Copies `s` into a new `SmolStr`; this conversion cannot fail.
    #[inline]
    fn from_str(s: &str) -> Result<SmolStr, Self::Err> {
        let parsed = SmolStr::new(s);
        Ok(parsed)
    }
}
/// Number of bytes available in the inline buffer; equal to the largest `InlineSize` value (23).
const INLINE_CAP: usize = InlineSize::_V23 as usize;
/// Number of leading newlines in [`WS`].
const N_NEWLINES: usize = 32;
/// Number of trailing spaces in [`WS`].
const N_SPACES: usize = 128;
/// Backing bytes of [`WS`]: `N_NEWLINES` newlines followed by `N_SPACES` spaces.
///
/// The table is generated at compile time from the two counts instead of being
/// spelled out as a literal, so it can neither drift from the constants nor be
/// corrupted by tooling that collapses runs of whitespace.
const WS_BYTES: [u8; N_NEWLINES + N_SPACES] = {
    let mut bytes = [b' '; N_NEWLINES + N_SPACES];
    // `for` loops are not available in const evaluation.
    let mut i = 0;
    while i < N_NEWLINES {
        bytes[i] = b'\n';
        i += 1;
    }
    bytes
};
/// `N_NEWLINES` newlines followed by `N_SPACES` spaces; whitespace-only strings are
/// served as substrings of this constant instead of being heap-allocated.
const WS: &str = match core::str::from_utf8(&WS_BYTES) {
    Ok(ws) => ws,
    Err(_) => panic!("`WS` consists solely of ASCII bytes"),
};
// Compile-time sanity checks of the layout that substring computations rely on.
const _: () = {
    assert!(WS.len() == N_NEWLINES + N_SPACES);
    assert!(WS.as_bytes()[N_NEWLINES - 1] == b'\n');
    assert!(WS.as_bytes()[N_NEWLINES] == b' ');
};
/// A [`u8`] with a bunch of niches.
///
/// Represents the length of an inline string. Only the discriminants `0..=23` exist,
/// one per possible length.
#[derive(Clone, Copy, Debug, PartialEq)]
#[repr(u8)]
enum InlineSize {
    _V0 = 0,
    _V1,
    _V2,
    _V3,
    _V4,
    _V5,
    _V6,
    _V7,
    _V8,
    _V9,
    _V10,
    _V11,
    _V12,
    _V13,
    _V14,
    _V15,
    _V16,
    _V17,
    _V18,
    _V19,
    _V20,
    _V21,
    _V22,
    // Largest valid length; also defines `INLINE_CAP`.
    _V23,
}
impl InlineSize {
    /// Reinterprets `value` as an `InlineSize`.
    ///
    /// # Safety
    ///
    /// `value` must be less than or equal to [`INLINE_CAP`].
    #[inline(always)]
    const unsafe fn transmute_from_u8(value: u8) -> Self {
        debug_assert!(value <= InlineSize::_V23 as u8);
        // SAFETY: The caller is responsible to uphold this invariant
        let size = unsafe { mem::transmute::<u8, InlineSize>(value) };
        size
    }
}
/// Internal storage of a `SmolStr`.
#[derive(Clone, Debug)]
enum Repr {
    /// The first `len` bytes of `buf` hold the string contents.
    Inline { len: InlineSize, buf: [u8; INLINE_CAP] },
    /// A borrowed string with `'static` lifetime.
    Static(&'static str),
    /// A reference-counted, heap-allocated string.
    Heap(Arc<str>),
}
impl Repr {
    /// Tries to build a `Repr` that needs no heap allocation.
    ///
    /// Returns `Repr::Inline` for strings of up to `INLINE_CAP` bytes and a
    /// `Repr::Static` substring of `WS` for longer strings made of at most
    /// `N_NEWLINES` newlines followed by at most `N_SPACES` spaces. Returns `None`
    /// for everything else.
    fn new_on_stack<T>(text: T) -> Option<Self>
    where
        T: AsRef<str>,
    {
        let text = text.as_ref();
        let len = text.len();
        if len <= INLINE_CAP {
            let mut buf = [0; INLINE_CAP];
            buf[..len].copy_from_slice(text.as_bytes());
            return Some(Repr::Inline {
                // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize`
                len: unsafe { InlineSize::transmute_from_u8(len as u8) },
                buf,
            });
        }
        if len <= N_NEWLINES + N_SPACES {
            let bytes = text.as_bytes();
            // Count the leading newlines, looking at no more of them than `WS` provides.
            let possible_newline_count = cmp::min(len, N_NEWLINES);
            let newlines =
                bytes[..possible_newline_count].iter().take_while(|&&b| b == b'\n').count();
            // Everything after the newlines has to be a space for the string to be a
            // substring of `WS`.
            let possible_space_count = len - newlines;
            if possible_space_count <= N_SPACES && bytes[newlines..].iter().all(|&b| b == b' ') {
                let spaces = possible_space_count;
                // The newlines of `WS` end at `N_NEWLINES`, so the substring is centred there.
                let substring = &WS[N_NEWLINES - newlines..N_NEWLINES + spaces];
                return Some(Repr::Static(substring));
            }
        }
        None
    }

    /// Builds a `Repr` for `text`, allocating on the heap only if no allocation-free
    /// representation exists.
    fn new(text: &str) -> Self {
        Self::new_on_stack(text).unwrap_or_else(|| Repr::Heap(Arc::from(text)))
    }

    /// Returns the length of the string in bytes.
    #[inline(always)]
    fn len(&self) -> usize {
        match self {
            Repr::Heap(data) => data.len(),
            Repr::Static(data) => data.len(),
            Repr::Inline { len, .. } => *len as usize,
        }
    }

    /// Returns `true` if the string has a length of zero bytes.
    #[inline(always)]
    fn is_empty(&self) -> bool {
        match self {
            Repr::Heap(data) => data.is_empty(),
            Repr::Static(data) => data.is_empty(),
            &Repr::Inline { len, .. } => len as u8 == 0,
        }
    }

    /// Returns the string contents.
    #[inline]
    fn as_str(&self) -> &str {
        match self {
            Repr::Heap(data) => data,
            Repr::Static(data) => data,
            Repr::Inline { len, buf } => {
                let len = *len as usize;
                // SAFETY: len is guaranteed to be <= INLINE_CAP
                let buf = unsafe { buf.get_unchecked(..len) };
                // SAFETY: buf is guaranteed to be valid utf8 for ..len bytes
                unsafe { core::str::from_utf8_unchecked(buf) }
            }
        }
    }

    /// Cheap equality check that never inspects heap or static string data.
    ///
    /// A `true` result implies that both strings are equal; a `false` result is
    /// inconclusive and callers have to fall back to comparing the contents.
    fn ptr_eq(&self, other: &Self) -> bool {
        match (self, other) {
            (Self::Heap(l0), Self::Heap(r0)) => Arc::ptr_eq(l0, r0),
            // `l0` and `r0` are references to the `&'static str` fields. Dereference
            // them so that the `str` pointers themselves (address and length) are
            // compared rather than the locations of the two fields, which are only
            // identical when `self` and `other` are the same object.
            (Self::Static(l0), Self::Static(r0)) => core::ptr::eq(*l0, *r0),
            (Self::Inline { len: l_len, buf: l_buf }, Self::Inline { len: r_len, buf: r_buf }) => {
                l_len == r_len && l_buf == r_buf
            }
            _ => false,
        }
    }
}
/// Convert value to [`SmolStr`] using [`fmt::Display`], potentially without allocating.
///
/// Almost identical to [`ToString`], but converts to `SmolStr` instead.
pub trait ToSmolStr {
    /// Converts `self` to a [`SmolStr`].
    fn to_smolstr(&self) -> SmolStr;
}
/// [`str`] methods producing [`SmolStr`]s.
///
/// The trait is sealed (it requires `private::Sealed`), so it cannot be implemented
/// outside of this crate.
pub trait StrExt: private::Sealed {
    /// Returns the lowercase equivalent of this string slice as a new [`SmolStr`],
    /// potentially without allocating.
    ///
    /// See [`str::to_lowercase`].
    #[must_use = "this returns a new SmolStr without modifying the original"]
    fn to_lowercase_smolstr(&self) -> SmolStr;
    /// Returns the uppercase equivalent of this string slice as a new [`SmolStr`],
    /// potentially without allocating.
    ///
    /// See [`str::to_uppercase`].
    #[must_use = "this returns a new SmolStr without modifying the original"]
    fn to_uppercase_smolstr(&self) -> SmolStr;
    /// Returns the ASCII lowercase equivalent of this string slice as a new [`SmolStr`],
    /// potentially without allocating.
    ///
    /// See [`str::to_ascii_lowercase`].
    #[must_use = "this returns a new SmolStr without modifying the original"]
    fn to_ascii_lowercase_smolstr(&self) -> SmolStr;
    /// Returns the ASCII uppercase equivalent of this string slice as a new [`SmolStr`],
    /// potentially without allocating.
    ///
    /// See [`str::to_ascii_uppercase`].
    #[must_use = "this returns a new SmolStr without modifying the original"]
    fn to_ascii_uppercase_smolstr(&self) -> SmolStr;
    /// Replaces all matches of a `&str` with another `&str`, returning a new [`SmolStr`],
    /// potentially without allocating.
    ///
    /// See [`str::replace`].
    #[must_use = "this returns a new SmolStr without modifying the original"]
    fn replace_smolstr(&self, from: &str, to: &str) -> SmolStr;
    /// Replaces the first `count` matches of a `&str` with another `&str`, returning a new
    /// [`SmolStr`], potentially without allocating.
    ///
    /// See [`str::replacen`].
    #[must_use = "this returns a new SmolStr without modifying the original"]
    fn replacen_smolstr(&self, from: &str, to: &str, count: usize) -> SmolStr;
}
impl StrExt for str {
    /// Lowercases `self`, building the result inline when the input is short enough.
    ///
    /// The result always matches [`str::to_lowercase`], including its
    /// position-dependent mapping of `Σ`.
    #[inline]
    fn to_lowercase_smolstr(&self) -> SmolStr {
        // U+03A3 GREEK CAPITAL LETTER SIGMA lowercases to `σ` or, at the end of a
        // word, to `ς`. `char::to_lowercase` has no context and always yields `σ`.
        const CAPITAL_SIGMA: char = '\u{03A3}';

        let len = self.len();
        if len <= INLINE_CAP {
            let (buf, rest) = inline_convert_while_ascii(self, u8::to_ascii_lowercase);
            // The ASCII prefix cannot contain a sigma, so only `rest` is inspected. If
            // one is present, fall through to `str::to_lowercase`, which handles the
            // final-sigma rule, so that short and long inputs are treated alike.
            if !rest.contains(CAPITAL_SIGMA) {
                return from_buf_and_chars(
                    buf,
                    len - rest.len(),
                    rest.chars().flat_map(|c| c.to_lowercase()),
                );
            }
        }
        self.to_lowercase().into()
    }

    /// Uppercases `self`, building the result inline when the input is short enough.
    #[inline]
    fn to_uppercase_smolstr(&self) -> SmolStr {
        let len = self.len();
        if len <= INLINE_CAP {
            let (buf, rest) = inline_convert_while_ascii(self, u8::to_ascii_uppercase);
            from_buf_and_chars(buf, len - rest.len(), rest.chars().flat_map(|c| c.to_uppercase()))
        } else {
            self.to_uppercase().into()
        }
    }

    /// Lowercases the ASCII letters of `self`; inputs that fit inline never allocate.
    #[inline]
    fn to_ascii_lowercase_smolstr(&self) -> SmolStr {
        let len = self.len();
        if len <= INLINE_CAP {
            let mut buf = [0u8; INLINE_CAP];
            buf[..len].copy_from_slice(self.as_bytes());
            buf[..len].make_ascii_lowercase();
            SmolStr(Repr::Inline {
                // SAFETY: `len` is in bounds
                len: unsafe { InlineSize::transmute_from_u8(len as u8) },
                buf,
            })
        } else {
            self.to_ascii_lowercase().into()
        }
    }

    /// Uppercases the ASCII letters of `self`; inputs that fit inline never allocate.
    #[inline]
    fn to_ascii_uppercase_smolstr(&self) -> SmolStr {
        let len = self.len();
        if len <= INLINE_CAP {
            let mut buf = [0u8; INLINE_CAP];
            buf[..len].copy_from_slice(self.as_bytes());
            buf[..len].make_ascii_uppercase();
            SmolStr(Repr::Inline {
                // SAFETY: `len` is in bounds
                len: unsafe { InlineSize::transmute_from_u8(len as u8) },
                buf,
            })
        } else {
            self.to_ascii_uppercase().into()
        }
    }

    /// Replaces every match of `from` with `to`.
    #[inline]
    fn replace_smolstr(&self, from: &str, to: &str) -> SmolStr {
        self.replacen_smolstr(from, to, usize::MAX)
    }

    /// Replaces the first `count` matches of `from` with `to`.
    #[inline]
    fn replacen_smolstr(&self, from: &str, to: &str, mut count: usize) -> SmolStr {
        // Fast path for replacing a single ASCII character with another inline.
        if let [from_u8] = from.as_bytes()
            && let [to_u8] = to.as_bytes()
        {
            return if self.len() <= count {
                // There cannot be more matches than bytes, so `count` never runs out
                // and does not need to be tracked.
                // SAFETY: `from_u8` & `to_u8` are ascii
                unsafe { replacen_1_ascii(self, |b| if b == from_u8 { *to_u8 } else { *b }) }
            } else {
                // SAFETY: `from_u8` & `to_u8` are ascii; a `str` consisting of a single
                // byte cannot hold anything else.
                unsafe {
                    replacen_1_ascii(self, |b| {
                        if b == from_u8 && count != 0 {
                            count -= 1;
                            *to_u8
                        } else {
                            *b
                        }
                    })
                }
            };
        }
        let mut result = SmolStrBuilder::new();
        let mut last_end = 0;
        for (start, part) in self.match_indices(from).take(count) {
            // SAFETY: `start` is guaranteed to be within the bounds of `self` as per
            // `match_indices` and last_end is always less than or equal to `start`
            result.push_str(unsafe { self.get_unchecked(last_end..start) });
            result.push_str(to);
            last_end = start + part.len();
        }
        // SAFETY: `self.len()` is guaranteed to be within the bounds of `self` and last_end is
        // always less than or equal to `self.len()`
        result.push_str(unsafe { self.get_unchecked(last_end..self.len()) });
        SmolStr::from(result)
    }
}
/// Applies `map` to every byte of `src`, in order, and returns the result as a `SmolStr`.
///
/// Inputs of up to `INLINE_CAP` bytes are mapped straight into an inline buffer.
///
/// # Safety
///
/// `map` must only replace ascii with ascii or return unchanged bytes.
#[inline]
unsafe fn replacen_1_ascii(src: &str, mut map: impl FnMut(&u8) -> u8) -> SmolStr {
    let bytes = src.as_bytes();
    if bytes.len() > INLINE_CAP {
        let mapped = bytes.iter().map(map).collect();
        // SAFETY: We replaced ascii with ascii on valid utf8 strings.
        return unsafe { String::from_utf8_unchecked(mapped) }.into();
    }

    let mut buf = [0u8; INLINE_CAP];
    for (slot, byte) in buf.iter_mut().zip(bytes) {
        *slot = map(byte);
    }
    // SAFETY: the length is in bounds, it was checked against `INLINE_CAP` above
    let len = unsafe { InlineSize::transmute_from_u8(bytes.len() as u8) };
    SmolStr(Repr::Inline { len, buf })
}
/// Inline version of std fn `convert_while_ascii`. `s` must have len <= 23.
///
/// Applies `convert` to the leading ASCII bytes of `s` and writes the results to the
/// start of the returned buffer. The returned `&str` is the unprocessed remainder of
/// `s`; the number of converted bytes is therefore `s.len() - rest.len()`.
///
/// Note that the chunked loop stops at the start of a 16-byte chunk containing a
/// non-ASCII byte; the byte-wise loop afterwards then advances to the first
/// non-ASCII byte.
#[inline]
fn inline_convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> ([u8; INLINE_CAP], &str) {
    // Process the input in chunks of 16 bytes to enable auto-vectorization.
    // Previously the chunk size depended on the size of `usize`,
    // but on 32-bit platforms with sse or neon 16 is also the better choice.
    // The only downside on other platforms would be a bit more loop-unrolling.
    const N: usize = 16;
    debug_assert!(s.len() <= INLINE_CAP, "only for inline-able strings");
    // `slice` and `out_slice` are advanced in lockstep and always have the same length.
    let mut slice = s.as_bytes();
    let mut out = [0u8; INLINE_CAP];
    let mut out_slice = &mut out[..slice.len()];
    let mut is_ascii = [false; N];
    while slice.len() >= N {
        // SAFETY: checked in loop condition
        let chunk = unsafe { slice.get_unchecked(..N) };
        // SAFETY: out_slice has at least same length as input slice and gets sliced with the same offsets
        let out_chunk = unsafe { out_slice.get_unchecked_mut(..N) };
        for j in 0..N {
            is_ascii[j] = chunk[j] <= 127;
        }
        // Auto-vectorization for this check is a bit fragile, sum and comparing against the chunk
        // size gives the best result, specifically a pmovmsk instruction on x86.
        // See https://github.com/llvm/llvm-project/issues/96395 for why llvm currently does not
        // currently recognize other similar idioms.
        if is_ascii.iter().map(|x| *x as u8).sum::<u8>() as usize != N {
            break;
        }
        for j in 0..N {
            out_chunk[j] = convert(&chunk[j]);
        }
        // SAFETY: both slices hold at least `N` bytes, as checked in the loop condition
        slice = unsafe { slice.get_unchecked(N..) };
        out_slice = unsafe { out_slice.get_unchecked_mut(N..) };
    }
    // handle the remainder as individual bytes
    while !slice.is_empty() {
        let byte = slice[0];
        if byte > 127 {
            break;
        }
        // SAFETY: out_slice has at least same length as input slice
        unsafe {
            *out_slice.get_unchecked_mut(0) = convert(&byte);
        }
        // SAFETY: both slices are non-empty, as checked in the loop condition
        slice = unsafe { slice.get_unchecked(1..) };
        out_slice = unsafe { out_slice.get_unchecked_mut(1..) };
    }
    unsafe {
        // SAFETY: we know this is a valid char boundary
        // since we only skipped over leading ascii bytes
        let rest = core::str::from_utf8_unchecked(slice);
        (out, rest)
    }
}
impl<T> ToSmolStr for T
where
T: fmt::Display + ?Sized,
{
fn to_smolstr(&self) -> SmolStr {
format_smolstr!("{}", self)
}
}
// Private module hosting the supertrait that seals `StrExt`.
mod private {
    /// No downstream impls allowed.
    pub trait Sealed {}
    // `str` is the only type `StrExt` is meant to be implemented for.
    impl Sealed for str {}
}
/// Formats arguments to a [`SmolStr`], potentially without allocating.
///
/// See [`alloc::format!`] or [`format_args!`] for syntax documentation.
///
/// # Panics
///
/// Panics if a formatting trait implementation returns an error.
#[macro_export]
macro_rules! format_smolstr {
    ($($tt:tt)*) => {{
        // Write into a builder, which stays inline for as long as the output fits.
        let mut w = $crate::SmolStrBuilder::new();
        ::core::fmt::Write::write_fmt(&mut w, format_args!($($tt)*)).expect("a formatting trait implementation returned an error");
        w.finish()
    }};
}
/// A builder that can be used to efficiently build a [`SmolStr`].
///
/// This won't allocate if the final string fits into the inline buffer.
///
/// Text is appended with [`SmolStrBuilder::push`], [`SmolStrBuilder::push_str`] or
/// through [`fmt::Write`]; [`SmolStrBuilder::finish`] produces the final string.
#[derive(Clone, Default, Debug, PartialEq, Eq)]
pub struct SmolStrBuilder(SmolStrBuilderRepr);
/// Internal storage of a [`SmolStrBuilder`].
#[derive(Clone, Debug, PartialEq, Eq)]
enum SmolStrBuilderRepr {
    /// The first `len` bytes of `buf` hold the text pushed so far.
    Inline { len: usize, buf: [u8; INLINE_CAP] },
    /// The text outgrew the inline buffer and lives in a `String`.
    Heap(String),
}
impl Default for SmolStrBuilderRepr {
#[inline]
fn default() -> Self {
SmolStrBuilderRepr::Inline { buf: [0; INLINE_CAP], len: 0 }
}
}
impl SmolStrBuilder {
    /// Returns a builder that holds no text yet.
    #[must_use]
    pub const fn new() -> Self {
        Self(SmolStrBuilderRepr::Inline { len: 0, buf: [0; INLINE_CAP] })
    }

    /// Produces a [`SmolStr`] from the text pushed so far, leaving `self` untouched.
    #[must_use]
    pub fn finish(&self) -> SmolStr {
        let repr = match &self.0 {
            SmolStrBuilderRepr::Heap(heap) => Repr::new(heap),
            SmolStrBuilderRepr::Inline { len, buf } => {
                let len = *len;
                debug_assert!(len <= INLINE_CAP);
                // SAFETY: the inline length never exceeds the maximum value of `InlineSize`
                let len = unsafe { InlineSize::transmute_from_u8(len as u8) };
                Repr::Inline { len, buf: *buf }
            }
        };
        SmolStr(repr)
    }

    /// Appends the [`char`] `c` to the text built so far.
    pub fn push(&mut self, c: char) {
        let (len, buf) = match &mut self.0 {
            SmolStrBuilderRepr::Heap(heap) => return heap.push(c),
            SmolStrBuilderRepr::Inline { len, buf } => (len, buf),
        };

        let end = *len + c.len_utf8();
        if end <= INLINE_CAP {
            c.encode_utf8(&mut buf[*len..]);
            *len = end;
            return;
        }

        // The inline buffer is full: move its contents to the heap.
        let mut heap = String::with_capacity(end);
        // SAFETY: inline data is guaranteed to be valid utf8 for `*len` bytes
        unsafe { heap.as_mut_vec().extend_from_slice(&buf[..*len]) };
        heap.push(c);
        self.0 = SmolStrBuilderRepr::Heap(heap);
    }

    /// Appends the string slice `s` to the text built so far.
    pub fn push_str(&mut self, s: &str) {
        let (len, buf) = match &mut self.0 {
            SmolStrBuilderRepr::Heap(heap) => return heap.push_str(s),
            SmolStrBuilderRepr::Inline { len, buf } => (len, buf),
        };

        let start = *len;
        let end = start + s.len();
        // The text still fits on the stack, even if it fills the buffer entirely.
        if end <= INLINE_CAP {
            buf[start..end].copy_from_slice(s.as_bytes());
            *len = end;
            return;
        }

        // The inline buffer is too small: move its contents to the heap.
        let mut heap = String::with_capacity(end);
        // SAFETY: inline data is guaranteed to be valid utf8 for `start` bytes
        unsafe { heap.as_mut_vec().extend_from_slice(&buf[..start]) };
        heap.push_str(s);
        self.0 = SmolStrBuilderRepr::Heap(heap);
    }
}
impl fmt::Write for SmolStrBuilder {
#[inline]
fn write_str(&mut self, s: &str) -> fmt::Result {
self.push_str(s);
Ok(())
}
}
impl From<SmolStrBuilder> for SmolStr {
fn from(value: SmolStrBuilder) -> Self {
value.finish()
}
}
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for SmolStr {
    /// Generates an arbitrary `&str` from `u` and copies it into a `SmolStr`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> Result<Self, arbitrary::Error> {
        let text = <&str as arbitrary::Arbitrary<'a>>::arbitrary(u)?;
        Ok(SmolStr::new(text))
    }
}
#[cfg(feature = "borsh")]
mod borsh;
#[cfg(feature = "serde")]
mod serde;
// A size hint that exceeds the inline capacity has to send the prefilled buffer and
// the characters straight to the heap path.
#[test]
fn from_buf_and_chars_size_hinted_heap() {
    let prefilled = *b"abcdefghijklmnopqr00000";
    let suffix = "_0x1x2x3x4x5x6x7x8x9x10x11x12x13";
    let built = from_buf_and_chars(prefilled, 18, suffix.chars());
    assert_eq!(built, "abcdefghijklmnopqr_0x1x2x3x4x5x6x7x8x9x10x11x12x13");
}