blob: 6dbf4e6d2d980ee730cba7c1b635e1904d0c331a [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
  <!-- Encoding and viewport -->
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <!-- Page metadata emitted by the documentation generator -->
  <meta name="generator" content="rustdoc">
  <meta name="description"
        content="API documentation for the Rust `UnicodeSegmentation` trait in crate `unicode_segmentation`.">
  <meta name="keywords" content="rust, rustlang, rust-lang, UnicodeSegmentation">
  <title>unicode_segmentation::UnicodeSegmentation - Rust</title>

  <!-- Stylesheets live one directory above this page; order is kept as-is
       because later sheets may override earlier ones. -->
  <link rel="stylesheet" type="text/css" href="../normalize.css">
  <link rel="stylesheet" type="text/css" href="../rustdoc.css">
  <link rel="stylesheet" type="text/css" href="../main.css">

  <!-- Favicon (same image that the sidebar uses as its logo) -->
  <link rel="shortcut icon" href="https://unicode-rs.github.io/unicode-rs_sm.png">
</head>
<body class="rustdoc trait">
<!--[if lte IE 8]>
<div class="warning">
This old browser is unsupported and will most likely display funky
things.
</div>
<![endif]-->
<!-- Sidebar: crate logo (links to the crate index), in-page section links for
     this trait, and a link back to the crate. The page contains two <nav>
     landmarks, so this one is labelled to let assistive technology tell them
     apart. -->
<nav class="sidebar" aria-label="Sidebar">
<!-- The image is the only content of the link, so its alt text names the
     link destination rather than describing the picture. -->
<a href="../unicode_segmentation/index.html"><img src="https://unicode-rs.github.io/unicode-rs_sm.png" alt="unicode_segmentation crate documentation" width="100"></a>
<p class="location">Trait UnicodeSegmentation</p>
<div class="block items">
<ul>
<li><a href="#required-methods">Required Methods</a></li>
<li><a href="#implementors">Implementors</a></li>
</ul>
</div>
<p class="location"><a href="index.html">unicode_segmentation</a></p>
<!-- Describes the current item; presumably consumed by sidebar-items.js /
     main.js to build and highlight the item list (verify against those
     scripts). Must be set before the deferred script below runs. -->
<script>window.sidebarCurrent = {name: 'UnicodeSegmentation', ty: 'trait', relpath: ''};</script>
<script defer src="sidebar-items.js"></script>
</nav>
<!-- Search bar. The form has no action and carries the js-only class, so it
     is presumably driven entirely by script (verify against main.js). -->
<nav class="sub" aria-label="Search">
  <form class="search-form js-only">
    <div class="search-container">
      <!-- A placeholder is not an accessible name: it disappears on input and
           is not reliably announced, so the field is named via aria-label. -->
      <input class="search-input" name="search"
             type="search"
             autocomplete="off"
             placeholder="Click or press ‘S’ to search, ‘?’ for more options…"
             aria-label="Search the documentation">
    </div>
  </form>
</nav>
<section id='main' class="content">
<h1 class='fqn'><span class='in-band'>Trait <a href='index.html'>unicode_segmentation</a>::<wbr><a class="trait" href=''>UnicodeSegmentation</a></span><span class='out-of-band'><span id='render-detail'>
<a id="toggle-all-docs" href="javascript:void(0)" title="collapse all docs">
[<span class='inner'>&#x2212;</span>]
</a>
</span><a class='srclink' href='../src/unicode_segmentation/lib.rs.html#82-177' title='goto source code'>[src]</a></span></h1>
<pre class='rust trait'>pub trait UnicodeSegmentation {
fn <a href='#tymethod.graphemes' class='fnname'>graphemes</a>&lt;'a&gt;(&amp;'a self, is_extended: bool) -&gt; <a class="struct" href="../unicode_segmentation/struct.Graphemes.html" title="struct unicode_segmentation::Graphemes">Graphemes</a>&lt;'a&gt;;
<div class='item-spacer'></div> fn <a href='#tymethod.grapheme_indices' class='fnname'>grapheme_indices</a>&lt;'a&gt;(&amp;'a self, is_extended: bool) -&gt; <a class="struct" href="../unicode_segmentation/struct.GraphemeIndices.html" title="struct unicode_segmentation::GraphemeIndices">GraphemeIndices</a>&lt;'a&gt;;
<div class='item-spacer'></div> fn <a href='#tymethod.unicode_words' class='fnname'>unicode_words</a>&lt;'a&gt;(&amp;'a self) -&gt; <a class="struct" href="../unicode_segmentation/struct.UnicodeWords.html" title="struct unicode_segmentation::UnicodeWords">UnicodeWords</a>&lt;'a&gt;;
<div class='item-spacer'></div> fn <a href='#tymethod.split_word_bounds' class='fnname'>split_word_bounds</a>&lt;'a&gt;(&amp;'a self) -&gt; <a class="struct" href="../unicode_segmentation/struct.UWordBounds.html" title="struct unicode_segmentation::UWordBounds">UWordBounds</a>&lt;'a&gt;;
<div class='item-spacer'></div> fn <a href='#tymethod.split_word_bound_indices' class='fnname'>split_word_bound_indices</a>&lt;'a&gt;(&amp;'a self) -&gt; <a class="struct" href="../unicode_segmentation/struct.UWordBoundIndices.html" title="struct unicode_segmentation::UWordBoundIndices">UWordBoundIndices</a>&lt;'a&gt;;
}</pre><div class='docblock'><p>Methods for segmenting strings according to
<a href="http://www.unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.</p>
</div>
<h2 id='required-methods' class='small-section-header'>
Required Methods<a href='#required-methods' class='anchor'></a>
</h2>
<div class='methods'>
<h3 id='tymethod.graphemes' class='method'><span id='graphemes.v' class='invisible'><code>fn <a href='#tymethod.graphemes' class='fnname'>graphemes</a>&lt;'a&gt;(&amp;'a self, is_extended: bool) -&gt; <a class="struct" href="../unicode_segmentation/struct.Graphemes.html" title="struct unicode_segmentation::Graphemes">Graphemes</a>&lt;'a&gt;</code></span></h3><div class='docblock'><p>Returns an iterator over the <a href="http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries">grapheme clusters</a> of <code>self</code>.</p>
<p>If <code>is_extended</code> is true, the iterator is over the
<em>extended grapheme clusters</em>;
otherwise, the iterator is over the <em>legacy grapheme clusters</em>.
<a href="http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries">UAX#29</a>
recommends extended grapheme cluster boundaries for general processing.</p>
<h1 id='examples' class='section-header'><a href='#examples'>Examples</a></h1>
<pre class="rust rust-example-rendered">
<span class="kw">let</span> <span class="ident">gr1</span> <span class="op">=</span> <span class="ident">UnicodeSegmentation</span>::<span class="ident">graphemes</span>(<span class="string">&quot;a\u{310}e\u{301}o\u{308}\u{332}&quot;</span>, <span class="bool-val">true</span>)
.<span class="ident">collect</span>::<span class="op">&lt;</span><span class="ident">Vec</span><span class="op">&lt;</span><span class="kw-2">&amp;</span><span class="ident">str</span><span class="op">&gt;&gt;</span>();
<span class="kw">let</span> <span class="ident">b</span>: <span class="kw-2">&amp;</span>[_] <span class="op">=</span> <span class="kw-2">&amp;</span>[<span class="string">&quot;a\u{310}&quot;</span>, <span class="string">&quot;e\u{301}&quot;</span>, <span class="string">&quot;o\u{308}\u{332}&quot;</span>];
<span class="macro">assert_eq</span><span class="macro">!</span>(<span class="kw-2">&amp;</span><span class="ident">gr1</span>[..], <span class="ident">b</span>);
<span class="kw">let</span> <span class="ident">gr2</span> <span class="op">=</span> <span class="ident">UnicodeSegmentation</span>::<span class="ident">graphemes</span>(<span class="string">&quot;a\r\nb🇷🇺🇸🇹&quot;</span>, <span class="bool-val">true</span>).<span class="ident">collect</span>::<span class="op">&lt;</span><span class="ident">Vec</span><span class="op">&lt;</span><span class="kw-2">&amp;</span><span class="ident">str</span><span class="op">&gt;&gt;</span>();
<span class="kw">let</span> <span class="ident">b</span>: <span class="kw-2">&amp;</span>[_] <span class="op">=</span> <span class="kw-2">&amp;</span>[<span class="string">&quot;a&quot;</span>, <span class="string">&quot;\r\n&quot;</span>, <span class="string">&quot;b&quot;</span>, <span class="string">&quot;🇷🇺&quot;</span>, <span class="string">&quot;🇸🇹&quot;</span>];
<span class="macro">assert_eq</span><span class="macro">!</span>(<span class="kw-2">&amp;</span><span class="ident">gr2</span>[..], <span class="ident">b</span>);</pre>
</div><h3 id='tymethod.grapheme_indices' class='method'><span id='grapheme_indices.v' class='invisible'><code>fn <a href='#tymethod.grapheme_indices' class='fnname'>grapheme_indices</a>&lt;'a&gt;(&amp;'a self, is_extended: bool) -&gt; <a class="struct" href="../unicode_segmentation/struct.GraphemeIndices.html" title="struct unicode_segmentation::GraphemeIndices">GraphemeIndices</a>&lt;'a&gt;</code></span></h3><div class='docblock'><p>Returns an iterator over the grapheme clusters of <code>self</code> and their
byte offsets. See <code>graphemes()</code> for more information.</p>
<h1 id='examples-1' class='section-header'><a href='#examples-1'>Examples</a></h1>
<pre class="rust rust-example-rendered">
<span class="kw">let</span> <span class="ident">gr_inds</span> <span class="op">=</span> <span class="ident">UnicodeSegmentation</span>::<span class="ident">grapheme_indices</span>(<span class="string">&quot;a̐éö̲\r\n&quot;</span>, <span class="bool-val">true</span>)
.<span class="ident">collect</span>::<span class="op">&lt;</span><span class="ident">Vec</span><span class="op">&lt;</span>(<span class="ident">usize</span>, <span class="kw-2">&amp;</span><span class="ident">str</span>)<span class="op">&gt;&gt;</span>();
<span class="kw">let</span> <span class="ident">b</span>: <span class="kw-2">&amp;</span>[_] <span class="op">=</span> <span class="kw-2">&amp;</span>[(<span class="number">0</span>, <span class="string">&quot;a̐&quot;</span>), (<span class="number">3</span>, <span class="string">&quot;é&quot;</span>), (<span class="number">6</span>, <span class="string">&quot;ö̲&quot;</span>), (<span class="number">11</span>, <span class="string">&quot;\r\n&quot;</span>)];
<span class="macro">assert_eq</span><span class="macro">!</span>(<span class="kw-2">&amp;</span><span class="ident">gr_inds</span>[..], <span class="ident">b</span>);</pre>
</div><h3 id='tymethod.unicode_words' class='method'><span id='unicode_words.v' class='invisible'><code>fn <a href='#tymethod.unicode_words' class='fnname'>unicode_words</a>&lt;'a&gt;(&amp;'a self) -&gt; <a class="struct" href="../unicode_segmentation/struct.UnicodeWords.html" title="struct unicode_segmentation::UnicodeWords">UnicodeWords</a>&lt;'a&gt;</code></span></h3><div class='docblock'><p>Returns an iterator over the words of <code>self</code>, separated on
<a href="http://www.unicode.org/reports/tr29/#Word_Boundaries">UAX#29 word boundaries</a>.</p>
<p>Here, &quot;words&quot; are just those substrings which, after splitting on
UAX#29 word boundaries, contain any alphanumeric characters. That is, the
substring must contain at least one character with the
<a href="http://unicode.org/reports/tr44/#Alphabetic">Alphabetic</a>
property, or with
<a href="http://unicode.org/reports/tr44/#General_Category_Values">General_Category=Number</a>.</p>
<h1 id='example' class='section-header'><a href='#example'>Example</a></h1>
<pre class="rust rust-example-rendered">
<span class="kw">let</span> <span class="ident">uws</span> <span class="op">=</span> <span class="string">&quot;The quick (\&quot;brown\&quot;) fox can&#39;t jump 32.3 feet, right?&quot;</span>;
<span class="kw">let</span> <span class="ident">uw1</span> <span class="op">=</span> <span class="ident">uws</span>.<span class="ident">unicode_words</span>().<span class="ident">collect</span>::<span class="op">&lt;</span><span class="ident">Vec</span><span class="op">&lt;</span><span class="kw-2">&amp;</span><span class="ident">str</span><span class="op">&gt;&gt;</span>();
<span class="kw">let</span> <span class="ident">b</span>: <span class="kw-2">&amp;</span>[_] <span class="op">=</span> <span class="kw-2">&amp;</span>[<span class="string">&quot;The&quot;</span>, <span class="string">&quot;quick&quot;</span>, <span class="string">&quot;brown&quot;</span>, <span class="string">&quot;fox&quot;</span>, <span class="string">&quot;can&#39;t&quot;</span>, <span class="string">&quot;jump&quot;</span>, <span class="string">&quot;32.3&quot;</span>, <span class="string">&quot;feet&quot;</span>, <span class="string">&quot;right&quot;</span>];
<span class="macro">assert_eq</span><span class="macro">!</span>(<span class="kw-2">&amp;</span><span class="ident">uw1</span>[..], <span class="ident">b</span>);</pre>
</div><h3 id='tymethod.split_word_bounds' class='method'><span id='split_word_bounds.v' class='invisible'><code>fn <a href='#tymethod.split_word_bounds' class='fnname'>split_word_bounds</a>&lt;'a&gt;(&amp;'a self) -&gt; <a class="struct" href="../unicode_segmentation/struct.UWordBounds.html" title="struct unicode_segmentation::UWordBounds">UWordBounds</a>&lt;'a&gt;</code></span></h3><div class='docblock'><p>Returns an iterator over substrings of <code>self</code> separated on
<a href="http://www.unicode.org/reports/tr29/#Word_Boundaries">UAX#29 word boundaries</a>.</p>
<p>The concatenation of the substrings returned by this function is just the original string.</p>
<h1 id='example-1' class='section-header'><a href='#example-1'>Example</a></h1>
<pre class="rust rust-example-rendered">
<span class="kw">let</span> <span class="ident">swu1</span> <span class="op">=</span> <span class="string">&quot;The quick (\&quot;brown\&quot;)  fox&quot;</span>.<span class="ident">split_word_bounds</span>().<span class="ident">collect</span>::<span class="op">&lt;</span><span class="ident">Vec</span><span class="op">&lt;</span><span class="kw-2">&amp;</span><span class="ident">str</span><span class="op">&gt;&gt;</span>();
<span class="kw">let</span> <span class="ident">b</span>: <span class="kw-2">&amp;</span>[_] <span class="op">=</span> <span class="kw-2">&amp;</span>[<span class="string">&quot;The&quot;</span>, <span class="string">&quot; &quot;</span>, <span class="string">&quot;quick&quot;</span>, <span class="string">&quot; &quot;</span>, <span class="string">&quot;(&quot;</span>, <span class="string">&quot;\&quot;&quot;</span>, <span class="string">&quot;brown&quot;</span>, <span class="string">&quot;\&quot;&quot;</span>, <span class="string">&quot;)&quot;</span>, <span class="string">&quot; &quot;</span>, <span class="string">&quot; &quot;</span>, <span class="string">&quot;fox&quot;</span>];
<span class="macro">assert_eq</span><span class="macro">!</span>(<span class="kw-2">&amp;</span><span class="ident">swu1</span>[..], <span class="ident">b</span>);</pre>
</div><h3 id='tymethod.split_word_bound_indices' class='method'><span id='split_word_bound_indices.v' class='invisible'><code>fn <a href='#tymethod.split_word_bound_indices' class='fnname'>split_word_bound_indices</a>&lt;'a&gt;(&amp;'a self) -&gt; <a class="struct" href="../unicode_segmentation/struct.UWordBoundIndices.html" title="struct unicode_segmentation::UWordBoundIndices">UWordBoundIndices</a>&lt;'a&gt;</code></span></h3><div class='docblock'><p>Returns an iterator over substrings of <code>self</code>, split on UAX#29 word boundaries,
and their offsets. See <code>split_word_bounds()</code> for more information.</p>
<h1 id='example-2' class='section-header'><a href='#example-2'>Example</a></h1>
<pre class="rust rust-example-rendered">
<span class="kw">let</span> <span class="ident">swi1</span> <span class="op">=</span> <span class="string">&quot;Brr, it&#39;s 29.3°F!&quot;</span>.<span class="ident">split_word_bound_indices</span>().<span class="ident">collect</span>::<span class="op">&lt;</span><span class="ident">Vec</span><span class="op">&lt;</span>(<span class="ident">usize</span>, <span class="kw-2">&amp;</span><span class="ident">str</span>)<span class="op">&gt;&gt;</span>();
<span class="kw">let</span> <span class="ident">b</span>: <span class="kw-2">&amp;</span>[_] <span class="op">=</span> <span class="kw-2">&amp;</span>[(<span class="number">0</span>, <span class="string">&quot;Brr&quot;</span>), (<span class="number">3</span>, <span class="string">&quot;,&quot;</span>), (<span class="number">4</span>, <span class="string">&quot; &quot;</span>), (<span class="number">5</span>, <span class="string">&quot;it&#39;s&quot;</span>), (<span class="number">9</span>, <span class="string">&quot; &quot;</span>), (<span class="number">10</span>, <span class="string">&quot;29.3&quot;</span>),
(<span class="number">14</span>, <span class="string">&quot;°&quot;</span>), (<span class="number">16</span>, <span class="string">&quot;F&quot;</span>), (<span class="number">17</span>, <span class="string">&quot;!&quot;</span>)];
<span class="macro">assert_eq</span><span class="macro">!</span>(<span class="kw-2">&amp;</span><span class="ident">swi1</span>[..], <span class="ident">b</span>);</pre>
</div></div>
<h2 id='implementors' class='small-section-header'>
Implementors<a href='#implementors' class='anchor'></a>
</h2>
<ul class='item-list' id='implementors-list'>
<li><code>impl UnicodeSegmentation for str</code></li>
</ul><script type="text/javascript" async
src="../implementors/unicode_segmentation/trait.UnicodeSegmentation.js">
</script></section>
<!-- Empty containers; presumably filled in by script at runtime (verify against main.js). -->
<section class="content hidden" id="search"></section>
<section class="footer"></section>
<!-- Help overlay: keyboard shortcuts and search tips. Starts hidden via the
     "hidden" class. -->
<aside class="hidden" id="help">
  <div>
    <h1 class="hidden">Help</h1>

    <div class="shortcuts">
      <h2>Keyboard Shortcuts</h2>
      <!-- Key glyphs are written as literal characters; they are the same
           code points the named/numeric entities produced
           (&larrb; = ⇤, &rarrb; = ⇥, &#9166; = ⏎). -->
      <dl>
        <dt>?</dt>
        <dd>Show this help dialog</dd>
        <dt>S</dt>
        <dd>Focus the search field</dd>
        <dt>⇤</dt>
        <dd>Move up in search results</dd>
        <dt>⇥</dt>
        <dd>Move down in search results</dd>
        <dt>⏎</dt>
        <dd>Go to active search result</dd>
        <dt>+</dt>
        <dd>Collapse/expand all sections</dd>
      </dl>
    </div>

    <div class="infos">
      <h2>Search Tricks</h2>
      <p>
        Prefix searches with a type followed by a colon (e.g.
        <code>fn:</code>) to restrict the search to a given type.
      </p>
      <p>
        Accepted types are: <code>fn</code>, <code>mod</code>,
        <code>struct</code>, <code>enum</code>,
        <code>trait</code>, <code>type</code>, <code>macro</code>,
        and <code>const</code>.
      </p>
      <p>
        Search functions by type signature (e.g.
        <code>vec -&gt; usize</code> or <code>* -&gt; vec</code>)
      </p>
    </div>
  </div>
</aside>
<!-- Page-level globals are assigned first, then the scripts are loaded.
     Presumably main.js / search-index.js read these values (verify against
     those files), so the order below is kept unchanged. -->
<script>
  window.rootPath = "../";
  window.currentCrate = "unicode_segmentation";
</script>
<script src="../main.js"></script>
<script src="../search-index.js" defer></script>
</body>
</html>