| <!DOCTYPE html> |
| <html lang="en"> |
| <head> |
| <!-- Character encoding and viewport. --> |
| <meta charset="utf-8"> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| <!-- Page metadata. --> |
| <meta name="generator" content="rustdoc"> |
| <meta name="description" content="API documentation for the Rust `UnicodeSegmentation` trait in crate `unicode_segmentation`."> |
| <meta name="keywords" content="rust, rustlang, rust-lang, UnicodeSegmentation"> |
| |
| <title>unicode_segmentation::UnicodeSegmentation - Rust</title> |
| |
| <!-- Stylesheets, resolved relative to the parent directory. --> |
| <link rel="stylesheet" href="../normalize.css"> |
| <link rel="stylesheet" href="../rustdoc.css"> |
| <link rel="stylesheet" href="../main.css"> |
| |
| |
| <link rel="shortcut icon" href="https://unicode-rs.github.io/unicode-rs_sm.png"> |
| |
| </head> |
| <body class="rustdoc trait"> |
| <!--[if lte IE 8]> |
| <div class="warning"> |
| This old browser is unsupported and will most likely display funky |
| things. |
| </div> |
| <![endif]--> |
| |
| |
| |
| <!-- Sidebar: linked crate logo, in-page section links, and a link to the crate index. The aria-label distinguishes this landmark from the search nav. --> |
| <nav class="sidebar" aria-label="Sidebar"> |
| <a href='../unicode_segmentation/index.html'><img src='https://unicode-rs.github.io/unicode-rs_sm.png' alt='unicode_segmentation crate documentation' width='100'></a> |
| <p class='location'>Trait UnicodeSegmentation</p><div class="block items"><ul><li><a href="#required-methods">Required Methods</a></li><li><a href="#implementors">Implementors</a></li></ul></div><p class='location'><a href='index.html'>unicode_segmentation</a></p><script>window.sidebarCurrent = {name: 'UnicodeSegmentation', ty: 'trait', relpath: ''};</script><script defer src="sidebar-items.js"></script> |
| </nav> |
| |
| <!-- Search bar. The form is tagged js-only and declares no action. The input carries an aria-label because a placeholder alone does not give it an accessible name. --> |
| <nav class="sub" aria-label="Search"> |
| <form class="search-form js-only"> |
| <div class="search-container"> |
| <input class="search-input" name="search" aria-label="Search the documentation" |
| autocomplete="off" |
| placeholder="Click or press ‘S’ to search, ‘?’ for more options…" |
| type="search"> |
| </div> |
| </form> |
| </nav> |
| |
| <section id='main' class="content"> |
| <!-- Page heading: the trait's path, a collapse-all toggle, and a link to the source listing. --> |
| <!-- NOTE(review): #toggle-all-docs uses href="javascript:void(0)", so it behaves as a button rather than a link; presumably a script binds its click handler. Confirm the script and CSS selectors before converting it to a button element. --> |
| <h1 class='fqn'><span class='in-band'>Trait <a href='index.html'>unicode_segmentation</a>::<wbr><a class="trait" href=''>UnicodeSegmentation</a></span><span class='out-of-band'><span id='render-detail'> |
| <a id="toggle-all-docs" href="javascript:void(0)" title="collapse all docs"> |
| [<span class='inner'>−</span>] |
| </a> |
| </span><a class='srclink' href='../src/unicode_segmentation/lib.rs.html#82-177' title='goto source code'>[src]</a></span></h1> |
| <!-- Trait declaration listing followed by the trait-level description. Angle brackets and ampersands in the Rust signatures are written as character references so they are never parsed as markup. --> |
| <pre class='rust trait'>pub trait UnicodeSegmentation { |
| fn <a href='#tymethod.graphemes' class='fnname'>graphemes</a>&lt;'a&gt;(&amp;'a self, is_extended: bool) -&gt; <a class="struct" href="../unicode_segmentation/struct.Graphemes.html" title="struct unicode_segmentation::Graphemes">Graphemes</a>&lt;'a&gt;; |
| <div class='item-spacer'></div> fn <a href='#tymethod.grapheme_indices' class='fnname'>grapheme_indices</a>&lt;'a&gt;(&amp;'a self, is_extended: bool) -&gt; <a class="struct" href="../unicode_segmentation/struct.GraphemeIndices.html" title="struct unicode_segmentation::GraphemeIndices">GraphemeIndices</a>&lt;'a&gt;; |
| <div class='item-spacer'></div> fn <a href='#tymethod.unicode_words' class='fnname'>unicode_words</a>&lt;'a&gt;(&amp;'a self) -&gt; <a class="struct" href="../unicode_segmentation/struct.UnicodeWords.html" title="struct unicode_segmentation::UnicodeWords">UnicodeWords</a>&lt;'a&gt;; |
| <div class='item-spacer'></div> fn <a href='#tymethod.split_word_bounds' class='fnname'>split_word_bounds</a>&lt;'a&gt;(&amp;'a self) -&gt; <a class="struct" href="../unicode_segmentation/struct.UWordBounds.html" title="struct unicode_segmentation::UWordBounds">UWordBounds</a>&lt;'a&gt;; |
| <div class='item-spacer'></div> fn <a href='#tymethod.split_word_bound_indices' class='fnname'>split_word_bound_indices</a>&lt;'a&gt;(&amp;'a self) -&gt; <a class="struct" href="../unicode_segmentation/struct.UWordBoundIndices.html" title="struct unicode_segmentation::UWordBoundIndices">UWordBoundIndices</a>&lt;'a&gt;; |
| }</pre><div class='docblock'><p>Methods for segmenting strings according to |
| <a href="https://www.unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.</p> |
| </div> |
| <!-- Required methods: each entry pairs a method signature heading with its documentation block and a highlighted example. Angle brackets and ampersands in Rust code are written as character references; the split_word_bounds example input keeps two spaces before "fox" to match the two " " segments in its expected output. --> |
| <h2 id='required-methods' class='small-section-header'> |
| Required Methods<a href='#required-methods' class='anchor'></a> |
| </h2> |
| <div class='methods'> |
| <h3 id='tymethod.graphemes' class='method'><span id='graphemes.v' class='invisible'><code>fn <a href='#tymethod.graphemes' class='fnname'>graphemes</a>&lt;'a&gt;(&amp;'a self, is_extended: bool) -&gt; <a class="struct" href="../unicode_segmentation/struct.Graphemes.html" title="struct unicode_segmentation::Graphemes">Graphemes</a>&lt;'a&gt;</code></span></h3><div class='docblock'><p>Returns an iterator over the <a href="https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries">grapheme clusters</a> of <code>self</code>.</p> |
| |
| <p>If <code>is_extended</code> is true, the iterator is over the |
| <em>extended grapheme clusters</em>; |
| otherwise, the iterator is over the <em>legacy grapheme clusters</em>. |
| <a href="https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries">UAX#29</a> |
| recommends extended grapheme cluster boundaries for general processing.</p> |
| |
| <h1 id='examples' class='section-header'><a href='#examples'>Examples</a></h1> |
| <pre class="rust rust-example-rendered"> |
| <span class="kw">let</span> <span class="ident">gr1</span> <span class="op">=</span> <span class="ident">UnicodeSegmentation</span>::<span class="ident">graphemes</span>(<span class="string">"a\u{310}e\u{301}o\u{308}\u{332}"</span>, <span class="bool-val">true</span>) |
| .<span class="ident">collect</span>::<span class="op">&lt;</span><span class="ident">Vec</span><span class="op">&lt;</span><span class="kw-2">&amp;</span><span class="ident">str</span><span class="op">&gt;&gt;</span>(); |
| <span class="kw">let</span> <span class="ident">b</span>: <span class="kw-2">&amp;</span>[_] <span class="op">=</span> <span class="kw-2">&amp;</span>[<span class="string">"a\u{310}"</span>, <span class="string">"e\u{301}"</span>, <span class="string">"o\u{308}\u{332}"</span>]; |
| |
| <span class="macro">assert_eq</span><span class="macro">!</span>(<span class="kw-2">&amp;</span><span class="ident">gr1</span>[..], <span class="ident">b</span>); |
| |
| <span class="kw">let</span> <span class="ident">gr2</span> <span class="op">=</span> <span class="ident">UnicodeSegmentation</span>::<span class="ident">graphemes</span>(<span class="string">"a\r\nb🇷🇺🇸🇹"</span>, <span class="bool-val">true</span>).<span class="ident">collect</span>::<span class="op">&lt;</span><span class="ident">Vec</span><span class="op">&lt;</span><span class="kw-2">&amp;</span><span class="ident">str</span><span class="op">&gt;&gt;</span>(); |
| <span class="kw">let</span> <span class="ident">b</span>: <span class="kw-2">&amp;</span>[_] <span class="op">=</span> <span class="kw-2">&amp;</span>[<span class="string">"a"</span>, <span class="string">"\r\n"</span>, <span class="string">"b"</span>, <span class="string">"🇷🇺"</span>, <span class="string">"🇸🇹"</span>]; |
| |
| <span class="macro">assert_eq</span><span class="macro">!</span>(<span class="kw-2">&amp;</span><span class="ident">gr2</span>[..], <span class="ident">b</span>);</pre> |
| </div><h3 id='tymethod.grapheme_indices' class='method'><span id='grapheme_indices.v' class='invisible'><code>fn <a href='#tymethod.grapheme_indices' class='fnname'>grapheme_indices</a>&lt;'a&gt;(&amp;'a self, is_extended: bool) -&gt; <a class="struct" href="../unicode_segmentation/struct.GraphemeIndices.html" title="struct unicode_segmentation::GraphemeIndices">GraphemeIndices</a>&lt;'a&gt;</code></span></h3><div class='docblock'><p>Returns an iterator over the grapheme clusters of <code>self</code> and their |
| byte offsets. See <code>graphemes()</code> for more information.</p> |
| |
| <h1 id='examples-1' class='section-header'><a href='#examples-1'>Examples</a></h1> |
| <pre class="rust rust-example-rendered"> |
| <span class="kw">let</span> <span class="ident">gr_inds</span> <span class="op">=</span> <span class="ident">UnicodeSegmentation</span>::<span class="ident">grapheme_indices</span>(<span class="string">"a̐éö̲\r\n"</span>, <span class="bool-val">true</span>) |
| .<span class="ident">collect</span>::<span class="op">&lt;</span><span class="ident">Vec</span><span class="op">&lt;</span>(<span class="ident">usize</span>, <span class="kw-2">&amp;</span><span class="ident">str</span>)<span class="op">&gt;&gt;</span>(); |
| <span class="kw">let</span> <span class="ident">b</span>: <span class="kw-2">&amp;</span>[_] <span class="op">=</span> <span class="kw-2">&amp;</span>[(<span class="number">0</span>, <span class="string">"a̐"</span>), (<span class="number">3</span>, <span class="string">"é"</span>), (<span class="number">6</span>, <span class="string">"ö̲"</span>), (<span class="number">11</span>, <span class="string">"\r\n"</span>)]; |
| |
| <span class="macro">assert_eq</span><span class="macro">!</span>(<span class="kw-2">&amp;</span><span class="ident">gr_inds</span>[..], <span class="ident">b</span>);</pre> |
| </div><h3 id='tymethod.unicode_words' class='method'><span id='unicode_words.v' class='invisible'><code>fn <a href='#tymethod.unicode_words' class='fnname'>unicode_words</a>&lt;'a&gt;(&amp;'a self) -&gt; <a class="struct" href="../unicode_segmentation/struct.UnicodeWords.html" title="struct unicode_segmentation::UnicodeWords">UnicodeWords</a>&lt;'a&gt;</code></span></h3><div class='docblock'><p>Returns an iterator over the words of <code>self</code>, separated on |
| <a href="https://www.unicode.org/reports/tr29/#Word_Boundaries">UAX#29 word boundaries</a>.</p> |
| |
| <p>Here, "words" are just those substrings which, after splitting on |
| UAX#29 word boundaries, contain any alphanumeric characters. That is, the |
| substring must contain at least one character with the |
| <a href="https://unicode.org/reports/tr44/#Alphabetic">Alphabetic</a> |
| property, or with |
| <a href="https://unicode.org/reports/tr44/#General_Category_Values">General_Category=Number</a>.</p> |
| |
| <h1 id='example' class='section-header'><a href='#example'>Example</a></h1> |
| <pre class="rust rust-example-rendered"> |
| <span class="kw">let</span> <span class="ident">uws</span> <span class="op">=</span> <span class="string">"The quick (\"brown\") fox can't jump 32.3 feet, right?"</span>; |
| <span class="kw">let</span> <span class="ident">uw1</span> <span class="op">=</span> <span class="ident">uws</span>.<span class="ident">unicode_words</span>().<span class="ident">collect</span>::<span class="op">&lt;</span><span class="ident">Vec</span><span class="op">&lt;</span><span class="kw-2">&amp;</span><span class="ident">str</span><span class="op">&gt;&gt;</span>(); |
| <span class="kw">let</span> <span class="ident">b</span>: <span class="kw-2">&amp;</span>[_] <span class="op">=</span> <span class="kw-2">&amp;</span>[<span class="string">"The"</span>, <span class="string">"quick"</span>, <span class="string">"brown"</span>, <span class="string">"fox"</span>, <span class="string">"can't"</span>, <span class="string">"jump"</span>, <span class="string">"32.3"</span>, <span class="string">"feet"</span>, <span class="string">"right"</span>]; |
| |
| <span class="macro">assert_eq</span><span class="macro">!</span>(<span class="kw-2">&amp;</span><span class="ident">uw1</span>[..], <span class="ident">b</span>);</pre> |
| </div><h3 id='tymethod.split_word_bounds' class='method'><span id='split_word_bounds.v' class='invisible'><code>fn <a href='#tymethod.split_word_bounds' class='fnname'>split_word_bounds</a>&lt;'a&gt;(&amp;'a self) -&gt; <a class="struct" href="../unicode_segmentation/struct.UWordBounds.html" title="struct unicode_segmentation::UWordBounds">UWordBounds</a>&lt;'a&gt;</code></span></h3><div class='docblock'><p>Returns an iterator over substrings of <code>self</code> separated on |
| <a href="https://www.unicode.org/reports/tr29/#Word_Boundaries">UAX#29 word boundaries</a>.</p> |
| |
| <p>The concatenation of the substrings returned by this function is just the original string.</p> |
| |
| <h1 id='example-1' class='section-header'><a href='#example-1'>Example</a></h1> |
| <pre class="rust rust-example-rendered"> |
| <span class="kw">let</span> <span class="ident">swu1</span> <span class="op">=</span> <span class="string">"The quick (\"brown\")  fox"</span>.<span class="ident">split_word_bounds</span>().<span class="ident">collect</span>::<span class="op">&lt;</span><span class="ident">Vec</span><span class="op">&lt;</span><span class="kw-2">&amp;</span><span class="ident">str</span><span class="op">&gt;&gt;</span>(); |
| <span class="kw">let</span> <span class="ident">b</span>: <span class="kw-2">&amp;</span>[_] <span class="op">=</span> <span class="kw-2">&amp;</span>[<span class="string">"The"</span>, <span class="string">" "</span>, <span class="string">"quick"</span>, <span class="string">" "</span>, <span class="string">"("</span>, <span class="string">"\""</span>, <span class="string">"brown"</span>, <span class="string">"\""</span>, <span class="string">")"</span>, <span class="string">" "</span>, <span class="string">" "</span>, <span class="string">"fox"</span>]; |
| |
| <span class="macro">assert_eq</span><span class="macro">!</span>(<span class="kw-2">&amp;</span><span class="ident">swu1</span>[..], <span class="ident">b</span>);</pre> |
| </div><h3 id='tymethod.split_word_bound_indices' class='method'><span id='split_word_bound_indices.v' class='invisible'><code>fn <a href='#tymethod.split_word_bound_indices' class='fnname'>split_word_bound_indices</a>&lt;'a&gt;(&amp;'a self) -&gt; <a class="struct" href="../unicode_segmentation/struct.UWordBoundIndices.html" title="struct unicode_segmentation::UWordBoundIndices">UWordBoundIndices</a>&lt;'a&gt;</code></span></h3><div class='docblock'><p>Returns an iterator over substrings of <code>self</code>, split on UAX#29 word boundaries, |
| and their offsets. See <code>split_word_bounds()</code> for more information.</p> |
| |
| <h1 id='example-2' class='section-header'><a href='#example-2'>Example</a></h1> |
| <pre class="rust rust-example-rendered"> |
| <span class="kw">let</span> <span class="ident">swi1</span> <span class="op">=</span> <span class="string">"Brr, it's 29.3°F!"</span>.<span class="ident">split_word_bound_indices</span>().<span class="ident">collect</span>::<span class="op">&lt;</span><span class="ident">Vec</span><span class="op">&lt;</span>(<span class="ident">usize</span>, <span class="kw-2">&amp;</span><span class="ident">str</span>)<span class="op">&gt;&gt;</span>(); |
| <span class="kw">let</span> <span class="ident">b</span>: <span class="kw-2">&amp;</span>[_] <span class="op">=</span> <span class="kw-2">&amp;</span>[(<span class="number">0</span>, <span class="string">"Brr"</span>), (<span class="number">3</span>, <span class="string">","</span>), (<span class="number">4</span>, <span class="string">" "</span>), (<span class="number">5</span>, <span class="string">"it's"</span>), (<span class="number">9</span>, <span class="string">" "</span>), (<span class="number">10</span>, <span class="string">"29.3"</span>), |
| (<span class="number">14</span>, <span class="string">"°"</span>), (<span class="number">16</span>, <span class="string">"F"</span>), (<span class="number">17</span>, <span class="string">"!"</span>)]; |
| |
| <span class="macro">assert_eq</span><span class="macro">!</span>(<span class="kw-2">&amp;</span><span class="ident">swi1</span>[..], <span class="ident">b</span>);</pre> |
| </div></div> |
| <!-- Implementors: a static list plus an asynchronously loaded per-trait script. --> |
| <h2 id='implementors' class='small-section-header'> |
| Implementors<a href='#implementors' class='anchor'></a> |
| </h2> |
| <ul class='item-list' id='implementors-list'> |
| <li><code>impl UnicodeSegmentation for str</code></li> |
| </ul><script async |
| src="../implementors/unicode_segmentation/trait.UnicodeSegmentation.js"> |
| </script></section> |
| <!-- Empty container carrying the `hidden` class; presumably filled with search results by a script. TODO confirm against main.js. --> |
| <section id='search' class="content hidden"></section> |
| |
| <!-- Empty footer placeholder. --> |
| <section class="footer"></section> |
| |
| <!-- Help panel (carries the `hidden` class): keyboard shortcuts and search tips. --> |
| <aside id="help" class="hidden"> |
| <div> |
| <h1 class="hidden">Help</h1> |
| |
| <!-- Shortcut keys paired with their actions. --> |
| <div class="shortcuts"> |
| <h2>Keyboard Shortcuts</h2> |
| |
| <dl> |
| <dt>?</dt> |
| <dd>Show this help dialog</dd> |
| <dt>S</dt> |
| <dd>Focus the search field</dd> |
| <dt>⇤</dt> |
| <dd>Move up in search results</dd> |
| <dt>⇥</dt> |
| <dd>Move down in search results</dd> |
| <dt>⏎</dt> |
| <dd>Go to active search result</dd> |
| <dt>+</dt> |
| <dd>Collapse/expand all sections</dd> |
| </dl> |
| </div> |
| |
| <!-- Search syntax tips. --> |
| <div class="infos"> |
| <h2>Search Tricks</h2> |
| |
| <p> |
| Prefix searches with a type followed by a colon (e.g. |
| <code>fn:</code>) to restrict the search to a given type. |
| </p> |
| |
| <p> |
| Accepted types are: <code>fn</code>, <code>mod</code>, |
| <code>struct</code>, <code>enum</code>, |
| <code>trait</code>, <code>type</code>, <code>macro</code>, |
| and <code>const</code>. |
| </p> |
| |
| <p> |
| Search functions by type signature (e.g. |
| <code>vec -&gt; usize</code> or <code>* -&gt; vec</code>). |
| </p> |
| </div> |
| </div> |
| </aside> |
| |
| |
| |
| <!-- Page globals, assigned before ../main.js is loaded below; presumably read by that script. TODO confirm. --> |
| <script> |
| window.rootPath = "../"; |
| window.currentCrate = "unicode_segmentation"; |
| </script> |
| <!-- main.js is loaded without async/defer; the search index is loaded with defer. --> |
| <script src="../main.js"></script> |
| <script defer src="../search-index.js"></script> |
| </body> |
| </html> |