| <!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><meta name="generator" content="rustdoc"><meta name="description" content="Source of the Rust file `lib/line-index/src/lib.rs`."><title>lib.rs - source</title><script>if(window.location.protocol!=="file:")document.head.insertAdjacentHTML("beforeend","SourceSerif4-Regular-6b053e98.ttf.woff2,FiraSans-Italic-81dc35de.woff2,FiraSans-Regular-0fe48ade.woff2,FiraSans-MediumItalic-ccf7e434.woff2,FiraSans-Medium-e1aa3f0a.woff2,SourceCodePro-Regular-8badfe75.ttf.woff2,SourceCodePro-Semibold-aa29a496.ttf.woff2".split(",").map(f=>`<link rel="preload" as="font" type="font/woff2"href="../../static.files/${f}">`).join(""))</script><link rel="stylesheet" href="../../static.files/normalize-9960930a.css"><link rel="stylesheet" href="../../static.files/rustdoc-77263533.css"><meta name="rustdoc-vars" data-root-path="../../" data-static-root-path="../../static.files/" data-current-crate="line_index" data-themes="" data-resource-suffix="" data-rustdoc-version="1.94.0 (4a4ef493e 2026-03-02)" data-channel="1.94.0" data-search-js="search-9e2438ea.js" data-stringdex-js="stringdex-b897f86f.js" data-settings-js="settings-c38705f0.js" ><script src="../../static.files/storage-e2aeef58.js"></script><script defer src="../../static.files/src-script-813739b1.js"></script><script defer src="../../src-files.js"></script><script defer src="../../static.files/main-7bab91a1.js"></script><noscript><link rel="stylesheet" href="../../static.files/noscript-ffcac47a.css"></noscript><link rel="alternate icon" type="image/png" href="../../static.files/favicon-32x32-eab170b8.png"><link rel="icon" type="image/svg+xml" href="../../static.files/favicon-044be391.svg"></head><body class="rustdoc src"><!--[if lte IE 11]><div class="warning">This old browser is unsupported and will most likely display funky things.</div><![endif]--><nav class="sidebar"><div 
class="src-sidebar-title"><h2>Files</h2></div></nav><div class="sidebar-resizer" title="Drag to resize sidebar"></div><main><section id="main-content" class="content"><div class="main-heading"><h1><div class="sub-heading">line_index/</div>lib.rs</h1><rustdoc-toolbar></rustdoc-toolbar></div><div class="example-wrap digits-3"><pre class="rust"><code><a href=#1 id=1 data-nosnippet>1</a><span class="doccomment">//! See [`LineIndex`]. |
| <a href=#2 id=2 data-nosnippet>2</a> |
| <a href=#3 id=3 data-nosnippet>3</a></span><span class="attr">#![deny(missing_debug_implementations, missing_docs, rust_2018_idioms)] |
| <a href=#4 id=4 data-nosnippet>4</a> |
| <a href=#5 id=5 data-nosnippet>5</a>#[cfg(test)] |
| <a href=#6 id=6 data-nosnippet>6</a></span><span class="kw">mod </span>tests; |
| <a href=#7 id=7 data-nosnippet>7</a> |
| <a href=#8 id=8 data-nosnippet>8</a><span class="kw">use </span>nohash_hasher::IntMap; |
| <a href=#9 id=9 data-nosnippet>9</a> |
| <a href=#10 id=10 data-nosnippet>10</a><span class="kw">pub use </span>text_size::{TextRange, TextSize}; |
| <a href=#11 id=11 data-nosnippet>11</a> |
| <a href=#12 id=12 data-nosnippet>12</a><span class="doccomment">/// `(line, column)` information in the native, UTF-8 encoding. |
| <a href=#13 id=13 data-nosnippet>13</a></span><span class="attr">#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] |
| <a href=#14 id=14 data-nosnippet>14</a></span><span class="kw">pub struct </span>LineCol { |
| <a href=#15 id=15 data-nosnippet>15</a> <span class="doccomment">/// Zero-based. |
| <a href=#16 id=16 data-nosnippet>16</a> </span><span class="kw">pub </span>line: u32, |
| <a href=#17 id=17 data-nosnippet>17</a> <span class="doccomment">/// Zero-based UTF-8 offset. |
| <a href=#18 id=18 data-nosnippet>18</a> </span><span class="kw">pub </span>col: u32, |
| <a href=#19 id=19 data-nosnippet>19</a>} |
| <a href=#20 id=20 data-nosnippet>20</a> |
| <a href=#21 id=21 data-nosnippet>21</a><span class="doccomment">/// A kind of wide character encoding. |
| <a href=#22 id=22 data-nosnippet>22</a></span><span class="attr">#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] |
| <a href=#23 id=23 data-nosnippet>23</a>#[non_exhaustive] |
| <a href=#24 id=24 data-nosnippet>24</a></span><span class="kw">pub enum </span>WideEncoding { |
| <a href=#25 id=25 data-nosnippet>25</a> <span class="doccomment">/// UTF-16. |
| <a href=#26 id=26 data-nosnippet>26</a> </span>Utf16, |
| <a href=#27 id=27 data-nosnippet>27</a> <span class="doccomment">/// UTF-32. |
| <a href=#28 id=28 data-nosnippet>28</a> </span>Utf32, |
| <a href=#29 id=29 data-nosnippet>29</a>} |
| <a href=#30 id=30 data-nosnippet>30</a> |
| <a href=#31 id=31 data-nosnippet>31</a><span class="kw">impl </span>WideEncoding { |
| <a href=#32 id=32 data-nosnippet>32</a> <span class="doccomment">/// Returns the number of code units it takes to encode `text` in this encoding. |
| <a href=#33 id=33 data-nosnippet>33</a> </span><span class="kw">pub fn </span>measure(<span class="kw-2">&</span><span class="self">self</span>, text: <span class="kw-2">&</span>str) -> usize { |
| <a href=#34 id=34 data-nosnippet>34</a> <span class="kw">match </span><span class="self">self </span>{ |
| <a href=#35 id=35 data-nosnippet>35</a> WideEncoding::Utf16 => text.encode_utf16().count(), |
| <a href=#36 id=36 data-nosnippet>36</a> WideEncoding::Utf32 => text.chars().count(), |
| <a href=#37 id=37 data-nosnippet>37</a> } |
| <a href=#38 id=38 data-nosnippet>38</a> } |
| <a href=#39 id=39 data-nosnippet>39</a>} |
| <a href=#40 id=40 data-nosnippet>40</a> |
| <a href=#41 id=41 data-nosnippet>41</a><span class="doccomment">/// `(line, column)` information in wide encodings. |
| <a href=#42 id=42 data-nosnippet>42</a>/// |
| <a href=#43 id=43 data-nosnippet>43</a>/// See [`WideEncoding`] for the kinds of wide encodings available. |
| <a href=#44 id=44 data-nosnippet>44</a></span><span class="comment">// |
| <a href=#45 id=45 data-nosnippet>45</a>// Deliberately not a generic type and different from `LineCol`. |
| <a href=#46 id=46 data-nosnippet>46</a></span><span class="attr">#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] |
| <a href=#47 id=47 data-nosnippet>47</a></span><span class="kw">pub struct </span>WideLineCol { |
| <a href=#48 id=48 data-nosnippet>48</a> <span class="doccomment">/// Zero-based. |
| <a href=#49 id=49 data-nosnippet>49</a> </span><span class="kw">pub </span>line: u32, |
| <a href=#50 id=50 data-nosnippet>50</a> <span class="doccomment">/// Zero-based. |
| <a href=#51 id=51 data-nosnippet>51</a> </span><span class="kw">pub </span>col: u32, |
| <a href=#52 id=52 data-nosnippet>52</a>} |
| <a href=#53 id=53 data-nosnippet>53</a> |
| <a href=#54 id=54 data-nosnippet>54</a><span class="attr">#[derive(Debug, Clone, Copy, PartialEq, Eq)] |
| <a href=#55 id=55 data-nosnippet>55</a></span><span class="kw">struct </span>WideChar { |
| <a href=#56 id=56 data-nosnippet>56</a> <span class="doccomment">/// Start offset of a character inside a line, zero-based. |
| <a href=#57 id=57 data-nosnippet>57</a> </span>start: TextSize, |
| <a href=#58 id=58 data-nosnippet>58</a> <span class="doccomment">/// End offset of a character inside a line, zero-based. |
| <a href=#59 id=59 data-nosnippet>59</a> </span>end: TextSize, |
| <a href=#60 id=60 data-nosnippet>60</a>} |
| <a href=#61 id=61 data-nosnippet>61</a> |
| <a href=#62 id=62 data-nosnippet>62</a><span class="kw">impl </span>WideChar { |
| <a href=#63 id=63 data-nosnippet>63</a> <span class="doccomment">/// Returns the length in 8-bit UTF-8 code units. |
| <a href=#64 id=64 data-nosnippet>64</a> </span><span class="kw">fn </span>len(<span class="kw-2">&</span><span class="self">self</span>) -> TextSize { |
| <a href=#65 id=65 data-nosnippet>65</a> <span class="self">self</span>.end - <span class="self">self</span>.start |
| <a href=#66 id=66 data-nosnippet>66</a> } |
| <a href=#67 id=67 data-nosnippet>67</a> |
| <a href=#68 id=68 data-nosnippet>68</a> <span class="doccomment">/// Returns the length in UTF-16 or UTF-32 code units. |
| <a href=#69 id=69 data-nosnippet>69</a> </span><span class="kw">fn </span>wide_len(<span class="kw-2">&</span><span class="self">self</span>, enc: WideEncoding) -> u32 { |
| <a href=#70 id=70 data-nosnippet>70</a> <span class="kw">match </span>enc { |
| <a href=#71 id=71 data-nosnippet>71</a> WideEncoding::Utf16 => { |
| <a href=#72 id=72 data-nosnippet>72</a> <span class="kw">if </span><span class="self">self</span>.len() == TextSize::from(<span class="number">4</span>) { |
| <a href=#73 id=73 data-nosnippet>73</a> <span class="number">2 |
| <a href=#74 id=74 data-nosnippet>74</a> </span>} <span class="kw">else </span>{ |
| <a href=#75 id=75 data-nosnippet>75</a> <span class="number">1 |
| <a href=#76 id=76 data-nosnippet>76</a> </span>} |
| <a href=#77 id=77 data-nosnippet>77</a> } |
| <a href=#78 id=78 data-nosnippet>78</a> WideEncoding::Utf32 => <span class="number">1</span>, |
| <a href=#79 id=79 data-nosnippet>79</a> } |
| <a href=#80 id=80 data-nosnippet>80</a> } |
| <a href=#81 id=81 data-nosnippet>81</a>} |
| <a href=#82 id=82 data-nosnippet>82</a> |
| <a href=#83 id=83 data-nosnippet>83</a><span class="doccomment">/// Maps flat [`TextSize`] offsets to/from `(line, column)` representation. |
| <a href=#84 id=84 data-nosnippet>84</a></span><span class="attr">#[derive(Debug, Clone, PartialEq, Eq)] |
| <a href=#85 id=85 data-nosnippet>85</a></span><span class="kw">pub struct </span>LineIndex { |
| <a href=#86 id=86 data-nosnippet>86</a> <span class="doccomment">/// Offset of the beginning of each line (except the first, which always has offset 0). |
| <a href=#87 id=87 data-nosnippet>87</a> </span>newlines: Box<[TextSize]>, |
| <a href=#88 id=88 data-nosnippet>88</a> <span class="doccomment">/// List of non-ASCII characters on each line. |
| <a href=#89 id=89 data-nosnippet>89</a> </span>line_wide_chars: IntMap<u32, Box<[WideChar]>>, |
| <a href=#90 id=90 data-nosnippet>90</a> <span class="doccomment">/// The length of the entire text. |
| <a href=#91 id=91 data-nosnippet>91</a> </span>len: TextSize, |
| <a href=#92 id=92 data-nosnippet>92</a>} |
| <a href=#93 id=93 data-nosnippet>93</a> |
| <a href=#94 id=94 data-nosnippet>94</a><span class="kw">impl </span>LineIndex { |
| <a href=#95 id=95 data-nosnippet>95</a> <span class="doccomment">/// Returns a `LineIndex` for the `text`. |
| <a href=#96 id=96 data-nosnippet>96</a> </span><span class="kw">pub fn </span>new(text: <span class="kw-2">&</span>str) -> LineIndex { |
| <a href=#97 id=97 data-nosnippet>97</a> <span class="kw">let </span>(newlines, line_wide_chars) = analyze_source_file(text); |
| <a href=#98 id=98 data-nosnippet>98</a> LineIndex { |
| <a href=#99 id=99 data-nosnippet>99</a> newlines: newlines.into_boxed_slice(), |
| <a href=#100 id=100 data-nosnippet>100</a> line_wide_chars, |
| <a href=#101 id=101 data-nosnippet>101</a> len: TextSize::of(text), |
| <a href=#102 id=102 data-nosnippet>102</a> } |
| <a href=#103 id=103 data-nosnippet>103</a> } |
| <a href=#104 id=104 data-nosnippet>104</a> |
| <a href=#105 id=105 data-nosnippet>105</a> <span class="doccomment">/// Transforms the `TextSize` into a `LineCol`. |
| <a href=#106 id=106 data-nosnippet>106</a> /// |
| <a href=#107 id=107 data-nosnippet>107</a> /// # Panics |
| <a href=#108 id=108 data-nosnippet>108</a> /// |
| <a href=#109 id=109 data-nosnippet>109</a> /// If the offset is invalid. See [`Self::try_line_col`]. |
| <a href=#110 id=110 data-nosnippet>110</a> </span><span class="kw">pub fn </span>line_col(<span class="kw-2">&</span><span class="self">self</span>, offset: TextSize) -> LineCol { |
| <a href=#111 id=111 data-nosnippet>111</a> <span class="self">self</span>.try_line_col(offset).expect(<span class="string">"invalid offset"</span>) |
| <a href=#112 id=112 data-nosnippet>112</a> } |
| <a href=#113 id=113 data-nosnippet>113</a> |
| <a href=#114 id=114 data-nosnippet>114</a> <span class="doccomment">/// Transforms the `TextSize` into a `LineCol`. |
| <a href=#115 id=115 data-nosnippet>115</a> /// |
| <a href=#116 id=116 data-nosnippet>116</a> /// Returns `None` if the `offset` was invalid, e.g. if it extends past the end of the text or |
| <a href=#117 id=117 data-nosnippet>117</a> /// points to the middle of a multi-byte character. |
| <a href=#118 id=118 data-nosnippet>118</a> </span><span class="kw">pub fn </span>try_line_col(<span class="kw-2">&</span><span class="self">self</span>, offset: TextSize) -> <span class="prelude-ty">Option</span><LineCol> { |
| <a href=#119 id=119 data-nosnippet>119</a> <span class="kw">if </span>offset > <span class="self">self</span>.len { |
| <a href=#120 id=120 data-nosnippet>120</a> <span class="kw">return </span><span class="prelude-val">None</span>; |
| <a href=#121 id=121 data-nosnippet>121</a> } |
| <a href=#122 id=122 data-nosnippet>122</a> <span class="kw">let </span>line = <span class="self">self</span>.newlines.partition_point(|<span class="kw-2">&</span>it| it <= offset); |
| <a href=#123 id=123 data-nosnippet>123</a> <span class="kw">let </span>start = <span class="self">self</span>.start_offset(line)<span class="question-mark">?</span>; |
| <a href=#124 id=124 data-nosnippet>124</a> <span class="kw">let </span>col = offset - start; |
| <a href=#125 id=125 data-nosnippet>125</a> <span class="kw">let </span>ret = LineCol { line: line <span class="kw">as </span>u32, col: col.into() }; |
| <a href=#126 id=126 data-nosnippet>126</a> <span class="self">self</span>.line_wide_chars |
| <a href=#127 id=127 data-nosnippet>127</a> .get(<span class="kw-2">&</span>ret.line) |
| <a href=#128 id=128 data-nosnippet>128</a> .into_iter() |
| <a href=#129 id=129 data-nosnippet>129</a> .flat_map(|it| it.iter()) |
| <a href=#130 id=130 data-nosnippet>130</a> .all(|it| col <= it.start || it.end <= col) |
| <a href=#131 id=131 data-nosnippet>131</a> .then_some(ret) |
| <a href=#132 id=132 data-nosnippet>132</a> } |
| <a href=#133 id=133 data-nosnippet>133</a> |
| <a href=#134 id=134 data-nosnippet>134</a> <span class="doccomment">/// Transforms the `LineCol` into a `TextSize`. |
| <a href=#135 id=135 data-nosnippet>135</a> </span><span class="kw">pub fn </span>offset(<span class="kw-2">&</span><span class="self">self</span>, line_col: LineCol) -> <span class="prelude-ty">Option</span><TextSize> { |
| <a href=#136 id=136 data-nosnippet>136</a> <span class="self">self</span>.start_offset(line_col.line <span class="kw">as </span>usize).map(|start| start + TextSize::from(line_col.col)) |
| <a href=#137 id=137 data-nosnippet>137</a> } |
| <a href=#138 id=138 data-nosnippet>138</a> |
| <a href=#139 id=139 data-nosnippet>139</a> <span class="kw">fn </span>start_offset(<span class="kw-2">&</span><span class="self">self</span>, line: usize) -> <span class="prelude-ty">Option</span><TextSize> { |
| <a href=#140 id=140 data-nosnippet>140</a> <span class="kw">match </span>line.checked_sub(<span class="number">1</span>) { |
| <a href=#141 id=141 data-nosnippet>141</a> <span class="prelude-val">None </span>=> <span class="prelude-val">Some</span>(TextSize::from(<span class="number">0</span>)), |
| <a href=#142 id=142 data-nosnippet>142</a> <span class="prelude-val">Some</span>(it) => <span class="self">self</span>.newlines.get(it).copied(), |
| <a href=#143 id=143 data-nosnippet>143</a> } |
| <a href=#144 id=144 data-nosnippet>144</a> } |
| <a href=#145 id=145 data-nosnippet>145</a> |
| <a href=#146 id=146 data-nosnippet>146</a> <span class="doccomment">/// Transforms the `LineCol` with the given `WideEncoding` into a `WideLineCol`. |
| <a href=#147 id=147 data-nosnippet>147</a> </span><span class="kw">pub fn </span>to_wide(<span class="kw-2">&</span><span class="self">self</span>, enc: WideEncoding, line_col: LineCol) -> <span class="prelude-ty">Option</span><WideLineCol> { |
| <a href=#148 id=148 data-nosnippet>148</a> <span class="kw">let </span><span class="kw-2">mut </span>col = line_col.col; |
| <a href=#149 id=149 data-nosnippet>149</a> <span class="kw">if let </span><span class="prelude-val">Some</span>(wide_chars) = <span class="self">self</span>.line_wide_chars.get(<span class="kw-2">&</span>line_col.line) { |
| <a href=#150 id=150 data-nosnippet>150</a> <span class="kw">for </span>c <span class="kw">in </span>wide_chars.iter() { |
| <a href=#151 id=151 data-nosnippet>151</a> <span class="kw">if </span>u32::from(c.end) <= line_col.col { |
| <a href=#152 id=152 data-nosnippet>152</a> col = col.checked_sub(u32::from(c.len()) - c.wide_len(enc))<span class="question-mark">?</span>; |
| <a href=#153 id=153 data-nosnippet>153</a> } <span class="kw">else </span>{ |
| <a href=#154 id=154 data-nosnippet>154</a> <span class="comment">// From here on, all wide characters come *after* the character we are mapping, |
| <a href=#155 id=155 data-nosnippet>155</a> // so we don't need to take them into account |
| <a href=#156 id=156 data-nosnippet>156</a> </span><span class="kw">break</span>; |
| <a href=#157 id=157 data-nosnippet>157</a> } |
| <a href=#158 id=158 data-nosnippet>158</a> } |
| <a href=#159 id=159 data-nosnippet>159</a> } |
| <a href=#160 id=160 data-nosnippet>160</a> <span class="prelude-val">Some</span>(WideLineCol { line: line_col.line, col }) |
| <a href=#161 id=161 data-nosnippet>161</a> } |
| <a href=#162 id=162 data-nosnippet>162</a> |
| <a href=#163 id=163 data-nosnippet>163</a> <span class="doccomment">/// Transforms the `WideLineCol` with the given `WideEncoding` into a `LineCol`. |
| <a href=#164 id=164 data-nosnippet>164</a> </span><span class="kw">pub fn </span>to_utf8(<span class="kw-2">&</span><span class="self">self</span>, enc: WideEncoding, line_col: WideLineCol) -> <span class="prelude-ty">Option</span><LineCol> { |
| <a href=#165 id=165 data-nosnippet>165</a> <span class="kw">let </span><span class="kw-2">mut </span>col = line_col.col; |
| <a href=#166 id=166 data-nosnippet>166</a> <span class="kw">if let </span><span class="prelude-val">Some</span>(wide_chars) = <span class="self">self</span>.line_wide_chars.get(<span class="kw-2">&</span>line_col.line) { |
| <a href=#167 id=167 data-nosnippet>167</a> <span class="kw">for </span>c <span class="kw">in </span>wide_chars.iter() { |
| <a href=#168 id=168 data-nosnippet>168</a> <span class="kw">if </span>col > u32::from(c.start) { |
| <a href=#169 id=169 data-nosnippet>169</a> col = col.checked_add(u32::from(c.len()) - c.wide_len(enc))<span class="question-mark">?</span>; |
| <a href=#170 id=170 data-nosnippet>170</a> } <span class="kw">else </span>{ |
| <a href=#171 id=171 data-nosnippet>171</a> <span class="comment">// From here on, all wide characters come *after* the character we are mapping, |
| <a href=#172 id=172 data-nosnippet>172</a> // so we don't need to take them into account |
| <a href=#173 id=173 data-nosnippet>173</a> </span><span class="kw">break</span>; |
| <a href=#174 id=174 data-nosnippet>174</a> } |
| <a href=#175 id=175 data-nosnippet>175</a> } |
| <a href=#176 id=176 data-nosnippet>176</a> } |
| <a href=#177 id=177 data-nosnippet>177</a> <span class="prelude-val">Some</span>(LineCol { line: line_col.line, col }) |
| <a href=#178 id=178 data-nosnippet>178</a> } |
| <a href=#179 id=179 data-nosnippet>179</a> |
| <a href=#180 id=180 data-nosnippet>180</a> <span class="doccomment">/// Returns the given line's range. |
| <a href=#181 id=181 data-nosnippet>181</a> </span><span class="kw">pub fn </span>line(<span class="kw-2">&</span><span class="self">self</span>, line: u32) -> <span class="prelude-ty">Option</span><TextRange> { |
| <a href=#182 id=182 data-nosnippet>182</a> <span class="kw">let </span>start = <span class="self">self</span>.start_offset(line <span class="kw">as </span>usize)<span class="question-mark">?</span>; |
| <a href=#183 id=183 data-nosnippet>183</a> <span class="kw">let </span>next_newline = <span class="self">self</span>.newlines.get(line <span class="kw">as </span>usize).copied().unwrap_or(<span class="self">self</span>.len); |
| <a href=#184 id=184 data-nosnippet>184</a> <span class="kw">let </span>line_length = next_newline - start; |
| <a href=#185 id=185 data-nosnippet>185</a> <span class="prelude-val">Some</span>(TextRange::new(start, start + line_length)) |
| <a href=#186 id=186 data-nosnippet>186</a> } |
| <a href=#187 id=187 data-nosnippet>187</a> |
| <a href=#188 id=188 data-nosnippet>188</a> <span class="doccomment">/// Given a range [start, end), returns a sorted iterator of non-empty ranges [start, x1), [x1, |
| <a href=#189 id=189 data-nosnippet>189</a> /// x2), ..., [xn, end) where all the xi, which are positions of newlines, are inside the range |
| <a href=#190 id=190 data-nosnippet>190</a> /// [start, end). |
| <a href=#191 id=191 data-nosnippet>191</a> </span><span class="kw">pub fn </span>lines(<span class="kw-2">&</span><span class="self">self</span>, range: TextRange) -> <span class="kw">impl </span>Iterator<Item = TextRange> + <span class="lifetime">'_ </span>{ |
| <a href=#192 id=192 data-nosnippet>192</a> <span class="kw">let </span>lo = <span class="self">self</span>.newlines.partition_point(|<span class="kw-2">&</span>it| it < range.start()); |
| <a href=#193 id=193 data-nosnippet>193</a> <span class="kw">let </span>hi = <span class="self">self</span>.newlines.partition_point(|<span class="kw-2">&</span>it| it <= range.end()); |
| <a href=#194 id=194 data-nosnippet>194</a> <span class="kw">let </span>all = std::iter::once(range.start()) |
| <a href=#195 id=195 data-nosnippet>195</a> .chain(<span class="self">self</span>.newlines[lo..hi].iter().copied()) |
| <a href=#196 id=196 data-nosnippet>196</a> .chain(std::iter::once(range.end())); |
| <a href=#197 id=197 data-nosnippet>197</a> |
| <a href=#198 id=198 data-nosnippet>198</a> all.clone() |
| <a href=#199 id=199 data-nosnippet>199</a> .zip(all.skip(<span class="number">1</span>)) |
| <a href=#200 id=200 data-nosnippet>200</a> .map(|(lo, hi)| TextRange::new(lo, hi)) |
| <a href=#201 id=201 data-nosnippet>201</a> .filter(|it| !it.is_empty()) |
| <a href=#202 id=202 data-nosnippet>202</a> } |
| <a href=#203 id=203 data-nosnippet>203</a> |
| <a href=#204 id=204 data-nosnippet>204</a> <span class="doccomment">/// Returns the length of the original text. |
| <a href=#205 id=205 data-nosnippet>205</a> </span><span class="kw">pub fn </span>len(<span class="kw-2">&</span><span class="self">self</span>) -> TextSize { |
| <a href=#206 id=206 data-nosnippet>206</a> <span class="self">self</span>.len |
| <a href=#207 id=207 data-nosnippet>207</a> } |
| <a href=#208 id=208 data-nosnippet>208</a>} |
| <a href=#209 id=209 data-nosnippet>209</a> |
| <a href=#210 id=210 data-nosnippet>210</a><span class="doccomment">/// This is adapted from the rustc_span crate, <https://github.com/rust-lang/rust/blob/de59844c98f7925242a798a72c59dc3610dd0e2c/compiler/rustc_span/src/analyze_source_file.rs> |
| <a href=#211 id=211 data-nosnippet>211</a></span><span class="kw">fn </span>analyze_source_file(src: <span class="kw-2">&</span>str) -> (Vec<TextSize>, IntMap<u32, Box<[WideChar]>>) { |
| <a href=#212 id=212 data-nosnippet>212</a> <span class="macro">assert!</span>(src.len() < !<span class="number">0u32 </span><span class="kw">as </span>usize); |
| <a href=#213 id=213 data-nosnippet>213</a> <span class="kw">let </span><span class="kw-2">mut </span>lines = <span class="macro">vec!</span>[]; |
| <a href=#214 id=214 data-nosnippet>214</a> <span class="kw">let </span><span class="kw-2">mut </span>line_wide_chars = IntMap::<u32, Vec<WideChar>>::default(); |
| <a href=#215 id=215 data-nosnippet>215</a> |
| <a href=#216 id=216 data-nosnippet>216</a> <span class="comment">// Calls the right implementation, depending on hardware support available. |
| <a href=#217 id=217 data-nosnippet>217</a> </span>analyze_source_file_dispatch(src, <span class="kw-2">&mut </span>lines, <span class="kw-2">&mut </span>line_wide_chars); |
| <a href=#218 id=218 data-nosnippet>218</a> |
| <a href=#219 id=219 data-nosnippet>219</a> (lines, line_wide_chars.into_iter().map(|(k, v)| (k, v.into_boxed_slice())).collect()) |
| <a href=#220 id=220 data-nosnippet>220</a>} |
| <a href=#221 id=221 data-nosnippet>221</a> |
| <a href=#222 id=222 data-nosnippet>222</a><span class="attr">#[cfg(any(target_arch = <span class="string">"x86"</span>, target_arch = <span class="string">"x86_64"</span>))] |
| <a href=#223 id=223 data-nosnippet>223</a></span><span class="kw">fn </span>analyze_source_file_dispatch( |
| <a href=#224 id=224 data-nosnippet>224</a> src: <span class="kw-2">&</span>str, |
| <a href=#225 id=225 data-nosnippet>225</a> lines: <span class="kw-2">&mut </span>Vec<TextSize>, |
| <a href=#226 id=226 data-nosnippet>226</a> multi_byte_chars: <span class="kw-2">&mut </span>IntMap<u32, Vec<WideChar>>, |
| <a href=#227 id=227 data-nosnippet>227</a>) { |
| <a href=#228 id=228 data-nosnippet>228</a> <span class="kw">if </span><span class="macro">is_x86_feature_detected!</span>(<span class="string">"sse2"</span>) { |
| <a href=#229 id=229 data-nosnippet>229</a> <span class="comment">// SAFETY: SSE2 support was checked |
| <a href=#230 id=230 data-nosnippet>230</a> </span><span class="kw">unsafe </span>{ |
| <a href=#231 id=231 data-nosnippet>231</a> analyze_source_file_sse2(src, lines, multi_byte_chars); |
| <a href=#232 id=232 data-nosnippet>232</a> } |
| <a href=#233 id=233 data-nosnippet>233</a> } <span class="kw">else </span>{ |
| <a href=#234 id=234 data-nosnippet>234</a> analyze_source_file_generic(src, src.len(), TextSize::from(<span class="number">0</span>), lines, multi_byte_chars); |
| <a href=#235 id=235 data-nosnippet>235</a> } |
| <a href=#236 id=236 data-nosnippet>236</a>} |
| <a href=#237 id=237 data-nosnippet>237</a> |
| <a href=#238 id=238 data-nosnippet>238</a><span class="attr">#[cfg(all(target_arch = <span class="string">"aarch64"</span>, target_endian = <span class="string">"little"</span>))] |
| <a href=#239 id=239 data-nosnippet>239</a></span><span class="kw">fn </span>analyze_source_file_dispatch( |
| <a href=#240 id=240 data-nosnippet>240</a> src: <span class="kw-2">&</span>str, |
| <a href=#241 id=241 data-nosnippet>241</a> lines: <span class="kw-2">&mut </span>Vec<TextSize>, |
| <a href=#242 id=242 data-nosnippet>242</a> multi_byte_chars: <span class="kw-2">&mut </span>IntMap<u32, Vec<WideChar>>, |
| <a href=#243 id=243 data-nosnippet>243</a>) { |
| <a href=#244 id=244 data-nosnippet>244</a> <span class="kw">if </span><span class="macro">std::arch::is_aarch64_feature_detected!</span>(<span class="string">"neon"</span>) { |
| <a href=#245 id=245 data-nosnippet>245</a> <span class="comment">// SAFETY: NEON support was checked |
| <a href=#246 id=246 data-nosnippet>246</a> </span><span class="kw">unsafe </span>{ |
| <a href=#247 id=247 data-nosnippet>247</a> analyze_source_file_neon(src, lines, multi_byte_chars); |
| <a href=#248 id=248 data-nosnippet>248</a> } |
| <a href=#249 id=249 data-nosnippet>249</a> } <span class="kw">else </span>{ |
| <a href=#250 id=250 data-nosnippet>250</a> analyze_source_file_generic(src, src.len(), TextSize::from(<span class="number">0</span>), lines, multi_byte_chars); |
| <a href=#251 id=251 data-nosnippet>251</a> } |
| <a href=#252 id=252 data-nosnippet>252</a>} |
| <a href=#253 id=253 data-nosnippet>253</a> |
| <a href=#254 id=254 data-nosnippet>254</a><span class="doccomment">/// Checks 16 byte chunks of text at a time. If the chunk contains |
| <a href=#255 id=255 data-nosnippet>255</a>/// something other than printable ASCII characters and newlines, the |
| <a href=#256 id=256 data-nosnippet>256</a>/// function falls back to the generic implementation. Otherwise it uses |
| <a href=#257 id=257 data-nosnippet>257</a>/// SSE2 intrinsics to quickly find all newlines. |
| <a href=#258 id=258 data-nosnippet>258</a></span><span class="attr">#[target_feature(enable = <span class="string">"sse2"</span>)] |
| <a href=#259 id=259 data-nosnippet>259</a>#[cfg(any(target_arch = <span class="string">"x86"</span>, target_arch = <span class="string">"x86_64"</span>))] |
| <a href=#260 id=260 data-nosnippet>260</a></span><span class="comment">// This can be removed once 1.87 is stable due to some intrinsics switching to safe. |
| <a href=#261 id=261 data-nosnippet>261</a></span><span class="attr">#[allow(unsafe_op_in_unsafe_fn)] |
| <a href=#262 id=262 data-nosnippet>262</a></span><span class="kw">unsafe fn </span>analyze_source_file_sse2( |
| <a href=#263 id=263 data-nosnippet>263</a> src: <span class="kw-2">&</span>str, |
| <a href=#264 id=264 data-nosnippet>264</a> lines: <span class="kw-2">&mut </span>Vec<TextSize>, |
| <a href=#265 id=265 data-nosnippet>265</a> multi_byte_chars: <span class="kw-2">&mut </span>IntMap<u32, Vec<WideChar>>, |
| <a href=#266 id=266 data-nosnippet>266</a>) { |
| <a href=#267 id=267 data-nosnippet>267</a> <span class="attr">#[cfg(target_arch = <span class="string">"x86"</span>)] |
| <a href=#268 id=268 data-nosnippet>268</a> </span><span class="kw">use </span>std::arch::x86::<span class="kw-2">*</span>; |
| <a href=#269 id=269 data-nosnippet>269</a> <span class="attr">#[cfg(target_arch = <span class="string">"x86_64"</span>)] |
| <a href=#270 id=270 data-nosnippet>270</a> </span><span class="kw">use </span>std::arch::x86_64::<span class="kw-2">*</span>; |
| <a href=#271 id=271 data-nosnippet>271</a> |
| <a href=#272 id=272 data-nosnippet>272</a> <span class="kw">const </span>CHUNK_SIZE: usize = <span class="number">16</span>; |
| <a href=#273 id=273 data-nosnippet>273</a> |
| <a href=#274 id=274 data-nosnippet>274</a> <span class="kw">let </span>src_bytes = src.as_bytes(); |
| <a href=#275 id=275 data-nosnippet>275</a> |
| <a href=#276 id=276 data-nosnippet>276</a> <span class="kw">let </span>chunk_count = src.len() / CHUNK_SIZE; |
| <a href=#277 id=277 data-nosnippet>277</a> |
| <a href=#278 id=278 data-nosnippet>278</a> <span class="comment">// This variable keeps track of where we should start decoding a |
| <a href=#279 id=279 data-nosnippet>279</a> // chunk. If a multi-byte character spans across chunk boundaries, |
| <a href=#280 id=280 data-nosnippet>280</a> // we need to skip that part in the next chunk because we already |
| <a href=#281 id=281 data-nosnippet>281</a> // handled it. |
| <a href=#282 id=282 data-nosnippet>282</a> </span><span class="kw">let </span><span class="kw-2">mut </span>intra_chunk_offset = <span class="number">0</span>; |
| <a href=#283 id=283 data-nosnippet>283</a> |
| <a href=#284 id=284 data-nosnippet>284</a> <span class="kw">for </span>chunk_index <span class="kw">in </span><span class="number">0</span>..chunk_count { |
| <a href=#285 id=285 data-nosnippet>285</a> <span class="kw">let </span>ptr = src_bytes.as_ptr() <span class="kw">as </span><span class="kw-2">*const </span>__m128i; |
| <a href=#286 id=286 data-nosnippet>286</a> <span class="comment">// We don't know if the pointer is aligned to 16 bytes, so we |
| <a href=#287 id=287 data-nosnippet>287</a> // use `loadu`, which supports unaligned loading. |
| <a href=#288 id=288 data-nosnippet>288</a> </span><span class="kw">let </span>chunk = <span class="kw">unsafe </span>{ _mm_loadu_si128(ptr.add(chunk_index)) }; |
| <a href=#289 id=289 data-nosnippet>289</a> |
| <a href=#290 id=290 data-nosnippet>290</a> <span class="comment">// For each character in the chunk, see if its byte value is < 0, which |
| <a href=#291 id=291 data-nosnippet>291</a> // indicates that it's part of a UTF-8 char. |
| <a href=#292 id=292 data-nosnippet>292</a> </span><span class="kw">let </span>multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(<span class="number">0</span>)); |
| <a href=#293 id=293 data-nosnippet>293</a> <span class="comment">// Create a bit mask from the comparison results. |
| <a href=#294 id=294 data-nosnippet>294</a> </span><span class="kw">let </span>multibyte_mask = _mm_movemask_epi8(multibyte_test); |
| <a href=#295 id=295 data-nosnippet>295</a> |
| <a href=#296 id=296 data-nosnippet>296</a> <span class="comment">// If the bit mask is all zero, we only have ASCII chars here: |
| <a href=#297 id=297 data-nosnippet>297</a> </span><span class="kw">if </span>multibyte_mask == <span class="number">0 </span>{ |
| <a href=#298 id=298 data-nosnippet>298</a> <span class="macro">assert!</span>(intra_chunk_offset == <span class="number">0</span>); |
| <a href=#299 id=299 data-nosnippet>299</a> |
| <a href=#300 id=300 data-nosnippet>300</a> <span class="comment">// Check for newlines in the chunk |
| <a href=#301 id=301 data-nosnippet>301</a> </span><span class="kw">let </span>newlines_test = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(<span class="string">b'\n' </span><span class="kw">as </span>i8)); |
| <a href=#302 id=302 data-nosnippet>302</a> <span class="kw">let </span>newlines_mask = _mm_movemask_epi8(newlines_test); |
| <a href=#303 id=303 data-nosnippet>303</a> |
| <a href=#304 id=304 data-nosnippet>304</a> <span class="kw">if </span><span class="macro">newlines_mask !</span>= <span class="number">0 </span>{ |
| <a href=#305 id=305 data-nosnippet>305</a> <span class="comment">// All control characters are newlines, record them |
| <a href=#306 id=306 data-nosnippet>306</a> </span><span class="kw">let </span><span class="kw-2">mut </span>newlines_mask = <span class="number">0xFFFF0000 </span>| newlines_mask <span class="kw">as </span>u32; |
| <a href=#307 id=307 data-nosnippet>307</a> <span class="kw">let </span>output_offset = TextSize::from((chunk_index * CHUNK_SIZE + <span class="number">1</span>) <span class="kw">as </span>u32); |
| <a href=#308 id=308 data-nosnippet>308</a> |
| <a href=#309 id=309 data-nosnippet>309</a> <span class="kw">loop </span>{ |
| <a href=#310 id=310 data-nosnippet>310</a> <span class="kw">let </span>index = newlines_mask.trailing_zeros(); |
| <a href=#311 id=311 data-nosnippet>311</a> |
| <a href=#312 id=312 data-nosnippet>312</a> <span class="kw">if </span>index >= CHUNK_SIZE <span class="kw">as </span>u32 { |
| <a href=#313 id=313 data-nosnippet>313</a> <span class="comment">// We have arrived at the end of the chunk. |
| <a href=#314 id=314 data-nosnippet>314</a> </span><span class="kw">break</span>; |
| <a href=#315 id=315 data-nosnippet>315</a> } |
| <a href=#316 id=316 data-nosnippet>316</a> |
| <a href=#317 id=317 data-nosnippet>317</a> lines.push(TextSize::from(index) + output_offset); |
| <a href=#318 id=318 data-nosnippet>318</a> |
| <a href=#319 id=319 data-nosnippet>319</a> <span class="comment">// Clear the bit, so we can find the next one. |
| <a href=#320 id=320 data-nosnippet>320</a> </span>newlines_mask &= (!<span class="number">1</span>) << index; |
| <a href=#321 id=321 data-nosnippet>321</a> } |
| <a href=#322 id=322 data-nosnippet>322</a> } |
| <a href=#323 id=323 data-nosnippet>323</a> <span class="kw">continue</span>; |
| <a href=#324 id=324 data-nosnippet>324</a> } |
| <a href=#325 id=325 data-nosnippet>325</a> |
| <a href=#326 id=326 data-nosnippet>326</a> <span class="comment">// The slow path. |
| <a href=#327 id=327 data-nosnippet>327</a> // There are control chars in here, fallback to generic decoding. |
| <a href=#328 id=328 data-nosnippet>328</a> </span><span class="kw">let </span>scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset; |
| <a href=#329 id=329 data-nosnippet>329</a> intra_chunk_offset = analyze_source_file_generic( |
| <a href=#330 id=330 data-nosnippet>330</a> <span class="kw-2">&</span>src[scan_start..], |
| <a href=#331 id=331 data-nosnippet>331</a> CHUNK_SIZE - intra_chunk_offset, |
| <a href=#332 id=332 data-nosnippet>332</a> TextSize::from(scan_start <span class="kw">as </span>u32), |
| <a href=#333 id=333 data-nosnippet>333</a> lines, |
| <a href=#334 id=334 data-nosnippet>334</a> multi_byte_chars, |
| <a href=#335 id=335 data-nosnippet>335</a> ); |
| <a href=#336 id=336 data-nosnippet>336</a> } |
| <a href=#337 id=337 data-nosnippet>337</a> |
| <a href=#338 id=338 data-nosnippet>338</a> <span class="comment">// There might still be a tail left to analyze |
| <a href=#339 id=339 data-nosnippet>339</a> </span><span class="kw">let </span>tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset; |
| <a href=#340 id=340 data-nosnippet>340</a> <span class="kw">if </span>tail_start < src.len() { |
| <a href=#341 id=341 data-nosnippet>341</a> analyze_source_file_generic( |
| <a href=#342 id=342 data-nosnippet>342</a> <span class="kw-2">&</span>src[tail_start..], |
| <a href=#343 id=343 data-nosnippet>343</a> src.len() - tail_start, |
| <a href=#344 id=344 data-nosnippet>344</a> TextSize::from(tail_start <span class="kw">as </span>u32), |
| <a href=#345 id=345 data-nosnippet>345</a> lines, |
| <a href=#346 id=346 data-nosnippet>346</a> multi_byte_chars, |
| <a href=#347 id=347 data-nosnippet>347</a> ); |
| <a href=#348 id=348 data-nosnippet>348</a> } |
| <a href=#349 id=349 data-nosnippet>349</a>} |
| <a href=#350 id=350 data-nosnippet>350</a> |
/// Collapses a byte-wise NEON comparison result into a 64-bit mask.
///
/// Each byte of `v` contributes four bits to the result; the least
/// significant four bits correspond to the first byte of the vector. For a
/// comparison result (every byte either `0x00` or `0xFF`) each nibble is
/// therefore either `0x0` or `0xF`.
///
/// See https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
#[target_feature(enable = "neon")]
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
#[inline]
// This can be removed once 1.87 is stable due to some intrinsics switching to safe.
#[allow(unsafe_op_in_unsafe_fn)]
unsafe fn move_mask(v: std::arch::aarch64::uint8x16_t) -> u64 {
    use std::arch::aarch64::{
        vget_lane_u64, vreinterpret_u64_u8, vreinterpretq_u16_u8, vshrn_n_u16,
    };

    // View the 16 bytes as 8 lanes of u16 so that the narrowing shift below
    // can merge each pair of neighbouring bytes into a single byte.
    let paired = vreinterpretq_u16_u8(v);
    // Shift every u16 lane right by 4 and keep the low 8 bits: one nibble
    // from each of the two source bytes survives.
    let nibbles = vshrn_n_u16(paired, 4);
    // The 8 resulting bytes are read back as one u64.
    let packed = vreinterpret_u64_u8(nibbles);
    vget_lane_u64(packed, 0)
}
| <a href=#367 id=367 data-nosnippet>367</a> |
// NEON implementation of the source scan.
//
// The input is processed in 16-byte chunks. A chunk that contains only ASCII
// bytes has its newlines located with vector comparisons; any chunk that
// contains a non-ASCII byte is handed to `analyze_source_file_generic`. The
// bytes left over after the last full chunk are also handled by the generic
// routine.
#[target_feature(enable = "neon")]
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
// This can be removed once 1.87 is stable due to some intrinsics switching to safe.
#[allow(unsafe_op_in_unsafe_fn)]
unsafe fn analyze_source_file_neon(
    src: &str,
    lines: &mut Vec<TextSize>,
    multi_byte_chars: &mut IntMap<u32, Vec<WideChar>>,
) {
    use std::arch::aarch64::*;

    const CHUNK_SIZE: usize = 16;

    let base = src.as_bytes().as_ptr() as *const i8;
    let full_chunks = src.len() / CHUNK_SIZE;
    let newline_splat = vdupq_n_s8(b'\n' as i8);

    // Number of bytes at the start of the next chunk that were already
    // consumed because a multi-byte character started in the previous chunk
    // and extended across the chunk boundary.
    let mut carry_over = 0;

    for chunk_index in 0..full_chunks {
        let chunk_start = chunk_index * CHUNK_SIZE;
        // SAFETY: `chunk_index < src.len() / CHUNK_SIZE`, so the 16 bytes
        // starting at `chunk_start` all lie inside `src`.
        let chunk = unsafe { vld1q_s8(base.add(chunk_start)) };

        // A byte that is negative when read as `i8` is part of a multi-byte
        // UTF-8 sequence; collect those lanes into a mask.
        let non_ascii_mask = unsafe { move_mask(vcltzq_s8(chunk)) };

        if non_ascii_mask != 0 {
            // The slow path: at least one non-ASCII byte, so decode this
            // chunk generically, skipping what was already consumed.
            let scan_start = chunk_start + carry_over;
            carry_over = analyze_source_file_generic(
                &src[scan_start..],
                CHUNK_SIZE - carry_over,
                TextSize::from(scan_start as u32),
                lines,
                multi_byte_chars,
            );
            continue;
        }

        // Pure ASCII chunk: nothing can have spilled over into it.
        assert!(carry_over == 0);

        // One nibble per byte; a set nibble marks a newline in that lane.
        let mut pending = unsafe { move_mask(vceqq_s8(chunk, newline_splat)) };
        // A line starts one byte after its newline, hence the `+ 1`.
        let line_base = TextSize::from((chunk_start + 1) as u32);

        while pending != 0 {
            let shift = pending.trailing_zeros();
            lines.push(TextSize::from(shift / 4) + line_base);

            // Drop the nibble that was just handled so the next one surfaces.
            pending &= !(0xF_u64 << shift);
        }
    }

    // Whatever remains after the last full chunk goes through the generic path.
    let tail_start = full_chunks * CHUNK_SIZE + carry_over;
    if tail_start < src.len() {
        analyze_source_file_generic(
            &src[tail_start..],
            src.len() - tail_start,
            TextSize::from(tail_start as u32),
            lines,
            multi_byte_chars,
        );
    }
}
| <a href=#449 id=449 data-nosnippet>449</a> |
// Fallback dispatcher for targets that are neither x86/x86_64 nor
// little-endian aarch64: no vectorised implementation is selected here, so
// the whole input goes through the generic scanner in one call.
#[cfg(not(any(
    target_arch = "x86",
    target_arch = "x86_64",
    all(target_arch = "aarch64", target_endian = "little")
)))]
fn analyze_source_file_dispatch(
    src: &str,
    lines: &mut Vec<TextSize>,
    multi_byte_chars: &mut IntMap<u32, Vec<WideChar>>,
) {
    let whole_len = src.len();
    let origin = TextSize::from(0);
    // The returned overflow is not needed by this caller.
    let _ = analyze_source_file_generic(src, whole_len, origin, lines, multi_byte_chars);
}
| <a href=#463 id=463 data-nosnippet>463</a> |
| <a href=#464 id=464 data-nosnippet>464</a><span class="comment">// `scan_len` determines the number of bytes in `src` to scan. Note that the |
| <a href=#465 id=465 data-nosnippet>465</a>// function can read past `scan_len` if a multi-byte character start within the |
| <a href=#466 id=466 data-nosnippet>466</a>// range but extends past it. The overflow is returned by the function. |
| <a href=#467 id=467 data-nosnippet>467</a></span><span class="kw">fn </span>analyze_source_file_generic( |
| <a href=#468 id=468 data-nosnippet>468</a> src: <span class="kw-2">&</span>str, |
| <a href=#469 id=469 data-nosnippet>469</a> scan_len: usize, |
| <a href=#470 id=470 data-nosnippet>470</a> output_offset: TextSize, |
| <a href=#471 id=471 data-nosnippet>471</a> lines: <span class="kw-2">&mut </span>Vec<TextSize>, |
| <a href=#472 id=472 data-nosnippet>472</a> multi_byte_chars: <span class="kw-2">&mut </span>IntMap<u32, Vec<WideChar>>, |
| <a href=#473 id=473 data-nosnippet>473</a>) -> usize { |
| <a href=#474 id=474 data-nosnippet>474</a> <span class="macro">assert!</span>(src.len() >= scan_len); |
| <a href=#475 id=475 data-nosnippet>475</a> <span class="kw">let </span><span class="kw-2">mut </span>i = <span class="number">0</span>; |
| <a href=#476 id=476 data-nosnippet>476</a> <span class="kw">let </span>src_bytes = src.as_bytes(); |
| <a href=#477 id=477 data-nosnippet>477</a> |
| <a href=#478 id=478 data-nosnippet>478</a> <span class="kw">while </span>i < scan_len { |
| <a href=#479 id=479 data-nosnippet>479</a> <span class="kw">let </span>byte = <span class="kw">unsafe </span>{ |
| <a href=#480 id=480 data-nosnippet>480</a> <span class="comment">// We verified that i < scan_len <= src.len() |
| <a href=#481 id=481 data-nosnippet>481</a> </span><span class="kw-2">*</span>src_bytes.get_unchecked(i) |
| <a href=#482 id=482 data-nosnippet>482</a> }; |
| <a href=#483 id=483 data-nosnippet>483</a> |
| <a href=#484 id=484 data-nosnippet>484</a> <span class="comment">// How much to advance in order to get to the next UTF-8 char in the |
| <a href=#485 id=485 data-nosnippet>485</a> // string. |
| <a href=#486 id=486 data-nosnippet>486</a> </span><span class="kw">let </span><span class="kw-2">mut </span>char_len = <span class="number">1</span>; |
| <a href=#487 id=487 data-nosnippet>487</a> |
| <a href=#488 id=488 data-nosnippet>488</a> <span class="kw">if </span>byte == <span class="string">b'\n' </span>{ |
| <a href=#489 id=489 data-nosnippet>489</a> lines.push(TextSize::from(i <span class="kw">as </span>u32 + <span class="number">1</span>) + output_offset); |
| <a href=#490 id=490 data-nosnippet>490</a> } <span class="kw">else if </span>byte >= <span class="number">127 </span>{ |
| <a href=#491 id=491 data-nosnippet>491</a> <span class="comment">// The slow path: Just decode to `char`. |
| <a href=#492 id=492 data-nosnippet>492</a> </span><span class="kw">let </span>c = src[i..].chars().next().unwrap(); |
| <a href=#493 id=493 data-nosnippet>493</a> char_len = c.len_utf8(); |
| <a href=#494 id=494 data-nosnippet>494</a> |
| <a href=#495 id=495 data-nosnippet>495</a> <span class="comment">// The last element of `lines` represents the offset of the start of |
| <a href=#496 id=496 data-nosnippet>496</a> // current line. To get the offset inside the line, we subtract it. |
| <a href=#497 id=497 data-nosnippet>497</a> </span><span class="kw">let </span>pos = TextSize::from(i <span class="kw">as </span>u32) + output_offset |
| <a href=#498 id=498 data-nosnippet>498</a> - lines.last().unwrap_or(<span class="kw-2">&</span>TextSize::default()); |
| <a href=#499 id=499 data-nosnippet>499</a> |
| <a href=#500 id=500 data-nosnippet>500</a> <span class="kw">if </span>char_len > <span class="number">1 </span>{ |
| <a href=#501 id=501 data-nosnippet>501</a> <span class="macro">assert!</span>((<span class="number">2</span>..=<span class="number">4</span>).contains(<span class="kw-2">&</span>char_len)); |
| <a href=#502 id=502 data-nosnippet>502</a> <span class="kw">let </span>mbc = WideChar { start: pos, end: pos + TextSize::from(char_len <span class="kw">as </span>u32) }; |
| <a href=#503 id=503 data-nosnippet>503</a> multi_byte_chars.entry(lines.len() <span class="kw">as </span>u32).or_default().push(mbc); |
| <a href=#504 id=504 data-nosnippet>504</a> } |
| <a href=#505 id=505 data-nosnippet>505</a> } |
| <a href=#506 id=506 data-nosnippet>506</a> |
| <a href=#507 id=507 data-nosnippet>507</a> i += char_len; |
| <a href=#508 id=508 data-nosnippet>508</a> } |
| <a href=#509 id=509 data-nosnippet>509</a> |
| <a href=#510 id=510 data-nosnippet>510</a> i - scan_len |
| <a href=#511 id=511 data-nosnippet>511</a>} |
| </code></pre></div></section></main></body></html> |