Auto merge of #17003 - krobelus:utf8-positions-multibyte, r=Veykril

Fix off-by-one error converting to LSP UTF8 offsets with multi-byte char

On this file,

```rust
fn main() {
    let 된장 = 1;
}
```

when using `"positionEncodings":["utf-16"]` I get an "unused variable" diagnostic on the variable
name (codepoint offset range `8..10`). So far so good.

When using `"positionEncodings":["utf-8"]`, I expect to get the equivalent range in bytes (LSP:
"Character offsets count UTF-8 code units (e.g bytes)."), which is `8..14`, because both
characters are 3 bytes in UTF-8.  However I actually get `10..14`.

Looks like this is because we accidentally treat a 1-based index as an offset value: when
converting from our internal char-indices to LSP byte offsets, we look at one character too many.
This causes wrong results if the extra character is a multi-byte one, such as when computing
the start coordinate of 된장.

Fix that by actually passing an offset. While at it, fix the variable name of the line number,
which is not an offset (yet).

Originally reported at https://github.com/kakoune-lsp/kakoune-lsp/issues/740
diff --git a/crates/hir-def/src/macro_expansion_tests/builtin_derive_macro.rs b/crates/hir-def/src/macro_expansion_tests/builtin_derive_macro.rs
index 89c1b44..163211f 100644
--- a/crates/hir-def/src/macro_expansion_tests/builtin_derive_macro.rs
+++ b/crates/hir-def/src/macro_expansion_tests/builtin_derive_macro.rs
@@ -610,6 +610,10 @@
     field1: i32,
     #[cfg(never)]
     field2: (),
+    #[cfg(feature = "never")]
+    field3: (),
+    #[cfg(not(feature = "never"))]
+    field4: (),
 }
 #[derive(Default)]
 enum Bar {
@@ -618,12 +622,16 @@
     Bar,
 }
 "#,
-        expect![[r#"
+        expect![[r##"
 #[derive(Default)]
 struct Foo {
     field1: i32,
     #[cfg(never)]
     field2: (),
+    #[cfg(feature = "never")]
+    field3: (),
+    #[cfg(not(feature = "never"))]
+    field4: (),
 }
 #[derive(Default)]
 enum Bar {
@@ -635,7 +643,7 @@
 impl < > $crate::default::Default for Foo< > where {
     fn default() -> Self {
         Foo {
-            field1: $crate::default::Default::default(),
+            field1: $crate::default::Default::default(), field4: $crate::default::Default::default(),
         }
     }
 }
@@ -643,6 +651,6 @@
     fn default() -> Self {
         Bar::Bar
     }
-}"#]],
+}"##]],
     );
 }
diff --git a/crates/hir-expand/src/cfg_process.rs b/crates/hir-expand/src/cfg_process.rs
index db3558a..f37ce8b 100644
--- a/crates/hir-expand/src/cfg_process.rs
+++ b/crates/hir-expand/src/cfg_process.rs
@@ -1,57 +1,59 @@
 //! Processes out #[cfg] and #[cfg_attr] attributes from the input for the derive macro
 use std::iter::Peekable;
 
+use base_db::CrateId;
 use cfg::{CfgAtom, CfgExpr};
 use rustc_hash::FxHashSet;
 use syntax::{
     ast::{self, Attr, HasAttrs, Meta, VariantList},
-    AstNode, NodeOrToken, SyntaxElement, SyntaxNode, T,
+    AstNode, NodeOrToken, SyntaxElement, SyntaxKind, SyntaxNode, T,
 };
 use tracing::{debug, warn};
 use tt::SmolStr;
 
 use crate::{db::ExpandDatabase, proc_macro::ProcMacroKind, MacroCallLoc, MacroDefKind};
 
-fn check_cfg_attr(attr: &Attr, loc: &MacroCallLoc, db: &dyn ExpandDatabase) -> Option<bool> {
+fn check_cfg(db: &dyn ExpandDatabase, attr: &Attr, krate: CrateId) -> Option<bool> {
     if !attr.simple_name().as_deref().map(|v| v == "cfg")? {
         return None;
     }
-    debug!("Evaluating cfg {}", attr);
     let cfg = parse_from_attr_meta(attr.meta()?)?;
-    debug!("Checking cfg {:?}", cfg);
-    let enabled = db.crate_graph()[loc.krate].cfg_options.check(&cfg) != Some(false);
+    let enabled = db.crate_graph()[krate].cfg_options.check(&cfg) != Some(false);
     Some(enabled)
 }
 
-fn check_cfg_attr_attr(attr: &Attr, loc: &MacroCallLoc, db: &dyn ExpandDatabase) -> Option<bool> {
+fn check_cfg_attr(db: &dyn ExpandDatabase, attr: &Attr, krate: CrateId) -> Option<bool> {
     if !attr.simple_name().as_deref().map(|v| v == "cfg_attr")? {
         return None;
     }
-    debug!("Evaluating cfg_attr {}", attr);
     let cfg_expr = parse_from_attr_meta(attr.meta()?)?;
-    debug!("Checking cfg_attr {:?}", cfg_expr);
-    let enabled = db.crate_graph()[loc.krate].cfg_options.check(&cfg_expr) != Some(false);
+    let enabled = db.crate_graph()[krate].cfg_options.check(&cfg_expr) != Some(false);
     Some(enabled)
 }
 
 fn process_has_attrs_with_possible_comma<I: HasAttrs>(
-    items: impl Iterator<Item = I>,
-    loc: &MacroCallLoc,
     db: &dyn ExpandDatabase,
+    items: impl Iterator<Item = I>,
+    krate: CrateId,
     remove: &mut FxHashSet<SyntaxElement>,
 ) -> Option<()> {
     for item in items {
         let field_attrs = item.attrs();
         'attrs: for attr in field_attrs {
-            if check_cfg_attr(&attr, loc, db).map(|enabled| !enabled).unwrap_or_default() {
-                debug!("censoring type {:?}", item.syntax());
-                remove.insert(item.syntax().clone().into());
-                // We need to remove the , as well
-                remove_possible_comma(&item, remove);
-                break 'attrs;
+            if let Some(enabled) = check_cfg(db, &attr, krate) {
+                if enabled {
+                    debug!("censoring {:?}", attr.syntax());
+                    remove.insert(attr.syntax().clone().into());
+                } else {
+                    debug!("censoring {:?}", item.syntax());
+                    remove.insert(item.syntax().clone().into());
+                    // We need to remove the , as well
+                    remove_possible_comma(&item, remove);
+                    break 'attrs;
+                }
             }
 
-            if let Some(enabled) = check_cfg_attr_attr(&attr, loc, db) {
+            if let Some(enabled) = check_cfg_attr(db, &attr, krate) {
                 if enabled {
                     debug!("Removing cfg_attr tokens {:?}", attr);
                     let meta = attr.meta()?;
@@ -60,13 +62,13 @@
                 } else {
                     debug!("censoring type cfg_attr {:?}", item.syntax());
                     remove.insert(attr.syntax().clone().into());
-                    continue;
                 }
             }
         }
     }
     Some(())
 }
+
 #[derive(Debug, PartialEq, Eq, Clone, Copy)]
 enum CfgExprStage {
     /// Stripping the CFGExpr part of the attribute
@@ -78,6 +80,7 @@
     // Related Issue: https://github.com/rust-lang/rust-analyzer/issues/10110
     EverythingElse,
 }
+
 /// This function creates its own set of tokens to remove. To help prevent malformed syntax as input.
 fn remove_tokens_within_cfg_attr(meta: Meta) -> Option<FxHashSet<SyntaxElement>> {
     let mut remove: FxHashSet<SyntaxElement> = FxHashSet::default();
@@ -131,23 +134,28 @@
     }
 }
 fn process_enum(
-    variants: VariantList,
-    loc: &MacroCallLoc,
     db: &dyn ExpandDatabase,
+    variants: VariantList,
+    krate: CrateId,
     remove: &mut FxHashSet<SyntaxElement>,
 ) -> Option<()> {
     'variant: for variant in variants.variants() {
         for attr in variant.attrs() {
-            if check_cfg_attr(&attr, loc, db).map(|enabled| !enabled).unwrap_or_default() {
-                // Rustc does not strip the attribute if it is enabled. So we will leave it
-                debug!("censoring type {:?}", variant.syntax());
-                remove.insert(variant.syntax().clone().into());
-                // We need to remove the , as well
-                remove_possible_comma(&variant, remove);
-                continue 'variant;
-            };
+            if let Some(enabled) = check_cfg(db, &attr, krate) {
+                if enabled {
+                    debug!("censoring {:?}", attr.syntax());
+                    remove.insert(attr.syntax().clone().into());
+                } else {
+                    // Rustc does not strip the attribute if it is enabled. So we will leave it
+                    debug!("censoring type {:?}", variant.syntax());
+                    remove.insert(variant.syntax().clone().into());
+                    // We need to remove the , as well
+                    remove_possible_comma(&variant, remove);
+                    continue 'variant;
+                }
+            }
 
-            if let Some(enabled) = check_cfg_attr_attr(&attr, loc, db) {
+            if let Some(enabled) = check_cfg_attr(db, &attr, krate) {
                 if enabled {
                     debug!("Removing cfg_attr tokens {:?}", attr);
                     let meta = attr.meta()?;
@@ -156,17 +164,16 @@
                 } else {
                     debug!("censoring type cfg_attr {:?}", variant.syntax());
                     remove.insert(attr.syntax().clone().into());
-                    continue;
                 }
             }
         }
         if let Some(fields) = variant.field_list() {
             match fields {
                 ast::FieldList::RecordFieldList(fields) => {
-                    process_has_attrs_with_possible_comma(fields.fields(), loc, db, remove)?;
+                    process_has_attrs_with_possible_comma(db, fields.fields(), krate, remove)?;
                 }
                 ast::FieldList::TupleFieldList(fields) => {
-                    process_has_attrs_with_possible_comma(fields.fields(), loc, db, remove)?;
+                    process_has_attrs_with_possible_comma(db, fields.fields(), krate, remove)?;
                 }
             }
         }
@@ -175,9 +182,9 @@
 }
 
 pub(crate) fn process_cfg_attrs(
+    db: &dyn ExpandDatabase,
     node: &SyntaxNode,
     loc: &MacroCallLoc,
-    db: &dyn ExpandDatabase,
 ) -> Option<FxHashSet<SyntaxElement>> {
     // FIXME: #[cfg_eval] is not implemented. But it is not stable yet
     let is_derive = match loc.def.kind {
@@ -193,36 +200,35 @@
 
     let item = ast::Item::cast(node.clone())?;
     for attr in item.attrs() {
-        if let Some(enabled) = check_cfg_attr_attr(&attr, loc, db) {
+        if let Some(enabled) = check_cfg_attr(db, &attr, loc.krate) {
             if enabled {
                 debug!("Removing cfg_attr tokens {:?}", attr);
                 let meta = attr.meta()?;
                 let removes_from_cfg_attr = remove_tokens_within_cfg_attr(meta)?;
                 remove.extend(removes_from_cfg_attr);
             } else {
-                debug!("censoring type cfg_attr {:?}", item.syntax());
+                debug!("Removing type cfg_attr {:?}", item.syntax());
                 remove.insert(attr.syntax().clone().into());
-                continue;
             }
         }
     }
     match item {
         ast::Item::Struct(it) => match it.field_list()? {
             ast::FieldList::RecordFieldList(fields) => {
-                process_has_attrs_with_possible_comma(fields.fields(), loc, db, &mut remove)?;
+                process_has_attrs_with_possible_comma(db, fields.fields(), loc.krate, &mut remove)?;
             }
             ast::FieldList::TupleFieldList(fields) => {
-                process_has_attrs_with_possible_comma(fields.fields(), loc, db, &mut remove)?;
+                process_has_attrs_with_possible_comma(db, fields.fields(), loc.krate, &mut remove)?;
             }
         },
         ast::Item::Enum(it) => {
-            process_enum(it.variant_list()?, loc, db, &mut remove)?;
+            process_enum(db, it.variant_list()?, loc.krate, &mut remove)?;
         }
         ast::Item::Union(it) => {
             process_has_attrs_with_possible_comma(
-                it.record_field_list()?.fields(),
-                loc,
                 db,
+                it.record_field_list()?.fields(),
+                loc.krate,
                 &mut remove,
             )?;
         }
@@ -234,10 +240,22 @@
 /// Parses a `cfg` attribute from the meta
 fn parse_from_attr_meta(meta: Meta) -> Option<CfgExpr> {
     let tt = meta.token_tree()?;
-    let mut iter = tt.token_trees_and_tokens().skip(1).peekable();
+    let mut iter = tt
+        .token_trees_and_tokens()
+        .filter(is_not_whitespace)
+        .skip(1)
+        .take_while(is_not_closing_paren)
+        .peekable();
     next_cfg_expr_from_syntax(&mut iter)
 }
 
+fn is_not_closing_paren(element: &NodeOrToken<ast::TokenTree, syntax::SyntaxToken>) -> bool {
+    !matches!(element, NodeOrToken::Token(token) if (token.kind() == syntax::T![')']))
+}
+fn is_not_whitespace(element: &NodeOrToken<ast::TokenTree, syntax::SyntaxToken>) -> bool {
+    !matches!(element, NodeOrToken::Token(token) if (token.kind() == SyntaxKind::WHITESPACE))
+}
+
 fn next_cfg_expr_from_syntax<I>(iter: &mut Peekable<I>) -> Option<CfgExpr>
 where
     I: Iterator<Item = NodeOrToken<ast::TokenTree, syntax::SyntaxToken>>,
@@ -256,14 +274,13 @@
             let Some(NodeOrToken::Node(tree)) = iter.next() else {
                 return Some(CfgExpr::Invalid);
             };
-            let mut tree_iter = tree.token_trees_and_tokens().skip(1).peekable();
-            while tree_iter
-                .peek()
-                .filter(
-                    |element| matches!(element, NodeOrToken::Token(token) if (token.kind() != syntax::T![')'])),
-                )
-                .is_some()
-            {
+            let mut tree_iter = tree
+                .token_trees_and_tokens()
+                .filter(is_not_whitespace)
+                .skip(1)
+                .take_while(is_not_closing_paren)
+                .peekable();
+            while tree_iter.peek().is_some() {
                 let pred = next_cfg_expr_from_syntax(&mut tree_iter);
                 if let Some(pred) = pred {
                     preds.push(pred);
diff --git a/crates/hir-expand/src/db.rs b/crates/hir-expand/src/db.rs
index 5461c1c..a961ad1 100644
--- a/crates/hir-expand/src/db.rs
+++ b/crates/hir-expand/src/db.rs
@@ -175,7 +175,7 @@
             };
 
             let censor_cfg =
-                cfg_process::process_cfg_attrs(speculative_args, &loc, db).unwrap_or_default();
+                cfg_process::process_cfg_attrs(db, speculative_args, &loc).unwrap_or_default();
             let mut fixups = fixup::fixup_syntax(span_map, speculative_args, span);
             fixups.append.retain(|it, _| match it {
                 syntax::NodeOrToken::Token(_) => true,
@@ -462,7 +462,7 @@
 
     let (mut tt, undo_info) = {
         let syntax = item_node.syntax();
-        let censor_cfg = cfg_process::process_cfg_attrs(syntax, &loc, db).unwrap_or_default();
+        let censor_cfg = cfg_process::process_cfg_attrs(db, syntax, &loc).unwrap_or_default();
         let mut fixups = fixup::fixup_syntax(map.as_ref(), syntax, span);
         fixups.append.retain(|it, _| match it {
             syntax::NodeOrToken::Token(_) => true,
diff --git a/crates/ide-completion/src/completions/postfix.rs b/crates/ide-completion/src/completions/postfix.rs
index 554f7e0..ae061c8 100644
--- a/crates/ide-completion/src/completions/postfix.rs
+++ b/crates/ide-completion/src/completions/postfix.rs
@@ -9,6 +9,7 @@
     ty_filter::TryEnum,
     SnippetCap,
 };
+use stdx::never;
 use syntax::{
     ast::{self, make, AstNode, AstToken},
     SyntaxKind::{BLOCK_EXPR, EXPR_STMT, FOR_EXPR, IF_EXPR, LOOP_EXPR, STMT_LIST, WHILE_EXPR},
@@ -319,7 +320,9 @@
 ) -> Option<impl Fn(&str, &str, &str) -> Builder + 'ctx> {
     let receiver_range = ctx.sema.original_range_opt(receiver.syntax())?.range;
     if ctx.source_range().end() < receiver_range.start() {
-        // This shouldn't happen, yet it does. I assume this might be due to an incorrect token mapping.
+        // This shouldn't happen, yet it does. I assume this might be due to an incorrect token
+        // mapping.
+        never!();
         return None;
     }
     let delete_range = TextRange::new(receiver_range.start(), ctx.source_range().end());