Merge pull request #16 from shepmaster/dot-delimited-demangling
Support demangling symbols with dot-delimited words at the end
diff --git a/src/lib.rs b/src/lib.rs
index d14f4f6..2d0d7c8 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -36,6 +36,7 @@
pub struct Demangle<'a> {
original: &'a str,
inner: &'a str,
+ suffix: &'a str,
valid: bool,
/// The number of ::-separated elements in the original name.
elements: usize,
@@ -43,15 +44,14 @@
/// De-mangles a Rust symbol into a more readable version
///
-/// All rust symbols by default are mangled as they contain characters that
+/// All Rust symbols by default are mangled as they contain characters that
/// cannot be represented in all object files. The mangling mechanism is similar
/// to C++'s, but Rust has a few specifics to handle items like lifetimes in
/// symbols.
///
-/// This function will take a **mangled** symbol (typically acquired from a
-/// `Symbol` which is in turn resolved from a `Frame`) and then writes the
-/// de-mangled version into the given `writer`. If the symbol does not look like
-/// a mangled symbol, it is still written to `writer`.
+/// This function takes a **mangled** symbol and returns a `Demangle` value. When
+/// that value is printed, the de-mangled version is written. If the symbol does
+/// not look like a mangled symbol, the original value is written instead.
///
/// # Examples
///
@@ -63,7 +63,7 @@
/// assert_eq!(demangle("foo").to_string(), "foo");
/// ```
-// All rust symbols are in theory lists of "::"-separated identifiers. Some
+// All Rust symbols are in theory lists of "::"-separated identifiers. Some
// assemblers, however, can't handle these characters in symbol names. To get
// around this, we use C++-style mangling. The mangling method is:
//
@@ -82,7 +82,7 @@
// etc. Additionally, this doesn't handle glue symbols at all.
pub fn demangle(mut s: &str) -> Demangle {
// During ThinLTO LLVM may import and rename internal symbols, so strip out
- // those endings first as they're on of the last manglings applied to symbol
+ // those endings first as they're one of the last manglings applied to symbol
// names.
let llvm = ".llvm.";
if let Some(i) = s.find(llvm) {
@@ -99,8 +99,20 @@
}
}
+ // Some output, such as LLVM IR, appends extra period-delimited words to the
+ // symbol. Check whether we are in that case and save the trailing words if so.
+ let mut suffix = "";
+ if let Some(i) = s.rfind("E.") {
+ let (head, tail) = s.split_at(i + 1); // After the E, before the period
+
+ if is_symbol_like(tail) {
+ s = head;
+ suffix = tail;
+ }
+ }
+
// First validate the symbol. If it doesn't look like anything we're
- // expecting, we just print it literally. Note that we must handle non-rust
+ // expecting, we just print it literally. Note that we must handle non-Rust
// symbols because we could have any function in the backtrace.
let mut valid = true;
let mut inner = s;
@@ -156,6 +168,7 @@
Demangle {
inner: inner,
+ suffix: suffix,
valid: valid,
elements: elements,
original: s,
@@ -203,6 +216,35 @@
s.starts_with('h') && s[1..].chars().all(|c| c.is_digit(16))
}
+fn is_symbol_like(s: &str) -> bool {
+ s.chars().all(|c| {
+ // Once `char::is_ascii_punctuation` and `char::is_ascii_alphanumeric`
+ // have been stable for long enough, use those instead for clarity
+ is_ascii_alphanumeric(c) || is_ascii_punctuation(c)
+ })
+}
+
+// Copied from the documentation of `char::is_ascii_alphanumeric`
+fn is_ascii_alphanumeric(c: char) -> bool {
+ match c {
+ '\u{0041}' ... '\u{005A}' |
+ '\u{0061}' ... '\u{007A}' |
+ '\u{0030}' ... '\u{0039}' => true,
+ _ => false,
+ }
+}
+
+// Copied from the documentation of `char::is_ascii_punctuation`
+fn is_ascii_punctuation(c: char) -> bool {
+ match c {
+ '\u{0021}' ... '\u{002F}' |
+ '\u{003A}' ... '\u{0040}' |
+ '\u{005B}' ... '\u{0060}' |
+ '\u{007B}' ... '\u{007E}' => true,
+ _ => false,
+ }
+}
+
impl<'a> fmt::Display for Demangle<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// Alright, let's do this.
@@ -289,6 +331,8 @@
}
}
+ try!(f.write_str(self.suffix));
+
Ok(())
}
}
@@ -400,6 +444,17 @@
}
#[test]
+ fn demangle_llvm_ir_branch_labels() {
+ t!("_ZN4core5slice77_$LT$impl$u20$core..ops..index..IndexMut$LT$I$GT$$u20$for$u20$$u5b$T$u5d$$GT$9index_mut17haf9727c2edfbc47bE.exit.i.i", "core::slice::<impl core::ops::index::IndexMut<I> for [T]>::index_mut::haf9727c2edfbc47b.exit.i.i");
+ t_nohash!("_ZN4core5slice77_$LT$impl$u20$core..ops..index..IndexMut$LT$I$GT$$u20$for$u20$$u5b$T$u5d$$GT$9index_mut17haf9727c2edfbc47bE.exit.i.i", "core::slice::<impl core::ops::index::IndexMut<I> for [T]>::index_mut.exit.i.i");
+ }
+
+ #[test]
+ fn demangle_ignores_suffix_that_doesnt_look_like_a_symbol() {
+ t!("_ZN3fooE.llvm moocow", "_ZN3fooE.llvm moocow");
+ }
+
+ #[test]
fn dont_panic() {
super::demangle("_ZN2222222222222222222222EE").to_string();
super::demangle("_ZN5*70527e27.ll34csaғE").to_string();