| package filters |
| |
| import ( |
| "unicode/utf8" |
| |
| "github.com/pkg/errors" |
| ) |
| |
| // NOTE(stevvooe): Most of the code in this file is copied from the stdlib |
| // strconv package and modified to be able to handle quoting with `/` and `|` |
| // as delimiters. The copyright is held by the Go authors. |
| |
// errQuoteSyntax is the error returned by unquote and unquoteChar whenever
// the input is not a syntactically valid quoted literal.
| var errQuoteSyntax = errors.New("quote syntax error") |
| |
| // UnquoteChar decodes the first character or byte in the escaped string |
| // or character literal represented by the string s. |
| // It returns four values: |
| // |
| // 1) value, the decoded Unicode code point or byte value; |
| // 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation; |
| // 3) tail, the remainder of the string after the character; and |
| // 4) an error that will be nil if the character is syntactically valid. |
| // |
| // The second argument, quote, specifies the type of literal being parsed |
| // and therefore which escaped quote character is permitted. |
| // If set to a single quote, it permits the sequence \' and disallows unescaped '. |
| // If set to a double quote, it permits \" and disallows unescaped ". |
| // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped. |
| // |
| // This is from Go strconv package, modified to support `|` and `/` as double |
| // quotes for use with regular expressions. |
| func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) { |
| // easy cases |
| switch c := s[0]; { |
| case c == quote && (quote == '\'' || quote == '"' || quote == '/' || quote == '|'): |
| err = errQuoteSyntax |
| return |
| case c >= utf8.RuneSelf: |
| r, size := utf8.DecodeRuneInString(s) |
| return r, true, s[size:], nil |
| case c != '\\': |
| return rune(s[0]), false, s[1:], nil |
| } |
| |
| // hard case: c is backslash |
| if len(s) <= 1 { |
| err = errQuoteSyntax |
| return |
| } |
| c := s[1] |
| s = s[2:] |
| |
| switch c { |
| case 'a': |
| value = '\a' |
| case 'b': |
| value = '\b' |
| case 'f': |
| value = '\f' |
| case 'n': |
| value = '\n' |
| case 'r': |
| value = '\r' |
| case 't': |
| value = '\t' |
| case 'v': |
| value = '\v' |
| case 'x', 'u', 'U': |
| n := 0 |
| switch c { |
| case 'x': |
| n = 2 |
| case 'u': |
| n = 4 |
| case 'U': |
| n = 8 |
| } |
| var v rune |
| if len(s) < n { |
| err = errQuoteSyntax |
| return |
| } |
| for j := 0; j < n; j++ { |
| x, ok := unhex(s[j]) |
| if !ok { |
| err = errQuoteSyntax |
| return |
| } |
| v = v<<4 | x |
| } |
| s = s[n:] |
| if c == 'x' { |
| // single-byte string, possibly not UTF-8 |
| value = v |
| break |
| } |
| if v > utf8.MaxRune { |
| err = errQuoteSyntax |
| return |
| } |
| value = v |
| multibyte = true |
| case '0', '1', '2', '3', '4', '5', '6', '7': |
| v := rune(c) - '0' |
| if len(s) < 2 { |
| err = errQuoteSyntax |
| return |
| } |
| for j := 0; j < 2; j++ { // one digit already; two more |
| x := rune(s[j]) - '0' |
| if x < 0 || x > 7 { |
| err = errQuoteSyntax |
| return |
| } |
| v = (v << 3) | x |
| } |
| s = s[2:] |
| if v > 255 { |
| err = errQuoteSyntax |
| return |
| } |
| value = v |
| case '\\': |
| value = '\\' |
| case '\'', '"', '|', '/': |
| if c != quote { |
| err = errQuoteSyntax |
| return |
| } |
| value = rune(c) |
| default: |
| err = errQuoteSyntax |
| return |
| } |
| tail = s |
| return |
| } |
| |
| // unquote interprets s as a single-quoted, double-quoted, |
| // or backquoted Go string literal, returning the string value |
| // that s quotes. (If s is single-quoted, it would be a Go |
| // character literal; Unquote returns the corresponding |
| // one-character string.) |
| // |
| // This is modified from the standard library to support `|` and `/` as quote |
| // characters for use with regular expressions. |
| func unquote(s string) (string, error) { |
| n := len(s) |
| if n < 2 { |
| return "", errQuoteSyntax |
| } |
| quote := s[0] |
| if quote != s[n-1] { |
| return "", errQuoteSyntax |
| } |
| s = s[1 : n-1] |
| |
| if quote == '`' { |
| if contains(s, '`') { |
| return "", errQuoteSyntax |
| } |
| if contains(s, '\r') { |
| // -1 because we know there is at least one \r to remove. |
| buf := make([]byte, 0, len(s)-1) |
| for i := 0; i < len(s); i++ { |
| if s[i] != '\r' { |
| buf = append(buf, s[i]) |
| } |
| } |
| return string(buf), nil |
| } |
| return s, nil |
| } |
| if quote != '"' && quote != '\'' && quote != '|' && quote != '/' { |
| return "", errQuoteSyntax |
| } |
| if contains(s, '\n') { |
| return "", errQuoteSyntax |
| } |
| |
| // Is it trivial? Avoid allocation. |
| if !contains(s, '\\') && !contains(s, quote) { |
| switch quote { |
| case '"', '/', '|': // pipe and slash are treated like double quote |
| return s, nil |
| case '\'': |
| r, size := utf8.DecodeRuneInString(s) |
| if size == len(s) && (r != utf8.RuneError || size != 1) { |
| return s, nil |
| } |
| } |
| } |
| |
| var runeTmp [utf8.UTFMax]byte |
| buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. |
| for len(s) > 0 { |
| c, multibyte, ss, err := unquoteChar(s, quote) |
| if err != nil { |
| return "", err |
| } |
| s = ss |
| if c < utf8.RuneSelf || !multibyte { |
| buf = append(buf, byte(c)) |
| } else { |
| n := utf8.EncodeRune(runeTmp[:], c) |
| buf = append(buf, runeTmp[:n]...) |
| } |
| if quote == '\'' && len(s) != 0 { |
| // single-quoted must be single character |
| return "", errQuoteSyntax |
| } |
| } |
| return string(buf), nil |
| } |
| |
// contains reports whether the byte c occurs anywhere in s. The comparison
// is byte-wise, so c may be any byte value, including part of a multi-byte
// UTF-8 sequence.
func contains(s string, c byte) bool {
	for _, b := range []byte(s) {
		if b == c {
			return true
		}
	}
	return false
}
| |
// unhex converts the ASCII hexadecimal digit b (0-9, a-f or A-F) to its
// numeric value. ok is false, and v is zero, when b is not a hex digit.
func unhex(b byte) (v rune, ok bool) {
	switch {
	case b >= '0' && b <= '9':
		return rune(b - '0'), true
	case b >= 'a' && b <= 'f':
		return rune(b-'a') + 10, true
	case b >= 'A' && b <= 'F':
		return rune(b-'A') + 10, true
	default:
		return 0, false
	}
}