blob: 3a8e72395c66d491b711323467d9d59919ce40bb [file] [log] [blame]
package filters
import (
"fmt"
"unicode"
"unicode/utf8"
)
const (
tokenEOF = -(iota + 1)
tokenQuoted
tokenValue
tokenField
tokenSeparator
tokenOperator
tokenIllegal
)
type token rune
func (t token) String() string {
switch t {
case tokenEOF:
return "EOF"
case tokenQuoted:
return "Quoted"
case tokenValue:
return "Value"
case tokenField:
return "Field"
case tokenSeparator:
return "Separator"
case tokenOperator:
return "Operator"
case tokenIllegal:
return "Illegal"
}
return string(t)
}
func (t token) GoString() string {
return "token" + t.String()
}
type scanner struct {
input string
pos int
ppos int // bounds the current rune in the string
value bool
}
func (s *scanner) init(input string) {
s.input = input
s.pos = 0
s.ppos = 0
}
func (s *scanner) next() rune {
if s.pos >= len(s.input) {
return tokenEOF
}
s.pos = s.ppos
r, w := utf8.DecodeRuneInString(s.input[s.ppos:])
s.ppos += w
if r == utf8.RuneError {
if w > 0 {
return tokenIllegal
}
return tokenEOF
}
if r == 0 {
return tokenIllegal
}
return r
}
func (s *scanner) peek() rune {
pos := s.pos
ppos := s.ppos
ch := s.next()
s.pos = pos
s.ppos = ppos
return ch
}
func (s *scanner) scan() (nextp int, tk token, text string) {
var (
ch = s.next()
pos = s.pos
)
chomp:
switch {
case ch == tokenEOF:
case ch == tokenIllegal:
case isQuoteRune(ch):
s.scanQuoted(ch)
return pos, tokenQuoted, s.input[pos:s.ppos]
case isSeparatorRune(ch):
s.value = false
return pos, tokenSeparator, s.input[pos:s.ppos]
case isOperatorRune(ch):
s.scanOperator()
s.value = true
return pos, tokenOperator, s.input[pos:s.ppos]
case unicode.IsSpace(ch):
// chomp
ch = s.next()
pos = s.pos
goto chomp
case s.value:
s.scanValue()
s.value = false
return pos, tokenValue, s.input[pos:s.ppos]
case isFieldRune(ch):
s.scanField()
return pos, tokenField, s.input[pos:s.ppos]
}
return s.pos, token(ch), ""
}
func (s *scanner) scanField() {
for {
ch := s.peek()
if !isFieldRune(ch) {
break
}
s.next()
}
}
func (s *scanner) scanOperator() {
for {
ch := s.peek()
switch ch {
case '=', '!', '~':
s.next()
default:
return
}
}
}
func (s *scanner) scanValue() {
for {
ch := s.peek()
if !isValueRune(ch) {
break
}
s.next()
}
}
func (s *scanner) scanQuoted(quote rune) {
ch := s.next() // read character after quote
for ch != quote {
if ch == '\n' || ch < 0 {
s.error("literal not terminated")
return
}
if ch == '\\' {
ch = s.scanEscape(quote)
} else {
ch = s.next()
}
}
return
}
func (s *scanner) scanEscape(quote rune) rune {
ch := s.next() // read character after '/'
switch ch {
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
// nothing to do
ch = s.next()
case '0', '1', '2', '3', '4', '5', '6', '7':
ch = s.scanDigits(ch, 8, 3)
case 'x':
ch = s.scanDigits(s.next(), 16, 2)
case 'u':
ch = s.scanDigits(s.next(), 16, 4)
case 'U':
ch = s.scanDigits(s.next(), 16, 8)
default:
s.error("illegal char escape")
}
return ch
}
func (s *scanner) scanDigits(ch rune, base, n int) rune {
for n > 0 && digitVal(ch) < base {
ch = s.next()
n--
}
if n > 0 {
s.error("illegal char escape")
}
return ch
}
func (s *scanner) error(msg string) {
fmt.Println("error fixme", msg)
}
func digitVal(ch rune) int {
switch {
case '0' <= ch && ch <= '9':
return int(ch - '0')
case 'a' <= ch && ch <= 'f':
return int(ch - 'a' + 10)
case 'A' <= ch && ch <= 'F':
return int(ch - 'A' + 10)
}
return 16 // larger than any legal digit val
}
func isFieldRune(r rune) bool {
return (r == '_' || isAlphaRune(r) || isDigitRune(r))
}
func isAlphaRune(r rune) bool {
return r >= 'A' && r <= 'Z' || r >= 'a' && r <= 'z'
}
func isDigitRune(r rune) bool {
return r >= '0' && r <= '9'
}
func isOperatorRune(r rune) bool {
switch r {
case '=', '!', '~':
return true
}
return false
}
func isQuoteRune(r rune) bool {
switch r {
case '/', '|', '"': // maybe add single quoting?
return true
}
return false
}
func isSeparatorRune(r rune) bool {
switch r {
case ',', '.':
return true
}
return false
}
func isValueRune(r rune) bool {
return r != ',' && !unicode.IsSpace(r) &&
(unicode.IsLetter(r) ||
unicode.IsDigit(r) ||
unicode.IsNumber(r) ||
unicode.IsGraphic(r) ||
unicode.IsPunct(r))
}