| package toml |
| |
| import ( |
| "fmt" |
| "strings" |
| "unicode" |
| "unicode/utf8" |
| ) |
| |
| type itemType int |
| |
| const ( |
| itemError itemType = iota |
| itemNIL // used in the parser to indicate no type |
| itemEOF |
| itemText |
| itemString |
| itemRawString |
| itemMultilineString |
| itemRawMultilineString |
| itemBool |
| itemInteger |
| itemFloat |
| itemDatetime |
| itemArray // the start of an array |
| itemArrayEnd |
| itemTableStart |
| itemTableEnd |
| itemArrayTableStart |
| itemArrayTableEnd |
| itemKeyStart |
| itemCommentStart |
| itemInlineTableStart |
| itemInlineTableEnd |
| ) |
| |
| const ( |
| eof = 0 |
| comma = ',' |
| tableStart = '[' |
| tableEnd = ']' |
| arrayTableStart = '[' |
| arrayTableEnd = ']' |
| tableSep = '.' |
| keySep = '=' |
| arrayStart = '[' |
| arrayEnd = ']' |
| commentStart = '#' |
| stringStart = '"' |
| stringEnd = '"' |
| rawStringStart = '\'' |
| rawStringEnd = '\'' |
| inlineTableStart = '{' |
| inlineTableEnd = '}' |
| ) |
| |
| type stateFn func(lx *lexer) stateFn |
| |
| type lexer struct { |
| input string |
| start int |
| pos int |
| line int |
| state stateFn |
| items chan item |
| |
| // Allow for backing up up to three runes. |
| // This is necessary because TOML contains 3-rune tokens (""" and '''). |
| prevWidths [3]int |
| nprev int // how many of prevWidths are in use |
| // If we emit an eof, we can still back up, but it is not OK to call |
| // next again. |
| atEOF bool |
| |
| // A stack of state functions used to maintain context. |
| // The idea is to reuse parts of the state machine in various places. |
| // For example, values can appear at the top level or within arbitrarily |
| // nested arrays. The last state on the stack is used after a value has |
| // been lexed. Similarly for comments. |
| stack []stateFn |
| } |
| |
| type item struct { |
| typ itemType |
| val string |
| line int |
| } |
| |
| func (lx *lexer) nextItem() item { |
| for { |
| select { |
| case item := <-lx.items: |
| return item |
| default: |
| lx.state = lx.state(lx) |
| } |
| } |
| } |
| |
| func lex(input string) *lexer { |
| lx := &lexer{ |
| input: input, |
| state: lexTop, |
| line: 1, |
| items: make(chan item, 10), |
| stack: make([]stateFn, 0, 10), |
| } |
| return lx |
| } |
| |
| func (lx *lexer) push(state stateFn) { |
| lx.stack = append(lx.stack, state) |
| } |
| |
| func (lx *lexer) pop() stateFn { |
| if len(lx.stack) == 0 { |
| return lx.errorf("BUG in lexer: no states to pop") |
| } |
| last := lx.stack[len(lx.stack)-1] |
| lx.stack = lx.stack[0 : len(lx.stack)-1] |
| return last |
| } |
| |
| func (lx *lexer) current() string { |
| return lx.input[lx.start:lx.pos] |
| } |
| |
| func (lx *lexer) emit(typ itemType) { |
| lx.items <- item{typ, lx.current(), lx.line} |
| lx.start = lx.pos |
| } |
| |
| func (lx *lexer) emitTrim(typ itemType) { |
| lx.items <- item{typ, strings.TrimSpace(lx.current()), lx.line} |
| lx.start = lx.pos |
| } |
| |
| func (lx *lexer) next() (r rune) { |
| if lx.atEOF { |
| panic("next called after EOF") |
| } |
| if lx.pos >= len(lx.input) { |
| lx.atEOF = true |
| return eof |
| } |
| |
| if lx.input[lx.pos] == '\n' { |
| lx.line++ |
| } |
| lx.prevWidths[2] = lx.prevWidths[1] |
| lx.prevWidths[1] = lx.prevWidths[0] |
| if lx.nprev < 3 { |
| lx.nprev++ |
| } |
| r, w := utf8.DecodeRuneInString(lx.input[lx.pos:]) |
| lx.prevWidths[0] = w |
| lx.pos += w |
| return r |
| } |
| |
| // ignore skips over the pending input before this point. |
| func (lx *lexer) ignore() { |
| lx.start = lx.pos |
| } |
| |
| // backup steps back one rune. Can be called only twice between calls to next. |
| func (lx *lexer) backup() { |
| if lx.atEOF { |
| lx.atEOF = false |
| return |
| } |
| if lx.nprev < 1 { |
| panic("backed up too far") |
| } |
| w := lx.prevWidths[0] |
| lx.prevWidths[0] = lx.prevWidths[1] |
| lx.prevWidths[1] = lx.prevWidths[2] |
| lx.nprev-- |
| lx.pos -= w |
| if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' { |
| lx.line-- |
| } |
| } |
| |
| // accept consumes the next rune if it's equal to `valid`. |
| func (lx *lexer) accept(valid rune) bool { |
| if lx.next() == valid { |
| return true |
| } |
| lx.backup() |
| return false |
| } |
| |
| // peek returns but does not consume the next rune in the input. |
| func (lx *lexer) peek() rune { |
| r := lx.next() |
| lx.backup() |
| return r |
| } |
| |
| // skip ignores all input that matches the given predicate. |
| func (lx *lexer) skip(pred func(rune) bool) { |
| for { |
| r := lx.next() |
| if pred(r) { |
| continue |
| } |
| lx.backup() |
| lx.ignore() |
| return |
| } |
| } |
| |
| // errorf stops all lexing by emitting an error and returning `nil`. |
| // Note that any value that is a character is escaped if it's a special |
| // character (newlines, tabs, etc.). |
| func (lx *lexer) errorf(format string, values ...interface{}) stateFn { |
| lx.items <- item{ |
| itemError, |
| fmt.Sprintf(format, values...), |
| lx.line, |
| } |
| return nil |
| } |
| |
| // lexTop consumes elements at the top level of TOML data. |
| func lexTop(lx *lexer) stateFn { |
| r := lx.next() |
| if isWhitespace(r) || isNL(r) { |
| return lexSkip(lx, lexTop) |
| } |
| switch r { |
| case commentStart: |
| lx.push(lexTop) |
| return lexCommentStart |
| case tableStart: |
| return lexTableStart |
| case eof: |
| if lx.pos > lx.start { |
| return lx.errorf("unexpected EOF") |
| } |
| lx.emit(itemEOF) |
| return nil |
| } |
| |
| // At this point, the only valid item can be a key, so we back up |
| // and let the key lexer do the rest. |
| lx.backup() |
| lx.push(lexTopEnd) |
| return lexKeyStart |
| } |
| |
| // lexTopEnd is entered whenever a top-level item has been consumed. (A value |
| // or a table.) It must see only whitespace, and will turn back to lexTop |
| // upon a newline. If it sees EOF, it will quit the lexer successfully. |
| func lexTopEnd(lx *lexer) stateFn { |
| r := lx.next() |
| switch { |
| case r == commentStart: |
| // a comment will read to a newline for us. |
| lx.push(lexTop) |
| return lexCommentStart |
| case isWhitespace(r): |
| return lexTopEnd |
| case isNL(r): |
| lx.ignore() |
| return lexTop |
| case r == eof: |
| lx.emit(itemEOF) |
| return nil |
| } |
| return lx.errorf("expected a top-level item to end with a newline, "+ |
| "comment, or EOF, but got %q instead", r) |
| } |
| |
| // lexTable lexes the beginning of a table. Namely, it makes sure that |
| // it starts with a character other than '.' and ']'. |
| // It assumes that '[' has already been consumed. |
| // It also handles the case that this is an item in an array of tables. |
| // e.g., '[[name]]'. |
| func lexTableStart(lx *lexer) stateFn { |
| if lx.peek() == arrayTableStart { |
| lx.next() |
| lx.emit(itemArrayTableStart) |
| lx.push(lexArrayTableEnd) |
| } else { |
| lx.emit(itemTableStart) |
| lx.push(lexTableEnd) |
| } |
| return lexTableNameStart |
| } |
| |
| func lexTableEnd(lx *lexer) stateFn { |
| lx.emit(itemTableEnd) |
| return lexTopEnd |
| } |
| |
| func lexArrayTableEnd(lx *lexer) stateFn { |
| if r := lx.next(); r != arrayTableEnd { |
| return lx.errorf("expected end of table array name delimiter %q, "+ |
| "but got %q instead", arrayTableEnd, r) |
| } |
| lx.emit(itemArrayTableEnd) |
| return lexTopEnd |
| } |
| |
| func lexTableNameStart(lx *lexer) stateFn { |
| lx.skip(isWhitespace) |
| switch r := lx.peek(); { |
| case r == tableEnd || r == eof: |
| return lx.errorf("unexpected end of table name " + |
| "(table names cannot be empty)") |
| case r == tableSep: |
| return lx.errorf("unexpected table separator " + |
| "(table names cannot be empty)") |
| case r == stringStart || r == rawStringStart: |
| lx.ignore() |
| lx.push(lexTableNameEnd) |
| return lexValue // reuse string lexing |
| default: |
| return lexBareTableName |
| } |
| } |
| |
| // lexBareTableName lexes the name of a table. It assumes that at least one |
| // valid character for the table has already been read. |
| func lexBareTableName(lx *lexer) stateFn { |
| r := lx.next() |
| if isBareKeyChar(r) { |
| return lexBareTableName |
| } |
| lx.backup() |
| lx.emit(itemText) |
| return lexTableNameEnd |
| } |
| |
| // lexTableNameEnd reads the end of a piece of a table name, optionally |
| // consuming whitespace. |
| func lexTableNameEnd(lx *lexer) stateFn { |
| lx.skip(isWhitespace) |
| switch r := lx.next(); { |
| case isWhitespace(r): |
| return lexTableNameEnd |
| case r == tableSep: |
| lx.ignore() |
| return lexTableNameStart |
| case r == tableEnd: |
| return lx.pop() |
| default: |
| return lx.errorf("expected '.' or ']' to end table name, "+ |
| "but got %q instead", r) |
| } |
| } |
| |
| // lexKeyStart consumes a key name up until the first non-whitespace character. |
| // lexKeyStart will ignore whitespace. |
| func lexKeyStart(lx *lexer) stateFn { |
| r := lx.peek() |
| switch { |
| case r == keySep: |
| return lx.errorf("unexpected key separator %q", keySep) |
| case isWhitespace(r) || isNL(r): |
| lx.next() |
| return lexSkip(lx, lexKeyStart) |
| case r == stringStart || r == rawStringStart: |
| lx.ignore() |
| lx.emit(itemKeyStart) |
| lx.push(lexKeyEnd) |
| return lexValue // reuse string lexing |
| default: |
| lx.ignore() |
| lx.emit(itemKeyStart) |
| return lexBareKey |
| } |
| } |
| |
| // lexBareKey consumes the text of a bare key. Assumes that the first character |
| // (which is not whitespace) has not yet been consumed. |
| func lexBareKey(lx *lexer) stateFn { |
| switch r := lx.next(); { |
| case isBareKeyChar(r): |
| return lexBareKey |
| case isWhitespace(r): |
| lx.backup() |
| lx.emit(itemText) |
| return lexKeyEnd |
| case r == keySep: |
| lx.backup() |
| lx.emit(itemText) |
| return lexKeyEnd |
| default: |
| return lx.errorf("bare keys cannot contain %q", r) |
| } |
| } |
| |
| // lexKeyEnd consumes the end of a key and trims whitespace (up to the key |
| // separator). |
| func lexKeyEnd(lx *lexer) stateFn { |
| switch r := lx.next(); { |
| case r == keySep: |
| return lexSkip(lx, lexValue) |
| case isWhitespace(r): |
| return lexSkip(lx, lexKeyEnd) |
| default: |
| return lx.errorf("expected key separator %q, but got %q instead", |
| keySep, r) |
| } |
| } |
| |
| // lexValue starts the consumption of a value anywhere a value is expected. |
| // lexValue will ignore whitespace. |
| // After a value is lexed, the last state on the next is popped and returned. |
| func lexValue(lx *lexer) stateFn { |
| // We allow whitespace to precede a value, but NOT newlines. |
| // In array syntax, the array states are responsible for ignoring newlines. |
| r := lx.next() |
| switch { |
| case isWhitespace(r): |
| return lexSkip(lx, lexValue) |
| case isDigit(r): |
| lx.backup() // avoid an extra state and use the same as above |
| return lexNumberOrDateStart |
| } |
| switch r { |
| case arrayStart: |
| lx.ignore() |
| lx.emit(itemArray) |
| return lexArrayValue |
| case inlineTableStart: |
| lx.ignore() |
| lx.emit(itemInlineTableStart) |
| return lexInlineTableValue |
| case stringStart: |
| if lx.accept(stringStart) { |
| if lx.accept(stringStart) { |
| lx.ignore() // Ignore """ |
| return lexMultilineString |
| } |
| lx.backup() |
| } |
| lx.ignore() // ignore the '"' |
| return lexString |
| case rawStringStart: |
| if lx.accept(rawStringStart) { |
| if lx.accept(rawStringStart) { |
| lx.ignore() // Ignore """ |
| return lexMultilineRawString |
| } |
| lx.backup() |
| } |
| lx.ignore() // ignore the "'" |
| return lexRawString |
| case '+', '-': |
| return lexNumberStart |
| case '.': // special error case, be kind to users |
| return lx.errorf("floats must start with a digit, not '.'") |
| } |
| if unicode.IsLetter(r) { |
| // Be permissive here; lexBool will give a nice error if the |
| // user wrote something like |
| // x = foo |
| // (i.e. not 'true' or 'false' but is something else word-like.) |
| lx.backup() |
| return lexBool |
| } |
| return lx.errorf("expected value but found %q instead", r) |
| } |
| |
| // lexArrayValue consumes one value in an array. It assumes that '[' or ',' |
| // have already been consumed. All whitespace and newlines are ignored. |
| func lexArrayValue(lx *lexer) stateFn { |
| r := lx.next() |
| switch { |
| case isWhitespace(r) || isNL(r): |
| return lexSkip(lx, lexArrayValue) |
| case r == commentStart: |
| lx.push(lexArrayValue) |
| return lexCommentStart |
| case r == comma: |
| return lx.errorf("unexpected comma") |
| case r == arrayEnd: |
| // NOTE(caleb): The spec isn't clear about whether you can have |
| // a trailing comma or not, so we'll allow it. |
| return lexArrayEnd |
| } |
| |
| lx.backup() |
| lx.push(lexArrayValueEnd) |
| return lexValue |
| } |
| |
| // lexArrayValueEnd consumes everything between the end of an array value and |
| // the next value (or the end of the array): it ignores whitespace and newlines |
| // and expects either a ',' or a ']'. |
| func lexArrayValueEnd(lx *lexer) stateFn { |
| r := lx.next() |
| switch { |
| case isWhitespace(r) || isNL(r): |
| return lexSkip(lx, lexArrayValueEnd) |
| case r == commentStart: |
| lx.push(lexArrayValueEnd) |
| return lexCommentStart |
| case r == comma: |
| lx.ignore() |
| return lexArrayValue // move on to the next value |
| case r == arrayEnd: |
| return lexArrayEnd |
| } |
| return lx.errorf( |
| "expected a comma or array terminator %q, but got %q instead", |
| arrayEnd, r, |
| ) |
| } |
| |
| // lexArrayEnd finishes the lexing of an array. |
| // It assumes that a ']' has just been consumed. |
| func lexArrayEnd(lx *lexer) stateFn { |
| lx.ignore() |
| lx.emit(itemArrayEnd) |
| return lx.pop() |
| } |
| |
| // lexInlineTableValue consumes one key/value pair in an inline table. |
| // It assumes that '{' or ',' have already been consumed. Whitespace is ignored. |
| func lexInlineTableValue(lx *lexer) stateFn { |
| r := lx.next() |
| switch { |
| case isWhitespace(r): |
| return lexSkip(lx, lexInlineTableValue) |
| case isNL(r): |
| return lx.errorf("newlines not allowed within inline tables") |
| case r == commentStart: |
| lx.push(lexInlineTableValue) |
| return lexCommentStart |
| case r == comma: |
| return lx.errorf("unexpected comma") |
| case r == inlineTableEnd: |
| return lexInlineTableEnd |
| } |
| lx.backup() |
| lx.push(lexInlineTableValueEnd) |
| return lexKeyStart |
| } |
| |
| // lexInlineTableValueEnd consumes everything between the end of an inline table |
| // key/value pair and the next pair (or the end of the table): |
| // it ignores whitespace and expects either a ',' or a '}'. |
| func lexInlineTableValueEnd(lx *lexer) stateFn { |
| r := lx.next() |
| switch { |
| case isWhitespace(r): |
| return lexSkip(lx, lexInlineTableValueEnd) |
| case isNL(r): |
| return lx.errorf("newlines not allowed within inline tables") |
| case r == commentStart: |
| lx.push(lexInlineTableValueEnd) |
| return lexCommentStart |
| case r == comma: |
| lx.ignore() |
| return lexInlineTableValue |
| case r == inlineTableEnd: |
| return lexInlineTableEnd |
| } |
| return lx.errorf("expected a comma or an inline table terminator %q, "+ |
| "but got %q instead", inlineTableEnd, r) |
| } |
| |
| // lexInlineTableEnd finishes the lexing of an inline table. |
| // It assumes that a '}' has just been consumed. |
| func lexInlineTableEnd(lx *lexer) stateFn { |
| lx.ignore() |
| lx.emit(itemInlineTableEnd) |
| return lx.pop() |
| } |
| |
| // lexString consumes the inner contents of a string. It assumes that the |
| // beginning '"' has already been consumed and ignored. |
| func lexString(lx *lexer) stateFn { |
| r := lx.next() |
| switch { |
| case r == eof: |
| return lx.errorf("unexpected EOF") |
| case isNL(r): |
| return lx.errorf("strings cannot contain newlines") |
| case r == '\\': |
| lx.push(lexString) |
| return lexStringEscape |
| case r == stringEnd: |
| lx.backup() |
| lx.emit(itemString) |
| lx.next() |
| lx.ignore() |
| return lx.pop() |
| } |
| return lexString |
| } |
| |
| // lexMultilineString consumes the inner contents of a string. It assumes that |
| // the beginning '"""' has already been consumed and ignored. |
| func lexMultilineString(lx *lexer) stateFn { |
| switch lx.next() { |
| case eof: |
| return lx.errorf("unexpected EOF") |
| case '\\': |
| return lexMultilineStringEscape |
| case stringEnd: |
| if lx.accept(stringEnd) { |
| if lx.accept(stringEnd) { |
| lx.backup() |
| lx.backup() |
| lx.backup() |
| lx.emit(itemMultilineString) |
| lx.next() |
| lx.next() |
| lx.next() |
| lx.ignore() |
| return lx.pop() |
| } |
| lx.backup() |
| } |
| } |
| return lexMultilineString |
| } |
| |
| // lexRawString consumes a raw string. Nothing can be escaped in such a string. |
| // It assumes that the beginning "'" has already been consumed and ignored. |
| func lexRawString(lx *lexer) stateFn { |
| r := lx.next() |
| switch { |
| case r == eof: |
| return lx.errorf("unexpected EOF") |
| case isNL(r): |
| return lx.errorf("strings cannot contain newlines") |
| case r == rawStringEnd: |
| lx.backup() |
| lx.emit(itemRawString) |
| lx.next() |
| lx.ignore() |
| return lx.pop() |
| } |
| return lexRawString |
| } |
| |
| // lexMultilineRawString consumes a raw string. Nothing can be escaped in such |
| // a string. It assumes that the beginning "'''" has already been consumed and |
| // ignored. |
| func lexMultilineRawString(lx *lexer) stateFn { |
| switch lx.next() { |
| case eof: |
| return lx.errorf("unexpected EOF") |
| case rawStringEnd: |
| if lx.accept(rawStringEnd) { |
| if lx.accept(rawStringEnd) { |
| lx.backup() |
| lx.backup() |
| lx.backup() |
| lx.emit(itemRawMultilineString) |
| lx.next() |
| lx.next() |
| lx.next() |
| lx.ignore() |
| return lx.pop() |
| } |
| lx.backup() |
| } |
| } |
| return lexMultilineRawString |
| } |
| |
| // lexMultilineStringEscape consumes an escaped character. It assumes that the |
| // preceding '\\' has already been consumed. |
| func lexMultilineStringEscape(lx *lexer) stateFn { |
| // Handle the special case first: |
| if isNL(lx.next()) { |
| return lexMultilineString |
| } |
| lx.backup() |
| lx.push(lexMultilineString) |
| return lexStringEscape(lx) |
| } |
| |
| func lexStringEscape(lx *lexer) stateFn { |
| r := lx.next() |
| switch r { |
| case 'b': |
| fallthrough |
| case 't': |
| fallthrough |
| case 'n': |
| fallthrough |
| case 'f': |
| fallthrough |
| case 'r': |
| fallthrough |
| case '"': |
| fallthrough |
| case '\\': |
| return lx.pop() |
| case 'u': |
| return lexShortUnicodeEscape |
| case 'U': |
| return lexLongUnicodeEscape |
| } |
| return lx.errorf("invalid escape character %q; only the following "+ |
| "escape characters are allowed: "+ |
| `\b, \t, \n, \f, \r, \", \\, \uXXXX, and \UXXXXXXXX`, r) |
| } |
| |
| func lexShortUnicodeEscape(lx *lexer) stateFn { |
| var r rune |
| for i := 0; i < 4; i++ { |
| r = lx.next() |
| if !isHexadecimal(r) { |
| return lx.errorf(`expected four hexadecimal digits after '\u', `+ |
| "but got %q instead", lx.current()) |
| } |
| } |
| return lx.pop() |
| } |
| |
| func lexLongUnicodeEscape(lx *lexer) stateFn { |
| var r rune |
| for i := 0; i < 8; i++ { |
| r = lx.next() |
| if !isHexadecimal(r) { |
| return lx.errorf(`expected eight hexadecimal digits after '\U', `+ |
| "but got %q instead", lx.current()) |
| } |
| } |
| return lx.pop() |
| } |
| |
| // lexNumberOrDateStart consumes either an integer, a float, or datetime. |
| func lexNumberOrDateStart(lx *lexer) stateFn { |
| r := lx.next() |
| if isDigit(r) { |
| return lexNumberOrDate |
| } |
| switch r { |
| case '_': |
| return lexNumber |
| case 'e', 'E': |
| return lexFloat |
| case '.': |
| return lx.errorf("floats must start with a digit, not '.'") |
| } |
| return lx.errorf("expected a digit but got %q", r) |
| } |
| |
| // lexNumberOrDate consumes either an integer, float or datetime. |
| func lexNumberOrDate(lx *lexer) stateFn { |
| r := lx.next() |
| if isDigit(r) { |
| return lexNumberOrDate |
| } |
| switch r { |
| case '-': |
| return lexDatetime |
| case '_': |
| return lexNumber |
| case '.', 'e', 'E': |
| return lexFloat |
| } |
| |
| lx.backup() |
| lx.emit(itemInteger) |
| return lx.pop() |
| } |
| |
| // lexDatetime consumes a Datetime, to a first approximation. |
| // The parser validates that it matches one of the accepted formats. |
| func lexDatetime(lx *lexer) stateFn { |
| r := lx.next() |
| if isDigit(r) { |
| return lexDatetime |
| } |
| switch r { |
| case '-', 'T', ':', '.', 'Z', '+': |
| return lexDatetime |
| } |
| |
| lx.backup() |
| lx.emit(itemDatetime) |
| return lx.pop() |
| } |
| |
| // lexNumberStart consumes either an integer or a float. It assumes that a sign |
| // has already been read, but that *no* digits have been consumed. |
| // lexNumberStart will move to the appropriate integer or float states. |
| func lexNumberStart(lx *lexer) stateFn { |
| // We MUST see a digit. Even floats have to start with a digit. |
| r := lx.next() |
| if !isDigit(r) { |
| if r == '.' { |
| return lx.errorf("floats must start with a digit, not '.'") |
| } |
| return lx.errorf("expected a digit but got %q", r) |
| } |
| return lexNumber |
| } |
| |
| // lexNumber consumes an integer or a float after seeing the first digit. |
| func lexNumber(lx *lexer) stateFn { |
| r := lx.next() |
| if isDigit(r) { |
| return lexNumber |
| } |
| switch r { |
| case '_': |
| return lexNumber |
| case '.', 'e', 'E': |
| return lexFloat |
| } |
| |
| lx.backup() |
| lx.emit(itemInteger) |
| return lx.pop() |
| } |
| |
| // lexFloat consumes the elements of a float. It allows any sequence of |
| // float-like characters, so floats emitted by the lexer are only a first |
| // approximation and must be validated by the parser. |
| func lexFloat(lx *lexer) stateFn { |
| r := lx.next() |
| if isDigit(r) { |
| return lexFloat |
| } |
| switch r { |
| case '_', '.', '-', '+', 'e', 'E': |
| return lexFloat |
| } |
| |
| lx.backup() |
| lx.emit(itemFloat) |
| return lx.pop() |
| } |
| |
| // lexBool consumes a bool string: 'true' or 'false. |
| func lexBool(lx *lexer) stateFn { |
| var rs []rune |
| for { |
| r := lx.next() |
| if !unicode.IsLetter(r) { |
| lx.backup() |
| break |
| } |
| rs = append(rs, r) |
| } |
| s := string(rs) |
| switch s { |
| case "true", "false": |
| lx.emit(itemBool) |
| return lx.pop() |
| } |
| return lx.errorf("expected value but found %q instead", s) |
| } |
| |
| // lexCommentStart begins the lexing of a comment. It will emit |
| // itemCommentStart and consume no characters, passing control to lexComment. |
| func lexCommentStart(lx *lexer) stateFn { |
| lx.ignore() |
| lx.emit(itemCommentStart) |
| return lexComment |
| } |
| |
| // lexComment lexes an entire comment. It assumes that '#' has been consumed. |
| // It will consume *up to* the first newline character, and pass control |
| // back to the last state on the stack. |
| func lexComment(lx *lexer) stateFn { |
| r := lx.peek() |
| if isNL(r) || r == eof { |
| lx.emit(itemText) |
| return lx.pop() |
| } |
| lx.next() |
| return lexComment |
| } |
| |
| // lexSkip ignores all slurped input and moves on to the next state. |
| func lexSkip(lx *lexer, nextState stateFn) stateFn { |
| return func(lx *lexer) stateFn { |
| lx.ignore() |
| return nextState |
| } |
| } |
| |
| // isWhitespace returns true if `r` is a whitespace character according |
| // to the spec. |
| func isWhitespace(r rune) bool { |
| return r == '\t' || r == ' ' |
| } |
| |
| func isNL(r rune) bool { |
| return r == '\n' || r == '\r' |
| } |
| |
| func isDigit(r rune) bool { |
| return r >= '0' && r <= '9' |
| } |
| |
| func isHexadecimal(r rune) bool { |
| return (r >= '0' && r <= '9') || |
| (r >= 'a' && r <= 'f') || |
| (r >= 'A' && r <= 'F') |
| } |
| |
| func isBareKeyChar(r rune) bool { |
| return (r >= 'A' && r <= 'Z') || |
| (r >= 'a' && r <= 'z') || |
| (r >= '0' && r <= '9') || |
| r == '_' || |
| r == '-' |
| } |
| |
| func (itype itemType) String() string { |
| switch itype { |
| case itemError: |
| return "Error" |
| case itemNIL: |
| return "NIL" |
| case itemEOF: |
| return "EOF" |
| case itemText: |
| return "Text" |
| case itemString, itemRawString, itemMultilineString, itemRawMultilineString: |
| return "String" |
| case itemBool: |
| return "Bool" |
| case itemInteger: |
| return "Integer" |
| case itemFloat: |
| return "Float" |
| case itemDatetime: |
| return "DateTime" |
| case itemTableStart: |
| return "TableStart" |
| case itemTableEnd: |
| return "TableEnd" |
| case itemKeyStart: |
| return "KeyStart" |
| case itemArray: |
| return "Array" |
| case itemArrayEnd: |
| return "ArrayEnd" |
| case itemCommentStart: |
| return "CommentStart" |
| } |
| panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype))) |
| } |
| |
| func (item item) String() string { |
| return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val) |
| } |