| package toml |
| |
| import ( |
| "fmt" |
| "log" |
| "strconv" |
| "strings" |
| "time" |
| "unicode" |
| "unicode/utf8" |
| ) |
| |
| type parser struct { |
| mapping map[string]interface{} |
| types map[string]tomlType |
| lx *lexer |
| |
| // A list of keys in the order that they appear in the TOML data. |
| ordered []Key |
| |
| // the full key for the current hash in scope |
| context Key |
| |
| // the base key name for everything except hashes |
| currentKey string |
| |
| // rough approximation of line number |
| approxLine int |
| |
| // A map of 'key.group.names' to whether they were created implicitly. |
| implicits map[string]bool |
| } |
| |
| type parseError string |
| |
| func (pe parseError) Error() string { |
| return string(pe) |
| } |
| |
| func parse(data string) (p *parser, err error) { |
| defer func() { |
| if r := recover(); r != nil { |
| var ok bool |
| if err, ok = r.(parseError); ok { |
| return |
| } |
| panic(r) |
| } |
| }() |
| |
| p = &parser{ |
| mapping: make(map[string]interface{}), |
| types: make(map[string]tomlType), |
| lx: lex(data), |
| ordered: make([]Key, 0), |
| implicits: make(map[string]bool), |
| } |
| for { |
| item := p.next() |
| if item.typ == itemEOF { |
| break |
| } |
| p.topLevel(item) |
| } |
| |
| return p, nil |
| } |
| |
| func (p *parser) panicf(format string, v ...interface{}) { |
| msg := fmt.Sprintf("Near line %d (last key parsed '%s'): %s", |
| p.approxLine, p.current(), fmt.Sprintf(format, v...)) |
| panic(parseError(msg)) |
| } |
| |
| func (p *parser) next() item { |
| it := p.lx.nextItem() |
| if it.typ == itemError { |
| p.panicf("%s", it.val) |
| } |
| return it |
| } |
| |
| func (p *parser) bug(format string, v ...interface{}) { |
| log.Fatalf("BUG: %s\n\n", fmt.Sprintf(format, v...)) |
| } |
| |
| func (p *parser) expect(typ itemType) item { |
| it := p.next() |
| p.assertEqual(typ, it.typ) |
| return it |
| } |
| |
| func (p *parser) assertEqual(expected, got itemType) { |
| if expected != got { |
| p.bug("Expected '%s' but got '%s'.", expected, got) |
| } |
| } |
| |
| func (p *parser) topLevel(item item) { |
| switch item.typ { |
| case itemCommentStart: |
| p.approxLine = item.line |
| p.expect(itemText) |
| case itemTableStart: |
| kg := p.next() |
| p.approxLine = kg.line |
| |
| var key Key |
| for ; kg.typ != itemTableEnd && kg.typ != itemEOF; kg = p.next() { |
| key = append(key, p.keyString(kg)) |
| } |
| p.assertEqual(itemTableEnd, kg.typ) |
| |
| p.establishContext(key, false) |
| p.setType("", tomlHash) |
| p.ordered = append(p.ordered, key) |
| case itemArrayTableStart: |
| kg := p.next() |
| p.approxLine = kg.line |
| |
| var key Key |
| for ; kg.typ != itemArrayTableEnd && kg.typ != itemEOF; kg = p.next() { |
| key = append(key, p.keyString(kg)) |
| } |
| p.assertEqual(itemArrayTableEnd, kg.typ) |
| |
| p.establishContext(key, true) |
| p.setType("", tomlArrayHash) |
| p.ordered = append(p.ordered, key) |
| case itemKeyStart: |
| kname := p.next() |
| p.approxLine = kname.line |
| p.currentKey = p.keyString(kname) |
| |
| val, typ := p.value(p.next()) |
| p.setValue(p.currentKey, val) |
| p.setType(p.currentKey, typ) |
| p.ordered = append(p.ordered, p.context.add(p.currentKey)) |
| p.currentKey = "" |
| default: |
| p.bug("Unexpected type at top level: %s", item.typ) |
| } |
| } |
| |
| // Gets a string for a key (or part of a key in a table name). |
| func (p *parser) keyString(it item) string { |
| switch it.typ { |
| case itemText: |
| return it.val |
| case itemString, itemMultilineString, |
| itemRawString, itemRawMultilineString: |
| s, _ := p.value(it) |
| return s.(string) |
| default: |
| p.bug("Unexpected key type: %s", it.typ) |
| panic("unreachable") |
| } |
| } |
| |
| // value translates an expected value from the lexer into a Go value wrapped |
| // as an empty interface. |
| func (p *parser) value(it item) (interface{}, tomlType) { |
| switch it.typ { |
| case itemString: |
| return p.replaceEscapes(it.val), p.typeOfPrimitive(it) |
| case itemMultilineString: |
| trimmed := stripFirstNewline(stripEscapedWhitespace(it.val)) |
| return p.replaceEscapes(trimmed), p.typeOfPrimitive(it) |
| case itemRawString: |
| return it.val, p.typeOfPrimitive(it) |
| case itemRawMultilineString: |
| return stripFirstNewline(it.val), p.typeOfPrimitive(it) |
| case itemBool: |
| switch it.val { |
| case "true": |
| return true, p.typeOfPrimitive(it) |
| case "false": |
| return false, p.typeOfPrimitive(it) |
| } |
| p.bug("Expected boolean value, but got '%s'.", it.val) |
| case itemInteger: |
| num, err := strconv.ParseInt(it.val, 10, 64) |
| if err != nil { |
| // See comment below for floats describing why we make a |
| // distinction between a bug and a user error. |
| if e, ok := err.(*strconv.NumError); ok && |
| e.Err == strconv.ErrRange { |
| |
| p.panicf("Integer '%s' is out of the range of 64-bit "+ |
| "signed integers.", it.val) |
| } else { |
| p.bug("Expected integer value, but got '%s'.", it.val) |
| } |
| } |
| return num, p.typeOfPrimitive(it) |
| case itemFloat: |
| num, err := strconv.ParseFloat(it.val, 64) |
| if err != nil { |
| // Distinguish float values. Normally, it'd be a bug if the lexer |
| // provides an invalid float, but it's possible that the float is |
| // out of range of valid values (which the lexer cannot determine). |
| // So mark the former as a bug but the latter as a legitimate user |
| // error. |
| // |
| // This is also true for integers. |
| if e, ok := err.(*strconv.NumError); ok && |
| e.Err == strconv.ErrRange { |
| |
| p.panicf("Float '%s' is out of the range of 64-bit "+ |
| "IEEE-754 floating-point numbers.", it.val) |
| } else { |
| p.bug("Expected float value, but got '%s'.", it.val) |
| } |
| } |
| return num, p.typeOfPrimitive(it) |
| case itemDatetime: |
| t, err := time.Parse("2006-01-02T15:04:05Z", it.val) |
| if err != nil { |
| p.bug("Expected Zulu formatted DateTime, but got '%s'.", it.val) |
| } |
| return t, p.typeOfPrimitive(it) |
| case itemArray: |
| array := make([]interface{}, 0) |
| types := make([]tomlType, 0) |
| |
| for it = p.next(); it.typ != itemArrayEnd; it = p.next() { |
| if it.typ == itemCommentStart { |
| p.expect(itemText) |
| continue |
| } |
| |
| val, typ := p.value(it) |
| array = append(array, val) |
| types = append(types, typ) |
| } |
| return array, p.typeOfArray(types) |
| } |
| p.bug("Unexpected value type: %s", it.typ) |
| panic("unreachable") |
| } |
| |
| // establishContext sets the current context of the parser, |
| // where the context is either a hash or an array of hashes. Which one is |
| // set depends on the value of the `array` parameter. |
| // |
| // Establishing the context also makes sure that the key isn't a duplicate, and |
| // will create implicit hashes automatically. |
| func (p *parser) establishContext(key Key, array bool) { |
| var ok bool |
| |
| // Always start at the top level and drill down for our context. |
| hashContext := p.mapping |
| keyContext := make(Key, 0) |
| |
| // We only need implicit hashes for key[0:-1] |
| for _, k := range key[0 : len(key)-1] { |
| _, ok = hashContext[k] |
| keyContext = append(keyContext, k) |
| |
| // No key? Make an implicit hash and move on. |
| if !ok { |
| p.addImplicit(keyContext) |
| hashContext[k] = make(map[string]interface{}) |
| } |
| |
| // If the hash context is actually an array of tables, then set |
| // the hash context to the last element in that array. |
| // |
| // Otherwise, it better be a table, since this MUST be a key group (by |
| // virtue of it not being the last element in a key). |
| switch t := hashContext[k].(type) { |
| case []map[string]interface{}: |
| hashContext = t[len(t)-1] |
| case map[string]interface{}: |
| hashContext = t |
| default: |
| p.panicf("Key '%s' was already created as a hash.", keyContext) |
| } |
| } |
| |
| p.context = keyContext |
| if array { |
| // If this is the first element for this array, then allocate a new |
| // list of tables for it. |
| k := key[len(key)-1] |
| if _, ok := hashContext[k]; !ok { |
| hashContext[k] = make([]map[string]interface{}, 0, 5) |
| } |
| |
| // Add a new table. But make sure the key hasn't already been used |
| // for something else. |
| if hash, ok := hashContext[k].([]map[string]interface{}); ok { |
| hashContext[k] = append(hash, make(map[string]interface{})) |
| } else { |
| p.panicf("Key '%s' was already created and cannot be used as "+ |
| "an array.", keyContext) |
| } |
| } else { |
| p.setValue(key[len(key)-1], make(map[string]interface{})) |
| } |
| p.context = append(p.context, key[len(key)-1]) |
| } |
| |
| // setValue sets the given key to the given value in the current context. |
| // It will make sure that the key hasn't already been defined, account for |
| // implicit key groups. |
| func (p *parser) setValue(key string, value interface{}) { |
| var tmpHash interface{} |
| var ok bool |
| |
| hash := p.mapping |
| keyContext := make(Key, 0) |
| for _, k := range p.context { |
| keyContext = append(keyContext, k) |
| if tmpHash, ok = hash[k]; !ok { |
| p.bug("Context for key '%s' has not been established.", keyContext) |
| } |
| switch t := tmpHash.(type) { |
| case []map[string]interface{}: |
| // The context is a table of hashes. Pick the most recent table |
| // defined as the current hash. |
| hash = t[len(t)-1] |
| case map[string]interface{}: |
| hash = t |
| default: |
| p.bug("Expected hash to have type 'map[string]interface{}', but "+ |
| "it has '%T' instead.", tmpHash) |
| } |
| } |
| keyContext = append(keyContext, key) |
| |
| if _, ok := hash[key]; ok { |
| // Typically, if the given key has already been set, then we have |
| // to raise an error since duplicate keys are disallowed. However, |
| // it's possible that a key was previously defined implicitly. In this |
| // case, it is allowed to be redefined concretely. (See the |
| // `tests/valid/implicit-and-explicit-after.toml` test in `toml-test`.) |
| // |
| // But we have to make sure to stop marking it as an implicit. (So that |
| // another redefinition provokes an error.) |
| // |
| // Note that since it has already been defined (as a hash), we don't |
| // want to overwrite it. So our business is done. |
| if p.isImplicit(keyContext) { |
| p.removeImplicit(keyContext) |
| return |
| } |
| |
| // Otherwise, we have a concrete key trying to override a previous |
| // key, which is *always* wrong. |
| p.panicf("Key '%s' has already been defined.", keyContext) |
| } |
| hash[key] = value |
| } |
| |
| // setType sets the type of a particular value at a given key. |
| // It should be called immediately AFTER setValue. |
| // |
| // Note that if `key` is empty, then the type given will be applied to the |
| // current context (which is either a table or an array of tables). |
| func (p *parser) setType(key string, typ tomlType) { |
| keyContext := make(Key, 0, len(p.context)+1) |
| for _, k := range p.context { |
| keyContext = append(keyContext, k) |
| } |
| if len(key) > 0 { // allow type setting for hashes |
| keyContext = append(keyContext, key) |
| } |
| p.types[keyContext.String()] = typ |
| } |
| |
| // addImplicit sets the given Key as having been created implicitly. |
| func (p *parser) addImplicit(key Key) { |
| p.implicits[key.String()] = true |
| } |
| |
| // removeImplicit stops tagging the given key as having been implicitly |
| // created. |
| func (p *parser) removeImplicit(key Key) { |
| p.implicits[key.String()] = false |
| } |
| |
| // isImplicit returns true if the key group pointed to by the key was created |
| // implicitly. |
| func (p *parser) isImplicit(key Key) bool { |
| return p.implicits[key.String()] |
| } |
| |
| // current returns the full key name of the current context. |
| func (p *parser) current() string { |
| if len(p.currentKey) == 0 { |
| return p.context.String() |
| } |
| if len(p.context) == 0 { |
| return p.currentKey |
| } |
| return fmt.Sprintf("%s.%s", p.context, p.currentKey) |
| } |
| |
| func stripFirstNewline(s string) string { |
| if len(s) == 0 || s[0] != '\n' { |
| return s |
| } |
| return s[1:len(s)] |
| } |
| |
| func stripEscapedWhitespace(s string) string { |
| esc := strings.Split(s, "\\\n") |
| if len(esc) > 1 { |
| for i := 1; i < len(esc); i++ { |
| esc[i] = strings.TrimLeftFunc(esc[i], unicode.IsSpace) |
| } |
| } |
| return strings.Join(esc, "") |
| } |
| |
| func (p *parser) replaceEscapes(str string) string { |
| var replaced []rune |
| s := []byte(str) |
| r := 0 |
| for r < len(s) { |
| if s[r] != '\\' { |
| c, size := utf8.DecodeRune(s[r:]) |
| r += size |
| replaced = append(replaced, c) |
| continue |
| } |
| r += 1 |
| if r >= len(s) { |
| p.bug("Escape sequence at end of string.") |
| return "" |
| } |
| switch s[r] { |
| default: |
| p.bug("Expected valid escape code after \\, but got %q.", s[r]) |
| return "" |
| case 'b': |
| replaced = append(replaced, rune(0x0008)) |
| r += 1 |
| case 't': |
| replaced = append(replaced, rune(0x0009)) |
| r += 1 |
| case 'n': |
| replaced = append(replaced, rune(0x000A)) |
| r += 1 |
| case 'f': |
| replaced = append(replaced, rune(0x000C)) |
| r += 1 |
| case 'r': |
| replaced = append(replaced, rune(0x000D)) |
| r += 1 |
| case '"': |
| replaced = append(replaced, rune(0x0022)) |
| r += 1 |
| case '\\': |
| replaced = append(replaced, rune(0x005C)) |
| r += 1 |
| case 'u': |
| // At this point, we know we have a Unicode escape of the form |
| // `uXXXX` at [r, r+5). (Because the lexer guarantees this |
| // for us.) |
| escaped := p.asciiEscapeToUnicode(s[r+1 : r+5]) |
| replaced = append(replaced, escaped) |
| r += 5 |
| case 'U': |
| // At this point, we know we have a Unicode escape of the form |
| // `uXXXX` at [r, r+9). (Because the lexer guarantees this |
| // for us.) |
| escaped := p.asciiEscapeToUnicode(s[r+1 : r+9]) |
| replaced = append(replaced, escaped) |
| r += 9 |
| } |
| } |
| return string(replaced) |
| } |
| |
| func (p *parser) asciiEscapeToUnicode(bs []byte) rune { |
| s := string(bs) |
| hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32) |
| if err != nil { |
| p.bug("Could not parse '%s' as a hexadecimal number, but the "+ |
| "lexer claims it's OK: %s", s, err) |
| } |
| |
| // BUG(burntsushi) |
| // I honestly don't understand how this works. I can't seem |
| // to find a way to make this fail. I figured this would fail on invalid |
| // UTF-8 characters like U+DCFF, but it doesn't. |
| if !utf8.ValidString(string(rune(hex))) { |
| p.panicf("Escaped character '\\u%s' is not valid UTF-8.", s) |
| } |
| return rune(hex) |
| } |
| |
| func isStringType(ty itemType) bool { |
| return ty == itemString || ty == itemMultilineString || |
| ty == itemRawString || ty == itemRawMultilineString |
| } |