| // Package parser implements a parser and parse tree dumper for Dockerfiles. |
| package parser |
| |
| import ( |
| "bufio" |
| "bytes" |
| "fmt" |
| "io" |
| "regexp" |
| "runtime" |
| "strconv" |
| "strings" |
| "unicode" |
| |
| "github.com/docker/docker/builder/dockerfile/command" |
| "github.com/docker/docker/pkg/system" |
| "github.com/pkg/errors" |
| ) |
| |
// Node is one element of the parse tree produced for a Dockerfile.
//
// The tree is laid out like an s-expression. Value holds the string value of
// the current token, Next links to the following token at the same level, and
// Children holds the nodes nested beneath this one. For example:
//
//	(value next (child child-next child-next-next) next-next)
//
// This is not a general-purpose syntax tree, but the Dockerfile format is
// simple enough that this shape is more convenient for our needs than a
// "proper" parse tree would be.
type Node struct {
	Value      string          // actual content
	Next       *Node           // the next item in the current sexp
	Children   []*Node         // the children of this sexp
	Attributes map[string]bool // special attributes for this node
	Original   string          // original line used before parsing
	Flags      []string        // only top Node should have this set
	StartLine  int             // the line in the original dockerfile where the node begins
	endLine    int             // the line in the original dockerfile where the node ends
}

// Dump renders the AST rooted at `node` as a list of sexps and returns a
// string suitable for printing.
//
// The output is the node's Value, then its Flags (formatted with %q) when
// there are any, then every child wrapped in parentheses and terminated by a
// newline, then each node on the Next chain preceded by a space. A chained
// node that has children is dumped recursively; otherwise its Value is
// emitted as a quoted string. Leading and trailing whitespace is trimmed
// from the final result.
func (node *Node) Dump() string {
	var out bytes.Buffer
	out.WriteString(node.Value)

	if len(node.Flags) > 0 {
		fmt.Fprintf(&out, " %q", node.Flags)
	}

	for _, child := range node.Children {
		out.WriteString("(")
		out.WriteString(child.Dump())
		out.WriteString(")\n")
	}

	for sibling := node.Next; sibling != nil; sibling = sibling.Next {
		out.WriteString(" ")
		if len(sibling.Children) > 0 {
			out.WriteString(sibling.Dump())
			continue
		}
		out.WriteString(strconv.Quote(sibling.Value))
	}

	return strings.TrimSpace(out.String())
}

// lines records the first and last Dockerfile line covered by the node.
func (node *Node) lines(start, end int) {
	node.StartLine, node.endLine = start, end
}

// AddChild adds a new child node, and updates line information.
//
// The child is stamped with startLine/endLine. The parent's StartLine is set
// to startLine only while it is still negative (i.e. not yet assigned), and
// the parent's end line always moves to endLine.
func (node *Node) AddChild(child *Node, startLine, endLine int) {
	child.lines(startLine, endLine)

	node.Children = append(node.Children, child)
	node.endLine = endLine
	if node.StartLine < 0 {
		node.StartLine = startLine
	}
}
| |
var (
	// dispatch maps an instruction name to the function that parses that
	// instruction's arguments. It is populated in init and consulted by
	// newNodeFromLine.
	dispatch map[string]func(string, *Directive) (*Node, map[string]bool, error)
	// tokenWhitespace matches a run of one or more tab, vertical tab, form
	// feed, carriage return or space characters.
	tokenWhitespace = regexp.MustCompile(`[\t\v\f\r ]+`)
	// tokenEscapeCommand matches an escape parser directive line of the form
	// "# escape=<char>" and captures the single character following '=' in
	// the named group "escapechar".
	tokenEscapeCommand = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P<escapechar>.).*$`)
	// tokenPlatformCommand matches a platform parser directive line of the
	// form "# platform=<string>" and captures everything following '=' in
	// the named group "platform".
	tokenPlatformCommand = regexp.MustCompile(`^#[ \t]*platform[ \t]*=[ \t]*(?P<platform>.*)$`)
	// tokenComment matches input that begins with '#'.
	tokenComment = regexp.MustCompile(`^#.*$`)
)
| |
// DefaultEscapeToken is the default escape token: a backslash. It is the
// token NewDefaultDirective installs before any escape parser directive has
// been processed.
const DefaultEscapeToken = '\\'
| |
// Directive is the structure used during a build run to hold the state of
// parsing directives.
//
// escapeToken and lineContinuationRegex are always updated together by
// setEscapeToken. The *Seen flags are used to reject a repeated directive,
// and processingComplete is set once a line that is not a recognised
// directive has been examined, after which no further directives are looked
// for.
type Directive struct {
	escapeToken           rune           // Current escape token
	platformToken         string         // Current platform token
	lineContinuationRegex *regexp.Regexp // Current line continuation regex
	processingComplete    bool           // Whether we are done looking for directives
	escapeSeen            bool           // Whether the escape directive has been seen
	platformSeen          bool           // Whether the platform directive has been seen
}
| |
| // setEscapeToken sets the default token for escaping characters in a Dockerfile. |
| func (d *Directive) setEscapeToken(s string) error { |
| if s != "`" && s != "\\" { |
| return fmt.Errorf("invalid ESCAPE '%s'. Must be ` or \\", s) |
| } |
| d.escapeToken = rune(s[0]) |
| d.lineContinuationRegex = regexp.MustCompile(`\` + s + `[ \t]*$`) |
| return nil |
| } |
| |
| // setPlatformToken sets the default platform for pulling images in a Dockerfile. |
| func (d *Directive) setPlatformToken(s string) error { |
| s = strings.ToLower(s) |
| valid := []string{runtime.GOOS} |
| if system.LCOWSupported() { |
| valid = append(valid, "linux") |
| } |
| for _, item := range valid { |
| if s == item { |
| d.platformToken = s |
| return nil |
| } |
| } |
| return fmt.Errorf("invalid PLATFORM '%s'. Must be one of %v", s, valid) |
| } |
| |
| // possibleParserDirective looks for one or more parser directives '# escapeToken=<char>' and |
| // '# platform=<string>'. Parser directives must precede any builder instruction |
| // or other comments, and cannot be repeated. |
| func (d *Directive) possibleParserDirective(line string) error { |
| if d.processingComplete { |
| return nil |
| } |
| |
| tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line)) |
| if len(tecMatch) != 0 { |
| for i, n := range tokenEscapeCommand.SubexpNames() { |
| if n == "escapechar" { |
| if d.escapeSeen { |
| return errors.New("only one escape parser directive can be used") |
| } |
| d.escapeSeen = true |
| return d.setEscapeToken(tecMatch[i]) |
| } |
| } |
| } |
| |
| // Only recognise a platform token if LCOW is supported |
| if system.LCOWSupported() { |
| tpcMatch := tokenPlatformCommand.FindStringSubmatch(strings.ToLower(line)) |
| if len(tpcMatch) != 0 { |
| for i, n := range tokenPlatformCommand.SubexpNames() { |
| if n == "platform" { |
| if d.platformSeen { |
| return errors.New("only one platform parser directive can be used") |
| } |
| d.platformSeen = true |
| return d.setPlatformToken(tpcMatch[i]) |
| } |
| } |
| } |
| } |
| |
| d.processingComplete = true |
| return nil |
| } |
| |
| // NewDefaultDirective returns a new Directive with the default escapeToken token |
| func NewDefaultDirective() *Directive { |
| directive := Directive{} |
| directive.setEscapeToken(string(DefaultEscapeToken)) |
| return &directive |
| } |
| |
| func init() { |
| // Dispatch Table. see line_parsers.go for the parse functions. |
| // The command is parsed and mapped to the line parser. The line parser |
| // receives the arguments but not the command, and returns an AST after |
| // reformulating the arguments according to the rules in the parser |
| // functions. Errors are propagated up by Parse() and the resulting AST can |
| // be incorporated directly into the existing AST as a next. |
| dispatch = map[string]func(string, *Directive) (*Node, map[string]bool, error){ |
| command.Add: parseMaybeJSONToList, |
| command.Arg: parseNameOrNameVal, |
| command.Cmd: parseMaybeJSON, |
| command.Copy: parseMaybeJSONToList, |
| command.Entrypoint: parseMaybeJSON, |
| command.Env: parseEnv, |
| command.Expose: parseStringsWhitespaceDelimited, |
| command.From: parseStringsWhitespaceDelimited, |
| command.Healthcheck: parseHealthConfig, |
| command.Label: parseLabel, |
| command.Maintainer: parseString, |
| command.Onbuild: parseSubCommand, |
| command.Run: parseMaybeJSON, |
| command.Shell: parseMaybeJSON, |
| command.StopSignal: parseString, |
| command.User: parseString, |
| command.Volume: parseMaybeJSONToList, |
| command.Workdir: parseString, |
| } |
| } |
| |
| // newNodeFromLine splits the line into parts, and dispatches to a function |
| // based on the command and command arguments. A Node is created from the |
| // result of the dispatch. |
| func newNodeFromLine(line string, directive *Directive) (*Node, error) { |
| cmd, flags, args, err := splitCommand(line) |
| if err != nil { |
| return nil, err |
| } |
| |
| fn := dispatch[cmd] |
| // Ignore invalid Dockerfile instructions |
| if fn == nil { |
| fn = parseIgnore |
| } |
| next, attrs, err := fn(args, directive) |
| if err != nil { |
| return nil, err |
| } |
| |
| return &Node{ |
| Value: cmd, |
| Original: line, |
| Flags: flags, |
| Next: next, |
| Attributes: attrs, |
| }, nil |
| } |
| |
// Result is the result of parsing a Dockerfile
type Result struct {
	// AST is the root node of the parse tree; Parse adds one child to it per
	// parsed instruction.
	AST *Node
	// EscapeToken is the escape token held by the directive state when
	// parsing finished.
	EscapeToken rune
	// TODO @jhowardmsft - see https://github.com/moby/moby/issues/34617
	// This next field will be removed in a future update for LCOW support.
	//
	// OS is the platform token held by the directive state when parsing
	// finished; it is empty when no platform directive was accepted.
	OS string
	// Warnings holds the warning messages collected while parsing.
	Warnings []string
}
| |
| // PrintWarnings to the writer |
| func (r *Result) PrintWarnings(out io.Writer) { |
| if len(r.Warnings) == 0 { |
| return |
| } |
| fmt.Fprintf(out, strings.Join(r.Warnings, "\n")+"\n") |
| } |
| |
// Parse reads lines from a Reader, parses the lines into an AST and returns
// a Result holding the AST, the escape token and platform token in effect
// once parsing finished, and any warnings collected along the way.
//
// Every logical instruction (a line plus its continuation lines) becomes one
// child of the root node, tagged with the 1-based range of physical lines it
// was read from. Blank lines and comment-only lines produce no node.
//
// If processing a line or building a node fails, a nil Result and the error
// are returned. An error reported by the scanner itself is returned (after
// translation by handleScannerError) together with the Result built from
// whatever was read.
func Parse(rwc io.Reader) (*Result, error) {
	d := NewDefaultDirective()
	currentLine := 0
	// StartLine starts negative so that AddChild assigns it from the first child.
	root := &Node{StartLine: -1}
	scanner := bufio.NewScanner(rwc)
	warnings := []string{}

	var err error
	for scanner.Scan() {
		bytesRead := scanner.Bytes()
		if currentLine == 0 {
			// First line, strip the byte-order-marker if present
			bytesRead = bytes.TrimPrefix(bytesRead, utf8bom)
		}
		// Strip leading whitespace and comments, and give the directive
		// state a chance to recognise a parser directive on this line.
		bytesRead, err = processLine(d, bytesRead, true)
		if err != nil {
			return nil, err
		}
		currentLine++

		startLine := currentLine
		line, isEndOfLine := trimContinuationCharacter(string(bytesRead), d)
		if isEndOfLine && line == "" {
			// Nothing left on the line and no continuation: skip it.
			continue
		}

		// Gather continuation lines until one does not end with the
		// continuation token or the input runs out. Leading whitespace is
		// preserved on continuation lines.
		var hasEmptyContinuationLine bool
		for !isEndOfLine && scanner.Scan() {
			bytesRead, err := processLine(d, scanner.Bytes(), false)
			if err != nil {
				return nil, err
			}
			currentLine++

			if isComment(scanner.Bytes()) {
				// original line was a comment (processLine strips comments)
				continue
			}
			if isEmptyContinuationLine(bytesRead) {
				// Remember it for the warning below, but otherwise ignore it.
				hasEmptyContinuationLine = true
				continue
			}

			continuationLine := string(bytesRead)
			continuationLine, isEndOfLine = trimContinuationCharacter(continuationLine, d)
			line += continuationLine
		}

		if hasEmptyContinuationLine {
			warning := "[WARNING]: Empty continuation line found in:\n    " + line
			warnings = append(warnings, warning)
		}

		child, err := newNodeFromLine(line, d)
		if err != nil {
			return nil, err
		}
		// The node spans from the first physical line of the instruction to
		// the last line consumed for it.
		root.AddChild(child, startLine, currentLine)
	}

	if len(warnings) > 0 {
		warnings = append(warnings, "[WARNING]: Empty continuation lines will become errors in a future release.")
	}
	// The Result is returned even when the scanner reported an error.
	return &Result{
		AST:         root,
		Warnings:    warnings,
		EscapeToken: d.escapeToken,
		OS:          d.platformToken,
	}, handleScannerError(scanner.Err())
}
| |
| func trimComments(src []byte) []byte { |
| return tokenComment.ReplaceAll(src, []byte{}) |
| } |
| |
// trimWhitespace returns src without its leading Unicode whitespace. The
// result is a subslice of src, or nil when src contains nothing but
// whitespace (or is empty).
func trimWhitespace(src []byte) []byte {
	firstKept := bytes.IndexFunc(src, func(r rune) bool {
		return !unicode.IsSpace(r)
	})
	if firstKept < 0 {
		return nil
	}
	return src[firstKept:]
}
| |
| func isComment(line []byte) bool { |
| return tokenComment.Match(trimWhitespace(line)) |
| } |
| |
| func isEmptyContinuationLine(line []byte) bool { |
| return len(trimWhitespace(line)) == 0 |
| } |
| |
// utf8bom is the UTF-8 encoding of the byte-order mark (EF BB BF); Parse
// strips it from the start of the first line.
var utf8bom = []byte("\xEF\xBB\xBF")
| |
| func trimContinuationCharacter(line string, d *Directive) (string, bool) { |
| if d.lineContinuationRegex.MatchString(line) { |
| line = d.lineContinuationRegex.ReplaceAllString(line, "") |
| return line, false |
| } |
| return line, true |
| } |
| |
| // TODO: remove stripLeftWhitespace after deprecation period. It seems silly |
| // to preserve whitespace on continuation lines. Why is that done? |
| func processLine(d *Directive, token []byte, stripLeftWhitespace bool) ([]byte, error) { |
| if stripLeftWhitespace { |
| token = trimWhitespace(token) |
| } |
| return trimComments(token), d.possibleParserDirective(string(token)) |
| } |
| |
| func handleScannerError(err error) error { |
| switch err { |
| case bufio.ErrTooLong: |
| return errors.Errorf("dockerfile line greater than max allowed size of %d", bufio.MaxScanTokenSize-1) |
| default: |
| return err |
| } |
| } |