| // Copyright 2020 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| package codifier |
| |
| import ( |
| "errors" |
| "fmt" |
| "regexp" |
| "strings" |
| ) |
| |
// commentRE matches a '#' character together with everything that follows it
// up to, but not including, the next newline.
var commentRE = regexp.MustCompile("#.*")
| |
// SectionType carries the pieces of a section once it has been located. As an
// illustration, the section `group("tests") { foo }` is reported as
// SectionType{Name: "tests", Kind: "group", Contents: `group("tests") { foo }`}.
type SectionType struct {
	// Name is the quoted name inside the parentheses, e.g. "tests".
	Name string
	// Kind is the identifier in front of the parentheses, e.g. "group".
	Kind string
	// Contents is the text of the section, ending with its closing delimiter.
	Contents string
}
| |
// findMatchingDelimiters locates the first openDelimiter in source and returns
// the prefix of source that ends with the closeDelimiter balancing it. For
// example, findMatchingDelimiters("aaa{bbb}ccc", '{', '}') returns "aaa{bbb}".
//
// Nested delimiters must be balanced: "{{}{}}" yields "{{}{}}" while "{{}"
// yields "". Close delimiters that occur before the first open delimiter do
// not belong to any pair and are skipped, so "a}b{c}d" yields "a}b{c}". The
// empty string is returned when no balanced pair is found.
func findMatchingDelimiters(source string, openDelimiter, closeDelimiter rune) string {
	// TODO(gboone@): This conversion does a utf-8 decode-and-copy of the whole
	// string on every call. Consider []byte instead for better performance.
	// Note that the current bottleneck for Codifier is the time spent building
	// and testing, which are O(mins). But users who change many files without
	// building/testing could see an improvement.
	runes := []rune(source)
	depth := 0
	for i, c := range runes {
		if c == openDelimiter {
			depth++
		}
		// Only a close delimiter that has an open partner may lower the depth.
		// Letting a stray one push the depth below zero would prevent the real
		// pair from ever bringing it back to zero.
		if c == closeDelimiter && depth > 0 {
			depth--
			if depth == 0 {
				return string(runes[:i+1])
			}
		}
	}
	return ""
}
| |
// findIndicesForAll returns the locations of every section in source whose
// type matches sectionType and whose quoted name matches sectionName. Each
// entry of the result is a [start, end] pair of byte offsets covering the
// match from any leading whitespace through the opening delimiter and the
// whitespace after it. The result is nil when nothing matches.
//
// sectionType and sectionName may contain regular expression syntax. Each is
// inserted as its own non-capturing group, so an alternation such as "a|b"
// applies only to that part of the pattern. openDelimiter is always matched
// literally, whichever character it is.
//
// An error is returned when any string argument is empty or when the
// resulting regular expression does not compile.
func findIndicesForAll(source, sectionType, sectionName string, openDelimiter rune) ([][]int, error) {
	if source == "" {
		return nil, errors.New("findIndicesForAll(): blank source")
	}
	if sectionType == "" {
		return nil, errors.New("findIndicesForAll(): blank sectionType")
	}
	if sectionName == "" {
		return nil, errors.New("findIndicesForAll(): blank sectionName")
	}

	// QuoteMeta escapes every regex metacharacter, so delimiters such as '(',
	// '[' or '.' are matched as themselves.
	delStr := regexp.QuoteMeta(string(openDelimiter))
	// Regular expression: matching over multiple lines, look for
	// `sectionType("sectionName"){`, allowing spaces between the operators,
	// where { is the given openDelimiter.
	reStr := fmt.Sprintf(`(?s)\s*(?:%s)\s*\(\s*"(?:%s)"\s*\)\s*%s\s*`, sectionType, sectionName, delStr)
	r, err := regexp.Compile(reStr)
	if err != nil {
		return nil, fmt.Errorf("findIndicesForAll(): could not compile regex (%q): %w", reStr, err)
	}
	return r.FindAllStringIndex(source, -1), nil
}
| |
// findIndicesForAllSectionsOfType returns the locations of every section in
// source whose type matches sectionType. Each entry of the result starts with
// four byte offsets: the start of the section type, the end of the match
// (just past the opening delimiter and any whitespace after it), and the
// start and end of the quoted section name. The result is nil when nothing
// matches.
//
// A sectionType must match as a whole word. For example, sectionType "test"
// will not match "foo-test(bar)". The sectionType may include regular
// expression syntax; it is inserted as a single group, so an alternation such
// as "a|b" applies only to the type. Capturing groups inside sectionType add
// further offset pairs between the match offsets and the name offsets.
// openDelimiter is always matched literally.
//
// An error is returned when source or sectionType is empty or when the
// resulting regular expression does not compile.
func findIndicesForAllSectionsOfType(source, sectionType string, openDelimiter rune) ([][]int, error) {
	if source == "" {
		return nil, errors.New("findIndicesForAllSectionsOfType(): blank source")
	}
	if sectionType == "" {
		return nil, errors.New("findIndicesForAllSectionsOfType(): blank sectionType")
	}

	// QuoteMeta escapes every regex metacharacter, so delimiters such as '(',
	// '[' or '.' are matched as themselves.
	delStr := regexp.QuoteMeta(string(openDelimiter))
	// Regular expression: matching over multiple lines, first match a
	// non-capturing group with one of the following: start of string, end of
	// string, whitespace, or a parenthesis, bracket, or brace. This group
	// ensures that the section type is matched on a word boundary, so ^foo
	// would match while ^test-foo would not. Next, look for
	// `sectionType("x"){`, allowing spaces between the operators, where x is a
	// name to match and { is the given openDelimiter. The section type is
	// wrapped in a capturing group so its exact start offset is known.
	reStr := fmt.Sprintf(`(?s)(?:\A|\z|[\(\)\[\]\{\}]|\s)(%s)\s*\(\s*\"(.*?)\"\s*\)\s*%s\s*`, sectionType, delStr)
	r, err := regexp.Compile(reStr)
	if err != nil {
		return nil, fmt.Errorf("findIndicesForAllSectionsOfType(): could not compile regex (%q): %w", reStr, err)
	}
	matches := r.FindAllStringSubmatchIndex(source, -1)
	// The leading non-capturing group may consume one boundary character, even
	// at offset 0, so the overall match start cannot be trusted. Use the start
	// of the wrapper group instead, then drop the wrapper's own offset pair so
	// the name offsets stay where callers expect them.
	for i, m := range matches {
		matches[i] = append([]int{m[2], m[1]}, m[4:]...)
	}
	return matches, nil
}
| |
// findIndicesForAllSectionsNamed returns the locations of every section in
// source whose quoted name matches sectionName. Each entry of the result
// starts with four byte offsets: the start of the section type, the end of
// the match (just past the opening delimiter and any whitespace after it),
// and the start and end of the section type. The result is nil when nothing
// matches.
//
// The section type is a run of word characters that begins at the start of
// source or directly after whitespace, a parenthesis, a bracket, or a brace.
// The sectionName may include regular expression syntax; it is inserted as
// its own non-capturing group, so an alternation such as "a|b" applies only
// to the name. openDelimiter is always matched literally.
//
// An error is returned when source or sectionName is empty or when the
// resulting regular expression does not compile.
func findIndicesForAllSectionsNamed(source, sectionName string, openDelimiter rune) ([][]int, error) {
	if source == "" {
		return nil, errors.New("findIndicesForAllSectionsNamed(): blank source")
	}
	if sectionName == "" {
		return nil, errors.New("findIndicesForAllSectionsNamed(): blank sectionName")
	}

	// QuoteMeta escapes every regex metacharacter, so delimiters such as '(',
	// '[' or '.' are matched as themselves.
	delStr := regexp.QuoteMeta(string(openDelimiter))
	// Regular expression: matching over multiple lines, look for
	// `sectionType("sectionName"){`, allowing spaces between the operators,
	// where { is the given openDelimiter and sectionType is captured.
	reStr := fmt.Sprintf(`(?s)(?:\A|\z|[\(\)\[\]\{\}]|\s)(\w+)\s*\(\s*"(?:%s)"\s*\)\s*%s\s*`, sectionName, delStr)
	r, err := regexp.Compile(reStr)
	if err != nil {
		return nil, fmt.Errorf("findIndicesForAllSectionsNamed(): could not compile regex (%q): %w", reStr, err)
	}
	matches := r.FindAllStringSubmatchIndex(source, -1)
	// The leading non-capturing group may consume one boundary character, even
	// at offset 0. The section starts where the captured type starts, so use
	// that offset as the start of the match.
	for _, m := range matches {
		m[0] = m[2]
	}
	return matches, nil
}
| |
| // sectionsWithTypeAndName returns all of the sections with the given type and |
| // name that are present in the given source. For example, |
| // sectionsWithTypeAndName("group", "tests", '{', '}') would return, all of the |
| // `group("tests") {...}` present. It returns a slice of SectionType containing |
| // the name, type, and contents for each section or nil if nothing found. |
| func sectionsWithTypeAndName(source, sectionType, sectionName string, openDelimeter, closeDelimeter rune) ([]SectionType, error) { |
| if source == "" { |
| return nil, errors.New("sectionsWithTypeAndName(): blank source given") |
| } |
| if sectionType == "" { |
| return nil, errors.New("sectionsWithTypeAndName(): blank sectionType given") |
| } |
| if sectionName == "" { |
| return nil, errors.New("sectionsWithTypeAndName(): blank sectionName given") |
| } |
| var sections []SectionType |
| sectionIndices, err := findIndicesForAll(source, sectionType, sectionName, openDelimeter) |
| if err != nil || sectionIndices == nil { |
| return nil, fmt.Errorf("sectionsWithTypeAndName(): section type %q with delimiters '%c', '%c' not found", |
| sectionType, openDelimeter, closeDelimeter) |
| } |
| for _, indices := range sectionIndices { |
| if len(indices) != 2 { |
| return nil, errors.New("sectionsWithTypeAndName(): section malformed") |
| } |
| start := indices[0] |
| contents := findMatchingDelimiters(source[start:], openDelimeter, closeDelimeter) |
| if contents == "" { |
| return nil, fmt.Errorf("sectionsWithTypeAndName(): couldn't find matching delimiters for %c and %c", |
| openDelimeter, closeDelimeter) |
| } |
| sections = append(sections, SectionType{sectionName, sectionType, contents}) |
| } |
| |
| return sections, nil |
| } |
| |
| // sectionsOfType returns all of the sections of the given type. For example, |
| // sectionsOfType("group") would return, say, `group("foo"), group("bar"), |
| // group("baz")`. It returns a slice of SectionType containing the name, type, |
| // and contents for each section or nil if nothing found. |
| func sectionsOfType(source, sectionType string, openDelimeter, closeDelimeter rune) ([]SectionType, error) { |
| if source == "" { |
| return nil, errors.New("sectionsOfType(): blank source given") |
| } |
| if sectionType == "" { |
| return nil, errors.New("sectionsOfType(): blank section given") |
| } |
| var sections []SectionType |
| sectionIndices, err := findIndicesForAllSectionsOfType(source, sectionType, openDelimeter) |
| if err != nil || sectionIndices == nil { |
| return nil, fmt.Errorf("sectionsOfType(): section type %q with delimiters '%c', '%c' not found", |
| sectionType, openDelimeter, closeDelimeter) |
| } |
| for _, indices := range sectionIndices { |
| if len(indices) != 4 { |
| return nil, errors.New("sectionsOfType(): section missing name indices") |
| } |
| start := indices[0] |
| name := source[indices[2]:indices[3]] |
| if name == "" { |
| return nil, errors.New("sectionsOfType(): section name blank") |
| } |
| contents := findMatchingDelimiters(source[start:], openDelimeter, closeDelimeter) |
| if contents == "" { |
| return nil, fmt.Errorf("sectionsOfType(): couldn't find matching delimiters for %c and %c", |
| openDelimeter, closeDelimeter) |
| } |
| sections = append(sections, SectionType{name, sectionType, contents}) |
| } |
| |
| return sections, nil |
| } |
| |
| // sectionsNamed returns all of the sections with the the given name. For |
| // example, sectionsNamed("tests") would return, say, `group("tests")`. It |
| // returns a slice of SectionType containing the name, type, and contents for |
| // each section or nil if nothing found. |
| func sectionsNamed(source, sectionName string, openDelimeter, closeDelimeter rune) ([]SectionType, error) { |
| if source == "" { |
| return nil, errors.New("sectionsNamed(): blank source given") |
| } |
| if sectionName == "" { |
| return nil, errors.New("sectionsNamed(): blank sectionName given") |
| } |
| var sections []SectionType |
| sectionIndices, err := findIndicesForAllSectionsNamed(source, sectionName, openDelimeter) |
| if err != nil || sectionIndices == nil { |
| return nil, fmt.Errorf("sectionsNamed(): section named %q with delimiters '%c', '%c' not found in %q", |
| sectionName, openDelimeter, closeDelimeter, source) |
| } |
| for _, indices := range sectionIndices { |
| if len(indices) != 4 { |
| return nil, errors.New("sectionsNamed(): section missing section type indices") |
| } |
| start := indices[0] |
| kind := source[indices[2]:indices[3]] |
| if kind == "" { |
| return nil, errors.New("sectionsNamed(): section type blank") |
| } |
| contents := findMatchingDelimiters(source[start:], openDelimeter, closeDelimeter) |
| if contents == "" { |
| return nil, fmt.Errorf("sectionsNamed(): couldn't find matching delimiters for %c and %c", |
| openDelimeter, closeDelimeter) |
| } |
| sections = append(sections, SectionType{sectionName, kind, contents}) |
| } |
| |
| return sections, nil |
| } |
| |
// extractList returns a slice of strings, one for each item in a name/value
// string. The string may be of the form <name> = [a,b,c] or <name> += [a,b,c].
// Comments (a '#' outside of a quoted string through the end of its line) are
// removed before the list is split on commas, so a comma inside a comment does
// not produce extra items. Each item then has leading and trailing quotes,
// backslashes, spaces, tabs, and linefeeds removed; items that end up empty
// are dropped. The name may include regular expression syntax, allowing for
// more flexible matching.
//
// The result is nil when no list is found. An error is returned only when the
// regular expression built from name does not compile.
func extractList(source, name string) ([]string, error) {
	// Regular expression: matching over multiple lines, look for "deps=[X]" or
	// "deps+=[X]", allowing spaces between the operators, and returning X, the
	// text between the brackets. Use ".+" to match at least one character,
	// which could be a quotation mark. Because ".+" is greedy, X runs to the
	// last ']' in source.
	reStr := fmt.Sprintf(`(?s)\s*%s\s*\+?=\s*\[\s*(.+)\s*\]`, name)
	r, err := regexp.Compile(reStr)
	if err != nil {
		return nil, fmt.Errorf("extractList(): could not compile regex (%q): %w", reStr, err)
	}
	var list []string
	for _, m := range r.FindAllStringSubmatch(source, -1) {
		// m[1] is never empty because the group requires at least one
		// character.
		for _, s := range strings.Split(stripListComments(m[1]), ",") {
			if item := strings.Trim(s, "\\'\"\n\t "); item != "" {
				list = append(list, item)
			}
		}
	}
	return list, nil
}

// stripListComments returns s with every comment removed. A comment starts at
// a '#' that is not inside a single- or double-quoted string and runs up to,
// but not including, the next newline.
func stripListComments(s string) string {
	var b strings.Builder
	b.Grow(len(s))
	var (
		quote     byte // quote character of the string being scanned, 0 if none
		inComment bool
	)
	// Scanning bytes is safe here: every character of interest is ASCII, and
	// ASCII bytes never occur inside a multi-byte UTF-8 sequence.
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c == '\n':
			// A line break ends a comment. A quoted string is also treated as
			// ending here so an unbalanced quote cannot hide comments on the
			// following lines.
			quote, inComment = 0, false
			b.WriteByte(c)
		case inComment:
			// Comment text is dropped.
		case quote != 0:
			b.WriteByte(c)
			if c == '\\' && i+1 < len(s) && s[i+1] != '\n' {
				// Copy the escaped character as-is so an escaped quote does
				// not close the string.
				i++
				b.WriteByte(s[i])
			} else if c == quote {
				quote = 0
			}
		case c == '#':
			inComment = true
		default:
			if c == '"' || c == '\'' {
				quote = c
			}
			b.WriteByte(c)
		}
	}
	return b.String()
}
| |
// getValue looks up key in source and returns the value assigned to it.
// Assignments have the form `key = "value"`, where the value may be wrapped in
// single or double quotes and consists of underscores, dashes, letters,
// numbers, or symbols. When several assignments match, the value of the first
// one is returned; when none match, the result is an empty string and a nil
// error. The key may include regular expression syntax, allowing for more
// flexible matching.
//
// An error is returned when the regular expression built from key does not
// compile, or when the match does not consist of exactly one captured value
// (which happens if key itself contains capturing groups).
func getValue(source, key string) (string, error) {
	// Regex: over multiple lines, match optional space, the key, optional
	// space, "=", optional space, then a quote, the captured value, and a
	// closing quote.
	pattern := fmt.Sprintf(`(?s)\s*%s\s*=\s*[\"\']{1}([_\-\p{L}\p{N}\p{S}]+)[\"\']{1}`, key)
	re, err := regexp.Compile(pattern)
	if err != nil {
		return "", fmt.Errorf("getValue(): could not compile regex (%q): %w", pattern, err)
	}
	switch groups := re.FindStringSubmatch(source); {
	case groups == nil:
		// No assignment for this key.
		return "", nil
	case len(groups) != 2:
		return "", fmt.Errorf("found more than one match in Get(%q, %q)", source, key)
	default:
		return groups[1], nil
	}
}
| |
| // extractNamedList returns all of the items with the given name in a source |
| // string, combining them from multiple clauses. Clauses are like <name> = |
| // [a,b,c] or <name> += [a,b,c]. The name may include regular expression syntax, |
| // allowing for more flexible matching. |
| func extractNamedList(source, name string) ([]string, error) { |
| var allItems []string |
| // Regular expression: matching over multiple lines, look for "NNNN=[" or |
| // "NNNN+=[", allowing spaces between the operators, where NNNN is the name. |
| reStr := fmt.Sprintf(`(?s)\s*%s\s*\+?=\s*\[\s*`, name) |
| r, err := regexp.Compile(reStr) |
| if err != nil { |
| return nil, fmt.Errorf("extractNamedList(): could not compile regex (%q): %w", reStr, err) |
| } |
| depLocations := r.FindAllStringIndex(source, -1) |
| if depLocations == nil { |
| return allItems, nil |
| } |
| for _, loc := range depLocations { |
| itemSection := findMatchingDelimiters(source[loc[0]:], '[', ']') |
| item, err := extractList(itemSection, name) |
| if err != nil { |
| return allItems, fmt.Errorf("extractNamedList(): %w", err) |
| } |
| allItems = append(allItems, item...) |
| } |
| return allItems, nil |
| } |