blob: 45e526e18a4dfe49db38afad207d5dc0e3c49143 [file] [log] [blame]
// Copyright 2020 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package codifier
import (
"errors"
"fmt"
"regexp"
"strings"
)
// commentRE matches a '#' together with everything that follows it on the same
// line. extractList uses it to strip trailing comments from list items.
var commentRE = regexp.MustCompile("#.*")
// SectionType describes one section located in a source string. For example,
// the section `group("tests") { foo }` is reported as
//
//	SectionType{Name: "tests", Kind: "group", Contents: `group("tests") { foo }`}
type SectionType struct {
	// Name is the quoted name of the section, e.g. "tests".
	Name string
	// Kind is the type of the section, e.g. "group".
	Kind string
	// Contents is the text of the section through its closing delimiter.
	Contents string
}
// findMatchingDelimiters finds the first occurrence of the open delimiter, then
// returns the prefix of source up to and including the matching close
// delimiter. For example,
//
//	findMatchingDelimiters("aaa{bbb}ccc", '{', '}') -> "aaa{bbb}"
//
// The delimiters must be correctly balanced: "{{}{}}" returns "{{}{}}", but
// "{{}" returns "". The result is always a byte-for-byte prefix of source (it
// is a slice of source, not a re-encoded copy), so it can be searched for in,
// or sliced out of, the original string even if source contains invalid UTF-8.
func findMatchingDelimiters(source string, openDelimiter, closeDelimiter rune) string {
	depth := 0
	closed := false
	// Ranging over the string decodes one rune at a time without first copying
	// the whole input into a []rune. i is the byte offset where each rune starts.
	for i, c := range source {
		if closed {
			// i is the offset of the rune right after the matching close
			// delimiter, i.e. one past the end of the section.
			return source[:i]
		}
		// Two independent checks rather than a switch: when the open and close
		// delimiters are the same rune, it both opens and closes the section.
		if c == openDelimiter {
			depth++
		}
		if c == closeDelimiter {
			depth--
			closed = depth == 0
		}
	}
	if closed {
		// The matching close delimiter was the final rune of source.
		return source
	}
	return ""
}
// findIndicesForAll returns the indices of every section in source that matches
// both sectionType and sectionName. Each entry of the result is a [start, end]
// pair delimiting the match, which runs from any whitespace preceding the
// section type through the opening delimiter and any whitespace following it.
// The sectionType and sectionName may include regular expression syntax,
// allowing for more flexible matching. The openDelimiter is always matched
// literally.
//
// It returns nil indices when nothing matches, and an error when any of the
// string arguments is blank or the resulting expression does not compile.
func findIndicesForAll(source, sectionType, sectionName string, openDelimiter rune) ([][]int, error) {
	if source == "" {
		return nil, errors.New("findIndicesForAll(): blank source")
	}
	if sectionType == "" {
		return nil, errors.New("findIndicesForAll(): blank sectionType")
	}
	if sectionName == "" {
		return nil, errors.New("findIndicesForAll(): blank sectionName")
	}
	// Escape the delimiter so that any rune with a special meaning in a regular
	// expression ('[', '(', '{', '.', ...) is matched as itself.
	delStr := regexp.QuoteMeta(string(openDelimiter))

	// Regular expression: matching over multiple lines, look for
	// `sectionType("sectionName"){`, allowing spaces between the operators,
	// where { is the given openDelimiter.
	reStr := fmt.Sprintf(`(?s)\s*%s\s*\(\s*"%s"\s*\)\s*%s\s*`, sectionType, sectionName, delStr)
	r, err := regexp.Compile(reStr)
	if err != nil {
		return nil, fmt.Errorf("findIndicesForAll(): could not compile regex (%q): %w", reStr, err)
	}
	return r.FindAllStringIndex(source, -1), nil
}
// findIndicesForAllSectionsOfType returns the indices of every section in
// source whose type matches sectionType. Each entry of the result has the form
// [start, end, nameStart, nameEnd]: start and end delimit the match from the
// first character of the section type through the opening delimiter and any
// whitespace following it, and nameStart and nameEnd delimit the section name
// found between the quotes. A sectionType must match as a whole word. For
// example, sectionType "test" will not match "foo-test(bar)". The sectionType
// may include regular expression syntax, allowing for more flexible matching.
// The openDelimiter is always matched literally.
//
// It returns nil indices when nothing matches, and an error when source or
// sectionType is blank or the resulting expression does not compile.
func findIndicesForAllSectionsOfType(source, sectionType string, openDelimiter rune) ([][]int, error) {
	if source == "" {
		return nil, errors.New("findIndicesForAllSectionsOfType(): blank source")
	}
	if sectionType == "" {
		return nil, errors.New("findIndicesForAllSectionsOfType(): blank sectionType")
	}
	// Escape the delimiter so that any rune with a special meaning in a regular
	// expression ('[', '(', '{', '.', ...) is matched as itself.
	delStr := regexp.QuoteMeta(string(openDelimiter))

	// Regular expression: matching over multiple lines, first match a group with
	// one of the following: start of string, end of string, whitespace, or a
	// parenthesis, bracket, or brace. This group ensures that the section type is
	// matched on a word boundary, so ^foo would match while ^test-foo would not.
	// Next, look for `sectionType("x"){`, allowing spaces between the operators,
	// where x is a name to match and { is the given openDelimiter.
	//
	// The boundary group is captured so that its exact extent is known: it is
	// empty when it matched the start of the string and one character wide
	// otherwise, including when that character sits at offset 0.
	reStr := fmt.Sprintf(`(?s)(\A|\z|[\(\)\[\]\{\}]|\s)%s\s*\(\s*\"(.*?)\"\s*\)\s*%s\s*`, sectionType, delStr)
	r, err := regexp.Compile(reStr)
	if err != nil {
		return nil, fmt.Errorf("findIndicesForAllSectionsOfType(): could not compile regex (%q): %w", reStr, err)
	}
	matches := r.FindAllStringSubmatchIndex(source, -1)
	// Each m is [start, end, boundaryStart, boundaryEnd, nameStart, nameEnd].
	// Drop the boundary group and begin the match where the boundary ends, so
	// the boundary character is never part of the reported section.
	for i, m := range matches {
		trimmed := make([]int, 0, len(m)-2)
		trimmed = append(trimmed, m[3], m[1])
		matches[i] = append(trimmed, m[4:]...)
	}
	return matches, nil
}
// findIndicesForAllSectionsNamed returns the indices of every section in source
// whose name matches sectionName. Each entry of the result has the form
// [start, end, typeStart, typeEnd]: start and end delimit the match from the
// first character of the section type through the opening delimiter and any
// whitespace following it, and typeStart and typeEnd delimit the section type,
// which is a run of word characters starting on a word boundary. The
// sectionName may include regular expression syntax, allowing for more flexible
// matching. The openDelimiter is always matched literally.
//
// It returns nil indices when nothing matches, and an error when source or
// sectionName is blank or the resulting expression does not compile.
func findIndicesForAllSectionsNamed(source, sectionName string, openDelimiter rune) ([][]int, error) {
	if source == "" {
		return nil, errors.New("findIndicesForAllSectionsNamed(): blank source")
	}
	if sectionName == "" {
		return nil, errors.New("findIndicesForAllSectionsNamed(): blank sectionName")
	}
	// Escape the delimiter so that any rune with a special meaning in a regular
	// expression ('[', '(', '{', '.', ...) is matched as itself.
	delStr := regexp.QuoteMeta(string(openDelimiter))

	// Regular expression: matching over multiple lines, first match a boundary
	// (start of string, end of string, whitespace, or a parenthesis, bracket, or
	// brace), then look for `sectionType("sectionName"){`, allowing spaces
	// between the operators, where sectionType is any word and { is the given
	// openDelimiter.
	//
	// The boundary group is captured so that its exact extent is known: it is
	// empty when it matched the start of the string and one character wide
	// otherwise, including when that character sits at offset 0.
	reStr := fmt.Sprintf(`(?s)(\A|\z|[\(\)\[\]\{\}]|\s)(\w+)\s*\(\s*"%s"\s*\)\s*%s\s*`, sectionName, delStr)
	r, err := regexp.Compile(reStr)
	if err != nil {
		return nil, fmt.Errorf("findIndicesForAllSectionsNamed(): could not compile regex (%q): %w", reStr, err)
	}
	matches := r.FindAllStringSubmatchIndex(source, -1)
	// Each m is [start, end, boundaryStart, boundaryEnd, typeStart, typeEnd].
	// Drop the boundary group and begin the match where the boundary ends, so
	// the boundary character is never part of the reported section.
	for i, m := range matches {
		trimmed := make([]int, 0, len(m)-2)
		trimmed = append(trimmed, m[3], m[1])
		matches[i] = append(trimmed, m[4:]...)
	}
	return matches, nil
}
// sectionsWithTypeAndName returns all of the sections with the given type and
// name that are present in the given source. For example,
// sectionsWithTypeAndName(source, "group", "tests", '{', '}') would return all
// of the `group("tests") {...}` sections present.
//
// Each returned SectionType carries sectionName as its Name, sectionType as its
// Kind, and, as its Contents, the text from the start of the match reported by
// findIndicesForAll through the matching close delimiter.
//
// It returns an error when an argument is blank, when the search itself fails
// (for example because sectionType or sectionName is not a valid regular
// expression), when no section is found, or when a section's delimiters are
// not balanced.
func sectionsWithTypeAndName(source, sectionType, sectionName string, openDelimeter, closeDelimeter rune) ([]SectionType, error) {
	if source == "" {
		return nil, errors.New("sectionsWithTypeAndName(): blank source given")
	}
	if sectionType == "" {
		return nil, errors.New("sectionsWithTypeAndName(): blank sectionType given")
	}
	if sectionName == "" {
		return nil, errors.New("sectionsWithTypeAndName(): blank sectionName given")
	}
	sectionIndices, err := findIndicesForAll(source, sectionType, sectionName, openDelimeter)
	if err != nil {
		// Report the real cause rather than disguising it as "not found".
		return nil, fmt.Errorf("sectionsWithTypeAndName(): %w", err)
	}
	if len(sectionIndices) == 0 {
		return nil, fmt.Errorf("sectionsWithTypeAndName(): section type %q with delimiters '%c', '%c' not found",
			sectionType, openDelimeter, closeDelimeter)
	}
	sections := make([]SectionType, 0, len(sectionIndices))
	for _, indices := range sectionIndices {
		if len(indices) != 2 {
			return nil, errors.New("sectionsWithTypeAndName(): section malformed")
		}
		start := indices[0]
		contents := findMatchingDelimiters(source[start:], openDelimeter, closeDelimeter)
		if contents == "" {
			return nil, fmt.Errorf("sectionsWithTypeAndName(): couldn't find matching delimiters for %c and %c",
				openDelimeter, closeDelimeter)
		}
		sections = append(sections, SectionType{Name: sectionName, Kind: sectionType, Contents: contents})
	}
	return sections, nil
}
// sectionsOfType returns all of the sections of the given type. For example,
// sectionsOfType(source, "group", '{', '}') would return, say, the sections
// `group("foo")`, `group("bar")`, and `group("baz")`.
//
// Each returned SectionType carries the section's name as found in source as
// its Name, sectionType as its Kind, and, as its Contents, the text from the
// start of the match reported by findIndicesForAllSectionsOfType through the
// matching close delimiter.
//
// It returns an error when an argument is blank, when the search itself fails
// (for example because sectionType is not a valid regular expression), when no
// section is found, when a section has an empty name, or when a section's
// delimiters are not balanced.
func sectionsOfType(source, sectionType string, openDelimeter, closeDelimeter rune) ([]SectionType, error) {
	if source == "" {
		return nil, errors.New("sectionsOfType(): blank source given")
	}
	if sectionType == "" {
		return nil, errors.New("sectionsOfType(): blank section given")
	}
	sectionIndices, err := findIndicesForAllSectionsOfType(source, sectionType, openDelimeter)
	if err != nil {
		// Report the real cause rather than disguising it as "not found".
		return nil, fmt.Errorf("sectionsOfType(): %w", err)
	}
	if len(sectionIndices) == 0 {
		return nil, fmt.Errorf("sectionsOfType(): section type %q with delimiters '%c', '%c' not found",
			sectionType, openDelimeter, closeDelimeter)
	}
	sections := make([]SectionType, 0, len(sectionIndices))
	for _, indices := range sectionIndices {
		// Expect [start, end, nameStart, nameEnd].
		if len(indices) != 4 {
			return nil, errors.New("sectionsOfType(): section missing name indices")
		}
		start := indices[0]
		name := source[indices[2]:indices[3]]
		if name == "" {
			return nil, errors.New("sectionsOfType(): section name blank")
		}
		contents := findMatchingDelimiters(source[start:], openDelimeter, closeDelimeter)
		if contents == "" {
			return nil, fmt.Errorf("sectionsOfType(): couldn't find matching delimiters for %c and %c",
				openDelimeter, closeDelimeter)
		}
		sections = append(sections, SectionType{Name: name, Kind: sectionType, Contents: contents})
	}
	return sections, nil
}
// sectionsNamed returns all of the sections with the given name. For example,
// sectionsNamed(source, "tests", '{', '}') would return, say, the section
// `group("tests")`.
//
// Each returned SectionType carries sectionName as its Name, the section's type
// as found in source as its Kind, and, as its Contents, the text from the start
// of the match reported by findIndicesForAllSectionsNamed through the matching
// close delimiter.
//
// It returns an error when an argument is blank, when the search itself fails
// (for example because sectionName is not a valid regular expression), when no
// section is found, when a section has an empty type, or when a section's
// delimiters are not balanced.
func sectionsNamed(source, sectionName string, openDelimeter, closeDelimeter rune) ([]SectionType, error) {
	if source == "" {
		return nil, errors.New("sectionsNamed(): blank source given")
	}
	if sectionName == "" {
		return nil, errors.New("sectionsNamed(): blank sectionName given")
	}
	sectionIndices, err := findIndicesForAllSectionsNamed(source, sectionName, openDelimeter)
	if err != nil {
		// Report the real cause rather than disguising it as "not found".
		return nil, fmt.Errorf("sectionsNamed(): %w", err)
	}
	if len(sectionIndices) == 0 {
		return nil, fmt.Errorf("sectionsNamed(): section named %q with delimiters '%c', '%c' not found in %q",
			sectionName, openDelimeter, closeDelimeter, source)
	}
	sections := make([]SectionType, 0, len(sectionIndices))
	for _, indices := range sectionIndices {
		// Expect [start, end, typeStart, typeEnd].
		if len(indices) != 4 {
			return nil, errors.New("sectionsNamed(): section missing section type indices")
		}
		start := indices[0]
		kind := source[indices[2]:indices[3]]
		if kind == "" {
			return nil, errors.New("sectionsNamed(): section type blank")
		}
		contents := findMatchingDelimiters(source[start:], openDelimeter, closeDelimeter)
		if contents == "" {
			return nil, fmt.Errorf("sectionsNamed(): couldn't find matching delimiters for %c and %c",
				openDelimeter, closeDelimeter)
		}
		sections = append(sections, SectionType{Name: sectionName, Kind: kind, Contents: contents})
	}
	return sections, nil
}
// extractList collects the items of every `<name> = [a,b,c]` or
// `<name> += [a,b,c]` clause found in source and returns them as one slice.
// Each item has any trailing `#` comment removed and is then stripped of
// surrounding backslashes, quotes, spaces, tabs, and linefeeds; items that end
// up empty are dropped. The name may include regular expression syntax,
// allowing for more flexible matching.
func extractList(source, name string) ([]string, error) {
	// Regular expression: matching over multiple lines, look for "deps=[X]" or
	// "deps+=[X]", allowing spaces between the operators, and capture X, the
	// text between the brackets. ".+" requires at least one character, which
	// could be a quotation mark.
	pattern := fmt.Sprintf(`(?s)\s*%s\s*\+?=\s*\[\s*(.+)\s*\]`, name)
	listRE, err := regexp.Compile(pattern)
	if err != nil {
		return nil, fmt.Errorf("extractList(): could not compile regex (%q): %w", pattern, err)
	}

	var items []string
	for _, match := range listRE.FindAllStringSubmatch(source, -1) {
		body := match[1]
		if body == "" {
			continue
		}
		for _, raw := range strings.Split(body, ",") {
			// Drop the '#' and whatever follows it before trimming.
			uncommented := commentRE.ReplaceAllString(raw, "")
			if cleaned := strings.Trim(uncommented, "\\'\"\n\t "); cleaned != "" {
				items = append(items, cleaned)
			}
		}
	}
	return items, nil
}
// getValue looks up key in source and returns the value assigned to it.
// Assignments have the form `key = "value"`, where the value may be wrapped in
// single or double quotes. When the key is assigned more than once, the first
// value wins; when it is not assigned at all, the result is an empty string and
// a nil error. The key may include regular expression syntax, allowing for more
// flexible matching.
func getValue(source, key string) (string, error) {
	// Regex: over multiple lines, match optional space, the key, optional space,
	// an equals sign, optional space, and then the quoted value. The captured
	// value is everything between the quotes and may consist of underscores,
	// dashes, letters, numbers, or symbols.
	pattern := fmt.Sprintf(`(?s)\s*%s\s*=\s*[\"\']{1}([_\-\p{L}\p{N}\p{S}]+)[\"\']{1}`, key)
	valueRE, err := regexp.Compile(pattern)
	if err != nil {
		return "", fmt.Errorf("getValue(): could not compile regex (%q): %w", pattern, err)
	}

	switch groups := valueRE.FindStringSubmatch(source); {
	case groups == nil:
		// No assignment for this key.
		return "", nil
	case len(groups) == 2:
		// groups[0] is the whole match; groups[1] is the value.
		return groups[1], nil
	default:
		// Any other length means the key contributed capturing groups of its own.
		return "", fmt.Errorf("found more than one match in Get(%q, %q)", source, key)
	}
}
// extractNamedList gathers the items of every clause with the given name in
// source and returns them combined, in order of appearance. Clauses look like
// `<name> = [a,b,c]` or `<name> += [a,b,c]`. The name may include regular
// expression syntax, allowing for more flexible matching.
func extractNamedList(source, name string) ([]string, error) {
	// Regular expression: matching over multiple lines, look for "NNNN=[" or
	// "NNNN+=[", allowing spaces between the operators, where NNNN is the name.
	pattern := fmt.Sprintf(`(?s)\s*%s\s*\+?=\s*\[\s*`, name)
	clauseRE, err := regexp.Compile(pattern)
	if err != nil {
		return nil, fmt.Errorf("extractNamedList(): could not compile regex (%q): %w", pattern, err)
	}

	var collected []string
	for _, span := range clauseRE.FindAllStringIndex(source, -1) {
		// Narrow the text down to this one clause, through its closing bracket,
		// before pulling the items out of it.
		clause := findMatchingDelimiters(source[span[0]:], '[', ']')
		items, err := extractList(clause, name)
		if err != nil {
			return collected, fmt.Errorf("extractNamedList(): %w", err)
		}
		collected = append(collected, items...)
	}
	return collected, nil
}