// Copyright 2020 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

package main

import (
	"bytes"
	"errors"
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
	"sync"
)

// A library is a FIDL library represented by its name (as spelled in its
// "library" declaration, not necessarily matching directory or file names).
type library string

// The zx library describes Zircon syscalls.
const zxLibrary library = "zx"

// A libraryMap stores information derived about a group of libraries.
type libraryMap map[library]libraryInfo

type libraryInfo struct {
	// Directory containing the library.
	dir string
	// FIDL files that make up the library (relative to dir, or absolute).
	files []string
	// Direct dependencies (imported by "using" declarations in files).
	deps []library
}

// A libraryCache is a thread-safe, append-only libraryMap.
type libraryCache struct {
	sync.RWMutex
	libs libraryMap
}

func (c *libraryCache) get(lib library) (libraryInfo, bool) {
	c.RLock()
	defer c.RUnlock()
	info, ok := c.libs[lib]
	return info, ok
}

func (c *libraryCache) storeNew(libs libraryMap) {
	// Avoid write contention by skipping the operation if all the libraries in
	// libs are already in the cache.
	if !c.hasNew(libs) {
		return
	}
	c.Lock()
	defer c.Unlock()
	for lib, info := range libs {
		if _, ok := c.libs[lib]; !ok {
			c.libs[lib] = info
		}
	}
}

func (c *libraryCache) hasNew(libs libraryMap) bool {
	c.RLock()
	defer c.RUnlock()
	for lib := range libs {
		if _, ok := c.libs[lib]; !ok {
			return true
		}
	}
	return false
}

// libraryAnalyzer analyzes FIDL files to infer library dependencies. It caches
// information to speed up future analyses.
type libraryAnalyzer struct {
	// Paths in which to search for FIDL library files. Given library foo.bar,
	// the analyzer will check P/foo.bar and P/foo-bar for each path P in paths.
	// If one of these exists, it reads all descendent .fidl files.
	paths []string
	// Global cache of library information.
	cache libraryCache
}

func newLibraryAnalyzer(paths []string) *libraryAnalyzer {
	return &libraryAnalyzer{
		paths: paths,
		cache: libraryCache{
			libs: make(libraryMap),
		},
	}
}

const existingLibraryNotice = "" +
	"NOTE: Your file will be compiled with the others in this library. " +
	"If you don't want this behavior, choose a different library name."

// analyze analyzes content, returning a libraryMap that includes content's
// library together with all its transitive dependencies, and annotations on the
// "library" and "using" declarations indicating their inferred location. It
// assumes that filename is a FIDL file containing content. If any library
// cannot be located in a.paths, it is silently ignored.
func (a *libraryAnalyzer) analyze(filename, content string) (libraryMap, []annotation) {
	result := make(libraryMap)

	// Start with a stack containing the root library (if it's a platform source
	// library, meaning we can expect to find it in the tree) and its imports.
	var stack []library
	rootLibraryMatch, rootLibraryOk := parseLibraryMatch(content)
	if rootLibraryOk && rootLibraryMatch.lib.isPlatform() {
		stack = append(stack, rootLibraryMatch.lib)
	}
	rootImports := parsePlatformImportsMatch(content)
	for _, m := range rootImports {
		stack = append(stack, m.lib)
	}

	// Explore all dependencies via depth-first search.
	for len(stack) != 0 {
		// Pop a library and check if it needs processing.
		var lib library
		lib, stack = stack[len(stack)-1], stack[:len(stack)-1]
		if _, ok := result[lib]; ok {
			continue
		}

		// Check the analyzer's cache.
		if info, ok := a.cache.get(lib); ok {
			result[lib] = info
			stack = append(stack, info.deps...)
			continue
		}

		// Find the library's files and dependencies.
		var info libraryInfo
		for _, path := range a.paths {
			for _, dir := range lib.possibleDirs() {
				if !info.empty() {
					break
				}
				info.dir = filepath.Join(path, dir)
				// Keep track of already-seen imports to ensure that info.deps
				// has no duplicates (there won't be duplicates in a valid FIDL
				// file, but separate files might import the same library).
				seen := map[library]struct{}{}
				// Visit all .fidl files, including in subdirectories:
				filepath.Walk(info.dir, func(file string, fi os.FileInfo, err error) error {
					if err != nil || fi.IsDir() || filepath.Ext(file) != ".fidl" {
						return err
					}
					rel, err := filepath.Rel(info.dir, file)
					if err != nil {
						return err
					}
					b, err := ioutil.ReadFile(file)
					if err != nil {
						return err
					}
					declaredLib, ok := parseLibrary(b)
					if !ok || declaredLib != lib {
						return nil
					}
					info.files = append(info.files, rel)
					for _, dep := range parsePlatformImports(b) {
						if _, ok := seen[dep]; ok {
							continue
						}
						seen[dep] = struct{}{}
						info.deps = append(info.deps, dep)
						stack = append(stack, dep)
					}
					return nil
				})
			}
		}
		if !info.empty() {
			result[lib] = info
		}
	}

	// Cache reusable results first, before adding request-specific information.
	a.cache.storeNew(result)

	// The following handles two cases:
	//   1. (More common) The user typed something like `library foo;`. This
	//      is a new library outside the source tree, consisting of one file.
	//   2. The user typed something like `library fuchsia.io;`. This behaves as
	//      if they were adding a new file to the fuchsia.io library, alongside
	//      the other ones in the source tree.
	rootLib := rootLibraryMatch.lib
	if !rootLibraryOk {
		// The name doesn't really matter. We just need an entry in the map.
		rootLib = "fidlbolt"
	}
	var anns []annotation
	rootInfo := libraryInfo{files: []string{filename}}
	for _, m := range rootImports {
		rootInfo.deps = append(rootInfo.deps, m.lib)
		if info, ok := result[m.lib]; ok {
			text := fmt.Sprintf("Found %s in %s", m.lib, info.userVisibleDir())
			anns = append(anns, m.annotation(Info, text))
		}
	}
	if info, ok := result[rootLib]; ok {
		rootInfo = mergeInfo(rootInfo, info)
		if rootLibraryOk {
			text := fmt.Sprintf("Found %s in %s\n\n%s",
				rootLib, info.userVisibleDir(), existingLibraryNotice)
			anns = append(anns, rootLibraryMatch.annotation(Info, text))
		}
	}
	result[rootLib] = rootInfo

	return result, anns
}

// fidlcFileArgs returns a list of arguments to pass to fidlc to compile the
// libraries in m. This is a fairly expensive operation. It returns an error if
// there is an import cycle.
func (m libraryMap) fidlcFileArgs() ([]string, error) {
	sorted, err := m.topologicalSort()
	if err != nil {
		return nil, err
	}

	// Iterate in reverse order so that dependencies come before dependents.
	var args []string
	for i := len(sorted) - 1; i >= 0; i-- {
		lib := sorted[i]
		args = append(args, "--files")
		info := m[lib]
		for _, file := range info.files {
			// When the user declares their file as belong to an existing
			// library, their file will be an absolute path in a temp directory.
			// Otherwise, all files are relative to info.dir.
			if filepath.IsAbs(file) {
				args = append(args, file)
			} else {
				args = append(args, filepath.Join(info.dir, file))
			}
		}
	}
	return args, nil
}

// dependencyGraph returns a map representing the the dependency graph of m.
// Each key is a library name, and each value is another map whose keys are its
// direct dependencies.
func (m libraryMap) dependencyGraph() (map[string]interface{}, error) {
	// Make sure there are no cycles, otherwise this will never terminate.
	if _, err := m.topologicalSort(); err != nil {
		return nil, err
	}

	sources := map[library]struct{}{}
	for lib := range m {
		sources[lib] = struct{}{}
	}
	for _, info := range m {
		for _, dep := range info.deps {
			delete(sources, dep)
		}
	}
	var start []library
	for lib := range sources {
		start = append(start, lib)
	}

	var gen func(libs []library) map[string]interface{}
	gen = func(libs []library) map[string]interface{} {
		g := map[string]interface{}{}
		for _, lib := range libs {
			g[string(lib)] = gen(m[library(lib)].deps)
		}
		return g
	}
	return gen(start), nil
}

// topologicalSort runs topological sort on m, whose deps fields define a
// directed graph in adjacency list form. In the resulting list, libraries only
// depend on other libraries later in the list, not on earlier ones. Returns
// an error if there is a cycle.
func (m libraryMap) topologicalSort() ([]library, error) {
	var sorted []library
	incoming := map[library]int{}
	for u := range m {
		incoming[u] = 0
	}
	for _, info := range m {
		for _, v := range info.deps {
			incoming[v]++
		}
	}
	var src []library
	for u, deg := range incoming {
		if deg == 0 {
			src = append(src, u)
		}
	}
	for len(src) != 0 {
		var u library
		u, src = src[len(src)-1], src[:len(src)-1]
		sorted = append(sorted, u)
		for _, v := range m[u].deps {
			incoming[v]--
			if incoming[v] == 0 {
				src = append(src, v)
			}
		}
	}
	for _, deg := range incoming {
		if deg != 0 {
			return nil, errors.New("detected an import cycle")
		}
	}
	return sorted, nil
}

// possibleDirs returns directory names that lib's files might be found in.
func (lib library) possibleDirs() []string {
	if lib == zxLibrary {
		// The zx library is a special case: its .fidl files are in a
		// directory named "vsdo" (fuchsia/zircon/vdso), not "zx".
		return []string{"vdso"}
	}
	return []string{string(lib), strings.ReplaceAll(string(lib), ".", "-")}
}

// isPlatform returns true if lib adheres to the naming scheme for platform
// source libraries (enforced by the FIDL linter).
func (lib library) isPlatform() bool {
	s := string(lib)
	return strings.HasPrefix(s, "fidl.") ||
		strings.HasPrefix(s, "fuchsia.") ||
		strings.HasPrefix(s, "test.")
}

// empty returns true if i represents an empty library (has no files).
func (i *libraryInfo) empty() bool {
	return len(i.files) == 0
}

// userVisibleDir returns a representation of i.dir to show to the user.
func (i *libraryInfo) userVisibleDir() string {
	dir := filepath.ToSlash(i.dir)
	if i := strings.Index(dir, "/fuchsia/"); i != -1 {
		return dir[i+1:]
	}
	return dir
}

// mergeInfo returns a combination of the information in info and other. It
// assumes that only one of them has dir set.
func mergeInfo(info, other libraryInfo) libraryInfo {
	if info.dir != "" && other.dir != "" {
		panic("mergeInfo called with incompatible dirs")
	}
	if info.dir == "" {
		info.dir = other.dir
	}
	info.files = append(info.files, other.files...)
	deps := map[library]struct{}{}
	for _, lib := range info.deps {
		deps[lib] = struct{}{}
	}
	for _, lib := range other.deps {
		if _, ok := deps[lib]; !ok {
			info.deps = append(info.deps, lib)
		}
	}
	return info
}

var (
	// https://fuchsia.dev/fuchsia-src/development/languages/fidl/reference/language.md#identifiers
	fidlIdentifierPattern = `[a-zA-Z](?:[a-zA-Z0-9_]*[a-zA-Z0-9])?`

	// Although "library" can be used anywhere (e.g. as a type name), this regex
	// is robust because the the library declaration must appear at the top of
	// the file (only comments and whitespace can precede it).
	libraryRegexp = regexp.MustCompile(`` +
		`^(?:\s*//[^\n]*\n)*\s*` +
		`library\s+(` +
		fidlIdentifierPattern +
		`(?:\.` + fidlIdentifierPattern + `)*` +
		`)\s*;`)

	// Although "using" can be used anywhere (e.g. as a type name), this regex
	// is robust because it only matches imports of platform libraries (ones
	// that start with fidl, fuchsia, or test) and the special library zx. The
	// only problem is it can match commented imports, so we have to check for
	// this after matching.
	platformImportRegexp = regexp.MustCompile(`` +
		`\busing\s+(` +
		`zx|` +
		`(?:fidl|fuchsia|test)` +
		`(?:\.` + fidlIdentifierPattern + `)+` +
		`)` +
		`(?:\s+as\s+` + fidlIdentifierPattern + `)?` +
		`\s*;`)
)

type libraryMatch struct {
	lib          library
	line, column int
}

func parseLibrary(fidl []byte) (library, bool) {
	m := libraryRegexp.FindSubmatch(fidl)
	if m == nil {
		return "", false
	}
	return library(m[1]), true
}

func parseLibraryMatch(fidl string) (libraryMatch, bool) {
	m := libraryRegexp.FindStringSubmatchIndex(fidl)
	if m == nil {
		return libraryMatch{}, false
	}
	return toLibraryMatch(fidl, m[2], m[3]), true
}

func parsePlatformImports(fidl []byte) []library {
	var libs []library
	for _, m := range platformImportRegexp.FindAllSubmatchIndex(fidl, -1) {
		// Make sure the line isn't commented. Technically this breaks if the
		// line contains a string const with "//" in it. This isn't a big deal
		// because imports are supposed to come before all other declarations.
		i := bytes.LastIndexByte(fidl[:m[2]], '\n')
		if i == -1 || !bytes.Contains(fidl[i:m[2]], []byte("//")) {
			libs = append(libs, library(fidl[m[2]:m[3]]))
		}
	}
	return libs
}

func parsePlatformImportsMatch(fidl string) []libraryMatch {
	var libs []libraryMatch
	for _, m := range platformImportRegexp.FindAllStringSubmatchIndex(fidl, -1) {
		// See the comment in parsePlatformImports.
		i := strings.LastIndexByte(fidl[:m[2]], '\n')
		if i == -1 || !strings.Contains(fidl[i:m[2]], "//") {
			libs = append(libs, toLibraryMatch(fidl, m[2], m[3]))
		}
	}
	return libs
}

func toLibraryMatch(fidl string, start, end int) libraryMatch {
	return libraryMatch{
		lib:    library(fidl[start:end]),
		line:   strings.Count(fidl[:start], "\n") + 1,
		column: start - strings.LastIndexByte(fidl[:start], '\n'),
	}
}

func (m *libraryMatch) annotation(kind annotationKind, text string) annotation {
	return annotation{
		Line:   m.line,
		Column: m.column,
		Kind:   kind,
		Text:   text,
	}
}

var fidlAnnotationRegexp = regexp.MustCompile(`` +
	// Multi-line mode: ^ and $ match begin/end of line OR text.
	`(?m)` +
	`^/.*/fidlbolt\.fidl:(\d+):(\d+).*?` +
	`(` +
	string(Info) + `|` +
	string(Warning) + `|` +
	string(Error) + `)` +
	`:\s*(.*)$`)

// parseFidlAnnotations parses a list of annotations from error output. It
// assumes that the output refers to a file called "fidlbolt.fidl".
// TODO: Use fidlc --format=json instead of regex parsing.
func parseFidlAnnotations(output string) []annotation {
	matches := fidlAnnotationRegexp.FindAllStringSubmatch(output, -1)
	var anns []annotation
	for _, m := range matches {
		line, err := strconv.Atoi(m[1])
		if err != nil {
			continue
		}
		column, err := strconv.Atoi(m[2])
		if err != nil {
			continue
		}
		anns = append(anns, annotation{
			Line:   line,
			Column: column,
			Kind:   annotationKind(m[3]),
			Text:   m[4],
		})
	}
	return anns
}
