Refactor generation of submodule tree for use in 'jiri snaphot'

Add support for --submodules to generate jiri snapshot compatible with
git submodules.

Bug: [google internal] b/189478693

Testing:
- Unit tests
- Verified jiri snapshot manually

Change-Id: Id0c25e7154f0299779d14edfc5e740ee2ae4b90c
Reviewed-on: https://fuchsia-review.googlesource.com/c/jiri/+/540969
Commit-Queue: Ian Kasprzak <iankaz@google.com>
Reviewed-by: Anirudh Mathukumilli <rudymathu@google.com>
diff --git a/cmd/jiri/generate-gitmodules.go b/cmd/jiri/generate-gitmodules.go
index 35c4f33..262089b 100644
--- a/cmd/jiri/generate-gitmodules.go
+++ b/cmd/jiri/generate-gitmodules.go
@@ -8,11 +8,9 @@
 	"bytes"
 	"crypto/sha256"
 	"encoding/hex"
-	"errors"
 	"fmt"
 	"io/ioutil"
 	"path/filepath"
-	"sort"
 	"strings"
 
 	"go.fuchsia.dev/jiri"
@@ -77,143 +75,10 @@
 	return writeGitModules(jirix, localProjects, gitmodulesPath, gitattributesPath)
 }
 
-func (p *projectTreeRoot) add(jirix *jiri.X, proj project.Project) error {
-	if p == nil || p.root == nil {
-		return errors.New("add called with nil root pointer")
-	}
-
-	if proj.Path == "." || proj.Path == "" || proj.Path == string(filepath.Separator) {
-		// Skip fuchsia.git project
-		p.dropped[proj.Key()] = proj
-		return nil
-	}
-
-	// git submodule does not support one submodule to be placed under the path
-	// of another submodule, therefore, it is necessary to detect nested
-	// projects in jiri manifests and drop them from gitmodules file.
-	//
-	// The nested project detection is based on only 1 rule:
-	// If the path of project A (pathA) is the parent directory of project B,
-	// project B will be considered as nested under project A. It will be recorded
-	// in "dropped" map.
-	//
-	// Due to the introduction of fuchsia.git, based on the rule above, all
-	// other projects will be considered as nested project under fuchsia.git,
-	// therefore, fuchsia.git is excluded in this detection process.
-	//
-	// The detection algorithm works in following ways:
-	//
-	// Assuming we have two project: "projA" and "projB", "projA" is located at
-	// "$JIRI_ROOT/a" and projB is located as "$JIRI_ROOT/b/c".
-	// The projectTree will look like the following chart:
-	//
-	//                   a    +-------+
-	//               +--------+ projA |
-	//               |        +-------+
-	// +---------+   |
-	// |nil(root)+---+
-	// +---------+   |
-	//               |   b    +-------+   c   +-------+
-	//               +--------+  nil  +-------+ projB |
-	//                        +-------+       +-------+
-	//
-	// The text inside each block represents the projectTree.project field,
-	// each edge represents a key of projectTree.children field.
-	//
-	// Assuming we adds project "projC" whose path is "$JIRI_ROOT/a/d", it will
-	// be dropped as the children of root already have key "a" and
-	// children["a"].project is not pointed to nil, which means "projC" is
-	// nested under "projA".
-	//
-	// Assuming we adds project "projD" whose path is "$JIRI_ROOT/d", it will
-	// be added successfully since root.children does not have key "d" yet,
-	// which means "projD" is not nested under any known project and no project
-	// is currently nested under "projD" yet.
-	//
-	// Assuming we adds project "projE" whose path is "$JIRI_ROOT/b", it will
-	// be added successfully and "projB" will be dropped. The reason is that
-	// root.children["b"].project is nil but root.children["b"].children is not
-	// empty, so any projects that can be reached from root.children["b"]
-	// should be dropped as they are nested under "projE".
-	elmts := strings.Split(proj.Path, string(filepath.Separator))
-	pin := p.root
-	for i := 0; i < len(elmts); i++ {
-		if child, ok := pin.children[elmts[i]]; ok {
-			if child.project != nil {
-				// proj is nested under next.project, drop proj
-				jirix.Logger.Debugf("project %q:%q nested under project %q:%q", proj.Path, proj.Remote, proj.Path, child.project.Remote)
-				p.dropped[proj.Key()] = proj
-				return nil
-			}
-			pin = child
-		} else {
-			child = &projectTree{nil, make(map[string]*projectTree)}
-			pin.children[elmts[i]] = child
-			pin = child
-		}
-	}
-	if len(pin.children) != 0 {
-		// There is one or more project nested under proj.
-		jirix.Logger.Debugf("following project nested under project %q:%q", proj.Path, proj.Remote)
-		if err := p.prune(jirix, pin); err != nil {
-			return err
-		}
-		jirix.Logger.Debugf("\n")
-	}
-	pin.project = &proj
-	return nil
-}
-
-func (p *projectTreeRoot) prune(jirix *jiri.X, node *projectTree) error {
-	// Looking for projects nested under node using BFS
-	workList := make([]*projectTree, 0)
-	workList = append(workList, node)
-
-	for len(workList) > 0 {
-		item := workList[0]
-		if item == nil {
-			return errors.New("purgeLeaves encountered a nil node")
-		}
-		workList = workList[1:]
-		if item.project != nil {
-			p.dropped[item.project.Key()] = *item.project
-			jirix.Logger.Debugf("\tnested project %q:%q", item.project.Path, item.project.Remote)
-		}
-		for _, v := range item.children {
-			workList = append(workList, v)
-		}
-	}
-
-	// Purge leaves under node
-	node.children = make(map[string]*projectTree)
-	return nil
-}
-
 func writeGitModules(jirix *jiri.X, projects project.Projects, gitmodulesPath, gitattributesPath string) error {
-	projEntries := make([]project.Project, len(projects))
-
-	// relativaize the paths and copy projects from map to slice for sorting.
-	i := 0
-	for _, v := range projects {
-		relPath, err := makePathRel(jirix.Root, v.Path)
-		if err != nil {
-			return err
-		}
-		v.Path = relPath
-		projEntries[i] = v
-		i++
-	}
-	sort.Slice(projEntries, func(i, j int) bool {
-		return string(projEntries[i].Key()) < string(projEntries[j].Key())
-	})
-
-	// Create path prefix tree to collect all nested projects
-	root := projectTree{nil, make(map[string]*projectTree)}
-	treeRoot := projectTreeRoot{&root, make(project.Projects)}
-	for _, v := range projEntries {
-		if err := treeRoot.add(jirix, v); err != nil {
-			return err
-		}
+	projEntries, treeRoot, err := project.GenerateSubmoduleTree(jirix, projects)
+	if err != nil {
+		return err
 	}
 
 	// Start creating .gitmodule and set up script.
@@ -258,7 +123,7 @@
 		if reRootRepoName != "" && reRootRepoName == v.Path {
 			return fmt.Errorf("path collision for root repo and project %+v", v)
 		}
-		if _, ok := treeRoot.dropped[v.Key()]; ok {
+		if _, ok := treeRoot.Dropped[v.Key()]; ok {
 			jirix.Logger.Debugf("dropped project %+v", v)
 			continue
 		}
diff --git a/cmd/jiri/generate-gitmodules_test.go b/cmd/jiri/generate-gitmodules_test.go
index 5510559..4043fa7 100644
--- a/cmd/jiri/generate-gitmodules_test.go
+++ b/cmd/jiri/generate-gitmodules_test.go
@@ -19,80 +19,6 @@
 	"go.fuchsia.dev/jiri/project"
 )
 
-func TestPrefixTree(t *testing.T) {
-	_, fakeroot, cleanup := setupUniverse(t)
-	defer cleanup()
-
-	projects := []project.Project{
-		{Name: "root", Path: "."},
-		{Name: "a", Path: "a"},
-		{Name: "b", Path: "b"},
-		{Name: "c/d/e", Path: "c/d/e"},
-		{Name: "c/d/f", Path: "c/d/f"},
-		{Name: "c/d", Path: "c/d"},
-		{Name: "c", Path: "c"},
-	}
-	expectedDropped := []project.Project{
-		projects[0],
-		projects[3],
-		projects[4],
-		projects[5],
-	}
-
-	// Fill projects into prefix tree
-	root := projectTree{nil, make(map[string]*projectTree)}
-	dropped := make(project.Projects)
-	treeRoot := projectTreeRoot{&root, dropped}
-	for _, v := range projects {
-		if err := treeRoot.add(fakeroot.X, v); err != nil {
-			t.Errorf("adding project to prefixTree failed due to error: %v", err)
-			break
-		}
-	}
-
-	// generate logs when test failed
-	failedDropped := func() {
-		t.Logf("wrong nested projects list")
-		t.Logf("expecting: ")
-		for _, v := range expectedDropped {
-			t.Logf("\tproject:%q", v.Path)
-		}
-		t.Logf("got:")
-		for _, v := range treeRoot.dropped {
-			t.Logf("\tproject:%q", v.Path)
-		}
-		t.Fail()
-	}
-
-	// Verify nested projects
-	if len(treeRoot.dropped) != len(expectedDropped) {
-		failedDropped()
-	}
-	for _, v := range expectedDropped {
-		if _, ok := treeRoot.dropped[v.Key()]; !ok {
-			failedDropped()
-			break
-		}
-	}
-
-	// Verify the shape of prefix tree
-	if len(root.children) == 3 {
-		prefixes := []string{"a", "b", "c"}
-		for _, v := range prefixes {
-			if _, ok := root.children[v]; !ok {
-				t.Errorf("root node does not contain project %q", v)
-			}
-		}
-		for _, v := range root.children {
-			if len(v.children) != 0 {
-				t.Errorf("more than 1 level of nodes found in prefix tree")
-			}
-		}
-	} else {
-		t.Errorf("expecting %v first level nodes, but got %v", 3, len(root.children))
-	}
-}
-
 func TestGitModules(t *testing.T) {
 	goldenScript := []byte(`#!/bin/sh
 git update-index --add --cacheinfo 160000 87326c54332e5be21eda2173bb001aaee73a9ab7 "manifest"
diff --git a/cmd/jiri/snapshot.go b/cmd/jiri/snapshot.go
index 54d694a..216c554 100644
--- a/cmd/jiri/snapshot.go
+++ b/cmd/jiri/snapshot.go
@@ -10,6 +10,14 @@
 	"go.fuchsia.dev/jiri/project"
 )
 
+var (
+	submoduleFlag bool
+)
+
+func init() {
+	cmdSnapshot.Flags.BoolVar(&submoduleFlag, "submodule", false, "Filter snapshot for use with git submodules.")
+}
+
 var cmdSnapshot = &cmdline.Command{
 	Runner: jiri.RunnerFunc(runSnapshot),
 	Name:   "snapshot",
@@ -26,5 +34,5 @@
 	if len(args) != 1 {
 		return jirix.UsageErrorf("unexpected number of arguments")
 	}
-	return project.CreateSnapshot(jirix, args[0], nil, nil, true)
+	return project.CreateSnapshot(jirix, args[0], nil, nil, true, submoduleFlag)
 }
diff --git a/project/project.go b/project/project.go
index 92dd629..03691c8 100644
--- a/project/project.go
+++ b/project/project.go
@@ -738,7 +738,7 @@
 // HEAD of all projects and writes this snapshot out to the given file.
 // if hooks are not passed, jiri will read JiriManifestFile and get hooks from there,
 // so always pass hooks incase updating from a snapshot
-func CreateSnapshot(jirix *jiri.X, file string, hooks Hooks, pkgs Packages, localManifest bool) error {
+func CreateSnapshot(jirix *jiri.X, file string, hooks Hooks, pkgs Packages, localManifest bool, submodules bool) error {
 	jirix.TimerPush("create snapshot")
 	defer jirix.TimerPop()
 
@@ -755,6 +755,19 @@
 		return err
 	}
 
+	if submodules {
+		_, treeRoot, err := GenerateSubmoduleTree(jirix, localProjects)
+		if err != nil {
+			return err
+		}
+		// Remove dropped projects
+		for _, project := range localProjects {
+			if _, ok := treeRoot.Dropped[project.Key()]; ok {
+				delete(localProjects, project.Key())
+			}
+		}
+	}
+
 	for _, project := range localProjects {
 		manifest.Projects = append(manifest.Projects, project)
 	}
@@ -772,10 +785,12 @@
 		}
 	}
 
-	for _, hook := range hooks {
-		manifest.Hooks = append(manifest.Hooks, hook)
+	// Skip hooks for submodules
+	if !submodules {
+		for _, hook := range hooks {
+			manifest.Hooks = append(manifest.Hooks, hook)
+		}
 	}
-
 	for _, pack := range pkgs {
 		manifest.Packages = append(manifest.Packages, pack)
 	}
@@ -1470,7 +1485,7 @@
 // projects and writes it to the update history directory.
 func WriteUpdateHistorySnapshot(jirix *jiri.X, snapshotPath string, hooks Hooks, pkgs Packages, localManifest bool) error {
 	snapshotFile := filepath.Join(jirix.UpdateHistoryDir(), time.Now().Format(time.RFC3339))
-	if err := CreateSnapshot(jirix, snapshotFile, hooks, pkgs, localManifest); err != nil {
+	if err := CreateSnapshot(jirix, snapshotFile, hooks, pkgs, localManifest, false); err != nil {
 		return err
 	}
 
@@ -2571,3 +2586,165 @@
 	jirix.Logger.Debugf("package optional attributes written to %s", jsonFile)
 	return nil
 }
+
+type ProjectTree struct {
+	project  *Project
+	Children map[string]*ProjectTree
+}
+
+type ProjectTreeRoot struct {
+	Root    *ProjectTree
+	Dropped Projects
+}
+
+func GenerateSubmoduleTree(jirix *jiri.X, projects Projects) ([]Project, *ProjectTreeRoot, error) {
+	projEntries := make([]Project, len(projects))
+
+	// relativize the paths and copy projects from map to slice for sorting.
+	i := 0
+	for _, v := range projects {
+		relPath, err := makePathRel(jirix.Root, v.Path)
+		if err != nil {
+			return nil, nil, err
+		}
+		v.Path = relPath
+		projEntries[i] = v
+		i++
+	}
+	sort.Slice(projEntries, func(i, j int) bool {
+		return string(projEntries[i].Key()) < string(projEntries[j].Key())
+	})
+
+	// Create path prefix tree to collect all nested projects
+	root := ProjectTree{nil, make(map[string]*ProjectTree)}
+	treeRoot := ProjectTreeRoot{&root, make(Projects)}
+	for _, v := range projEntries {
+		if err := treeRoot.add(jirix, v); err != nil {
+			return nil, nil, err
+		}
+	}
+	return projEntries, &treeRoot, nil
+}
+
+func (p *ProjectTreeRoot) add(jirix *jiri.X, proj Project) error {
+	if p == nil || p.Root == nil {
+		return errors.New("add called with nil root pointer")
+	}
+
+	if proj.Path == "." || proj.Path == "" || proj.Path == string(filepath.Separator) {
+		// Skip fuchsia.git project
+		p.Dropped[proj.Key()] = proj
+		return nil
+	}
+
+	// git submodule does not support one submodule to be placed under the path
+	// of another submodule, therefore, it is necessary to detect nested
+	// projects in jiri manifests and drop them from gitmodules file.
+	//
+	// The nested project detection is based on only 1 rule:
+	// If the path of project A (pathA) is the parent directory of project B,
+	// project B will be considered as nested under project A. It will be recorded
+	// in "dropped" map.
+	//
+	// Due to the introduction of fuchsia.git, based on the rule above, all
+	// other projects will be considered as nested project under fuchsia.git,
+	// therefore, fuchsia.git is excluded in this detection process.
+	//
+	// The detection algorithm works in following ways:
+	//
+	// Assuming we have two project: "projA" and "projB", "projA" is located at
+	// "$JIRI_ROOT/a" and projB is located as "$JIRI_ROOT/b/c".
+	// The projectTree will look like the following chart:
+	//
+	//                   a    +-------+
+	//               +--------+ projA |
+	//               |        +-------+
+	// +---------+   |
+	// |nil(root)+---+
+	// +---------+   |
+	//               |   b    +-------+   c   +-------+
+	//               +--------+  nil  +-------+ projB |
+	//                        +-------+       +-------+
+	//
+	// The text inside each block represents the projectTree.project field,
+	// each edge represents a key of projectTree.children field.
+	//
+	// Assuming we adds project "projC" whose path is "$JIRI_ROOT/a/d", it will
+	// be dropped as the children of root already have key "a" and
+	// children["a"].project is not pointed to nil, which means "projC" is
+	// nested under "projA".
+	//
+	// Assuming we adds project "projD" whose path is "$JIRI_ROOT/d", it will
+	// be added successfully since root.children does not have key "d" yet,
+	// which means "projD" is not nested under any known project and no project
+	// is currently nested under "projD" yet.
+	//
+	// Assuming we adds project "projE" whose path is "$JIRI_ROOT/b", it will
+	// be added successfully and "projB" will be dropped. The reason is that
+	// root.children["b"].project is nil but root.children["b"].children is not
+	// empty, so any projects that can be reached from root.children["b"]
+	// should be dropped as they are nested under "projE".
+	elmts := strings.Split(proj.Path, string(filepath.Separator))
+	pin := p.Root
+	for i := 0; i < len(elmts); i++ {
+		if child, ok := pin.Children[elmts[i]]; ok {
+			if child.project != nil {
+				// proj is nested under next.project, drop proj
+				jirix.Logger.Debugf("project %q:%q nested under project %q:%q", proj.Path, proj.Remote, proj.Path, child.project.Remote)
+				p.Dropped[proj.Key()] = proj
+				return nil
+			}
+			pin = child
+		} else {
+			child = &ProjectTree{nil, make(map[string]*ProjectTree)}
+			pin.Children[elmts[i]] = child
+			pin = child
+		}
+	}
+	if len(pin.Children) != 0 {
+		// There is one or more project nested under proj.
+		jirix.Logger.Debugf("following project nested under project %q:%q", proj.Path, proj.Remote)
+		if err := p.prune(jirix, pin); err != nil {
+			return err
+		}
+		jirix.Logger.Debugf("\n")
+	}
+	pin.project = &proj
+	return nil
+}
+
+func (p *ProjectTreeRoot) prune(jirix *jiri.X, node *ProjectTree) error {
+	// Looking for projects nested under node using BFS
+	workList := make([]*ProjectTree, 0)
+	workList = append(workList, node)
+
+	for len(workList) > 0 {
+		item := workList[0]
+		if item == nil {
+			return errors.New("purgeLeaves encountered a nil node")
+		}
+		workList = workList[1:]
+		if item.project != nil {
+			p.Dropped[item.project.Key()] = *item.project
+			jirix.Logger.Debugf("\tnested project %q:%q", item.project.Path, item.project.Remote)
+		}
+		for _, v := range item.Children {
+			workList = append(workList, v)
+		}
+	}
+
+	// Purge leaves under node
+	node.Children = make(map[string]*ProjectTree)
+	return nil
+}
+
+func makePathRel(basepath, targpath string) (string, error) {
+	if filepath.IsAbs(targpath) {
+		relPath, err := filepath.Rel(basepath, targpath)
+		if err != nil {
+			return "", err
+		}
+		return relPath, nil
+	}
+	return targpath, nil
+}
diff --git a/project/project_test.go b/project/project_test.go
index 862073d..d03bc47 100644
--- a/project/project_test.go
+++ b/project/project_test.go
@@ -2707,3 +2707,77 @@
 		t.Errorf("expecting error from CheckProjectsHostnames, but got nil")
 	}
 }
+
+func TestPrefixTree(t *testing.T) {
+	jirix, cleanup := xtest.NewX(t)
+	defer cleanup()
+
+	projectList := []project.Project{
+		{Name: "root", Path: "."},
+		{Name: "a", Path: "a"},
+		{Name: "b", Path: "b"},
+		{Name: "c/d/e", Path: "c/d/e"},
+		{Name: "c/d/f", Path: "c/d/f"},
+		{Name: "c/d", Path: "c/d"},
+		{Name: "c", Path: "c"},
+	}
+	projects := make(project.Projects)
+	for _, p := range projectList {
+		projects[p.Key()] = p
+	}
+	expectedDropped := []project.Project{
+		projectList[0],
+		projectList[3],
+		projectList[4],
+		projectList[5],
+	}
+
+	// Fill projects into prefix tree
+	_, treeRoot, err := project.GenerateSubmoduleTree(jirix, projects)
+	if err != nil {
+		t.Errorf("GenerateSubmoduleTree failed due to error: %v", err)
+	}
+
+	// generate logs when test failed
+	failedDropped := func() {
+		t.Logf("wrong nested projects list")
+		t.Logf("expecting: ")
+		for _, v := range expectedDropped {
+			t.Logf("\tproject:%q", v.Path)
+		}
+		t.Logf("got:")
+		for _, v := range treeRoot.Dropped {
+			t.Logf("\tproject:%q", v.Path)
+		}
+		t.Fail()
+	}
+
+	// Verify nested projects
+	if len(treeRoot.Dropped) != len(expectedDropped) {
+		failedDropped()
+	}
+	for _, v := range expectedDropped {
+		if _, ok := treeRoot.Dropped[v.Key()]; !ok {
+			failedDropped()
+			break
+		}
+	}
+
+	root := treeRoot.Root
+	// Verify the shape of prefix tree
+	if len(root.Children) == 3 {
+		prefixes := []string{"a", "b", "c"}
+		for _, v := range prefixes {
+			if _, ok := root.Children[v]; !ok {
+				t.Errorf("root node does not contain project %q", v)
+			}
+		}
+		for _, v := range root.Children {
+			if len(v.Children) != 0 {
+				t.Errorf("more than 1 level of nodes found in prefix tree")
+			}
+		}
+	} else {
+		t.Errorf("expecting %v first level nodes, but got %v", 3, len(root.Children))
+	}
+}