[jiri2repo] Create command to convert Jiri source manifest to Repo manifest

Bug: 41015
Change-Id: I677135fe5cee8f161b972b79e369f97034b78580
diff --git a/cmd/jiri2repo/jiri/source_manifest.go b/cmd/jiri2repo/jiri/source_manifest.go
new file mode 100644
index 0000000..850218b
--- /dev/null
+++ b/cmd/jiri2repo/jiri/source_manifest.go
@@ -0,0 +1,76 @@
+package jiri
+
+import (
+	"encoding/json"
+	"io/ioutil"
+)
+
+// SourceManifest represents a Jiri source manifest file.
+//
+// This was copied from:
+// https://fuchsia.googlesource.com/jiri/+/refs/heads/master/project/source_manifest.go
+// which was created using the proto file:
+// https://github.com/luci/recipes-py/blob/master/recipe_engine/source_manifest.proto.
+type SourceManifest struct {
+	// Version will increment on backwards-incompatible changes only. Backwards
+	// compatible changes will not alter this version number.
+	//
+	// Currently, the only valid version number is 0.
+	Version int32 `json:"version"`
+	// Map of local file system directory path (with forward slashes) to
+	// a Directory message containing one or more deployments.
+	//
+	// The local path is relative to some job-specific root. This should be used
+	// for informational/display/organization purposes, and should not be used as
+	// a global primary key. i.e. if you depend on chromium/src.git being in
+	// a folder called “src”, I will find you and make really angry faces at you
+	// until you change it...(╬ಠ益ಠ). Instead, implementations should consider
+	// indexing by e.g. git repository URL or cipd package name as better
+	// primary keys.
+	Directories map[string]*SourceManifest_Directory `json:"directories"`
+}
+
+type SourceManifest_Directory struct { // One value of the SourceManifest.Directories map.
+	GitCheckout *SourceManifest_GitCheckout `json:"git_checkout,omitempty"` // Not set by json.Unmarshal when the entry has no "git_checkout" key, so it may be nil.
+}
+
+type SourceManifest_GitCheckout struct { // Git checkout recorded for one directory.
+	// The canonicalized URL of the original repo that is considered the “source
+	// of truth” for the source code. Ex.
+	//   https://chromium.googlesource.com/chromium/tools/build.git
+	//   https://github.com/luci/recipes-py
+	RepoUrl string `json:"repo_url,omitempty"`
+	// If different from repo_url, this can be the URL of the repo that the source
+	// was actually fetched from (i.e. a mirror). Ex.
+	//   https://chromium.googlesource.com/external/github.com/luci/recipes-py
+	//
+	// If this is empty, it's presumed to be equal to repo_url.
+	FetchUrl string `json:"fetch_url,omitempty"`
+	// The fully resolved revision (commit hash) of the source. Ex.
+	//   3617b0eea7ec74b8e731a23fed2f4070cbc284c4
+	// Note that this is the raw revision bytes, not their hex-encoded form.
+	// NOTE(review): that wording comes from the proto; this field is a string — confirm encoding.
+	Revision string `json:"revision,omitempty"`
+	// The ref that the task used to resolve/fetch the revision of the source
+	// (if any). Ex.
+	//   refs/heads/master
+	//   refs/changes/04/511804/4
+	//
+	// This should always be a ref on the hosted repo (not any local alias
+	// like 'refs/remotes/...').
+	//
+	// This should always be an absolute ref (i.e. starts with 'refs/'). An
+	// example of a non-absolute ref would be 'master'.
+	FetchRef string `json:"fetch_ref,omitempty"`
+}
+
+// ReadSourceManifestFile reads the file at path and decodes its JSON contents
+// into into. An error from reading the file or from decoding it is returned
+// unchanged.
+func ReadSourceManifestFile(path string, into *SourceManifest) error {
+	data, err := ioutil.ReadFile(path)
+	if err != nil {
+		return err
+	}
+	return json.Unmarshal(data, into)
+}
diff --git a/cmd/jiri2repo/main.go b/cmd/jiri2repo/main.go
new file mode 100644
index 0000000..865bfd1
--- /dev/null
+++ b/cmd/jiri2repo/main.go
@@ -0,0 +1,260 @@
+// Copyright 2019 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Binary main converts Jiri source manifests (JSON) into Repo manifests (XML).
+//
+// This tool is specific to Fuchsia in that it only understands the subset of either
+// manifest schema sufficient for producing Repo manifests that can be used with go/ab.
+//
+// The key elements of the Repo manifest are:
+// * <remote>
+// * <default>
+// * <project>
+//
+// <default> is mostly filled from user-input and specifies the default GoB remote and
+// branch for <project> elements in the manifest.
+//
+// <remote> and <project> elements are built from the entries of the "directories"
+// property from the input source manifest.
+//
+// Source manifests are created relative to some working directory, so there is usually
+// (always?) a directory entry named ".". This is represented in the Repo manifest by a
+// <project> whose name attribute is the basename of the directory>git_checkout>repo_url.
+//
+// For documentation on Repo manifests, see go/repo-manifests-explained and go/repo.
+//
+// For documentation on Jiri source manifests, see this proto:
+// https://chromium.googlesource.com/external/github.com/luci/recipes-py/+/refs/heads/master/recipe_engine/source_manifest.proto
+package main
+
+import (
+	"context"
+	"errors"
+	"flag"
+	"fmt"
+	"html/template"
+	"io"
+	"log"
+	"net/url"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+
+	"go.fuchsia.dev/infra/cmd/jiri2repo/jiri"
+	"go.fuchsia.dev/infra/cmd/jiri2repo/repo"
+)
+
+var bin = filepath.Base(os.Args[0]) // Executable name; shown in the usage text and in the generated manifest comment.
+
+// Command-line flags.
+var (
+	// The release branch name. Set by -branch; the flag defaults to refs/heads/master.
+	//
+	// This is used as the <default> revision in the repo manifest.
+	branch string
+
+	// The GoB host that will house the output Repo manifest. Set by -remote; must be non-empty.
+	//
+	// This is set as the <default> remote in the repo manifest.
+	defaultRemote string
+
+	// Where to write output. Set by -output; when empty, output goes to stdout.
+	output string
+)
+
+const gobHostSuffix = ".googlesource.com" // Hostname suffix of Git-on-Borg (GoB) hosts.
+
+var usageTemplate = template.Must(template.New("usage").Parse(`
+{{.Cmd}} [options] SOURCE_MANIFEST_PATH
+
+Converts a Jiri source manifest to a Repo manifest.
+
+EXAMPLES:
+
+{{.Cmd}} -branch releases/canary -remote fuchsia ./source_manifest.json
+{{.Cmd}} -remote fuchsia ./source_manifest.json
+{{.Cmd}} -remote fuchsia -output default.xml ./source_manifest.json
+
+OPTIONS:
+`)) // NOTE(review): parsed with html/template, which HTML-escapes {{.Cmd}}; text/template is the usual choice for terminal text — confirm.
+
+// usage prints the usage banner followed by the flag defaults, then exits with status 1.
+func usage() {
+	usageTemplate.Execute(flag.CommandLine.Output(), struct{ Cmd string }{Cmd: bin})
+	flag.PrintDefaults()
+	os.Exit(1)
+}
+
+func init() {
+	flag.StringVar(&defaultRemote, "remote", "", "The SSO GoB host that will house the output Repo manifest")
+	flag.StringVar(&output, "output", "", "Optional filepath to write output to. If empty, stdout is used")
+	flag.StringVar(&branch, "branch", "refs/heads/master", "The release branch")
+	flag.Usage = usage // Print the custom banner instead of the flag package's default usage.
+}
+
+func main() {
+	flag.Parse()
+	if argErr := validateArgs(); argErr != nil {
+		log.Print(argErr)
+		flag.Usage()
+	}
+	// Run the conversion; any failure is fatal.
+	if runErr := execute(context.Background()); runErr != nil {
+		log.Fatal(runErr)
+	}
+}
+
+// validateArgs reports the first problem found with the parsed command line, or nil.
+func validateArgs() error {
+	switch {
+	case flag.NArg() != 1:
+		return errors.New("expected one positional argument")
+	case branch == "":
+		return errors.New("missing -branch")
+	case defaultRemote == "":
+		return errors.New("missing -remote")
+	}
+	return nil
+}
+
+// execute converts the source manifest named by the positional argument and writes the Repo
+// manifest to -output (stdout if empty). The output file is closed; a close error is returned.
+func execute(ctx context.Context) (err error) {
+	var in jiri.SourceManifest
+	sourceManifestPath := flag.Arg(0)
+	if err := jiri.ReadSourceManifestFile(sourceManifestPath, &in); err != nil {
+		return fmt.Errorf("failed to read %s: %v", sourceManifestPath, err)
+	}
+	var out repo.Manifest
+	if err := convert(in, &out, repo.Default{Branch: branch, Remote: defaultRemote}); err != nil {
+		return fmt.Errorf("conversion failed: %v", err)
+	}
+
+	var w io.Writer = os.Stdout
+	if output != "" {
+		f, createErr := os.Create(output)
+		if createErr != nil {
+			return fmt.Errorf("failed to open output file %q: %v", output, createErr)
+		}
+		defer func() {
+			if closeErr := f.Close(); closeErr != nil && err == nil {
+				err = fmt.Errorf("failed to close output file %q: %v", output, closeErr)
+			}
+		}()
+		w = f
+	}
+	if err := repo.WriteManifest(w, &out); err != nil {
+		return fmt.Errorf("failed to write Repo manifest: %v", err)
+	}
+	return nil
+}
+
+// convert fills out with the Repo manifest equivalent of in. defaults becomes <default>,
+// with any ".googlesource.com" suffix stripped from defaults.Remote. Directories are
+// visited in sorted order: only the first <remote> seen per name is kept, so following
+// Go's randomized map order would make the emitted <remote> vary from run to run.
+func convert(in jiri.SourceManifest, out *repo.Manifest, defaults repo.Default) error {
+	// Remotes are recorded without GoB host suffixes in Repo manifests.
+	defaults.Remote = strings.TrimSuffix(defaults.Remote, gobHostSuffix)
+
+	names := make([]string, 0, len(in.Directories))
+	for name := range in.Directories {
+		names = append(names, name)
+	}
+	sort.Strings(names)
+
+	// Tracks GoB remote names to avoid outputting duplicate <remote> elements.
+	remotes := make(map[string]bool)
+	for _, name := range names {
+		project, remote, err := convertDirectory(name, *in.Directories[name], defaults)
+		if err != nil {
+			return err
+		}
+		out.Project = append(out.Project, *project)
+		if !remotes[remote.Name] {
+			remotes[remote.Name] = true
+			out.Remote = append(out.Remote, *remote)
+		}
+	}
+
+	// With no <remote> for the default, either the user named the wrong remote or the
+	// manifest lives on a host unrelated to its repos; the latter is unlikely, so fail.
+	if !remotes[defaults.Remote] {
+		return fmt.Errorf("the input manifest contained no projects from the specified default remote %q", defaults.Remote)
+	}
+
+	out.Default = defaults
+	out.Comment = fmt.Sprintf("Auto generated by %s. DO NOT EDIT", bin)
+	// Sort sections for deterministic output.
+	sort.Slice(out.Remote, func(a, b int) bool {
+		return out.Remote[a].Name < out.Remote[b].Name
+	})
+	sort.Slice(out.Project, func(a, b int) bool {
+		return out.Project[a].Name < out.Project[b].Name
+	})
+	return nil
+}
+
+// convertDirectory builds the Repo <project> and <remote> elements for one Jiri source
+// manifest directory. name becomes the project's name and path; for "." the basename of
+// the repository URL is used instead. A directory without a git checkout or with an empty
+// name is an error, and any error from ssoCodeReviewURL is returned unchanged.
+func convertDirectory(name string, d jiri.SourceManifest_Directory, defaults repo.Default) (*repo.Project, *repo.Remote, error) {
+	checkout := d.GitCheckout
+	if checkout == nil {
+		return nil, nil, fmt.Errorf("source manifest directory %q has no git checkout", name)
+	}
+	if name == "." {
+		name = filepath.Base(checkout.RepoUrl)
+	}
+	if name == "" {
+		return nil, nil, fmt.Errorf("source manifest directory has no name: %v", d)
+	}
+
+	reviewURL, err := ssoCodeReviewURL(checkout.RepoUrl)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	remote := &repo.Remote{
+		// By convention the remote name is the GoB hostname and not the review hostname.
+		Name:   strings.TrimSuffix(reviewURL.Host, "-review"),
+		Fetch:  checkout.FetchRef,
+		Review: reviewURL.String(),
+	}
+	project := &repo.Project{
+		Name:     name,
+		Path:     name,
+		Remote:   remote.Name,
+		Revision: checkout.Revision,
+	}
+
+	if project.Remote == defaults.Remote {
+		project.Remote = ""
+	}
+	return project, remote, nil
+}
+
+// ssoCodeReviewURL derives the sso:// code review URL for the Git-on-Borg host of gobURL;
+// for example, "https://host.googlesource.com" yields "sso://host-review/". It fails if
+// gobURL does not parse, has no scheme, or its hostname lacks the GoB host suffix.
+func ssoCodeReviewURL(gobURL string) (*url.URL, error) {
+	parsed, err := url.Parse(gobURL)
+	if err != nil {
+		return nil, err
+	}
+	// URL.Hostname() returns the empty string if the scheme is empty.
+	if parsed.Scheme == "" {
+		return nil, fmt.Errorf("invalid URL %q: origin has no scheme", gobURL)
+	}
+
+	gobHost := parsed.Hostname()
+	if !strings.HasSuffix(gobHost, gobHostSuffix) {
+		return nil, fmt.Errorf("cannot compute code review host for non Git-on-Borg hostname %q", gobHost)
+	}
+
+	reviewHost := strings.TrimSuffix(gobHost, gobHostSuffix) + "-review"
+	return &url.URL{Scheme: "sso", Host: reviewHost, Path: "/"}, nil
+}
diff --git a/cmd/jiri2repo/main_test.go b/cmd/jiri2repo/main_test.go
new file mode 100644
index 0000000..8e1f733
--- /dev/null
+++ b/cmd/jiri2repo/main_test.go
@@ -0,0 +1,134 @@
+package main
+
+import (
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	"go.fuchsia.dev/infra/cmd/jiri2repo/jiri"
+	"go.fuchsia.dev/infra/cmd/jiri2repo/repo"
+)
+
+func TestSSOCodeReviewURL(t *testing.T) {
+	okCases := []struct {
+		input string
+		want  string
+	}{
+		{
+			input: "https://host.googlesource.com",
+			want:  "sso://host-review/",
+		},
+		{
+			input: "https://host-with-dash.googlesource.com",
+			want:  "sso://host-with-dash-review/",
+		},
+		{
+			input: "https://host.with.dot.googlesource.com",
+			want:  "sso://host.with.dot-review/",
+		},
+		{
+			input: "sso://host.googlesource.com",
+			want:  "sso://host-review/",
+		},
+	}
+	// Each well-formed GoB URL must map to the sso:// URL of its review host.
+	for _, tc := range okCases {
+		t.Run(tc.input, func(t *testing.T) {
+			reviewURL, err := ssoCodeReviewURL(tc.input)
+			if err != nil {
+				t.Errorf("unexpected error: %v", err.Error())
+				return
+			}
+			if got := reviewURL.String(); tc.want != got {
+				t.Errorf("got %q but wanted %q", got, tc.want)
+			}
+		})
+	}
+
+	// None of these inputs has a scheme, so each one must be rejected.
+	badInputs := []string{
+		"host",
+		"host.com",
+		"host.googlesource.com",
+	}
+	for _, raw := range badInputs {
+		t.Run(raw, func(t *testing.T) {
+			reviewURL, err := ssoCodeReviewURL(raw)
+			if err == nil {
+				t.Errorf("got %s but wanted an error", reviewURL.String())
+			}
+		})
+	}
+}
+
+func TestConvert(t *testing.T) {
+	src := jiri.SourceManifest{
+		Directories: map[string]*jiri.SourceManifest_Directory{
+			// The "." entry exercises the working-directory case.
+			".": {
+				GitCheckout: &jiri.SourceManifest_GitCheckout{
+					FetchRef: "refs/heads/master",
+					RepoUrl:  "https://fuchsia.googlesource.com/fuchsia",
+					Revision: "fuchsia_revision",
+				},
+			},
+			"code": {
+				GitCheckout: &jiri.SourceManifest_GitCheckout{
+					FetchRef: "refs/heads/dev",
+					RepoUrl:  "https://code.googlesource.com/code",
+					Revision: "code_revision",
+				},
+			},
+		},
+	}
+
+	defs := repo.Default{
+		Branch: "default_branch",
+		// The default remote must not appear as any project's explicit remote, and
+		// it must be written to the output <default> as just "fuchsia".
+		Remote: "fuchsia.googlesource.com",
+	}
+
+	expected := repo.Manifest{
+		Remote: []repo.Remote{
+			{
+				Name:   "code",
+				Review: "sso://code-review/",
+				Fetch:  "refs/heads/dev",
+			},
+			{
+				Name:   "fuchsia",
+				Review: "sso://fuchsia-review/",
+				Fetch:  "refs/heads/master",
+			},
+		},
+		Default: repo.Default{
+			Branch: "default_branch",
+			Remote: "fuchsia",
+		},
+		Project: []repo.Project{
+			{
+				Name:     "code",
+				Path:     "code",
+				Remote:   "code",
+				Revision: "code_revision",
+			},
+			{
+				Name:     "fuchsia",
+				Path:     "fuchsia",
+				Revision: "fuchsia_revision",
+			},
+		},
+	}
+
+	var actual repo.Manifest
+	if err := convert(src, &actual, defs); err != nil {
+		t.Fatal(err)
+	}
+
+	// The generated comment is not under test; blank it before comparing.
+	actual.Comment = ""
+
+	if delta := cmp.Diff(expected, actual); delta != "" {
+		t.Errorf("convert() mismatch (-want +got):\n%s", delta)
+	}
+}
diff --git a/cmd/jiri2repo/repo/manifest.go b/cmd/jiri2repo/repo/manifest.go
new file mode 100644
index 0000000..a0a3fdc
--- /dev/null
+++ b/cmd/jiri2repo/repo/manifest.go
@@ -0,0 +1,101 @@
+package repo
+
+import (
+	"encoding/xml"
+	"io"
+	"io/ioutil"
+)
+
+// Manifest is the root element of a Repo manifest file.
+//
+// The order of the fields in this struct is the order they will be written by
+// xml.Marshal.
+//
+// Some fields of Manifest and its children have been omitted. For full documentation of
+// the Repo manifest structure, see: go/repo-manifests-explained.
+type Manifest struct {
+	XMLName xml.Name  `xml:"manifest"`
+	Comment string    `xml:",comment"` // Marshaled as an XML comment.
+	Remote  []Remote  `xml:"remote"`   // One <remote> element per entry.
+	Default Default   `xml:"default"`  // The <default> element.
+	Project []Project `xml:"project"`  // One <project> element per entry.
+}
+
+// Default is the <default> element; it holds default values for Project element attributes.
+//
+// The revision attribute is represented by `Branch` instead of `Revision` because the
+// value is always a branch name. "revision" is a misnomer in the Repo manifest schema.
+type Default struct {
+	// Name of a Git branch (e.g. master or refs/heads/master). Project elements lacking
+	// their own revision attribute will use this value.
+	Branch string `xml:"revision,attr,omitempty"` // Written as the "revision" attribute.
+
+	// Name of a previously defined remote element. Project elements lacking a remote
+	// attribute of their own will use this remote.
+	Remote string `xml:"remote,attr,omitempty"`
+}
+
+// Remote specifies a Git URL and review server shared by one or more repos.
+type Remote struct {
+	// A short name unique to this manifest file. The name specified here is used as the
+	// remote name in each project's .git/config, and is therefore automatically available
+	// to commands like git fetch, git remote, git pull and git push.
+	Name string `xml:"name,attr,omitempty"`
+
+	// The Git URL prefix for all projects which use this remote. Each project's name is
+	// appended to this prefix to form the actual URL used to clone the project.
+	Fetch string `xml:"fetch,attr,omitempty"` // NOTE(review): convertDirectory in main.go stores a Git ref (FetchRef) here — confirm intent.
+
+	// Hostname of the Gerrit server where reviews are uploaded to by repo upload. This
+	// attribute is optional; if not specified then repo upload will not function.
+	Review string `xml:"review,attr,omitempty"`
+
+	// Revision omitted; Project elements will contain a revision if necessary.
+}
+
+// Project represents a single Git repository; it is written as a <project> element.
+type Project struct {
+	// Name is the name of this project.
+	Name string `xml:"name,attr,omitempty"`
+
+	// Path is an optional path relative to the top directory of the repo client where the
+	// Git working directory for this project should be placed. If not supplied the
+	// project name is used. If the project has a parent element, its path will be
+	// prefixed by the parent's.
+	Path string `xml:"path,attr,omitempty"`
+
+	// Remote is the name of a previously defined remote element. If not supplied the
+	// remote given by the default element is used.
+	Remote string `xml:"remote,attr,omitempty"`
+
+	// Revision is the name of the Git branch the manifest wants to track for this
+	// project. Names can be relative to refs/heads (e.g. just master) or absolute (e.g.
+	// refs/heads/master). Tags and/or explicit SHA-1s should work in theory, but have not
+	// been extensively tested. If not supplied the revision given by the remote element
+	// is used if applicable, else the default element is used.
+	Revision string `xml:"revision,attr,omitempty"`
+}
+
+// ParseManifest reads r to EOF and decodes the XML it contains into into.
+func ParseManifest(r io.Reader, into *Manifest) error {
+	data, readErr := ioutil.ReadAll(r)
+	if readErr != nil {
+		return readErr
+	}
+	return xml.Unmarshal(data, into)
+}
+
+// WriteManifest writes m to w as XML: first the standard XML header, then m
+// marshaled with a two-space indent. The first marshal or write error is
+// returned.
+func WriteManifest(w io.Writer, m *Manifest) error {
+	body, err := xml.MarshalIndent(m, "", "  ")
+	if err != nil {
+		return err
+	}
+	if _, err := w.Write([]byte(xml.Header)); err != nil {
+		return err
+	}
+	_, err = w.Write(body)
+	return err
+}
diff --git a/cmd/jiri2repo/repo/manifest_test.go b/cmd/jiri2repo/repo/manifest_test.go
new file mode 100644
index 0000000..713fb55
--- /dev/null
+++ b/cmd/jiri2repo/repo/manifest_test.go
@@ -0,0 +1,71 @@
+package repo_test
+
+import (
+	"encoding/xml"
+	"strings"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	"go.fuchsia.dev/infra/cmd/jiri2repo/repo"
+)
+
+func TestParseManifestFile(t *testing.T) {
+	input := `
+	<manifest>
+		<remote name="android"
+			    fetch=".."
+			    review="sso://android/" />
+		<remote name="fuchsia"
+			    fetch=".."
+				review="sso://fuchsia"
+				revision="deadbeef" />
+
+		<project name="integration"
+				 path="integration-path" />
+		<project name="fuchsia"
+				 path="fuchsia-path"
+				 remote="fuchsia-remote"
+				 revision="fuchsia-revision" />
+	</manifest>
+	`
+	// The <remote> "revision" attribute has no field in repo.Remote, so want omits it.
+	want := repo.Manifest{
+		XMLName: xml.Name{
+			Space: "",
+			Local: "manifest",
+		},
+		Remote: []repo.Remote{
+			{
+				Name:   "android",
+				Fetch:  "..",
+				Review: "sso://android/",
+			},
+			{
+				Name:   "fuchsia",
+				Fetch:  "..",
+				Review: "sso://fuchsia",
+			},
+		},
+		Project: []repo.Project{
+			{
+				Name: "integration",
+				Path: "integration-path",
+			},
+			{
+				Name:     "fuchsia",
+				Path:     "fuchsia-path",
+				Remote:   "fuchsia-remote",
+				Revision: "fuchsia-revision",
+			},
+		},
+	}
+
+	var got repo.Manifest
+	if err := repo.ParseManifest(strings.NewReader(input), &got); err != nil {
+		t.Fatal(err)
+	}
+
+	if diff := cmp.Diff(want, got); diff != "" {
+		t.Errorf("ParseManifest() mismatch (-want +got):\n%s", diff)
+	}
+}