[release-branch.go1.15-security] cmd/go, cmd/cgo: don't let bogus symbol set cgo_ldflag

A hand-edited object file can have a symbol name that uses newline and
other normally invalid characters. The cgo tool will generate Go files
containing symbol names, unquoted. That can permit those symbol names
to inject Go code into a cgo-generated file. If that Go code uses the
//go:cgo_ldflag pragma, it can cause the C linker to run arbitrary
code when building a package. If you build an imported package we
permit arbitrary code at run time, but we don't want to permit it at
package build time. This CL prevents this in two ways.

In cgo, reject invalid symbols that contain non-printable or space
characters, or that contain anything that looks like a Go comment.

In the go tool, double check all //go:cgo_ldflag directives in
generated code, to make sure they follow the existing LDFLAG restrictions.

Thanks to Chris Brown and Tempus Ex for reporting this.

Fixes CVE-2020-28366

Change-Id: Ia1ad8f3791ea79612690fa7d26ac451d0f6df7c1
Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/895832
Reviewed-by: Than McIntosh <thanm@google.com>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
(cherry picked from commit 6bc814dd2bbfeaafa41d314dd4cc591b575dfbf6)
Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/901056
Reviewed-by: Filippo Valsorda <valsorda@google.com>
Reviewed-by: Roland Shoemaker <bracewell@google.com>
diff --git a/misc/cgo/errors/badsym_test.go b/misc/cgo/errors/badsym_test.go
new file mode 100644
index 0000000..b2701bf
--- /dev/null
+++ b/misc/cgo/errors/badsym_test.go
@@ -0,0 +1,216 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package errorstest
+
+import (
+	"bytes"
+	"io/ioutil"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"testing"
+	"unicode"
+)
+
+// A manually modified object file could pass unexpected characters
+// into the files generated by cgo.
+
+const magicInput = "abcdefghijklmnopqrstuvwxyz0123"
+const magicReplace = "\n//go:cgo_ldflag \"-badflag\"\n//"
+
+const cSymbol = "BadSymbol" + magicInput + "Name"
+const cDefSource = "int " + cSymbol + " = 1;"
+const cRefSource = "extern int " + cSymbol + "; int F() { return " + cSymbol + "; }"
+
+// goSource is the source code for the trivial Go file we use.
+// We will replace TMPDIR with the temporary directory name.
+const goSource = `
+package main
+
+// #cgo LDFLAGS: TMPDIR/cbad.o TMPDIR/cbad.so
+// extern int F();
+import "C"
+
+func main() {
+	println(C.F())
+}
+`
+
+func TestBadSymbol(t *testing.T) {
+	dir := t.TempDir()
+
+	mkdir := func(base string) string {
+		ret := filepath.Join(dir, base)
+		if err := os.Mkdir(ret, 0755); err != nil {
+			t.Fatal(err)
+		}
+		return ret
+	}
+
+	cdir := mkdir("c")
+	godir := mkdir("go")
+
+	makeFile := func(mdir, base, source string) string {
+		ret := filepath.Join(mdir, base)
+		if err := ioutil.WriteFile(ret, []byte(source), 0644); err != nil {
+			t.Fatal(err)
+		}
+		return ret
+	}
+
+	cDefFile := makeFile(cdir, "cdef.c", cDefSource)
+	cRefFile := makeFile(cdir, "cref.c", cRefSource)
+
+	ccCmd := cCompilerCmd(t)
+
+	cCompile := func(arg, base, src string) string {
+		out := filepath.Join(cdir, base)
+		run := append(ccCmd, arg, "-o", out, src)
+		output, err := exec.Command(run[0], run[1:]...).CombinedOutput()
+		if err != nil {
+			t.Log(run)
+			t.Logf("%s", output)
+			t.Fatal(err)
+		}
+		if err := os.Remove(src); err != nil {
+			t.Fatal(err)
+		}
+		return out
+	}
+
+	// Build a shared library that defines a symbol whose name
+	// contains magicInput.
+
+	cShared := cCompile("-shared", "c.so", cDefFile)
+
+	// Build an object file that refers to the symbol whose name
+	// contains magicInput.
+
+	cObj := cCompile("-c", "c.o", cRefFile)
+
+	// Rewrite the shared library and the object file, replacing
+	// magicInput with magicReplace. This will have the effect of
+	// introducing a symbol whose name looks like a cgo command.
+	// The cgo tool will use that name when it generates the
+	// _cgo_import.go file, thus smuggling a magic //go:cgo_ldflag
+	// pragma into a Go file. We used to not check the pragmas in
+	// _cgo_import.go.
+
+	rewrite := func(from, to string) {
+		obj, err := ioutil.ReadFile(from)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		if bytes.Count(obj, []byte(magicInput)) == 0 {
+			t.Fatalf("%s: did not find magic string", from)
+		}
+
+		if len(magicInput) != len(magicReplace) {
+			t.Fatalf("internal test error: different magic lengths: %d != %d", len(magicInput), len(magicReplace))
+		}
+
+		obj = bytes.ReplaceAll(obj, []byte(magicInput), []byte(magicReplace))
+
+		if err := ioutil.WriteFile(to, obj, 0644); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	cBadShared := filepath.Join(godir, "cbad.so")
+	rewrite(cShared, cBadShared)
+
+	cBadObj := filepath.Join(godir, "cbad.o")
+	rewrite(cObj, cBadObj)
+
+	goSourceBadObject := strings.ReplaceAll(goSource, "TMPDIR", godir)
+	makeFile(godir, "go.go", goSourceBadObject)
+
+	makeFile(godir, "go.mod", "module badsym")
+
+	// Try to build our little package.
+	cmd := exec.Command("go", "build", "-ldflags=-v")
+	cmd.Dir = godir
+	output, err := cmd.CombinedOutput()
+
+	// The build should fail, but we want it to fail because we
+	// detected the error, not because we passed a bad flag to the
+	// C linker.
+
+	if err == nil {
+		t.Errorf("go build succeeded unexpectedly")
+	}
+
+	t.Logf("%s", output)
+
+	for _, line := range bytes.Split(output, []byte("\n")) {
+		if bytes.Contains(line, []byte("dynamic symbol")) && bytes.Contains(line, []byte("contains unsupported character")) {
+			// This is the error from cgo.
+			continue
+		}
+
+		// We passed -ldflags=-v to see the external linker invocation,
+		// which should not include -badflag.
+		if bytes.Contains(line, []byte("-badflag")) {
+			t.Error("output should not mention -badflag")
+		}
+
+		// Also check for compiler errors, just in case.
+		// GCC says "unrecognized command line option".
+		// clang says "unknown argument".
+		if bytes.Contains(line, []byte("unrecognized")) || bytes.Contains(output, []byte("unknown")) {
+			t.Error("problem should have been caught before invoking C linker")
+		}
+	}
+}
+
+func cCompilerCmd(t *testing.T) []string {
+	cc := []string{goEnv(t, "CC")}
+
+	out := goEnv(t, "GOGCCFLAGS")
+	quote := '\000'
+	start := 0
+	lastSpace := true
+	backslash := false
+	s := string(out)
+	for i, c := range s {
+		if quote == '\000' && unicode.IsSpace(c) {
+			if !lastSpace {
+				cc = append(cc, s[start:i])
+				lastSpace = true
+			}
+		} else {
+			if lastSpace {
+				start = i
+				lastSpace = false
+			}
+			if quote == '\000' && !backslash && (c == '"' || c == '\'') {
+				quote = c
+				backslash = false
+			} else if !backslash && quote == c {
+				quote = '\000'
+			} else if (quote == '\000' || quote == '"') && !backslash && c == '\\' {
+				backslash = true
+			} else {
+				backslash = false
+			}
+		}
+	}
+	if !lastSpace {
+		cc = append(cc, s[start:])
+	}
+	return cc
+}
+
+func goEnv(t *testing.T, key string) string {
+	out, err := exec.Command("go", "env", key).CombinedOutput()
+	if err != nil {
+		t.Logf("go env %s\n", key)
+		t.Logf("%s", out)
+		t.Fatal(err)
+	}
+	return strings.TrimSpace(string(out))
+}
diff --git a/src/cmd/cgo/out.go b/src/cmd/cgo/out.go
index cbdb4e0..dcd69ed 100644
--- a/src/cmd/cgo/out.go
+++ b/src/cmd/cgo/out.go
@@ -336,6 +336,8 @@
 			if s.Version != "" {
 				targ += "#" + s.Version
 			}
+			checkImportSymName(s.Name)
+			checkImportSymName(targ)
 			fmt.Fprintf(stdout, "//go:cgo_import_dynamic %s %s %q\n", s.Name, targ, s.Library)
 		}
 		lib, _ := f.ImportedLibraries()
@@ -351,6 +353,7 @@
 			if len(s) > 0 && s[0] == '_' {
 				s = s[1:]
 			}
+			checkImportSymName(s)
 			fmt.Fprintf(stdout, "//go:cgo_import_dynamic %s %s %q\n", s, s, "")
 		}
 		lib, _ := f.ImportedLibraries()
@@ -365,6 +368,8 @@
 		for _, s := range sym {
 			ss := strings.Split(s, ":")
 			name := strings.Split(ss[0], "@")[0]
+			checkImportSymName(name)
+			checkImportSymName(ss[0])
 			fmt.Fprintf(stdout, "//go:cgo_import_dynamic %s %s %q\n", name, ss[0], strings.ToLower(ss[1]))
 		}
 		return
@@ -382,6 +387,7 @@
 				// Go symbols.
 				continue
 			}
+			checkImportSymName(s.Name)
 			fmt.Fprintf(stdout, "//go:cgo_import_dynamic %s %s %q\n", s.Name, s.Name, s.Library)
 		}
 		lib, err := f.ImportedLibraries()
@@ -397,6 +403,23 @@
 	fatalf("cannot parse %s as ELF, Mach-O, PE or XCOFF", obj)
 }
 
+// checkImportSymName checks a symbol name we are going to emit as part
+// of a //go:cgo_import_dynamic pragma. These names come from object
+// files, so they may be corrupt. We are going to emit them unquoted,
+// so while they don't need to be valid symbol names (and in some cases,
+// involving symbol versions, they won't be) they must contain only
+// graphic characters and must not contain Go comments.
+func checkImportSymName(s string) {
+	for _, c := range s {
+		if !unicode.IsGraphic(c) || unicode.IsSpace(c) {
+			fatalf("dynamic symbol %q contains unsupported character", s)
+		}
+	}
+	if strings.Index(s, "//") >= 0 || strings.Index(s, "/*") >= 0 {
+		fatalf("dynamic symbol %q contains Go comment")
+	}
+}
+
 // Construct a gcc struct matching the gc argument frame.
 // Assumes that in gcc, char is 1 byte, short 2 bytes, int 4 bytes, long long 8 bytes.
 // These assumptions are checked by the gccProlog.
diff --git a/src/cmd/go/internal/work/exec.go b/src/cmd/go/internal/work/exec.go
index 071c9d2..13d4c8c 100644
--- a/src/cmd/go/internal/work/exec.go
+++ b/src/cmd/go/internal/work/exec.go
@@ -2711,6 +2711,66 @@
 		noCompiler()
 	}
 
+	// Double check the //go:cgo_ldflag comments in the generated files.
+	// The compiler only permits such comments in files whose base name
+	// starts with "_cgo_". Make sure that the comments in those files
+	// are safe. This is a backstop against people somehow smuggling
+	// such a comment into a file generated by cgo.
+	if cfg.BuildToolchainName == "gc" && !cfg.BuildN {
+		var flags []string
+		for _, f := range outGo {
+			if !strings.HasPrefix(filepath.Base(f), "_cgo_") {
+				continue
+			}
+
+			src, err := ioutil.ReadFile(f)
+			if err != nil {
+				return nil, nil, err
+			}
+
+			const cgoLdflag = "//go:cgo_ldflag"
+			idx := bytes.Index(src, []byte(cgoLdflag))
+			for idx >= 0 {
+				// We are looking at //go:cgo_ldflag.
+				// Find start of line.
+				start := bytes.LastIndex(src[:idx], []byte("\n"))
+				if start == -1 {
+					start = 0
+				}
+
+				// Find end of line.
+				end := bytes.Index(src[idx:], []byte("\n"))
+				if end == -1 {
+					end = len(src)
+				} else {
+					end += idx
+				}
+
+				// Check for first line comment in line.
+				// We don't worry about /* */ comments,
+				// which normally won't appear in files
+				// generated by cgo.
+				commentStart := bytes.Index(src[start:], []byte("//"))
+				commentStart += start
+				// If that line comment is //go:cgo_ldflag,
+				// it's a match.
+				if bytes.HasPrefix(src[commentStart:], []byte(cgoLdflag)) {
+					// Pull out the flag, and unquote it.
+					// This is what the compiler does.
+					flag := string(src[idx+len(cgoLdflag) : end])
+					flag = strings.TrimSpace(flag)
+					flag = strings.Trim(flag, `"`)
+					flags = append(flags, flag)
+				}
+				src = src[end:]
+				idx = bytes.Index(src, []byte(cgoLdflag))
+			}
+		}
+		if err := checkLinkerFlags("LDFLAGS", "go:cgo_ldflag", flags); err != nil {
+			return nil, nil, err
+		}
+	}
+
 	return outGo, outObj, nil
 }