Change the licenseclassifier return type to a summary structure rather than a
raw list of matches. This allows us to return additional metadata about the
original input to the classifier. One datum currently included in this summary
is the number of lines of input presented to the classifier, which is necessary
to identify ranges of text that did not contain a license.

This change consolidates usages of the third-party classifier API around a
common interface for the compliance team and provides a common stub,
eliminating many ad-hoc stub implementations.

This CL was tested via global presubmit to ensure usages outside of the
compliance team were not negatively impacted.

*** Change 388303070

PiperOrigin-RevId: 388973411
diff --git a/v2/classifier.go b/v2/classifier.go
index 89fa74c..f970543 100644
--- a/v2/classifier.go
+++ b/v2/classifier.go
@@ -36,6 +36,13 @@
 	EndTokenIndex   int
 }
 
+// Results captures the summary information and matches detected by the
+// classifier.
+type Results struct {
+	Matches         Matches
+	TotalInputLines int
+}
+
 // Matches is a sortable slice of Match.
 type Matches []*Match
 
@@ -57,7 +64,7 @@
 }
 
 // Match reports instances of the supplied content in the corpus.
-func (c *Classifier) match(in []byte) Matches {
+func (c *Classifier) match(in []byte) Results {
 	id := c.createTargetIndexedDocument(in)
 
 	firstPass := make(map[string]*indexedDocument)
@@ -69,7 +76,10 @@
 	}
 
 	if len(firstPass) == 0 {
-		return nil
+		return Results{
+			Matches:         nil,
+			TotalInputLines: 0,
+		}
 	}
 
 	// Perform the expensive work of generating a searchset to look for token runs.
@@ -162,7 +172,10 @@
 			out = append(out, candidates[i])
 		}
 	}
-	return out
+	return Results{
+		Matches:         out,
+		TotalInputLines: id.Tokens[len(id.Tokens)-1].Line,
+	}
 }
 
 // Classifier provides methods for identifying open source licenses in text
@@ -226,15 +239,15 @@
 
 // Match finds matches within an unknown text. This will not modify the contents
 // of the supplied byte slice.
-func (c *Classifier) Match(in []byte) Matches {
+func (c *Classifier) Match(in []byte) Results {
 	return c.match(in)
 }
 
 // MatchFrom finds matches within the read content.
-func (c *Classifier) MatchFrom(in io.Reader) (Matches, error) {
+func (c *Classifier) MatchFrom(in io.Reader) (Results, error) {
 	b, err := ioutil.ReadAll(in)
 	if err != nil {
-		return nil, fmt.Errorf("classifier couldn't read: %w", err)
+		return Results{}, fmt.Errorf("classifier couldn't read: %w", err)
 	}
 	return c.Match(b), nil
 }
diff --git a/v2/classifier_test.go b/v2/classifier_test.go
index 5cb9dbf..b52b283 100644
--- a/v2/classifier_test.go
+++ b/v2/classifier_test.go
@@ -75,7 +75,7 @@
 		s := readScenario(f)
 
 		m := c.Match(s.data)
-		checkMatches(t, m, f, s.expected)
+		checkMatches(t, m.Matches, f, s.expected)
 	}
 }
 
@@ -236,7 +236,7 @@
 		if err != nil {
 			t.Errorf("unexpected error: %v", err)
 		}
-		checkMatches(t, m, f, s.expected)
+		checkMatches(t, m.Matches, f, s.expected)
 	}
 }