fix(gazelle): __init__.py in per-file targets (#1582)

As per the Python spec, `__init__.py` files are depended upon by every file
in the package, so let's make sure that our generated targets also
understand this implicit dependency. Note that because Python module
dependencies are not a DAG, we cannot depend on the Bazel target for
`__init__.py` files (to avoid cycles in Bazel). Instead, when the
`python_generation_mode_per_file_include_init` directive is enabled, a
non-empty `__init__.py` file is added to the `srcs` attribute of every
`py_library` target generated in `file` mode.

The language spec also says that each package depends on its parent
package, but that is a less commonly used feature and handling it would
make things more complex, so this change only covers the `__init__.py`
file in the same directory.

From [importlib] docs:
> Changed in version 3.3: Parent packages are automatically imported.

From [import] language reference:
> Importing parent.one will implicitly execute parent/__init__.py and
parent/one/__init__.py.


[importlib]:
https://docs.python.org/3/library/importlib.html#importlib.import_module
[import]:
https://docs.python.org/3/reference/import.html#regular-packages

---------

Co-authored-by: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b032f4e..b524536 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -50,6 +50,9 @@
 ### Added
 
 * (docs) bzlmod extensions are now documented on rules-python.readthedocs.io
+* (gazelle) `file` generation mode can now also add `__init__.py` to the srcs
+  attribute for every target in the package. This is enabled through a separate
+  directive `python_generation_mode_per_file_include_init`.
 
 [0.XX.0]: https://github.com/bazelbuild/rules_python/releases/tag/0.XX.0
 
diff --git a/gazelle/README.md b/gazelle/README.md
index 8272b49..567d290 100644
--- a/gazelle/README.md
+++ b/gazelle/README.md
@@ -184,6 +184,8 @@
 | Controls whether the Python import statements should be validated. Can be "true" or "false" | |
 | `# gazelle:python_generation_mode`| `package` |
 | Controls the target generation mode. Can be "file", "package", or "project" | |
+| `# gazelle:python_generation_mode_per_file_include_init`| `false` |
+| Controls whether `__init__.py` files are included as srcs in each generated target when target generation mode is "file". Can be "true" or "false" | |
 | `# gazelle:python_library_naming_convention`| `$package_name$` |
 | Controls the `py_library` naming convention. It interpolates \$package_name\$ with the Bazel package name. E.g. if the Bazel package name is `foo`, setting this to `$package_name$_my_lib` would result in a generated target named `foo_my_lib`. | |
 | `# gazelle:python_binary_naming_convention` | `$package_name$_bin` |
diff --git a/gazelle/python/configure.go b/gazelle/python/configure.go
index 2d38805..69d2762 100644
--- a/gazelle/python/configure.go
+++ b/gazelle/python/configure.go
@@ -59,6 +59,7 @@
 		pythonconfig.IgnoreDependenciesDirective,
 		pythonconfig.ValidateImportStatementsDirective,
 		pythonconfig.GenerationMode,
+		pythonconfig.GenerationModePerFileIncludeInit,
 		pythonconfig.LibraryNamingConvention,
 		pythonconfig.BinaryNamingConvention,
 		pythonconfig.TestNamingConvention,
@@ -149,6 +150,12 @@
 					pythonconfig.GenerationMode, d.Value)
 				log.Fatal(err)
 			}
+		case pythonconfig.GenerationModePerFileIncludeInit:
+			v, err := strconv.ParseBool(strings.TrimSpace(d.Value))
+			if err != nil {
+				log.Fatal(err)
+			}
+			config.SetPerFileGenerationIncludeInit(v)
 		case pythonconfig.LibraryNamingConvention:
 			config.SetLibraryNamingConvention(strings.TrimSpace(d.Value))
 		case pythonconfig.BinaryNamingConvention:
diff --git a/gazelle/python/generate.go b/gazelle/python/generate.go
index 5ab9f53..8d9b169 100644
--- a/gazelle/python/generate.go
+++ b/gazelle/python/generate.go
@@ -272,18 +272,16 @@
 		result.Imports = append(result.Imports, pyLibrary.PrivateAttr(config.GazelleImportsKey))
 	}
 	if cfg.PerFileGeneration() {
+		hasInit, nonEmptyInit := hasLibraryEntrypointFile(args.Dir)
 		pyLibraryFilenames.Each(func(index int, filename interface{}) {
-			if filename == pyLibraryEntrypointFilename {
-				stat, err := os.Stat(filepath.Join(args.Dir, filename.(string)))
-				if err != nil {
-					log.Fatalf("ERROR: %v\n", err)
-				}
-				if stat.Size() == 0 {
-					return // ignore empty __init__.py
-				}
+			pyLibraryTargetName := strings.TrimSuffix(filepath.Base(filename.(string)), ".py")
+			if filename == pyLibraryEntrypointFilename && !nonEmptyInit {
+				return // ignore empty __init__.py.
 			}
 			srcs := treeset.NewWith(godsutils.StringComparator, filename)
-			pyLibraryTargetName := strings.TrimSuffix(filepath.Base(filename.(string)), ".py")
+			if cfg.PerFileGenerationIncludeInit() && hasInit && nonEmptyInit {
+				srcs.Add(pyLibraryEntrypointFilename)
+			}
 			appendPyLibrary(srcs, pyLibraryTargetName)
 		})
 	} else if !pyLibraryFilenames.Empty() {
@@ -468,6 +466,19 @@
 	return false
 }
 
+// hasLibraryEntrypointFile reports whether the given directory contains the
+// library entrypoint file and whether that file is non-empty.
+func hasLibraryEntrypointFile(dir string) (bool, bool) {
+	stat, err := os.Stat(filepath.Join(dir, pyLibraryEntrypointFilename))
+	if os.IsNotExist(err) {
+		return false, false
+	}
+	if err != nil {
+		log.Fatalf("ERROR: %v\n", err)
+	}
+	return true, stat.Size() != 0
+}
+
 // isEntrypointFile returns whether the given path is an entrypoint file. The
 // given path can be absolute or relative.
 func isEntrypointFile(path string) bool {
diff --git a/gazelle/python/resolve.go b/gazelle/python/resolve.go
index 1ddd63d..f019a64 100644
--- a/gazelle/python/resolve.go
+++ b/gazelle/python/resolve.go
@@ -61,11 +61,17 @@
 	provides := make([]resolve.ImportSpec, 0, len(srcs)+1)
 	for _, src := range srcs {
 		ext := filepath.Ext(src)
-		if ext == ".py" {
-			pythonProjectRoot := cfg.PythonProjectRoot()
-			provide := importSpecFromSrc(pythonProjectRoot, f.Pkg, src)
-			provides = append(provides, provide)
+		if ext != ".py" {
+			continue
 		}
+		if cfg.PerFileGeneration() && len(srcs) > 1 && src == pyLibraryEntrypointFilename {
+			// Do not provide import spec from __init__.py when it is being included as
+			// part of another module.
+			continue
+		}
+		pythonProjectRoot := cfg.PythonProjectRoot()
+		provide := importSpecFromSrc(pythonProjectRoot, f.Pkg, src)
+		provides = append(provides, provide)
 	}
 	if len(provides) == 0 {
 		return nil
diff --git a/gazelle/python/testdata/per_file_non_empty_init/BUILD.in b/gazelle/python/testdata/per_file_non_empty_init/BUILD.in
index a5853f6..f76a3d0 100644
--- a/gazelle/python/testdata/per_file_non_empty_init/BUILD.in
+++ b/gazelle/python/testdata/per_file_non_empty_init/BUILD.in
@@ -1,3 +1,4 @@
 load("@rules_python//python:defs.bzl", "py_library")
 
 # gazelle:python_generation_mode file
+# gazelle:python_generation_mode_per_file_include_init true
diff --git a/gazelle/python/testdata/per_file_non_empty_init/BUILD.out b/gazelle/python/testdata/per_file_non_empty_init/BUILD.out
index 8733dbd..ee4a417 100644
--- a/gazelle/python/testdata/per_file_non_empty_init/BUILD.out
+++ b/gazelle/python/testdata/per_file_non_empty_init/BUILD.out
@@ -1,6 +1,7 @@
 load("@rules_python//python:defs.bzl", "py_library")
 
 # gazelle:python_generation_mode file
+# gazelle:python_generation_mode_per_file_include_init true
 
 py_library(
     name = "__init__",
@@ -11,6 +12,9 @@
 
 py_library(
     name = "foo",
-    srcs = ["foo.py"],
+    srcs = [
+        "__init__.py",
+        "foo.py",
+    ],
     visibility = ["//:__subpackages__"],
 )
diff --git a/gazelle/python/testdata/per_file_subdirs/bar/BUILD.in b/gazelle/python/testdata/per_file_subdirs/bar/BUILD.in
index e69de29..4fc674a 100644
--- a/gazelle/python/testdata/per_file_subdirs/bar/BUILD.in
+++ b/gazelle/python/testdata/per_file_subdirs/bar/BUILD.in
@@ -0,0 +1 @@
+# gazelle:python_generation_mode_per_file_include_init true
diff --git a/gazelle/python/testdata/per_file_subdirs/bar/BUILD.out b/gazelle/python/testdata/per_file_subdirs/bar/BUILD.out
index 4da8d9c..8835fb2 100644
--- a/gazelle/python/testdata/per_file_subdirs/bar/BUILD.out
+++ b/gazelle/python/testdata/per_file_subdirs/bar/BUILD.out
@@ -1,5 +1,7 @@
 load("@rules_python//python:defs.bzl", "py_library", "py_test")
 
+# gazelle:python_generation_mode_per_file_include_init true
+
 py_library(
     name = "__init__",
     srcs = ["__init__.py"],
@@ -7,8 +9,20 @@
 )
 
 py_library(
+    name = "bar",
+    srcs = [
+        "__init__.py",
+        "bar.py",
+    ],
+    visibility = ["//:__subpackages__"],
+)
+
+py_library(
     name = "foo",
-    srcs = ["foo.py"],
+    srcs = [
+        "__init__.py",
+        "foo.py",
+    ],
     visibility = ["//:__subpackages__"],
 )
 
diff --git a/gazelle/python/testdata/per_file_subdirs/bar/bar.py b/gazelle/python/testdata/per_file_subdirs/bar/bar.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/gazelle/python/testdata/per_file_subdirs/bar/bar.py
diff --git a/gazelle/pythonconfig/pythonconfig.go b/gazelle/pythonconfig/pythonconfig.go
index 636d6a4..09d308a 100644
--- a/gazelle/pythonconfig/pythonconfig.go
+++ b/gazelle/pythonconfig/pythonconfig.go
@@ -50,6 +50,10 @@
 	// GenerationMode represents the directive that controls the target generation
 	// mode. See below for the GenerationModeType constants.
 	GenerationMode = "python_generation_mode"
+	// GenerationModePerFileIncludeInit represents the directive that augments
+	// the "file" GenerationMode by including the package's __init__.py file
+	// in the srcs of each generated target. This is a boolean directive.
+	GenerationModePerFileIncludeInit = "python_generation_mode_per_file_include_init"
 	// LibraryNamingConvention represents the directive that controls the
 	// py_library naming convention. It interpolates $package_name$ with the
 	// Bazel package name. E.g. if the Bazel package name is `foo`, setting this
@@ -122,15 +126,16 @@
 	pythonProjectRoot string
 	gazelleManifest   *manifest.Manifest
 
-	excludedPatterns         *singlylinkedlist.List
-	ignoreFiles              map[string]struct{}
-	ignoreDependencies       map[string]struct{}
-	validateImportStatements bool
-	coarseGrainedGeneration  bool
-	perFileGeneration        bool
-	libraryNamingConvention  string
-	binaryNamingConvention   string
-	testNamingConvention     string
+	excludedPatterns             *singlylinkedlist.List
+	ignoreFiles                  map[string]struct{}
+	ignoreDependencies           map[string]struct{}
+	validateImportStatements     bool
+	coarseGrainedGeneration      bool
+	perFileGeneration            bool
+	perFileGenerationIncludeInit bool
+	libraryNamingConvention      string
+	binaryNamingConvention       string
+	testNamingConvention         string
 }
 
 // New creates a new Config.
@@ -139,18 +144,19 @@
 	pythonProjectRoot string,
 ) *Config {
 	return &Config{
-		extensionEnabled:         true,
-		repoRoot:                 repoRoot,
-		pythonProjectRoot:        pythonProjectRoot,
-		excludedPatterns:         singlylinkedlist.New(),
-		ignoreFiles:              make(map[string]struct{}),
-		ignoreDependencies:       make(map[string]struct{}),
-		validateImportStatements: true,
-		coarseGrainedGeneration:  false,
-		perFileGeneration:        false,
-		libraryNamingConvention:  packageNameNamingConventionSubstitution,
-		binaryNamingConvention:   fmt.Sprintf("%s_bin", packageNameNamingConventionSubstitution),
-		testNamingConvention:     fmt.Sprintf("%s_test", packageNameNamingConventionSubstitution),
+		extensionEnabled:             true,
+		repoRoot:                     repoRoot,
+		pythonProjectRoot:            pythonProjectRoot,
+		excludedPatterns:             singlylinkedlist.New(),
+		ignoreFiles:                  make(map[string]struct{}),
+		ignoreDependencies:           make(map[string]struct{}),
+		validateImportStatements:     true,
+		coarseGrainedGeneration:      false,
+		perFileGeneration:            false,
+		perFileGenerationIncludeInit: false,
+		libraryNamingConvention:      packageNameNamingConventionSubstitution,
+		binaryNamingConvention:       fmt.Sprintf("%s_bin", packageNameNamingConventionSubstitution),
+		testNamingConvention:         fmt.Sprintf("%s_test", packageNameNamingConventionSubstitution),
 	}
 }
 
@@ -163,19 +169,20 @@
 // current Config and sets itself as the parent to the child.
 func (c *Config) NewChild() *Config {
 	return &Config{
-		parent:                   c,
-		extensionEnabled:         c.extensionEnabled,
-		repoRoot:                 c.repoRoot,
-		pythonProjectRoot:        c.pythonProjectRoot,
-		excludedPatterns:         c.excludedPatterns,
-		ignoreFiles:              make(map[string]struct{}),
-		ignoreDependencies:       make(map[string]struct{}),
-		validateImportStatements: c.validateImportStatements,
-		coarseGrainedGeneration:  c.coarseGrainedGeneration,
-		perFileGeneration:        c.perFileGeneration,
-		libraryNamingConvention:  c.libraryNamingConvention,
-		binaryNamingConvention:   c.binaryNamingConvention,
-		testNamingConvention:     c.testNamingConvention,
+		parent:                       c,
+		extensionEnabled:             c.extensionEnabled,
+		repoRoot:                     c.repoRoot,
+		pythonProjectRoot:            c.pythonProjectRoot,
+		excludedPatterns:             c.excludedPatterns,
+		ignoreFiles:                  make(map[string]struct{}),
+		ignoreDependencies:           make(map[string]struct{}),
+		validateImportStatements:     c.validateImportStatements,
+		coarseGrainedGeneration:      c.coarseGrainedGeneration,
+		perFileGeneration:            c.perFileGeneration,
+		perFileGenerationIncludeInit: c.perFileGenerationIncludeInit,
+		libraryNamingConvention:      c.libraryNamingConvention,
+		binaryNamingConvention:       c.binaryNamingConvention,
+		testNamingConvention:         c.testNamingConvention,
 	}
 }
 
@@ -344,6 +351,18 @@
 	return c.perFileGeneration
 }
 
+// SetPerFileGenerationIncludeInit sets whether py_library targets should
+// include __init__.py files when PerFileGeneration() is true.
+func (c *Config) SetPerFileGenerationIncludeInit(includeInit bool) {
+	c.perFileGenerationIncludeInit = includeInit
+}
+
+// PerFileGenerationIncludeInit returns whether py_library targets should
+// include __init__.py files when PerFileGeneration() is true.
+func (c *Config) PerFileGenerationIncludeInit() bool {
+	return c.perFileGenerationIncludeInit
+}
+
 // SetLibraryNamingConvention sets the py_library target naming convention.
 func (c *Config) SetLibraryNamingConvention(libraryNamingConvention string) {
 	c.libraryNamingConvention = libraryNamingConvention