Revert "[engine] Limit number of active subprocesses"
This reverts commit 612779b564d2ba08167db3f63f4166fa012729bd.
Reason for revert: `shac check --all` is hanging in large repos:
https://ci.chromium.org/b/8767597391211255425
Original change's description:
> [engine] Limit number of active subprocesses
>
> Previously the number of concurrent subprocess invocations launched by
> `ctx.os.exec()` was unbounded, which could place a strain on the system.
> Now there's effectively a pool of NumCPU+2 workers for running
> subprocesses.
>
> `ctx.os.exec()` returns immediately, but the underlying subprocess is
> started asynchronously.
>
> `ba -against main` showed no significant difference in the results of
> the `ctx.os.exec()` benchmarks.
>
> Change-Id: I76e4542249783c9a503f0f927e327e9f90f8bb04
> Reviewed-on: https://fuchsia-review.googlesource.com/c/shac-project/shac/+/867979
> Reviewed-by: Ina Huh <ihuh@google.com>
> Commit-Queue: Oliver Newman <olivernewman@google.com>
Change-Id: Icfd3611825b1995948c856170ddc353b7ebfb1eb
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://fuchsia-review.googlesource.com/c/shac-project/shac/+/929633
Fuchsia-Auto-Submit: Oliver Newman <olivernewman@google.com>
Commit-Queue: Auto-Submit <auto-submit@fuchsia-infra.iam.gserviceaccount.com>
Reviewed-by: RubberStamper 🤖 <android-build-ayeaye@system.gserviceaccount.com>
diff --git a/internal/engine/run.go b/internal/engine/run.go
index c341870..512cd9c 100644
--- a/internal/engine/run.go
+++ b/internal/engine/run.go
@@ -40,7 +40,6 @@
"go.starlark.net/resolve"
"go.starlark.net/starlark"
"golang.org/x/sync/errgroup"
- "golang.org/x/sync/semaphore"
"google.golang.org/protobuf/encoding/prototext"
)
@@ -365,8 +364,6 @@
packages: packages,
}
- subprocessSem := semaphore.NewWeighted(int64(runtime.NumCPU()) + 2)
-
var vars map[string]string
newState := func(scm scmCheckout, subdir string, idx int) (*shacState, error) {
@@ -407,7 +404,6 @@
sandbox: sb,
scm: scm,
subdir: subdir,
- subprocessSem: subprocessSem,
tmpdir: filepath.Join(tmpdir, strconv.Itoa(idx)),
writableRoot: doc.WritableRoot,
vars: vars,
@@ -468,7 +464,7 @@
if err != nil {
return err
}
- shacStates = append(shacStates, state)
+ shacStates = []*shacState{state}
}
// Parse the starlark files. Run everything from our errgroup.
@@ -492,7 +488,6 @@
if cb == nil {
loop = false
} else {
- // Actually run the check.
eg.Go(cb)
}
case <-done:
@@ -649,9 +644,6 @@
filter CheckFilter
passthroughEnv []*PassthroughEnv
- // Limits the number of concurrent subprocesses launched by ctx.os.exec().
- subprocessSem *semaphore.Weighted
-
// Set when fail() is called. This happens only during the first phase, thus
// no mutex is needed.
failErr *failure
diff --git a/internal/engine/run_test.go b/internal/engine/run_test.go
index 392f459..e84ce40 100644
--- a/internal/engine/run_test.go
+++ b/internal/engine/run_test.go
@@ -2273,7 +2273,7 @@
},
{
name: "ctx-os-exec-parallel.star",
- want: strings.Repeat("[//ctx-os-exec-parallel.star:28] Hello, world\n", 1000),
+ want: strings.Repeat("[//ctx-os-exec-parallel.star:27] Hello, world\n", 10),
},
{
name: "ctx-os-exec-relpath.star",
diff --git a/internal/engine/runtime_ctx_os.go b/internal/engine/runtime_ctx_os.go
index a1735f8..12e0818 100644
--- a/internal/engine/runtime_ctx_os.go
+++ b/internal/engine/runtime_ctx_os.go
@@ -42,7 +42,6 @@
raiseOnFailure bool
okRetcodes []int
tempDir string
- startErrs <-chan error
waitCalled bool
}
@@ -81,27 +80,15 @@
return []string{"wait"}
}
-func (s *subprocess) wait(state *shacState) (starlark.Value, error) {
+func (s *subprocess) wait() (starlark.Value, error) {
if s.waitCalled {
return nil, fmt.Errorf("wait was already called")
}
s.waitCalled = true
- val, err := s.waitInner(state)
- if err2 := s.cleanup(); err == nil {
- err = err2
- }
- return val, err
-}
-func (s *subprocess) waitInner(state *shacState) (starlark.Value, error) {
- if err := <-s.startErrs; err != nil {
- // If cmd.Start() failed the semaphore will already have been released,
- // no need to release it.
- return nil, err
- }
+ defer s.cleanup()
err := s.cmd.Wait()
- state.subprocessSem.Release(1)
retcode := 0
if err != nil {
var errExit *exec.ExitError
@@ -140,25 +127,16 @@
}
func (s *subprocess) cleanup() error {
- // Wait for the subprocess to launch before trying to kill it. s.startErrs
- // gets closed after the subprocess starts, so even if the error has already
- // been received by `wait()`, this receive will return due to the channel
- // being closed.
- <-s.startErrs
// Kill the process before doing any other cleanup steps to ensure resources
- // are no longer in use before cleaning them up.
- var err error
- if s.cmd.ProcessState == nil {
- err = s.cmd.Process.Kill()
- // Kill() is non-blocking, so it's necessary to wait for the process to
- // exit before cleaning up resources.
- _ = s.cmd.Wait()
- }
+ // are no longer in use.
+ err := s.cmd.Process.Kill()
+ // Kill() doesn't block until the process actually completes, so we need to
+ // wait before cleaning up resources.
+ _ = s.cmd.Wait()
if err2 := os.RemoveAll(s.tempDir); err == nil {
err = err2
}
-
buffers.push(s.stdout)
buffers.push(s.stderr)
s.stdout, s.stderr = nil, nil
@@ -170,7 +148,7 @@
if err := starlark.UnpackArgs(name, args, kwargs); err != nil {
return nil, err
}
- return self.(*subprocess).wait(s)
+ return self.(*subprocess).wait()
})
// ctxOsExec implements the native function ctx.os.exec().
@@ -234,6 +212,15 @@
return os.RemoveAll(tempDir)
})
+ stdout := buffers.get()
+ stderr := buffers.get()
+
+ cleanupFuncs = append(cleanupFuncs, func() error {
+ buffers.push(stdout)
+ buffers.push(stderr)
+ return nil
+ })
+
env := map[string]string{
"PATH": os.Getenv("PATH"),
"TEMP": tempDir,
@@ -400,31 +387,15 @@
cmd := s.sandbox.Command(ctx, config)
- stdout, stderr := buffers.get(), buffers.get()
+ cmd.Stdin = stdin
// TODO(olivernewman): Also handle commands that may output non-utf-8 bytes.
cmd.Stdout = stdout
cmd.Stderr = stderr
- cmd.Stdin = stdin
- startErrs := make(chan error, 1)
- go func() {
- // Signals to subprocess.cleanup() that starting the subprocess is done,
- // whether or not it was successful.
- defer close(startErrs)
-
- err := s.subprocessSem.Acquire(ctx, 1)
- if err != nil {
- startErrs <- err
- return
- }
- err = execsupport.Start(cmd)
- if err != nil {
- // Release early if the process failed to start, no point in
- // delaying until wait() is called.
- s.subprocessSem.Release(1)
- }
- startErrs <- err
- }()
+ err = execsupport.Start(cmd)
+ if err != nil {
+ return nil, err
+ }
proc := &subprocess{
cmd: cmd,
@@ -434,9 +405,7 @@
raiseOnFailure: bool(argraiseOnFailure),
okRetcodes: okRetcodes,
tempDir: tempDir,
- startErrs: startErrs,
}
-
// Only clean up now if starting the subprocess failed; otherwise it will
// get cleaned up by wait().
cleanupFuncs = cleanupFuncs[:0]
diff --git a/internal/engine/testdata/print/ctx-os-exec-parallel.star b/internal/engine/testdata/print/ctx-os-exec-parallel.star
index ab9b28f..5aaf075 100644
--- a/internal/engine/testdata/print/ctx-os-exec-parallel.star
+++ b/internal/engine/testdata/print/ctx-os-exec-parallel.star
@@ -18,10 +18,9 @@
else:
cmd = ["./hello_world.sh"]
- # Launch more parallel subprocesses than any realistic host machine will
- # have cores (but not too many, or the test will be very slow).
- num_procs = 1000
- procs = [ctx.os.exec(cmd) for _ in range(num_procs)]
+ procs = []
+ for _ in range(10):
+ procs.append(ctx.os.exec(cmd))
for proc in procs:
res = proc.wait()
diff --git a/internal/engine/version.go b/internal/engine/version.go
index 8e1d778..877fd9e 100644
--- a/internal/engine/version.go
+++ b/internal/engine/version.go
@@ -26,7 +26,7 @@
// Version is the current tool version.
//
// TODO(maruel): Add proper version, preferably from git tag.
- Version = shacVersion{0, 1, 15}
+ Version = shacVersion{0, 1, 14}
)
func (v shacVersion) String() string {