Merge pull request #193 from ddunbar/core-wait-for-tasks-on-cancellation
[Core] Fix a race condition in task cancellation.
diff --git a/lib/BuildSystem/BuildSystem.cpp b/lib/BuildSystem/BuildSystem.cpp
index 3af7939..1ef4c49 100644
--- a/lib/BuildSystem/BuildSystem.cpp
+++ b/lib/BuildSystem/BuildSystem.cpp
@@ -1125,7 +1125,7 @@
if (node->isVirtual()) {
return Rule{
keyData,
- /*Action=*/ [node](BuildEngine& engine) -> Task* {
+ /*Action=*/ [](BuildEngine& engine) -> Task* {
return engine.registerTask(new VirtualInputNodeTask());
},
/*IsValid=*/ [node](BuildEngine& engine, const Rule& rule,
diff --git a/lib/Core/BuildEngine.cpp b/lib/Core/BuildEngine.cpp
index 7067f46..5496279 100644
--- a/lib/Core/BuildEngine.cpp
+++ b/lib/Core/BuildEngine.cpp
@@ -856,7 +856,7 @@
ruleInfo->keyID, ruleInfo->rule, ruleInfo->result, &error);
if (!result) {
delegate.error(error);
- completeRemainingTasks();
+ cancelRemainingTasks();
return false;
}
}
@@ -891,6 +891,10 @@
// If we haven't done any other work at this point but we have pending
// tasks, we need to wait for a task to complete.
+ //
+ // NOTE: Cancellation also implements this process, if you modify this
+ // code please also validate that \see cancelRemainingTasks() is still
+ // correct.
if (!didWork && numOutstandingUnfinishedTasks != 0) {
TracingInterval i(EngineQueueItemKind::Waiting);
@@ -901,7 +905,7 @@
// of the mutex, if one has been added then we may have already missed
// the condition notification and cannot safely wait.
if (finishedTaskInfos.empty()) {
- finishedTaskInfosCondition.wait(lock);
+ finishedTaskInfosCondition.wait(lock);
}
didWork = true;
@@ -1054,17 +1058,45 @@
assert(!cycleList.empty());
delegate.cycleDetected(cycleList);
- completeRemainingTasks();
+ cancelRemainingTasks();
}
- // Complete all of the remaining tasks.
- //
- // FIXME: Should we have a task abort callback?
- void completeRemainingTasks() {
+ // Cancel all of the remaining tasks.
+ void cancelRemainingTasks() {
+ // We need to wait for any currently running tasks to be reported as
+ // complete. Not doing this would mean we could get asynchronous calls
+ // attempting to modify the task state concurrently with the cancellation
+ // process, which isn't something we want to need to synchronize on.
+ //
+ // We don't process the requests at all, we simply drain them. In practice,
+ // we expect clients to implement cancellation in conjection with causing
+ // long-running tasks to also cancel and fail, so preserving those results
+ // is not valuable.
+ while (numOutstandingUnfinishedTasks != 0) {
+ std::unique_lock<std::mutex> lock(finishedTaskInfosMutex);
+ if (finishedTaskInfos.empty()) {
+ finishedTaskInfosCondition.wait(lock);
+ } else {
+ assert(finishedTaskInfos.size() <= numOutstandingUnfinishedTasks);
+ numOutstandingUnfinishedTasks -= finishedTaskInfos.size();
+ finishedTaskInfos.clear();
+ }
+ }
+
for (auto& it: taskInfos) {
- // Complete the task, even though it did not update the value.
+ // Cancel the task, marking it incomplete.
//
- // FIXME: What should we do here with the value?
+ // This will force it to rerun in a later build, but since it was already
+ // running in this build that was almost certainly going to be
+ // required. Technically, there are rare situations where it wouldn't have
+ // to rerun (e.g., if resultIsValid becomes true after being false in this
+ // run), and if we were willing to restore the tasks state--either by
+ // keeping the old one or by restoring from the database--we could ensure
+ // that doesn't happen.
+ //
+ // NOTE: Actually, we currently don't sync this write to the database, so
+ // in some cases we do actually preserve this information (if the client
+ // ends up cancelling, then reloading froom the database).
TaskInfo* taskInfo = &it.second;
RuleInfo* ruleInfo = taskInfo->forRuleInfo;
assert(taskInfo == ruleInfo->getPendingTaskInfo());
@@ -1176,7 +1208,7 @@
db->lookupRuleResult(ruleInfo.keyID, ruleInfo.rule, &ruleInfo.result, &error);
if (!error.empty()) {
delegate.error(error);
- completeRemainingTasks();
+ cancelRemainingTasks();
}
}
@@ -1350,7 +1382,7 @@
// Validate the InputID.
if (inputID > BuildEngine::kMaximumInputID) {
delegate.error("error: attempt to use reserved input ID");
- completeRemainingTasks();
+ cancelRemainingTasks();
return;
}
@@ -1368,7 +1400,7 @@
if (!taskInfo->forRuleInfo->isInProgressComputing()) {
delegate.error("error: invalid state for adding discovered dependency");
- completeRemainingTasks();
+ cancelRemainingTasks();
return;
}
@@ -1385,7 +1417,7 @@
if (!taskInfo->forRuleInfo->isInProgressComputing()) {
delegate.error("error: invalid state for marking task complete");
- completeRemainingTasks();
+ cancelRemainingTasks();
return;
}