diff --git a/src/hydra-queue-runner/build-remote.cc b/src/hydra-queue-runner/build-remote.cc index bbac00f6..e28ca795 100644 --- a/src/hydra-queue-runner/build-remote.cc +++ b/src/hydra-queue-runner/build-remote.cc @@ -424,6 +424,7 @@ private: }; void State::buildRemote(ref destStore, + MachineReservation::ptr & reservation, ::Machine::ptr machine, Step::ptr step, const ServeProto::BuildOptions & buildOptions, RemoteResult & result, std::shared_ptr activeStep, @@ -569,6 +570,15 @@ void State::buildRemote(ref destStore, } SemaphoreReleaser releaser(&localWorkThrottler); + /* Once we've started copying outputs, release the machine reservation + * so further builds can happen. We do not release the machine earlier + * to avoid situations where the queue runner is bottlenecked on + * copying outputs and we end up building too many things that we + * haven't been able to allow copy slots for. */ + assert(reservation.unique()); + reservation = 0; + wakeDispatcher(); + StorePathSet outputs; for (auto & [_, realisation] : buildResult.builtOutputs) outputs.insert(realisation.outPath); diff --git a/src/hydra-queue-runner/builder.cc b/src/hydra-queue-runner/builder.cc index 5269febd..fade0fd5 100644 --- a/src/hydra-queue-runner/builder.cc +++ b/src/hydra-queue-runner/builder.cc @@ -46,10 +46,12 @@ void State::builder(MachineReservation::ptr reservation) } } - /* Release the machine and wake up the dispatcher. */ - assert(reservation.unique()); - reservation = 0; - wakeDispatcher(); + /* If the machine hasn't been released yet, release and wake up the dispatcher. */ + if (reservation) { + assert(reservation.unique()); + reservation = 0; + wakeDispatcher(); + } /* If there was a temporary failure, retry the step after an exponentially increasing interval. */ @@ -72,11 +74,11 @@ void State::builder(MachineReservation::ptr reservation) State::StepResult State::doBuildStep(nix::ref destStore, - MachineReservation::ptr reservation, + MachineReservation::ptr & reservation, std::shared_ptr activeStep) { - auto & step(reservation->step); - auto & machine(reservation->machine); + auto step(reservation->step); + auto machine(reservation->machine); { auto step_(step->state.lock()); @@ -211,7 +213,7 @@ State::StepResult State::doBuildStep(nix::ref destStore, try { /* FIXME: referring builds may have conflicting timeouts. */ - buildRemote(destStore, machine, step, buildOptions, result, activeStep, updateStep, narMembers); + buildRemote(destStore, reservation, machine, step, buildOptions, result, activeStep, updateStep, narMembers); } catch (Error & e) { if (activeStep->state_.lock()->cancelled) { printInfo("marking step %d of build %d as cancelled", stepNr, buildId); diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index 81ba4a00..deee781d 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -562,10 +562,11 @@ private: retried. */ enum StepResult { sDone, sRetry, sMaybeCancelled }; StepResult doBuildStep(nix::ref destStore, - MachineReservation::ptr reservation, + MachineReservation::ptr & reservation, std::shared_ptr activeStep); void buildRemote(nix::ref destStore, + MachineReservation::ptr & reservation, Machine::ptr machine, Step::ptr step, const nix::ServeProto::BuildOptions & buildOptions, RemoteResult & result, std::shared_ptr activeStep,