From 6fc7a6f7c82f5731b892fd85c79bcd470286a180 Mon Sep 17 00:00:00 2001 From: Pierre Bourdon Date: Sat, 20 Apr 2024 16:48:03 +0200 Subject: [PATCH] queue-runner: add prom metrics to allow detecting internal bottlenecks By looking at the ratio of running vs. waiting for the dispatcher and the queue monitor, we should get better visibility into what hydra is currently bottlenecked on. There are other side effects we can try to measure to get to the same result, but having a simple way doesn't cost us much. --- src/hydra-queue-runner/hydra-queue-runner.cc | 28 ++++++++++++++++++++ src/hydra-queue-runner/state.hh | 6 +++++ 2 files changed, 34 insertions(+) diff --git a/src/hydra-queue-runner/hydra-queue-runner.cc b/src/hydra-queue-runner/hydra-queue-runner.cc index ab146312..8abb4ea9 100644 --- a/src/hydra-queue-runner/hydra-queue-runner.cc +++ b/src/hydra-queue-runner/hydra-queue-runner.cc @@ -98,6 +98,34 @@ State::PromMetrics::PromMetrics() .Register(*registry) .Add({}) ) + , dispatcher_time_spent_running( + prometheus::BuildCounter() + .Name("hydraqueuerunner_dispatcher_time_spent_running") + .Help("Time (in micros) spent running the dispatcher") + .Register(*registry) + .Add({}) + ) + , dispatcher_time_spent_waiting( + prometheus::BuildCounter() + .Name("hydraqueuerunner_dispatcher_time_spent_waiting") + .Help("Time (in micros) spent waiting for the dispatcher to obtain work") + .Register(*registry) + .Add({}) + ) + , queue_monitor_time_spent_running( + prometheus::BuildCounter() + .Name("hydraqueuerunner_queue_monitor_time_spent_running") + .Help("Time (in micros) spent running the queue monitor") + .Register(*registry) + .Add({}) + ) + , queue_monitor_time_spent_waiting( + prometheus::BuildCounter() + .Name("hydraqueuerunner_queue_monitor_time_spent_waiting") + .Help("Time (in micros) spent waiting for the queue monitor to obtain work") + .Register(*registry) + .Add({}) + ) { } diff --git a/src/hydra-queue-runner/state.hh b/src/hydra-queue-runner/state.hh index edfad4fb..318499d5 100644 --- a/src/hydra-queue-runner/state.hh +++ b/src/hydra-queue-runner/state.hh @@ -464,6 +464,12 @@ private: prometheus::Counter& queue_monitor_time_spent_running; prometheus::Counter& queue_monitor_time_spent_waiting; + prometheus::Counter& dispatcher_time_spent_running; + prometheus::Counter& dispatcher_time_spent_waiting; + + prometheus::Counter& queue_monitor_time_spent_running; + prometheus::Counter& queue_monitor_time_spent_waiting; + PromMetrics(); }; PromMetrics prom;