diff --git a/collectors/prometheus.go b/collectors/prometheus.go index 5377c08..39d10b3 100644 --- a/collectors/prometheus.go +++ b/collectors/prometheus.go @@ -6,6 +6,7 @@ import ( "net/http" "os" "path/filepath" + "time" "github.com/btcsuite/btcd/btcutil" "github.com/lightninglabs/lndclient" @@ -84,7 +85,7 @@ func DefaultConfig() *PrometheusConfig { // NewPrometheusExporter makes a new instance of the PrometheusExporter given // the address to listen for Prometheus on and an lnd gRPC client. func NewPrometheusExporter(cfg *PrometheusConfig, lnd *lndclient.LndServices, - monitoringCfg *MonitoringConfig, quitChan chan struct{}) *PrometheusExporter { + monitoringCfg *MonitoringConfig, quitChan chan struct{}, programStartTime time.Time) *PrometheusExporter { // We have six collectors and a htlc monitor running, so we buffer our // error channel by 7 so that we do not need to consume all errors from @@ -104,7 +105,7 @@ func NewPrometheusExporter(cfg *PrometheusConfig, lnd *lndclient.LndServices, NewWalletCollector(lnd, errChan), NewPeerCollector(lnd.Client, errChan), NewInfoCollector(lnd.Client, errChan), - NewStateCollector(lnd, errChan), + NewStateCollector(lnd, errChan, programStartTime), } if !monitoringCfg.DisableHtlc { diff --git a/collectors/state_collector.go b/collectors/state_collector.go index ed486a3..99e1f94 100644 --- a/collectors/state_collector.go +++ b/collectors/state_collector.go @@ -10,22 +10,29 @@ import ( "github.com/prometheus/client_golang/prometheus" ) +// StateCollector is a collector that keeps track of LND's state. type StateCollector struct { lnd *lndclient.LndServices - // Use one gauge to track the starting time of LND. + // timeToStartDesc describes the gauge that tracks how long LND takes to go from unlocked to started. timeToStartDesc *prometheus.Desc - // startTime records a best-effort timestamp of when LND was started. - startTime time.Time + // timeToUnlockDesc describes the gauge that tracks how long LND takes to become unlocked.
+ timeToUnlockDesc *prometheus.Desc - // endTime records when LND makes a transition from RPC_ACTIVE to + // programStartTime records a best-effort timestamp of when lndmon was started. + programStartTime time.Time + + // unlockTime records a best-effort timestamp of when LND was unlocked. + unlockTime time.Time + + // endTime records when LND makes a transition from UNLOCKED to // SERVER_ACTIVE. // If lndmon starts after LND has already reached SERVER_ACTIVE, no // startup time metric will be emitted. endTime time.Time - // mutex is a lock for preventing concurrent writes to startTime or + // mutex is a lock for preventing concurrent writes to unlockTime or // endTime. mutex sync.RWMutex @@ -36,7 +43,7 @@ type StateCollector struct { // NewStateCollector returns a new instance of the StateCollector. func NewStateCollector(lnd *lndclient.LndServices, - errChan chan<- error) *StateCollector { + errChan chan<- error, programStartTime time.Time) *StateCollector { sc := &StateCollector{ lnd: lnd, @@ -45,8 +52,14 @@ func NewStateCollector(lnd *lndclient.LndServices, "time to start in seconds", nil, nil, ), - startTime: time.Now(), - errChan: errChan, + timeToUnlockDesc: prometheus.NewDesc( + "lnd_time_to_unlock_secs", + "time to unlocked in seconds", + nil, nil, + ), + programStartTime: programStartTime, + unlockTime: time.Now(), + errChan: errChan, } go sc.monitorStateChanges() @@ -57,24 +70,30 @@ func NewStateCollector(lnd *lndclient.LndServices, func (s *StateCollector) monitorStateChanges() { var serverActiveReached bool - for { - state, err := s.lnd.State.GetState(context.Background()) - if err != nil { - s.errChan <- fmt.Errorf("StateCollector GetState failed with: %v", err) - continue - } - - s.mutex.Lock() - if state == lndclient.WalletStateRPCActive && !s.startTime.IsZero() { - s.endTime = time.Now() - serverActiveReached = true - } - s.mutex.Unlock() + stateUpdates, errChan, err := s.lnd.State.SubscribeState(context.Background()) + if err != nil { + s.errChan 
<- fmt.Errorf("StateCollector SubscribeState failed with: %v", err) + return + } - if serverActiveReached { - break + for { + select { + case state := <-stateUpdates: + s.mutex.Lock() + if state == lndclient.WalletStateServerActive && !s.unlockTime.IsZero() { + s.endTime = time.Now() + serverActiveReached = true + } + s.mutex.Unlock() + + if serverActiveReached { + return + } + + case err := <-errChan: + s.errChan <- fmt.Errorf("StateCollector state update failed with: %v", err) + return } - time.Sleep(1 * time.Second) } } @@ -85,6 +104,7 @@ func (s *StateCollector) monitorStateChanges() { // NOTE: Part of the prometheus.Collector interface. func (s *StateCollector) Describe(ch chan<- *prometheus.Desc) { ch <- s.timeToStartDesc + ch <- s.timeToUnlockDesc } // Collect is called by the Prometheus registry when collecting metrics. @@ -95,11 +115,17 @@ func (s *StateCollector) Collect(ch chan<- prometheus.Metric) { s.mutex.RLock() defer s.mutex.RUnlock() - // We have set both a startTime and endTime, calculate the difference and emit a metric. - if !s.startTime.IsZero() && !s.endTime.IsZero() { - timeToStartInSecs := s.endTime.Sub(s.startTime).Seconds() + // We have set unlockTime and endTime. + // Calculate the differences and emit both metrics.
+ if !s.unlockTime.IsZero() && !s.endTime.IsZero() { + timeToUnlockInSecs := s.unlockTime.Sub(s.programStartTime).Seconds() + timeToStartInSecs := s.endTime.Sub(s.unlockTime).Seconds() ch <- prometheus.MustNewConstMetric( s.timeToStartDesc, prometheus.GaugeValue, timeToStartInSecs, ) + + ch <- prometheus.MustNewConstMetric( + s.timeToUnlockDesc, prometheus.GaugeValue, timeToUnlockInSecs, + ) } } diff --git a/lndmon.go b/lndmon.go index 6b1374f..088cae9 100755 --- a/lndmon.go +++ b/lndmon.go @@ -4,6 +4,7 @@ import ( "fmt" "os" "path/filepath" + "time" flags "github.com/jessevdk/go-flags" "github.com/lightninglabs/lndclient" @@ -37,6 +38,8 @@ func start() error { return fmt.Errorf("could not intercept signal: %v", err) } + programStartTime := time.Now() + // Initialize our lnd client, requiring at least lnd v0.11. lnd, err := lndclient.NewLndServices( &lndclient.LndServicesConfig{ @@ -74,7 +77,7 @@ func start() error { // Start our Prometheus exporter. This exporter spawns a goroutine // that pulls metrics from our lnd client on a set interval. exporter := collectors.NewPrometheusExporter( - cfg.Prometheus, &lnd.LndServices, &monitoringCfg, quit, + cfg.Prometheus, &lnd.LndServices, &monitoringCfg, quit, programStartTime, ) if err := exporter.Start(); err != nil { return err