extend metrics and rebuild prometheus exporting logic
This commit is contained in:
parent
fa1f4f761d
commit
cc5ee00b89
17 changed files with 997 additions and 226 deletions
|
@ -31,10 +31,10 @@ func RegisterDatadog(config *types.Datadog) Registry {
|
|||
}
|
||||
|
||||
registry := &standardRegistry{
|
||||
enabled: true,
|
||||
reqsCounter: datadogClient.NewCounter(ddMetricsReqsName, 1.0),
|
||||
reqDurationHistogram: datadogClient.NewHistogram(ddMetricsLatencyName, 1.0),
|
||||
retriesCounter: datadogClient.NewCounter(ddRetriesTotalName, 1.0),
|
||||
enabled: true,
|
||||
backendReqsCounter: datadogClient.NewCounter(ddMetricsReqsName, 1.0),
|
||||
backendReqDurationHistogram: datadogClient.NewHistogram(ddMetricsLatencyName, 1.0),
|
||||
backendRetriesCounter: datadogClient.NewCounter(ddRetriesTotalName, 1.0),
|
||||
}
|
||||
|
||||
return registry
|
||||
|
|
|
@ -31,10 +31,10 @@ func TestDatadog(t *testing.T) {
|
|||
}
|
||||
|
||||
udp.ShouldReceiveAll(t, expected, func() {
|
||||
datadogRegistry.ReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
datadogRegistry.ReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
datadogRegistry.ReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
datadogRegistry.RetriesCounter().With("service", "test").Add(1)
|
||||
datadogRegistry.RetriesCounter().With("service", "test").Add(1)
|
||||
datadogRegistry.BackendReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
datadogRegistry.BackendReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
datadogRegistry.BackendReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
datadogRegistry.BackendRetriesCounter().With("service", "test").Add(1)
|
||||
datadogRegistry.BackendRetriesCounter().With("service", "test").Add(1)
|
||||
})
|
||||
}
|
||||
|
|
|
@ -37,10 +37,10 @@ func RegisterInfluxDB(config *types.InfluxDB) Registry {
|
|||
}
|
||||
|
||||
return &standardRegistry{
|
||||
enabled: true,
|
||||
reqsCounter: influxDBClient.NewCounter(influxDBMetricsReqsName),
|
||||
reqDurationHistogram: influxDBClient.NewHistogram(influxDBMetricsLatencyName),
|
||||
retriesCounter: influxDBClient.NewCounter(influxDBRetriesTotalName),
|
||||
enabled: true,
|
||||
backendReqsCounter: influxDBClient.NewCounter(influxDBMetricsReqsName),
|
||||
backendReqDurationHistogram: influxDBClient.NewHistogram(influxDBMetricsLatencyName),
|
||||
backendRetriesCounter: influxDBClient.NewCounter(influxDBRetriesTotalName),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -31,11 +31,11 @@ func TestInfluxDB(t *testing.T) {
|
|||
}
|
||||
|
||||
msg := udp.ReceiveString(t, func() {
|
||||
influxDBRegistry.ReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
influxDBRegistry.ReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
influxDBRegistry.RetriesCounter().With("service", "test").Add(1)
|
||||
influxDBRegistry.RetriesCounter().With("service", "test").Add(1)
|
||||
influxDBRegistry.ReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
influxDBRegistry.BackendReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
influxDBRegistry.BackendReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
influxDBRegistry.BackendRetriesCounter().With("service", "test").Add(1)
|
||||
influxDBRegistry.BackendRetriesCounter().With("service", "test").Add(1)
|
||||
influxDBRegistry.BackendReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
})
|
||||
|
||||
assertMessage(t, msg, expected)
|
||||
|
|
|
@ -9,71 +9,168 @@ import (
|
|||
type Registry interface {
|
||||
// IsEnabled shows whether metrics instrumentation is enabled.
|
||||
IsEnabled() bool
|
||||
ReqsCounter() metrics.Counter
|
||||
ReqDurationHistogram() metrics.Histogram
|
||||
RetriesCounter() metrics.Counter
|
||||
// server metrics
|
||||
ConfigReloadsCounter() metrics.Counter
|
||||
ConfigReloadsFailureCounter() metrics.Counter
|
||||
LastConfigReloadSuccessGauge() metrics.Gauge
|
||||
LastConfigReloadFailureGauge() metrics.Gauge
|
||||
|
||||
// entry point metrics
|
||||
EntrypointReqsCounter() metrics.Counter
|
||||
EntrypointReqDurationHistogram() metrics.Histogram
|
||||
EntrypointOpenConnsGauge() metrics.Gauge
|
||||
|
||||
// backend metrics
|
||||
BackendReqsCounter() metrics.Counter
|
||||
BackendReqDurationHistogram() metrics.Histogram
|
||||
BackendOpenConnsGauge() metrics.Gauge
|
||||
BackendRetriesCounter() metrics.Counter
|
||||
BackendServerUpGauge() metrics.Gauge
|
||||
}
|
||||
|
||||
// NewMultiRegistry creates a new standardRegistry that wraps multiple Registries.
|
||||
// NewVoidRegistry is a noop implementation of metrics.Registry.
|
||||
// It is used to avoid nil checking in components that do metric collections.
|
||||
func NewVoidRegistry() Registry {
|
||||
return NewMultiRegistry([]Registry{})
|
||||
}
|
||||
|
||||
// NewMultiRegistry is an implementation of metrics.Registry that wraps multiple registries.
|
||||
// It handles the case when a registry hasn't registered some metric and returns nil.
|
||||
// This allows for feature imparity between the different metric implementations.
|
||||
func NewMultiRegistry(registries []Registry) Registry {
|
||||
reqsCounters := []metrics.Counter{}
|
||||
reqDurationHistograms := []metrics.Histogram{}
|
||||
retriesCounters := []metrics.Counter{}
|
||||
configReloadsCounter := []metrics.Counter{}
|
||||
configReloadsFailureCounter := []metrics.Counter{}
|
||||
lastConfigReloadSuccessGauge := []metrics.Gauge{}
|
||||
lastConfigReloadFailureGauge := []metrics.Gauge{}
|
||||
entrypointReqsCounter := []metrics.Counter{}
|
||||
entrypointReqDurationHistogram := []metrics.Histogram{}
|
||||
entrypointOpenConnsGauge := []metrics.Gauge{}
|
||||
backendReqsCounter := []metrics.Counter{}
|
||||
backendReqDurationHistogram := []metrics.Histogram{}
|
||||
backendOpenConnsGauge := []metrics.Gauge{}
|
||||
backendRetriesCounter := []metrics.Counter{}
|
||||
backendServerUpGauge := []metrics.Gauge{}
|
||||
|
||||
for _, r := range registries {
|
||||
reqsCounters = append(reqsCounters, r.ReqsCounter())
|
||||
reqDurationHistograms = append(reqDurationHistograms, r.ReqDurationHistogram())
|
||||
retriesCounters = append(retriesCounters, r.RetriesCounter())
|
||||
if r.ConfigReloadsCounter() != nil {
|
||||
configReloadsCounter = append(configReloadsCounter, r.ConfigReloadsCounter())
|
||||
}
|
||||
if r.ConfigReloadsFailureCounter() != nil {
|
||||
configReloadsFailureCounter = append(configReloadsFailureCounter, r.ConfigReloadsFailureCounter())
|
||||
}
|
||||
if r.LastConfigReloadSuccessGauge() != nil {
|
||||
lastConfigReloadSuccessGauge = append(lastConfigReloadSuccessGauge, r.LastConfigReloadSuccessGauge())
|
||||
}
|
||||
if r.LastConfigReloadFailureGauge() != nil {
|
||||
lastConfigReloadFailureGauge = append(lastConfigReloadFailureGauge, r.LastConfigReloadFailureGauge())
|
||||
}
|
||||
if r.EntrypointReqsCounter() != nil {
|
||||
entrypointReqsCounter = append(entrypointReqsCounter, r.EntrypointReqsCounter())
|
||||
}
|
||||
if r.EntrypointReqDurationHistogram() != nil {
|
||||
entrypointReqDurationHistogram = append(entrypointReqDurationHistogram, r.EntrypointReqDurationHistogram())
|
||||
}
|
||||
if r.EntrypointOpenConnsGauge() != nil {
|
||||
entrypointOpenConnsGauge = append(entrypointOpenConnsGauge, r.EntrypointOpenConnsGauge())
|
||||
}
|
||||
if r.BackendReqsCounter() != nil {
|
||||
backendReqsCounter = append(backendReqsCounter, r.BackendReqsCounter())
|
||||
}
|
||||
if r.BackendReqDurationHistogram() != nil {
|
||||
backendReqDurationHistogram = append(backendReqDurationHistogram, r.BackendReqDurationHistogram())
|
||||
}
|
||||
if r.BackendOpenConnsGauge() != nil {
|
||||
backendOpenConnsGauge = append(backendOpenConnsGauge, r.BackendOpenConnsGauge())
|
||||
}
|
||||
if r.BackendRetriesCounter() != nil {
|
||||
backendRetriesCounter = append(backendRetriesCounter, r.BackendRetriesCounter())
|
||||
}
|
||||
if r.BackendServerUpGauge() != nil {
|
||||
backendServerUpGauge = append(backendServerUpGauge, r.BackendServerUpGauge())
|
||||
}
|
||||
}
|
||||
|
||||
return &standardRegistry{
|
||||
enabled: true,
|
||||
reqsCounter: multi.NewCounter(reqsCounters...),
|
||||
reqDurationHistogram: multi.NewHistogram(reqDurationHistograms...),
|
||||
retriesCounter: multi.NewCounter(retriesCounters...),
|
||||
enabled: len(registries) > 0,
|
||||
configReloadsCounter: multi.NewCounter(configReloadsCounter...),
|
||||
configReloadsFailureCounter: multi.NewCounter(configReloadsFailureCounter...),
|
||||
lastConfigReloadSuccessGauge: multi.NewGauge(lastConfigReloadSuccessGauge...),
|
||||
lastConfigReloadFailureGauge: multi.NewGauge(lastConfigReloadFailureGauge...),
|
||||
entrypointReqsCounter: multi.NewCounter(entrypointReqsCounter...),
|
||||
entrypointReqDurationHistogram: multi.NewHistogram(entrypointReqDurationHistogram...),
|
||||
entrypointOpenConnsGauge: multi.NewGauge(entrypointOpenConnsGauge...),
|
||||
backendReqsCounter: multi.NewCounter(backendReqsCounter...),
|
||||
backendReqDurationHistogram: multi.NewHistogram(backendReqDurationHistogram...),
|
||||
backendOpenConnsGauge: multi.NewGauge(backendOpenConnsGauge...),
|
||||
backendRetriesCounter: multi.NewCounter(backendRetriesCounter...),
|
||||
backendServerUpGauge: multi.NewGauge(backendServerUpGauge...),
|
||||
}
|
||||
}
|
||||
|
||||
type standardRegistry struct {
|
||||
enabled bool
|
||||
reqsCounter metrics.Counter
|
||||
reqDurationHistogram metrics.Histogram
|
||||
retriesCounter metrics.Counter
|
||||
enabled bool
|
||||
configReloadsCounter metrics.Counter
|
||||
configReloadsFailureCounter metrics.Counter
|
||||
lastConfigReloadSuccessGauge metrics.Gauge
|
||||
lastConfigReloadFailureGauge metrics.Gauge
|
||||
entrypointReqsCounter metrics.Counter
|
||||
entrypointReqDurationHistogram metrics.Histogram
|
||||
entrypointOpenConnsGauge metrics.Gauge
|
||||
backendReqsCounter metrics.Counter
|
||||
backendReqDurationHistogram metrics.Histogram
|
||||
backendOpenConnsGauge metrics.Gauge
|
||||
backendRetriesCounter metrics.Counter
|
||||
backendServerUpGauge metrics.Gauge
|
||||
}
|
||||
|
||||
func (r *standardRegistry) IsEnabled() bool {
|
||||
return r.enabled
|
||||
}
|
||||
|
||||
func (r *standardRegistry) ReqsCounter() metrics.Counter {
|
||||
return r.reqsCounter
|
||||
func (r *standardRegistry) ConfigReloadsCounter() metrics.Counter {
|
||||
return r.configReloadsCounter
|
||||
}
|
||||
|
||||
func (r *standardRegistry) ReqDurationHistogram() metrics.Histogram {
|
||||
return r.reqDurationHistogram
|
||||
func (r *standardRegistry) ConfigReloadsFailureCounter() metrics.Counter {
|
||||
return r.configReloadsFailureCounter
|
||||
}
|
||||
|
||||
func (r *standardRegistry) RetriesCounter() metrics.Counter {
|
||||
return r.retriesCounter
|
||||
func (r *standardRegistry) LastConfigReloadSuccessGauge() metrics.Gauge {
|
||||
return r.lastConfigReloadSuccessGauge
|
||||
}
|
||||
|
||||
// NewVoidRegistry is a noop implementation of metrics.Registry.
|
||||
// It is used to avoid nil checking in components that do metric collections.
|
||||
func NewVoidRegistry() Registry {
|
||||
return &standardRegistry{
|
||||
enabled: false,
|
||||
reqsCounter: &voidCounter{},
|
||||
reqDurationHistogram: &voidHistogram{},
|
||||
retriesCounter: &voidCounter{},
|
||||
}
|
||||
func (r *standardRegistry) LastConfigReloadFailureGauge() metrics.Gauge {
|
||||
return r.lastConfigReloadFailureGauge
|
||||
}
|
||||
|
||||
type voidCounter struct{}
|
||||
func (r *standardRegistry) EntrypointReqsCounter() metrics.Counter {
|
||||
return r.entrypointReqsCounter
|
||||
}
|
||||
|
||||
func (v *voidCounter) With(labelValues ...string) metrics.Counter { return v }
|
||||
func (v *voidCounter) Add(delta float64) {}
|
||||
func (r *standardRegistry) EntrypointReqDurationHistogram() metrics.Histogram {
|
||||
return r.entrypointReqDurationHistogram
|
||||
}
|
||||
|
||||
type voidHistogram struct{}
|
||||
func (r *standardRegistry) EntrypointOpenConnsGauge() metrics.Gauge {
|
||||
return r.entrypointOpenConnsGauge
|
||||
}
|
||||
|
||||
func (h *voidHistogram) With(labelValues ...string) metrics.Histogram { return h }
|
||||
func (h *voidHistogram) Observe(value float64) {}
|
||||
func (r *standardRegistry) BackendReqsCounter() metrics.Counter {
|
||||
return r.backendReqsCounter
|
||||
}
|
||||
|
||||
func (r *standardRegistry) BackendReqDurationHistogram() metrics.Histogram {
|
||||
return r.backendReqDurationHistogram
|
||||
}
|
||||
|
||||
func (r *standardRegistry) BackendOpenConnsGauge() metrics.Gauge {
|
||||
return r.backendOpenConnsGauge
|
||||
}
|
||||
|
||||
func (r *standardRegistry) BackendRetriesCounter() metrics.Counter {
|
||||
return r.backendRetriesCounter
|
||||
}
|
||||
|
||||
func (r *standardRegistry) BackendServerUpGauge() metrics.Gauge {
|
||||
return r.backendServerUpGauge
|
||||
}
|
||||
|
|
|
@ -7,29 +7,18 @@ import (
|
|||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestNewVoidRegistry(t *testing.T) {
|
||||
registry := NewVoidRegistry()
|
||||
|
||||
if registry.IsEnabled() {
|
||||
t.Errorf("VoidRegistry should not return true for IsEnabled()")
|
||||
}
|
||||
registry.ReqsCounter().With("some", "value").Add(1)
|
||||
registry.ReqDurationHistogram().With("some", "value").Observe(1)
|
||||
registry.RetriesCounter().With("some", "value").Add(1)
|
||||
}
|
||||
|
||||
func TestNewMultiRegistry(t *testing.T) {
|
||||
registries := []Registry{newCollectingRetryMetrics(), newCollectingRetryMetrics()}
|
||||
registry := NewMultiRegistry(registries)
|
||||
|
||||
registry.ReqsCounter().With("key", "requests").Add(1)
|
||||
registry.ReqDurationHistogram().With("key", "durations").Observe(2)
|
||||
registry.RetriesCounter().With("key", "retries").Add(3)
|
||||
registry.BackendReqsCounter().With("key", "requests").Add(1)
|
||||
registry.BackendReqDurationHistogram().With("key", "durations").Observe(2)
|
||||
registry.BackendRetriesCounter().With("key", "retries").Add(3)
|
||||
|
||||
for _, collectingRegistry := range registries {
|
||||
cReqsCounter := collectingRegistry.ReqsCounter().(*counterMock)
|
||||
cReqDurationHistogram := collectingRegistry.ReqDurationHistogram().(*histogramMock)
|
||||
cRetriesCounter := collectingRegistry.RetriesCounter().(*counterMock)
|
||||
cReqsCounter := collectingRegistry.BackendReqsCounter().(*counterMock)
|
||||
cReqDurationHistogram := collectingRegistry.BackendReqDurationHistogram().(*histogramMock)
|
||||
cRetriesCounter := collectingRegistry.BackendRetriesCounter().(*counterMock)
|
||||
|
||||
wantCounterValue := float64(1)
|
||||
if cReqsCounter.counterValue != wantCounterValue {
|
||||
|
@ -52,9 +41,9 @@ func TestNewMultiRegistry(t *testing.T) {
|
|||
|
||||
func newCollectingRetryMetrics() Registry {
|
||||
return &standardRegistry{
|
||||
reqsCounter: &counterMock{},
|
||||
reqDurationHistogram: &histogramMock{},
|
||||
retriesCounter: &counterMock{},
|
||||
backendReqsCounter: &counterMock{},
|
||||
backendReqDurationHistogram: &histogramMock{},
|
||||
backendRetriesCounter: &counterMock{},
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2,10 +2,14 @@ package metrics
|
|||
|
||||
import (
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/containous/mux"
|
||||
"github.com/containous/traefik/safe"
|
||||
"github.com/containous/traefik/types"
|
||||
"github.com/go-kit/kit/metrics/prometheus"
|
||||
"github.com/go-kit/kit/metrics"
|
||||
stdprometheus "github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
)
|
||||
|
@ -13,15 +17,50 @@ import (
|
|||
const (
|
||||
metricNamePrefix = "traefik_"
|
||||
|
||||
reqsTotalName = metricNamePrefix + "requests_total"
|
||||
reqDurationName = metricNamePrefix + "request_duration_seconds"
|
||||
retriesTotalName = metricNamePrefix + "backend_retries_total"
|
||||
// server meta information
|
||||
configReloadsTotalName = metricNamePrefix + "config_reloads_total"
|
||||
configReloadsFailuresTotalName = metricNamePrefix + "config_reloads_failure_total"
|
||||
configLastReloadSuccessName = metricNamePrefix + "config_last_reload_success"
|
||||
configLastReloadFailureName = metricNamePrefix + "config_last_reload_failure"
|
||||
|
||||
// entrypoint
|
||||
entrypointReqsTotalName = metricNamePrefix + "entrypoint_requests_total"
|
||||
entrypointReqDurationName = metricNamePrefix + "entrypoint_request_duration_seconds"
|
||||
entrypointOpenConnsName = metricNamePrefix + "entrypoint_open_connections"
|
||||
|
||||
// backend level
|
||||
backendReqsTotalName = metricNamePrefix + "backend_requests_total"
|
||||
backendReqDurationName = metricNamePrefix + "backend_request_duration_seconds"
|
||||
backendOpenConnsName = metricNamePrefix + "backend_open_connections"
|
||||
backendRetriesTotalName = metricNamePrefix + "backend_retries_total"
|
||||
backendServerUpName = metricNamePrefix + "backend_server_up"
|
||||
)
|
||||
|
||||
// PrometheusHandler expose Prometheus routes
|
||||
const (
|
||||
// generationAgeForever indicates that a metric never gets outdated.
|
||||
generationAgeForever = 0
|
||||
// generationAgeDefault is the default age of three generations.
|
||||
generationAgeDefault = 3
|
||||
)
|
||||
|
||||
// promState holds all metric state internally and acts as the only Collector we register for Prometheus.
|
||||
//
|
||||
// This enables control to remove metrics that belong to outdated configuration.
|
||||
// As an example why this is required, consider Traefik learns about a new service.
|
||||
// It populates the 'traefik_backend_server_up' metric for it with a value of 1 (alive).
|
||||
// When the backend is undeployed now the metric is still there in the client library
|
||||
// and stays there until Traefik is restarted.
|
||||
//
|
||||
// To solve this problem promState keeps track of configuration generations.
|
||||
// Every time a new configuration is loaded, the generation is increased by one.
|
||||
// Metrics that "belong" to a dynamic configuration part of Traefik (e.g. backend, entrypoint)
|
||||
// are removed once they were last tracked at least 3 generations ago.
|
||||
var promState = newPrometheusState()
|
||||
|
||||
// PrometheusHandler exposes Prometheus routes.
|
||||
type PrometheusHandler struct{}
|
||||
|
||||
// AddRoutes add Prometheus routes on a router
|
||||
// AddRoutes adds Prometheus routes on a router.
|
||||
func (h PrometheusHandler) AddRoutes(router *mux.Router) {
|
||||
router.Methods(http.MethodGet).Path("/metrics").Handler(promhttp.Handler())
|
||||
}
|
||||
|
@ -34,24 +73,332 @@ func RegisterPrometheus(config *types.Prometheus) Registry {
|
|||
buckets = config.Buckets
|
||||
}
|
||||
|
||||
reqCounter := prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||
Name: reqsTotalName,
|
||||
Help: "How many HTTP requests processed, partitioned by status code and method.",
|
||||
}, []string{"service", "code", "method"})
|
||||
reqDurationHistogram := prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{
|
||||
Name: reqDurationName,
|
||||
Help: "How long it took to process the request.",
|
||||
safe.Go(func() {
|
||||
promState.ListenValueUpdates()
|
||||
})
|
||||
|
||||
configReloads := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
|
||||
Name: configReloadsTotalName,
|
||||
Help: "Config reloads",
|
||||
}, []string{})
|
||||
configReloadsFailures := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
|
||||
Name: configReloadsFailuresTotalName,
|
||||
Help: "Config failure reloads",
|
||||
}, []string{})
|
||||
lastConfigReloadSuccess := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
|
||||
Name: configLastReloadSuccessName,
|
||||
Help: "Last config reload success",
|
||||
}, []string{})
|
||||
lastConfigReloadFailure := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
|
||||
Name: configLastReloadFailureName,
|
||||
Help: "Last config reload failure",
|
||||
}, []string{})
|
||||
|
||||
entrypointReqs := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
|
||||
Name: entrypointReqsTotalName,
|
||||
Help: "How many HTTP requests processed on an entrypoint, partitioned by status code, protocol, and method.",
|
||||
}, []string{"code", "method", "protocol", "entrypoint"})
|
||||
entrypointReqDurations := newHistogramFrom(promState.collectors, stdprometheus.HistogramOpts{
|
||||
Name: entrypointReqDurationName,
|
||||
Help: "How long it took to process the request on an entrypoint, partitioned by status code, protocol, and method.",
|
||||
Buckets: buckets,
|
||||
}, []string{"service", "code"})
|
||||
retryCounter := prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||
Name: retriesTotalName,
|
||||
Help: "How many request retries happened in total.",
|
||||
}, []string{"service"})
|
||||
}, []string{"code", "method", "protocol", "entrypoint"})
|
||||
entrypointOpenConns := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
|
||||
Name: entrypointOpenConnsName,
|
||||
Help: "How many open connections exist on an entrypoint, partitioned by method and protocol.",
|
||||
}, []string{"method", "protocol", "entrypoint"})
|
||||
|
||||
backendReqs := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
|
||||
Name: backendReqsTotalName,
|
||||
Help: "How many HTTP requests processed on a backend, partitioned by status code, protocol, and method.",
|
||||
}, []string{"code", "method", "protocol", "backend"})
|
||||
backendReqDurations := newHistogramFrom(promState.collectors, stdprometheus.HistogramOpts{
|
||||
Name: backendReqDurationName,
|
||||
Help: "How long it took to process the request on a backend, partitioned by status code, protocol, and method.",
|
||||
Buckets: buckets,
|
||||
}, []string{"code", "method", "protocol", "backend"})
|
||||
backendOpenConns := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
|
||||
Name: backendOpenConnsName,
|
||||
Help: "How many open connections exist on a backend, partitioned by method and protocol.",
|
||||
}, []string{"method", "protocol", "backend"})
|
||||
backendRetries := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
|
||||
Name: backendRetriesTotalName,
|
||||
Help: "How many request retries happened on a backend.",
|
||||
}, []string{"backend"})
|
||||
backendServerUp := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
|
||||
Name: backendServerUpName,
|
||||
Help: "Backend server is up, described by gauge value of 0 or 1.",
|
||||
}, []string{"backend", "url"})
|
||||
|
||||
promState.describers = []func(chan<- *stdprometheus.Desc){
|
||||
configReloads.cv.Describe,
|
||||
configReloadsFailures.cv.Describe,
|
||||
lastConfigReloadSuccess.gv.Describe,
|
||||
lastConfigReloadFailure.gv.Describe,
|
||||
entrypointReqs.cv.Describe,
|
||||
entrypointReqDurations.hv.Describe,
|
||||
entrypointOpenConns.gv.Describe,
|
||||
backendReqs.cv.Describe,
|
||||
backendReqDurations.hv.Describe,
|
||||
backendOpenConns.gv.Describe,
|
||||
backendRetries.cv.Describe,
|
||||
backendServerUp.gv.Describe,
|
||||
}
|
||||
stdprometheus.MustRegister(promState)
|
||||
|
||||
return &standardRegistry{
|
||||
enabled: true,
|
||||
reqsCounter: reqCounter,
|
||||
reqDurationHistogram: reqDurationHistogram,
|
||||
retriesCounter: retryCounter,
|
||||
enabled: true,
|
||||
configReloadsCounter: configReloads,
|
||||
configReloadsFailureCounter: configReloadsFailures,
|
||||
lastConfigReloadSuccessGauge: lastConfigReloadSuccess,
|
||||
lastConfigReloadFailureGauge: lastConfigReloadFailure,
|
||||
entrypointReqsCounter: entrypointReqs,
|
||||
entrypointReqDurationHistogram: entrypointReqDurations,
|
||||
entrypointOpenConnsGauge: entrypointOpenConns,
|
||||
backendReqsCounter: backendReqs,
|
||||
backendReqDurationHistogram: backendReqDurations,
|
||||
backendOpenConnsGauge: backendOpenConns,
|
||||
backendRetriesCounter: backendRetries,
|
||||
backendServerUpGauge: backendServerUp,
|
||||
}
|
||||
}
|
||||
|
||||
// OnConfigurationUpdate increases the current generation of the prometheus state.
|
||||
func OnConfigurationUpdate() {
|
||||
promState.IncGeneration()
|
||||
}
|
||||
|
||||
func newPrometheusState() *prometheusState {
|
||||
collectors := make(chan *collector)
|
||||
state := make(map[string]*collector)
|
||||
|
||||
return &prometheusState{
|
||||
collectors: collectors,
|
||||
state: state,
|
||||
}
|
||||
}
|
||||
|
||||
type prometheusState struct {
|
||||
currentGeneration int
|
||||
collectors chan *collector
|
||||
describers []func(ch chan<- *stdprometheus.Desc)
|
||||
|
||||
mtx sync.Mutex
|
||||
state map[string]*collector
|
||||
}
|
||||
|
||||
func (ps *prometheusState) IncGeneration() {
|
||||
ps.mtx.Lock()
|
||||
defer ps.mtx.Unlock()
|
||||
ps.currentGeneration++
|
||||
}
|
||||
|
||||
func (ps *prometheusState) ListenValueUpdates() {
|
||||
for collector := range ps.collectors {
|
||||
ps.mtx.Lock()
|
||||
collector.lastTrackedGeneration = ps.currentGeneration
|
||||
ps.state[collector.id] = collector
|
||||
ps.mtx.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
// Describe implements prometheus.Collector and simply calls
|
||||
// the registered describer functions.
|
||||
func (ps *prometheusState) Describe(ch chan<- *stdprometheus.Desc) {
|
||||
for _, desc := range ps.describers {
|
||||
desc(ch)
|
||||
}
|
||||
}
|
||||
|
||||
// Collect implements prometheus.Collector. It calls the Collect
|
||||
// method of all metrics it received on the collectors channel.
|
||||
// It's also responsible to remove metrics that were tracked
|
||||
// at least three generations ago. Those metrics are cleaned up
|
||||
// after the Collect of them were called.
|
||||
func (ps *prometheusState) Collect(ch chan<- stdprometheus.Metric) {
|
||||
ps.mtx.Lock()
|
||||
defer ps.mtx.Unlock()
|
||||
|
||||
outdatedKeys := []string{}
|
||||
for key, cs := range ps.state {
|
||||
cs.collector.Collect(ch)
|
||||
|
||||
if cs.maxAge == generationAgeForever {
|
||||
continue
|
||||
}
|
||||
if ps.currentGeneration-cs.lastTrackedGeneration >= cs.maxAge {
|
||||
outdatedKeys = append(outdatedKeys, key)
|
||||
}
|
||||
}
|
||||
|
||||
for _, key := range outdatedKeys {
|
||||
delete(ps.state, key)
|
||||
}
|
||||
}
|
||||
|
||||
func newCollector(metricName string, lnvs labelNamesValues, c stdprometheus.Collector) *collector {
|
||||
maxAge := generationAgeDefault
|
||||
|
||||
// metrics without labels should never become outdated
|
||||
if len(lnvs) == 0 {
|
||||
maxAge = generationAgeForever
|
||||
}
|
||||
|
||||
return &collector{
|
||||
id: buildMetricID(metricName, lnvs),
|
||||
maxAge: maxAge,
|
||||
collector: c,
|
||||
}
|
||||
}
|
||||
|
||||
// collector wraps a Collector object from the Prometheus client library.
|
||||
// It adds information on how many generations this metric should be present
|
||||
// in the /metrics output, relatived to the time it was last tracked.
|
||||
type collector struct {
|
||||
id string
|
||||
collector stdprometheus.Collector
|
||||
lastTrackedGeneration int
|
||||
maxAge int
|
||||
}
|
||||
|
||||
func buildMetricID(metricName string, lnvs labelNamesValues) string {
|
||||
newLnvs := append([]string{}, lnvs...)
|
||||
sort.Strings(newLnvs)
|
||||
return metricName + ":" + strings.Join(newLnvs, "|")
|
||||
}
|
||||
|
||||
func newCounterFrom(collectors chan<- *collector, opts stdprometheus.CounterOpts, labelNames []string) *counter {
|
||||
cv := stdprometheus.NewCounterVec(opts, labelNames)
|
||||
c := &counter{
|
||||
name: opts.Name,
|
||||
cv: cv,
|
||||
collectors: collectors,
|
||||
}
|
||||
if len(labelNames) == 0 {
|
||||
c.Add(0)
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
type counter struct {
|
||||
name string
|
||||
cv *stdprometheus.CounterVec
|
||||
labelNamesValues labelNamesValues
|
||||
collectors chan<- *collector
|
||||
}
|
||||
|
||||
func (c *counter) With(labelValues ...string) metrics.Counter {
|
||||
return &counter{
|
||||
name: c.name,
|
||||
cv: c.cv,
|
||||
labelNamesValues: c.labelNamesValues.With(labelValues...),
|
||||
collectors: c.collectors,
|
||||
}
|
||||
}
|
||||
|
||||
func (c *counter) Add(delta float64) {
|
||||
collector := c.cv.With(c.labelNamesValues.ToLabels())
|
||||
collector.Add(delta)
|
||||
c.collectors <- newCollector(c.name, c.labelNamesValues, collector)
|
||||
}
|
||||
|
||||
func (c *counter) Describe(ch chan<- *stdprometheus.Desc) {
|
||||
c.cv.Describe(ch)
|
||||
}
|
||||
|
||||
func newGaugeFrom(collectors chan<- *collector, opts stdprometheus.GaugeOpts, labelNames []string) *gauge {
|
||||
gv := stdprometheus.NewGaugeVec(opts, labelNames)
|
||||
g := &gauge{
|
||||
name: opts.Name,
|
||||
gv: gv,
|
||||
collectors: collectors,
|
||||
}
|
||||
if len(labelNames) == 0 {
|
||||
g.Set(0)
|
||||
}
|
||||
return g
|
||||
}
|
||||
|
||||
type gauge struct {
|
||||
name string
|
||||
gv *stdprometheus.GaugeVec
|
||||
labelNamesValues labelNamesValues
|
||||
collectors chan<- *collector
|
||||
}
|
||||
|
||||
func (g *gauge) With(labelValues ...string) metrics.Gauge {
|
||||
return &gauge{
|
||||
name: g.name,
|
||||
gv: g.gv,
|
||||
labelNamesValues: g.labelNamesValues.With(labelValues...),
|
||||
collectors: g.collectors,
|
||||
}
|
||||
}
|
||||
|
||||
func (g *gauge) Set(value float64) {
|
||||
collector := g.gv.With(g.labelNamesValues.ToLabels())
|
||||
collector.Set(value)
|
||||
g.collectors <- newCollector(g.name, g.labelNamesValues, collector)
|
||||
}
|
||||
|
||||
func (g *gauge) Describe(ch chan<- *stdprometheus.Desc) {
|
||||
g.gv.Describe(ch)
|
||||
}
|
||||
|
||||
func newHistogramFrom(collectors chan<- *collector, opts stdprometheus.HistogramOpts, labelNames []string) *histogram {
|
||||
hv := stdprometheus.NewHistogramVec(opts, labelNames)
|
||||
return &histogram{
|
||||
name: opts.Name,
|
||||
hv: hv,
|
||||
collectors: collectors,
|
||||
}
|
||||
}
|
||||
|
||||
type histogram struct {
|
||||
name string
|
||||
hv *stdprometheus.HistogramVec
|
||||
labelNamesValues labelNamesValues
|
||||
collectors chan<- *collector
|
||||
}
|
||||
|
||||
func (h *histogram) With(labelValues ...string) metrics.Histogram {
|
||||
return &histogram{
|
||||
name: h.name,
|
||||
hv: h.hv,
|
||||
labelNamesValues: h.labelNamesValues.With(labelValues...),
|
||||
collectors: h.collectors,
|
||||
}
|
||||
}
|
||||
|
||||
func (h *histogram) Observe(value float64) {
|
||||
collector := h.hv.With(h.labelNamesValues.ToLabels())
|
||||
collector.Observe(value)
|
||||
h.collectors <- newCollector(h.name, h.labelNamesValues, collector)
|
||||
}
|
||||
|
||||
func (h *histogram) Describe(ch chan<- *stdprometheus.Desc) {
|
||||
h.hv.Describe(ch)
|
||||
}
|
||||
|
||||
// labelNamesValues is a flat list of alternating label names and values.
// Metrics may include it as a member to help them satisfy With semantics and
// save some code duplication.
type labelNamesValues []string

// With returns a new labelNamesValues holding the receiver's entries followed
// by labelValues. If labelValues has an odd length, the value "unknown" is
// appended so that every name has a value.
//
// The result always has its own backing array. Appending directly to the
// receiver would let two With calls on the same receiver share spare capacity
// and overwrite each other's entries, and padding labelValues in place could
// write into the caller's slice.
func (lvs labelNamesValues) With(labelValues ...string) labelNamesValues {
	needsPadding := len(labelValues)%2 != 0

	size := len(lvs) + len(labelValues)
	if needsPadding {
		size++
	}

	combined := make(labelNamesValues, 0, size)
	combined = append(combined, lvs...)
	combined = append(combined, labelValues...)
	if needsPadding {
		combined = append(combined, "unknown")
	}
	return combined
}
|
||||
|
||||
// ToLabels is a convenience method to convert a labelNamesValues
|
||||
// to the native prometheus.Labels.
|
||||
func (lvs labelNamesValues) ToLabels() stdprometheus.Labels {
|
||||
labels := stdprometheus.Labels{}
|
||||
for i := 0; i < len(lvs); i += 2 {
|
||||
labels[lvs[i]] = lvs[i+1]
|
||||
}
|
||||
return labels
|
||||
}
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/containous/traefik/types"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
|
@ -12,20 +14,54 @@ import (
|
|||
|
||||
func TestPrometheus(t *testing.T) {
|
||||
prometheusRegistry := RegisterPrometheus(&types.Prometheus{})
|
||||
defer prometheus.Unregister(promState)
|
||||
|
||||
if !prometheusRegistry.IsEnabled() {
|
||||
t.Errorf("PrometheusRegistry should return true for IsEnabled()")
|
||||
}
|
||||
prometheusRegistry.ReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
prometheusRegistry.ReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
prometheusRegistry.ReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
prometheusRegistry.ReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
prometheusRegistry.RetriesCounter().With("service", "test").Add(1)
|
||||
|
||||
metricsFamilies, err := prometheus.DefaultGatherer.Gather()
|
||||
if err != nil {
|
||||
t.Fatalf("could not gather metrics families: %s", err)
|
||||
}
|
||||
prometheusRegistry.ConfigReloadsCounter().Add(1)
|
||||
prometheusRegistry.ConfigReloadsFailureCounter().Add(1)
|
||||
prometheusRegistry.LastConfigReloadSuccessGauge().Set(float64(time.Now().Unix()))
|
||||
prometheusRegistry.LastConfigReloadFailureGauge().Set(float64(time.Now().Unix()))
|
||||
|
||||
prometheusRegistry.
|
||||
EntrypointReqsCounter().
|
||||
With("code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http", "entrypoint", "http").
|
||||
Add(1)
|
||||
prometheusRegistry.
|
||||
EntrypointReqDurationHistogram().
|
||||
With("code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http", "entrypoint", "http").
|
||||
Observe(1)
|
||||
prometheusRegistry.
|
||||
EntrypointOpenConnsGauge().
|
||||
With("method", http.MethodGet, "protocol", "http", "entrypoint", "http").
|
||||
Set(1)
|
||||
|
||||
prometheusRegistry.
|
||||
BackendReqsCounter().
|
||||
With("backend", "backend1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
|
||||
Add(1)
|
||||
prometheusRegistry.
|
||||
BackendReqDurationHistogram().
|
||||
With("backend", "backend1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
|
||||
Observe(10000)
|
||||
prometheusRegistry.
|
||||
BackendOpenConnsGauge().
|
||||
With("backend", "backend1", "method", http.MethodGet, "protocol", "http").
|
||||
Set(1)
|
||||
prometheusRegistry.
|
||||
BackendRetriesCounter().
|
||||
With("backend", "backend1").
|
||||
Add(1)
|
||||
prometheusRegistry.
|
||||
BackendServerUpGauge().
|
||||
With("backend", "backend1", "url", "http://127.0.0.10:80").
|
||||
Set(1)
|
||||
|
||||
delayForTrackingCompletion()
|
||||
|
||||
metricsFamilies := mustScrape()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
|
@ -33,46 +69,93 @@ func TestPrometheus(t *testing.T) {
|
|||
assert func(*dto.MetricFamily)
|
||||
}{
|
||||
{
|
||||
name: reqsTotalName,
|
||||
labels: map[string]string{
|
||||
"code": "200",
|
||||
"method": http.MethodGet,
|
||||
"service": "test",
|
||||
},
|
||||
assert: func(family *dto.MetricFamily) {
|
||||
cv := family.Metric[0].Counter.GetValue()
|
||||
expectedCv := float64(2)
|
||||
if cv != expectedCv {
|
||||
t.Errorf("gathered metrics do not contain correct value for total requests, got %f expected %f", cv, expectedCv)
|
||||
}
|
||||
},
|
||||
name: configReloadsTotalName,
|
||||
assert: buildCounterAssert(t, configReloadsTotalName, 1),
|
||||
},
|
||||
{
|
||||
name: reqDurationName,
|
||||
labels: map[string]string{
|
||||
"service": "test",
|
||||
"code": "200",
|
||||
},
|
||||
assert: func(family *dto.MetricFamily) {
|
||||
sc := family.Metric[0].Histogram.GetSampleCount()
|
||||
expectedSc := uint64(2)
|
||||
if sc != expectedSc {
|
||||
t.Errorf("gathered metrics do not contain correct sample count for request duration, got %d expected %d", sc, expectedSc)
|
||||
}
|
||||
},
|
||||
name: configReloadsFailuresTotalName,
|
||||
assert: buildCounterAssert(t, configReloadsFailuresTotalName, 1),
|
||||
},
|
||||
{
|
||||
name: retriesTotalName,
|
||||
name: configLastReloadSuccessName,
|
||||
assert: buildTimestampAssert(t, configLastReloadSuccessName),
|
||||
},
|
||||
{
|
||||
name: configLastReloadFailureName,
|
||||
assert: buildTimestampAssert(t, configLastReloadFailureName),
|
||||
},
|
||||
{
|
||||
name: entrypointReqsTotalName,
|
||||
labels: map[string]string{
|
||||
"service": "test",
|
||||
"code": "200",
|
||||
"method": http.MethodGet,
|
||||
"protocol": "http",
|
||||
"entrypoint": "http",
|
||||
},
|
||||
assert: func(family *dto.MetricFamily) {
|
||||
cv := family.Metric[0].Counter.GetValue()
|
||||
expectedCv := float64(1)
|
||||
if cv != expectedCv {
|
||||
t.Errorf("gathered metrics do not contain correct value for total retries, got %f expected %f", cv, expectedCv)
|
||||
}
|
||||
assert: buildCounterAssert(t, entrypointReqsTotalName, 1),
|
||||
},
|
||||
{
|
||||
name: entrypointReqDurationName,
|
||||
labels: map[string]string{
|
||||
"code": "200",
|
||||
"method": http.MethodGet,
|
||||
"protocol": "http",
|
||||
"entrypoint": "http",
|
||||
},
|
||||
assert: buildHistogramAssert(t, entrypointReqDurationName, 1),
|
||||
},
|
||||
{
|
||||
name: entrypointOpenConnsName,
|
||||
labels: map[string]string{
|
||||
"method": http.MethodGet,
|
||||
"protocol": "http",
|
||||
"entrypoint": "http",
|
||||
},
|
||||
assert: buildGaugeAssert(t, entrypointOpenConnsName, 1),
|
||||
},
|
||||
{
|
||||
name: backendReqsTotalName,
|
||||
labels: map[string]string{
|
||||
"code": "200",
|
||||
"method": http.MethodGet,
|
||||
"protocol": "http",
|
||||
"backend": "backend1",
|
||||
},
|
||||
assert: buildCounterAssert(t, backendReqsTotalName, 1),
|
||||
},
|
||||
{
|
||||
name: backendReqDurationName,
|
||||
labels: map[string]string{
|
||||
"code": "200",
|
||||
"method": http.MethodGet,
|
||||
"protocol": "http",
|
||||
"backend": "backend1",
|
||||
},
|
||||
assert: buildHistogramAssert(t, backendReqDurationName, 1),
|
||||
},
|
||||
{
|
||||
name: backendOpenConnsName,
|
||||
labels: map[string]string{
|
||||
"method": http.MethodGet,
|
||||
"protocol": "http",
|
||||
"backend": "backend1",
|
||||
},
|
||||
assert: buildGaugeAssert(t, backendOpenConnsName, 1),
|
||||
},
|
||||
{
|
||||
name: backendRetriesTotalName,
|
||||
labels: map[string]string{
|
||||
"backend": "backend1",
|
||||
},
|
||||
assert: buildGreaterThanCounterAssert(t, backendRetriesTotalName, 1),
|
||||
},
|
||||
{
|
||||
name: backendServerUpName,
|
||||
labels: map[string]string{
|
||||
"backend": "backend1",
|
||||
"url": "http://127.0.0.10:80",
|
||||
},
|
||||
assert: buildGaugeAssert(t, backendServerUpName, 1),
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -94,6 +177,90 @@ func TestPrometheus(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestPrometheusGenerationLogicForMetricWithLabel(t *testing.T) {
|
||||
prometheusRegistry := RegisterPrometheus(&types.Prometheus{})
|
||||
defer prometheus.Unregister(promState)
|
||||
|
||||
// Metrics with labels belonging to a specific configuration in Traefik
|
||||
// should be removed when the generationMaxAge is exceeded. As example
|
||||
// we use the traefik_backend_requests_total metric.
|
||||
prometheusRegistry.
|
||||
BackendReqsCounter().
|
||||
With("backend", "backend1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
|
||||
Add(1)
|
||||
|
||||
delayForTrackingCompletion()
|
||||
|
||||
assertMetricExists(t, backendReqsTotalName, mustScrape())
|
||||
|
||||
// Increase the config generation one more than the max age of a metric.
|
||||
for i := 0; i < generationAgeDefault+1; i++ {
|
||||
OnConfigurationUpdate()
|
||||
}
|
||||
|
||||
// On the next scrape the metric still exists and will be removed
|
||||
// after the scrape completed.
|
||||
assertMetricExists(t, backendReqsTotalName, mustScrape())
|
||||
|
||||
// Now the metric should be absent.
|
||||
assertMetricAbsent(t, backendReqsTotalName, mustScrape())
|
||||
}
|
||||
|
||||
func TestPrometheusGenerationLogicForMetricWithoutLabel(t *testing.T) {
|
||||
prometheusRegistry := RegisterPrometheus(&types.Prometheus{})
|
||||
defer prometheus.Unregister(promState)
|
||||
|
||||
// Metrics without labels like traefik_config_reloads_total should live forever
|
||||
// and never get removed.
|
||||
prometheusRegistry.ConfigReloadsCounter().Add(1)
|
||||
|
||||
delayForTrackingCompletion()
|
||||
|
||||
assertMetricExists(t, configReloadsTotalName, mustScrape())
|
||||
|
||||
// Increase the config generation one more than the max age of a metric.
|
||||
for i := 0; i < generationAgeDefault+100; i++ {
|
||||
OnConfigurationUpdate()
|
||||
}
|
||||
|
||||
// Scrape two times in order to verify, that it is not removed after the
|
||||
// first scrape completed.
|
||||
assertMetricExists(t, configReloadsTotalName, mustScrape())
|
||||
assertMetricExists(t, configReloadsTotalName, mustScrape())
|
||||
}
|
||||
|
||||
// delayForTrackingCompletion pauses the test for 250 milliseconds.
//
// Tracking and gathering the metrics happens concurrently. In practice this
// is no problem: a tracked metric that misses the current scrape simply shows
// up in the next one. To test the tracking of all metrics reliably, the tests
// sleep for a short amount of time so that the metric is present in the next
// scrape.
func delayForTrackingCompletion() {
	const trackingDelay = 250 * time.Millisecond
	time.Sleep(trackingDelay)
}
|
||||
|
||||
func mustScrape() []*dto.MetricFamily {
|
||||
families, err := prometheus.DefaultGatherer.Gather()
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("could not gather metrics families: %s", err))
|
||||
}
|
||||
return families
|
||||
}
|
||||
|
||||
func assertMetricExists(t *testing.T, name string, families []*dto.MetricFamily) {
|
||||
t.Helper()
|
||||
if findMetricFamily(name, families) == nil {
|
||||
t.Errorf("gathered metrics do not contain %q", name)
|
||||
}
|
||||
}
|
||||
|
||||
func assertMetricAbsent(t *testing.T, name string, families []*dto.MetricFamily) {
|
||||
t.Helper()
|
||||
if findMetricFamily(name, families) != nil {
|
||||
t.Errorf("gathered metrics contain %q, but should not", name)
|
||||
}
|
||||
}
|
||||
|
||||
func findMetricFamily(name string, families []*dto.MetricFamily) *dto.MetricFamily {
|
||||
for _, family := range families {
|
||||
if family.GetName() == name {
|
||||
|
@ -102,3 +269,43 @@ func findMetricFamily(name string, families []*dto.MetricFamily) *dto.MetricFami
|
|||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func buildCounterAssert(t *testing.T, metricName string, expectedValue int) func(family *dto.MetricFamily) {
|
||||
return func(family *dto.MetricFamily) {
|
||||
if cv := int(family.Metric[0].Counter.GetValue()); cv != expectedValue {
|
||||
t.Errorf("metric %s has value %d, want %d", metricName, cv, expectedValue)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func buildGreaterThanCounterAssert(t *testing.T, metricName string, expectedMinValue int) func(family *dto.MetricFamily) {
|
||||
return func(family *dto.MetricFamily) {
|
||||
if cv := int(family.Metric[0].Counter.GetValue()); cv < expectedMinValue {
|
||||
t.Errorf("metric %s has value %d, want at least %d", metricName, cv, expectedMinValue)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func buildHistogramAssert(t *testing.T, metricName string, expectedSampleCount int) func(family *dto.MetricFamily) {
|
||||
return func(family *dto.MetricFamily) {
|
||||
if sc := int(family.Metric[0].Histogram.GetSampleCount()); sc != expectedSampleCount {
|
||||
t.Errorf("metric %s has sample count value %d, want %d", metricName, sc, expectedSampleCount)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func buildGaugeAssert(t *testing.T, metricName string, expectedValue int) func(family *dto.MetricFamily) {
|
||||
return func(family *dto.MetricFamily) {
|
||||
if gv := int(family.Metric[0].Gauge.GetValue()); gv != expectedValue {
|
||||
t.Errorf("metric %s has value %d, want %d", metricName, gv, expectedValue)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func buildTimestampAssert(t *testing.T, metricName string) func(family *dto.MetricFamily) {
|
||||
return func(family *dto.MetricFamily) {
|
||||
if ts := time.Unix(int64(family.Metric[0].Gauge.GetValue()), 0); time.Since(ts) > time.Minute {
|
||||
t.Errorf("metric %s has wrong timestamp %v", metricName, ts)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,10 +30,10 @@ func RegisterStatsd(config *types.Statsd) Registry {
|
|||
}
|
||||
|
||||
return &standardRegistry{
|
||||
enabled: true,
|
||||
reqsCounter: statsdClient.NewCounter(statsdMetricsReqsName, 1.0),
|
||||
reqDurationHistogram: statsdClient.NewTiming(statsdMetricsLatencyName, 1.0),
|
||||
retriesCounter: statsdClient.NewCounter(statsdRetriesTotalName, 1.0),
|
||||
enabled: true,
|
||||
backendReqsCounter: statsdClient.NewCounter(statsdMetricsReqsName, 1.0),
|
||||
backendReqDurationHistogram: statsdClient.NewTiming(statsdMetricsLatencyName, 1.0),
|
||||
backendRetriesCounter: statsdClient.NewCounter(statsdRetriesTotalName, 1.0),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -29,10 +29,10 @@ func TestStatsD(t *testing.T) {
|
|||
}
|
||||
|
||||
udp.ShouldReceiveAll(t, expected, func() {
|
||||
statsdRegistry.ReqsCounter().With("service", "test", "code", string(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
statsdRegistry.ReqsCounter().With("service", "test", "code", string(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
statsdRegistry.RetriesCounter().With("service", "test").Add(1)
|
||||
statsdRegistry.RetriesCounter().With("service", "test").Add(1)
|
||||
statsdRegistry.ReqDurationHistogram().With("service", "test", "code", string(http.StatusOK)).Observe(10000)
|
||||
statsdRegistry.BackendReqsCounter().With("service", "test", "code", string(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
statsdRegistry.BackendReqsCounter().With("service", "test", "code", string(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
statsdRegistry.BackendRetriesCounter().With("service", "test").Add(1)
|
||||
statsdRegistry.BackendRetriesCounter().With("service", "test").Add(1)
|
||||
statsdRegistry.BackendReqDurationHistogram().With("service", "test", "code", string(http.StatusOK)).Observe(10000)
|
||||
})
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue