Add Metrics
This commit is contained in:
parent
4dc448056c
commit
8e97af8dc3
121 changed files with 8364 additions and 3811 deletions
|
@ -20,18 +20,18 @@ var datadogTicker *time.Ticker
|
|||
|
||||
// Metric names consistent with https://github.com/DataDog/integrations-extras/pull/64
|
||||
const (
|
||||
ddMetricsBackendReqsName = "backend.request.total"
|
||||
ddMetricsBackendLatencyName = "backend.request.duration"
|
||||
ddRetriesTotalName = "backend.retries.total"
|
||||
ddMetricsServiceReqsName = "service.request.total"
|
||||
ddMetricsServiceLatencyName = "service.request.duration"
|
||||
ddRetriesTotalName = "service.retries.total"
|
||||
ddConfigReloadsName = "config.reload.total"
|
||||
ddConfigReloadsFailureTagName = "failure"
|
||||
ddLastConfigReloadSuccessName = "config.reload.lastSuccessTimestamp"
|
||||
ddLastConfigReloadFailureName = "config.reload.lastFailureTimestamp"
|
||||
ddEntrypointReqsName = "entrypoint.request.total"
|
||||
ddEntrypointReqDurationName = "entrypoint.request.duration"
|
||||
ddEntrypointOpenConnsName = "entrypoint.connections.open"
|
||||
ddOpenConnsName = "backend.connections.open"
|
||||
ddServerUpName = "backend.server.up"
|
||||
ddEntryPointReqsName = "entrypoint.request.total"
|
||||
ddEntryPointReqDurationName = "entrypoint.request.duration"
|
||||
ddEntryPointOpenConnsName = "entrypoint.connections.open"
|
||||
ddOpenConnsName = "service.connections.open"
|
||||
ddServerUpName = "service.server.up"
|
||||
)
|
||||
|
||||
// RegisterDatadog registers the metrics pusher if this didn't happen yet and creates a datadog Registry instance.
|
||||
|
@ -41,19 +41,26 @@ func RegisterDatadog(ctx context.Context, config *types.DataDog) Registry {
|
|||
}
|
||||
|
||||
registry := &standardRegistry{
|
||||
enabled: true,
|
||||
configReloadsCounter: datadogClient.NewCounter(ddConfigReloadsName, 1.0),
|
||||
configReloadsFailureCounter: datadogClient.NewCounter(ddConfigReloadsName, 1.0).With(ddConfigReloadsFailureTagName, "true"),
|
||||
lastConfigReloadSuccessGauge: datadogClient.NewGauge(ddLastConfigReloadSuccessName),
|
||||
lastConfigReloadFailureGauge: datadogClient.NewGauge(ddLastConfigReloadFailureName),
|
||||
entrypointReqsCounter: datadogClient.NewCounter(ddEntrypointReqsName, 1.0),
|
||||
entrypointReqDurationHistogram: datadogClient.NewHistogram(ddEntrypointReqDurationName, 1.0),
|
||||
entrypointOpenConnsGauge: datadogClient.NewGauge(ddEntrypointOpenConnsName),
|
||||
backendReqsCounter: datadogClient.NewCounter(ddMetricsBackendReqsName, 1.0),
|
||||
backendReqDurationHistogram: datadogClient.NewHistogram(ddMetricsBackendLatencyName, 1.0),
|
||||
backendRetriesCounter: datadogClient.NewCounter(ddRetriesTotalName, 1.0),
|
||||
backendOpenConnsGauge: datadogClient.NewGauge(ddOpenConnsName),
|
||||
backendServerUpGauge: datadogClient.NewGauge(ddServerUpName),
|
||||
configReloadsCounter: datadogClient.NewCounter(ddConfigReloadsName, 1.0),
|
||||
configReloadsFailureCounter: datadogClient.NewCounter(ddConfigReloadsName, 1.0).With(ddConfigReloadsFailureTagName, "true"),
|
||||
lastConfigReloadSuccessGauge: datadogClient.NewGauge(ddLastConfigReloadSuccessName),
|
||||
lastConfigReloadFailureGauge: datadogClient.NewGauge(ddLastConfigReloadFailureName),
|
||||
}
|
||||
|
||||
if config.AddEntryPointsLabels {
|
||||
registry.epEnabled = config.AddEntryPointsLabels
|
||||
registry.entryPointReqsCounter = datadogClient.NewCounter(ddEntryPointReqsName, 1.0)
|
||||
registry.entryPointReqDurationHistogram = datadogClient.NewHistogram(ddEntryPointReqDurationName, 1.0)
|
||||
registry.entryPointOpenConnsGauge = datadogClient.NewGauge(ddEntryPointOpenConnsName)
|
||||
}
|
||||
|
||||
if config.AddServicesLabels {
|
||||
registry.svcEnabled = config.AddServicesLabels
|
||||
registry.serviceReqsCounter = datadogClient.NewCounter(ddMetricsServiceReqsName, 1.0)
|
||||
registry.serviceReqDurationHistogram = datadogClient.NewHistogram(ddMetricsServiceLatencyName, 1.0)
|
||||
registry.serviceRetriesCounter = datadogClient.NewCounter(ddRetriesTotalName, 1.0)
|
||||
registry.serviceOpenConnsGauge = datadogClient.NewGauge(ddOpenConnsName)
|
||||
registry.serviceServerUpGauge = datadogClient.NewGauge(ddServerUpName)
|
||||
}
|
||||
|
||||
return registry
|
||||
|
@ -68,7 +75,7 @@ func initDatadogClient(ctx context.Context, config *types.DataDog) *time.Ticker
|
|||
report := time.NewTicker(time.Duration(config.PushInterval))
|
||||
|
||||
safe.Go(func() {
|
||||
datadogClient.SendLoop(report.C, "udp", address)
|
||||
datadogClient.SendLoop(ctx, report.C, "udp", address)
|
||||
})
|
||||
|
||||
return report
|
||||
|
|
|
@ -16,38 +16,38 @@ func TestDatadog(t *testing.T) {
|
|||
// This is needed to make sure that UDP Listener listens for data a bit longer, otherwise it will quit after a millisecond
|
||||
udp.Timeout = 5 * time.Second
|
||||
|
||||
datadogRegistry := RegisterDatadog(context.Background(), &types.DataDog{Address: ":18125", PushInterval: types.Duration(time.Second)})
|
||||
datadogRegistry := RegisterDatadog(context.Background(), &types.DataDog{Address: ":18125", PushInterval: types.Duration(time.Second), AddEntryPointsLabels: true, AddServicesLabels: true})
|
||||
defer StopDatadog()
|
||||
|
||||
if !datadogRegistry.IsEnabled() {
|
||||
if !datadogRegistry.IsEpEnabled() || !datadogRegistry.IsSvcEnabled() {
|
||||
t.Errorf("DatadogRegistry should return true for IsEnabled()")
|
||||
}
|
||||
|
||||
expected := []string{
|
||||
// We are only validating counts, as it is nearly impossible to validate latency, since it varies every run
|
||||
"traefik.backend.request.total:1.000000|c|#service:test,code:404,method:GET\n",
|
||||
"traefik.backend.request.total:1.000000|c|#service:test,code:200,method:GET\n",
|
||||
"traefik.backend.retries.total:2.000000|c|#service:test\n",
|
||||
"traefik.backend.request.duration:10000.000000|h|#service:test,code:200\n",
|
||||
"traefik.service.request.total:1.000000|c|#service:test,code:404,method:GET\n",
|
||||
"traefik.service.request.total:1.000000|c|#service:test,code:200,method:GET\n",
|
||||
"traefik.service.retries.total:2.000000|c|#service:test\n",
|
||||
"traefik.service.request.duration:10000.000000|h|#service:test,code:200\n",
|
||||
"traefik.config.reload.total:1.000000|c\n",
|
||||
"traefik.config.reload.total:1.000000|c|#failure:true\n",
|
||||
"traefik.entrypoint.request.total:1.000000|c|#entrypoint:test\n",
|
||||
"traefik.entrypoint.request.duration:10000.000000|h|#entrypoint:test\n",
|
||||
"traefik.entrypoint.connections.open:1.000000|g|#entrypoint:test\n",
|
||||
"traefik.backend.server.up:1.000000|g|#backend:test,url:http://127.0.0.1,one:two\n",
|
||||
"traefik.service.server.up:1.000000|g|#service:test,url:http://127.0.0.1,one:two\n",
|
||||
}
|
||||
|
||||
udp.ShouldReceiveAll(t, expected, func() {
|
||||
datadogRegistry.BackendReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
datadogRegistry.BackendReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
datadogRegistry.BackendReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
datadogRegistry.BackendRetriesCounter().With("service", "test").Add(1)
|
||||
datadogRegistry.BackendRetriesCounter().With("service", "test").Add(1)
|
||||
datadogRegistry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
datadogRegistry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
datadogRegistry.ServiceReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
datadogRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
|
||||
datadogRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
|
||||
datadogRegistry.ConfigReloadsCounter().Add(1)
|
||||
datadogRegistry.ConfigReloadsFailureCounter().Add(1)
|
||||
datadogRegistry.EntrypointReqsCounter().With("entrypoint", "test").Add(1)
|
||||
datadogRegistry.EntrypointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
|
||||
datadogRegistry.EntrypointOpenConnsGauge().With("entrypoint", "test").Set(1)
|
||||
datadogRegistry.BackendServerUpGauge().With("backend", "test", "url", "http://127.0.0.1", "one", "two").Set(1)
|
||||
datadogRegistry.EntryPointReqsCounter().With("entrypoint", "test").Add(1)
|
||||
datadogRegistry.EntryPointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
|
||||
datadogRegistry.EntryPointOpenConnsGauge().With("entrypoint", "test").Set(1)
|
||||
datadogRegistry.ServiceServerUpGauge().With("service", "test", "url", "http://127.0.0.1", "one", "two").Set(1)
|
||||
})
|
||||
}
|
||||
|
|
|
@ -13,7 +13,7 @@ import (
|
|||
"github.com/containous/traefik/pkg/types"
|
||||
kitlog "github.com/go-kit/kit/log"
|
||||
"github.com/go-kit/kit/metrics/influx"
|
||||
influxdb "github.com/influxdata/influxdb/client/v2"
|
||||
influxdb "github.com/influxdata/influxdb1-client/v2"
|
||||
)
|
||||
|
||||
var influxDBClient *influx.Influx
|
||||
|
@ -26,18 +26,18 @@ type influxDBWriter struct {
|
|||
var influxDBTicker *time.Ticker
|
||||
|
||||
const (
|
||||
influxDBMetricsBackendReqsName = "traefik.backend.requests.total"
|
||||
influxDBMetricsBackendLatencyName = "traefik.backend.request.duration"
|
||||
influxDBRetriesTotalName = "traefik.backend.retries.total"
|
||||
influxDBMetricsServiceReqsName = "traefik.service.requests.total"
|
||||
influxDBMetricsServiceLatencyName = "traefik.service.request.duration"
|
||||
influxDBRetriesTotalName = "traefik.service.retries.total"
|
||||
influxDBConfigReloadsName = "traefik.config.reload.total"
|
||||
influxDBConfigReloadsFailureName = influxDBConfigReloadsName + ".failure"
|
||||
influxDBLastConfigReloadSuccessName = "traefik.config.reload.lastSuccessTimestamp"
|
||||
influxDBLastConfigReloadFailureName = "traefik.config.reload.lastFailureTimestamp"
|
||||
influxDBEntrypointReqsName = "traefik.entrypoint.requests.total"
|
||||
influxDBEntrypointReqDurationName = "traefik.entrypoint.request.duration"
|
||||
influxDBEntrypointOpenConnsName = "traefik.entrypoint.connections.open"
|
||||
influxDBOpenConnsName = "traefik.backend.connections.open"
|
||||
influxDBServerUpName = "traefik.backend.server.up"
|
||||
influxDBEntryPointReqsName = "traefik.entrypoint.requests.total"
|
||||
influxDBEntryPointReqDurationName = "traefik.entrypoint.request.duration"
|
||||
influxDBEntryPointOpenConnsName = "traefik.entrypoint.connections.open"
|
||||
influxDBOpenConnsName = "traefik.service.connections.open"
|
||||
influxDBServerUpName = "traefik.service.server.up"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -51,24 +51,33 @@ func RegisterInfluxDB(ctx context.Context, config *types.InfluxDB) Registry {
|
|||
influxDBClient = initInfluxDBClient(ctx, config)
|
||||
}
|
||||
if influxDBTicker == nil {
|
||||
influxDBTicker = initInfluxDBTicker(config)
|
||||
influxDBTicker = initInfluxDBTicker(ctx, config)
|
||||
}
|
||||
|
||||
return &standardRegistry{
|
||||
enabled: true,
|
||||
configReloadsCounter: influxDBClient.NewCounter(influxDBConfigReloadsName),
|
||||
configReloadsFailureCounter: influxDBClient.NewCounter(influxDBConfigReloadsFailureName),
|
||||
lastConfigReloadSuccessGauge: influxDBClient.NewGauge(influxDBLastConfigReloadSuccessName),
|
||||
lastConfigReloadFailureGauge: influxDBClient.NewGauge(influxDBLastConfigReloadFailureName),
|
||||
entrypointReqsCounter: influxDBClient.NewCounter(influxDBEntrypointReqsName),
|
||||
entrypointReqDurationHistogram: influxDBClient.NewHistogram(influxDBEntrypointReqDurationName),
|
||||
entrypointOpenConnsGauge: influxDBClient.NewGauge(influxDBEntrypointOpenConnsName),
|
||||
backendReqsCounter: influxDBClient.NewCounter(influxDBMetricsBackendReqsName),
|
||||
backendReqDurationHistogram: influxDBClient.NewHistogram(influxDBMetricsBackendLatencyName),
|
||||
backendRetriesCounter: influxDBClient.NewCounter(influxDBRetriesTotalName),
|
||||
backendOpenConnsGauge: influxDBClient.NewGauge(influxDBOpenConnsName),
|
||||
backendServerUpGauge: influxDBClient.NewGauge(influxDBServerUpName),
|
||||
registry := &standardRegistry{
|
||||
configReloadsCounter: influxDBClient.NewCounter(influxDBConfigReloadsName),
|
||||
configReloadsFailureCounter: influxDBClient.NewCounter(influxDBConfigReloadsFailureName),
|
||||
lastConfigReloadSuccessGauge: influxDBClient.NewGauge(influxDBLastConfigReloadSuccessName),
|
||||
lastConfigReloadFailureGauge: influxDBClient.NewGauge(influxDBLastConfigReloadFailureName),
|
||||
}
|
||||
|
||||
if config.AddEntryPointsLabels {
|
||||
registry.epEnabled = config.AddEntryPointsLabels
|
||||
registry.entryPointReqsCounter = influxDBClient.NewCounter(influxDBEntryPointReqsName)
|
||||
registry.entryPointReqDurationHistogram = influxDBClient.NewHistogram(influxDBEntryPointReqDurationName)
|
||||
registry.entryPointOpenConnsGauge = influxDBClient.NewGauge(influxDBEntryPointOpenConnsName)
|
||||
}
|
||||
|
||||
if config.AddServicesLabels {
|
||||
registry.svcEnabled = config.AddServicesLabels
|
||||
registry.serviceReqsCounter = influxDBClient.NewCounter(influxDBMetricsServiceReqsName)
|
||||
registry.serviceReqDurationHistogram = influxDBClient.NewHistogram(influxDBMetricsServiceLatencyName)
|
||||
registry.serviceRetriesCounter = influxDBClient.NewCounter(influxDBRetriesTotalName)
|
||||
registry.serviceOpenConnsGauge = influxDBClient.NewGauge(influxDBOpenConnsName)
|
||||
registry.serviceServerUpGauge = influxDBClient.NewGauge(influxDBServerUpName)
|
||||
}
|
||||
|
||||
return registry
|
||||
}
|
||||
|
||||
// initInfluxDBTicker creates a influxDBClient
|
||||
|
@ -115,12 +124,12 @@ func initInfluxDBClient(ctx context.Context, config *types.InfluxDB) *influx.Inf
|
|||
}
|
||||
|
||||
// initInfluxDBTicker initializes metrics pusher
|
||||
func initInfluxDBTicker(config *types.InfluxDB) *time.Ticker {
|
||||
func initInfluxDBTicker(ctx context.Context, config *types.InfluxDB) *time.Ticker {
|
||||
report := time.NewTicker(time.Duration(config.PushInterval))
|
||||
|
||||
safe.Go(func() {
|
||||
var buf bytes.Buffer
|
||||
influxDBClient.WriteLoop(report.C, &influxDBWriter{buf: buf, config: config})
|
||||
influxDBClient.WriteLoop(ctx, report.C, &influxDBWriter{buf: buf, config: config})
|
||||
})
|
||||
|
||||
return report
|
||||
|
|
|
@ -20,35 +20,35 @@ func TestInfluxDB(t *testing.T) {
|
|||
// This is needed to make sure that UDP Listener listens for data a bit longer, otherwise it will quit after a millisecond
|
||||
udp.Timeout = 5 * time.Second
|
||||
|
||||
influxDBRegistry := RegisterInfluxDB(context.Background(), &types.InfluxDB{Address: ":8089", PushInterval: types.Duration(time.Second)})
|
||||
influxDBRegistry := RegisterInfluxDB(context.Background(), &types.InfluxDB{Address: ":8089", PushInterval: types.Duration(time.Second), AddEntryPointsLabels: true, AddServicesLabels: true})
|
||||
defer StopInfluxDB()
|
||||
|
||||
if !influxDBRegistry.IsEnabled() {
|
||||
t.Fatalf("InfluxDB registry must be enabled")
|
||||
if !influxDBRegistry.IsEpEnabled() || !influxDBRegistry.IsSvcEnabled() {
|
||||
t.Fatalf("InfluxDB registry must be epEnabled")
|
||||
}
|
||||
|
||||
expectedBackend := []string{
|
||||
`(traefik\.backend\.requests\.total,backend=test,code=200,method=GET count=1) [\d]{19}`,
|
||||
`(traefik\.backend\.requests\.total,backend=test,code=404,method=GET count=1) [\d]{19}`,
|
||||
`(traefik\.backend\.request\.duration,backend=test,code=200 p50=10000,p90=10000,p95=10000,p99=10000) [\d]{19}`,
|
||||
`(traefik\.backend\.retries\.total(?:,code=[\d]{3},method=GET)?,backend=test count=2) [\d]{19}`,
|
||||
expectedService := []string{
|
||||
`(traefik\.service\.requests\.total,code=200,method=GET,service=test count=1) [\d]{19}`,
|
||||
`(traefik\.service\.requests\.total,code=404,method=GET,service=test count=1) [\d]{19}`,
|
||||
`(traefik\.service\.request\.duration,code=200,service=test p50=10000,p90=10000,p95=10000,p99=10000) [\d]{19}`,
|
||||
`(traefik\.service\.retries\.total(?:,code=[\d]{3},method=GET)?,service=test count=2) [\d]{19}`,
|
||||
`(traefik\.config\.reload\.total(?:[a-z=0-9A-Z,]+)? count=1) [\d]{19}`,
|
||||
`(traefik\.config\.reload\.total\.failure(?:[a-z=0-9A-Z,]+)? count=1) [\d]{19}`,
|
||||
`(traefik\.backend\.server\.up,backend=test(?:[a-z=0-9A-Z,]+)?,url=http://127.0.0.1 value=1) [\d]{19}`,
|
||||
`(traefik\.service\.server\.up,service=test(?:[a-z=0-9A-Z,]+)?,url=http://127.0.0.1 value=1) [\d]{19}`,
|
||||
}
|
||||
|
||||
msgBackend := udp.ReceiveString(t, func() {
|
||||
influxDBRegistry.BackendReqsCounter().With("backend", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
influxDBRegistry.BackendReqsCounter().With("backend", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
influxDBRegistry.BackendRetriesCounter().With("backend", "test").Add(1)
|
||||
influxDBRegistry.BackendRetriesCounter().With("backend", "test").Add(1)
|
||||
influxDBRegistry.BackendReqDurationHistogram().With("backend", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
msgService := udp.ReceiveString(t, func() {
|
||||
influxDBRegistry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
influxDBRegistry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
influxDBRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
|
||||
influxDBRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
|
||||
influxDBRegistry.ServiceReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
influxDBRegistry.ConfigReloadsCounter().Add(1)
|
||||
influxDBRegistry.ConfigReloadsFailureCounter().Add(1)
|
||||
influxDBRegistry.BackendServerUpGauge().With("backend", "test", "url", "http://127.0.0.1").Set(1)
|
||||
influxDBRegistry.ServiceServerUpGauge().With("service", "test", "url", "http://127.0.0.1").Set(1)
|
||||
})
|
||||
|
||||
assertMessage(t, msgBackend, expectedBackend)
|
||||
assertMessage(t, msgService, expectedService)
|
||||
|
||||
expectedEntrypoint := []string{
|
||||
`(traefik\.entrypoint\.requests\.total,entrypoint=test(?:[a-z=0-9A-Z,:/.]+)? count=1) [\d]{19}`,
|
||||
|
@ -57,9 +57,9 @@ func TestInfluxDB(t *testing.T) {
|
|||
}
|
||||
|
||||
msgEntrypoint := udp.ReceiveString(t, func() {
|
||||
influxDBRegistry.EntrypointReqsCounter().With("entrypoint", "test").Add(1)
|
||||
influxDBRegistry.EntrypointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
|
||||
influxDBRegistry.EntrypointOpenConnsGauge().With("entrypoint", "test").Set(1)
|
||||
influxDBRegistry.EntryPointReqsCounter().With("entrypoint", "test").Add(1)
|
||||
influxDBRegistry.EntryPointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
|
||||
influxDBRegistry.EntryPointOpenConnsGauge().With("entrypoint", "test").Set(1)
|
||||
|
||||
})
|
||||
|
||||
|
@ -76,38 +76,38 @@ func TestInfluxDBHTTP(t *testing.T) {
|
|||
}
|
||||
bodyStr := string(body)
|
||||
c <- &bodyStr
|
||||
fmt.Fprintln(w, "ok")
|
||||
_, _ = fmt.Fprintln(w, "ok")
|
||||
}))
|
||||
defer ts.Close()
|
||||
|
||||
influxDBRegistry := RegisterInfluxDB(context.Background(), &types.InfluxDB{Address: ts.URL, Protocol: "http", PushInterval: types.Duration(time.Second), Database: "test", RetentionPolicy: "autogen"})
|
||||
influxDBRegistry := RegisterInfluxDB(context.Background(), &types.InfluxDB{Address: ts.URL, Protocol: "http", PushInterval: types.Duration(time.Second), Database: "test", RetentionPolicy: "autogen", AddEntryPointsLabels: true, AddServicesLabels: true})
|
||||
defer StopInfluxDB()
|
||||
|
||||
if !influxDBRegistry.IsEnabled() {
|
||||
t.Fatalf("InfluxDB registry must be enabled")
|
||||
if !influxDBRegistry.IsEpEnabled() || !influxDBRegistry.IsSvcEnabled() {
|
||||
t.Fatalf("InfluxDB registry must be epEnabled")
|
||||
}
|
||||
|
||||
expectedBackend := []string{
|
||||
`(traefik\.backend\.requests\.total,backend=test,code=200,method=GET count=1) [\d]{19}`,
|
||||
`(traefik\.backend\.requests\.total,backend=test,code=404,method=GET count=1) [\d]{19}`,
|
||||
`(traefik\.backend\.request\.duration,backend=test,code=200 p50=10000,p90=10000,p95=10000,p99=10000) [\d]{19}`,
|
||||
`(traefik\.backend\.retries\.total(?:,code=[\d]{3},method=GET)?,backend=test count=2) [\d]{19}`,
|
||||
expectedService := []string{
|
||||
`(traefik\.service\.requests\.total,code=200,method=GET,service=test count=1) [\d]{19}`,
|
||||
`(traefik\.service\.requests\.total,code=404,method=GET,service=test count=1) [\d]{19}`,
|
||||
`(traefik\.service\.request\.duration,code=200,service=test p50=10000,p90=10000,p95=10000,p99=10000) [\d]{19}`,
|
||||
`(traefik\.service\.retries\.total(?:,code=[\d]{3},method=GET)?,service=test count=2) [\d]{19}`,
|
||||
`(traefik\.config\.reload\.total(?:[a-z=0-9A-Z,]+)? count=1) [\d]{19}`,
|
||||
`(traefik\.config\.reload\.total\.failure(?:[a-z=0-9A-Z,]+)? count=1) [\d]{19}`,
|
||||
`(traefik\.backend\.server\.up,backend=test(?:[a-z=0-9A-Z,]+)?,url=http://127.0.0.1 value=1) [\d]{19}`,
|
||||
`(traefik\.service\.server\.up,service=test(?:[a-z=0-9A-Z,]+)?,url=http://127.0.0.1 value=1) [\d]{19}`,
|
||||
}
|
||||
|
||||
influxDBRegistry.BackendReqsCounter().With("backend", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
influxDBRegistry.BackendReqsCounter().With("backend", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
influxDBRegistry.BackendRetriesCounter().With("backend", "test").Add(1)
|
||||
influxDBRegistry.BackendRetriesCounter().With("backend", "test").Add(1)
|
||||
influxDBRegistry.BackendReqDurationHistogram().With("backend", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
influxDBRegistry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
influxDBRegistry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
influxDBRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
|
||||
influxDBRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
|
||||
influxDBRegistry.ServiceReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
influxDBRegistry.ConfigReloadsCounter().Add(1)
|
||||
influxDBRegistry.ConfigReloadsFailureCounter().Add(1)
|
||||
influxDBRegistry.BackendServerUpGauge().With("backend", "test", "url", "http://127.0.0.1").Set(1)
|
||||
msgBackend := <-c
|
||||
influxDBRegistry.ServiceServerUpGauge().With("service", "test", "url", "http://127.0.0.1").Set(1)
|
||||
msgService := <-c
|
||||
|
||||
assertMessage(t, *msgBackend, expectedBackend)
|
||||
assertMessage(t, *msgService, expectedService)
|
||||
|
||||
expectedEntrypoint := []string{
|
||||
`(traefik\.entrypoint\.requests\.total,entrypoint=test(?:[a-z=0-9A-Z,:/.]+)? count=1) [\d]{19}`,
|
||||
|
@ -115,9 +115,9 @@ func TestInfluxDBHTTP(t *testing.T) {
|
|||
`(traefik\.entrypoint\.connections\.open,entrypoint=test value=1) [\d]{19}`,
|
||||
}
|
||||
|
||||
influxDBRegistry.EntrypointReqsCounter().With("entrypoint", "test").Add(1)
|
||||
influxDBRegistry.EntrypointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
|
||||
influxDBRegistry.EntrypointOpenConnsGauge().With("entrypoint", "test").Set(1)
|
||||
influxDBRegistry.EntryPointReqsCounter().With("entrypoint", "test").Add(1)
|
||||
influxDBRegistry.EntryPointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
|
||||
influxDBRegistry.EntryPointOpenConnsGauge().With("entrypoint", "test").Set(1)
|
||||
msgEntrypoint := <-c
|
||||
|
||||
assertMessage(t, *msgEntrypoint, expectedEntrypoint)
|
||||
|
|
|
@ -7,8 +7,10 @@ import (
|
|||
|
||||
// Registry has to implemented by any system that wants to monitor and expose metrics.
|
||||
type Registry interface {
|
||||
// IsEnabled shows whether metrics instrumentation is enabled.
|
||||
IsEnabled() bool
|
||||
// IsEpEnabled shows whether metrics instrumentation is enabled on entry points.
|
||||
IsEpEnabled() bool
|
||||
// IsSvcEnabled shows whether metrics instrumentation is enabled on services.
|
||||
IsSvcEnabled() bool
|
||||
|
||||
// server metrics
|
||||
ConfigReloadsCounter() metrics.Counter
|
||||
|
@ -17,16 +19,16 @@ type Registry interface {
|
|||
LastConfigReloadFailureGauge() metrics.Gauge
|
||||
|
||||
// entry point metrics
|
||||
EntrypointReqsCounter() metrics.Counter
|
||||
EntrypointReqDurationHistogram() metrics.Histogram
|
||||
EntrypointOpenConnsGauge() metrics.Gauge
|
||||
EntryPointReqsCounter() metrics.Counter
|
||||
EntryPointReqDurationHistogram() metrics.Histogram
|
||||
EntryPointOpenConnsGauge() metrics.Gauge
|
||||
|
||||
// backend metrics
|
||||
BackendReqsCounter() metrics.Counter
|
||||
BackendReqDurationHistogram() metrics.Histogram
|
||||
BackendOpenConnsGauge() metrics.Gauge
|
||||
BackendRetriesCounter() metrics.Counter
|
||||
BackendServerUpGauge() metrics.Gauge
|
||||
// service metrics
|
||||
ServiceReqsCounter() metrics.Counter
|
||||
ServiceReqDurationHistogram() metrics.Histogram
|
||||
ServiceOpenConnsGauge() metrics.Gauge
|
||||
ServiceRetriesCounter() metrics.Counter
|
||||
ServiceServerUpGauge() metrics.Gauge
|
||||
}
|
||||
|
||||
// NewVoidRegistry is a noop implementation of metrics.Registry.
|
||||
|
@ -43,14 +45,14 @@ func NewMultiRegistry(registries []Registry) Registry {
|
|||
var configReloadsFailureCounter []metrics.Counter
|
||||
var lastConfigReloadSuccessGauge []metrics.Gauge
|
||||
var lastConfigReloadFailureGauge []metrics.Gauge
|
||||
var entrypointReqsCounter []metrics.Counter
|
||||
var entrypointReqDurationHistogram []metrics.Histogram
|
||||
var entrypointOpenConnsGauge []metrics.Gauge
|
||||
var backendReqsCounter []metrics.Counter
|
||||
var backendReqDurationHistogram []metrics.Histogram
|
||||
var backendOpenConnsGauge []metrics.Gauge
|
||||
var backendRetriesCounter []metrics.Counter
|
||||
var backendServerUpGauge []metrics.Gauge
|
||||
var entryPointReqsCounter []metrics.Counter
|
||||
var entryPointReqDurationHistogram []metrics.Histogram
|
||||
var entryPointOpenConnsGauge []metrics.Gauge
|
||||
var serviceReqsCounter []metrics.Counter
|
||||
var serviceReqDurationHistogram []metrics.Histogram
|
||||
var serviceOpenConnsGauge []metrics.Gauge
|
||||
var serviceRetriesCounter []metrics.Counter
|
||||
var serviceServerUpGauge []metrics.Gauge
|
||||
|
||||
for _, r := range registries {
|
||||
if r.ConfigReloadsCounter() != nil {
|
||||
|
@ -65,67 +67,73 @@ func NewMultiRegistry(registries []Registry) Registry {
|
|||
if r.LastConfigReloadFailureGauge() != nil {
|
||||
lastConfigReloadFailureGauge = append(lastConfigReloadFailureGauge, r.LastConfigReloadFailureGauge())
|
||||
}
|
||||
if r.EntrypointReqsCounter() != nil {
|
||||
entrypointReqsCounter = append(entrypointReqsCounter, r.EntrypointReqsCounter())
|
||||
if r.EntryPointReqsCounter() != nil {
|
||||
entryPointReqsCounter = append(entryPointReqsCounter, r.EntryPointReqsCounter())
|
||||
}
|
||||
if r.EntrypointReqDurationHistogram() != nil {
|
||||
entrypointReqDurationHistogram = append(entrypointReqDurationHistogram, r.EntrypointReqDurationHistogram())
|
||||
if r.EntryPointReqDurationHistogram() != nil {
|
||||
entryPointReqDurationHistogram = append(entryPointReqDurationHistogram, r.EntryPointReqDurationHistogram())
|
||||
}
|
||||
if r.EntrypointOpenConnsGauge() != nil {
|
||||
entrypointOpenConnsGauge = append(entrypointOpenConnsGauge, r.EntrypointOpenConnsGauge())
|
||||
if r.EntryPointOpenConnsGauge() != nil {
|
||||
entryPointOpenConnsGauge = append(entryPointOpenConnsGauge, r.EntryPointOpenConnsGauge())
|
||||
}
|
||||
if r.BackendReqsCounter() != nil {
|
||||
backendReqsCounter = append(backendReqsCounter, r.BackendReqsCounter())
|
||||
if r.ServiceReqsCounter() != nil {
|
||||
serviceReqsCounter = append(serviceReqsCounter, r.ServiceReqsCounter())
|
||||
}
|
||||
if r.BackendReqDurationHistogram() != nil {
|
||||
backendReqDurationHistogram = append(backendReqDurationHistogram, r.BackendReqDurationHistogram())
|
||||
if r.ServiceReqDurationHistogram() != nil {
|
||||
serviceReqDurationHistogram = append(serviceReqDurationHistogram, r.ServiceReqDurationHistogram())
|
||||
}
|
||||
if r.BackendOpenConnsGauge() != nil {
|
||||
backendOpenConnsGauge = append(backendOpenConnsGauge, r.BackendOpenConnsGauge())
|
||||
if r.ServiceOpenConnsGauge() != nil {
|
||||
serviceOpenConnsGauge = append(serviceOpenConnsGauge, r.ServiceOpenConnsGauge())
|
||||
}
|
||||
if r.BackendRetriesCounter() != nil {
|
||||
backendRetriesCounter = append(backendRetriesCounter, r.BackendRetriesCounter())
|
||||
if r.ServiceRetriesCounter() != nil {
|
||||
serviceRetriesCounter = append(serviceRetriesCounter, r.ServiceRetriesCounter())
|
||||
}
|
||||
if r.BackendServerUpGauge() != nil {
|
||||
backendServerUpGauge = append(backendServerUpGauge, r.BackendServerUpGauge())
|
||||
if r.ServiceServerUpGauge() != nil {
|
||||
serviceServerUpGauge = append(serviceServerUpGauge, r.ServiceServerUpGauge())
|
||||
}
|
||||
}
|
||||
|
||||
return &standardRegistry{
|
||||
enabled: len(registries) > 0,
|
||||
epEnabled: len(entryPointReqsCounter) > 0 || len(entryPointReqDurationHistogram) > 0 || len(entryPointOpenConnsGauge) > 0,
|
||||
svcEnabled: len(serviceReqsCounter) > 0 || len(serviceReqDurationHistogram) > 0 || len(serviceOpenConnsGauge) > 0 || len(serviceRetriesCounter) > 0 || len(serviceServerUpGauge) > 0,
|
||||
configReloadsCounter: multi.NewCounter(configReloadsCounter...),
|
||||
configReloadsFailureCounter: multi.NewCounter(configReloadsFailureCounter...),
|
||||
lastConfigReloadSuccessGauge: multi.NewGauge(lastConfigReloadSuccessGauge...),
|
||||
lastConfigReloadFailureGauge: multi.NewGauge(lastConfigReloadFailureGauge...),
|
||||
entrypointReqsCounter: multi.NewCounter(entrypointReqsCounter...),
|
||||
entrypointReqDurationHistogram: multi.NewHistogram(entrypointReqDurationHistogram...),
|
||||
entrypointOpenConnsGauge: multi.NewGauge(entrypointOpenConnsGauge...),
|
||||
backendReqsCounter: multi.NewCounter(backendReqsCounter...),
|
||||
backendReqDurationHistogram: multi.NewHistogram(backendReqDurationHistogram...),
|
||||
backendOpenConnsGauge: multi.NewGauge(backendOpenConnsGauge...),
|
||||
backendRetriesCounter: multi.NewCounter(backendRetriesCounter...),
|
||||
backendServerUpGauge: multi.NewGauge(backendServerUpGauge...),
|
||||
entryPointReqsCounter: multi.NewCounter(entryPointReqsCounter...),
|
||||
entryPointReqDurationHistogram: multi.NewHistogram(entryPointReqDurationHistogram...),
|
||||
entryPointOpenConnsGauge: multi.NewGauge(entryPointOpenConnsGauge...),
|
||||
serviceReqsCounter: multi.NewCounter(serviceReqsCounter...),
|
||||
serviceReqDurationHistogram: multi.NewHistogram(serviceReqDurationHistogram...),
|
||||
serviceOpenConnsGauge: multi.NewGauge(serviceOpenConnsGauge...),
|
||||
serviceRetriesCounter: multi.NewCounter(serviceRetriesCounter...),
|
||||
serviceServerUpGauge: multi.NewGauge(serviceServerUpGauge...),
|
||||
}
|
||||
}
|
||||
|
||||
type standardRegistry struct {
|
||||
enabled bool
|
||||
epEnabled bool
|
||||
svcEnabled bool
|
||||
configReloadsCounter metrics.Counter
|
||||
configReloadsFailureCounter metrics.Counter
|
||||
lastConfigReloadSuccessGauge metrics.Gauge
|
||||
lastConfigReloadFailureGauge metrics.Gauge
|
||||
entrypointReqsCounter metrics.Counter
|
||||
entrypointReqDurationHistogram metrics.Histogram
|
||||
entrypointOpenConnsGauge metrics.Gauge
|
||||
backendReqsCounter metrics.Counter
|
||||
backendReqDurationHistogram metrics.Histogram
|
||||
backendOpenConnsGauge metrics.Gauge
|
||||
backendRetriesCounter metrics.Counter
|
||||
backendServerUpGauge metrics.Gauge
|
||||
entryPointReqsCounter metrics.Counter
|
||||
entryPointReqDurationHistogram metrics.Histogram
|
||||
entryPointOpenConnsGauge metrics.Gauge
|
||||
serviceReqsCounter metrics.Counter
|
||||
serviceReqDurationHistogram metrics.Histogram
|
||||
serviceOpenConnsGauge metrics.Gauge
|
||||
serviceRetriesCounter metrics.Counter
|
||||
serviceServerUpGauge metrics.Gauge
|
||||
}
|
||||
|
||||
func (r *standardRegistry) IsEnabled() bool {
|
||||
return r.enabled
|
||||
func (r *standardRegistry) IsEpEnabled() bool {
|
||||
return r.epEnabled
|
||||
}
|
||||
|
||||
func (r *standardRegistry) IsSvcEnabled() bool {
|
||||
return r.svcEnabled
|
||||
}
|
||||
|
||||
func (r *standardRegistry) ConfigReloadsCounter() metrics.Counter {
|
||||
|
@ -144,34 +152,34 @@ func (r *standardRegistry) LastConfigReloadFailureGauge() metrics.Gauge {
|
|||
return r.lastConfigReloadFailureGauge
|
||||
}
|
||||
|
||||
func (r *standardRegistry) EntrypointReqsCounter() metrics.Counter {
|
||||
return r.entrypointReqsCounter
|
||||
func (r *standardRegistry) EntryPointReqsCounter() metrics.Counter {
|
||||
return r.entryPointReqsCounter
|
||||
}
|
||||
|
||||
func (r *standardRegistry) EntrypointReqDurationHistogram() metrics.Histogram {
|
||||
return r.entrypointReqDurationHistogram
|
||||
func (r *standardRegistry) EntryPointReqDurationHistogram() metrics.Histogram {
|
||||
return r.entryPointReqDurationHistogram
|
||||
}
|
||||
|
||||
func (r *standardRegistry) EntrypointOpenConnsGauge() metrics.Gauge {
|
||||
return r.entrypointOpenConnsGauge
|
||||
func (r *standardRegistry) EntryPointOpenConnsGauge() metrics.Gauge {
|
||||
return r.entryPointOpenConnsGauge
|
||||
}
|
||||
|
||||
func (r *standardRegistry) BackendReqsCounter() metrics.Counter {
|
||||
return r.backendReqsCounter
|
||||
func (r *standardRegistry) ServiceReqsCounter() metrics.Counter {
|
||||
return r.serviceReqsCounter
|
||||
}
|
||||
|
||||
func (r *standardRegistry) BackendReqDurationHistogram() metrics.Histogram {
|
||||
return r.backendReqDurationHistogram
|
||||
func (r *standardRegistry) ServiceReqDurationHistogram() metrics.Histogram {
|
||||
return r.serviceReqDurationHistogram
|
||||
}
|
||||
|
||||
func (r *standardRegistry) BackendOpenConnsGauge() metrics.Gauge {
|
||||
return r.backendOpenConnsGauge
|
||||
func (r *standardRegistry) ServiceOpenConnsGauge() metrics.Gauge {
|
||||
return r.serviceOpenConnsGauge
|
||||
}
|
||||
|
||||
func (r *standardRegistry) BackendRetriesCounter() metrics.Counter {
|
||||
return r.backendRetriesCounter
|
||||
func (r *standardRegistry) ServiceRetriesCounter() metrics.Counter {
|
||||
return r.serviceRetriesCounter
|
||||
}
|
||||
|
||||
func (r *standardRegistry) BackendServerUpGauge() metrics.Gauge {
|
||||
return r.backendServerUpGauge
|
||||
func (r *standardRegistry) ServiceServerUpGauge() metrics.Gauge {
|
||||
return r.serviceServerUpGauge
|
||||
}
|
||||
|
|
|
@ -11,14 +11,14 @@ func TestNewMultiRegistry(t *testing.T) {
|
|||
registries := []Registry{newCollectingRetryMetrics(), newCollectingRetryMetrics()}
|
||||
registry := NewMultiRegistry(registries)
|
||||
|
||||
registry.BackendReqsCounter().With("key", "requests").Add(1)
|
||||
registry.BackendReqDurationHistogram().With("key", "durations").Observe(2)
|
||||
registry.BackendRetriesCounter().With("key", "retries").Add(3)
|
||||
registry.ServiceReqsCounter().With("key", "requests").Add(1)
|
||||
registry.ServiceReqDurationHistogram().With("key", "durations").Observe(2)
|
||||
registry.ServiceRetriesCounter().With("key", "retries").Add(3)
|
||||
|
||||
for _, collectingRegistry := range registries {
|
||||
cReqsCounter := collectingRegistry.BackendReqsCounter().(*counterMock)
|
||||
cReqDurationHistogram := collectingRegistry.BackendReqDurationHistogram().(*histogramMock)
|
||||
cRetriesCounter := collectingRegistry.BackendRetriesCounter().(*counterMock)
|
||||
cReqsCounter := collectingRegistry.ServiceReqsCounter().(*counterMock)
|
||||
cReqDurationHistogram := collectingRegistry.ServiceReqDurationHistogram().(*histogramMock)
|
||||
cRetriesCounter := collectingRegistry.ServiceRetriesCounter().(*counterMock)
|
||||
|
||||
wantCounterValue := float64(1)
|
||||
if cReqsCounter.counterValue != wantCounterValue {
|
||||
|
@ -41,9 +41,9 @@ func TestNewMultiRegistry(t *testing.T) {
|
|||
|
||||
func newCollectingRetryMetrics() Registry {
|
||||
return &standardRegistry{
|
||||
backendReqsCounter: &counterMock{},
|
||||
backendReqDurationHistogram: &histogramMock{},
|
||||
backendRetriesCounter: &counterMock{},
|
||||
serviceReqsCounter: &counterMock{},
|
||||
serviceReqDurationHistogram: &histogramMock{},
|
||||
serviceRetriesCounter: &counterMock{},
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@ package metrics
|
|||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
|
@ -28,43 +29,45 @@ const (
|
|||
configLastReloadSuccessName = metricConfigPrefix + "last_reload_success"
|
||||
configLastReloadFailureName = metricConfigPrefix + "last_reload_failure"
|
||||
|
||||
// entrypoint
|
||||
// entry point
|
||||
metricEntryPointPrefix = MetricNamePrefix + "entrypoint_"
|
||||
entrypointReqsTotalName = metricEntryPointPrefix + "requests_total"
|
||||
entrypointReqDurationName = metricEntryPointPrefix + "request_duration_seconds"
|
||||
entrypointOpenConnsName = metricEntryPointPrefix + "open_connections"
|
||||
entryPointReqsTotalName = metricEntryPointPrefix + "requests_total"
|
||||
entryPointReqDurationName = metricEntryPointPrefix + "request_duration_seconds"
|
||||
entryPointOpenConnsName = metricEntryPointPrefix + "open_connections"
|
||||
|
||||
// backend level.
|
||||
// service level.
|
||||
|
||||
// MetricBackendPrefix prefix of all backend metric names
|
||||
MetricBackendPrefix = MetricNamePrefix + "backend_"
|
||||
backendReqsTotalName = MetricBackendPrefix + "requests_total"
|
||||
backendReqDurationName = MetricBackendPrefix + "request_duration_seconds"
|
||||
backendOpenConnsName = MetricBackendPrefix + "open_connections"
|
||||
backendRetriesTotalName = MetricBackendPrefix + "retries_total"
|
||||
backendServerUpName = MetricBackendPrefix + "server_up"
|
||||
// MetricServicePrefix prefix of all service metric names
|
||||
MetricServicePrefix = MetricNamePrefix + "service_"
|
||||
serviceReqsTotalName = MetricServicePrefix + "requests_total"
|
||||
serviceReqDurationName = MetricServicePrefix + "request_duration_seconds"
|
||||
serviceOpenConnsName = MetricServicePrefix + "open_connections"
|
||||
serviceRetriesTotalName = MetricServicePrefix + "retries_total"
|
||||
serviceServerUpName = MetricServicePrefix + "server_up"
|
||||
)
|
||||
|
||||
// promState holds all metric state internally and acts as the only Collector we register for Prometheus.
|
||||
//
|
||||
// This enables control to remove metrics that belong to outdated configuration.
|
||||
// As an example why this is required, consider Traefik learns about a new service.
|
||||
// It populates the 'traefik_server_backend_up' metric for it with a value of 1 (alive).
|
||||
// When the backend is undeployed now the metric is still there in the client library
|
||||
// It populates the 'traefik_server_service_up' metric for it with a value of 1 (alive).
|
||||
// When the service is undeployed now the metric is still there in the client library
|
||||
// and will be returned on the metrics endpoint until Traefik would be restarted.
|
||||
//
|
||||
// To solve this problem promState keeps track of Traefik's dynamic configuration.
|
||||
// Metrics that "belong" to a dynamic configuration part like backends or entrypoints
|
||||
// Metrics that "belong" to a dynamic configuration part like services or entryPoints
|
||||
// are removed after they were scraped at least once when the corresponding object
|
||||
// doesn't exist anymore.
|
||||
var promState = newPrometheusState()
|
||||
|
||||
var promRegistry = stdprometheus.NewRegistry()
|
||||
|
||||
// PrometheusHandler exposes Prometheus routes.
|
||||
type PrometheusHandler struct{}
|
||||
|
||||
// Append adds Prometheus routes on a router.
|
||||
func (h PrometheusHandler) Append(router *mux.Router) {
|
||||
router.Methods(http.MethodGet).Path("/metrics").Handler(promhttp.Handler())
|
||||
router.Methods(http.MethodGet).Path("/metrics").Handler(promhttp.HandlerFor(promRegistry, promhttp.HandlerOpts{}))
|
||||
}
|
||||
|
||||
// RegisterPrometheus registers all Prometheus metrics.
|
||||
|
@ -72,6 +75,17 @@ func (h PrometheusHandler) Append(router *mux.Router) {
|
|||
func RegisterPrometheus(ctx context.Context, config *types.Prometheus) Registry {
|
||||
standardRegistry := initStandardRegistry(config)
|
||||
|
||||
if err := promRegistry.Register(stdprometheus.NewProcessCollector(stdprometheus.ProcessCollectorOpts{})); err != nil {
|
||||
if _, ok := err.(stdprometheus.AlreadyRegisteredError); !ok {
|
||||
log.FromContext(ctx).Warn("ProcessCollector is already registered")
|
||||
}
|
||||
}
|
||||
if err := promRegistry.Register(stdprometheus.NewGoCollector()); err != nil {
|
||||
if _, ok := err.(stdprometheus.AlreadyRegisteredError); !ok {
|
||||
log.FromContext(ctx).Warn("GoCollector is already registered")
|
||||
}
|
||||
}
|
||||
|
||||
if !registerPromState(ctx) {
|
||||
return nil
|
||||
}
|
||||
|
@ -106,76 +120,89 @@ func initStandardRegistry(config *types.Prometheus) Registry {
|
|||
Help: "Last config reload failure",
|
||||
}, []string{})
|
||||
|
||||
entrypointReqs := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
|
||||
Name: entrypointReqsTotalName,
|
||||
Help: "How many HTTP requests processed on an entrypoint, partitioned by status code, protocol, and method.",
|
||||
}, []string{"code", "method", "protocol", "entrypoint"})
|
||||
entrypointReqDurations := newHistogramFrom(promState.collectors, stdprometheus.HistogramOpts{
|
||||
Name: entrypointReqDurationName,
|
||||
Help: "How long it took to process the request on an entrypoint, partitioned by status code, protocol, and method.",
|
||||
Buckets: buckets,
|
||||
}, []string{"code", "method", "protocol", "entrypoint"})
|
||||
entrypointOpenConns := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
|
||||
Name: entrypointOpenConnsName,
|
||||
Help: "How many open connections exist on an entrypoint, partitioned by method and protocol.",
|
||||
}, []string{"method", "protocol", "entrypoint"})
|
||||
|
||||
backendReqs := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
|
||||
Name: backendReqsTotalName,
|
||||
Help: "How many HTTP requests processed on a backend, partitioned by status code, protocol, and method.",
|
||||
}, []string{"code", "method", "protocol", "backend"})
|
||||
backendReqDurations := newHistogramFrom(promState.collectors, stdprometheus.HistogramOpts{
|
||||
Name: backendReqDurationName,
|
||||
Help: "How long it took to process the request on a backend, partitioned by status code, protocol, and method.",
|
||||
Buckets: buckets,
|
||||
}, []string{"code", "method", "protocol", "backend"})
|
||||
backendOpenConns := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
|
||||
Name: backendOpenConnsName,
|
||||
Help: "How many open connections exist on a backend, partitioned by method and protocol.",
|
||||
}, []string{"method", "protocol", "backend"})
|
||||
backendRetries := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
|
||||
Name: backendRetriesTotalName,
|
||||
Help: "How many request retries happened on a backend.",
|
||||
}, []string{"backend"})
|
||||
backendServerUp := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
|
||||
Name: backendServerUpName,
|
||||
Help: "Backend server is up, described by gauge value of 0 or 1.",
|
||||
}, []string{"backend", "url"})
|
||||
|
||||
promState.describers = []func(chan<- *stdprometheus.Desc){
|
||||
configReloads.cv.Describe,
|
||||
configReloadsFailures.cv.Describe,
|
||||
lastConfigReloadSuccess.gv.Describe,
|
||||
lastConfigReloadFailure.gv.Describe,
|
||||
entrypointReqs.cv.Describe,
|
||||
entrypointReqDurations.hv.Describe,
|
||||
entrypointOpenConns.gv.Describe,
|
||||
backendReqs.cv.Describe,
|
||||
backendReqDurations.hv.Describe,
|
||||
backendOpenConns.gv.Describe,
|
||||
backendRetries.cv.Describe,
|
||||
backendServerUp.gv.Describe,
|
||||
}
|
||||
|
||||
return &standardRegistry{
|
||||
enabled: true,
|
||||
configReloadsCounter: configReloads,
|
||||
configReloadsFailureCounter: configReloadsFailures,
|
||||
lastConfigReloadSuccessGauge: lastConfigReloadSuccess,
|
||||
lastConfigReloadFailureGauge: lastConfigReloadFailure,
|
||||
entrypointReqsCounter: entrypointReqs,
|
||||
entrypointReqDurationHistogram: entrypointReqDurations,
|
||||
entrypointOpenConnsGauge: entrypointOpenConns,
|
||||
backendReqsCounter: backendReqs,
|
||||
backendReqDurationHistogram: backendReqDurations,
|
||||
backendOpenConnsGauge: backendOpenConns,
|
||||
backendRetriesCounter: backendRetries,
|
||||
backendServerUpGauge: backendServerUp,
|
||||
reg := &standardRegistry{
|
||||
epEnabled: config.AddEntryPointsLabels,
|
||||
svcEnabled: config.AddServicesLabels,
|
||||
configReloadsCounter: configReloads,
|
||||
configReloadsFailureCounter: configReloadsFailures,
|
||||
lastConfigReloadSuccessGauge: lastConfigReloadSuccess,
|
||||
lastConfigReloadFailureGauge: lastConfigReloadFailure,
|
||||
}
|
||||
|
||||
if config.AddEntryPointsLabels {
|
||||
entryPointReqs := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
|
||||
Name: entryPointReqsTotalName,
|
||||
Help: "How many HTTP requests processed on an entrypoint, partitioned by status code, protocol, and method.",
|
||||
}, []string{"code", "method", "protocol", "entrypoint"})
|
||||
entryPointReqDurations := newHistogramFrom(promState.collectors, stdprometheus.HistogramOpts{
|
||||
Name: entryPointReqDurationName,
|
||||
Help: "How long it took to process the request on an entrypoint, partitioned by status code, protocol, and method.",
|
||||
Buckets: buckets,
|
||||
}, []string{"code", "method", "protocol", "entrypoint"})
|
||||
entryPointOpenConns := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
|
||||
Name: entryPointOpenConnsName,
|
||||
Help: "How many open connections exist on an entrypoint, partitioned by method and protocol.",
|
||||
}, []string{"method", "protocol", "entrypoint"})
|
||||
|
||||
promState.describers = append(promState.describers, []func(chan<- *stdprometheus.Desc){
|
||||
entryPointReqs.cv.Describe,
|
||||
entryPointReqDurations.hv.Describe,
|
||||
entryPointOpenConns.gv.Describe,
|
||||
}...)
|
||||
reg.entryPointReqsCounter = entryPointReqs
|
||||
reg.entryPointReqDurationHistogram = entryPointReqDurations
|
||||
reg.entryPointOpenConnsGauge = entryPointOpenConns
|
||||
}
|
||||
if config.AddServicesLabels {
|
||||
serviceReqs := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
|
||||
Name: serviceReqsTotalName,
|
||||
Help: "How many HTTP requests processed on a service, partitioned by status code, protocol, and method.",
|
||||
}, []string{"code", "method", "protocol", "service"})
|
||||
serviceReqDurations := newHistogramFrom(promState.collectors, stdprometheus.HistogramOpts{
|
||||
Name: serviceReqDurationName,
|
||||
Help: "How long it took to process the request on a service, partitioned by status code, protocol, and method.",
|
||||
Buckets: buckets,
|
||||
}, []string{"code", "method", "protocol", "service"})
|
||||
serviceOpenConns := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
|
||||
Name: serviceOpenConnsName,
|
||||
Help: "How many open connections exist on a service, partitioned by method and protocol.",
|
||||
}, []string{"method", "protocol", "service"})
|
||||
serviceRetries := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
|
||||
Name: serviceRetriesTotalName,
|
||||
Help: "How many request retries happened on a service.",
|
||||
}, []string{"service"})
|
||||
serviceServerUp := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
|
||||
Name: serviceServerUpName,
|
||||
Help: "service server is up, described by gauge value of 0 or 1.",
|
||||
}, []string{"service", "url"})
|
||||
|
||||
promState.describers = append(promState.describers, []func(chan<- *stdprometheus.Desc){
|
||||
serviceReqs.cv.Describe,
|
||||
serviceReqDurations.hv.Describe,
|
||||
serviceOpenConns.gv.Describe,
|
||||
serviceRetries.cv.Describe,
|
||||
serviceServerUp.gv.Describe,
|
||||
}...)
|
||||
|
||||
reg.serviceReqsCounter = serviceReqs
|
||||
reg.serviceReqDurationHistogram = serviceReqDurations
|
||||
reg.serviceOpenConnsGauge = serviceOpenConns
|
||||
reg.serviceRetriesCounter = serviceRetries
|
||||
reg.serviceServerUpGauge = serviceServerUp
|
||||
}
|
||||
|
||||
return reg
|
||||
}
|
||||
|
||||
func registerPromState(ctx context.Context) bool {
|
||||
if err := stdprometheus.Register(promState); err != nil {
|
||||
if err := promRegistry.Register(promState); err != nil {
|
||||
logger := log.FromContext(ctx)
|
||||
if _, ok := err.(stdprometheus.AlreadyRegisteredError); !ok {
|
||||
logger.Errorf("Unable to register Traefik to Prometheus: %v", err)
|
||||
|
@ -189,24 +216,24 @@ func registerPromState(ctx context.Context) bool {
|
|||
// OnConfigurationUpdate receives the current configuration from Traefik.
|
||||
// It then converts the configuration to the optimized package internal format
|
||||
// and sets it to the promState.
|
||||
func OnConfigurationUpdate(configurations dynamic.Configurations) {
|
||||
func OnConfigurationUpdate(dynConf dynamic.Configurations, entryPoints []string) {
|
||||
dynamicConfig := newDynamicConfig()
|
||||
|
||||
// FIXME metrics
|
||||
// for _, config := range configurations {
|
||||
// for _, frontend := range config.Frontends {
|
||||
// for _, entrypointName := range frontend.EntryPoints {
|
||||
// dynamicConfig.entrypoints[entrypointName] = true
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// for backendName, backend := range config.Backends {
|
||||
// dynamicConfig.backends[backendName] = make(map[string]bool)
|
||||
// for _, server := range backend.Servers {
|
||||
// dynamicConfig.backends[backendName][server.URL] = true
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
for _, value := range entryPoints {
|
||||
dynamicConfig.entryPoints[value] = true
|
||||
}
|
||||
for key, config := range dynConf {
|
||||
for name := range config.HTTP.Routers {
|
||||
dynamicConfig.routers[fmt.Sprintf("%s@%s", name, key)] = true
|
||||
}
|
||||
|
||||
for serviceName, service := range config.HTTP.Services {
|
||||
dynamicConfig.services[fmt.Sprintf("%s@%s", serviceName, key)] = make(map[string]bool)
|
||||
for _, server := range service.LoadBalancer.Servers {
|
||||
dynamicConfig.services[fmt.Sprintf("%s@%s", serviceName, key)][server.URL] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
promState.SetDynamicConfig(dynamicConfig)
|
||||
}
|
||||
|
@ -279,15 +306,15 @@ func (ps *prometheusState) Collect(ch chan<- stdprometheus.Metric) {
|
|||
func (ps *prometheusState) isOutdated(collector *collector) bool {
|
||||
labels := collector.labels
|
||||
|
||||
if entrypointName, ok := labels["entrypoint"]; ok && !ps.dynamicConfig.hasEntrypoint(entrypointName) {
|
||||
if entrypointName, ok := labels["entrypoint"]; ok && !ps.dynamicConfig.hasEntryPoint(entrypointName) {
|
||||
return true
|
||||
}
|
||||
|
||||
if backendName, ok := labels["backend"]; ok {
|
||||
if !ps.dynamicConfig.hasBackend(backendName) {
|
||||
if serviceName, ok := labels["service"]; ok {
|
||||
if !ps.dynamicConfig.hasService(serviceName) {
|
||||
return true
|
||||
}
|
||||
if url, ok := labels["url"]; ok && !ps.dynamicConfig.hasServerURL(backendName, url) {
|
||||
if url, ok := labels["url"]; ok && !ps.dynamicConfig.hasServerURL(serviceName, url) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
@ -297,33 +324,35 @@ func (ps *prometheusState) isOutdated(collector *collector) bool {
|
|||
|
||||
func newDynamicConfig() *dynamicConfig {
|
||||
return &dynamicConfig{
|
||||
entrypoints: make(map[string]bool),
|
||||
backends: make(map[string]map[string]bool),
|
||||
entryPoints: make(map[string]bool),
|
||||
routers: make(map[string]bool),
|
||||
services: make(map[string]map[string]bool),
|
||||
}
|
||||
}
|
||||
|
||||
// dynamicConfig holds the current configuration for entrypoints, backends,
|
||||
// dynamicConfig holds the current configuration for entryPoints, services,
|
||||
// and server URLs in an optimized way to check for existence. This provides
|
||||
// a performant way to check whether the collected metrics belong to the
|
||||
// current configuration or to an outdated one.
|
||||
type dynamicConfig struct {
|
||||
entrypoints map[string]bool
|
||||
backends map[string]map[string]bool
|
||||
entryPoints map[string]bool
|
||||
routers map[string]bool
|
||||
services map[string]map[string]bool
|
||||
}
|
||||
|
||||
func (d *dynamicConfig) hasEntrypoint(entrypointName string) bool {
|
||||
_, ok := d.entrypoints[entrypointName]
|
||||
func (d *dynamicConfig) hasEntryPoint(entrypointName string) bool {
|
||||
_, ok := d.entryPoints[entrypointName]
|
||||
return ok
|
||||
}
|
||||
|
||||
func (d *dynamicConfig) hasBackend(backendName string) bool {
|
||||
_, ok := d.backends[backendName]
|
||||
func (d *dynamicConfig) hasService(serviceName string) bool {
|
||||
_, ok := d.services[serviceName]
|
||||
return ok
|
||||
}
|
||||
|
||||
func (d *dynamicConfig) hasServerURL(backendName, serverURL string) bool {
|
||||
if backend, hasBackend := d.backends[backendName]; hasBackend {
|
||||
_, ok := backend[serverURL]
|
||||
func (d *dynamicConfig) hasServerURL(serviceName, serverURL string) bool {
|
||||
if service, hasService := d.services[serviceName]; hasService {
|
||||
_, ok := service[serverURL]
|
||||
return ok
|
||||
}
|
||||
return false
|
||||
|
@ -479,7 +508,7 @@ func (h *histogram) Observe(value float64) {
|
|||
labels := h.labelNamesValues.ToLabels()
|
||||
collector := h.hv.With(labels)
|
||||
collector.Observe(value)
|
||||
h.collectors <- newCollector(h.name, labels, collector, func() {
|
||||
h.collectors <- newCollector(h.name, labels, h.hv, func() {
|
||||
h.hv.Delete(labels)
|
||||
})
|
||||
}
|
||||
|
|
|
@@ -30,61 +30,61 @@ func TestRegisterPromState(t *testing.T) {
{
desc: "Register once",
prometheusSlice: []*types.Prometheus{{}},
expectedNbRegistries: 1,
initPromState: true,
unregisterPromState: false,
expectedNbRegistries: 1,
},
{
desc: "Register once with no promState init",
prometheusSlice: []*types.Prometheus{{}},
expectedNbRegistries: 0,
initPromState: false,
unregisterPromState: false,
expectedNbRegistries: 1,
},
{
desc: "Register twice",
prometheusSlice: []*types.Prometheus{{}, {}},
expectedNbRegistries: 2,
initPromState: true,
unregisterPromState: false,
expectedNbRegistries: 2,
},
{
desc: "Register twice with no promstate init",
prometheusSlice: []*types.Prometheus{{}, {}},
expectedNbRegistries: 0,
initPromState: false,
unregisterPromState: false,
expectedNbRegistries: 2,
},
{
desc: "Register twice with unregister",
prometheusSlice: []*types.Prometheus{{}, {}},
initPromState: true,
unregisterPromState: true,
expectedNbRegistries: 2,
initPromState: true,
},
{
desc: "Register twice with unregister but no promstate init",
prometheusSlice: []*types.Prometheus{{}, {}},
unregisterPromState: true,
expectedNbRegistries: 0,
},
}

for _, test := range testCases {
actualNbRegistries := 0
for _, prom := range test.prometheusSlice {
if test.initPromState {
initStandardRegistry(prom)
test := test
t.Run(test.desc, func(t *testing.T) {
actualNbRegistries := 0
for _, prom := range test.prometheusSlice {
if test.initPromState {
initStandardRegistry(prom)
}
if registerPromState(context.Background()) {
actualNbRegistries++
}
if test.unregisterPromState {
promRegistry.Unregister(promState)
}

promState.reset()
}

if registerPromState(context.Background()) {
actualNbRegistries++
}

if test.unregisterPromState {
prometheus.Unregister(promState)
}

promState.reset()
}

prometheus.Unregister(promState)

assert.Equal(t, test.expectedNbRegistries, actualNbRegistries)
promRegistry.Unregister(promState)
assert.Equal(t, test.expectedNbRegistries, actualNbRegistries)
})
}
}

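The test counts how often registerPromState reports a successful registration. As a hedged illustration, a helper with that contract could look roughly like the sketch below, relying on client_golang's error return from Register; this is not the code from the commit.

    package metrics

    import "github.com/prometheus/client_golang/prometheus"

    // registerPromStateSketch reports whether the collector was newly registered
    // on the given registry. Register returns an error (for example
    // prometheus.AlreadyRegisteredError) when nothing new was registered.
    func registerPromStateSketch(reg *prometheus.Registry, c prometheus.Collector) bool {
        return reg.Register(c) == nil
    }
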
@@ -99,13 +99,15 @@ func (ps *prometheusState) reset() {
}

func TestPrometheus(t *testing.T) {
promState = newPrometheusState()
promRegistry = prometheus.NewRegistry()
// Reset state of global promState.
defer promState.reset()

prometheusRegistry := RegisterPrometheus(context.Background(), &types.Prometheus{})
defer prometheus.Unregister(promState)
prometheusRegistry := RegisterPrometheus(context.Background(), &types.Prometheus{AddEntryPointsLabels: true, AddServicesLabels: true})
defer promRegistry.Unregister(promState)

if !prometheusRegistry.IsEnabled() {
if !prometheusRegistry.IsEpEnabled() || !prometheusRegistry.IsSvcEnabled() {
t.Errorf("PrometheusRegistry should return true for IsEnabled()")
}

@@ -115,44 +117,44 @@ func TestPrometheus(t *testing.T) {
prometheusRegistry.LastConfigReloadFailureGauge().Set(float64(time.Now().Unix()))

prometheusRegistry.
EntrypointReqsCounter().
EntryPointReqsCounter().
With("code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http", "entrypoint", "http").
Add(1)
prometheusRegistry.
EntrypointReqDurationHistogram().
EntryPointReqDurationHistogram().
With("code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http", "entrypoint", "http").
Observe(1)
prometheusRegistry.
EntrypointOpenConnsGauge().
EntryPointOpenConnsGauge().
With("method", http.MethodGet, "protocol", "http", "entrypoint", "http").
Set(1)

prometheusRegistry.
BackendReqsCounter().
With("backend", "backend1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
ServiceReqsCounter().
With("service", "service1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
Add(1)
prometheusRegistry.
BackendReqDurationHistogram().
With("backend", "backend1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
ServiceReqDurationHistogram().
With("service", "service1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
Observe(10000)
prometheusRegistry.
BackendOpenConnsGauge().
With("backend", "backend1", "method", http.MethodGet, "protocol", "http").
ServiceOpenConnsGauge().
With("service", "service1", "method", http.MethodGet, "protocol", "http").
Set(1)
prometheusRegistry.
BackendRetriesCounter().
With("backend", "backend1").
ServiceRetriesCounter().
With("service", "service1").
Add(1)
prometheusRegistry.
BackendServerUpGauge().
With("backend", "backend1", "url", "http://127.0.0.10:80").
ServiceServerUpGauge().
With("service", "service1", "url", "http://127.0.0.10:80").
Set(1)

delayForTrackingCompletion()

metricsFamilies := mustScrape()

tests := []struct {
testCases := []struct {
name string
labels map[string]string
assert func(*dto.MetricFamily)

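The test drives the new registry getters directly; as a rough illustration of how request-handling code could feed the same entry point metrics, a sketch follows. Only the getter names and label keys come from the diff; the helper itself, its signature, and its placement in the metrics package are assumptions.

    package metrics

    import "strconv"

    // observeEntryPointRequest is a sketch of recording one request against the
    // entry point metrics of a Registry (the package's own interface).
    func observeEntryPointRequest(reg Registry, entryPoint, method string, code int, duration float64) {
        labels := []string{
            "code", strconv.Itoa(code),
            "method", method,
            "protocol", "http",
            "entrypoint", entryPoint,
        }
        reg.EntryPointReqsCounter().With(labels...).Add(1)
        // The unit expected by Observe depends on the backend: seconds for
        // Prometheus histograms, milliseconds for statsd/Datadog timings.
        reg.EntryPointReqDurationHistogram().With(labels...).Observe(duration)
    }
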
@@ -174,107 +176,111 @@ func TestPrometheus(t *testing.T) {
assert: buildTimestampAssert(t, configLastReloadFailureName),
},
{
name: entrypointReqsTotalName,
name: entryPointReqsTotalName,
labels: map[string]string{
"code": "200",
"method": http.MethodGet,
"protocol": "http",
"entrypoint": "http",
},
assert: buildCounterAssert(t, entrypointReqsTotalName, 1),
assert: buildCounterAssert(t, entryPointReqsTotalName, 1),
},
{
name: entrypointReqDurationName,
name: entryPointReqDurationName,
labels: map[string]string{
"code": "200",
"method": http.MethodGet,
"protocol": "http",
"entrypoint": "http",
},
assert: buildHistogramAssert(t, entrypointReqDurationName, 1),
assert: buildHistogramAssert(t, entryPointReqDurationName, 1),
},
{
name: entrypointOpenConnsName,
name: entryPointOpenConnsName,
labels: map[string]string{
"method": http.MethodGet,
"protocol": "http",
"entrypoint": "http",
},
assert: buildGaugeAssert(t, entrypointOpenConnsName, 1),
assert: buildGaugeAssert(t, entryPointOpenConnsName, 1),
},
{
name: backendReqsTotalName,
name: serviceReqsTotalName,
labels: map[string]string{
"code": "200",
"method": http.MethodGet,
"protocol": "http",
"backend": "backend1",
"service": "service1",
},
assert: buildCounterAssert(t, backendReqsTotalName, 1),
assert: buildCounterAssert(t, serviceReqsTotalName, 1),
},
{
name: backendReqDurationName,
name: serviceReqDurationName,
labels: map[string]string{
"code": "200",
"method": http.MethodGet,
"protocol": "http",
"backend": "backend1",
"service": "service1",
},
assert: buildHistogramAssert(t, backendReqDurationName, 1),
assert: buildHistogramAssert(t, serviceReqDurationName, 1),
},
{
name: backendOpenConnsName,
name: serviceOpenConnsName,
labels: map[string]string{
"method": http.MethodGet,
"protocol": "http",
"backend": "backend1",
"service": "service1",
},
assert: buildGaugeAssert(t, backendOpenConnsName, 1),
assert: buildGaugeAssert(t, serviceOpenConnsName, 1),
},
{
name: backendRetriesTotalName,
name: serviceRetriesTotalName,
labels: map[string]string{
"backend": "backend1",
"service": "service1",
},
assert: buildGreaterThanCounterAssert(t, backendRetriesTotalName, 1),
assert: buildGreaterThanCounterAssert(t, serviceRetriesTotalName, 1),
},
{
name: backendServerUpName,
name: serviceServerUpName,
labels: map[string]string{
"backend": "backend1",
"service": "service1",
"url": "http://127.0.0.10:80",
},
assert: buildGaugeAssert(t, backendServerUpName, 1),
assert: buildGaugeAssert(t, serviceServerUpName, 1),
},
}

for _, test := range tests {
family := findMetricFamily(test.name, metricsFamilies)
if family == nil {
t.Errorf("gathered metrics do not contain %q", test.name)
continue
}
for _, label := range family.Metric[0].Label {
val, ok := test.labels[*label.Name]
if !ok {
t.Errorf("%q metric contains unexpected label %q", test.name, *label.Name)
} else if val != *label.Value {
t.Errorf("label %q in metric %q has wrong value %q, expected %q", *label.Name, test.name, *label.Value, val)
for _, test := range testCases {
test := test
t.Run(test.name, func(t *testing.T) {
family := findMetricFamily(test.name, metricsFamilies)
if family == nil {
t.Errorf("gathered metrics do not contain %q", test.name)
return
}
}
test.assert(family)

for _, label := range family.Metric[0].Label {
val, ok := test.labels[*label.Name]
if !ok {
t.Errorf("%q metric contains unexpected label %q", test.name, *label.Name)
} else if val != *label.Value {
t.Errorf("label %q in metric %q has wrong value %q, expected %q", *label.Name, test.name, *label.Value, val)
}
}
test.assert(family)
})

}
}

func TestPrometheusMetricRemoval(t *testing.T) {
// FIXME metrics
t.Skip("waiting for metrics")

promState = newPrometheusState()
promRegistry = prometheus.NewRegistry()
// Reset state of global promState.
defer promState.reset()

prometheusRegistry := RegisterPrometheus(context.Background(), &types.Prometheus{})
defer prometheus.Unregister(promState)
prometheusRegistry := RegisterPrometheus(context.Background(), &types.Prometheus{AddEntryPointsLabels: true, AddServicesLabels: true})
defer promRegistry.Unregister(promState)

configurations := make(dynamic.Configurations)
configurations["providerName"] = &dynamic.Configuration{

@@ -289,78 +295,78 @@ func TestPrometheusMetricRemoval(t *testing.T) {
),
}

OnConfigurationUpdate(configurations)
OnConfigurationUpdate(configurations, []string{"entrypoint1"})

// Register some metrics manually that are not part of the active configuration.
// Those metrics should be part of the /metrics output on the first scrape but
// should be removed after that scrape.
prometheusRegistry.
EntrypointReqsCounter().
EntryPointReqsCounter().
With("entrypoint", "entrypoint2", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
Add(1)
prometheusRegistry.
BackendReqsCounter().
With("backend", "backend2", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
ServiceReqsCounter().
With("service", "service2", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
Add(1)
prometheusRegistry.
BackendServerUpGauge().
With("backend", "backend1", "url", "http://localhost:9999").
ServiceServerUpGauge().
With("service", "service1", "url", "http://localhost:9999").
Set(1)

delayForTrackingCompletion()

assertMetricsExist(t, mustScrape(), entrypointReqsTotalName, backendReqsTotalName, backendServerUpName)
assertMetricsAbsent(t, mustScrape(), entrypointReqsTotalName, backendReqsTotalName, backendServerUpName)
assertMetricsExist(t, mustScrape(), entryPointReqsTotalName, serviceReqsTotalName, serviceServerUpName)
assertMetricsAbsent(t, mustScrape(), entryPointReqsTotalName, serviceReqsTotalName, serviceServerUpName)

// To verify that metrics belonging to active configurations are not removed
// here the counter examples.
prometheusRegistry.
EntrypointReqsCounter().
EntryPointReqsCounter().
With("entrypoint", "entrypoint1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
Add(1)

delayForTrackingCompletion()

assertMetricsExist(t, mustScrape(), entrypointReqsTotalName)
assertMetricsExist(t, mustScrape(), entrypointReqsTotalName)
assertMetricsExist(t, mustScrape(), entryPointReqsTotalName)
assertMetricsExist(t, mustScrape(), entryPointReqsTotalName)
}

func TestPrometheusRemovedMetricsReset(t *testing.T) {
// Reset state of global promState.
defer promState.reset()

prometheusRegistry := RegisterPrometheus(context.Background(), &types.Prometheus{})
defer prometheus.Unregister(promState)
prometheusRegistry := RegisterPrometheus(context.Background(), &types.Prometheus{AddEntryPointsLabels: true, AddServicesLabels: true})
defer promRegistry.Unregister(promState)

labelNamesValues := []string{
"backend", "backend",
"service", "service",
"code", strconv.Itoa(http.StatusOK),
"method", http.MethodGet,
"protocol", "http",
}
prometheusRegistry.
BackendReqsCounter().
ServiceReqsCounter().
With(labelNamesValues...).
Add(3)

delayForTrackingCompletion()

metricsFamilies := mustScrape()
assertCounterValue(t, 3, findMetricFamily(backendReqsTotalName, metricsFamilies), labelNamesValues...)
assertCounterValue(t, 3, findMetricFamily(serviceReqsTotalName, metricsFamilies), labelNamesValues...)

// There is no dynamic configuration and so this metric will be deleted
// after the first scrape.
assertMetricsAbsent(t, mustScrape(), backendReqsTotalName)
assertMetricsAbsent(t, mustScrape(), serviceReqsTotalName)

prometheusRegistry.
BackendReqsCounter().
ServiceReqsCounter().
With(labelNamesValues...).
Add(1)

delayForTrackingCompletion()

metricsFamilies = mustScrape()
assertCounterValue(t, 1, findMetricFamily(backendReqsTotalName, metricsFamilies), labelNamesValues...)
assertCounterValue(t, 1, findMetricFamily(serviceReqsTotalName, metricsFamilies), labelNamesValues...)
}

// Tracking and gathering the metrics happens concurrently.

@@ -374,7 +380,7 @@ func delayForTrackingCompletion() {
}

func mustScrape() []*dto.MetricFamily {
families, err := prometheus.DefaultGatherer.Gather()
families, err := promRegistry.Gather()
if err != nil {
panic(fmt.Sprintf("could not gather metrics families: %s", err))
}

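Because mustScrape now gathers from the package-level promRegistry instead of prometheus.DefaultGatherer, exposing those metrics over HTTP would go through promhttp.HandlerFor with that registry rather than the default handler. A standalone sketch; the listen address and path are placeholders.

    package main

    import (
        "net/http"

        "github.com/prometheus/client_golang/prometheus"
        "github.com/prometheus/client_golang/prometheus/promhttp"
    )

    func main() {
        reg := prometheus.NewRegistry() // stands in for the package's promRegistry
        http.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{}))
        _ = http.ListenAndServe(":8082", nil)
    }
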
@@ -19,18 +19,18 @@ var statsdClient = statsd.New("traefik.", kitlog.LoggerFunc(func(keyvals ...inte
var statsdTicker *time.Ticker

const (
statsdMetricsBackendReqsName = "backend.request.total"
statsdMetricsBackendLatencyName = "backend.request.duration"
statsdRetriesTotalName = "backend.retries.total"
statsdMetricsServiceReqsName = "service.request.total"
statsdMetricsServiceLatencyName = "service.request.duration"
statsdRetriesTotalName = "service.retries.total"
statsdConfigReloadsName = "config.reload.total"
statsdConfigReloadsFailureName = statsdConfigReloadsName + ".failure"
statsdLastConfigReloadSuccessName = "config.reload.lastSuccessTimestamp"
statsdLastConfigReloadFailureName = "config.reload.lastFailureTimestamp"
statsdEntrypointReqsName = "entrypoint.request.total"
statsdEntrypointReqDurationName = "entrypoint.request.duration"
statsdEntrypointOpenConnsName = "entrypoint.connections.open"
statsdOpenConnsName = "backend.connections.open"
statsdServerUpName = "backend.server.up"
statsdEntryPointReqsName = "entrypoint.request.total"
statsdEntryPointReqDurationName = "entrypoint.request.duration"
statsdEntryPointOpenConnsName = "entrypoint.connections.open"
statsdOpenConnsName = "service.connections.open"
statsdServerUpName = "service.server.up"
)

// RegisterStatsd registers the metrics pusher if this didn't happen yet and creates a statsd Registry instance.

@@ -39,21 +39,30 @@ func RegisterStatsd(ctx context.Context, config *types.Statsd) Registry {
statsdTicker = initStatsdTicker(ctx, config)
}

return &standardRegistry{
enabled: true,
configReloadsCounter: statsdClient.NewCounter(statsdConfigReloadsName, 1.0),
configReloadsFailureCounter: statsdClient.NewCounter(statsdConfigReloadsFailureName, 1.0),
lastConfigReloadSuccessGauge: statsdClient.NewGauge(statsdLastConfigReloadSuccessName),
lastConfigReloadFailureGauge: statsdClient.NewGauge(statsdLastConfigReloadFailureName),
entrypointReqsCounter: statsdClient.NewCounter(statsdEntrypointReqsName, 1.0),
entrypointReqDurationHistogram: statsdClient.NewTiming(statsdEntrypointReqDurationName, 1.0),
entrypointOpenConnsGauge: statsdClient.NewGauge(statsdEntrypointOpenConnsName),
backendReqsCounter: statsdClient.NewCounter(statsdMetricsBackendReqsName, 1.0),
backendReqDurationHistogram: statsdClient.NewTiming(statsdMetricsBackendLatencyName, 1.0),
backendRetriesCounter: statsdClient.NewCounter(statsdRetriesTotalName, 1.0),
backendOpenConnsGauge: statsdClient.NewGauge(statsdOpenConnsName),
backendServerUpGauge: statsdClient.NewGauge(statsdServerUpName),
registry := &standardRegistry{
configReloadsCounter: statsdClient.NewCounter(statsdConfigReloadsName, 1.0),
configReloadsFailureCounter: statsdClient.NewCounter(statsdConfigReloadsFailureName, 1.0),
lastConfigReloadSuccessGauge: statsdClient.NewGauge(statsdLastConfigReloadSuccessName),
lastConfigReloadFailureGauge: statsdClient.NewGauge(statsdLastConfigReloadFailureName),
}

if config.AddEntryPointsLabels {
registry.epEnabled = config.AddEntryPointsLabels
registry.entryPointReqsCounter = statsdClient.NewCounter(statsdEntryPointReqsName, 1.0)
registry.entryPointReqDurationHistogram = statsdClient.NewTiming(statsdEntryPointReqDurationName, 1.0)
registry.entryPointOpenConnsGauge = statsdClient.NewGauge(statsdEntryPointOpenConnsName)
}

if config.AddServicesLabels {
registry.svcEnabled = config.AddServicesLabels
registry.serviceReqsCounter = statsdClient.NewCounter(statsdMetricsServiceReqsName, 1.0)
registry.serviceReqDurationHistogram = statsdClient.NewTiming(statsdMetricsServiceLatencyName, 1.0)
registry.serviceRetriesCounter = statsdClient.NewCounter(statsdRetriesTotalName, 1.0)
registry.serviceOpenConnsGauge = statsdClient.NewGauge(statsdOpenConnsName)
registry.serviceServerUpGauge = statsdClient.NewGauge(statsdServerUpName)
}

return registry
}

// initStatsdTicker initializes metrics pusher and creates a statsdClient if not created already

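With this change the statsd registry only wires the entry point and service instruments when the corresponding flags are set on the configuration. A minimal usage sketch, assumed to live in the same package; the address and push interval are example values.

    // Sketch only: register a statsd registry with both label groups enabled.
    func exampleRegisterStatsd(ctx context.Context) Registry {
        return RegisterStatsd(ctx, &types.Statsd{
            Address:              "localhost:8125",
            PushInterval:         types.Duration(10 * time.Second),
            AddEntryPointsLabels: true,
            AddServicesLabels:    true,
        })
    }
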
@@ -66,7 +75,7 @@ func initStatsdTicker(ctx context.Context, config *types.Statsd) *time.Ticker {
report := time.NewTicker(time.Duration(config.PushInterval))

safe.Go(func() {
statsdClient.SendLoop(report.C, "udp", address)
statsdClient.SendLoop(ctx, report.C, "udp", address)
})

return report

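SendLoop now takes a context as its first argument, so the push loop ends when that context is cancelled instead of running for the life of the process. A self-contained sketch of the same pattern with go-kit's statsd client outside Traefik's safe.Go helper; the prefix, address, and intervals are example values.

    package main

    import (
        "context"
        "time"

        "github.com/go-kit/kit/log"
        "github.com/go-kit/kit/metrics/statsd"
    )

    func main() {
        client := statsd.New("traefik.", log.NewNopLogger())

        ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
        defer cancel()

        ticker := time.NewTicker(10 * time.Second)
        defer ticker.Stop()

        // Blocks until the context is done; each tick flushes buffered metrics over UDP.
        client.SendLoop(ctx, ticker.C, "udp", "localhost:8125")
    }
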
@@ -15,37 +15,37 @@ func TestStatsD(t *testing.T) {
// This is needed to make sure that UDP Listener listens for data a bit longer, otherwise it will quit after a millisecond
udp.Timeout = 5 * time.Second

statsdRegistry := RegisterStatsd(context.Background(), &types.Statsd{Address: ":18125", PushInterval: types.Duration(time.Second)})
statsdRegistry := RegisterStatsd(context.Background(), &types.Statsd{Address: ":18125", PushInterval: types.Duration(time.Second), AddEntryPointsLabels: true, AddServicesLabels: true})
defer StopStatsd()

if !statsdRegistry.IsEnabled() {
if !statsdRegistry.IsEpEnabled() || !statsdRegistry.IsSvcEnabled() {
t.Errorf("Statsd registry should return true for IsEnabled()")
}

expected := []string{
// We are only validating counts, as it is nearly impossible to validate latency, since it varies every run
"traefik.backend.request.total:2.000000|c\n",
"traefik.backend.retries.total:2.000000|c\n",
"traefik.backend.request.duration:10000.000000|ms",
"traefik.service.request.total:2.000000|c\n",
"traefik.service.retries.total:2.000000|c\n",
"traefik.service.request.duration:10000.000000|ms",
"traefik.config.reload.total:1.000000|c\n",
"traefik.config.reload.total:1.000000|c\n",
"traefik.entrypoint.request.total:1.000000|c\n",
"traefik.entrypoint.request.duration:10000.000000|ms",
"traefik.entrypoint.connections.open:1.000000|g\n",
"traefik.backend.server.up:1.000000|g\n",
"traefik.service.server.up:1.000000|g\n",
}

udp.ShouldReceiveAll(t, expected, func() {
statsdRegistry.BackendReqsCounter().With("service", "test", "code", string(http.StatusOK), "method", http.MethodGet).Add(1)
statsdRegistry.BackendReqsCounter().With("service", "test", "code", string(http.StatusNotFound), "method", http.MethodGet).Add(1)
statsdRegistry.BackendRetriesCounter().With("service", "test").Add(1)
statsdRegistry.BackendRetriesCounter().With("service", "test").Add(1)
statsdRegistry.BackendReqDurationHistogram().With("service", "test", "code", string(http.StatusOK)).Observe(10000)
statsdRegistry.ServiceReqsCounter().With("service", "test", "code", string(http.StatusOK), "method", http.MethodGet).Add(1)
statsdRegistry.ServiceReqsCounter().With("service", "test", "code", string(http.StatusNotFound), "method", http.MethodGet).Add(1)
statsdRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
statsdRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
statsdRegistry.ServiceReqDurationHistogram().With("service", "test", "code", string(http.StatusOK)).Observe(10000)
statsdRegistry.ConfigReloadsCounter().Add(1)
statsdRegistry.ConfigReloadsFailureCounter().Add(1)
statsdRegistry.EntrypointReqsCounter().With("entrypoint", "test").Add(1)
statsdRegistry.EntrypointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
statsdRegistry.EntrypointOpenConnsGauge().With("entrypoint", "test").Set(1)
statsdRegistry.BackendServerUpGauge().With("backend:test", "url", "http://127.0.0.1").Set(1)
statsdRegistry.EntryPointReqsCounter().With("entrypoint", "test").Add(1)
statsdRegistry.EntryPointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
statsdRegistry.EntryPointOpenConnsGauge().With("entrypoint", "test").Set(1)
statsdRegistry.ServiceServerUpGauge().With("service:test", "url", "http://127.0.0.1").Set(1)
})
}