Move code to pkg
parent bd4c822670
commit f1b085fa36
465 changed files with 656 additions and 680 deletions
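Almost all of the churn in this commit is the package move itself: source files that previously lived at the repository root now live under pkg/, and every import path is rewritten to match. A minimal sketch of the resulting change (the pre-move paths are assumed from the commit title and are not shown in the hunks below; only the new pkg/ paths appear there):

package main

// Import paths before the move (assumed layout, for illustration only):
//
//	"github.com/containous/traefik/log"
//	"github.com/containous/traefik/safe"
//	"github.com/containous/traefik/types"

// Import paths after the move, as they appear in the hunks of this commit:
import (
	_ "github.com/containous/traefik/pkg/log"
	_ "github.com/containous/traefik/pkg/safe"
	_ "github.com/containous/traefik/pkg/types"
)

func main() {}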
pkg/metrics/datadog.go (Normal file, 88 lines added)
@@ -0,0 +1,88 @@
package metrics

import (
	"context"
	"time"

	"github.com/containous/traefik/pkg/log"
	"github.com/containous/traefik/pkg/safe"
	"github.com/containous/traefik/pkg/types"
	kitlog "github.com/go-kit/kit/log"
	"github.com/go-kit/kit/metrics/dogstatsd"
)

var datadogClient = dogstatsd.New("traefik.", kitlog.LoggerFunc(func(keyvals ...interface{}) error {
	log.WithoutContext().WithField(log.MetricsProviderName, "datadog").Info(keyvals)
	return nil
}))

var datadogTicker *time.Ticker

// Metric names consistent with https://github.com/DataDog/integrations-extras/pull/64
const (
	ddMetricsBackendReqsName = "backend.request.total"
	ddMetricsBackendLatencyName = "backend.request.duration"
	ddRetriesTotalName = "backend.retries.total"
	ddConfigReloadsName = "config.reload.total"
	ddConfigReloadsFailureTagName = "failure"
	ddLastConfigReloadSuccessName = "config.reload.lastSuccessTimestamp"
	ddLastConfigReloadFailureName = "config.reload.lastFailureTimestamp"
	ddEntrypointReqsName = "entrypoint.request.total"
	ddEntrypointReqDurationName = "entrypoint.request.duration"
	ddEntrypointOpenConnsName = "entrypoint.connections.open"
	ddOpenConnsName = "backend.connections.open"
	ddServerUpName = "backend.server.up"
)

// RegisterDatadog registers the metrics pusher if this didn't happen yet and creates a datadog Registry instance.
func RegisterDatadog(ctx context.Context, config *types.Datadog) Registry {
	if datadogTicker == nil {
		datadogTicker = initDatadogClient(ctx, config)
	}

	registry := &standardRegistry{
		enabled: true,
		configReloadsCounter: datadogClient.NewCounter(ddConfigReloadsName, 1.0),
		configReloadsFailureCounter: datadogClient.NewCounter(ddConfigReloadsName, 1.0).With(ddConfigReloadsFailureTagName, "true"),
		lastConfigReloadSuccessGauge: datadogClient.NewGauge(ddLastConfigReloadSuccessName),
		lastConfigReloadFailureGauge: datadogClient.NewGauge(ddLastConfigReloadFailureName),
		entrypointReqsCounter: datadogClient.NewCounter(ddEntrypointReqsName, 1.0),
		entrypointReqDurationHistogram: datadogClient.NewHistogram(ddEntrypointReqDurationName, 1.0),
		entrypointOpenConnsGauge: datadogClient.NewGauge(ddEntrypointOpenConnsName),
		backendReqsCounter: datadogClient.NewCounter(ddMetricsBackendReqsName, 1.0),
		backendReqDurationHistogram: datadogClient.NewHistogram(ddMetricsBackendLatencyName, 1.0),
		backendRetriesCounter: datadogClient.NewCounter(ddRetriesTotalName, 1.0),
		backendOpenConnsGauge: datadogClient.NewGauge(ddOpenConnsName),
		backendServerUpGauge: datadogClient.NewGauge(ddServerUpName),
	}

	return registry
}

func initDatadogClient(ctx context.Context, config *types.Datadog) *time.Ticker {
	address := config.Address
	if len(address) == 0 {
		address = "localhost:8125"
	}
	pushInterval, err := time.ParseDuration(config.PushInterval)
	if err != nil {
		log.FromContext(ctx).Warnf("Unable to parse %s from config.PushInterval: using 10s as the default value", config.PushInterval)
		pushInterval = 10 * time.Second
	}

	report := time.NewTicker(pushInterval)

	safe.Go(func() {
		datadogClient.SendLoop(report.C, "udp", address)
	})

	return report
}

// StopDatadog stops internal datadogTicker which controls the pushing of metrics to DD Agent and resets it to `nil`.
func StopDatadog() {
	if datadogTicker != nil {
		datadogTicker.Stop()
	}
	datadogTicker = nil
}
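A usage sketch for the registry above (not part of the commit; the Address and PushInterval values are illustrative and mirror the defaults handled in initDatadogClient):

package main

import (
	"context"

	"github.com/containous/traefik/pkg/metrics"
	"github.com/containous/traefik/pkg/types"
)

func main() {
	// The first call starts the dogstatsd push loop; metric names go out
	// on the wire with the "traefik." prefix set on datadogClient.
	registry := metrics.RegisterDatadog(context.Background(), &types.Datadog{
		Address:      "localhost:8125", // default used when Address is empty
		PushInterval: "10s",            // fallback value when parsing fails
	})

	registry.ConfigReloadsCounter().Add(1)
	registry.EntrypointReqsCounter().With("entrypoint", "web").Add(1)

	// Stops the ticker that drives SendLoop and resets it to nil.
	metrics.StopDatadog()
}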
pkg/metrics/datadog_test.go (Normal file, 53 lines added)
@@ -0,0 +1,53 @@
package metrics

import (
	"context"
	"net/http"
	"strconv"
	"testing"
	"time"

	"github.com/containous/traefik/pkg/types"
	"github.com/stvp/go-udp-testing"
)

func TestDatadog(t *testing.T) {
	udp.SetAddr(":18125")
	// This is needed to make sure that UDP Listener listens for data a bit longer, otherwise it will quit after a millisecond
	udp.Timeout = 5 * time.Second

	datadogRegistry := RegisterDatadog(context.Background(), &types.Datadog{Address: ":18125", PushInterval: "1s"})
	defer StopDatadog()

	if !datadogRegistry.IsEnabled() {
		t.Errorf("DatadogRegistry should return true for IsEnabled()")
	}

	expected := []string{
		// We are only validating counts, as it is nearly impossible to validate latency, since it varies every run
		"traefik.backend.request.total:1.000000|c|#service:test,code:404,method:GET\n",
		"traefik.backend.request.total:1.000000|c|#service:test,code:200,method:GET\n",
		"traefik.backend.retries.total:2.000000|c|#service:test\n",
		"traefik.backend.request.duration:10000.000000|h|#service:test,code:200\n",
		"traefik.config.reload.total:1.000000|c\n",
		"traefik.config.reload.total:1.000000|c|#failure:true\n",
		"traefik.entrypoint.request.total:1.000000|c|#entrypoint:test\n",
		"traefik.entrypoint.request.duration:10000.000000|h|#entrypoint:test\n",
		"traefik.entrypoint.connections.open:1.000000|g|#entrypoint:test\n",
		"traefik.backend.server.up:1.000000|g|#backend:test,url:http://127.0.0.1,one:two\n",
	}

	udp.ShouldReceiveAll(t, expected, func() {
		datadogRegistry.BackendReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
		datadogRegistry.BackendReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
		datadogRegistry.BackendReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
		datadogRegistry.BackendRetriesCounter().With("service", "test").Add(1)
		datadogRegistry.BackendRetriesCounter().With("service", "test").Add(1)
		datadogRegistry.ConfigReloadsCounter().Add(1)
		datadogRegistry.ConfigReloadsFailureCounter().Add(1)
		datadogRegistry.EntrypointReqsCounter().With("entrypoint", "test").Add(1)
		datadogRegistry.EntrypointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
		datadogRegistry.EntrypointOpenConnsGauge().With("entrypoint", "test").Set(1)
		datadogRegistry.BackendServerUpGauge().With("backend", "test", "url", "http://127.0.0.1", "one", "two").Set(1)
	})
}
pkg/metrics/influxdb.go (Normal file, 213 lines added)
@@ -0,0 +1,213 @@
package metrics

import (
	"bytes"
	"context"
	"fmt"
	"net/url"
	"regexp"
	"time"

	"github.com/containous/traefik/pkg/log"
	"github.com/containous/traefik/pkg/safe"
	"github.com/containous/traefik/pkg/types"
	kitlog "github.com/go-kit/kit/log"
	"github.com/go-kit/kit/metrics/influx"
	influxdb "github.com/influxdata/influxdb/client/v2"
)

var influxDBClient *influx.Influx

type influxDBWriter struct {
	buf bytes.Buffer
	config *types.InfluxDB
}

var influxDBTicker *time.Ticker

const (
	influxDBMetricsBackendReqsName = "traefik.backend.requests.total"
	influxDBMetricsBackendLatencyName = "traefik.backend.request.duration"
	influxDBRetriesTotalName = "traefik.backend.retries.total"
	influxDBConfigReloadsName = "traefik.config.reload.total"
	influxDBConfigReloadsFailureName = influxDBConfigReloadsName + ".failure"
	influxDBLastConfigReloadSuccessName = "traefik.config.reload.lastSuccessTimestamp"
	influxDBLastConfigReloadFailureName = "traefik.config.reload.lastFailureTimestamp"
	influxDBEntrypointReqsName = "traefik.entrypoint.requests.total"
	influxDBEntrypointReqDurationName = "traefik.entrypoint.request.duration"
	influxDBEntrypointOpenConnsName = "traefik.entrypoint.connections.open"
	influxDBOpenConnsName = "traefik.backend.connections.open"
	influxDBServerUpName = "traefik.backend.server.up"
)

const (
	protocolHTTP = "http"
	protocolUDP = "udp"
)

// RegisterInfluxDB registers the metrics pusher if this didn't happen yet and creates an InfluxDB Registry instance.
func RegisterInfluxDB(ctx context.Context, config *types.InfluxDB) Registry {
	if influxDBClient == nil {
		influxDBClient = initInfluxDBClient(ctx, config)
	}
	if influxDBTicker == nil {
		influxDBTicker = initInfluxDBTicker(ctx, config)
	}

	return &standardRegistry{
		enabled: true,
		configReloadsCounter: influxDBClient.NewCounter(influxDBConfigReloadsName),
		configReloadsFailureCounter: influxDBClient.NewCounter(influxDBConfigReloadsFailureName),
		lastConfigReloadSuccessGauge: influxDBClient.NewGauge(influxDBLastConfigReloadSuccessName),
		lastConfigReloadFailureGauge: influxDBClient.NewGauge(influxDBLastConfigReloadFailureName),
		entrypointReqsCounter: influxDBClient.NewCounter(influxDBEntrypointReqsName),
		entrypointReqDurationHistogram: influxDBClient.NewHistogram(influxDBEntrypointReqDurationName),
		entrypointOpenConnsGauge: influxDBClient.NewGauge(influxDBEntrypointOpenConnsName),
		backendReqsCounter: influxDBClient.NewCounter(influxDBMetricsBackendReqsName),
		backendReqDurationHistogram: influxDBClient.NewHistogram(influxDBMetricsBackendLatencyName),
		backendRetriesCounter: influxDBClient.NewCounter(influxDBRetriesTotalName),
		backendOpenConnsGauge: influxDBClient.NewGauge(influxDBOpenConnsName),
		backendServerUpGauge: influxDBClient.NewGauge(influxDBServerUpName),
	}
}

// initInfluxDBClient creates an influxDB client.
func initInfluxDBClient(ctx context.Context, config *types.InfluxDB) *influx.Influx {
	logger := log.FromContext(ctx)

	// TODO deprecated: move this switch into configuration.SetEffectiveConfiguration when web provider will be removed.
	switch config.Protocol {
	case protocolUDP:
		if len(config.Database) > 0 || len(config.RetentionPolicy) > 0 {
			logger.Warn("Database and RetentionPolicy options have no effect with UDP.")
			config.Database = ""
			config.RetentionPolicy = ""
		}
	case protocolHTTP:
		if u, err := url.Parse(config.Address); err == nil {
			if u.Scheme != "http" && u.Scheme != "https" {
				logger.Warnf("InfluxDB address %s should specify a scheme (http or https): falling back on HTTP.", config.Address)
				config.Address = "http://" + config.Address
			}
		} else {
			logger.Errorf("Unable to parse the InfluxDB address %v: falling back on UDP.", err)
			config.Protocol = protocolUDP
			config.Database = ""
			config.RetentionPolicy = ""
		}
	default:
		logger.Warnf("Unsupported protocol %s: falling back on UDP.", config.Protocol)
		config.Protocol = protocolUDP
		config.Database = ""
		config.RetentionPolicy = ""
	}

	return influx.New(
		map[string]string{},
		influxdb.BatchPointsConfig{
			Database: config.Database,
			RetentionPolicy: config.RetentionPolicy,
		},
		kitlog.LoggerFunc(func(keyvals ...interface{}) error {
			log.WithoutContext().WithField(log.MetricsProviderName, "influxdb").Info(keyvals)
			return nil
		}))
}

// initInfluxDBTicker initializes the metrics pusher.
func initInfluxDBTicker(ctx context.Context, config *types.InfluxDB) *time.Ticker {
	pushInterval, err := time.ParseDuration(config.PushInterval)
	if err != nil {
		log.FromContext(ctx).Warnf("Unable to parse %s from config.PushInterval: using 10s as the default value", config.PushInterval)
		pushInterval = 10 * time.Second
	}

	report := time.NewTicker(pushInterval)

	safe.Go(func() {
		var buf bytes.Buffer
		influxDBClient.WriteLoop(report.C, &influxDBWriter{buf: buf, config: config})
	})

	return report
}

// StopInfluxDB stops internal influxDBTicker which controls the pushing of metrics to the InfluxDB Agent and resets it to `nil`.
func StopInfluxDB() {
	if influxDBTicker != nil {
		influxDBTicker.Stop()
	}
	influxDBTicker = nil
}

// Write creates a http or udp client and attempts to write BatchPoints.
// If a "database not found" error is encountered, a CREATE DATABASE
// query is attempted when using protocol http.
func (w *influxDBWriter) Write(bp influxdb.BatchPoints) error {
	c, err := w.initWriteClient()
	if err != nil {
		return err
	}

	defer c.Close()

	if writeErr := c.Write(bp); writeErr != nil {
		ctx := log.With(context.Background(), log.Str(log.MetricsProviderName, "influxdb"))
		log.FromContext(ctx).Errorf("Error while writing to InfluxDB: %s", writeErr.Error())

		if handleErr := w.handleWriteError(ctx, c, writeErr); handleErr != nil {
			return handleErr
		}
		// Retry write after successful handling of writeErr
		return c.Write(bp)
	}
	return nil
}

func (w *influxDBWriter) initWriteClient() (influxdb.Client, error) {
	if w.config.Protocol == "http" {
		return influxdb.NewHTTPClient(influxdb.HTTPConfig{
			Addr: w.config.Address,
			Username: w.config.Username,
			Password: w.config.Password,
		})
	}

	return influxdb.NewUDPClient(influxdb.UDPConfig{
		Addr: w.config.Address,
	})
}

func (w *influxDBWriter) handleWriteError(ctx context.Context, c influxdb.Client, writeErr error) error {
	if w.config.Protocol != protocolHTTP {
		return writeErr
	}

	match, matchErr := regexp.MatchString("database not found", writeErr.Error())

	if matchErr != nil || !match {
		return writeErr
	}

	qStr := fmt.Sprintf("CREATE DATABASE \"%s\"", w.config.Database)
	if w.config.RetentionPolicy != "" {
		qStr = fmt.Sprintf("%s WITH NAME \"%s\"", qStr, w.config.RetentionPolicy)
	}

	logger := log.FromContext(ctx)

	logger.Debugf("InfluxDB database not found: attempting to create one with %s", qStr)

	q := influxdb.NewQuery(qStr, "", "")
	response, queryErr := c.Query(q)
	if queryErr == nil && response.Error() != nil {
		queryErr = response.Error()
	}
	if queryErr != nil {
		logger.Errorf("Error while creating the InfluxDB database %s", queryErr)
		return queryErr
	}

	logger.Debugf("Successfully created the InfluxDB database %s", w.config.Database)
	return nil
}
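A usage sketch for the HTTP write path above (not part of the commit; the address, database, and retention-policy values are illustrative). With Protocol set to "http", Database and RetentionPolicy are honoured, and a "database not found" write error triggers the CREATE DATABASE query built in handleWriteError before the batch is retried once:

package main

import (
	"context"

	"github.com/containous/traefik/pkg/metrics"
	"github.com/containous/traefik/pkg/types"
)

func main() {
	registry := metrics.RegisterInfluxDB(context.Background(), &types.InfluxDB{
		Address:         "http://localhost:8086", // illustrative values
		Protocol:        "http",
		PushInterval:    "10s",
		Database:        "traefik",
		RetentionPolicy: "two_weeks",
	})

	registry.BackendReqsCounter().With("backend", "b1", "code", "200").Add(1)

	// Stops the ticker that drives WriteLoop and resets it to nil.
	metrics.StopInfluxDB()
}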
pkg/metrics/influxdb_test.go (Normal file, 135 lines added)
@@ -0,0 +1,135 @@
package metrics

import (
	"context"
	"fmt"
	"io/ioutil"
	"net/http"
	"net/http/httptest"
	"regexp"
	"strconv"
	"testing"
	"time"

	"github.com/containous/traefik/pkg/types"
	"github.com/stvp/go-udp-testing"
)

func TestInfluxDB(t *testing.T) {
	udp.SetAddr(":8089")
	// This is needed to make sure that UDP Listener listens for data a bit longer, otherwise it will quit after a millisecond
	udp.Timeout = 5 * time.Second

	influxDBRegistry := RegisterInfluxDB(context.Background(), &types.InfluxDB{Address: ":8089", PushInterval: "1s"})
	defer StopInfluxDB()

	if !influxDBRegistry.IsEnabled() {
		t.Fatalf("InfluxDB registry must be enabled")
	}

	expectedBackend := []string{
		`(traefik\.backend\.requests\.total,backend=test,code=200,method=GET count=1) [\d]{19}`,
		`(traefik\.backend\.requests\.total,backend=test,code=404,method=GET count=1) [\d]{19}`,
		`(traefik\.backend\.request\.duration,backend=test,code=200 p50=10000,p90=10000,p95=10000,p99=10000) [\d]{19}`,
		`(traefik\.backend\.retries\.total(?:,code=[\d]{3},method=GET)?,backend=test count=2) [\d]{19}`,
		`(traefik\.config\.reload\.total(?:[a-z=0-9A-Z,]+)? count=1) [\d]{19}`,
		`(traefik\.config\.reload\.total\.failure(?:[a-z=0-9A-Z,]+)? count=1) [\d]{19}`,
		`(traefik\.backend\.server\.up,backend=test(?:[a-z=0-9A-Z,]+)?,url=http://127.0.0.1 value=1) [\d]{19}`,
	}

	msgBackend := udp.ReceiveString(t, func() {
		influxDBRegistry.BackendReqsCounter().With("backend", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
		influxDBRegistry.BackendReqsCounter().With("backend", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
		influxDBRegistry.BackendRetriesCounter().With("backend", "test").Add(1)
		influxDBRegistry.BackendRetriesCounter().With("backend", "test").Add(1)
		influxDBRegistry.BackendReqDurationHistogram().With("backend", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
		influxDBRegistry.ConfigReloadsCounter().Add(1)
		influxDBRegistry.ConfigReloadsFailureCounter().Add(1)
		influxDBRegistry.BackendServerUpGauge().With("backend", "test", "url", "http://127.0.0.1").Set(1)
	})

	assertMessage(t, msgBackend, expectedBackend)

	expectedEntrypoint := []string{
		`(traefik\.entrypoint\.requests\.total,entrypoint=test(?:[a-z=0-9A-Z,:/.]+)? count=1) [\d]{19}`,
		`(traefik\.entrypoint\.request\.duration(?:,code=[\d]{3})?,entrypoint=test(?:[a-z=0-9A-Z,:/.]+)? p50=10000,p90=10000,p95=10000,p99=10000) [\d]{19}`,
		`(traefik\.entrypoint\.connections\.open,entrypoint=test value=1) [\d]{19}`,
	}

	msgEntrypoint := udp.ReceiveString(t, func() {
		influxDBRegistry.EntrypointReqsCounter().With("entrypoint", "test").Add(1)
		influxDBRegistry.EntrypointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
		influxDBRegistry.EntrypointOpenConnsGauge().With("entrypoint", "test").Set(1)
	})

	assertMessage(t, msgEntrypoint, expectedEntrypoint)
}

func TestInfluxDBHTTP(t *testing.T) {
	c := make(chan *string)
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		body, err := ioutil.ReadAll(r.Body)
		if err != nil {
			http.Error(w, "can't read body "+err.Error(), http.StatusBadRequest)
			return
		}
		bodyStr := string(body)
		c <- &bodyStr
		fmt.Fprintln(w, "ok")
	}))
	defer ts.Close()

	influxDBRegistry := RegisterInfluxDB(context.Background(), &types.InfluxDB{Address: ts.URL, Protocol: "http", PushInterval: "1s", Database: "test", RetentionPolicy: "autogen"})
	defer StopInfluxDB()

	if !influxDBRegistry.IsEnabled() {
		t.Fatalf("InfluxDB registry must be enabled")
	}

	expectedBackend := []string{
		`(traefik\.backend\.requests\.total,backend=test,code=200,method=GET count=1) [\d]{19}`,
		`(traefik\.backend\.requests\.total,backend=test,code=404,method=GET count=1) [\d]{19}`,
		`(traefik\.backend\.request\.duration,backend=test,code=200 p50=10000,p90=10000,p95=10000,p99=10000) [\d]{19}`,
		`(traefik\.backend\.retries\.total(?:,code=[\d]{3},method=GET)?,backend=test count=2) [\d]{19}`,
		`(traefik\.config\.reload\.total(?:[a-z=0-9A-Z,]+)? count=1) [\d]{19}`,
		`(traefik\.config\.reload\.total\.failure(?:[a-z=0-9A-Z,]+)? count=1) [\d]{19}`,
		`(traefik\.backend\.server\.up,backend=test(?:[a-z=0-9A-Z,]+)?,url=http://127.0.0.1 value=1) [\d]{19}`,
	}

	influxDBRegistry.BackendReqsCounter().With("backend", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
	influxDBRegistry.BackendReqsCounter().With("backend", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
	influxDBRegistry.BackendRetriesCounter().With("backend", "test").Add(1)
	influxDBRegistry.BackendRetriesCounter().With("backend", "test").Add(1)
	influxDBRegistry.BackendReqDurationHistogram().With("backend", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
	influxDBRegistry.ConfigReloadsCounter().Add(1)
	influxDBRegistry.ConfigReloadsFailureCounter().Add(1)
	influxDBRegistry.BackendServerUpGauge().With("backend", "test", "url", "http://127.0.0.1").Set(1)
	msgBackend := <-c

	assertMessage(t, *msgBackend, expectedBackend)

	expectedEntrypoint := []string{
		`(traefik\.entrypoint\.requests\.total,entrypoint=test(?:[a-z=0-9A-Z,:/.]+)? count=1) [\d]{19}`,
		`(traefik\.entrypoint\.request\.duration(?:,code=[\d]{3})?,entrypoint=test(?:[a-z=0-9A-Z,:/.]+)? p50=10000,p90=10000,p95=10000,p99=10000) [\d]{19}`,
		`(traefik\.entrypoint\.connections\.open,entrypoint=test value=1) [\d]{19}`,
	}

	influxDBRegistry.EntrypointReqsCounter().With("entrypoint", "test").Add(1)
	influxDBRegistry.EntrypointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
	influxDBRegistry.EntrypointOpenConnsGauge().With("entrypoint", "test").Set(1)
	msgEntrypoint := <-c

	assertMessage(t, *msgEntrypoint, expectedEntrypoint)
}

func assertMessage(t *testing.T, msg string, patterns []string) {
	t.Helper()
	for _, pattern := range patterns {
		re := regexp.MustCompile(pattern)
		match := re.FindStringSubmatch(msg)
		if len(match) != 2 {
			t.Errorf("Got %q %v, want %q", msg, match, pattern)
		}
	}
}
pkg/metrics/metrics.go (Normal file, 177 lines added)
@@ -0,0 +1,177 @@
package metrics

import (
	"github.com/go-kit/kit/metrics"
	"github.com/go-kit/kit/metrics/multi"
)

// Registry has to be implemented by any system that wants to monitor and expose metrics.
type Registry interface {
	// IsEnabled shows whether metrics instrumentation is enabled.
	IsEnabled() bool

	// server metrics
	ConfigReloadsCounter() metrics.Counter
	ConfigReloadsFailureCounter() metrics.Counter
	LastConfigReloadSuccessGauge() metrics.Gauge
	LastConfigReloadFailureGauge() metrics.Gauge

	// entry point metrics
	EntrypointReqsCounter() metrics.Counter
	EntrypointReqDurationHistogram() metrics.Histogram
	EntrypointOpenConnsGauge() metrics.Gauge

	// backend metrics
	BackendReqsCounter() metrics.Counter
	BackendReqDurationHistogram() metrics.Histogram
	BackendOpenConnsGauge() metrics.Gauge
	BackendRetriesCounter() metrics.Counter
	BackendServerUpGauge() metrics.Gauge
}

// NewVoidRegistry is a noop implementation of metrics.Registry.
// It is used to avoid nil checking in components that do metric collections.
func NewVoidRegistry() Registry {
	return NewMultiRegistry([]Registry{})
}

// NewMultiRegistry is an implementation of metrics.Registry that wraps multiple registries.
// It handles the case when a registry hasn't registered some metric and returns nil.
// This allows for feature imparity between the different metric implementations.
func NewMultiRegistry(registries []Registry) Registry {
	var configReloadsCounter []metrics.Counter
	var configReloadsFailureCounter []metrics.Counter
	var lastConfigReloadSuccessGauge []metrics.Gauge
	var lastConfigReloadFailureGauge []metrics.Gauge
	var entrypointReqsCounter []metrics.Counter
	var entrypointReqDurationHistogram []metrics.Histogram
	var entrypointOpenConnsGauge []metrics.Gauge
	var backendReqsCounter []metrics.Counter
	var backendReqDurationHistogram []metrics.Histogram
	var backendOpenConnsGauge []metrics.Gauge
	var backendRetriesCounter []metrics.Counter
	var backendServerUpGauge []metrics.Gauge

	for _, r := range registries {
		if r.ConfigReloadsCounter() != nil {
			configReloadsCounter = append(configReloadsCounter, r.ConfigReloadsCounter())
		}
		if r.ConfigReloadsFailureCounter() != nil {
			configReloadsFailureCounter = append(configReloadsFailureCounter, r.ConfigReloadsFailureCounter())
		}
		if r.LastConfigReloadSuccessGauge() != nil {
			lastConfigReloadSuccessGauge = append(lastConfigReloadSuccessGauge, r.LastConfigReloadSuccessGauge())
		}
		if r.LastConfigReloadFailureGauge() != nil {
			lastConfigReloadFailureGauge = append(lastConfigReloadFailureGauge, r.LastConfigReloadFailureGauge())
		}
		if r.EntrypointReqsCounter() != nil {
			entrypointReqsCounter = append(entrypointReqsCounter, r.EntrypointReqsCounter())
		}
		if r.EntrypointReqDurationHistogram() != nil {
			entrypointReqDurationHistogram = append(entrypointReqDurationHistogram, r.EntrypointReqDurationHistogram())
		}
		if r.EntrypointOpenConnsGauge() != nil {
			entrypointOpenConnsGauge = append(entrypointOpenConnsGauge, r.EntrypointOpenConnsGauge())
		}
		if r.BackendReqsCounter() != nil {
			backendReqsCounter = append(backendReqsCounter, r.BackendReqsCounter())
		}
		if r.BackendReqDurationHistogram() != nil {
			backendReqDurationHistogram = append(backendReqDurationHistogram, r.BackendReqDurationHistogram())
		}
		if r.BackendOpenConnsGauge() != nil {
			backendOpenConnsGauge = append(backendOpenConnsGauge, r.BackendOpenConnsGauge())
		}
		if r.BackendRetriesCounter() != nil {
			backendRetriesCounter = append(backendRetriesCounter, r.BackendRetriesCounter())
		}
		if r.BackendServerUpGauge() != nil {
			backendServerUpGauge = append(backendServerUpGauge, r.BackendServerUpGauge())
		}
	}

	return &standardRegistry{
		enabled: len(registries) > 0,
		configReloadsCounter: multi.NewCounter(configReloadsCounter...),
		configReloadsFailureCounter: multi.NewCounter(configReloadsFailureCounter...),
		lastConfigReloadSuccessGauge: multi.NewGauge(lastConfigReloadSuccessGauge...),
		lastConfigReloadFailureGauge: multi.NewGauge(lastConfigReloadFailureGauge...),
		entrypointReqsCounter: multi.NewCounter(entrypointReqsCounter...),
		entrypointReqDurationHistogram: multi.NewHistogram(entrypointReqDurationHistogram...),
		entrypointOpenConnsGauge: multi.NewGauge(entrypointOpenConnsGauge...),
		backendReqsCounter: multi.NewCounter(backendReqsCounter...),
		backendReqDurationHistogram: multi.NewHistogram(backendReqDurationHistogram...),
		backendOpenConnsGauge: multi.NewGauge(backendOpenConnsGauge...),
		backendRetriesCounter: multi.NewCounter(backendRetriesCounter...),
		backendServerUpGauge: multi.NewGauge(backendServerUpGauge...),
	}
}

type standardRegistry struct {
	enabled bool
	configReloadsCounter metrics.Counter
	configReloadsFailureCounter metrics.Counter
	lastConfigReloadSuccessGauge metrics.Gauge
	lastConfigReloadFailureGauge metrics.Gauge
	entrypointReqsCounter metrics.Counter
	entrypointReqDurationHistogram metrics.Histogram
	entrypointOpenConnsGauge metrics.Gauge
	backendReqsCounter metrics.Counter
	backendReqDurationHistogram metrics.Histogram
	backendOpenConnsGauge metrics.Gauge
	backendRetriesCounter metrics.Counter
	backendServerUpGauge metrics.Gauge
}

func (r *standardRegistry) IsEnabled() bool {
	return r.enabled
}

func (r *standardRegistry) ConfigReloadsCounter() metrics.Counter {
	return r.configReloadsCounter
}

func (r *standardRegistry) ConfigReloadsFailureCounter() metrics.Counter {
	return r.configReloadsFailureCounter
}

func (r *standardRegistry) LastConfigReloadSuccessGauge() metrics.Gauge {
	return r.lastConfigReloadSuccessGauge
}

func (r *standardRegistry) LastConfigReloadFailureGauge() metrics.Gauge {
	return r.lastConfigReloadFailureGauge
}

func (r *standardRegistry) EntrypointReqsCounter() metrics.Counter {
	return r.entrypointReqsCounter
}

func (r *standardRegistry) EntrypointReqDurationHistogram() metrics.Histogram {
	return r.entrypointReqDurationHistogram
}

func (r *standardRegistry) EntrypointOpenConnsGauge() metrics.Gauge {
	return r.entrypointOpenConnsGauge
}

func (r *standardRegistry) BackendReqsCounter() metrics.Counter {
	return r.backendReqsCounter
}

func (r *standardRegistry) BackendReqDurationHistogram() metrics.Histogram {
	return r.backendReqDurationHistogram
}

func (r *standardRegistry) BackendOpenConnsGauge() metrics.Gauge {
	return r.backendOpenConnsGauge
}

func (r *standardRegistry) BackendRetriesCounter() metrics.Counter {
	return r.backendRetriesCounter
}

func (r *standardRegistry) BackendServerUpGauge() metrics.Gauge {
	return r.backendServerUpGauge
}
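A sketch of how the pieces above compose (not part of the commit; the wiring is hypothetical): each enabled provider contributes a Registry, NewMultiRegistry fans every call out to all of them, and NewVoidRegistry keeps call sites free of nil checks when no provider is configured.

package main

import (
	"context"

	"github.com/containous/traefik/pkg/metrics"
	"github.com/containous/traefik/pkg/types"
)

func main() {
	var registries []metrics.Registry

	// RegisterPrometheus returns nil when the collector cannot be registered,
	// so guard before adding it to the slice.
	if prom := metrics.RegisterPrometheus(context.Background(), &types.Prometheus{}); prom != nil {
		registries = append(registries, prom)
	}
	registries = append(registries, metrics.RegisterDatadog(context.Background(), &types.Datadog{PushInterval: "10s"}))

	// With an empty slice this behaves like NewVoidRegistry: IsEnabled() is
	// false and every metric call is a no-op.
	registry := metrics.NewMultiRegistry(registries)

	registry.ConfigReloadsCounter().Add(1)
	registry.BackendReqsCounter().With("backend", "b1", "code", "200").Add(1)
}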
pkg/metrics/metrics_test.go (Normal file, 76 lines added)
@@ -0,0 +1,76 @@
package metrics

import (
	"testing"

	"github.com/go-kit/kit/metrics"
	"github.com/stretchr/testify/assert"
)

func TestNewMultiRegistry(t *testing.T) {
	registries := []Registry{newCollectingRetryMetrics(), newCollectingRetryMetrics()}
	registry := NewMultiRegistry(registries)

	registry.BackendReqsCounter().With("key", "requests").Add(1)
	registry.BackendReqDurationHistogram().With("key", "durations").Observe(2)
	registry.BackendRetriesCounter().With("key", "retries").Add(3)

	for _, collectingRegistry := range registries {
		cReqsCounter := collectingRegistry.BackendReqsCounter().(*counterMock)
		cReqDurationHistogram := collectingRegistry.BackendReqDurationHistogram().(*histogramMock)
		cRetriesCounter := collectingRegistry.BackendRetriesCounter().(*counterMock)

		wantCounterValue := float64(1)
		if cReqsCounter.counterValue != wantCounterValue {
			t.Errorf("Got value %f for ReqsCounter, want %f", cReqsCounter.counterValue, wantCounterValue)
		}
		wantHistogramValue := float64(2)
		if cReqDurationHistogram.lastHistogramValue != wantHistogramValue {
			t.Errorf("Got last observation %f for ReqDurationHistogram, want %f", cReqDurationHistogram.lastHistogramValue, wantHistogramValue)
		}
		wantCounterValue = float64(3)
		if cRetriesCounter.counterValue != wantCounterValue {
			t.Errorf("Got value %f for RetriesCounter, want %f", cRetriesCounter.counterValue, wantCounterValue)
		}

		assert.Equal(t, []string{"key", "requests"}, cReqsCounter.lastLabelValues)
		assert.Equal(t, []string{"key", "durations"}, cReqDurationHistogram.lastLabelValues)
		assert.Equal(t, []string{"key", "retries"}, cRetriesCounter.lastLabelValues)
	}
}

func newCollectingRetryMetrics() Registry {
	return &standardRegistry{
		backendReqsCounter: &counterMock{},
		backendReqDurationHistogram: &histogramMock{},
		backendRetriesCounter: &counterMock{},
	}
}

type counterMock struct {
	counterValue float64
	lastLabelValues []string
}

func (c *counterMock) With(labelValues ...string) metrics.Counter {
	c.lastLabelValues = labelValues
	return c
}

func (c *counterMock) Add(delta float64) {
	c.counterValue += delta
}

type histogramMock struct {
	lastHistogramValue float64
	lastLabelValues []string
}

func (c *histogramMock) With(labelValues ...string) metrics.Histogram {
	c.lastLabelValues = labelValues
	return c
}

func (c *histogramMock) Observe(value float64) {
	c.lastHistogramValue = value
}
pkg/metrics/prometheus.go (Normal file, 512 lines added)
@@ -0,0 +1,512 @@
package metrics

import (
	"context"
	"net/http"
	"sort"
	"strings"
	"sync"

	"github.com/containous/mux"
	"github.com/containous/traefik/pkg/config"
	"github.com/containous/traefik/pkg/log"
	"github.com/containous/traefik/pkg/safe"
	"github.com/containous/traefik/pkg/types"
	"github.com/go-kit/kit/metrics"
	stdprometheus "github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

const (
	// MetricNamePrefix prefix of all metric names
	MetricNamePrefix = "traefik_"

	// server meta information
	metricConfigPrefix = MetricNamePrefix + "config_"
	configReloadsTotalName = metricConfigPrefix + "reloads_total"
	configReloadsFailuresTotalName = metricConfigPrefix + "reloads_failure_total"
	configLastReloadSuccessName = metricConfigPrefix + "last_reload_success"
	configLastReloadFailureName = metricConfigPrefix + "last_reload_failure"

	// entrypoint
	metricEntryPointPrefix = MetricNamePrefix + "entrypoint_"
	entrypointReqsTotalName = metricEntryPointPrefix + "requests_total"
	entrypointReqDurationName = metricEntryPointPrefix + "request_duration_seconds"
	entrypointOpenConnsName = metricEntryPointPrefix + "open_connections"

	// backend level.

	// MetricBackendPrefix prefix of all backend metric names
	MetricBackendPrefix = MetricNamePrefix + "backend_"
	backendReqsTotalName = MetricBackendPrefix + "requests_total"
	backendReqDurationName = MetricBackendPrefix + "request_duration_seconds"
	backendOpenConnsName = MetricBackendPrefix + "open_connections"
	backendRetriesTotalName = MetricBackendPrefix + "retries_total"
	backendServerUpName = MetricBackendPrefix + "server_up"
)

// promState holds all metric state internally and acts as the only Collector we register for Prometheus.
//
// This enables control to remove metrics that belong to outdated configuration.
// As an example why this is required, consider Traefik learns about a new service.
// It populates the 'traefik_server_backend_up' metric for it with a value of 1 (alive).
// When the backend is undeployed now the metric is still there in the client library
// and will be returned on the metrics endpoint until Traefik would be restarted.
//
// To solve this problem promState keeps track of Traefik's dynamic configuration.
// Metrics that "belong" to a dynamic configuration part like backends or entrypoints
// are removed after they were scraped at least once when the corresponding object
// doesn't exist anymore.
var promState = newPrometheusState()

// PrometheusHandler exposes Prometheus routes.
type PrometheusHandler struct{}

// Append adds Prometheus routes on a router.
func (h PrometheusHandler) Append(router *mux.Router) {
	router.Methods(http.MethodGet).Path("/metrics").Handler(promhttp.Handler())
}

// RegisterPrometheus registers all Prometheus metrics.
// It must be called only once and failing to register the metrics will lead to a panic.
func RegisterPrometheus(ctx context.Context, config *types.Prometheus) Registry {
	standardRegistry := initStandardRegistry(config)

	if !registerPromState(ctx) {
		return nil
	}

	return standardRegistry
}

func initStandardRegistry(config *types.Prometheus) Registry {
	buckets := []float64{0.1, 0.3, 1.2, 5.0}
	if config.Buckets != nil {
		buckets = config.Buckets
	}

	safe.Go(func() {
		promState.ListenValueUpdates()
	})

	configReloads := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
		Name: configReloadsTotalName,
		Help: "Config reloads",
	}, []string{})
	configReloadsFailures := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
		Name: configReloadsFailuresTotalName,
		Help: "Config failure reloads",
	}, []string{})
	lastConfigReloadSuccess := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
		Name: configLastReloadSuccessName,
		Help: "Last config reload success",
	}, []string{})
	lastConfigReloadFailure := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
		Name: configLastReloadFailureName,
		Help: "Last config reload failure",
	}, []string{})

	entrypointReqs := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
		Name: entrypointReqsTotalName,
		Help: "How many HTTP requests processed on an entrypoint, partitioned by status code, protocol, and method.",
	}, []string{"code", "method", "protocol", "entrypoint"})
	entrypointReqDurations := newHistogramFrom(promState.collectors, stdprometheus.HistogramOpts{
		Name: entrypointReqDurationName,
		Help: "How long it took to process the request on an entrypoint, partitioned by status code, protocol, and method.",
		Buckets: buckets,
	}, []string{"code", "method", "protocol", "entrypoint"})
	entrypointOpenConns := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
		Name: entrypointOpenConnsName,
		Help: "How many open connections exist on an entrypoint, partitioned by method and protocol.",
	}, []string{"method", "protocol", "entrypoint"})

	backendReqs := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
		Name: backendReqsTotalName,
		Help: "How many HTTP requests processed on a backend, partitioned by status code, protocol, and method.",
	}, []string{"code", "method", "protocol", "backend"})
	backendReqDurations := newHistogramFrom(promState.collectors, stdprometheus.HistogramOpts{
		Name: backendReqDurationName,
		Help: "How long it took to process the request on a backend, partitioned by status code, protocol, and method.",
		Buckets: buckets,
	}, []string{"code", "method", "protocol", "backend"})
	backendOpenConns := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
		Name: backendOpenConnsName,
		Help: "How many open connections exist on a backend, partitioned by method and protocol.",
	}, []string{"method", "protocol", "backend"})
	backendRetries := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
		Name: backendRetriesTotalName,
		Help: "How many request retries happened on a backend.",
	}, []string{"backend"})
	backendServerUp := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
		Name: backendServerUpName,
		Help: "Backend server is up, described by gauge value of 0 or 1.",
	}, []string{"backend", "url"})

	promState.describers = []func(chan<- *stdprometheus.Desc){
		configReloads.cv.Describe,
		configReloadsFailures.cv.Describe,
		lastConfigReloadSuccess.gv.Describe,
		lastConfigReloadFailure.gv.Describe,
		entrypointReqs.cv.Describe,
		entrypointReqDurations.hv.Describe,
		entrypointOpenConns.gv.Describe,
		backendReqs.cv.Describe,
		backendReqDurations.hv.Describe,
		backendOpenConns.gv.Describe,
		backendRetries.cv.Describe,
		backendServerUp.gv.Describe,
	}

	return &standardRegistry{
		enabled: true,
		configReloadsCounter: configReloads,
		configReloadsFailureCounter: configReloadsFailures,
		lastConfigReloadSuccessGauge: lastConfigReloadSuccess,
		lastConfigReloadFailureGauge: lastConfigReloadFailure,
		entrypointReqsCounter: entrypointReqs,
		entrypointReqDurationHistogram: entrypointReqDurations,
		entrypointOpenConnsGauge: entrypointOpenConns,
		backendReqsCounter: backendReqs,
		backendReqDurationHistogram: backendReqDurations,
		backendOpenConnsGauge: backendOpenConns,
		backendRetriesCounter: backendRetries,
		backendServerUpGauge: backendServerUp,
	}
}

func registerPromState(ctx context.Context) bool {
	if err := stdprometheus.Register(promState); err != nil {
		logger := log.FromContext(ctx)
		if _, ok := err.(stdprometheus.AlreadyRegisteredError); !ok {
			logger.Errorf("Unable to register Traefik to Prometheus: %v", err)
			return false
		}
		logger.Debug("Prometheus collector already registered.")
	}
	return true
}

// OnConfigurationUpdate receives the current configuration from Traefik.
// It then converts the configuration to the optimized package internal format
// and sets it to the promState.
func OnConfigurationUpdate(configurations config.Configurations) {
	dynamicConfig := newDynamicConfig()

	// FIXME metrics
	// for _, config := range configurations {
	// 	for _, frontend := range config.Frontends {
	// 		for _, entrypointName := range frontend.EntryPoints {
	// 			dynamicConfig.entrypoints[entrypointName] = true
	// 		}
	// 	}
	//
	// 	for backendName, backend := range config.Backends {
	// 		dynamicConfig.backends[backendName] = make(map[string]bool)
	// 		for _, server := range backend.Servers {
	// 			dynamicConfig.backends[backendName][server.URL] = true
	// 		}
	// 	}
	// }

	promState.SetDynamicConfig(dynamicConfig)
}

func newPrometheusState() *prometheusState {
	return &prometheusState{
		collectors: make(chan *collector),
		dynamicConfig: newDynamicConfig(),
		state: make(map[string]*collector),
	}
}

type prometheusState struct {
	collectors chan *collector
	describers []func(ch chan<- *stdprometheus.Desc)

	mtx sync.Mutex
	dynamicConfig *dynamicConfig
	state map[string]*collector
}

func (ps *prometheusState) SetDynamicConfig(dynamicConfig *dynamicConfig) {
	ps.mtx.Lock()
	defer ps.mtx.Unlock()
	ps.dynamicConfig = dynamicConfig
}

func (ps *prometheusState) ListenValueUpdates() {
	for collector := range ps.collectors {
		ps.mtx.Lock()
		ps.state[collector.id] = collector
		ps.mtx.Unlock()
	}
}

// Describe implements prometheus.Collector and simply calls
// the registered describer functions.
func (ps *prometheusState) Describe(ch chan<- *stdprometheus.Desc) {
	for _, desc := range ps.describers {
		desc(ch)
	}
}

// Collect implements prometheus.Collector. It calls the Collect
// method of all metrics it received on the collectors channel.
// It's also responsible to remove metrics that belong to an outdated configuration.
// The removal happens only after their Collect method was called to ensure that
// also those metrics will be exported on the current scrape.
func (ps *prometheusState) Collect(ch chan<- stdprometheus.Metric) {
	ps.mtx.Lock()
	defer ps.mtx.Unlock()

	var outdatedKeys []string
	for key, cs := range ps.state {
		cs.collector.Collect(ch)

		if ps.isOutdated(cs) {
			outdatedKeys = append(outdatedKeys, key)
		}
	}

	for _, key := range outdatedKeys {
		ps.state[key].delete()
		delete(ps.state, key)
	}
}

// isOutdated checks whether the passed collector has labels that mark
// it as belonging to an outdated configuration of Traefik.
func (ps *prometheusState) isOutdated(collector *collector) bool {
	labels := collector.labels

	if entrypointName, ok := labels["entrypoint"]; ok && !ps.dynamicConfig.hasEntrypoint(entrypointName) {
		return true
	}

	if backendName, ok := labels["backend"]; ok {
		if !ps.dynamicConfig.hasBackend(backendName) {
			return true
		}
		if url, ok := labels["url"]; ok && !ps.dynamicConfig.hasServerURL(backendName, url) {
			return true
		}
	}

	return false
}

func newDynamicConfig() *dynamicConfig {
	return &dynamicConfig{
		entrypoints: make(map[string]bool),
		backends: make(map[string]map[string]bool),
	}
}

// dynamicConfig holds the current configuration for entrypoints, backends,
// and server URLs in an optimized way to check for existence. This provides
// a performant way to check whether the collected metrics belong to the
// current configuration or to an outdated one.
type dynamicConfig struct {
	entrypoints map[string]bool
	backends map[string]map[string]bool
}

func (d *dynamicConfig) hasEntrypoint(entrypointName string) bool {
	_, ok := d.entrypoints[entrypointName]
	return ok
}

func (d *dynamicConfig) hasBackend(backendName string) bool {
	_, ok := d.backends[backendName]
	return ok
}

func (d *dynamicConfig) hasServerURL(backendName, serverURL string) bool {
	if backend, hasBackend := d.backends[backendName]; hasBackend {
		_, ok := backend[serverURL]
		return ok
	}
	return false
}

func newCollector(metricName string, labels stdprometheus.Labels, c stdprometheus.Collector, delete func()) *collector {
	return &collector{
		id: buildMetricID(metricName, labels),
		labels: labels,
		collector: c,
		delete: delete,
	}
}

// collector wraps a Collector object from the Prometheus client library.
// It adds information on how many generations this metric should be present
// in the /metrics output, relative to the time it was last tracked.
type collector struct {
	id string
	labels stdprometheus.Labels
	collector stdprometheus.Collector
	delete func()
}

func buildMetricID(metricName string, labels stdprometheus.Labels) string {
	var labelNamesValues []string
	for name, value := range labels {
		labelNamesValues = append(labelNamesValues, name, value)
	}
	sort.Strings(labelNamesValues)
	return metricName + ":" + strings.Join(labelNamesValues, "|")
}

func newCounterFrom(collectors chan<- *collector, opts stdprometheus.CounterOpts, labelNames []string) *counter {
	cv := stdprometheus.NewCounterVec(opts, labelNames)
	c := &counter{
		name: opts.Name,
		cv: cv,
		collectors: collectors,
	}
	if len(labelNames) == 0 {
		c.Add(0)
	}
	return c
}

type counter struct {
	name string
	cv *stdprometheus.CounterVec
	labelNamesValues labelNamesValues
	collectors chan<- *collector
}

func (c *counter) With(labelValues ...string) metrics.Counter {
	return &counter{
		name: c.name,
		cv: c.cv,
		labelNamesValues: c.labelNamesValues.With(labelValues...),
		collectors: c.collectors,
	}
}

func (c *counter) Add(delta float64) {
	labels := c.labelNamesValues.ToLabels()
	collector := c.cv.With(labels)
	collector.Add(delta)
	c.collectors <- newCollector(c.name, labels, collector, func() {
		c.cv.Delete(labels)
	})
}

func (c *counter) Describe(ch chan<- *stdprometheus.Desc) {
	c.cv.Describe(ch)
}

func newGaugeFrom(collectors chan<- *collector, opts stdprometheus.GaugeOpts, labelNames []string) *gauge {
	gv := stdprometheus.NewGaugeVec(opts, labelNames)
	g := &gauge{
		name: opts.Name,
		gv: gv,
		collectors: collectors,
	}
	if len(labelNames) == 0 {
		g.Set(0)
	}
	return g
}

type gauge struct {
	name string
	gv *stdprometheus.GaugeVec
	labelNamesValues labelNamesValues
	collectors chan<- *collector
}

func (g *gauge) With(labelValues ...string) metrics.Gauge {
	return &gauge{
		name: g.name,
		gv: g.gv,
		labelNamesValues: g.labelNamesValues.With(labelValues...),
		collectors: g.collectors,
	}
}

func (g *gauge) Add(delta float64) {
	labels := g.labelNamesValues.ToLabels()
	collector := g.gv.With(labels)
	collector.Add(delta)
	g.collectors <- newCollector(g.name, labels, collector, func() {
		g.gv.Delete(labels)
	})
}

func (g *gauge) Set(value float64) {
	labels := g.labelNamesValues.ToLabels()
	collector := g.gv.With(labels)
	collector.Set(value)
	g.collectors <- newCollector(g.name, labels, collector, func() {
		g.gv.Delete(labels)
	})
}

func (g *gauge) Describe(ch chan<- *stdprometheus.Desc) {
	g.gv.Describe(ch)
}

func newHistogramFrom(collectors chan<- *collector, opts stdprometheus.HistogramOpts, labelNames []string) *histogram {
	hv := stdprometheus.NewHistogramVec(opts, labelNames)
	return &histogram{
		name: opts.Name,
		hv: hv,
		collectors: collectors,
	}
}

type histogram struct {
	name string
	hv *stdprometheus.HistogramVec
	labelNamesValues labelNamesValues
	collectors chan<- *collector
}

func (h *histogram) With(labelValues ...string) metrics.Histogram {
	return &histogram{
		name: h.name,
		hv: h.hv,
		labelNamesValues: h.labelNamesValues.With(labelValues...),
		collectors: h.collectors,
	}
}

func (h *histogram) Observe(value float64) {
	labels := h.labelNamesValues.ToLabels()
	collector := h.hv.With(labels)
	collector.Observe(value)
	h.collectors <- newCollector(h.name, labels, collector, func() {
		h.hv.Delete(labels)
	})
}

func (h *histogram) Describe(ch chan<- *stdprometheus.Desc) {
	h.hv.Describe(ch)
}

// labelNamesValues is a type alias that provides validation on its With method.
// Metrics may include it as a member to help them satisfy With semantics and
// save some code duplication.
type labelNamesValues []string

// With validates the input, and returns a new aggregate labelNamesValues.
func (lvs labelNamesValues) With(labelValues ...string) labelNamesValues {
	if len(labelValues)%2 != 0 {
		labelValues = append(labelValues, "unknown")
	}
	return append(lvs, labelValues...)
}

// ToLabels is a convenience method to convert a labelNamesValues
// to the native prometheus.Labels.
func (lvs labelNamesValues) ToLabels() stdprometheus.Labels {
	labels := stdprometheus.Labels{}
	for i := 0; i < len(lvs); i += 2 {
		labels[lvs[i]] = lvs[i+1]
	}
	return labels
}
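A wiring sketch for the Prometheus side (not part of the commit; the router setup, port, and bucket values are illustrative): RegisterPrometheus registers promState as the single collector, PrometheusHandler mounts the scrape endpoint, and metrics labelled with entrypoints or backends that later disappear from the dynamic configuration are dropped after their next scrape, as described above.

package main

import (
	"context"
	"net/http"

	"github.com/containous/mux"
	"github.com/containous/traefik/pkg/metrics"
	"github.com/containous/traefik/pkg/types"
)

func main() {
	registry := metrics.RegisterPrometheus(context.Background(), &types.Prometheus{
		Buckets: []float64{0.1, 0.3, 1.2, 5.0}, // same values as the built-in default
	})
	if registry == nil {
		panic("Prometheus collector could not be registered")
	}

	registry.EntrypointReqsCounter().
		With("code", "200", "method", http.MethodGet, "protocol", "http", "entrypoint", "web").
		Add(1)

	// Expose GET /metrics via promhttp.
	router := mux.NewRouter()
	metrics.PrometheusHandler{}.Append(router)

	_ = http.ListenAndServe(":8080", router)
}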
pkg/metrics/prometheus_test.go (Normal file, 504 lines added)
@@ -0,0 +1,504 @@
package metrics

import (
	"context"
	"fmt"
	"net/http"
	"strconv"
	"testing"
	"time"

	"github.com/containous/traefik/pkg/config"
	th "github.com/containous/traefik/pkg/testhelpers"
	"github.com/containous/traefik/pkg/types"
	"github.com/prometheus/client_golang/prometheus"
	dto "github.com/prometheus/client_model/go"
	"github.com/stretchr/testify/assert"
)

func TestRegisterPromState(t *testing.T) {
	// Reset state of global promState.
	defer promState.reset()

	testCases := []struct {
		desc string
		prometheusSlice []*types.Prometheus
		initPromState bool
		unregisterPromState bool
		expectedNbRegistries int
	}{
		{
			desc: "Register once",
			prometheusSlice: []*types.Prometheus{{}},
			expectedNbRegistries: 1,
			initPromState: true,
		},
		{
			desc: "Register once with no promState init",
			prometheusSlice: []*types.Prometheus{{}},
			expectedNbRegistries: 0,
		},
		{
			desc: "Register twice",
			prometheusSlice: []*types.Prometheus{{}, {}},
			expectedNbRegistries: 2,
			initPromState: true,
		},
		{
			desc: "Register twice with no promstate init",
			prometheusSlice: []*types.Prometheus{{}, {}},
			expectedNbRegistries: 0,
		},
		{
			desc: "Register twice with unregister",
			prometheusSlice: []*types.Prometheus{{}, {}},
			unregisterPromState: true,
			expectedNbRegistries: 2,
			initPromState: true,
		},
		{
			desc: "Register twice with unregister but no promstate init",
			prometheusSlice: []*types.Prometheus{{}, {}},
			unregisterPromState: true,
			expectedNbRegistries: 0,
		},
	}

	for _, test := range testCases {
		actualNbRegistries := 0
		for _, prom := range test.prometheusSlice {
			if test.initPromState {
				initStandardRegistry(prom)
			}

			if registerPromState(context.Background()) {
				actualNbRegistries++
			}

			if test.unregisterPromState {
				prometheus.Unregister(promState)
			}

			promState.reset()
		}

		prometheus.Unregister(promState)

		assert.Equal(t, test.expectedNbRegistries, actualNbRegistries)
	}
}

// reset is a utility method for unit testing. It should be called after each
// test run that changes promState internally in order to avoid dependencies
// between unit tests.
func (ps *prometheusState) reset() {
	ps.collectors = make(chan *collector)
	ps.describers = []func(ch chan<- *prometheus.Desc){}
	ps.dynamicConfig = newDynamicConfig()
	ps.state = make(map[string]*collector)
}

func TestPrometheus(t *testing.T) {
	// Reset state of global promState.
	defer promState.reset()

	prometheusRegistry := RegisterPrometheus(context.Background(), &types.Prometheus{})
	defer prometheus.Unregister(promState)

	if !prometheusRegistry.IsEnabled() {
		t.Errorf("PrometheusRegistry should return true for IsEnabled()")
	}

	prometheusRegistry.ConfigReloadsCounter().Add(1)
	prometheusRegistry.ConfigReloadsFailureCounter().Add(1)
	prometheusRegistry.LastConfigReloadSuccessGauge().Set(float64(time.Now().Unix()))
	prometheusRegistry.LastConfigReloadFailureGauge().Set(float64(time.Now().Unix()))

	prometheusRegistry.
		EntrypointReqsCounter().
		With("code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http", "entrypoint", "http").
		Add(1)
	prometheusRegistry.
		EntrypointReqDurationHistogram().
		With("code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http", "entrypoint", "http").
		Observe(1)
	prometheusRegistry.
		EntrypointOpenConnsGauge().
		With("method", http.MethodGet, "protocol", "http", "entrypoint", "http").
		Set(1)

	prometheusRegistry.
		BackendReqsCounter().
		With("backend", "backend1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
		Add(1)
	prometheusRegistry.
		BackendReqDurationHistogram().
		With("backend", "backend1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
		Observe(10000)
	prometheusRegistry.
		BackendOpenConnsGauge().
		With("backend", "backend1", "method", http.MethodGet, "protocol", "http").
		Set(1)
	prometheusRegistry.
		BackendRetriesCounter().
		With("backend", "backend1").
		Add(1)
	prometheusRegistry.
		BackendServerUpGauge().
		With("backend", "backend1", "url", "http://127.0.0.10:80").
		Set(1)

	delayForTrackingCompletion()

	metricsFamilies := mustScrape()

	tests := []struct {
		name string
		labels map[string]string
		assert func(*dto.MetricFamily)
	}{
		{
			name: configReloadsTotalName,
			assert: buildCounterAssert(t, configReloadsTotalName, 1),
		},
		{
			name: configReloadsFailuresTotalName,
			assert: buildCounterAssert(t, configReloadsFailuresTotalName, 1),
		},
		{
			name: configLastReloadSuccessName,
			assert: buildTimestampAssert(t, configLastReloadSuccessName),
		},
		{
			name: configLastReloadFailureName,
			assert: buildTimestampAssert(t, configLastReloadFailureName),
		},
		{
			name: entrypointReqsTotalName,
			labels: map[string]string{
				"code": "200",
				"method": http.MethodGet,
				"protocol": "http",
				"entrypoint": "http",
			},
			assert: buildCounterAssert(t, entrypointReqsTotalName, 1),
		},
		{
			name: entrypointReqDurationName,
			labels: map[string]string{
				"code": "200",
				"method": http.MethodGet,
				"protocol": "http",
				"entrypoint": "http",
			},
			assert: buildHistogramAssert(t, entrypointReqDurationName, 1),
		},
		{
			name: entrypointOpenConnsName,
			labels: map[string]string{
				"method": http.MethodGet,
				"protocol": "http",
				"entrypoint": "http",
			},
			assert: buildGaugeAssert(t, entrypointOpenConnsName, 1),
		},
		{
			name: backendReqsTotalName,
			labels: map[string]string{
				"code": "200",
				"method": http.MethodGet,
				"protocol": "http",
				"backend": "backend1",
			},
			assert: buildCounterAssert(t, backendReqsTotalName, 1),
		},
		{
			name: backendReqDurationName,
			labels: map[string]string{
				"code": "200",
				"method": http.MethodGet,
				"protocol": "http",
				"backend": "backend1",
			},
			assert: buildHistogramAssert(t, backendReqDurationName, 1),
		},
		{
			name: backendOpenConnsName,
			labels: map[string]string{
				"method": http.MethodGet,
				"protocol": "http",
				"backend": "backend1",
			},
			assert: buildGaugeAssert(t, backendOpenConnsName, 1),
		},
		{
			name: backendRetriesTotalName,
			labels: map[string]string{
				"backend": "backend1",
			},
			assert: buildGreaterThanCounterAssert(t, backendRetriesTotalName, 1),
		},
		{
			name: backendServerUpName,
			labels: map[string]string{
				"backend": "backend1",
				"url": "http://127.0.0.10:80",
			},
			assert: buildGaugeAssert(t, backendServerUpName, 1),
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
family := findMetricFamily(test.name, metricsFamilies)
|
||||
if family == nil {
|
||||
t.Errorf("gathered metrics do not contain %q", test.name)
|
||||
continue
|
||||
}
|
||||
for _, label := range family.Metric[0].Label {
|
||||
val, ok := test.labels[*label.Name]
|
||||
if !ok {
|
||||
t.Errorf("%q metric contains unexpected label %q", test.name, *label.Name)
|
||||
} else if val != *label.Value {
|
||||
t.Errorf("label %q in metric %q has wrong value %q, expected %q", *label.Name, test.name, *label.Value, val)
|
||||
}
|
||||
}
|
||||
test.assert(family)
|
||||
}
|
||||
}
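// Illustrative sketch (not part of the original assertions): the same scrape
// can also be checked with the label-aware helpers defined further below,
// mirroring TestPrometheusRemovedMetricsReset:
//
//	families := mustScrape()
//	assertCounterValue(t, 1,
//		findMetricFamily(backendReqsTotalName, families),
//		"backend", "backend1", "code", "200", "method", http.MethodGet, "protocol", "http")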
|
||||
|
||||
func TestPrometheusMetricRemoval(t *testing.T) {
|
||||
// FIXME metrics
|
||||
t.Skip("waiting for metrics")
|
||||
|
||||
// Reset state of global promState.
|
||||
defer promState.reset()
|
||||
|
||||
prometheusRegistry := RegisterPrometheus(context.Background(), &types.Prometheus{})
|
||||
defer prometheus.Unregister(promState)
|
||||
|
||||
configurations := make(config.Configurations)
|
||||
configurations["providerName"] = &config.Configuration{
|
||||
HTTP: th.BuildConfiguration(
|
||||
th.WithRouters(
|
||||
th.WithRouter("foo",
|
||||
th.WithServiceName("bar")),
|
||||
),
|
||||
th.WithLoadBalancerServices(th.WithService("bar",
|
||||
th.WithLBMethod("wrr"),
|
||||
th.WithServers(th.WithServer("http://localhost:9000"))),
|
||||
),
|
||||
),
|
||||
}
|
||||
|
||||
OnConfigurationUpdate(configurations)
|
||||
|
||||
// Register some metrics manually that are not part of the active configuration.
|
||||
// Those metrics should be part of the /metrics output on the first scrape but
|
||||
// should be removed after that scrape.
|
||||
prometheusRegistry.
|
||||
EntrypointReqsCounter().
|
||||
With("entrypoint", "entrypoint2", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
|
||||
Add(1)
|
||||
prometheusRegistry.
|
||||
BackendReqsCounter().
|
||||
With("backend", "backend2", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
|
||||
Add(1)
|
||||
prometheusRegistry.
|
||||
BackendServerUpGauge().
|
||||
With("backend", "backend1", "url", "http://localhost:9999").
|
||||
Set(1)
|
||||
|
||||
delayForTrackingCompletion()
|
||||
|
||||
assertMetricsExist(t, mustScrape(), entrypointReqsTotalName, backendReqsTotalName, backendServerUpName)
|
||||
assertMetricsAbsent(t, mustScrape(), entrypointReqsTotalName, backendReqsTotalName, backendServerUpName)
|
||||
|
||||
// To verify that metrics belonging to active configurations are not removed,
|
||||
// here are the counter examples.
|
||||
prometheusRegistry.
|
||||
EntrypointReqsCounter().
|
||||
With("entrypoint", "entrypoint1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
|
||||
Add(1)
|
||||
|
||||
delayForTrackingCompletion()
|
||||
|
||||
assertMetricsExist(t, mustScrape(), entrypointReqsTotalName)
|
||||
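// Scraping a second time verifies that the entrypoint metric is kept across
// scrapes rather than being removed after the first one.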
assertMetricsExist(t, mustScrape(), entrypointReqsTotalName)
|
||||
}
|
||||
|
||||
func TestPrometheusRemovedMetricsReset(t *testing.T) {
|
||||
// Reset state of global promState.
|
||||
defer promState.reset()
|
||||
|
||||
prometheusRegistry := RegisterPrometheus(context.Background(), &types.Prometheus{})
|
||||
defer prometheus.Unregister(promState)
|
||||
|
||||
labelNamesValues := []string{
|
||||
"backend", "backend",
|
||||
"code", strconv.Itoa(http.StatusOK),
|
||||
"method", http.MethodGet,
|
||||
"protocol", "http",
|
||||
}
|
||||
prometheusRegistry.
|
||||
BackendReqsCounter().
|
||||
With(labelNamesValues...).
|
||||
Add(3)
|
||||
|
||||
delayForTrackingCompletion()
|
||||
|
||||
metricsFamilies := mustScrape()
|
||||
assertCounterValue(t, 3, findMetricFamily(backendReqsTotalName, metricsFamilies), labelNamesValues...)
|
||||
|
||||
// There is no dynamic configuration and so this metric will be deleted
|
||||
// after the first scrape.
|
||||
assertMetricsAbsent(t, mustScrape(), backendReqsTotalName)
|
||||
|
||||
prometheusRegistry.
|
||||
BackendReqsCounter().
|
||||
With(labelNamesValues...).
|
||||
Add(1)
|
||||
|
||||
delayForTrackingCompletion()
|
||||
|
||||
metricsFamilies = mustScrape()
|
||||
assertCounterValue(t, 1, findMetricFamily(backendReqsTotalName, metricsFamilies), labelNamesValues...)
|
||||
}
|
||||
|
||||
// Tracking and gathering the metrics happens concurrently.
|
||||
// In practice this is not a problem, because a tracked metric that misses
|
||||
// the current scrape will simply show up in the next one.
|
||||
// So that we can reliably test the tracking of all metrics here, we sleep
|
||||
// for a short amount of time to make sure the metrics will be present
|
||||
// in the next scrape.
|
||||
func delayForTrackingCompletion() {
|
||||
time.Sleep(250 * time.Millisecond)
|
||||
}
|
||||
|
||||
func mustScrape() []*dto.MetricFamily {
|
||||
families, err := prometheus.DefaultGatherer.Gather()
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("could not gather metrics families: %s", err))
|
||||
}
|
||||
return families
|
||||
}
|
||||
|
||||
func assertMetricsExist(t *testing.T, families []*dto.MetricFamily, metricNames ...string) {
|
||||
t.Helper()
|
||||
|
||||
for _, metricName := range metricNames {
|
||||
if findMetricFamily(metricName, families) == nil {
|
||||
t.Errorf("gathered metrics should contain %q", metricName)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func assertMetricsAbsent(t *testing.T, families []*dto.MetricFamily, metricNames ...string) {
|
||||
t.Helper()
|
||||
|
||||
for _, metricName := range metricNames {
|
||||
if findMetricFamily(metricName, families) != nil {
|
||||
t.Errorf("gathered metrics should not contain %q", metricName)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func findMetricFamily(name string, families []*dto.MetricFamily) *dto.MetricFamily {
|
||||
for _, family := range families {
|
||||
if family.GetName() == name {
|
||||
return family
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func findMetricByLabelNamesValues(family *dto.MetricFamily, labelNamesValues ...string) *dto.Metric {
|
||||
if family == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, metric := range family.Metric {
|
||||
if hasMetricAllLabelPairs(metric, labelNamesValues...) {
|
||||
return metric
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func hasMetricAllLabelPairs(metric *dto.Metric, labelNamesValues ...string) bool {
|
||||
for i := 0; i < len(labelNamesValues); i += 2 {
|
||||
name, val := labelNamesValues[i], labelNamesValues[i+1]
|
||||
if !hasMetricLabelPair(metric, name, val) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func hasMetricLabelPair(metric *dto.Metric, labelName, labelValue string) bool {
|
||||
for _, labelPair := range metric.Label {
|
||||
if labelPair.GetName() == labelName && labelPair.GetValue() == labelValue {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func assertCounterValue(t *testing.T, want float64, family *dto.MetricFamily, labelNamesValues ...string) {
|
||||
t.Helper()
|
||||
|
||||
metric := findMetricByLabelNamesValues(family, labelNamesValues...)
|
||||
|
||||
if metric == nil {
|
||||
t.Error("metric must not be nil")
|
||||
return
|
||||
}
|
||||
if metric.Counter == nil {
|
||||
t.Errorf("metric %s must be a counter", family.GetName())
|
||||
return
|
||||
}
|
||||
|
||||
if cv := metric.Counter.GetValue(); cv != want {
|
||||
t.Errorf("metric %s has value %v, want %v", family.GetName(), cv, want)
|
||||
}
|
||||
}
|
||||
|
||||
func buildCounterAssert(t *testing.T, metricName string, expectedValue int) func(family *dto.MetricFamily) {
|
||||
return func(family *dto.MetricFamily) {
|
||||
if cv := int(family.Metric[0].Counter.GetValue()); cv != expectedValue {
|
||||
t.Errorf("metric %s has value %d, want %d", metricName, cv, expectedValue)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func buildGreaterThanCounterAssert(t *testing.T, metricName string, expectedMinValue int) func(family *dto.MetricFamily) {
|
||||
return func(family *dto.MetricFamily) {
|
||||
if cv := int(family.Metric[0].Counter.GetValue()); cv < expectedMinValue {
|
||||
t.Errorf("metric %s has value %d, want at least %d", metricName, cv, expectedMinValue)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func buildHistogramAssert(t *testing.T, metricName string, expectedSampleCount int) func(family *dto.MetricFamily) {
|
||||
return func(family *dto.MetricFamily) {
|
||||
if sc := int(family.Metric[0].Histogram.GetSampleCount()); sc != expectedSampleCount {
|
||||
t.Errorf("metric %s has sample count value %d, want %d", metricName, sc, expectedSampleCount)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func buildGaugeAssert(t *testing.T, metricName string, expectedValue int) func(family *dto.MetricFamily) {
|
||||
return func(family *dto.MetricFamily) {
|
||||
if gv := int(family.Metric[0].Gauge.GetValue()); gv != expectedValue {
|
||||
t.Errorf("metric %s has value %d, want %d", metricName, gv, expectedValue)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func buildTimestampAssert(t *testing.T, metricName string) func(family *dto.MetricFamily) {
|
||||
return func(family *dto.MetricFamily) {
|
||||
if ts := time.Unix(int64(family.Metric[0].Gauge.GetValue()), 0); time.Since(ts) > time.Minute {
|
||||
t.Errorf("metric %s has wrong timestamp %v", metricName, ts)
|
||||
}
|
||||
}
|
||||
}
|
86
pkg/metrics/statsd.go
Normal file
|
@ -0,0 +1,86 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"github.com/containous/traefik/pkg/log"
|
||||
"github.com/containous/traefik/pkg/safe"
|
||||
"github.com/containous/traefik/pkg/types"
|
||||
kitlog "github.com/go-kit/kit/log"
|
||||
"github.com/go-kit/kit/metrics/statsd"
|
||||
)
|
||||
|
||||
var statsdClient = statsd.New("traefik.", kitlog.LoggerFunc(func(keyvals ...interface{}) error {
|
||||
log.WithoutContext().WithField(log.MetricsProviderName, "statsd").Info(keyvals)
|
||||
return nil
|
||||
}))
|
||||
|
||||
var statsdTicker *time.Ticker
|
||||
|
||||
const (
|
||||
statsdMetricsBackendReqsName = "backend.request.total"
|
||||
statsdMetricsBackendLatencyName = "backend.request.duration"
|
||||
statsdRetriesTotalName = "backend.retries.total"
|
||||
statsdConfigReloadsName = "config.reload.total"
|
||||
statsdConfigReloadsFailureName = statsdConfigReloadsName + ".failure"
|
||||
statsdLastConfigReloadSuccessName = "config.reload.lastSuccessTimestamp"
|
||||
statsdLastConfigReloadFailureName = "config.reload.lastFailureTimestamp"
|
||||
statsdEntrypointReqsName = "entrypoint.request.total"
|
||||
statsdEntrypointReqDurationName = "entrypoint.request.duration"
|
||||
statsdEntrypointOpenConnsName = "entrypoint.connections.open"
|
||||
statsdOpenConnsName = "backend.connections.open"
|
||||
statsdServerUpName = "backend.server.up"
|
||||
)
|
||||
|
||||
// RegisterStatsd registers the metrics pusher if this didn't happen yet and creates a statsd Registry instance.
|
||||
func RegisterStatsd(ctx context.Context, config *types.Statsd) Registry {
|
||||
if statsdTicker == nil {
|
||||
statsdTicker = initStatsdTicker(ctx, config)
|
||||
}
|
||||
|
||||
return &standardRegistry{
|
||||
enabled: true,
|
||||
configReloadsCounter: statsdClient.NewCounter(statsdConfigReloadsName, 1.0),
|
||||
configReloadsFailureCounter: statsdClient.NewCounter(statsdConfigReloadsFailureName, 1.0),
|
||||
lastConfigReloadSuccessGauge: statsdClient.NewGauge(statsdLastConfigReloadSuccessName),
|
||||
lastConfigReloadFailureGauge: statsdClient.NewGauge(statsdLastConfigReloadFailureName),
|
||||
entrypointReqsCounter: statsdClient.NewCounter(statsdEntrypointReqsName, 1.0),
|
||||
entrypointReqDurationHistogram: statsdClient.NewTiming(statsdEntrypointReqDurationName, 1.0),
|
||||
entrypointOpenConnsGauge: statsdClient.NewGauge(statsdEntrypointOpenConnsName),
|
||||
backendReqsCounter: statsdClient.NewCounter(statsdMetricsBackendReqsName, 1.0),
|
||||
backendReqDurationHistogram: statsdClient.NewTiming(statsdMetricsBackendLatencyName, 1.0),
|
||||
backendRetriesCounter: statsdClient.NewCounter(statsdRetriesTotalName, 1.0),
|
||||
backendOpenConnsGauge: statsdClient.NewGauge(statsdOpenConnsName),
|
||||
backendServerUpGauge: statsdClient.NewGauge(statsdServerUpName),
|
||||
}
|
||||
}
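// Usage sketch (illustrative only; the address and push interval mirror
// statsd_test.go below rather than any defaults enforced here):
//
//	registry := RegisterStatsd(context.Background(), &types.Statsd{Address: ":18125", PushInterval: "1s"})
//	registry.ConfigReloadsCounter().Add(1)
//	// ...
//	StopStatsd() // stop the push ticker once metrics are no longer needed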
|
||||
|
||||
// initStatsdTicker initializes the metrics pusher: it creates the push ticker and starts the statsd send loop.
|
||||
func initStatsdTicker(ctx context.Context, config *types.Statsd) *time.Ticker {
|
||||
address := config.Address
|
||||
if len(address) == 0 {
|
||||
address = "localhost:8125"
|
||||
}
|
||||
pushInterval, err := time.ParseDuration(config.PushInterval)
|
||||
if err != nil {
|
||||
log.FromContext(ctx).Warnf("Unable to parse %s from config.PushInterval: using 10s as the default value", config.PushInterval)
|
||||
pushInterval = 10 * time.Second
|
||||
}
|
||||
|
||||
report := time.NewTicker(pushInterval)
|
||||
|
||||
safe.Go(func() {
|
||||
statsdClient.SendLoop(report.C, "udp", address)
|
||||
})
|
||||
|
||||
return report
|
||||
}
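// As an illustration: a PushInterval of "10s" parses to ten seconds, while an
// empty or unit-less value such as "10" fails time.ParseDuration and falls back
// to the 10s default above.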
|
||||
|
||||
// StopStatsd stops the internal statsdTicker, which controls the pushing of metrics to the StatsD agent, and resets it to `nil`.
|
||||
func StopStatsd() {
|
||||
if statsdTicker != nil {
|
||||
statsdTicker.Stop()
|
||||
}
|
||||
statsdTicker = nil
|
||||
}
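// Since statsdTicker is reset to nil here, a later RegisterStatsd call will
// create a fresh ticker and start a new send loop.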
|
51
pkg/metrics/statsd_test.go
Normal file
|
@ -0,0 +1,51 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/containous/traefik/pkg/types"
|
||||
"github.com/stvp/go-udp-testing"
|
||||
)
|
||||
|
||||
func TestStatsD(t *testing.T) {
|
||||
udp.SetAddr(":18125")
|
||||
// This is needed to make sure that the UDP listener keeps listening for data a bit longer; otherwise it would quit after a millisecond
|
||||
udp.Timeout = 5 * time.Second
|
||||
|
||||
statsdRegistry := RegisterStatsd(context.Background(), &types.Statsd{Address: ":18125", PushInterval: "1s"})
|
||||
defer StopStatsd()
|
||||
|
||||
if !statsdRegistry.IsEnabled() {
|
||||
t.Errorf("Statsd registry should return true for IsEnabled()")
|
||||
}
|
||||
|
||||
expected := []string{
|
||||
// We are only validating counts, as it is nearly impossible to validate latency, since it varies every run
|
||||
"traefik.backend.request.total:2.000000|c\n",
|
||||
"traefik.backend.retries.total:2.000000|c\n",
|
||||
"traefik.backend.request.duration:10000.000000|ms",
|
||||
"traefik.config.reload.total:1.000000|c\n",
|
||||
"traefik.config.reload.total:1.000000|c\n",
|
||||
"traefik.entrypoint.request.total:1.000000|c\n",
|
||||
"traefik.entrypoint.request.duration:10000.000000|ms",
|
||||
"traefik.entrypoint.connections.open:1.000000|g\n",
|
||||
"traefik.backend.server.up:1.000000|g\n",
|
||||
}
|
||||
|
||||
udp.ShouldReceiveAll(t, expected, func() {
|
||||
statsdRegistry.BackendReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
statsdRegistry.BackendReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
statsdRegistry.BackendRetriesCounter().With("service", "test").Add(1)
|
||||
statsdRegistry.BackendRetriesCounter().With("service", "test").Add(1)
|
||||
statsdRegistry.BackendReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
statsdRegistry.ConfigReloadsCounter().Add(1)
|
||||
statsdRegistry.ConfigReloadsFailureCounter().Add(1)
|
||||
statsdRegistry.EntrypointReqsCounter().With("entrypoint", "test").Add(1)
|
||||
statsdRegistry.EntrypointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
|
||||
statsdRegistry.EntrypointOpenConnsGauge().With("entrypoint", "test").Set(1)
|
||||
statsdRegistry.BackendServerUpGauge().With("backend", "test", "url", "http://127.0.0.1").Set(1)
|
||||
})
|
||||
}
|