Extract metrics to own package and refactor implementations
This commit is contained in:
parent
c1b5b740ff
commit
e6c2040ea8
19 changed files with 599 additions and 797 deletions
69
metrics/datadog.go
Normal file
69
metrics/datadog.go
Normal file
|
@ -0,0 +1,69 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/containous/traefik/log"
|
||||
"github.com/containous/traefik/safe"
|
||||
"github.com/containous/traefik/types"
|
||||
kitlog "github.com/go-kit/kit/log"
|
||||
"github.com/go-kit/kit/metrics/dogstatsd"
|
||||
)
|
||||
|
||||
var datadogClient = dogstatsd.New("traefik.", kitlog.LoggerFunc(func(keyvals ...interface{}) error {
|
||||
log.Info(keyvals)
|
||||
return nil
|
||||
}))
|
||||
|
||||
var datadogTicker *time.Ticker
|
||||
|
||||
// Metric names consistent with https://github.com/DataDog/integrations-extras/pull/64
|
||||
const (
|
||||
ddMetricsReqsName = "requests.total"
|
||||
ddMetricsLatencyName = "request.duration"
|
||||
ddRetriesTotalName = "backend.retries.total"
|
||||
)
|
||||
|
||||
// RegisterDatadog registers the metrics pusher if this didn't happen yet and creates a datadog Registry instance.
|
||||
func RegisterDatadog(config *types.Datadog) Registry {
|
||||
if datadogTicker == nil {
|
||||
datadogTicker = initDatadogClient(config)
|
||||
}
|
||||
|
||||
registry := &standardRegistry{
|
||||
enabled: true,
|
||||
reqsCounter: datadogClient.NewCounter(ddMetricsReqsName, 1.0),
|
||||
reqDurationHistogram: datadogClient.NewHistogram(ddMetricsLatencyName, 1.0),
|
||||
retriesCounter: datadogClient.NewCounter(ddRetriesTotalName, 1.0),
|
||||
}
|
||||
|
||||
return registry
|
||||
}
|
||||
|
||||
func initDatadogClient(config *types.Datadog) *time.Ticker {
|
||||
address := config.Address
|
||||
if len(address) == 0 {
|
||||
address = "localhost:8125"
|
||||
}
|
||||
pushInterval, err := time.ParseDuration(config.PushInterval)
|
||||
if err != nil {
|
||||
log.Warnf("Unable to parse %s into pushInterval, using 10s as default value", config.PushInterval)
|
||||
pushInterval = 10 * time.Second
|
||||
}
|
||||
|
||||
report := time.NewTicker(pushInterval)
|
||||
|
||||
safe.Go(func() {
|
||||
datadogClient.SendLoop(report.C, "udp", address)
|
||||
})
|
||||
|
||||
return report
|
||||
}
|
||||
|
||||
// StopDatadog stops internal datadogTicker which controls the pushing of metrics to DD Agent and resets it to `nil`.
|
||||
func StopDatadog() {
|
||||
if datadogTicker != nil {
|
||||
datadogTicker.Stop()
|
||||
}
|
||||
datadogTicker = nil
|
||||
}
|
40
metrics/datadog_test.go
Normal file
40
metrics/datadog_test.go
Normal file
|
@ -0,0 +1,40 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/containous/traefik/types"
|
||||
"github.com/stvp/go-udp-testing"
|
||||
)
|
||||
|
||||
func TestDatadog(t *testing.T) {
|
||||
udp.SetAddr(":18125")
|
||||
// This is needed to make sure that UDP Listener listens for data a bit longer, otherwise it will quit after a millisecond
|
||||
udp.Timeout = 5 * time.Second
|
||||
|
||||
datadogRegistry := RegisterDatadog(&types.Datadog{Address: ":18125", PushInterval: "1s"})
|
||||
defer StopDatadog()
|
||||
|
||||
if !datadogRegistry.IsEnabled() {
|
||||
t.Errorf("DatadogRegistry should return true for IsEnabled()")
|
||||
}
|
||||
|
||||
expected := []string{
|
||||
// We are only validating counts, as it is nearly impossible to validate latency, since it varies every run
|
||||
"traefik.requests.total:1.000000|c|#service:test,code:404,method:GET\n",
|
||||
"traefik.requests.total:1.000000|c|#service:test,code:200,method:GET\n",
|
||||
"traefik.backend.retries.total:2.000000|c|#service:test\n",
|
||||
"traefik.request.duration:10000.000000|h|#service:test,code:200",
|
||||
}
|
||||
|
||||
udp.ShouldReceiveAll(t, expected, func() {
|
||||
datadogRegistry.ReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
datadogRegistry.ReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
datadogRegistry.ReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
datadogRegistry.RetriesCounter().With("service", "test").Add(1)
|
||||
datadogRegistry.RetriesCounter().With("service", "test").Add(1)
|
||||
})
|
||||
}
|
79
metrics/metrics.go
Normal file
79
metrics/metrics.go
Normal file
|
@ -0,0 +1,79 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"github.com/go-kit/kit/metrics"
|
||||
"github.com/go-kit/kit/metrics/multi"
|
||||
)
|
||||
|
||||
// Registry has to implemented by any system that wants to monitor and expose metrics.
|
||||
type Registry interface {
|
||||
// IsEnabled shows whether metrics instrumentation is enabled.
|
||||
IsEnabled() bool
|
||||
ReqsCounter() metrics.Counter
|
||||
ReqDurationHistogram() metrics.Histogram
|
||||
RetriesCounter() metrics.Counter
|
||||
}
|
||||
|
||||
// NewMultiRegistry creates a new standardRegistry that wraps multiple Registries.
|
||||
func NewMultiRegistry(registries []Registry) Registry {
|
||||
reqsCounters := []metrics.Counter{}
|
||||
reqDurationHistograms := []metrics.Histogram{}
|
||||
retriesCounters := []metrics.Counter{}
|
||||
|
||||
for _, r := range registries {
|
||||
reqsCounters = append(reqsCounters, r.ReqsCounter())
|
||||
reqDurationHistograms = append(reqDurationHistograms, r.ReqDurationHistogram())
|
||||
retriesCounters = append(retriesCounters, r.RetriesCounter())
|
||||
}
|
||||
|
||||
return &standardRegistry{
|
||||
enabled: true,
|
||||
reqsCounter: multi.NewCounter(reqsCounters...),
|
||||
reqDurationHistogram: multi.NewHistogram(reqDurationHistograms...),
|
||||
retriesCounter: multi.NewCounter(retriesCounters...),
|
||||
}
|
||||
}
|
||||
|
||||
type standardRegistry struct {
|
||||
enabled bool
|
||||
reqsCounter metrics.Counter
|
||||
reqDurationHistogram metrics.Histogram
|
||||
retriesCounter metrics.Counter
|
||||
}
|
||||
|
||||
func (r *standardRegistry) IsEnabled() bool {
|
||||
return r.enabled
|
||||
}
|
||||
|
||||
func (r *standardRegistry) ReqsCounter() metrics.Counter {
|
||||
return r.reqsCounter
|
||||
}
|
||||
|
||||
func (r *standardRegistry) ReqDurationHistogram() metrics.Histogram {
|
||||
return r.reqDurationHistogram
|
||||
}
|
||||
|
||||
func (r *standardRegistry) RetriesCounter() metrics.Counter {
|
||||
return r.retriesCounter
|
||||
}
|
||||
|
||||
// NewVoidRegistry is a noop implementation of metrics.Registry.
|
||||
// It is used to avoid nil checking in components that do metric collections.
|
||||
func NewVoidRegistry() Registry {
|
||||
return &standardRegistry{
|
||||
enabled: false,
|
||||
reqsCounter: &voidCounter{},
|
||||
reqDurationHistogram: &voidHistogram{},
|
||||
retriesCounter: &voidCounter{},
|
||||
}
|
||||
}
|
||||
|
||||
type voidCounter struct{}
|
||||
|
||||
func (v *voidCounter) With(labelValues ...string) metrics.Counter { return v }
|
||||
func (v *voidCounter) Add(delta float64) {}
|
||||
|
||||
type voidHistogram struct{}
|
||||
|
||||
func (h *voidHistogram) With(labelValues ...string) metrics.Histogram { return h }
|
||||
func (h *voidHistogram) Observe(value float64) {}
|
87
metrics/metrics_test.go
Normal file
87
metrics/metrics_test.go
Normal file
|
@ -0,0 +1,87 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/go-kit/kit/metrics"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestNewVoidRegistry(t *testing.T) {
|
||||
registry := NewVoidRegistry()
|
||||
|
||||
if registry.IsEnabled() {
|
||||
t.Errorf("VoidRegistry should not return true for IsEnabled()")
|
||||
}
|
||||
registry.ReqsCounter().With("some", "value").Add(1)
|
||||
registry.ReqDurationHistogram().With("some", "value").Observe(1)
|
||||
registry.RetriesCounter().With("some", "value").Add(1)
|
||||
}
|
||||
|
||||
func TestNewMultiRegistry(t *testing.T) {
|
||||
registries := []Registry{newCollectingRetryMetrics(), newCollectingRetryMetrics()}
|
||||
registry := NewMultiRegistry(registries)
|
||||
|
||||
registry.ReqsCounter().With("key", "requests").Add(1)
|
||||
registry.ReqDurationHistogram().With("key", "durations").Observe(2)
|
||||
registry.RetriesCounter().With("key", "retries").Add(3)
|
||||
|
||||
for _, collectingRegistry := range registries {
|
||||
cReqsCounter := collectingRegistry.ReqsCounter().(*counterMock)
|
||||
cReqDurationHistogram := collectingRegistry.ReqDurationHistogram().(*histogramMock)
|
||||
cRetriesCounter := collectingRegistry.RetriesCounter().(*counterMock)
|
||||
|
||||
wantCounterValue := float64(1)
|
||||
if cReqsCounter.counterValue != wantCounterValue {
|
||||
t.Errorf("Got value %f for ReqsCounter, want %f", cReqsCounter.counterValue, wantCounterValue)
|
||||
}
|
||||
wantHistogramValue := float64(2)
|
||||
if cReqDurationHistogram.lastHistogramValue != wantHistogramValue {
|
||||
t.Errorf("Got last observation %f for ReqDurationHistogram, want %f", cReqDurationHistogram.lastHistogramValue, wantHistogramValue)
|
||||
}
|
||||
wantCounterValue = float64(3)
|
||||
if cRetriesCounter.counterValue != wantCounterValue {
|
||||
t.Errorf("Got value %f for RetriesCounter, want %f", cRetriesCounter.counterValue, wantCounterValue)
|
||||
}
|
||||
|
||||
assert.Equal(t, []string{"key", "requests"}, cReqsCounter.lastLabelValues)
|
||||
assert.Equal(t, []string{"key", "durations"}, cReqDurationHistogram.lastLabelValues)
|
||||
assert.Equal(t, []string{"key", "retries"}, cRetriesCounter.lastLabelValues)
|
||||
}
|
||||
}
|
||||
|
||||
func newCollectingRetryMetrics() Registry {
|
||||
return &standardRegistry{
|
||||
reqsCounter: &counterMock{},
|
||||
reqDurationHistogram: &histogramMock{},
|
||||
retriesCounter: &counterMock{},
|
||||
}
|
||||
}
|
||||
|
||||
type counterMock struct {
|
||||
counterValue float64
|
||||
lastLabelValues []string
|
||||
}
|
||||
|
||||
func (c *counterMock) With(labelValues ...string) metrics.Counter {
|
||||
c.lastLabelValues = labelValues
|
||||
return c
|
||||
}
|
||||
|
||||
func (c *counterMock) Add(delta float64) {
|
||||
c.counterValue += delta
|
||||
}
|
||||
|
||||
type histogramMock struct {
|
||||
lastHistogramValue float64
|
||||
lastLabelValues []string
|
||||
}
|
||||
|
||||
func (c *histogramMock) With(labelValues ...string) metrics.Histogram {
|
||||
c.lastLabelValues = labelValues
|
||||
return c
|
||||
}
|
||||
|
||||
func (c *histogramMock) Observe(value float64) {
|
||||
c.lastHistogramValue = value
|
||||
}
|
45
metrics/prometheus.go
Normal file
45
metrics/prometheus.go
Normal file
|
@ -0,0 +1,45 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"github.com/containous/traefik/types"
|
||||
"github.com/go-kit/kit/metrics/prometheus"
|
||||
stdprometheus "github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
const (
|
||||
metricNamePrefix = "traefik_"
|
||||
|
||||
reqsTotalName = metricNamePrefix + "requests_total"
|
||||
reqDurationName = metricNamePrefix + "request_duration_seconds"
|
||||
retriesTotalName = metricNamePrefix + "backend_retries_total"
|
||||
)
|
||||
|
||||
// RegisterPrometheus registers all Prometheus metrics.
|
||||
// It must be called only once and failing to register the metrics will lead to a panic.
|
||||
func RegisterPrometheus(config *types.Prometheus) Registry {
|
||||
buckets := []float64{0.1, 0.3, 1.2, 5.0}
|
||||
if config.Buckets != nil {
|
||||
buckets = config.Buckets
|
||||
}
|
||||
|
||||
reqCounter := prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||
Name: reqsTotalName,
|
||||
Help: "How many HTTP requests processed, partitioned by status code and method.",
|
||||
}, []string{"service", "code", "method"})
|
||||
reqDurationHistogram := prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{
|
||||
Name: reqDurationName,
|
||||
Help: "How long it took to process the request.",
|
||||
Buckets: buckets,
|
||||
}, []string{"service", "code"})
|
||||
retryCounter := prometheus.NewCounterFrom(stdprometheus.CounterOpts{
|
||||
Name: retriesTotalName,
|
||||
Help: "How many request retries happened in total.",
|
||||
}, []string{"service"})
|
||||
|
||||
return &standardRegistry{
|
||||
enabled: true,
|
||||
reqsCounter: reqCounter,
|
||||
reqDurationHistogram: reqDurationHistogram,
|
||||
retriesCounter: retryCounter,
|
||||
}
|
||||
}
|
104
metrics/prometheus_test.go
Normal file
104
metrics/prometheus_test.go
Normal file
|
@ -0,0 +1,104 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/containous/traefik/types"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
dto "github.com/prometheus/client_model/go"
|
||||
)
|
||||
|
||||
func TestPrometheus(t *testing.T) {
|
||||
prometheusRegistry := RegisterPrometheus(&types.Prometheus{})
|
||||
|
||||
if !prometheusRegistry.IsEnabled() {
|
||||
t.Errorf("PrometheusRegistry should return true for IsEnabled()")
|
||||
}
|
||||
prometheusRegistry.ReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
prometheusRegistry.ReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
prometheusRegistry.ReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
prometheusRegistry.ReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
|
||||
prometheusRegistry.RetriesCounter().With("service", "test").Add(1)
|
||||
|
||||
metricsFamilies, err := prometheus.DefaultGatherer.Gather()
|
||||
if err != nil {
|
||||
t.Fatalf("could not gather metrics families: %s", err)
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
labels map[string]string
|
||||
assert func(*dto.MetricFamily)
|
||||
}{
|
||||
{
|
||||
name: reqsTotalName,
|
||||
labels: map[string]string{
|
||||
"code": "200",
|
||||
"method": http.MethodGet,
|
||||
"service": "test",
|
||||
},
|
||||
assert: func(family *dto.MetricFamily) {
|
||||
cv := family.Metric[0].Counter.GetValue()
|
||||
expectedCv := float64(2)
|
||||
if cv != expectedCv {
|
||||
t.Errorf("gathered metrics do not contain correct value for total requests, got %f expected %f", cv, expectedCv)
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: reqDurationName,
|
||||
labels: map[string]string{
|
||||
"service": "test",
|
||||
"code": "200",
|
||||
},
|
||||
assert: func(family *dto.MetricFamily) {
|
||||
sc := family.Metric[0].Histogram.GetSampleCount()
|
||||
expectedSc := uint64(2)
|
||||
if sc != expectedSc {
|
||||
t.Errorf("gathered metrics do not contain correct sample count for request duration, got %d expected %d", sc, expectedSc)
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: retriesTotalName,
|
||||
labels: map[string]string{
|
||||
"service": "test",
|
||||
},
|
||||
assert: func(family *dto.MetricFamily) {
|
||||
cv := family.Metric[0].Counter.GetValue()
|
||||
expectedCv := float64(1)
|
||||
if cv != expectedCv {
|
||||
t.Errorf("gathered metrics do not contain correct value for total retries, got %f expected %f", cv, expectedCv)
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
family := findMetricFamily(test.name, metricsFamilies)
|
||||
if family == nil {
|
||||
t.Errorf("gathered metrics do not contain %q", test.name)
|
||||
continue
|
||||
}
|
||||
for _, label := range family.Metric[0].Label {
|
||||
val, ok := test.labels[*label.Name]
|
||||
if !ok {
|
||||
t.Errorf("%q metric contains unexpected label %q", test.name, *label.Name)
|
||||
} else if val != *label.Value {
|
||||
t.Errorf("label %q in metric %q has wrong value %q, expected %q", *label.Name, test.name, *label.Value, val)
|
||||
}
|
||||
}
|
||||
test.assert(family)
|
||||
}
|
||||
}
|
||||
|
||||
func findMetricFamily(name string, families []*dto.MetricFamily) *dto.MetricFamily {
|
||||
for _, family := range families {
|
||||
if family.GetName() == name {
|
||||
return family
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
61
metrics/statsd.go
Normal file
61
metrics/statsd.go
Normal file
|
@ -0,0 +1,61 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/containous/traefik/log"
|
||||
"github.com/containous/traefik/safe"
|
||||
"github.com/containous/traefik/types"
|
||||
kitlog "github.com/go-kit/kit/log"
|
||||
"github.com/go-kit/kit/metrics/statsd"
|
||||
)
|
||||
|
||||
var statsdClient = statsd.New("traefik.", kitlog.LoggerFunc(func(keyvals ...interface{}) error {
|
||||
log.Info(keyvals)
|
||||
return nil
|
||||
}))
|
||||
|
||||
var statsdTicker *time.Ticker
|
||||
|
||||
// RegisterStatsd registers the metrics pusher if this didn't happen yet and creates a statsd Registry instance.
|
||||
func RegisterStatsd(config *types.Statsd) Registry {
|
||||
if statsdTicker == nil {
|
||||
statsdTicker = initStatsdTicker(config)
|
||||
}
|
||||
|
||||
return &standardRegistry{
|
||||
enabled: true,
|
||||
reqsCounter: statsdClient.NewCounter(ddMetricsReqsName, 1.0),
|
||||
reqDurationHistogram: statsdClient.NewTiming(ddMetricsLatencyName, 1.0),
|
||||
retriesCounter: statsdClient.NewCounter(ddRetriesTotalName, 1.0),
|
||||
}
|
||||
}
|
||||
|
||||
// initStatsdTicker initializes metrics pusher and creates a statsdClient if not created already
|
||||
func initStatsdTicker(config *types.Statsd) *time.Ticker {
|
||||
address := config.Address
|
||||
if len(address) == 0 {
|
||||
address = "localhost:8125"
|
||||
}
|
||||
pushInterval, err := time.ParseDuration(config.PushInterval)
|
||||
if err != nil {
|
||||
log.Warnf("Unable to parse %s into pushInterval, using 10s as default value", config.PushInterval)
|
||||
pushInterval = 10 * time.Second
|
||||
}
|
||||
|
||||
report := time.NewTicker(pushInterval)
|
||||
|
||||
safe.Go(func() {
|
||||
statsdClient.SendLoop(report.C, "udp", address)
|
||||
})
|
||||
|
||||
return report
|
||||
}
|
||||
|
||||
// StopStatsd stops internal statsdTicker which controls the pushing of metrics to StatsD Agent and resets it to `nil`
|
||||
func StopStatsd() {
|
||||
if statsdTicker != nil {
|
||||
statsdTicker.Stop()
|
||||
}
|
||||
statsdTicker = nil
|
||||
}
|
38
metrics/statsd_test.go
Normal file
38
metrics/statsd_test.go
Normal file
|
@ -0,0 +1,38 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/containous/traefik/types"
|
||||
"github.com/stvp/go-udp-testing"
|
||||
)
|
||||
|
||||
func TestStatsD(t *testing.T) {
|
||||
udp.SetAddr(":18125")
|
||||
// This is needed to make sure that UDP Listener listens for data a bit longer, otherwise it will quit after a millisecond
|
||||
udp.Timeout = 5 * time.Second
|
||||
|
||||
statsdRegistry := RegisterStatsd(&types.Statsd{Address: ":18125", PushInterval: "1s"})
|
||||
defer StopStatsd()
|
||||
|
||||
if !statsdRegistry.IsEnabled() {
|
||||
t.Errorf("PrometheusRegistry should return true for IsEnabled()")
|
||||
}
|
||||
|
||||
expected := []string{
|
||||
// We are only validating counts, as it is nearly impossible to validate latency, since it varies every run
|
||||
"traefik.requests.total:2.000000|c\n",
|
||||
"traefik.backend.retries.total:2.000000|c\n",
|
||||
"traefik.request.duration:10000.000000|ms",
|
||||
}
|
||||
|
||||
udp.ShouldReceiveAll(t, expected, func() {
|
||||
statsdRegistry.ReqsCounter().With("service", "test", "code", string(http.StatusOK), "method", http.MethodGet).Add(1)
|
||||
statsdRegistry.ReqsCounter().With("service", "test", "code", string(http.StatusNotFound), "method", http.MethodGet).Add(1)
|
||||
statsdRegistry.RetriesCounter().With("service", "test").Add(1)
|
||||
statsdRegistry.RetriesCounter().With("service", "test").Add(1)
|
||||
statsdRegistry.ReqDurationHistogram().With("service", "test", "code", string(http.StatusOK)).Observe(10000)
|
||||
})
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue