Extract metrics to own package and refactor implementations

This commit is contained in:
Marco Jantke 2017-08-23 20:46:03 +02:00 committed by Traefiker
parent c1b5b740ff
commit e6c2040ea8
19 changed files with 599 additions and 797 deletions

69
metrics/datadog.go Normal file
View file

@ -0,0 +1,69 @@
package metrics
import (
"time"
"github.com/containous/traefik/log"
"github.com/containous/traefik/safe"
"github.com/containous/traefik/types"
kitlog "github.com/go-kit/kit/log"
"github.com/go-kit/kit/metrics/dogstatsd"
)
// Package-level Datadog state shared by every registry that RegisterDatadog
// hands out.
var (
	// datadogClient is the dogstatsd client created with the "traefik." prefix.
	// The logger handed to it forwards the received key/values to log.Info and
	// never reports an error.
	datadogClient = dogstatsd.New("traefik.", kitlog.LoggerFunc(func(keyvals ...interface{}) error {
		log.Info(keyvals)
		return nil
	}))

	// datadogTicker is the ticker feeding the push loop; it is nil until
	// RegisterDatadog starts the pusher and again after StopDatadog.
	datadogTicker *time.Ticker
)
// Metric names consistent with https://github.com/DataDog/integrations-extras/pull/64
// The names are passed to the metrics client without a prefix; the StatsD
// registry in this package reuses the same names.
const (
// ddMetricsReqsName is the name of the request counter.
ddMetricsReqsName = "requests.total"
// ddMetricsLatencyName is the name of the request duration metric.
ddMetricsLatencyName = "request.duration"
// ddRetriesTotalName is the name of the backend retries counter.
ddRetriesTotalName = "backend.retries.total"
)
// RegisterDatadog returns an enabled Registry whose counters and histogram are
// created on the shared datadogClient.
//
// The metrics pusher is started on the first call only: while datadogTicker is
// non-nil the given config is not consulted again.
func RegisterDatadog(config *types.Datadog) Registry {
	if datadogTicker == nil {
		datadogTicker = initDatadogClient(config)
	}

	return &standardRegistry{
		enabled:              true,
		reqsCounter:          datadogClient.NewCounter(ddMetricsReqsName, 1.0),
		reqDurationHistogram: datadogClient.NewHistogram(ddMetricsLatencyName, 1.0),
		retriesCounter:       datadogClient.NewCounter(ddRetriesTotalName, 1.0),
	}
}
// initDatadogClient starts the goroutine that runs datadogClient.SendLoop over
// UDP and returns the ticker that drives it.
//
// An empty config.Address falls back to "localhost:8125". A push interval that
// cannot be parsed, or that parses to a zero or negative duration, falls back
// to 10s: time.NewTicker panics on a non-positive interval, so such a value
// must never reach it.
//
// NOTE(review): time.Ticker.Stop does not close the ticker channel, so the
// SendLoop goroutine presumably keeps waiting after StopDatadog — confirm
// against the SendLoop implementation.
func initDatadogClient(config *types.Datadog) *time.Ticker {
	const defaultPushInterval = 10 * time.Second

	address := config.Address
	if len(address) == 0 {
		address = "localhost:8125"
	}

	pushInterval, err := time.ParseDuration(config.PushInterval)
	switch {
	case err != nil:
		log.Warnf("Unable to parse %s into pushInterval, using 10s as default value", config.PushInterval)
		pushInterval = defaultPushInterval
	case pushInterval <= 0:
		// Parsed fine (e.g. "0s" or "-5s") but unusable as a ticker period.
		log.Warnf("Non-positive pushInterval %s is not allowed, using 10s as default value", config.PushInterval)
		pushInterval = defaultPushInterval
	}

	report := time.NewTicker(pushInterval)

	safe.Go(func() {
		datadogClient.SendLoop(report.C, "udp", address)
	})

	return report
}
// StopDatadog stops the ticker that triggers pushes to the DD Agent and clears
// datadogTicker, so that a later RegisterDatadog call starts a new pusher.
// Calling it when no ticker is running is a no-op.
func StopDatadog() {
	if datadogTicker == nil {
		return
	}

	datadogTicker.Stop()
	datadogTicker = nil
}

40
metrics/datadog_test.go Normal file
View file

@ -0,0 +1,40 @@
package metrics
import (
"net/http"
"strconv"
"testing"
"time"
"github.com/containous/traefik/types"
"github.com/stvp/go-udp-testing"
)
// TestDatadog registers a Datadog registry pushing to a local UDP listener and
// checks that the expected payloads are received.
func TestDatadog(t *testing.T) {
	udp.SetAddr(":18125")
	// Keep the UDP listener open well beyond the 1s push interval; with the
	// default timeout it would quit after a millisecond.
	udp.Timeout = 5 * time.Second

	datadogRegistry := RegisterDatadog(&types.Datadog{Address: ":18125", PushInterval: "1s"})
	defer StopDatadog()

	if enabled := datadogRegistry.IsEnabled(); !enabled {
		t.Errorf("DatadogRegistry should return true for IsEnabled()")
	}

	okCode := strconv.Itoa(http.StatusOK)
	notFoundCode := strconv.Itoa(http.StatusNotFound)

	// Counters are matched per label set; the duration is matched too because
	// the test observes a fixed value rather than a measured latency.
	expected := []string{
		"traefik.requests.total:1.000000|c|#service:test,code:404,method:GET\n",
		"traefik.requests.total:1.000000|c|#service:test,code:200,method:GET\n",
		"traefik.backend.retries.total:2.000000|c|#service:test\n",
		"traefik.request.duration:10000.000000|h|#service:test,code:200",
	}

	emitMetrics := func() {
		datadogRegistry.ReqsCounter().With("service", "test", "code", okCode, "method", http.MethodGet).Add(1)
		datadogRegistry.ReqsCounter().With("service", "test", "code", notFoundCode, "method", http.MethodGet).Add(1)
		datadogRegistry.ReqDurationHistogram().With("service", "test", "code", okCode).Observe(10000)

		// Two increments on the same label set are expected to show up as 2.
		datadogRegistry.RetriesCounter().With("service", "test").Add(1)
		datadogRegistry.RetriesCounter().With("service", "test").Add(1)
	}

	udp.ShouldReceiveAll(t, expected, emitMetrics)
}

79
metrics/metrics.go Normal file
View file

@ -0,0 +1,79 @@
package metrics
import (
"github.com/go-kit/kit/metrics"
"github.com/go-kit/kit/metrics/multi"
)
// Registry has to be implemented by any system that wants to monitor and expose metrics.
type Registry interface {
// IsEnabled shows whether metrics instrumentation is enabled.
IsEnabled() bool
// ReqsCounter returns the counter used for counting requests.
ReqsCounter() metrics.Counter
// ReqDurationHistogram returns the histogram used for request durations.
ReqDurationHistogram() metrics.Histogram
// RetriesCounter returns the counter used for counting retries.
RetriesCounter() metrics.Counter
}
// NewMultiRegistry builds an enabled standardRegistry whose counters and
// histogram are multi metrics wrapping the corresponding metric of every given
// Registry, in the order the registries are passed.
func NewMultiRegistry(registries []Registry) Registry {
	var (
		requests  = make([]metrics.Counter, 0, len(registries))
		durations = make([]metrics.Histogram, 0, len(registries))
		retries   = make([]metrics.Counter, 0, len(registries))
	)

	for i := range registries {
		wrapped := registries[i]
		requests = append(requests, wrapped.ReqsCounter())
		durations = append(durations, wrapped.ReqDurationHistogram())
		retries = append(retries, wrapped.RetriesCounter())
	}

	combined := &standardRegistry{enabled: true}
	combined.reqsCounter = multi.NewCounter(requests...)
	combined.reqDurationHistogram = multi.NewHistogram(durations...)
	combined.retriesCounter = multi.NewCounter(retries...)
	return combined
}
// standardRegistry is the Registry implementation shared by all backends in
// this package: it simply stores the metrics it was constructed with.
type standardRegistry struct {
	enabled              bool
	reqsCounter          metrics.Counter
	reqDurationHistogram metrics.Histogram
	retriesCounter       metrics.Counter
}

// IsEnabled reports the enabled flag the registry was built with.
func (sr *standardRegistry) IsEnabled() bool { return sr.enabled }

// ReqsCounter returns the stored requests counter.
func (sr *standardRegistry) ReqsCounter() metrics.Counter { return sr.reqsCounter }

// ReqDurationHistogram returns the stored request duration histogram.
func (sr *standardRegistry) ReqDurationHistogram() metrics.Histogram {
	return sr.reqDurationHistogram
}

// RetriesCounter returns the stored retries counter.
func (sr *standardRegistry) RetriesCounter() metrics.Counter { return sr.retriesCounter }
// NewVoidRegistry returns a disabled Registry whose counters and histogram
// discard everything they are given.
// Components that collect metrics can use it instead of nil checking.
func NewVoidRegistry() Registry {
	registry := standardRegistry{enabled: false}
	registry.reqsCounter = &voidCounter{}
	registry.reqDurationHistogram = &voidHistogram{}
	registry.retriesCounter = &voidCounter{}
	return &registry
}
// voidCounter is a metrics.Counter that ignores labels and increments.
type voidCounter struct{}

// With ignores the label values and returns the same counter.
func (v *voidCounter) With(labelValues ...string) metrics.Counter {
	return v
}

// Add discards the delta.
func (v *voidCounter) Add(delta float64) {
}
// voidHistogram is a metrics.Histogram that ignores labels and observations.
type voidHistogram struct{}

// With ignores the label values and returns the same histogram.
func (h *voidHistogram) With(labelValues ...string) metrics.Histogram {
	return h
}

// Observe discards the value.
func (h *voidHistogram) Observe(value float64) {
}

87
metrics/metrics_test.go Normal file
View file

@ -0,0 +1,87 @@
package metrics
import (
"testing"
"github.com/go-kit/kit/metrics"
"github.com/stretchr/testify/assert"
)
// TestNewVoidRegistry checks that the void registry reports itself as disabled
// and that all of its metrics can be used without panicking.
func TestNewVoidRegistry(t *testing.T) {
	voidRegistry := NewVoidRegistry()

	if enabled := voidRegistry.IsEnabled(); enabled {
		t.Errorf("VoidRegistry should not return true for IsEnabled()")
	}

	// None of these calls has an observable result; they only must not panic.
	voidRegistry.ReqsCounter().With("some", "value").Add(1)
	voidRegistry.ReqDurationHistogram().With("some", "value").Observe(1)
	voidRegistry.RetriesCounter().With("some", "value").Add(1)
}
// TestNewMultiRegistry checks that values and label values recorded on a multi
// registry reach every wrapped registry.
func TestNewMultiRegistry(t *testing.T) {
	wrapped := []Registry{newCollectingRetryMetrics(), newCollectingRetryMetrics()}
	multiRegistry := NewMultiRegistry(wrapped)

	multiRegistry.ReqsCounter().With("key", "requests").Add(1)
	multiRegistry.ReqDurationHistogram().With("key", "durations").Observe(2)
	multiRegistry.RetriesCounter().With("key", "retries").Add(3)

	for _, collecting := range wrapped {
		// The wrapped registries are built from mocks, so the assertions
		// below can read the recorded values directly.
		reqs := collecting.ReqsCounter().(*counterMock)
		durations := collecting.ReqDurationHistogram().(*histogramMock)
		retries := collecting.RetriesCounter().(*counterMock)

		if want := float64(1); reqs.counterValue != want {
			t.Errorf("Got value %f for ReqsCounter, want %f", reqs.counterValue, want)
		}
		if want := float64(2); durations.lastHistogramValue != want {
			t.Errorf("Got last observation %f for ReqDurationHistogram, want %f", durations.lastHistogramValue, want)
		}
		if want := float64(3); retries.counterValue != want {
			t.Errorf("Got value %f for RetriesCounter, want %f", retries.counterValue, want)
		}

		assert.Equal(t, []string{"key", "requests"}, reqs.lastLabelValues)
		assert.Equal(t, []string{"key", "durations"}, durations.lastLabelValues)
		assert.Equal(t, []string{"key", "retries"}, retries.lastLabelValues)
	}
}
// newCollectingRetryMetrics returns a Registry backed by fresh mocks that
// record what is added or observed. Its enabled flag is left at the zero value.
func newCollectingRetryMetrics() Registry {
	collecting := new(standardRegistry)
	collecting.reqsCounter = &counterMock{}
	collecting.reqDurationHistogram = &histogramMock{}
	collecting.retriesCounter = &counterMock{}
	return collecting
}
// counterMock is a metrics.Counter that sums the added deltas and remembers
// the label values of the most recent With call.
type counterMock struct {
	counterValue    float64
	lastLabelValues []string
}

// With records the label values and returns the same mock, so the following
// Add lands on this instance.
func (c *counterMock) With(labelValues ...string) metrics.Counter {
	c.lastLabelValues = labelValues
	return c
}

// Add accumulates delta into counterValue.
func (c *counterMock) Add(delta float64) {
	c.counterValue = c.counterValue + delta
}
// histogramMock is a metrics.Histogram that remembers only the most recent
// observation and the label values of the most recent With call.
type histogramMock struct {
	lastHistogramValue float64
	lastLabelValues    []string
}

// With records the label values and returns the same mock, so the following
// Observe lands on this instance.
func (h *histogramMock) With(labelValues ...string) metrics.Histogram {
	h.lastLabelValues = labelValues
	return h
}

// Observe overwrites lastHistogramValue with value.
func (h *histogramMock) Observe(value float64) {
	h.lastHistogramValue = value
}

45
metrics/prometheus.go Normal file
View file

@ -0,0 +1,45 @@
package metrics
import (
"github.com/containous/traefik/types"
"github.com/go-kit/kit/metrics/prometheus"
stdprometheus "github.com/prometheus/client_golang/prometheus"
)
// Names of the Prometheus metrics registered by RegisterPrometheus.
const (
// metricNamePrefix is prepended to every metric name below.
metricNamePrefix = "traefik_"
// reqsTotalName is the name of the requests counter.
reqsTotalName = metricNamePrefix + "requests_total"
// reqDurationName is the name of the request duration histogram.
reqDurationName = metricNamePrefix + "request_duration_seconds"
// retriesTotalName is the name of the retries counter.
retriesTotalName = metricNamePrefix + "backend_retries_total"
)
// RegisterPrometheus creates the Prometheus requests counter, request duration
// histogram and retries counter and returns them wrapped in an enabled
// Registry.
//
// The histogram uses config.Buckets when it is non-nil and the buckets
// 0.1, 0.3, 1.2 and 5.0 otherwise.
//
// It must be called only once and failing to register the metrics will lead to
// a panic.
func RegisterPrometheus(config *types.Prometheus) Registry {
	buckets := []float64{0.1, 0.3, 1.2, 5.0}
	if config.Buckets != nil {
		buckets = config.Buckets
	}

	// Requests, partitioned by service, status code and method.
	reqsOpts := stdprometheus.CounterOpts{
		Name: reqsTotalName,
		Help: "How many HTTP requests processed, partitioned by status code and method.",
	}
	reqsCounter := prometheus.NewCounterFrom(reqsOpts, []string{"service", "code", "method"})

	// Request durations, partitioned by service and status code.
	durationOpts := stdprometheus.HistogramOpts{
		Name:    reqDurationName,
		Help:    "How long it took to process the request.",
		Buckets: buckets,
	}
	durationHistogram := prometheus.NewHistogramFrom(durationOpts, []string{"service", "code"})

	// Retries, partitioned by service.
	retriesOpts := stdprometheus.CounterOpts{
		Name: retriesTotalName,
		Help: "How many request retries happened in total.",
	}
	retriesCounter := prometheus.NewCounterFrom(retriesOpts, []string{"service"})

	registry := &standardRegistry{enabled: true}
	registry.reqsCounter = reqsCounter
	registry.reqDurationHistogram = durationHistogram
	registry.retriesCounter = retriesCounter
	return registry
}

104
metrics/prometheus_test.go Normal file
View file

@ -0,0 +1,104 @@
package metrics
import (
"net/http"
"strconv"
"testing"
"github.com/containous/traefik/types"
"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
)
// TestPrometheus records a few samples through the Prometheus registry and
// verifies them, including their labels, in the metric families gathered from
// the default Prometheus gatherer.
func TestPrometheus(t *testing.T) {
	prometheusRegistry := RegisterPrometheus(&types.Prometheus{})

	if enabled := prometheusRegistry.IsEnabled(); !enabled {
		t.Errorf("PrometheusRegistry should return true for IsEnabled()")
	}

	okCode := strconv.Itoa(http.StatusOK)

	// Two requests and two duration observations on identical label sets,
	// followed by a single retry.
	for i := 0; i < 2; i++ {
		prometheusRegistry.ReqsCounter().With("service", "test", "code", okCode, "method", http.MethodGet).Add(1)
	}
	for i := 0; i < 2; i++ {
		prometheusRegistry.ReqDurationHistogram().With("service", "test", "code", okCode).Observe(10000)
	}
	prometheusRegistry.RetriesCounter().With("service", "test").Add(1)

	families, err := prometheus.DefaultGatherer.Gather()
	if err != nil {
		t.Fatalf("could not gather metrics families: %s", err)
	}

	testCases := []struct {
		name   string
		labels map[string]string
		assert func(*dto.MetricFamily)
	}{
		{
			name: reqsTotalName,
			labels: map[string]string{
				"code":    "200",
				"method":  http.MethodGet,
				"service": "test",
			},
			assert: func(family *dto.MetricFamily) {
				want := float64(2)
				if got := family.Metric[0].Counter.GetValue(); got != want {
					t.Errorf("gathered metrics do not contain correct value for total requests, got %f expected %f", got, want)
				}
			},
		},
		{
			name: reqDurationName,
			labels: map[string]string{
				"service": "test",
				"code":    "200",
			},
			assert: func(family *dto.MetricFamily) {
				want := uint64(2)
				if got := family.Metric[0].Histogram.GetSampleCount(); got != want {
					t.Errorf("gathered metrics do not contain correct sample count for request duration, got %d expected %d", got, want)
				}
			},
		},
		{
			name: retriesTotalName,
			labels: map[string]string{
				"service": "test",
			},
			assert: func(family *dto.MetricFamily) {
				want := float64(1)
				if got := family.Metric[0].Counter.GetValue(); got != want {
					t.Errorf("gathered metrics do not contain correct value for total retries, got %f expected %f", got, want)
				}
			},
		},
	}

	for _, tc := range testCases {
		family := findMetricFamily(tc.name, families)
		if family == nil {
			t.Errorf("gathered metrics do not contain %q", tc.name)
			continue
		}

		// Every label on the first metric must be expected and carry the
		// expected value.
		for _, label := range family.Metric[0].Label {
			want, known := tc.labels[*label.Name]
			switch {
			case !known:
				t.Errorf("%q metric contains unexpected label %q", tc.name, *label.Name)
			case want != *label.Value:
				t.Errorf("label %q in metric %q has wrong value %q, expected %q", *label.Name, tc.name, *label.Value, want)
			}
		}

		tc.assert(family)
	}
}
// findMetricFamily returns the first family whose name equals name, or nil
// when there is none.
func findMetricFamily(name string, families []*dto.MetricFamily) *dto.MetricFamily {
	for i := range families {
		if candidate := families[i]; candidate.GetName() == name {
			return candidate
		}
	}

	return nil
}

61
metrics/statsd.go Normal file
View file

@ -0,0 +1,61 @@
package metrics
import (
"time"
"github.com/containous/traefik/log"
"github.com/containous/traefik/safe"
"github.com/containous/traefik/types"
kitlog "github.com/go-kit/kit/log"
"github.com/go-kit/kit/metrics/statsd"
)
// Package-level StatsD state shared by every registry that RegisterStatsd
// hands out.
var (
	// statsdClient is the statsd client created with the "traefik." prefix.
	// The logger handed to it forwards the received key/values to log.Info and
	// never reports an error.
	statsdClient = statsd.New("traefik.", kitlog.LoggerFunc(func(keyvals ...interface{}) error {
		log.Info(keyvals)
		return nil
	}))

	// statsdTicker is the ticker feeding the push loop; it is nil until
	// RegisterStatsd starts the pusher and again after StopStatsd.
	statsdTicker *time.Ticker
)
// RegisterStatsd returns an enabled Registry whose counters and timing are
// created on the shared statsdClient, using the same metric names as the
// Datadog registry.
//
// The metrics pusher is started on the first call only: while statsdTicker is
// non-nil the given config is not consulted again.
func RegisterStatsd(config *types.Statsd) Registry {
	if statsdTicker == nil {
		statsdTicker = initStatsdTicker(config)
	}

	registry := &standardRegistry{enabled: true}
	registry.reqsCounter = statsdClient.NewCounter(ddMetricsReqsName, 1.0)
	registry.reqDurationHistogram = statsdClient.NewTiming(ddMetricsLatencyName, 1.0)
	registry.retriesCounter = statsdClient.NewCounter(ddRetriesTotalName, 1.0)
	return registry
}
// initStatsdTicker starts the goroutine that runs statsdClient.SendLoop over
// UDP and returns the ticker that drives it. The client itself is created at
// package initialization, not here.
//
// An empty config.Address falls back to "localhost:8125". A push interval that
// cannot be parsed, or that parses to a zero or negative duration, falls back
// to 10s: time.NewTicker panics on a non-positive interval, so such a value
// must never reach it.
//
// NOTE(review): time.Ticker.Stop does not close the ticker channel, so the
// SendLoop goroutine presumably keeps waiting after StopStatsd — confirm
// against the SendLoop implementation.
func initStatsdTicker(config *types.Statsd) *time.Ticker {
	const defaultPushInterval = 10 * time.Second

	address := config.Address
	if len(address) == 0 {
		address = "localhost:8125"
	}

	pushInterval, err := time.ParseDuration(config.PushInterval)
	switch {
	case err != nil:
		log.Warnf("Unable to parse %s into pushInterval, using 10s as default value", config.PushInterval)
		pushInterval = defaultPushInterval
	case pushInterval <= 0:
		// Parsed fine (e.g. "0s" or "-5s") but unusable as a ticker period.
		log.Warnf("Non-positive pushInterval %s is not allowed, using 10s as default value", config.PushInterval)
		pushInterval = defaultPushInterval
	}

	report := time.NewTicker(pushInterval)

	safe.Go(func() {
		statsdClient.SendLoop(report.C, "udp", address)
	})

	return report
}
// StopStatsd stops the ticker that triggers pushes to the StatsD Agent and
// clears statsdTicker, so that a later RegisterStatsd call starts a new pusher.
// Calling it when no ticker is running is a no-op.
func StopStatsd() {
	if statsdTicker == nil {
		return
	}

	statsdTicker.Stop()
	statsdTicker = nil
}

38
metrics/statsd_test.go Normal file
View file

@ -0,0 +1,38 @@
package metrics
import (
	"net/http"
	"strconv"
	"testing"
	"time"

	"github.com/containous/traefik/types"
	"github.com/stvp/go-udp-testing"
)
// TestStatsD registers a StatsD registry pushing to a local UDP listener and
// checks that the expected payloads are received.
//
// Status codes are rendered with strconv.Itoa: string(http.StatusOK) is an
// integer-to-rune conversion that yields "È" instead of "200" and is rejected
// by the vet checks `go test` runs.
func TestStatsD(t *testing.T) {
	udp.SetAddr(":18125")
	// This is needed to make sure that UDP Listener listens for data a bit longer, otherwise it will quit after a millisecond
	udp.Timeout = 5 * time.Second

	statsdRegistry := RegisterStatsd(&types.Statsd{Address: ":18125", PushInterval: "1s"})
	defer StopStatsd()

	if !statsdRegistry.IsEnabled() {
		t.Errorf("StatsdRegistry should return true for IsEnabled()")
	}

	// The expected payloads carry no tags, so the two request increments on
	// different label sets are expected to show up as a single total of 2.
	// The duration can be matched because a fixed value is observed.
	expected := []string{
		"traefik.requests.total:2.000000|c\n",
		"traefik.backend.retries.total:2.000000|c\n",
		"traefik.request.duration:10000.000000|ms",
	}

	udp.ShouldReceiveAll(t, expected, func() {
		statsdRegistry.ReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
		statsdRegistry.ReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
		statsdRegistry.RetriesCounter().With("service", "test").Add(1)
		statsdRegistry.RetriesCounter().With("service", "test").Add(1)
		statsdRegistry.ReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
	})
}