1
0
Fork 0

Add router metrics

This commit is contained in:
Jorge Arco 2021-04-30 10:22:04 +02:00 committed by GitHub
parent dc8d5ef744
commit 080cf98e51
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
26 changed files with 756 additions and 217 deletions

View file

@ -20,19 +20,28 @@ var datadogTicker *time.Ticker
// Metric names consistent with https://github.com/DataDog/integrations-extras/pull/64
const (
ddMetricsServiceReqsName = "service.request.total"
ddMetricsServiceLatencyName = "service.request.duration"
ddRetriesTotalName = "service.retries.total"
ddConfigReloadsName = "config.reload.total"
ddConfigReloadsFailureTagName = "failure"
ddLastConfigReloadSuccessName = "config.reload.lastSuccessTimestamp"
ddLastConfigReloadFailureName = "config.reload.lastFailureTimestamp"
ddEntryPointReqsName = "entrypoint.request.total"
ddEntryPointReqDurationName = "entrypoint.request.duration"
ddEntryPointOpenConnsName = "entrypoint.connections.open"
ddOpenConnsName = "service.connections.open"
ddServerUpName = "service.server.up"
ddTLSCertsNotAfterTimestampName = "tls.certs.notAfterTimestamp"
ddEntryPointReqsName = "entrypoint.request.total"
ddEntryPointReqsTLSName = "entrypoint.request.tls.total"
ddEntryPointReqDurationName = "entrypoint.request.duration"
ddEntryPointOpenConnsName = "entrypoint.connections.open"
ddMetricsRouterReqsName = "router.request.total"
ddMetricsRouterReqsTLSName = "router.request.tls.total"
ddMetricsRouterReqsDurationName = "router.request.duration"
ddRouterOpenConnsName = "router.connections.open"
ddMetricsServiceReqsName = "service.request.total"
ddMetricsServiceReqsTLSName = "service.request.tls.total"
ddMetricsServiceReqsDurationName = "service.request.duration"
ddRetriesTotalName = "service.retries.total"
ddOpenConnsName = "service.connections.open"
ddServerUpName = "service.server.up"
)
// RegisterDatadog registers the metrics pusher if this didn't happen yet and creates a datadog Registry instance.
@ -52,14 +61,24 @@ func RegisterDatadog(ctx context.Context, config *types.Datadog) Registry {
if config.AddEntryPointsLabels {
registry.epEnabled = config.AddEntryPointsLabels
registry.entryPointReqsCounter = datadogClient.NewCounter(ddEntryPointReqsName, 1.0)
registry.entryPointReqsTLSCounter = datadogClient.NewCounter(ddEntryPointReqsTLSName, 1.0)
registry.entryPointReqDurationHistogram, _ = NewHistogramWithScale(datadogClient.NewHistogram(ddEntryPointReqDurationName, 1.0), time.Second)
registry.entryPointOpenConnsGauge = datadogClient.NewGauge(ddEntryPointOpenConnsName)
}
if config.AddRoutersLabels {
registry.routerEnabled = config.AddRoutersLabels
registry.routerReqsCounter = datadogClient.NewCounter(ddMetricsRouterReqsName, 1.0)
registry.routerReqsTLSCounter = datadogClient.NewCounter(ddMetricsRouterReqsTLSName, 1.0)
registry.routerReqDurationHistogram, _ = NewHistogramWithScale(datadogClient.NewHistogram(ddMetricsRouterReqsDurationName, 1.0), time.Second)
registry.routerOpenConnsGauge = datadogClient.NewGauge(ddRouterOpenConnsName)
}
if config.AddServicesLabels {
registry.svcEnabled = config.AddServicesLabels
registry.serviceReqsCounter = datadogClient.NewCounter(ddMetricsServiceReqsName, 1.0)
registry.serviceReqDurationHistogram, _ = NewHistogramWithScale(datadogClient.NewHistogram(ddMetricsServiceLatencyName, 1.0), time.Second)
registry.serviceReqsTLSCounter = datadogClient.NewCounter(ddMetricsServiceReqsTLSName, 1.0)
registry.serviceReqDurationHistogram, _ = NewHistogramWithScale(datadogClient.NewHistogram(ddMetricsServiceReqsDurationName, 1.0), time.Second)
registry.serviceRetriesCounter = datadogClient.NewCounter(ddRetriesTotalName, 1.0)
registry.serviceOpenConnsGauge = datadogClient.NewGauge(ddOpenConnsName)
registry.serviceServerUpGauge = datadogClient.NewGauge(ddServerUpName)

View file

@ -17,40 +17,68 @@ func TestDatadog(t *testing.T) {
// This is needed to make sure that UDP Listener listens for data a bit longer, otherwise it will quit after a millisecond
udp.Timeout = 5 * time.Second
datadogRegistry := RegisterDatadog(context.Background(), &types.Datadog{Address: ":18125", PushInterval: ptypes.Duration(time.Second), AddEntryPointsLabels: true, AddServicesLabels: true})
datadogRegistry := RegisterDatadog(context.Background(), &types.Datadog{Address: ":18125", PushInterval: ptypes.Duration(time.Second), AddEntryPointsLabels: true, AddRoutersLabels: true, AddServicesLabels: true})
defer StopDatadog()
if !datadogRegistry.IsEpEnabled() || !datadogRegistry.IsSvcEnabled() {
t.Errorf("DatadogRegistry should return true for IsEnabled()")
if !datadogRegistry.IsEpEnabled() || !datadogRegistry.IsRouterEnabled() || !datadogRegistry.IsSvcEnabled() {
t.Errorf("DatadogRegistry should return true for IsEnabled(), IsRouterEnabled() and IsSvcEnabled()")
}
expected := []string{
// We are only validating counts, as it is nearly impossible to validate latency, since it varies every run
"traefik.service.request.total:1.000000|c|#service:test,code:404,method:GET\n",
"traefik.service.request.total:1.000000|c|#service:test,code:200,method:GET\n",
"traefik.service.retries.total:2.000000|c|#service:test\n",
"traefik.service.request.duration:10000.000000|h|#service:test,code:200\n",
"traefik.config.reload.total:1.000000|c\n",
"traefik.config.reload.total:1.000000|c|#failure:true\n",
"traefik.config.reload.lastSuccessTimestamp:1.000000|g\n",
"traefik.config.reload.lastFailureTimestamp:1.000000|g\n",
"traefik.tls.certs.notAfterTimestamp:1.000000|g|#key:value\n",
"traefik.entrypoint.request.total:1.000000|c|#entrypoint:test\n",
"traefik.entrypoint.request.tls.total:1.000000|c|#entrypoint:test,tls_version:foo,tls_cipher:bar\n",
"traefik.entrypoint.request.duration:10000.000000|h|#entrypoint:test\n",
"traefik.entrypoint.connections.open:1.000000|g|#entrypoint:test\n",
"traefik.router.request.total:1.000000|c|#router:demo,service:test,code:404,method:GET\n",
"traefik.router.request.total:1.000000|c|#router:demo,service:test,code:200,method:GET\n",
"traefik.router.request.tls.total:1.000000|c|#router:demo,service:test,tls_version:foo,tls_cipher:bar\n",
"traefik.router.request.duration:10000.000000|h|#router:demo,service:test,code:200\n",
"traefik.router.connections.open:1.000000|g|#router:demo,service:test\n",
"traefik.service.request.total:1.000000|c|#service:test,code:404,method:GET\n",
"traefik.service.request.total:1.000000|c|#service:test,code:200,method:GET\n",
"traefik.service.request.tls.total:1.000000|c|#service:test,tls_version:foo,tls_cipher:bar\n",
"traefik.service.request.duration:10000.000000|h|#service:test,code:200\n",
"traefik.service.connections.open:1.000000|g|#service:test\n",
"traefik.service.retries.total:2.000000|c|#service:test\n",
"traefik.service.request.duration:10000.000000|h|#service:test,code:200\n",
"traefik.service.server.up:1.000000|g|#service:test,url:http://127.0.0.1,one:two\n",
"traefik.tls.certs.notAfterTimestamp:1.000000|g|#key:value\n",
}
udp.ShouldReceiveAll(t, expected, func() {
datadogRegistry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
datadogRegistry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
datadogRegistry.ServiceReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
datadogRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
datadogRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
datadogRegistry.ConfigReloadsCounter().Add(1)
datadogRegistry.ConfigReloadsFailureCounter().Add(1)
datadogRegistry.LastConfigReloadSuccessGauge().Add(1)
datadogRegistry.LastConfigReloadFailureGauge().Add(1)
datadogRegistry.TLSCertsNotAfterTimestampGauge().With("key", "value").Set(1)
datadogRegistry.EntryPointReqsCounter().With("entrypoint", "test").Add(1)
datadogRegistry.EntryPointReqsTLSCounter().With("entrypoint", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1)
datadogRegistry.EntryPointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
datadogRegistry.EntryPointOpenConnsGauge().With("entrypoint", "test").Set(1)
datadogRegistry.RouterReqsCounter().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
datadogRegistry.RouterReqsCounter().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
datadogRegistry.RouterReqsTLSCounter().With("router", "demo", "service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1)
datadogRegistry.RouterReqDurationHistogram().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
datadogRegistry.RouterOpenConnsGauge().With("router", "demo", "service", "test").Set(1)
datadogRegistry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
datadogRegistry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
datadogRegistry.ServiceReqsTLSCounter().With("service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1)
datadogRegistry.ServiceReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
datadogRegistry.ServiceOpenConnsGauge().With("service", "test").Set(1)
datadogRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
datadogRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
datadogRegistry.ServiceServerUpGauge().With("service", "test", "url", "http://127.0.0.1", "one", "two").Set(1)
datadogRegistry.TLSCertsNotAfterTimestampGauge().With("key", "value").Set(1)
})
}

View file

@ -26,19 +26,29 @@ type influxDBWriter struct {
var influxDBTicker *time.Ticker
const (
influxDBMetricsServiceReqsName = "traefik.service.requests.total"
influxDBMetricsServiceLatencyName = "traefik.service.request.duration"
influxDBRetriesTotalName = "traefik.service.retries.total"
influxDBConfigReloadsName = "traefik.config.reload.total"
influxDBConfigReloadsFailureName = influxDBConfigReloadsName + ".failure"
influxDBLastConfigReloadSuccessName = "traefik.config.reload.lastSuccessTimestamp"
influxDBLastConfigReloadFailureName = "traefik.config.reload.lastFailureTimestamp"
influxDBEntryPointReqsName = "traefik.entrypoint.requests.total"
influxDBEntryPointReqDurationName = "traefik.entrypoint.request.duration"
influxDBEntryPointOpenConnsName = "traefik.entrypoint.connections.open"
influxDBOpenConnsName = "traefik.service.connections.open"
influxDBServerUpName = "traefik.service.server.up"
influxDBConfigReloadsName = "traefik.config.reload.total"
influxDBConfigReloadsFailureName = influxDBConfigReloadsName + ".failure"
influxDBLastConfigReloadSuccessName = "traefik.config.reload.lastSuccessTimestamp"
influxDBLastConfigReloadFailureName = "traefik.config.reload.lastFailureTimestamp"
influxDBTLSCertsNotAfterTimestampName = "traefik.tls.certs.notAfterTimestamp"
influxDBEntryPointReqsName = "traefik.entrypoint.requests.total"
influxDBEntryPointReqsTLSName = "traefik.entrypoint.requests.tls.total"
influxDBEntryPointReqDurationName = "traefik.entrypoint.request.duration"
influxDBEntryPointOpenConnsName = "traefik.entrypoint.connections.open"
influxDBRouterReqsName = "traefik.router.requests.total"
influxDBRouterReqsTLSName = "traefik.router.requests.tls.total"
influxDBRouterReqsDurationName = "traefik.router.request.duration"
influxDBORouterOpenConnsName = "traefik.router.connections.open"
influxDBServiceReqsName = "traefik.service.requests.total"
influxDBServiceReqsTLSName = "traefik.service.requests.tls.total"
influxDBServiceReqsDurationName = "traefik.service.request.duration"
influxDBServiceRetriesTotalName = "traefik.service.retries.total"
influxDBServiceOpenConnsName = "traefik.service.connections.open"
influxDBServiceServerUpName = "traefik.service.server.up"
)
const (
@ -66,17 +76,27 @@ func RegisterInfluxDB(ctx context.Context, config *types.InfluxDB) Registry {
if config.AddEntryPointsLabels {
registry.epEnabled = config.AddEntryPointsLabels
registry.entryPointReqsCounter = influxDBClient.NewCounter(influxDBEntryPointReqsName)
registry.entryPointReqsTLSCounter = influxDBClient.NewCounter(influxDBEntryPointReqsTLSName)
registry.entryPointReqDurationHistogram, _ = NewHistogramWithScale(influxDBClient.NewHistogram(influxDBEntryPointReqDurationName), time.Second)
registry.entryPointOpenConnsGauge = influxDBClient.NewGauge(influxDBEntryPointOpenConnsName)
}
if config.AddRoutersLabels {
registry.routerEnabled = config.AddRoutersLabels
registry.routerReqsCounter = influxDBClient.NewCounter(influxDBRouterReqsName)
registry.routerReqsTLSCounter = influxDBClient.NewCounter(influxDBRouterReqsTLSName)
registry.routerReqDurationHistogram, _ = NewHistogramWithScale(influxDBClient.NewHistogram(influxDBRouterReqsDurationName), time.Second)
registry.routerOpenConnsGauge = influxDBClient.NewGauge(influxDBORouterOpenConnsName)
}
if config.AddServicesLabels {
registry.svcEnabled = config.AddServicesLabels
registry.serviceReqsCounter = influxDBClient.NewCounter(influxDBMetricsServiceReqsName)
registry.serviceReqDurationHistogram, _ = NewHistogramWithScale(influxDBClient.NewHistogram(influxDBMetricsServiceLatencyName), time.Second)
registry.serviceRetriesCounter = influxDBClient.NewCounter(influxDBRetriesTotalName)
registry.serviceOpenConnsGauge = influxDBClient.NewGauge(influxDBOpenConnsName)
registry.serviceServerUpGauge = influxDBClient.NewGauge(influxDBServerUpName)
registry.serviceReqsCounter = influxDBClient.NewCounter(influxDBServiceReqsName)
registry.serviceReqsTLSCounter = influxDBClient.NewCounter(influxDBServiceReqsTLSName)
registry.serviceReqDurationHistogram, _ = NewHistogramWithScale(influxDBClient.NewHistogram(influxDBServiceReqsDurationName), time.Second)
registry.serviceRetriesCounter = influxDBClient.NewCounter(influxDBServiceRetriesTotalName)
registry.serviceOpenConnsGauge = influxDBClient.NewGauge(influxDBServiceOpenConnsName)
registry.serviceServerUpGauge = influxDBClient.NewGauge(influxDBServiceServerUpName)
}
return registry

View file

@ -21,49 +21,28 @@ func TestInfluxDB(t *testing.T) {
// This is needed to make sure that UDP Listener listens for data a bit longer, otherwise it will quit after a millisecond
udp.Timeout = 5 * time.Second
influxDBRegistry := RegisterInfluxDB(context.Background(), &types.InfluxDB{Address: ":8089", PushInterval: ptypes.Duration(time.Second), AddEntryPointsLabels: true, AddServicesLabels: true})
influxDBRegistry := RegisterInfluxDB(context.Background(), &types.InfluxDB{Address: ":8089", PushInterval: ptypes.Duration(time.Second), AddEntryPointsLabels: true, AddRoutersLabels: true, AddServicesLabels: true})
defer StopInfluxDB()
if !influxDBRegistry.IsEpEnabled() || !influxDBRegistry.IsSvcEnabled() {
t.Fatalf("InfluxDB registry must be epEnabled")
if !influxDBRegistry.IsEpEnabled() || !influxDBRegistry.IsRouterEnabled() || !influxDBRegistry.IsSvcEnabled() {
t.Fatalf("InfluxDBRegistry should return true for IsEnabled(), IsRouterEnabled() and IsSvcEnabled()")
}
expectedService := []string{
`(traefik\.service\.requests\.total,code=200,method=GET,service=test count=1) [\d]{19}`,
`(traefik\.service\.requests\.total,code=404,method=GET,service=test count=1) [\d]{19}`,
`(traefik\.service\.request\.duration,code=200,service=test p50=10000,p90=10000,p95=10000,p99=10000) [\d]{19}`,
`(traefik\.service\.retries\.total(?:,code=[\d]{3},method=GET)?,service=test count=2) [\d]{19}`,
`(traefik\.config\.reload\.total(?:[a-z=0-9A-Z,]+)? count=1) [\d]{19}`,
`(traefik\.config\.reload\.total\.failure(?:[a-z=0-9A-Z,]+)? count=1) [\d]{19}`,
`(traefik\.service\.server\.up,service=test(?:[a-z=0-9A-Z,]+)?,url=http://127.0.0.1 value=1) [\d]{19}`,
expectedServer := []string{
`(traefik\.config\.reload\.total count=1) [\d]{19}`,
`(traefik\.config\.reload\.total\.failure count=1) [\d]{19}`,
`(traefik\.config\.reload\.lastSuccessTimestamp value=1) [\d]{19}`,
`(traefik\.config\.reload\.lastFailureTimestamp value=1) [\d]{19}`,
}
msgService := udp.ReceiveString(t, func() {
influxDBRegistry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
influxDBRegistry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
influxDBRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
influxDBRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
influxDBRegistry.ServiceReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
msgServer := udp.ReceiveString(t, func() {
influxDBRegistry.ConfigReloadsCounter().Add(1)
influxDBRegistry.ConfigReloadsFailureCounter().Add(1)
influxDBRegistry.ServiceServerUpGauge().With("service", "test", "url", "http://127.0.0.1").Set(1)
influxDBRegistry.LastConfigReloadSuccessGauge().Set(1)
influxDBRegistry.LastConfigReloadFailureGauge().Set(1)
})
assertMessage(t, msgService, expectedService)
expectedEntrypoint := []string{
`(traefik\.entrypoint\.requests\.total,entrypoint=test(?:[a-z=0-9A-Z,:/.]+)? count=1) [\d]{19}`,
`(traefik\.entrypoint\.request\.duration(?:,code=[\d]{3})?,entrypoint=test(?:[a-z=0-9A-Z,:/.]+)? p50=10000,p90=10000,p95=10000,p99=10000) [\d]{19}`,
`(traefik\.entrypoint\.connections\.open,entrypoint=test value=1) [\d]{19}`,
}
msgEntrypoint := udp.ReceiveString(t, func() {
influxDBRegistry.EntryPointReqsCounter().With("entrypoint", "test").Add(1)
influxDBRegistry.EntryPointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
influxDBRegistry.EntryPointOpenConnsGauge().With("entrypoint", "test").Set(1)
})
assertMessage(t, msgEntrypoint, expectedEntrypoint)
assertMessage(t, msgServer, expectedServer)
expectedTLS := []string{
`(traefik\.tls\.certs\.notAfterTimestamp,key=value value=1) [\d]{19}`,
@ -74,6 +53,63 @@ func TestInfluxDB(t *testing.T) {
})
assertMessage(t, msgTLS, expectedTLS)
expectedEntrypoint := []string{
`(traefik\.entrypoint\.requests\.total,code=200,entrypoint=test,method=GET count=1) [\d]{19}`,
`(traefik\.entrypoint\.requests\.tls\.total,entrypoint=test,tls_cipher=bar,tls_version=foo count=1) [\d]{19}`,
`(traefik\.entrypoint\.request\.duration(?:,code=[\d]{3})?,entrypoint=test p50=10000,p90=10000,p95=10000,p99=10000) [\d]{19}`,
`(traefik\.entrypoint\.connections\.open,entrypoint=test value=1) [\d]{19}`,
}
msgEntrypoint := udp.ReceiveString(t, func() {
influxDBRegistry.EntryPointReqsCounter().With("entrypoint", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
influxDBRegistry.EntryPointReqsTLSCounter().With("entrypoint", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1)
influxDBRegistry.EntryPointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
influxDBRegistry.EntryPointOpenConnsGauge().With("entrypoint", "test").Set(1)
})
assertMessage(t, msgEntrypoint, expectedEntrypoint)
expectedRouter := []string{
`(traefik\.router\.requests\.total,code=200,method=GET,router=demo,service=test count=1) [\d]{19}`,
`(traefik\.router\.requests\.total,code=404,method=GET,router=demo,service=test count=1) [\d]{19}`,
`(traefik\.router\.requests\.tls\.total,router=demo,service=test,tls_cipher=bar,tls_version=foo count=1) [\d]{19}`,
`(traefik\.router\.request\.duration,code=200,router=demo,service=test p50=10000,p90=10000,p95=10000,p99=10000) [\d]{19}`,
`(traefik\.router\.connections\.open,router=demo,service=test value=1) [\d]{19}`,
}
msgRouter := udp.ReceiveString(t, func() {
influxDBRegistry.RouterReqsCounter().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
influxDBRegistry.RouterReqsCounter().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
influxDBRegistry.RouterReqsTLSCounter().With("router", "demo", "service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1)
influxDBRegistry.RouterReqDurationHistogram().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
influxDBRegistry.RouterOpenConnsGauge().With("router", "demo", "service", "test").Set(1)
})
assertMessage(t, msgRouter, expectedRouter)
expectedService := []string{
`(traefik\.service\.requests\.total,code=200,method=GET,service=test count=1) [\d]{19}`,
`(traefik\.service\.requests\.total,code=404,method=GET,service=test count=1) [\d]{19}`,
`(traefik\.service\.requests\.tls\.total,service=test,tls_cipher=bar,tls_version=foo count=1) [\d]{19}`,
`(traefik\.service\.request\.duration,code=200,service=test p50=10000,p90=10000,p95=10000,p99=10000) [\d]{19}`,
`(traefik\.service\.retries\.total(?:,code=[\d]{3},method=GET)?,service=test count=2) [\d]{19}`,
`(traefik\.service\.server\.up,service=test,url=http://127.0.0.1 value=1) [\d]{19}`,
`(traefik\.service\.connections\.open,service=test value=1) [\d]{19}`,
}
msgService := udp.ReceiveString(t, func() {
influxDBRegistry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
influxDBRegistry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
influxDBRegistry.ServiceReqsTLSCounter().With("service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1)
influxDBRegistry.ServiceReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
influxDBRegistry.ServiceOpenConnsGauge().With("service", "test").Set(1)
influxDBRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
influxDBRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
influxDBRegistry.ServiceServerUpGauge().With("service", "test", "url", "http://127.0.0.1").Set(1)
})
assertMessage(t, msgService, expectedService)
}
func TestInfluxDBHTTP(t *testing.T) {
@ -90,47 +126,27 @@ func TestInfluxDBHTTP(t *testing.T) {
}))
defer ts.Close()
influxDBRegistry := RegisterInfluxDB(context.Background(), &types.InfluxDB{Address: ts.URL, Protocol: "http", PushInterval: ptypes.Duration(time.Second), Database: "test", RetentionPolicy: "autogen", AddEntryPointsLabels: true, AddServicesLabels: true})
influxDBRegistry := RegisterInfluxDB(context.Background(), &types.InfluxDB{Address: ts.URL, Protocol: "http", PushInterval: ptypes.Duration(time.Second), Database: "test", RetentionPolicy: "autogen", AddEntryPointsLabels: true, AddServicesLabels: true, AddRoutersLabels: true})
defer StopInfluxDB()
if !influxDBRegistry.IsEpEnabled() || !influxDBRegistry.IsSvcEnabled() {
if !influxDBRegistry.IsEpEnabled() || !influxDBRegistry.IsRouterEnabled() || !influxDBRegistry.IsSvcEnabled() {
t.Fatalf("InfluxDB registry must be epEnabled")
}
expectedService := []string{
`(traefik\.service\.requests\.total,code=200,method=GET,service=test count=1) [\d]{19}`,
`(traefik\.service\.requests\.total,code=404,method=GET,service=test count=1) [\d]{19}`,
`(traefik\.service\.request\.duration,code=200,service=test p50=10000,p90=10000,p95=10000,p99=10000) [\d]{19}`,
`(traefik\.service\.retries\.total(?:,code=[\d]{3},method=GET)?,service=test count=2) [\d]{19}`,
`(traefik\.config\.reload\.total(?:[a-z=0-9A-Z,]+)? count=1) [\d]{19}`,
`(traefik\.config\.reload\.total\.failure(?:[a-z=0-9A-Z,]+)? count=1) [\d]{19}`,
`(traefik\.service\.server\.up,service=test(?:[a-z=0-9A-Z,]+)?,url=http://127.0.0.1 value=1) [\d]{19}`,
expectedServer := []string{
`(traefik\.config\.reload\.total count=1) [\d]{19}`,
`(traefik\.config\.reload\.total\.failure count=1) [\d]{19}`,
`(traefik\.config\.reload\.lastSuccessTimestamp value=1) [\d]{19}`,
`(traefik\.config\.reload\.lastFailureTimestamp value=1) [\d]{19}`,
}
influxDBRegistry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
influxDBRegistry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
influxDBRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
influxDBRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
influxDBRegistry.ServiceReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
influxDBRegistry.ConfigReloadsCounter().Add(1)
influxDBRegistry.ConfigReloadsFailureCounter().Add(1)
influxDBRegistry.ServiceServerUpGauge().With("service", "test", "url", "http://127.0.0.1").Set(1)
msgService := <-c
influxDBRegistry.LastConfigReloadSuccessGauge().Set(1)
influxDBRegistry.LastConfigReloadFailureGauge().Set(1)
msgServer := <-c
assertMessage(t, *msgService, expectedService)
expectedEntrypoint := []string{
`(traefik\.entrypoint\.requests\.total,entrypoint=test(?:[a-z=0-9A-Z,:/.]+)? count=1) [\d]{19}`,
`(traefik\.entrypoint\.request\.duration(?:,code=[\d]{3})?,entrypoint=test(?:[a-z=0-9A-Z,:/.]+)? p50=10000,p90=10000,p95=10000,p99=10000) [\d]{19}`,
`(traefik\.entrypoint\.connections\.open,entrypoint=test value=1) [\d]{19}`,
}
influxDBRegistry.EntryPointReqsCounter().With("entrypoint", "test").Add(1)
influxDBRegistry.EntryPointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
influxDBRegistry.EntryPointOpenConnsGauge().With("entrypoint", "test").Set(1)
msgEntrypoint := <-c
assertMessage(t, *msgEntrypoint, expectedEntrypoint)
assertMessage(t, *msgServer, expectedServer)
expectedTLS := []string{
`(traefik\.tls\.certs\.notAfterTimestamp,key=value value=1) [\d]{19}`,
@ -140,6 +156,60 @@ func TestInfluxDBHTTP(t *testing.T) {
msgTLS := <-c
assertMessage(t, *msgTLS, expectedTLS)
expectedEntrypoint := []string{
`(traefik\.entrypoint\.requests\.total,code=200,entrypoint=test,method=GET count=1) [\d]{19}`,
`(traefik\.entrypoint\.requests\.tls\.total,entrypoint=test,tls_cipher=bar,tls_version=foo count=1) [\d]{19}`,
`(traefik\.entrypoint\.request\.duration(?:,code=[\d]{3})?,entrypoint=test p50=10000,p90=10000,p95=10000,p99=10000) [\d]{19}`,
`(traefik\.entrypoint\.connections\.open,entrypoint=test value=1) [\d]{19}`,
}
influxDBRegistry.EntryPointReqsCounter().With("entrypoint", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
influxDBRegistry.EntryPointReqsTLSCounter().With("entrypoint", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1)
influxDBRegistry.EntryPointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
influxDBRegistry.EntryPointOpenConnsGauge().With("entrypoint", "test").Set(1)
msgEntrypoint := <-c
assertMessage(t, *msgEntrypoint, expectedEntrypoint)
expectedRouter := []string{
`(traefik\.router\.requests\.total,code=200,method=GET,router=demo,service=test count=1) [\d]{19}`,
`(traefik\.router\.requests\.total,code=404,method=GET,router=demo,service=test count=1) [\d]{19}`,
`(traefik\.router\.requests\.tls\.total,router=demo,service=test,tls_cipher=bar,tls_version=foo count=1) [\d]{19}`,
`(traefik\.router\.request\.duration,code=200,router=demo,service=test p50=10000,p90=10000,p95=10000,p99=10000) [\d]{19}`,
`(traefik\.router\.connections\.open,router=demo,service=test value=1) [\d]{19}`,
}
influxDBRegistry.RouterReqsCounter().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
influxDBRegistry.RouterReqsCounter().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
influxDBRegistry.RouterReqsTLSCounter().With("router", "demo", "service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1)
influxDBRegistry.RouterReqDurationHistogram().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
influxDBRegistry.RouterOpenConnsGauge().With("router", "demo", "service", "test").Set(1)
msgRouter := <-c
assertMessage(t, *msgRouter, expectedRouter)
expectedService := []string{
`(traefik\.service\.requests\.total,code=200,method=GET,service=test count=1) [\d]{19}`,
`(traefik\.service\.requests\.total,code=404,method=GET,service=test count=1) [\d]{19}`,
`(traefik\.service\.requests\.tls\.total,service=test,tls_cipher=bar,tls_version=foo count=1) [\d]{19}`,
`(traefik\.service\.request\.duration,code=200,service=test p50=10000,p90=10000,p95=10000,p99=10000) [\d]{19}`,
`(traefik\.service\.retries\.total(?:,code=[\d]{3},method=GET)?,service=test count=2) [\d]{19}`,
`(traefik\.service\.server\.up,service=test,url=http://127.0.0.1 value=1) [\d]{19}`,
`(traefik\.service\.connections\.open,service=test value=1) [\d]{19}`,
}
influxDBRegistry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
influxDBRegistry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
influxDBRegistry.ServiceReqsTLSCounter().With("service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1)
influxDBRegistry.ServiceReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
influxDBRegistry.ServiceOpenConnsGauge().With("service", "test").Set(1)
influxDBRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
influxDBRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
influxDBRegistry.ServiceServerUpGauge().With("service", "test", "url", "http://127.0.0.1").Set(1)
msgService := <-c
assertMessage(t, *msgService, expectedService)
}
func assertMessage(t *testing.T, msg string, patterns []string) {

View file

@ -12,6 +12,8 @@ import (
type Registry interface {
// IsEpEnabled shows whether metrics instrumentation is enabled on entry points.
IsEpEnabled() bool
// IsRouterEnabled shows whether metrics instrumentation is enabled on routers.
IsRouterEnabled() bool
// IsSvcEnabled shows whether metrics instrumentation is enabled on services.
IsSvcEnabled() bool
@ -30,6 +32,12 @@ type Registry interface {
EntryPointReqDurationHistogram() ScalableHistogram
EntryPointOpenConnsGauge() metrics.Gauge
// router metrics
RouterReqsCounter() metrics.Counter
RouterReqsTLSCounter() metrics.Counter
RouterReqDurationHistogram() ScalableHistogram
RouterOpenConnsGauge() metrics.Gauge
// service metrics
ServiceReqsCounter() metrics.Counter
ServiceReqsTLSCounter() metrics.Counter
@ -58,6 +66,10 @@ func NewMultiRegistry(registries []Registry) Registry {
var entryPointReqsTLSCounter []metrics.Counter
var entryPointReqDurationHistogram []ScalableHistogram
var entryPointOpenConnsGauge []metrics.Gauge
var routerReqsCounter []metrics.Counter
var routerReqsTLSCounter []metrics.Counter
var routerReqDurationHistogram []ScalableHistogram
var routerOpenConnsGauge []metrics.Gauge
var serviceReqsCounter []metrics.Counter
var serviceReqsTLSCounter []metrics.Counter
var serviceReqDurationHistogram []ScalableHistogram
@ -93,6 +105,18 @@ func NewMultiRegistry(registries []Registry) Registry {
if r.EntryPointOpenConnsGauge() != nil {
entryPointOpenConnsGauge = append(entryPointOpenConnsGauge, r.EntryPointOpenConnsGauge())
}
if r.RouterReqsCounter() != nil {
routerReqsCounter = append(routerReqsCounter, r.RouterReqsCounter())
}
if r.RouterReqsTLSCounter() != nil {
routerReqsTLSCounter = append(routerReqsTLSCounter, r.RouterReqsTLSCounter())
}
if r.RouterReqDurationHistogram() != nil {
routerReqDurationHistogram = append(routerReqDurationHistogram, r.RouterReqDurationHistogram())
}
if r.RouterOpenConnsGauge() != nil {
routerOpenConnsGauge = append(routerOpenConnsGauge, r.RouterOpenConnsGauge())
}
if r.ServiceReqsCounter() != nil {
serviceReqsCounter = append(serviceReqsCounter, r.ServiceReqsCounter())
}
@ -116,6 +140,7 @@ func NewMultiRegistry(registries []Registry) Registry {
return &standardRegistry{
epEnabled: len(entryPointReqsCounter) > 0 || len(entryPointReqDurationHistogram) > 0 || len(entryPointOpenConnsGauge) > 0,
svcEnabled: len(serviceReqsCounter) > 0 || len(serviceReqDurationHistogram) > 0 || len(serviceOpenConnsGauge) > 0 || len(serviceRetriesCounter) > 0 || len(serviceServerUpGauge) > 0,
routerEnabled: len(routerReqsCounter) > 0 || len(routerReqDurationHistogram) > 0 || len(routerOpenConnsGauge) > 0,
configReloadsCounter: multi.NewCounter(configReloadsCounter...),
configReloadsFailureCounter: multi.NewCounter(configReloadsFailureCounter...),
lastConfigReloadSuccessGauge: multi.NewGauge(lastConfigReloadSuccessGauge...),
@ -125,6 +150,10 @@ func NewMultiRegistry(registries []Registry) Registry {
entryPointReqsTLSCounter: multi.NewCounter(entryPointReqsTLSCounter...),
entryPointReqDurationHistogram: NewMultiHistogram(entryPointReqDurationHistogram...),
entryPointOpenConnsGauge: multi.NewGauge(entryPointOpenConnsGauge...),
routerReqsCounter: multi.NewCounter(routerReqsCounter...),
routerReqsTLSCounter: multi.NewCounter(routerReqsTLSCounter...),
routerReqDurationHistogram: NewMultiHistogram(routerReqDurationHistogram...),
routerOpenConnsGauge: multi.NewGauge(routerOpenConnsGauge...),
serviceReqsCounter: multi.NewCounter(serviceReqsCounter...),
serviceReqsTLSCounter: multi.NewCounter(serviceReqsTLSCounter...),
serviceReqDurationHistogram: NewMultiHistogram(serviceReqDurationHistogram...),
@ -136,6 +165,7 @@ func NewMultiRegistry(registries []Registry) Registry {
type standardRegistry struct {
epEnabled bool
routerEnabled bool
svcEnabled bool
configReloadsCounter metrics.Counter
configReloadsFailureCounter metrics.Counter
@ -146,6 +176,10 @@ type standardRegistry struct {
entryPointReqsTLSCounter metrics.Counter
entryPointReqDurationHistogram ScalableHistogram
entryPointOpenConnsGauge metrics.Gauge
routerReqsCounter metrics.Counter
routerReqsTLSCounter metrics.Counter
routerReqDurationHistogram ScalableHistogram
routerOpenConnsGauge metrics.Gauge
serviceReqsCounter metrics.Counter
serviceReqsTLSCounter metrics.Counter
serviceReqDurationHistogram ScalableHistogram
@ -158,6 +192,10 @@ func (r *standardRegistry) IsEpEnabled() bool {
return r.epEnabled
}
func (r *standardRegistry) IsRouterEnabled() bool {
return r.routerEnabled
}
func (r *standardRegistry) IsSvcEnabled() bool {
return r.svcEnabled
}
@ -198,6 +236,22 @@ func (r *standardRegistry) EntryPointOpenConnsGauge() metrics.Gauge {
return r.entryPointOpenConnsGauge
}
func (r *standardRegistry) RouterReqsCounter() metrics.Counter {
return r.routerReqsCounter
}
func (r *standardRegistry) RouterReqsTLSCounter() metrics.Counter {
return r.routerReqsTLSCounter
}
func (r *standardRegistry) RouterReqDurationHistogram() ScalableHistogram {
return r.routerReqDurationHistogram
}
func (r *standardRegistry) RouterOpenConnsGauge() metrics.Gauge {
return r.routerOpenConnsGauge
}
func (r *standardRegistry) ServiceReqsCounter() metrics.Counter {
return r.serviceReqsCounter
}

View file

@ -40,16 +40,21 @@ const (
entryPointReqDurationName = metricEntryPointPrefix + "request_duration_seconds"
entryPointOpenConnsName = metricEntryPointPrefix + "open_connections"
// service level.
// router level.
metricRouterPrefix = MetricNamePrefix + "router_"
routerReqsTotalName = metricRouterPrefix + "requests_total"
routerReqsTLSTotalName = metricRouterPrefix + "requests_tls_total"
routerReqDurationName = metricRouterPrefix + "request_duration_seconds"
routerOpenConnsName = metricRouterPrefix + "open_connections"
// MetricServicePrefix prefix of all service metric names.
MetricServicePrefix = MetricNamePrefix + "service_"
serviceReqsTotalName = MetricServicePrefix + "requests_total"
serviceReqsTLSTotalName = MetricServicePrefix + "requests_tls_total"
serviceReqDurationName = MetricServicePrefix + "request_duration_seconds"
serviceOpenConnsName = MetricServicePrefix + "open_connections"
serviceRetriesTotalName = MetricServicePrefix + "retries_total"
serviceServerUpName = MetricServicePrefix + "server_up"
// service level.
metricServicePrefix = MetricNamePrefix + "service_"
serviceReqsTotalName = metricServicePrefix + "requests_total"
serviceReqsTLSTotalName = metricServicePrefix + "requests_tls_total"
serviceReqDurationName = metricServicePrefix + "request_duration_seconds"
serviceOpenConnsName = metricServicePrefix + "open_connections"
serviceRetriesTotalName = metricServicePrefix + "retries_total"
serviceServerUpName = metricServicePrefix + "server_up"
)
// promState holds all metric state internally and acts as the only Collector we register for Prometheus.
@ -140,6 +145,7 @@ func initStandardRegistry(config *types.Prometheus) Registry {
reg := &standardRegistry{
epEnabled: config.AddEntryPointsLabels,
routerEnabled: config.AddRoutersLabels,
svcEnabled: config.AddServicesLabels,
configReloadsCounter: configReloads,
configReloadsFailureCounter: configReloadsFailures,
@ -180,6 +186,37 @@ func initStandardRegistry(config *types.Prometheus) Registry {
reg.entryPointOpenConnsGauge = entryPointOpenConns
}
if config.AddRoutersLabels {
routerReqs := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
Name: routerReqsTotalName,
Help: "How many HTTP requests are processed on a router, partitioned by service, status code, protocol, and method.",
}, []string{"code", "method", "protocol", "router", "service"})
routerReqsTLS := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
Name: routerReqsTLSTotalName,
Help: "How many HTTP requests with TLS are processed on a router, partitioned by service, TLS Version, and TLS cipher Used.",
}, []string{"tls_version", "tls_cipher", "router", "service"})
routerReqDurations := newHistogramFrom(promState.collectors, stdprometheus.HistogramOpts{
Name: routerReqDurationName,
Help: "How long it took to process the request on a router, partitioned by service, status code, protocol, and method.",
Buckets: buckets,
}, []string{"code", "method", "protocol", "router", "service"})
routerOpenConns := newGaugeFrom(promState.collectors, stdprometheus.GaugeOpts{
Name: routerOpenConnsName,
Help: "How many open connections exist on a router, partitioned by service, method, and protocol.",
}, []string{"method", "protocol", "router", "service"})
promState.describers = append(promState.describers, []func(chan<- *stdprometheus.Desc){
routerReqs.cv.Describe,
routerReqsTLS.cv.Describe,
routerReqDurations.hv.Describe,
routerOpenConns.gv.Describe,
}...)
reg.routerReqsCounter = routerReqs
reg.routerReqsTLSCounter = routerReqsTLS
reg.routerReqDurationHistogram, _ = NewHistogramWithScale(routerReqDurations, time.Second)
reg.routerOpenConnsGauge = routerOpenConns
}
if config.AddServicesLabels {
serviceReqs := newCounterFrom(promState.collectors, stdprometheus.CounterOpts{
Name: serviceReqsTotalName,

View file

@ -104,11 +104,11 @@ func TestPrometheus(t *testing.T) {
// Reset state of global promState.
defer promState.reset()
prometheusRegistry := RegisterPrometheus(context.Background(), &types.Prometheus{AddEntryPointsLabels: true, AddServicesLabels: true})
prometheusRegistry := RegisterPrometheus(context.Background(), &types.Prometheus{AddEntryPointsLabels: true, AddRoutersLabels: true, AddServicesLabels: true})
defer promRegistry.Unregister(promState)
if !prometheusRegistry.IsEpEnabled() || !prometheusRegistry.IsSvcEnabled() {
t.Errorf("PrometheusRegistry should return true for IsEnabled()")
if !prometheusRegistry.IsEpEnabled() || !prometheusRegistry.IsRouterEnabled() || !prometheusRegistry.IsSvcEnabled() {
t.Errorf("PrometheusRegistry should return true for IsEnabled(), IsRouterEnabled() and IsSvcEnabled()")
}
prometheusRegistry.ConfigReloadsCounter().Add(1)
@ -134,10 +134,31 @@ func TestPrometheus(t *testing.T) {
With("method", http.MethodGet, "protocol", "http", "entrypoint", "http").
Set(1)
prometheusRegistry.
RouterReqsCounter().
With("router", "demo", "service", "service1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
Add(1)
prometheusRegistry.
RouterReqsTLSCounter().
With("router", "demo", "service", "service1", "tls_version", "foo", "tls_cipher", "bar").
Add(1)
prometheusRegistry.
RouterReqDurationHistogram().
With("router", "demo", "service", "service1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
Observe(10000)
prometheusRegistry.
RouterOpenConnsGauge().
With("router", "demo", "service", "service1", "method", http.MethodGet, "protocol", "http").
Set(1)
prometheusRegistry.
ServiceReqsCounter().
With("service", "service1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
Add(1)
prometheusRegistry.
ServiceReqsTLSCounter().
With("service", "service1", "tls_version", "foo", "tls_cipher", "bar").
Add(1)
prometheusRegistry.
ServiceReqDurationHistogram().
With("service", "service1", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet, "protocol", "http").
@ -218,6 +239,48 @@ func TestPrometheus(t *testing.T) {
},
assert: buildGaugeAssert(t, entryPointOpenConnsName, 1),
},
{
name: routerReqsTotalName,
labels: map[string]string{
"code": "200",
"method": http.MethodGet,
"protocol": "http",
"service": "service1",
"router": "demo",
},
assert: buildCounterAssert(t, routerReqsTotalName, 1),
},
{
name: routerReqsTLSTotalName,
labels: map[string]string{
"service": "service1",
"router": "demo",
"tls_version": "foo",
"tls_cipher": "bar",
},
assert: buildCounterAssert(t, routerReqsTLSTotalName, 1),
},
{
name: routerReqDurationName,
labels: map[string]string{
"code": "200",
"method": http.MethodGet,
"protocol": "http",
"service": "service1",
"router": "demo",
},
assert: buildHistogramAssert(t, routerReqDurationName, 1),
},
{
name: routerOpenConnsName,
labels: map[string]string{
"method": http.MethodGet,
"protocol": "http",
"service": "service1",
"router": "demo",
},
assert: buildGaugeAssert(t, routerOpenConnsName, 1),
},
{
name: serviceReqsTotalName,
labels: map[string]string{
@ -228,6 +291,15 @@ func TestPrometheus(t *testing.T) {
},
assert: buildCounterAssert(t, serviceReqsTotalName, 1),
},
{
name: serviceReqsTLSTotalName,
labels: map[string]string{
"service": "service1",
"tls_version": "foo",
"tls_cipher": "bar",
},
assert: buildCounterAssert(t, serviceReqsTLSTotalName, 1),
},
{
name: serviceReqDurationName,
labels: map[string]string{

View file

@ -17,19 +17,29 @@ var (
)
const (
statsdMetricsServiceReqsName = "service.request.total"
statsdMetricsServiceLatencyName = "service.request.duration"
statsdRetriesTotalName = "service.retries.total"
statsdConfigReloadsName = "config.reload.total"
statsdConfigReloadsFailureName = statsdConfigReloadsName + ".failure"
statsdLastConfigReloadSuccessName = "config.reload.lastSuccessTimestamp"
statsdLastConfigReloadFailureName = "config.reload.lastFailureTimestamp"
statsdEntryPointReqsName = "entrypoint.request.total"
statsdEntryPointReqDurationName = "entrypoint.request.duration"
statsdEntryPointOpenConnsName = "entrypoint.connections.open"
statsdOpenConnsName = "service.connections.open"
statsdServerUpName = "service.server.up"
statsdConfigReloadsName = "config.reload.total"
statsdConfigReloadsFailureName = statsdConfigReloadsName + ".failure"
statsdLastConfigReloadSuccessName = "config.reload.lastSuccessTimestamp"
statsdLastConfigReloadFailureName = "config.reload.lastFailureTimestamp"
statsdTLSCertsNotAfterTimestampName = "tls.certs.notAfterTimestamp"
statsdEntryPointReqsName = "entrypoint.request.total"
statsdEntryPointReqsTLSName = "entrypoint.request.tls.total"
statsdEntryPointReqDurationName = "entrypoint.request.duration"
statsdEntryPointOpenConnsName = "entrypoint.connections.open"
statsdRouterReqsName = "router.request.total"
statsdRouterReqsTLSName = "router.request.tls.total"
statsdRouterReqsDurationName = "router.request.duration"
statsdRouterOpenConnsName = "router.connections.open"
statsdServiceReqsName = "service.request.total"
statsdServiceReqsTLSName = "service.request.tls.total"
statsdServiceReqsDurationName = "service.request.duration"
statsdServiceRetriesTotalName = "service.retries.total"
statsdServiceServerUpName = "service.server.up"
statsdServiceOpenConnsName = "service.connections.open"
)
// RegisterStatsd registers the metrics pusher if this didn't happen yet and creates a statsd Registry instance.
@ -59,17 +69,27 @@ func RegisterStatsd(ctx context.Context, config *types.Statsd) Registry {
if config.AddEntryPointsLabels {
registry.epEnabled = config.AddEntryPointsLabels
registry.entryPointReqsCounter = statsdClient.NewCounter(statsdEntryPointReqsName, 1.0)
registry.entryPointReqsTLSCounter = statsdClient.NewCounter(statsdEntryPointReqsTLSName, 1.0)
registry.entryPointReqDurationHistogram, _ = NewHistogramWithScale(statsdClient.NewTiming(statsdEntryPointReqDurationName, 1.0), time.Millisecond)
registry.entryPointOpenConnsGauge = statsdClient.NewGauge(statsdEntryPointOpenConnsName)
}
if config.AddRoutersLabels {
registry.routerEnabled = config.AddRoutersLabels
registry.routerReqsCounter = statsdClient.NewCounter(statsdRouterReqsName, 1.0)
registry.routerReqsTLSCounter = statsdClient.NewCounter(statsdRouterReqsTLSName, 1.0)
registry.routerReqDurationHistogram, _ = NewHistogramWithScale(statsdClient.NewTiming(statsdRouterReqsDurationName, 1.0), time.Millisecond)
registry.routerOpenConnsGauge = statsdClient.NewGauge(statsdRouterOpenConnsName)
}
if config.AddServicesLabels {
registry.svcEnabled = config.AddServicesLabels
registry.serviceReqsCounter = statsdClient.NewCounter(statsdMetricsServiceReqsName, 1.0)
registry.serviceReqDurationHistogram, _ = NewHistogramWithScale(statsdClient.NewTiming(statsdMetricsServiceLatencyName, 1.0), time.Millisecond)
registry.serviceRetriesCounter = statsdClient.NewCounter(statsdRetriesTotalName, 1.0)
registry.serviceOpenConnsGauge = statsdClient.NewGauge(statsdOpenConnsName)
registry.serviceServerUpGauge = statsdClient.NewGauge(statsdServerUpName)
registry.serviceReqsCounter = statsdClient.NewCounter(statsdServiceReqsName, 1.0)
registry.serviceReqsTLSCounter = statsdClient.NewCounter(statsdServiceReqsTLSName, 1.0)
registry.serviceReqDurationHistogram, _ = NewHistogramWithScale(statsdClient.NewTiming(statsdServiceReqsDurationName, 1.0), time.Millisecond)
registry.serviceRetriesCounter = statsdClient.NewCounter(statsdServiceRetriesTotalName, 1.0)
registry.serviceOpenConnsGauge = statsdClient.NewGauge(statsdServiceOpenConnsName)
registry.serviceServerUpGauge = statsdClient.NewGauge(statsdServiceServerUpName)
}
return registry

View file

@ -13,85 +13,96 @@ import (
)
func TestStatsD(t *testing.T) {
t.Cleanup(func() {
StopStatsd()
})
udp.SetAddr(":18125")
// This is needed to make sure that UDP Listener listens for data a bit longer, otherwise it will quit after a millisecond
udp.Timeout = 5 * time.Second
statsdRegistry := RegisterStatsd(context.Background(), &types.Statsd{Address: ":18125", PushInterval: ptypes.Duration(time.Second), AddEntryPointsLabels: true, AddServicesLabels: true})
defer StopStatsd()
statsdRegistry := RegisterStatsd(context.Background(), &types.Statsd{Address: ":18125", PushInterval: ptypes.Duration(time.Second), AddEntryPointsLabels: true, AddRoutersLabels: true, AddServicesLabels: true})
if !statsdRegistry.IsEpEnabled() || !statsdRegistry.IsSvcEnabled() {
t.Errorf("Statsd registry should return true for IsEnabled()")
}
expected := []string{
// We are only validating counts, as it is nearly impossible to validate latency, since it varies every run
"traefik.service.request.total:2.000000|c\n",
"traefik.service.retries.total:2.000000|c\n",
"traefik.service.request.duration:10000.000000|ms",
"traefik.config.reload.total:1.000000|c\n",
"traefik.config.reload.total:1.000000|c\n",
"traefik.entrypoint.request.total:1.000000|c\n",
"traefik.entrypoint.request.duration:10000.000000|ms",
"traefik.entrypoint.connections.open:1.000000|g\n",
"traefik.service.server.up:1.000000|g\n",
"tls.certs.notAfterTimestamp:1.000000|g\n",
}
udp.ShouldReceiveAll(t, expected, func() {
statsdRegistry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
statsdRegistry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
statsdRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
statsdRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
statsdRegistry.ServiceReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
statsdRegistry.ConfigReloadsCounter().Add(1)
statsdRegistry.ConfigReloadsFailureCounter().Add(1)
statsdRegistry.EntryPointReqsCounter().With("entrypoint", "test").Add(1)
statsdRegistry.EntryPointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
statsdRegistry.EntryPointOpenConnsGauge().With("entrypoint", "test").Set(1)
statsdRegistry.ServiceServerUpGauge().With("service:test", "url", "http://127.0.0.1").Set(1)
statsdRegistry.TLSCertsNotAfterTimestampGauge().With("key", "value").Set(1)
})
testRegistry(t, "", statsdRegistry)
}
func TestStatsDWithPrefix(t *testing.T) {
t.Cleanup(func() {
StopStatsd()
})
udp.SetAddr(":18125")
// This is needed to make sure that UDP Listener listens for data a bit longer, otherwise it will quit after a millisecond
udp.Timeout = 5 * time.Second
statsdRegistry := RegisterStatsd(context.Background(), &types.Statsd{Address: ":18125", PushInterval: ptypes.Duration(time.Second), AddEntryPointsLabels: true, AddServicesLabels: true, Prefix: "testPrefix"})
defer StopStatsd()
statsdRegistry := RegisterStatsd(context.Background(), &types.Statsd{Address: ":18125", PushInterval: ptypes.Duration(time.Second), AddEntryPointsLabels: true, AddRoutersLabels: true, AddServicesLabels: true, Prefix: "testPrefix"})
if !statsdRegistry.IsEpEnabled() || !statsdRegistry.IsSvcEnabled() {
t.Errorf("Statsd registry should return true for IsEnabled()")
testRegistry(t, "testPrefix", statsdRegistry)
}
func testRegistry(t *testing.T, metricsPrefix string, registry Registry) {
t.Helper()
if !registry.IsEpEnabled() || !registry.IsRouterEnabled() || !registry.IsSvcEnabled() {
t.Errorf("Statsd registry should return true for IsEnabled(), IsRouterEnabled() and IsSvcEnabled()")
}
if metricsPrefix == "" {
metricsPrefix = "traefik"
}
expected := []string{
// We are only validating counts, as it is nearly impossible to validate latency, since it varies every run
"testPrefix.service.request.total:2.000000|c\n",
"testPrefix.service.retries.total:2.000000|c\n",
"testPrefix.service.request.duration:10000.000000|ms",
"testPrefix.config.reload.total:1.000000|c\n",
"testPrefix.config.reload.total:1.000000|c\n",
"testPrefix.entrypoint.request.total:1.000000|c\n",
"testPrefix.entrypoint.request.duration:10000.000000|ms",
"testPrefix.entrypoint.connections.open:1.000000|g\n",
"testPrefix.service.server.up:1.000000|g\n",
"tls.certs.notAfterTimestamp:1.000000|g\n",
metricsPrefix + ".config.reload.total:1.000000|c\n",
metricsPrefix + ".config.reload.total.failure:1.000000|c\n",
metricsPrefix + ".config.reload.lastSuccessTimestamp:1.000000|g\n",
metricsPrefix + ".config.reload.lastFailureTimestamp:1.000000|g\n",
metricsPrefix + ".tls.certs.notAfterTimestamp:1.000000|g\n",
metricsPrefix + ".entrypoint.request.total:1.000000|c\n",
metricsPrefix + ".entrypoint.request.tls.total:1.000000|c\n",
metricsPrefix + ".entrypoint.request.duration:10000.000000|ms",
metricsPrefix + ".entrypoint.connections.open:1.000000|g\n",
metricsPrefix + ".router.request.total:2.000000|c\n",
metricsPrefix + ".router.request.tls.total:1.000000|c\n",
metricsPrefix + ".router.request.duration:10000.000000|ms",
metricsPrefix + ".router.connections.open:1.000000|g\n",
metricsPrefix + ".service.request.total:2.000000|c\n",
metricsPrefix + ".service.request.tls.total:1.000000|c\n",
metricsPrefix + ".service.request.duration:10000.000000|ms",
metricsPrefix + ".service.connections.open:1.000000|g\n",
metricsPrefix + ".service.retries.total:2.000000|c\n",
metricsPrefix + ".service.server.up:1.000000|g\n",
}
udp.ShouldReceiveAll(t, expected, func() {
statsdRegistry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
statsdRegistry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
statsdRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
statsdRegistry.ServiceRetriesCounter().With("service", "test").Add(1)
statsdRegistry.ServiceReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
statsdRegistry.ConfigReloadsCounter().Add(1)
statsdRegistry.ConfigReloadsFailureCounter().Add(1)
statsdRegistry.EntryPointReqsCounter().With("entrypoint", "test").Add(1)
statsdRegistry.EntryPointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
statsdRegistry.EntryPointOpenConnsGauge().With("entrypoint", "test").Set(1)
statsdRegistry.ServiceServerUpGauge().With("service:test", "url", "http://127.0.0.1").Set(1)
statsdRegistry.TLSCertsNotAfterTimestampGauge().With("key", "value").Set(1)
registry.ConfigReloadsCounter().Add(1)
registry.ConfigReloadsFailureCounter().Add(1)
registry.LastConfigReloadSuccessGauge().Set(1)
registry.LastConfigReloadFailureGauge().Set(1)
registry.TLSCertsNotAfterTimestampGauge().With("key", "value").Set(1)
registry.EntryPointReqsCounter().With("entrypoint", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
registry.EntryPointReqsTLSCounter().With("entrypoint", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1)
registry.EntryPointReqDurationHistogram().With("entrypoint", "test").Observe(10000)
registry.EntryPointOpenConnsGauge().With("entrypoint", "test").Set(1)
registry.RouterReqsCounter().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
registry.RouterReqsCounter().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
registry.RouterReqsTLSCounter().With("router", "demo", "service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1)
registry.RouterReqDurationHistogram().With("router", "demo", "service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
registry.RouterOpenConnsGauge().With("router", "demo", "service", "test").Set(1)
registry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusOK), "method", http.MethodGet).Add(1)
registry.ServiceReqsCounter().With("service", "test", "code", strconv.Itoa(http.StatusNotFound), "method", http.MethodGet).Add(1)
registry.ServiceReqsTLSCounter().With("service", "test", "tls_version", "foo", "tls_cipher", "bar").Add(1)
registry.ServiceReqDurationHistogram().With("service", "test", "code", strconv.Itoa(http.StatusOK)).Observe(10000)
registry.ServiceOpenConnsGauge().With("service", "test").Set(1)
registry.ServiceRetriesCounter().With("service", "test").Add(1)
registry.ServiceRetriesCounter().With("service", "test").Add(1)
registry.ServiceServerUpGauge().With("service:test", "url", "http://127.0.0.1").Set(1)
})
}